@min-pack/tfjs-node 2.17.1 → 3.8.1-patch.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -2
- package/addon-node_modules/onnxruntime-node/bin/{napi-v3/linux/x64/libonnxruntime.so.1.14.0 → napi-v6/linux/arm64/libonnxruntime.so.1} +0 -0
- package/addon-node_modules/onnxruntime-node/bin/napi-v6/linux/arm64/onnxruntime_binding.node +0 -0
- package/addon-node_modules/onnxruntime-node/bin/napi-v6/linux/x64/libonnxruntime.so.1 +0 -0
- package/addon-node_modules/onnxruntime-node/bin/napi-v6/linux/x64/onnxruntime_binding.node +0 -0
- package/index.d.ts +2 -0
- package/index.js +7102 -3371
- package/package.json +1 -10
- package/tfjs-types/backends/onnx.d.ts +37 -0
- package/tfjs-types/base/feature_extraction_utils.d.ts +41 -0
- package/tfjs-types/base/image_processors_utils.d.ts +332 -0
- package/tfjs-types/base/processing_utils.d.ts +89 -0
- package/tfjs-types/configs.d.ts +93 -0
- package/tfjs-types/env.d.ts +112 -0
- package/tfjs-types/generation/configuration_utils.d.ts +326 -0
- package/tfjs-types/generation/logits_process.d.ts +364 -0
- package/tfjs-types/generation/logits_sampler.d.ts +51 -0
- package/tfjs-types/generation/parameters.d.ts +47 -0
- package/tfjs-types/generation/stopping_criteria.d.ts +81 -0
- package/tfjs-types/generation/streamers.d.ts +88 -0
- package/tfjs-types/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.d.ts +25 -0
- package/tfjs-types/models/auto/feature_extraction_auto.d.ts +5 -0
- package/tfjs-types/models/auto/image_processing_auto.d.ts +5 -0
- package/tfjs-types/models/auto/processing_auto.d.ts +39 -0
- package/tfjs-types/models/beit/image_processing_beit.d.ts +4 -0
- package/tfjs-types/models/bit/image_processing_bit.d.ts +4 -0
- package/tfjs-types/models/chinese_clip/image_processing_chinese_clip.d.ts +4 -0
- package/tfjs-types/models/clap/feature_extraction_clap.d.ts +57 -0
- package/tfjs-types/models/clip/image_processing_clip.d.ts +6 -0
- package/tfjs-types/models/convnext/image_processing_convnext.d.ts +12 -0
- package/tfjs-types/models/dac/feature_extraction_dac.d.ts +4 -0
- package/tfjs-types/models/deit/image_processing_deit.d.ts +6 -0
- package/tfjs-types/models/detr/image_processing_detr.d.ts +42 -0
- package/tfjs-types/models/dinov3_vit/image_processing_dinov3_vit.d.ts +4 -0
- package/tfjs-types/models/donut/image_processing_donut.d.ts +7 -0
- package/tfjs-types/models/dpt/image_processing_dpt.d.ts +6 -0
- package/tfjs-types/models/efficientnet/image_processing_efficientnet.d.ts +6 -0
- package/tfjs-types/models/encodec/feature_extraction_encodec.d.ts +13 -0
- package/tfjs-types/models/feature_extractors.d.ts +16 -0
- package/tfjs-types/models/florence2/processing_florence2.d.ts +39 -0
- package/tfjs-types/models/gemma3n/feature_extraction_gemma3n.d.ts +35 -0
- package/tfjs-types/models/gemma3n/processing_gemma3n.d.ts +31 -0
- package/tfjs-types/models/glpn/image_processing_glpn.d.ts +4 -0
- package/tfjs-types/models/grounding_dino/image_processing_grounding_dino.d.ts +20 -0
- package/tfjs-types/models/grounding_dino/processing_grounding_dino.d.ts +27 -0
- package/tfjs-types/models/idefics3/image_processing_idefics3.d.ts +40 -0
- package/tfjs-types/models/idefics3/processing_idefics3.d.ts +19 -0
- package/tfjs-types/models/image_processors.d.ts +44 -0
- package/tfjs-types/models/janus/image_processing_janus.d.ts +7 -0
- package/tfjs-types/models/janus/processing_janus.d.ts +77 -0
- package/tfjs-types/models/jina_clip/image_processing_jina_clip.d.ts +5 -0
- package/tfjs-types/models/jina_clip/processing_jina_clip.d.ts +9 -0
- package/tfjs-types/models/llava/processing_llava.d.ts +12 -0
- package/tfjs-types/models/llava_onevision/image_processing_llava_onevision.d.ts +4 -0
- package/tfjs-types/models/mask2former/image_processing_mask2former.d.ts +4 -0
- package/tfjs-types/models/maskformer/image_processing_maskformer.d.ts +22 -0
- package/tfjs-types/models/mgp_str/processing_mgp_str.d.ts +64 -0
- package/tfjs-types/models/mobilenet_v1/image_processing_mobilenet_v1.d.ts +6 -0
- package/tfjs-types/models/mobilenet_v2/image_processing_mobilenet_v2.d.ts +6 -0
- package/tfjs-types/models/mobilenet_v3/image_processing_mobilenet_v3.d.ts +6 -0
- package/tfjs-types/models/mobilenet_v4/image_processing_mobilenet_v4.d.ts +6 -0
- package/tfjs-types/models/mobilevit/image_processing_mobilevit.d.ts +6 -0
- package/tfjs-types/models/moonshine/feature_extraction_moonshine.d.ts +13 -0
- package/tfjs-types/models/moonshine/processing_moonshine.d.ts +17 -0
- package/tfjs-types/models/nougat/image_processing_nougat.d.ts +4 -0
- package/tfjs-types/models/owlv2/image_processing_owlv2.d.ts +4 -0
- package/tfjs-types/models/owlvit/image_processing_owlvit.d.ts +10 -0
- package/tfjs-types/models/owlvit/processing_owlvit.d.ts +8 -0
- package/tfjs-types/models/paligemma/processing_paligemma.d.ts +12 -0
- package/tfjs-types/models/parakeet/feature_extraction_parakeet.d.ts +22 -0
- package/tfjs-types/models/phi3_v/image_processing_phi3_v.d.ts +17 -0
- package/tfjs-types/models/phi3_v/processing_phi3_v.d.ts +21 -0
- package/tfjs-types/models/pixtral/image_processing_pixtral.d.ts +4 -0
- package/tfjs-types/models/pixtral/processing_pixtral.d.ts +12 -0
- package/tfjs-types/models/processors.d.ts +25 -0
- package/tfjs-types/models/pvt/image_processing_pvt.d.ts +4 -0
- package/tfjs-types/models/pyannote/feature_extraction_pyannote.d.ts +31 -0
- package/tfjs-types/models/pyannote/processing_pyannote.d.ts +19 -0
- package/tfjs-types/models/qwen2_vl/image_processing_qwen2_vl.d.ts +11 -0
- package/tfjs-types/models/qwen2_vl/processing_qwen2_vl.d.ts +17 -0
- package/tfjs-types/models/rt_detr/image_processing_rt_detr.d.ts +8 -0
- package/tfjs-types/models/sam/image_processing_sam.d.ts +103 -0
- package/tfjs-types/models/sam/processing_sam.d.ts +9 -0
- package/tfjs-types/models/sam2/image_processing_sam2.d.ts +2 -0
- package/tfjs-types/models/sam2/processing_sam2.d.ts +6 -0
- package/tfjs-types/models/sam3/image_processing_sam3.d.ts +2 -0
- package/tfjs-types/models/sapiens/image_processing_sapiens.d.ts +10 -0
- package/tfjs-types/models/seamless_m4t/feature_extraction_seamless_m4t.d.ts +34 -0
- package/tfjs-types/models/segformer/image_processing_segformer.d.ts +10 -0
- package/tfjs-types/models/siglip/image_processing_siglip.d.ts +4 -0
- package/tfjs-types/models/smolvlm/image_processing_smolvlm.d.ts +2 -0
- package/tfjs-types/models/smolvlm/processing_smolvlm.d.ts +2 -0
- package/tfjs-types/models/snac/feature_extraction_snac.d.ts +4 -0
- package/tfjs-types/models/speecht5/feature_extraction_speecht5.d.ts +4 -0
- package/tfjs-types/models/speecht5/processing_speecht5.d.ts +14 -0
- package/tfjs-types/models/swin2sr/image_processing_swin2sr.d.ts +5 -0
- package/tfjs-types/models/ultravox/processing_ultravox.d.ts +16 -0
- package/tfjs-types/models/vit/image_processing_vit.d.ts +6 -0
- package/tfjs-types/models/vitmatte/image_processing_vitmatte.d.ts +12 -0
- package/tfjs-types/models/vitpose/image_processing_vitpose.d.ts +26 -0
- package/tfjs-types/models/voxtral/processing_voxtral.d.ts +16 -0
- package/tfjs-types/models/wav2vec2/feature_extraction_wav2vec2.d.ts +19 -0
- package/tfjs-types/models/wav2vec2/processing_wav2vec2.d.ts +14 -0
- package/tfjs-types/models/wav2vec2_with_lm/processing_wav2vec2_with_lm.d.ts +14 -0
- package/tfjs-types/models/wespeaker/feature_extraction_wespeaker.d.ts +23 -0
- package/tfjs-types/models/whisper/common_whisper.d.ts +8 -0
- package/tfjs-types/models/whisper/feature_extraction_whisper.d.ts +23 -0
- package/tfjs-types/models/whisper/generation_whisper.d.ts +76 -0
- package/tfjs-types/models/whisper/processing_whisper.d.ts +17 -0
- package/tfjs-types/models/yolos/image_processing_yolos.d.ts +10 -0
- package/tfjs-types/models.d.ts +4396 -0
- package/tfjs-types/ops/registry.d.ts +13 -0
- package/tfjs-types/pipelines.d.ts +2433 -0
- package/tfjs-types/tokenizers.d.ts +1002 -0
- package/tfjs-types/transformers.d.ts +27 -0
- package/tfjs-types/utils/audio.d.ts +160 -0
- package/tfjs-types/utils/constants.d.ts +8 -0
- package/tfjs-types/utils/core.d.ts +231 -0
- package/tfjs-types/utils/data-structures.d.ts +294 -0
- package/tfjs-types/utils/devices.d.ts +18 -0
- package/tfjs-types/utils/dtypes.d.ts +20 -0
- package/tfjs-types/utils/generic.d.ts +11 -0
- package/tfjs-types/utils/hub.d.ts +175 -0
- package/tfjs-types/utils/image.d.ts +141 -0
- package/tfjs-types/utils/maths.d.ts +282 -0
- package/tfjs-types/utils/tensor.d.ts +490 -0
- package/tfjs-types/utils/video.d.ts +37 -0
- package/addon-node_modules/onnxruntime-node/bin/napi-v3/linux/arm64/libonnxruntime.so.1.14.0 +0 -0
- package/addon-node_modules/onnxruntime-node/bin/napi-v3/linux/arm64/onnxruntime_binding.node +0 -0
- package/addon-node_modules/onnxruntime-node/bin/napi-v3/linux/x64/onnxruntime_binding.node +0 -0
|
@@ -0,0 +1,4396 @@
|
|
|
1
|
+
declare const PreTrainedModel_base: new () => {
|
|
2
|
+
(...args: any[]): any;
|
|
3
|
+
_call(...args: any[]): any;
|
|
4
|
+
};
|
|
5
|
+
/**
|
|
6
|
+
* A base class for pre-trained models that provides the model configuration and an ONNX session.
|
|
7
|
+
*/
|
|
8
|
+
export class PreTrainedModel extends PreTrainedModel_base {
|
|
9
|
+
/**
|
|
10
|
+
* Instantiate one of the model classes of the library from a pretrained model.
|
|
11
|
+
*
|
|
12
|
+
* The model class to instantiate is selected based on the `model_type` property of the config object
|
|
13
|
+
* (either passed as an argument or loaded from `pretrained_model_name_or_path` if possible)
|
|
14
|
+
*
|
|
15
|
+
* @param {string} pretrained_model_name_or_path The name or path of the pretrained model. Can be either:
|
|
16
|
+
* - A string, the *model id* of a pretrained model hosted inside a model repo on huggingface.co.
|
|
17
|
+
* Valid model ids can be located at the root-level, like `bert-base-uncased`, or namespaced under a
|
|
18
|
+
* user or organization name, like `dbmdz/bert-base-german-cased`.
|
|
19
|
+
* - A path to a *directory* containing model weights, e.g., `./my_model_directory/`.
|
|
20
|
+
* @param {import('./utils/hub.js').PretrainedModelOptions} options Additional options for loading the model.
|
|
21
|
+
*
|
|
22
|
+
* @returns {Promise<PreTrainedModel>} A new instance of the `PreTrainedModel` class.
|
|
23
|
+
*/
|
|
24
|
+
static from_pretrained(pretrained_model_name_or_path: string, { progress_callback, config, cache_dir, local_files_only, revision, model_file_name, subfolder, device, dtype, use_external_data_format, session_options, }?: import("./utils/hub.js").PretrainedModelOptions): Promise<PreTrainedModel>;
|
|
25
|
+
/**
|
|
26
|
+
* Creates a new instance of the `PreTrainedModel` class.
|
|
27
|
+
* @param {import('./configs.js').PretrainedConfig} config The model configuration.
|
|
28
|
+
* @param {Record<string, any>} sessions The inference sessions for the model.
|
|
29
|
+
* @param {Record<string, Object>} configs Additional configuration files (e.g., generation_config.json).
|
|
30
|
+
*/
|
|
31
|
+
constructor(config: import("./configs.js").PretrainedConfig, sessions: Record<string, any>, configs: Record<string, any>);
|
|
32
|
+
main_input_name: string;
|
|
33
|
+
forward_params: string[];
|
|
34
|
+
config: import("./configs.js").PretrainedConfig;
|
|
35
|
+
sessions: Record<string, any>;
|
|
36
|
+
configs: Record<string, any>;
|
|
37
|
+
can_generate: boolean;
|
|
38
|
+
_forward: typeof decoderForward | typeof autoEncoderForward;
|
|
39
|
+
_prepare_inputs_for_generation: typeof multimodal_text_to_text_prepare_inputs_for_generation;
|
|
40
|
+
/** @type {import('./configs.js').TransformersJSConfig} */
|
|
41
|
+
custom_config: import("./configs.js").TransformersJSConfig;
|
|
42
|
+
/**
|
|
43
|
+
* Disposes of all the ONNX sessions that were created during inference.
|
|
44
|
+
* @returns {Promise<unknown[]>} An array of promises, one for each ONNX session that is being disposed.
|
|
45
|
+
* @todo Use https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/FinalizationRegistry
|
|
46
|
+
*/
|
|
47
|
+
dispose(): Promise<unknown[]>;
|
|
48
|
+
/**
|
|
49
|
+
* Runs the model with the provided inputs
|
|
50
|
+
* @param {Object} model_inputs Object containing input tensors
|
|
51
|
+
* @returns {Promise<Object>} Object containing output tensors
|
|
52
|
+
*/
|
|
53
|
+
_call(model_inputs: any): Promise<any>;
|
|
54
|
+
/**
|
|
55
|
+
* Forward method for a pretrained model. If not overridden by a subclass, the correct forward method
|
|
56
|
+
* will be chosen based on the model type.
|
|
57
|
+
* @param {Object} model_inputs The input data to the model in the format specified in the ONNX model.
|
|
58
|
+
* @returns {Promise<Object>} The output data from the model in the format specified in the ONNX model.
|
|
59
|
+
* @throws {Error} This method must be implemented in subclasses.
|
|
60
|
+
*/
|
|
61
|
+
forward(model_inputs: any): Promise<any>;
|
|
62
|
+
/**
|
|
63
|
+
* Get the model's generation config, if it exists.
|
|
64
|
+
* @returns {GenerationConfig|null} The model's generation config if it exists, otherwise `null`.
|
|
65
|
+
*/
|
|
66
|
+
get generation_config(): GenerationConfig | null;
|
|
67
|
+
/**
|
|
68
|
+
* @param {GenerationConfig} generation_config
|
|
69
|
+
* @param {number} input_ids_seq_length The starting sequence length for the input ids.
|
|
70
|
+
* @returns {LogitsProcessorList}
|
|
71
|
+
* @private
|
|
72
|
+
*/
|
|
73
|
+
private _get_logits_processor;
|
|
74
|
+
/**
|
|
75
|
+
* This function merges multiple generation configs together to form a final generation config to be used by the model for text generation.
|
|
76
|
+
* It first creates an empty `GenerationConfig` object, then it applies the model's own `generation_config` property to it. Finally, if a `generation_config` object was passed in the arguments, it overwrites the corresponding properties in the final config with those of the passed config object.
|
|
77
|
+
* @param {GenerationConfig|null} generation_config A `GenerationConfig` object containing generation parameters.
|
|
78
|
+
* @param {Object} kwargs Additional generation parameters to be used in place of those in the `generation_config` object.
|
|
79
|
+
* @returns {GenerationConfig} The final generation config object to be used by the model for text generation.
|
|
80
|
+
*/
|
|
81
|
+
_prepare_generation_config(generation_config: GenerationConfig | null, kwargs: any, cls?: typeof GenerationConfig): GenerationConfig;
|
|
82
|
+
/**
|
|
83
|
+
*
|
|
84
|
+
* @param {GenerationConfig} generation_config
|
|
85
|
+
* @param {StoppingCriteriaList} [stopping_criteria=null]
|
|
86
|
+
*/
|
|
87
|
+
_get_stopping_criteria(generation_config: GenerationConfig, stopping_criteria?: StoppingCriteriaList): StoppingCriteriaList;
|
|
88
|
+
/**
|
|
89
|
+
* Confirms that the model class is compatible with generation.
|
|
90
|
+
* If not, raises an exception that points to the right class to use.
|
|
91
|
+
*/
|
|
92
|
+
_validate_model_class(): void;
|
|
93
|
+
prepare_inputs_for_generation(...args: any[]): any;
|
|
94
|
+
/**
|
|
95
|
+
*
|
|
96
|
+
* @param {Object} inputs
|
|
97
|
+
* @param {bigint[][]} inputs.generated_input_ids
|
|
98
|
+
* @param {Object} inputs.outputs
|
|
99
|
+
* @param {Object} inputs.model_inputs
|
|
100
|
+
* @param {boolean} inputs.is_encoder_decoder
|
|
101
|
+
* @returns {Object} The updated model inputs for the next generation iteration.
|
|
102
|
+
*/
|
|
103
|
+
_update_model_kwargs_for_generation({ generated_input_ids, outputs, model_inputs, is_encoder_decoder }: {
|
|
104
|
+
generated_input_ids: bigint[][];
|
|
105
|
+
outputs: any;
|
|
106
|
+
model_inputs: any;
|
|
107
|
+
is_encoder_decoder: boolean;
|
|
108
|
+
}): any;
|
|
109
|
+
/**
|
|
110
|
+
* This function extracts the model-specific `inputs` for generation.
|
|
111
|
+
* @param {Object} params
|
|
112
|
+
* @param {Tensor} [params.inputs=null]
|
|
113
|
+
* @param {number} [params.bos_token_id=null]
|
|
114
|
+
* @param {Record<string, Tensor|number[]>} [params.model_kwargs]
|
|
115
|
+
* @returns {{inputs_tensor: Tensor, model_inputs: Record<string, Tensor>, model_input_name: string}} The model-specific inputs for generation.
|
|
116
|
+
*/
|
|
117
|
+
_prepare_model_inputs({ inputs, bos_token_id, model_kwargs }: {
|
|
118
|
+
inputs?: Tensor;
|
|
119
|
+
bos_token_id?: number;
|
|
120
|
+
model_kwargs?: Record<string, Tensor | number[]>;
|
|
121
|
+
}): {
|
|
122
|
+
inputs_tensor: Tensor;
|
|
123
|
+
model_inputs: Record<string, Tensor>;
|
|
124
|
+
model_input_name: string;
|
|
125
|
+
};
|
|
126
|
+
_prepare_encoder_decoder_kwargs_for_generation({ inputs_tensor, model_inputs, model_input_name, generation_config }: {
|
|
127
|
+
inputs_tensor: any;
|
|
128
|
+
model_inputs: any;
|
|
129
|
+
model_input_name: any;
|
|
130
|
+
generation_config: any;
|
|
131
|
+
}): Promise<any>;
|
|
132
|
+
/**
|
|
133
|
+
* Prepares `decoder_input_ids` for generation with encoder-decoder models
|
|
134
|
+
* @param {*} param0
|
|
135
|
+
*/
|
|
136
|
+
_prepare_decoder_input_ids_for_generation({ batch_size, model_input_name, model_kwargs, decoder_start_token_id, bos_token_id, generation_config }: any): {
|
|
137
|
+
input_ids: any;
|
|
138
|
+
model_inputs: any;
|
|
139
|
+
};
|
|
140
|
+
/**
|
|
141
|
+
* Generates sequences of token ids for models with a language modeling head.
|
|
142
|
+
* @param {import('./generation/parameters.js').GenerationFunctionParameters} options
|
|
143
|
+
* @returns {Promise<ModelOutput|Tensor>} The output of the model, which can contain the generated token ids, attentions, and scores.
|
|
144
|
+
*/
|
|
145
|
+
generate({ inputs, generation_config, logits_processor, stopping_criteria, streamer, ...kwargs }: import("./generation/parameters.js").GenerationFunctionParameters): Promise<ModelOutput | Tensor>;
|
|
146
|
+
/**
|
|
147
|
+
* Returns an object containing past key values from the given decoder results object.
|
|
148
|
+
*
|
|
149
|
+
* @param {Object} decoderResults The decoder results object.
|
|
150
|
+
* @param {Object} pastKeyValues The previous past key values.
|
|
151
|
+
* @returns {Object} An object containing past key values.
|
|
152
|
+
*/
|
|
153
|
+
getPastKeyValues(decoderResults: any, pastKeyValues: any, disposeEncoderPKVs?: boolean): any;
|
|
154
|
+
/**
|
|
155
|
+
* Returns an object containing attentions from the given model output object.
|
|
156
|
+
*
|
|
157
|
+
* @param {Object} model_output The output of the model.
|
|
158
|
+
* @returns {{cross_attentions?: Tensor[]}} An object containing attentions.
|
|
159
|
+
*/
|
|
160
|
+
getAttentions(model_output: any): {
|
|
161
|
+
cross_attentions?: Tensor[];
|
|
162
|
+
};
|
|
163
|
+
/**
|
|
164
|
+
* Adds past key values to the decoder feeds object. If pastKeyValues is null, creates new tensors for past key values.
|
|
165
|
+
*
|
|
166
|
+
* @param {Object} decoderFeeds The decoder feeds object to add past key values to.
|
|
167
|
+
* @param {Object} pastKeyValues An object containing past key values.
|
|
168
|
+
*/
|
|
169
|
+
addPastKeyValues(decoderFeeds: any, pastKeyValues: any): void;
|
|
170
|
+
encode_image({ pixel_values }: {
|
|
171
|
+
pixel_values: any;
|
|
172
|
+
}): Promise<any>;
|
|
173
|
+
encode_text({ input_ids }: {
|
|
174
|
+
input_ids: any;
|
|
175
|
+
}): Promise<any>;
|
|
176
|
+
encode_audio({ audio_values }: {
|
|
177
|
+
audio_values: any;
|
|
178
|
+
}): Promise<any>;
|
|
179
|
+
}
|
|
180
|
+
export class ModelOutput {
|
|
181
|
+
}
|
|
182
|
+
/**
|
|
183
|
+
* Base class for model's outputs, with potential hidden states and attentions.
|
|
184
|
+
*/
|
|
185
|
+
export class BaseModelOutput extends ModelOutput {
|
|
186
|
+
/**
|
|
187
|
+
* @param {Object} output The output of the model.
|
|
188
|
+
* @param {Tensor} output.last_hidden_state Sequence of hidden-states at the output of the last layer of the model.
|
|
189
|
+
* @param {Tensor} [output.hidden_states] Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.
|
|
190
|
+
* @param {Tensor} [output.attentions] Attentions weights after the attention softmax, used to compute the weighted average in the self-attention heads.
|
|
191
|
+
*/
|
|
192
|
+
constructor({ last_hidden_state, hidden_states, attentions }: {
|
|
193
|
+
last_hidden_state: Tensor;
|
|
194
|
+
hidden_states?: Tensor;
|
|
195
|
+
attentions?: Tensor;
|
|
196
|
+
});
|
|
197
|
+
last_hidden_state: Tensor;
|
|
198
|
+
hidden_states: Tensor;
|
|
199
|
+
attentions: Tensor;
|
|
200
|
+
}
|
|
201
|
+
export class BertPreTrainedModel extends PreTrainedModel {
|
|
202
|
+
}
|
|
203
|
+
export class BertModel extends BertPreTrainedModel {
|
|
204
|
+
}
|
|
205
|
+
/**
|
|
206
|
+
* BertForMaskedLM is a class representing a BERT model for masked language modeling.
|
|
207
|
+
*/
|
|
208
|
+
export class BertForMaskedLM extends BertPreTrainedModel {
|
|
209
|
+
/**
|
|
210
|
+
* Calls the model on new inputs.
|
|
211
|
+
*
|
|
212
|
+
* @param {Object} model_inputs The inputs to the model.
|
|
213
|
+
* @returns {Promise<MaskedLMOutput>} An object containing the model's output logits for masked language modeling.
|
|
214
|
+
*/
|
|
215
|
+
_call(model_inputs: any): Promise<MaskedLMOutput>;
|
|
216
|
+
}
|
|
217
|
+
/**
|
|
218
|
+
* BertForSequenceClassification is a class representing a BERT model for sequence classification.
|
|
219
|
+
*/
|
|
220
|
+
export class BertForSequenceClassification extends BertPreTrainedModel {
|
|
221
|
+
/**
|
|
222
|
+
* Calls the model on new inputs.
|
|
223
|
+
*
|
|
224
|
+
* @param {Object} model_inputs The inputs to the model.
|
|
225
|
+
* @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
|
|
226
|
+
*/
|
|
227
|
+
_call(model_inputs: any): Promise<SequenceClassifierOutput>;
|
|
228
|
+
}
|
|
229
|
+
/**
|
|
230
|
+
* BertForTokenClassification is a class representing a BERT model for token classification.
|
|
231
|
+
*/
|
|
232
|
+
export class BertForTokenClassification extends BertPreTrainedModel {
|
|
233
|
+
/**
|
|
234
|
+
* Calls the model on new inputs.
|
|
235
|
+
*
|
|
236
|
+
* @param {Object} model_inputs The inputs to the model.
|
|
237
|
+
* @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for token classification.
|
|
238
|
+
*/
|
|
239
|
+
_call(model_inputs: any): Promise<TokenClassifierOutput>;
|
|
240
|
+
}
|
|
241
|
+
/**
|
|
242
|
+
* BertForQuestionAnswering is a class representing a BERT model for question answering.
|
|
243
|
+
*/
|
|
244
|
+
export class BertForQuestionAnswering extends BertPreTrainedModel {
|
|
245
|
+
/**
|
|
246
|
+
* Calls the model on new inputs.
|
|
247
|
+
*
|
|
248
|
+
* @param {Object} model_inputs The inputs to the model.
|
|
249
|
+
* @returns {Promise<QuestionAnsweringModelOutput>} An object containing the model's output logits for question answering.
|
|
250
|
+
*/
|
|
251
|
+
_call(model_inputs: any): Promise<QuestionAnsweringModelOutput>;
|
|
252
|
+
}
|
|
253
|
+
export class NeoBertPreTrainedModel extends PreTrainedModel {
|
|
254
|
+
}
|
|
255
|
+
export class NeoBertModel extends NeoBertPreTrainedModel {
|
|
256
|
+
}
|
|
257
|
+
export class NeoBertForMaskedLM extends NeoBertPreTrainedModel {
|
|
258
|
+
/**
|
|
259
|
+
* Calls the model on new inputs.
|
|
260
|
+
*
|
|
261
|
+
* @param {Object} model_inputs The inputs to the model.
|
|
262
|
+
* @returns {Promise<MaskedLMOutput>} An object containing the model's output logits for masked language modeling.
|
|
263
|
+
*/
|
|
264
|
+
_call(model_inputs: any): Promise<MaskedLMOutput>;
|
|
265
|
+
}
|
|
266
|
+
export class NeoBertForSequenceClassification extends NeoBertPreTrainedModel {
|
|
267
|
+
/**
|
|
268
|
+
* Calls the model on new inputs.
|
|
269
|
+
*
|
|
270
|
+
* @param {Object} model_inputs The inputs to the model.
|
|
271
|
+
* @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
|
|
272
|
+
*/
|
|
273
|
+
_call(model_inputs: any): Promise<SequenceClassifierOutput>;
|
|
274
|
+
}
|
|
275
|
+
export class NeoBertForTokenClassification extends NeoBertPreTrainedModel {
|
|
276
|
+
/**
|
|
277
|
+
* Calls the model on new inputs.
|
|
278
|
+
*
|
|
279
|
+
* @param {Object} model_inputs The inputs to the model.
|
|
280
|
+
* @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for token classification.
|
|
281
|
+
*/
|
|
282
|
+
_call(model_inputs: any): Promise<TokenClassifierOutput>;
|
|
283
|
+
}
|
|
284
|
+
export class NeoBertForQuestionAnswering extends NeoBertPreTrainedModel {
|
|
285
|
+
/**
|
|
286
|
+
* Calls the model on new inputs.
|
|
287
|
+
*
|
|
288
|
+
* @param {Object} model_inputs The inputs to the model.
|
|
289
|
+
* @returns {Promise<QuestionAnsweringModelOutput>} An object containing the model's output logits for question answering.
|
|
290
|
+
*/
|
|
291
|
+
_call(model_inputs: any): Promise<QuestionAnsweringModelOutput>;
|
|
292
|
+
}
|
|
293
|
+
export class ModernBertPreTrainedModel extends PreTrainedModel {
|
|
294
|
+
}
|
|
295
|
+
export class ModernBertModel extends ModernBertPreTrainedModel {
|
|
296
|
+
}
|
|
297
|
+
export class ModernBertForMaskedLM extends ModernBertPreTrainedModel {
|
|
298
|
+
/**
|
|
299
|
+
* Calls the model on new inputs.
|
|
300
|
+
*
|
|
301
|
+
* @param {Object} model_inputs The inputs to the model.
|
|
302
|
+
* @returns {Promise<MaskedLMOutput>} An object containing the model's output logits for masked language modeling.
|
|
303
|
+
*/
|
|
304
|
+
_call(model_inputs: any): Promise<MaskedLMOutput>;
|
|
305
|
+
}
|
|
306
|
+
export class ModernBertForSequenceClassification extends ModernBertPreTrainedModel {
|
|
307
|
+
/**
|
|
308
|
+
* Calls the model on new inputs.
|
|
309
|
+
*
|
|
310
|
+
* @param {Object} model_inputs The inputs to the model.
|
|
311
|
+
* @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
|
|
312
|
+
*/
|
|
313
|
+
_call(model_inputs: any): Promise<SequenceClassifierOutput>;
|
|
314
|
+
}
|
|
315
|
+
export class ModernBertForTokenClassification extends ModernBertPreTrainedModel {
|
|
316
|
+
/**
|
|
317
|
+
* Calls the model on new inputs.
|
|
318
|
+
*
|
|
319
|
+
* @param {Object} model_inputs The inputs to the model.
|
|
320
|
+
* @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for token classification.
|
|
321
|
+
*/
|
|
322
|
+
_call(model_inputs: any): Promise<TokenClassifierOutput>;
|
|
323
|
+
}
|
|
324
|
+
export class ModernBertDecoderPreTrainedModel extends PreTrainedModel {
|
|
325
|
+
}
|
|
326
|
+
export class ModernBertDecoderModel extends ModernBertDecoderPreTrainedModel {
|
|
327
|
+
}
|
|
328
|
+
export class ModernBertDecoderForCausalLM extends ModernBertDecoderPreTrainedModel {
|
|
329
|
+
}
|
|
330
|
+
export class NomicBertPreTrainedModel extends PreTrainedModel {
|
|
331
|
+
}
|
|
332
|
+
export class NomicBertModel extends NomicBertPreTrainedModel {
|
|
333
|
+
}
|
|
334
|
+
export class RoFormerPreTrainedModel extends PreTrainedModel {
|
|
335
|
+
}
|
|
336
|
+
/**
|
|
337
|
+
* The bare RoFormer Model transformer outputting raw hidden-states without any specific head on top.
|
|
338
|
+
*/
|
|
339
|
+
export class RoFormerModel extends RoFormerPreTrainedModel {
|
|
340
|
+
}
|
|
341
|
+
/**
|
|
342
|
+
* RoFormer Model with a `language modeling` head on top.
|
|
343
|
+
*/
|
|
344
|
+
export class RoFormerForMaskedLM extends RoFormerPreTrainedModel {
|
|
345
|
+
/**
|
|
346
|
+
* Calls the model on new inputs.
|
|
347
|
+
*
|
|
348
|
+
* @param {Object} model_inputs The inputs to the model.
|
|
349
|
+
* @returns {Promise<MaskedLMOutput>} An object containing the model's output logits for masked language modeling.
|
|
350
|
+
*/
|
|
351
|
+
_call(model_inputs: any): Promise<MaskedLMOutput>;
|
|
352
|
+
}
|
|
353
|
+
/**
|
|
354
|
+
* RoFormer Model transformer with a sequence classification/regression head on top (a linear layer on top of the pooled output)
|
|
355
|
+
*/
|
|
356
|
+
export class RoFormerForSequenceClassification extends RoFormerPreTrainedModel {
|
|
357
|
+
/**
|
|
358
|
+
* Calls the model on new inputs.
|
|
359
|
+
*
|
|
360
|
+
* @param {Object} model_inputs The inputs to the model.
|
|
361
|
+
* @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
|
|
362
|
+
*/
|
|
363
|
+
_call(model_inputs: any): Promise<SequenceClassifierOutput>;
|
|
364
|
+
}
|
|
365
|
+
/**
|
|
366
|
+
* RoFormer Model with a token classification head on top (a linear layer on top of the hidden-states output)
|
|
367
|
+
* e.g. for Named-Entity-Recognition (NER) tasks.
|
|
368
|
+
*/
|
|
369
|
+
export class RoFormerForTokenClassification extends RoFormerPreTrainedModel {
|
|
370
|
+
/**
|
|
371
|
+
* Calls the model on new inputs.
|
|
372
|
+
*
|
|
373
|
+
* @param {Object} model_inputs The inputs to the model.
|
|
374
|
+
* @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for token classification.
|
|
375
|
+
*/
|
|
376
|
+
_call(model_inputs: any): Promise<TokenClassifierOutput>;
|
|
377
|
+
}
|
|
378
|
+
/**
|
|
379
|
+
* RoFormer Model with a span classification head on top for extractive question-answering tasks like SQuAD
|
|
380
|
+
* (a linear layers on top of the hidden-states output to compute `span start logits` and `span end logits`).
|
|
381
|
+
*/
|
|
382
|
+
export class RoFormerForQuestionAnswering extends RoFormerPreTrainedModel {
|
|
383
|
+
/**
|
|
384
|
+
* Calls the model on new inputs.
|
|
385
|
+
*
|
|
386
|
+
* @param {Object} model_inputs The inputs to the model.
|
|
387
|
+
* @returns {Promise<QuestionAnsweringModelOutput>} An object containing the model's output logits for question answering.
|
|
388
|
+
*/
|
|
389
|
+
_call(model_inputs: any): Promise<QuestionAnsweringModelOutput>;
|
|
390
|
+
}
|
|
391
|
+
/**
 * An abstract class to handle weights initialization and a simple interface for
 * downloading and loading pretrained ConvBERT models.
 */
export class ConvBertPreTrainedModel extends PreTrainedModel {
}
/**
 * The bare ConvBERT Model transformer outputting raw hidden-states without any specific head on top.
 */
export class ConvBertModel extends ConvBertPreTrainedModel {
}
/**
 * ConvBERT Model with a language modeling head on top.
 */
export class ConvBertForMaskedLM extends ConvBertPreTrainedModel {
    /**
     * Calls the model on new inputs.
     *
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<MaskedLMOutput>} An object containing the model's output logits for masked language modeling.
     */
    _call(model_inputs: any): Promise<MaskedLMOutput>;
}
/**
 * ConvBERT Model transformer with a sequence classification/regression head on top (a linear layer on top of the pooled output)
 */
export class ConvBertForSequenceClassification extends ConvBertPreTrainedModel {
    /**
     * Calls the model on new inputs.
     *
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
     */
    _call(model_inputs: any): Promise<SequenceClassifierOutput>;
}
/**
 * ConvBERT Model with a token classification head on top (a linear layer on top of the hidden-states output)
 * e.g. for Named-Entity-Recognition (NER) tasks.
 */
export class ConvBertForTokenClassification extends ConvBertPreTrainedModel {
    /**
     * Calls the model on new inputs.
     *
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for token classification.
     */
    _call(model_inputs: any): Promise<TokenClassifierOutput>;
}
/**
 * ConvBERT Model with a span classification head on top for extractive question-answering tasks like SQuAD
 * (linear layers on top of the hidden-states output to compute `span start logits` and `span end logits`)
 */
export class ConvBertForQuestionAnswering extends ConvBertPreTrainedModel {
    /**
     * Calls the model on new inputs.
     *
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<QuestionAnsweringModelOutput>} An object containing the model's output logits for question answering.
     */
    _call(model_inputs: any): Promise<QuestionAnsweringModelOutput>;
}
|
|
448
|
+
/**
 * An abstract class to handle weights initialization and a simple interface for
 * downloading and loading pretrained ELECTRA models.
 */
export class ElectraPreTrainedModel extends PreTrainedModel {
}
/**
 * The bare Electra Model transformer outputting raw hidden-states without any specific head on top.
 * Identical to the BERT model except that it uses an additional linear layer between the embedding
 * layer and the encoder if the hidden size and embedding size are different.
 */
export class ElectraModel extends ElectraPreTrainedModel {
}
/**
 * Electra model with a language modeling head on top.
 */
export class ElectraForMaskedLM extends ElectraPreTrainedModel {
    /**
     * Calls the model on new inputs.
     *
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<MaskedLMOutput>} An object containing the model's output logits for masked language modeling.
     */
    _call(model_inputs: any): Promise<MaskedLMOutput>;
}
/**
 * ELECTRA Model transformer with a sequence classification/regression head on top (a linear layer on top of the pooled output)
 */
export class ElectraForSequenceClassification extends ElectraPreTrainedModel {
    /**
     * Calls the model on new inputs.
     *
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
     */
    _call(model_inputs: any): Promise<SequenceClassifierOutput>;
}
/**
 * Electra model with a token classification head on top.
 */
export class ElectraForTokenClassification extends ElectraPreTrainedModel {
    /**
     * Calls the model on new inputs.
     *
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for token classification.
     */
    _call(model_inputs: any): Promise<TokenClassifierOutput>;
}
/**
 * ELECTRA Model with a span classification head on top for extractive question-answering tasks like SQuAD
 * (linear layers on top of the hidden-states output to compute `span start logits` and `span end logits`).
 */
export class ElectraForQuestionAnswering extends ElectraPreTrainedModel {
    /**
     * Calls the model on new inputs.
     *
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<QuestionAnsweringModelOutput>} An object containing the model's output logits for question answering.
     */
    _call(model_inputs: any): Promise<QuestionAnsweringModelOutput>;
}
|
|
506
|
+
/**
 * An abstract class to handle weights initialization and a simple interface for
 * downloading and loading pretrained CamemBERT models.
 */
export class CamembertPreTrainedModel extends PreTrainedModel {
}
/**
 * The bare CamemBERT Model transformer outputting raw hidden-states without any specific head on top.
 */
export class CamembertModel extends CamembertPreTrainedModel {
}
/**
 * CamemBERT Model with a `language modeling` head on top.
 */
export class CamembertForMaskedLM extends CamembertPreTrainedModel {
    /**
     * Calls the model on new inputs.
     *
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<MaskedLMOutput>} An object containing the model's output logits for masked language modeling.
     */
    _call(model_inputs: any): Promise<MaskedLMOutput>;
}
/**
 * CamemBERT Model transformer with a sequence classification/regression head on top (a linear layer on top of the pooled output) e.g. for GLUE tasks.
 */
export class CamembertForSequenceClassification extends CamembertPreTrainedModel {
    /**
     * Calls the model on new inputs.
     *
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
     */
    _call(model_inputs: any): Promise<SequenceClassifierOutput>;
}
/**
 * CamemBERT Model with a token classification head on top (a linear layer on top of the hidden-states output) e.g. for Named-Entity-Recognition (NER) tasks.
 */
export class CamembertForTokenClassification extends CamembertPreTrainedModel {
    /**
     * Calls the model on new inputs.
     *
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for token classification.
     */
    _call(model_inputs: any): Promise<TokenClassifierOutput>;
}
/**
 * CamemBERT Model with a span classification head on top for extractive question-answering tasks
 */
export class CamembertForQuestionAnswering extends CamembertPreTrainedModel {
    /**
     * Calls the model on new inputs.
     *
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<QuestionAnsweringModelOutput>} An object containing the model's output logits for question answering.
     */
    _call(model_inputs: any): Promise<QuestionAnsweringModelOutput>;
}
|
|
561
|
+
/**
 * An abstract class to handle weights initialization and a simple interface for
 * downloading and loading pretrained DeBERTa models.
 */
export class DebertaPreTrainedModel extends PreTrainedModel {
}
/**
 * The bare DeBERTa Model transformer outputting raw hidden-states without any specific head on top.
 */
export class DebertaModel extends DebertaPreTrainedModel {
}
/**
 * DeBERTa Model with a `language modeling` head on top.
 */
export class DebertaForMaskedLM extends DebertaPreTrainedModel {
    /**
     * Calls the model on new inputs.
     *
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<MaskedLMOutput>} An object containing the model's output logits for masked language modeling.
     */
    _call(model_inputs: any): Promise<MaskedLMOutput>;
}
/**
 * DeBERTa Model transformer with a sequence classification/regression head on top (a linear layer on top of the pooled output)
 */
export class DebertaForSequenceClassification extends DebertaPreTrainedModel {
    /**
     * Calls the model on new inputs.
     *
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
     */
    _call(model_inputs: any): Promise<SequenceClassifierOutput>;
}
/**
 * DeBERTa Model with a token classification head on top (a linear layer on top of the hidden-states output) e.g. for Named-Entity-Recognition (NER) tasks.
 */
export class DebertaForTokenClassification extends DebertaPreTrainedModel {
    /**
     * Calls the model on new inputs.
     *
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for token classification.
     */
    _call(model_inputs: any): Promise<TokenClassifierOutput>;
}
/**
 * DeBERTa Model with a span classification head on top for extractive question-answering tasks like SQuAD
 * (linear layers on top of the hidden-states output to compute `span start logits` and `span end logits`).
 */
export class DebertaForQuestionAnswering extends DebertaPreTrainedModel {
    /**
     * Calls the model on new inputs.
     *
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<QuestionAnsweringModelOutput>} An object containing the model's output logits for question answering.
     */
    _call(model_inputs: any): Promise<QuestionAnsweringModelOutput>;
}
|
|
617
|
+
/**
 * An abstract class to handle weights initialization and a simple interface for
 * downloading and loading pretrained DeBERTa-V2 models.
 */
export class DebertaV2PreTrainedModel extends PreTrainedModel {
}
/**
 * The bare DeBERTa-V2 Model transformer outputting raw hidden-states without any specific head on top.
 */
export class DebertaV2Model extends DebertaV2PreTrainedModel {
}
/**
 * DeBERTa-V2 Model with a `language modeling` head on top.
 */
export class DebertaV2ForMaskedLM extends DebertaV2PreTrainedModel {
    /**
     * Calls the model on new inputs.
     *
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<MaskedLMOutput>} An object containing the model's output logits for masked language modeling.
     */
    _call(model_inputs: any): Promise<MaskedLMOutput>;
}
/**
 * DeBERTa-V2 Model transformer with a sequence classification/regression head on top (a linear layer on top of the pooled output)
 */
export class DebertaV2ForSequenceClassification extends DebertaV2PreTrainedModel {
    /**
     * Calls the model on new inputs.
     *
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
     */
    _call(model_inputs: any): Promise<SequenceClassifierOutput>;
}
/**
 * DeBERTa-V2 Model with a token classification head on top (a linear layer on top of the hidden-states output) e.g. for Named-Entity-Recognition (NER) tasks.
 */
export class DebertaV2ForTokenClassification extends DebertaV2PreTrainedModel {
    /**
     * Calls the model on new inputs.
     *
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for token classification.
     */
    _call(model_inputs: any): Promise<TokenClassifierOutput>;
}
/**
 * DeBERTa-V2 Model with a span classification head on top for extractive question-answering tasks like SQuAD
 * (linear layers on top of the hidden-states output to compute `span start logits` and `span end logits`).
 */
export class DebertaV2ForQuestionAnswering extends DebertaV2PreTrainedModel {
    /**
     * Calls the model on new inputs.
     *
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<QuestionAnsweringModelOutput>} An object containing the model's output logits for question answering.
     */
    _call(model_inputs: any): Promise<QuestionAnsweringModelOutput>;
}
|
|
673
|
+
/**
 * An abstract class to handle weights initialization and a simple interface for
 * downloading and loading pretrained DistilBERT models.
 */
export class DistilBertPreTrainedModel extends PreTrainedModel {
}
/**
 * The bare DistilBERT encoder model, with no task-specific head declared.
 */
export class DistilBertModel extends DistilBertPreTrainedModel {
}
/**
 * DistilBertForSequenceClassification is a class representing a DistilBERT model for sequence classification.
 */
export class DistilBertForSequenceClassification extends DistilBertPreTrainedModel {
    /**
     * Calls the model on new inputs.
     *
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
     */
    _call(model_inputs: any): Promise<SequenceClassifierOutput>;
}
/**
 * DistilBertForTokenClassification is a class representing a DistilBERT model for token classification.
 */
export class DistilBertForTokenClassification extends DistilBertPreTrainedModel {
    /**
     * Calls the model on new inputs.
     *
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for token classification.
     */
    _call(model_inputs: any): Promise<TokenClassifierOutput>;
}
/**
 * DistilBertForQuestionAnswering is a class representing a DistilBERT model for question answering.
 */
export class DistilBertForQuestionAnswering extends DistilBertPreTrainedModel {
    /**
     * Calls the model on new inputs.
     *
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<QuestionAnsweringModelOutput>} An object containing the model's output logits for question answering.
     */
    _call(model_inputs: any): Promise<QuestionAnsweringModelOutput>;
}
/**
 * DistilBertForMaskedLM is a class representing a DistilBERT model for the masked language modeling task.
 */
export class DistilBertForMaskedLM extends DistilBertPreTrainedModel {
    /**
     * Calls the model on new inputs.
     *
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<MaskedLMOutput>} An object containing the model's output logits for masked language modeling.
     */
    _call(model_inputs: any): Promise<MaskedLMOutput>;
}
|
|
725
|
+
/**
 * An abstract class to handle weights initialization and a simple interface for
 * downloading and loading pretrained ESM models.
 */
export class EsmPreTrainedModel extends PreTrainedModel {
}
/**
 * The bare ESM Model transformer outputting raw hidden-states without any specific head on top.
 */
export class EsmModel extends EsmPreTrainedModel {
}
/**
 * ESM Model with a `language modeling` head on top.
 */
export class EsmForMaskedLM extends EsmPreTrainedModel {
    /**
     * Calls the model on new inputs.
     *
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<MaskedLMOutput>} An object containing the model's output logits for masked language modeling.
     */
    _call(model_inputs: any): Promise<MaskedLMOutput>;
}
/**
 * ESM Model transformer with a sequence classification/regression head on top (a linear layer on top of the pooled output)
 */
export class EsmForSequenceClassification extends EsmPreTrainedModel {
    /**
     * Calls the model on new inputs.
     *
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
     */
    _call(model_inputs: any): Promise<SequenceClassifierOutput>;
}
/**
 * ESM Model with a token classification head on top (a linear layer on top of the hidden-states output)
 * e.g. for Named-Entity-Recognition (NER) tasks.
 */
export class EsmForTokenClassification extends EsmPreTrainedModel {
    /**
     * Calls the model on new inputs.
     *
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for token classification.
     */
    _call(model_inputs: any): Promise<TokenClassifierOutput>;
}
|
|
769
|
+
/**
 * An abstract class to handle weights initialization and a simple interface for
 * downloading and loading pretrained MobileBERT models.
 */
export class MobileBertPreTrainedModel extends PreTrainedModel {
}
/**
 * The bare MobileBERT encoder model, with no task-specific head declared.
 */
export class MobileBertModel extends MobileBertPreTrainedModel {
}
/**
 * MobileBertForMaskedLM is a class representing a MobileBERT model for the masked language modeling task.
 */
export class MobileBertForMaskedLM extends MobileBertPreTrainedModel {
    /**
     * Calls the model on new inputs.
     *
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<MaskedLMOutput>} An object containing the model's output logits for masked language modeling.
     */
    _call(model_inputs: any): Promise<MaskedLMOutput>;
}
/**
 * MobileBert Model transformer with a sequence classification/regression head on top (a linear layer on top of the pooled output)
 */
export class MobileBertForSequenceClassification extends MobileBertPreTrainedModel {
    /**
     * Calls the model on new inputs.
     *
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
     */
    _call(model_inputs: any): Promise<SequenceClassifierOutput>;
}
/**
 * MobileBert Model with a span classification head on top for extractive question-answering tasks
 */
export class MobileBertForQuestionAnswering extends MobileBertPreTrainedModel {
    /**
     * Calls the model on new inputs.
     *
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<QuestionAnsweringModelOutput>} An object containing the model's output logits for question answering.
     */
    _call(model_inputs: any): Promise<QuestionAnsweringModelOutput>;
}
|
|
809
|
+
/**
 * An abstract class to handle weights initialization and a simple interface for
 * downloading and loading pretrained MPNet models.
 */
export class MPNetPreTrainedModel extends PreTrainedModel {
}
/**
 * The bare MPNet Model transformer outputting raw hidden-states without any specific head on top.
 */
export class MPNetModel extends MPNetPreTrainedModel {
}
/**
 * MPNetForMaskedLM is a class representing an MPNet model for masked language modeling.
 */
export class MPNetForMaskedLM extends MPNetPreTrainedModel {
    /**
     * Calls the model on new inputs.
     *
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<MaskedLMOutput>} An object containing the model's output logits for masked language modeling.
     */
    _call(model_inputs: any): Promise<MaskedLMOutput>;
}
/**
 * MPNetForSequenceClassification is a class representing an MPNet model for sequence classification.
 */
export class MPNetForSequenceClassification extends MPNetPreTrainedModel {
    /**
     * Calls the model on new inputs.
     *
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
     */
    _call(model_inputs: any): Promise<SequenceClassifierOutput>;
}
/**
 * MPNetForTokenClassification is a class representing an MPNet model for token classification.
 */
export class MPNetForTokenClassification extends MPNetPreTrainedModel {
    /**
     * Calls the model on new inputs.
     *
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for token classification.
     */
    _call(model_inputs: any): Promise<TokenClassifierOutput>;
}
/**
 * MPNetForQuestionAnswering is a class representing an MPNet model for question answering.
 */
export class MPNetForQuestionAnswering extends MPNetPreTrainedModel {
    /**
     * Calls the model on new inputs.
     *
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<QuestionAnsweringModelOutput>} An object containing the model's output logits for question answering.
     */
    _call(model_inputs: any): Promise<QuestionAnsweringModelOutput>;
}
|
|
864
|
+
/**
 * An abstract class to handle weights initialization and a simple interface for
 * downloading and loading pretrained SqueezeBERT models.
 */
export class SqueezeBertPreTrainedModel extends PreTrainedModel {
}
/**
 * The bare SqueezeBERT encoder model, with no task-specific head declared.
 */
export class SqueezeBertModel extends SqueezeBertPreTrainedModel {
}
/**
 * SqueezeBERT model with a masked-language-modeling head on top.
 */
export class SqueezeBertForMaskedLM extends SqueezeBertPreTrainedModel {
    /**
     * Calls the model on new inputs.
     *
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<MaskedLMOutput>} An object containing the model's output logits for masked language modeling.
     */
    _call(model_inputs: any): Promise<MaskedLMOutput>;
}
/**
 * SqueezeBERT model with a sequence classification head on top.
 */
export class SqueezeBertForSequenceClassification extends SqueezeBertPreTrainedModel {
    /**
     * Calls the model on new inputs.
     *
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
     */
    _call(model_inputs: any): Promise<SequenceClassifierOutput>;
}
/**
 * SqueezeBERT model with a question-answering head on top.
 */
export class SqueezeBertForQuestionAnswering extends SqueezeBertPreTrainedModel {
    /**
     * Calls the model on new inputs.
     *
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<QuestionAnsweringModelOutput>} An object containing the model's output logits for question answering.
     */
    _call(model_inputs: any): Promise<QuestionAnsweringModelOutput>;
}
|
|
895
|
+
/**
 * An abstract class to handle weights initialization and a simple interface for
 * downloading and loading pretrained ALBERT models.
 */
export class AlbertPreTrainedModel extends PreTrainedModel {
}
/**
 * The bare ALBERT encoder model, with no task-specific head declared.
 */
export class AlbertModel extends AlbertPreTrainedModel {
}
/**
 * ALBERT model with a sequence classification head on top.
 */
export class AlbertForSequenceClassification extends AlbertPreTrainedModel {
    /**
     * Calls the model on new inputs.
     *
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
     */
    _call(model_inputs: any): Promise<SequenceClassifierOutput>;
}
/**
 * ALBERT model with a question-answering head on top.
 */
export class AlbertForQuestionAnswering extends AlbertPreTrainedModel {
    /**
     * Calls the model on new inputs.
     *
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<QuestionAnsweringModelOutput>} An object containing the model's output logits for question answering.
     */
    _call(model_inputs: any): Promise<QuestionAnsweringModelOutput>;
}
/**
 * ALBERT model with a masked-language-modeling head on top.
 */
export class AlbertForMaskedLM extends AlbertPreTrainedModel {
    /**
     * Calls the model on new inputs.
     *
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<MaskedLMOutput>} An object containing the model's output logits for masked language modeling.
     */
    _call(model_inputs: any): Promise<MaskedLMOutput>;
}
|
|
926
|
+
/**
 * An abstract class to handle weights initialization and a simple interface for
 * downloading and loading pretrained T5 models.
 */
export class T5PreTrainedModel extends PreTrainedModel {
}
/**
 * The bare T5 encoder-decoder model, with no task-specific head declared.
 */
export class T5Model extends T5PreTrainedModel {
}
/**
 * T5ForConditionalGeneration is a class representing a T5 model for conditional generation.
 */
export class T5ForConditionalGeneration extends T5PreTrainedModel {
}
|
|
935
|
+
/**
 * An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained models.
 */
export class LongT5PreTrainedModel extends PreTrainedModel {
}
/**
 * The bare LONGT5 Model transformer outputting raw hidden-states without any specific head on top.
 */
export class LongT5Model extends LongT5PreTrainedModel {
}
/**
 * LONGT5 Model with a `language modeling` head on top.
 */
export class LongT5ForConditionalGeneration extends LongT5PreTrainedModel {
}
|
|
950
|
+
/**
 * An abstract class to handle weights initialization and a simple interface for
 * downloading and loading pretrained MT5 models.
 */
export class MT5PreTrainedModel extends PreTrainedModel {
}
/**
 * The bare MT5 encoder-decoder model, with no task-specific head declared.
 */
export class MT5Model extends MT5PreTrainedModel {
}
/**
 * A class representing a conditional sequence-to-sequence model based on the MT5 architecture.
 */
export class MT5ForConditionalGeneration extends MT5PreTrainedModel {
}
|
|
959
|
+
/**
 * An abstract class to handle weights initialization and a simple interface for
 * downloading and loading pretrained BART models.
 */
export class BartPretrainedModel extends PreTrainedModel {
}
/**
 * The bare BART Model outputting raw hidden-states without any specific head on top.
 */
export class BartModel extends BartPretrainedModel {
}
/**
 * The BART Model with a language modeling head. Can be used for summarization.
 */
export class BartForConditionalGeneration extends BartPretrainedModel {
}
/**
 * Bart model with a sequence classification head on top (a linear layer on top of the pooled output)
 */
export class BartForSequenceClassification extends BartPretrainedModel {
    /**
     * Calls the model on new inputs.
     *
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
     */
    _call(model_inputs: any): Promise<SequenceClassifierOutput>;
}
|
|
983
|
+
/**
 * An abstract class to handle weights initialization and a simple interface for
 * downloading and loading pretrained MBART models.
 */
export class MBartPreTrainedModel extends PreTrainedModel {
}
/**
 * The bare MBART Model outputting raw hidden-states without any specific head on top.
 */
export class MBartModel extends MBartPreTrainedModel {
}
/**
 * The MBART Model with a language modeling head. Can be used for summarization, after fine-tuning the pretrained models.
 */
export class MBartForConditionalGeneration extends MBartPreTrainedModel {
}
/**
 * MBart model with a sequence classification head on top (a linear layer on top of the pooled output).
 */
export class MBartForSequenceClassification extends MBartPreTrainedModel {
    /**
     * Calls the model on new inputs.
     *
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
     */
    _call(model_inputs: any): Promise<SequenceClassifierOutput>;
}
/**
 * MBART variant for causal language modeling (decoder-only usage).
 */
export class MBartForCausalLM extends MBartPreTrainedModel {
}
|
|
1009
|
+
/**
 * An abstract class to handle weights initialization and a simple interface for
 * downloading and loading pretrained Blenderbot models.
 */
export class BlenderbotPreTrainedModel extends PreTrainedModel {
}
/**
 * The bare Blenderbot Model outputting raw hidden-states without any specific head on top.
 */
export class BlenderbotModel extends BlenderbotPreTrainedModel {
}
/**
 * The Blenderbot Model with a language modeling head. Can be used for summarization.
 */
export class BlenderbotForConditionalGeneration extends BlenderbotPreTrainedModel {
}
|
|
1021
|
+
/**
 * An abstract class to handle weights initialization and a simple interface for
 * downloading and loading pretrained BlenderbotSmall models.
 */
export class BlenderbotSmallPreTrainedModel extends PreTrainedModel {
}
/**
 * The bare BlenderbotSmall Model outputting raw hidden-states without any specific head on top.
 */
export class BlenderbotSmallModel extends BlenderbotSmallPreTrainedModel {
}
/**
 * The BlenderbotSmall Model with a language modeling head. Can be used for summarization.
 */
export class BlenderbotSmallForConditionalGeneration extends BlenderbotSmallPreTrainedModel {
}
|
|
1033
|
+
/**
 * An abstract class to handle weights initialization and a simple interface for
 * downloading and loading pretrained RoBERTa models.
 */
export class RobertaPreTrainedModel extends PreTrainedModel {
}
/**
 * The bare RoBERTa encoder model, with no task-specific head declared.
 */
export class RobertaModel extends RobertaPreTrainedModel {
}
/**
 * RobertaForMaskedLM class for performing masked language modeling on Roberta models.
 */
export class RobertaForMaskedLM extends RobertaPreTrainedModel {
    /**
     * Calls the model on new inputs.
     *
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<MaskedLMOutput>} An object containing the model's output logits for masked language modeling.
     */
    _call(model_inputs: any): Promise<MaskedLMOutput>;
}
/**
 * RobertaForSequenceClassification class for performing sequence classification on Roberta models.
 */
export class RobertaForSequenceClassification extends RobertaPreTrainedModel {
    /**
     * Calls the model on new inputs.
     *
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
     */
    _call(model_inputs: any): Promise<SequenceClassifierOutput>;
}
/**
 * RobertaForTokenClassification class for performing token classification on Roberta models.
 */
export class RobertaForTokenClassification extends RobertaPreTrainedModel {
    /**
     * Calls the model on new inputs.
     *
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for token classification.
     */
    _call(model_inputs: any): Promise<TokenClassifierOutput>;
}
/**
 * RobertaForQuestionAnswering class for performing question answering on Roberta models.
 */
export class RobertaForQuestionAnswering extends RobertaPreTrainedModel {
    /**
     * Calls the model on new inputs.
     *
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<QuestionAnsweringModelOutput>} An object containing the model's output logits for question answering.
     */
    _call(model_inputs: any): Promise<QuestionAnsweringModelOutput>;
}
|
|
1085
|
+
/**
|
|
1086
|
+
* An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained models.
|
|
1087
|
+
*/
|
|
1088
|
+
export class XLMPreTrainedModel extends PreTrainedModel {
|
|
1089
|
+
}
|
|
1090
|
+
/**
|
|
1091
|
+
* The bare XLM Model transformer outputting raw hidden-states without any specific head on top.
|
|
1092
|
+
*/
|
|
1093
|
+
export class XLMModel extends XLMPreTrainedModel {
|
|
1094
|
+
}
|
|
1095
|
+
/**
|
|
1096
|
+
* The XLM Model transformer with a language modeling head on top (linear layer with weights tied to the input embeddings).
|
|
1097
|
+
*/
|
|
1098
|
+
export class XLMWithLMHeadModel extends XLMPreTrainedModel {
|
|
1099
|
+
/**
|
|
1100
|
+
* Calls the model on new inputs.
|
|
1101
|
+
*
|
|
1102
|
+
* @param {Object} model_inputs The inputs to the model.
|
|
1103
|
+
* @returns {Promise<MaskedLMOutput>} returned object
|
|
1104
|
+
*/
|
|
1105
|
+
_call(model_inputs: any): Promise<MaskedLMOutput>;
|
|
1106
|
+
}
|
|
1107
|
+
/**
|
|
1108
|
+
* XLM Model with a sequence classification/regression head on top (a linear layer on top of the pooled output)
|
|
1109
|
+
*/
|
|
1110
|
+
export class XLMForSequenceClassification extends XLMPreTrainedModel {
|
|
1111
|
+
/**
|
|
1112
|
+
* Calls the model on new inputs.
|
|
1113
|
+
*
|
|
1114
|
+
* @param {Object} model_inputs The inputs to the model.
|
|
1115
|
+
* @returns {Promise<SequenceClassifierOutput>} returned object
|
|
1116
|
+
*/
|
|
1117
|
+
_call(model_inputs: any): Promise<SequenceClassifierOutput>;
|
|
1118
|
+
}
|
|
1119
|
+
/**
|
|
1120
|
+
* XLM Model with a token classification head on top (a linear layer on top of the hidden-states output)
|
|
1121
|
+
*/
|
|
1122
|
+
export class XLMForTokenClassification extends XLMPreTrainedModel {
|
|
1123
|
+
/**
|
|
1124
|
+
* Calls the model on new inputs.
|
|
1125
|
+
*
|
|
1126
|
+
* @param {Object} model_inputs The inputs to the model.
|
|
1127
|
+
* @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for token classification.
|
|
1128
|
+
*/
|
|
1129
|
+
_call(model_inputs: any): Promise<TokenClassifierOutput>;
|
|
1130
|
+
}
|
|
1131
|
+
/**
|
|
1132
|
+
* XLM Model with a span classification head on top for extractive question-answering tasks
|
|
1133
|
+
*/
|
|
1134
|
+
export class XLMForQuestionAnswering extends XLMPreTrainedModel {
|
|
1135
|
+
/**
|
|
1136
|
+
* Calls the model on new inputs.
|
|
1137
|
+
*
|
|
1138
|
+
* @param {Object} model_inputs The inputs to the model.
|
|
1139
|
+
* @returns {Promise<QuestionAnsweringModelOutput>} returned object
|
|
1140
|
+
*/
|
|
1141
|
+
_call(model_inputs: any): Promise<QuestionAnsweringModelOutput>;
|
|
1142
|
+
}
|
|
1143
|
+
export class XLMRobertaPreTrainedModel extends PreTrainedModel {
|
|
1144
|
+
}
|
|
1145
|
+
export class XLMRobertaModel extends XLMRobertaPreTrainedModel {
|
|
1146
|
+
}
|
|
1147
|
+
/**
|
|
1148
|
+
* XLMRobertaForMaskedLM class for performing masked language modeling on XLMRoberta models.
|
|
1149
|
+
*/
|
|
1150
|
+
export class XLMRobertaForMaskedLM extends XLMRobertaPreTrainedModel {
|
|
1151
|
+
/**
|
|
1152
|
+
* Calls the model on new inputs.
|
|
1153
|
+
*
|
|
1154
|
+
* @param {Object} model_inputs The inputs to the model.
|
|
1155
|
+
* @returns {Promise<MaskedLMOutput>} returned object
|
|
1156
|
+
*/
|
|
1157
|
+
_call(model_inputs: any): Promise<MaskedLMOutput>;
|
|
1158
|
+
}
|
|
1159
|
+
/**
|
|
1160
|
+
* XLMRobertaForSequenceClassification class for performing sequence classification on XLMRoberta models.
|
|
1161
|
+
*/
|
|
1162
|
+
export class XLMRobertaForSequenceClassification extends XLMRobertaPreTrainedModel {
|
|
1163
|
+
/**
|
|
1164
|
+
* Calls the model on new inputs.
|
|
1165
|
+
*
|
|
1166
|
+
* @param {Object} model_inputs The inputs to the model.
|
|
1167
|
+
* @returns {Promise<SequenceClassifierOutput>} returned object
|
|
1168
|
+
*/
|
|
1169
|
+
_call(model_inputs: any): Promise<SequenceClassifierOutput>;
|
|
1170
|
+
}
|
|
1171
|
+
/**
|
|
1172
|
+
* XLMRobertaForTokenClassification class for performing token classification on XLMRoberta models.
|
|
1173
|
+
*/
|
|
1174
|
+
export class XLMRobertaForTokenClassification extends XLMRobertaPreTrainedModel {
|
|
1175
|
+
/**
|
|
1176
|
+
* Calls the model on new inputs.
|
|
1177
|
+
*
|
|
1178
|
+
* @param {Object} model_inputs The inputs to the model.
|
|
1179
|
+
* @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for token classification.
|
|
1180
|
+
*/
|
|
1181
|
+
_call(model_inputs: any): Promise<TokenClassifierOutput>;
|
|
1182
|
+
}
|
|
1183
|
+
/**
|
|
1184
|
+
* XLMRobertaForQuestionAnswering class for performing question answering on XLMRoberta models.
|
|
1185
|
+
*/
|
|
1186
|
+
export class XLMRobertaForQuestionAnswering extends XLMRobertaPreTrainedModel {
|
|
1187
|
+
/**
|
|
1188
|
+
* Calls the model on new inputs.
|
|
1189
|
+
*
|
|
1190
|
+
* @param {Object} model_inputs The inputs to the model.
|
|
1191
|
+
* @returns {Promise<QuestionAnsweringModelOutput>} returned object
|
|
1192
|
+
*/
|
|
1193
|
+
_call(model_inputs: any): Promise<QuestionAnsweringModelOutput>;
|
|
1194
|
+
}
|
|
1195
|
+
export class ASTPreTrainedModel extends PreTrainedModel {
|
|
1196
|
+
}
|
|
1197
|
+
/**
|
|
1198
|
+
* The bare AST Model transformer outputting raw hidden-states without any specific head on top.
|
|
1199
|
+
*/
|
|
1200
|
+
export class ASTModel extends ASTPreTrainedModel {
|
|
1201
|
+
}
|
|
1202
|
+
/**
|
|
1203
|
+
* Audio Spectrogram Transformer model with an audio classification head on top
|
|
1204
|
+
* (a linear layer on top of the pooled output) e.g. for datasets like AudioSet, Speech Commands v2.
|
|
1205
|
+
*/
|
|
1206
|
+
export class ASTForAudioClassification extends ASTPreTrainedModel {
|
|
1207
|
+
}
|
|
1208
|
+
export class WhisperPreTrainedModel extends PreTrainedModel {
|
|
1209
|
+
requires_attention_mask: boolean;
|
|
1210
|
+
}
|
|
1211
|
+
/**
|
|
1212
|
+
* WhisperModel class for training Whisper models without a language model head.
|
|
1213
|
+
*/
|
|
1214
|
+
export class WhisperModel extends WhisperPreTrainedModel {
|
|
1215
|
+
}
|
|
1216
|
+
/**
|
|
1217
|
+
* WhisperForConditionalGeneration class for generating conditional outputs from Whisper models.
|
|
1218
|
+
*/
|
|
1219
|
+
export class WhisperForConditionalGeneration extends WhisperPreTrainedModel {
|
|
1220
|
+
_prepare_generation_config(generation_config: any, kwargs: any): WhisperGenerationConfig;
|
|
1221
|
+
/**
|
|
1222
|
+
*
|
|
1223
|
+
* @param {WhisperGenerationConfig} generation_config
|
|
1224
|
+
*/
|
|
1225
|
+
_retrieve_init_tokens(generation_config: WhisperGenerationConfig): number[];
|
|
1226
|
+
/**
|
|
1227
|
+
* Transcribes or translates log-mel input features to a sequence of auto-regressively generated token ids.
|
|
1228
|
+
* @param {import('./models/whisper/generation_whisper.js').WhisperGenerationFunctionParameters} options
|
|
1229
|
+
* @returns {Promise<ModelOutput|Tensor>} The output of the model, which can contain the generated token ids, attentions, and scores.
|
|
1230
|
+
*/
|
|
1231
|
+
generate({ inputs, generation_config, logits_processor, stopping_criteria, ...kwargs }: import("./models/whisper/generation_whisper.js").WhisperGenerationFunctionParameters): Promise<ModelOutput | Tensor>;
|
|
1232
|
+
/**
|
|
1233
|
+
* Calculates token-level timestamps using the encoder-decoder cross-attentions and
|
|
1234
|
+
* dynamic time-warping (DTW) to map each output token to a position in the input audio.
|
|
1235
|
+
* If `num_frames` is specified, the encoder-decoder cross-attentions will be cropped before applying DTW.
|
|
1236
|
+
* @param {Object} generate_outputs Outputs generated by the model
|
|
1237
|
+
* @param {Tensor[][]} generate_outputs.cross_attentions The cross attentions output by the model
|
|
1238
|
+
* @param {Tensor} generate_outputs.sequences The sequences output by the model
|
|
1239
|
+
* @param {number[][]} alignment_heads Alignment heads of the model
|
|
1240
|
+
* @param {number} [num_frames=null] Number of frames in the input audio.
|
|
1241
|
+
* @param {number} [time_precision=0.02] Precision of the timestamps in seconds
|
|
1242
|
+
* @returns {Tensor} tensor containing the timestamps in seconds for each predicted token
|
|
1243
|
+
*/
|
|
1244
|
+
_extract_token_timestamps(generate_outputs: {
|
|
1245
|
+
cross_attentions: Tensor[][];
|
|
1246
|
+
sequences: Tensor;
|
|
1247
|
+
}, alignment_heads: number[][], num_frames?: number, time_precision?: number): Tensor;
|
|
1248
|
+
}
|
|
1249
|
+
export class LiteWhisperForConditionalGeneration extends WhisperForConditionalGeneration {
|
|
1250
|
+
}
|
|
1251
|
+
export class MoonshinePreTrainedModel extends PreTrainedModel {
|
|
1252
|
+
requires_attention_mask: boolean;
|
|
1253
|
+
}
|
|
1254
|
+
/**
|
|
1255
|
+
* MoonshineModel class for training Moonshine models without a language model head.
|
|
1256
|
+
*/
|
|
1257
|
+
export class MoonshineModel extends MoonshinePreTrainedModel {
|
|
1258
|
+
}
|
|
1259
|
+
export class MoonshineForConditionalGeneration extends MoonshinePreTrainedModel {
|
|
1260
|
+
}
|
|
1261
|
+
/**
|
|
1262
|
+
* Vision Encoder-Decoder model based on OpenAI's GPT architecture for image captioning and other vision tasks
|
|
1263
|
+
*/
|
|
1264
|
+
export class VisionEncoderDecoderModel extends PreTrainedModel {
|
|
1265
|
+
}
|
|
1266
|
+
export class LlavaPreTrainedModel extends PreTrainedModel {
|
|
1267
|
+
}
|
|
1268
|
+
/**
|
|
1269
|
+
* The LLAVA model which consists of a vision backbone and a language model.
|
|
1270
|
+
*/
|
|
1271
|
+
export class LlavaForConditionalGeneration extends LlavaPreTrainedModel {
|
|
1272
|
+
_merge_input_ids_with_image_features(kwargs: any): {
|
|
1273
|
+
inputs_embeds: any;
|
|
1274
|
+
attention_mask: any;
|
|
1275
|
+
};
|
|
1276
|
+
}
|
|
1277
|
+
export class LlavaOnevisionForConditionalGeneration extends LlavaForConditionalGeneration {
|
|
1278
|
+
}
|
|
1279
|
+
export class Moondream1ForConditionalGeneration extends LlavaForConditionalGeneration {
|
|
1280
|
+
}
|
|
1281
|
+
export class Florence2PreTrainedModel extends PreTrainedModel {
|
|
1282
|
+
}
|
|
1283
|
+
export class Florence2ForConditionalGeneration extends Florence2PreTrainedModel {
|
|
1284
|
+
_merge_input_ids_with_image_features({ inputs_embeds, image_features, input_ids, attention_mask, }: {
|
|
1285
|
+
inputs_embeds: any;
|
|
1286
|
+
image_features: any;
|
|
1287
|
+
input_ids: any;
|
|
1288
|
+
attention_mask: any;
|
|
1289
|
+
}): {
|
|
1290
|
+
inputs_embeds: Tensor;
|
|
1291
|
+
attention_mask: Tensor;
|
|
1292
|
+
};
|
|
1293
|
+
_prepare_inputs_embeds({ input_ids, pixel_values, inputs_embeds, attention_mask }: {
|
|
1294
|
+
input_ids: any;
|
|
1295
|
+
pixel_values: any;
|
|
1296
|
+
inputs_embeds: any;
|
|
1297
|
+
attention_mask: any;
|
|
1298
|
+
}): Promise<{
|
|
1299
|
+
inputs_embeds: any;
|
|
1300
|
+
attention_mask: any;
|
|
1301
|
+
}>;
|
|
1302
|
+
forward({ input_ids, pixel_values, attention_mask, decoder_input_ids, decoder_attention_mask, encoder_outputs, past_key_values, inputs_embeds, decoder_inputs_embeds, }: {
|
|
1303
|
+
input_ids: any;
|
|
1304
|
+
pixel_values: any;
|
|
1305
|
+
attention_mask: any;
|
|
1306
|
+
decoder_input_ids: any;
|
|
1307
|
+
decoder_attention_mask: any;
|
|
1308
|
+
encoder_outputs: any;
|
|
1309
|
+
past_key_values: any;
|
|
1310
|
+
inputs_embeds: any;
|
|
1311
|
+
decoder_inputs_embeds: any;
|
|
1312
|
+
}): Promise<any>;
|
|
1313
|
+
}
|
|
1314
|
+
export class PaliGemmaPreTrainedModel extends PreTrainedModel {
|
|
1315
|
+
}
|
|
1316
|
+
export class PaliGemmaForConditionalGeneration extends PaliGemmaPreTrainedModel {
|
|
1317
|
+
_merge_input_ids_with_image_features(kwargs: any): {
|
|
1318
|
+
inputs_embeds: any;
|
|
1319
|
+
attention_mask: any;
|
|
1320
|
+
};
|
|
1321
|
+
}
|
|
1322
|
+
export class LlavaQwen2ForCausalLM extends LlavaPreTrainedModel {
|
|
1323
|
+
_merge_input_ids_with_image_features(kwargs: any): {
|
|
1324
|
+
inputs_embeds: any;
|
|
1325
|
+
attention_mask: any;
|
|
1326
|
+
};
|
|
1327
|
+
}
|
|
1328
|
+
export class Mistral3ForConditionalGeneration extends LlavaQwen2ForCausalLM {
|
|
1329
|
+
}
|
|
1330
|
+
export class Gemma3nPreTrainedModel extends PreTrainedModel {
|
|
1331
|
+
}
|
|
1332
|
+
export class Gemma3nForConditionalGeneration extends Gemma3nPreTrainedModel {
|
|
1333
|
+
forward({ input_ids, attention_mask, pixel_values, input_features, input_features_mask, position_ids, inputs_embeds, per_layer_inputs, past_key_values, generation_config, logits_processor, ...kwargs }: {
|
|
1334
|
+
[x: string]: any;
|
|
1335
|
+
input_ids?: any;
|
|
1336
|
+
attention_mask?: any;
|
|
1337
|
+
pixel_values?: any;
|
|
1338
|
+
input_features?: any;
|
|
1339
|
+
input_features_mask?: any;
|
|
1340
|
+
position_ids?: any;
|
|
1341
|
+
inputs_embeds?: any;
|
|
1342
|
+
per_layer_inputs?: any;
|
|
1343
|
+
past_key_values?: any;
|
|
1344
|
+
generation_config?: any;
|
|
1345
|
+
logits_processor?: any;
|
|
1346
|
+
}): Promise<any>;
|
|
1347
|
+
_merge_input_ids_with_image_features(kwargs: any): {
|
|
1348
|
+
inputs_embeds: any;
|
|
1349
|
+
attention_mask: any;
|
|
1350
|
+
};
|
|
1351
|
+
_merge_input_ids_with_audio_features(kwargs: any): {
|
|
1352
|
+
inputs_embeds: any;
|
|
1353
|
+
attention_mask: any;
|
|
1354
|
+
};
|
|
1355
|
+
}
|
|
1356
|
+
export class Idefics3PreTrainedModel extends PreTrainedModel {
|
|
1357
|
+
}
|
|
1358
|
+
/**
|
|
1359
|
+
* The Idefics3 model which consists of a vision backbone and a language model.
|
|
1360
|
+
*/
|
|
1361
|
+
export class Idefics3ForConditionalGeneration extends Idefics3PreTrainedModel {
|
|
1362
|
+
encode_image({ pixel_values, pixel_attention_mask }: {
|
|
1363
|
+
pixel_values: any;
|
|
1364
|
+
pixel_attention_mask: any;
|
|
1365
|
+
}): Promise<any>;
|
|
1366
|
+
_merge_input_ids_with_image_features(kwargs: any): {
|
|
1367
|
+
inputs_embeds: any;
|
|
1368
|
+
attention_mask: any;
|
|
1369
|
+
};
|
|
1370
|
+
}
|
|
1371
|
+
/**
|
|
1372
|
+
* The SmolVLM Model with a language modeling head.
|
|
1373
|
+
* It is made up a SigLIP vision encoder, with a language modeling head on top.
|
|
1374
|
+
*/
|
|
1375
|
+
export class SmolVLMForConditionalGeneration extends Idefics3ForConditionalGeneration {
|
|
1376
|
+
}
|
|
1377
|
+
export class Phi3VPreTrainedModel extends PreTrainedModel {
|
|
1378
|
+
}
|
|
1379
|
+
export class Phi3VForCausalLM extends Phi3VPreTrainedModel {
|
|
1380
|
+
forward({ input_ids, attention_mask, pixel_values, image_sizes, position_ids, inputs_embeds, past_key_values, generation_config, logits_processor, ...kwargs }: {
|
|
1381
|
+
[x: string]: any;
|
|
1382
|
+
input_ids?: any;
|
|
1383
|
+
attention_mask?: any;
|
|
1384
|
+
pixel_values?: any;
|
|
1385
|
+
image_sizes?: any;
|
|
1386
|
+
position_ids?: any;
|
|
1387
|
+
inputs_embeds?: any;
|
|
1388
|
+
past_key_values?: any;
|
|
1389
|
+
generation_config?: any;
|
|
1390
|
+
logits_processor?: any;
|
|
1391
|
+
}): Promise<any>;
|
|
1392
|
+
}
|
|
1393
|
+
export class CLIPPreTrainedModel extends PreTrainedModel {
|
|
1394
|
+
}
|
|
1395
|
+
/**
|
|
1396
|
+
* CLIP Text and Vision Model with a projection layers on top
|
|
1397
|
+
*
|
|
1398
|
+
* **Example:** Perform zero-shot image classification with a `CLIPModel`.
|
|
1399
|
+
*
|
|
1400
|
+
* ```javascript
|
|
1401
|
+
* import { AutoTokenizer, AutoProcessor, CLIPModel, RawImage } from '@huggingface/transformers';
|
|
1402
|
+
*
|
|
1403
|
+
* // Load tokenizer, processor, and model
|
|
1404
|
+
* let tokenizer = await AutoTokenizer.from_pretrained('Xenova/clip-vit-base-patch16');
|
|
1405
|
+
* let processor = await AutoProcessor.from_pretrained('Xenova/clip-vit-base-patch16');
|
|
1406
|
+
* let model = await CLIPModel.from_pretrained('Xenova/clip-vit-base-patch16');
|
|
1407
|
+
*
|
|
1408
|
+
* // Run tokenization
|
|
1409
|
+
* let texts = ['a photo of a car', 'a photo of a football match']
|
|
1410
|
+
* let text_inputs = tokenizer(texts, { padding: true, truncation: true });
|
|
1411
|
+
*
|
|
1412
|
+
* // Read image and run processor
|
|
1413
|
+
* let image = await RawImage.read('https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/football-match.jpg');
|
|
1414
|
+
* let image_inputs = await processor(image);
|
|
1415
|
+
*
|
|
1416
|
+
* // Run model with both text and pixel inputs
|
|
1417
|
+
* let output = await model({ ...text_inputs, ...image_inputs });
|
|
1418
|
+
* // {
|
|
1419
|
+
* // logits_per_image: Tensor {
|
|
1420
|
+
* // dims: [ 1, 2 ],
|
|
1421
|
+
* // data: Float32Array(2) [ 18.579734802246094, 24.31830596923828 ],
|
|
1422
|
+
* // },
|
|
1423
|
+
* // logits_per_text: Tensor {
|
|
1424
|
+
* // dims: [ 2, 1 ],
|
|
1425
|
+
* // data: Float32Array(2) [ 18.579734802246094, 24.31830596923828 ],
|
|
1426
|
+
* // },
|
|
1427
|
+
* // text_embeds: Tensor {
|
|
1428
|
+
* // dims: [ 2, 512 ],
|
|
1429
|
+
* // data: Float32Array(1024) [ ... ],
|
|
1430
|
+
* // },
|
|
1431
|
+
* // image_embeds: Tensor {
|
|
1432
|
+
* // dims: [ 1, 512 ],
|
|
1433
|
+
* // data: Float32Array(512) [ ... ],
|
|
1434
|
+
* // }
|
|
1435
|
+
* // }
|
|
1436
|
+
* ```
|
|
1437
|
+
*/
|
|
1438
|
+
export class CLIPModel extends CLIPPreTrainedModel {
|
|
1439
|
+
}
|
|
1440
|
+
/**
|
|
1441
|
+
* The text model from CLIP without any head or projection on top.
|
|
1442
|
+
*/
|
|
1443
|
+
export class CLIPTextModel extends CLIPPreTrainedModel {
|
|
1444
|
+
}
|
|
1445
|
+
/**
|
|
1446
|
+
* CLIP Text Model with a projection layer on top (a linear layer on top of the pooled output)
|
|
1447
|
+
*
|
|
1448
|
+
* **Example:** Compute text embeddings with `CLIPTextModelWithProjection`.
|
|
1449
|
+
*
|
|
1450
|
+
* ```javascript
|
|
1451
|
+
* import { AutoTokenizer, CLIPTextModelWithProjection } from '@huggingface/transformers';
|
|
1452
|
+
*
|
|
1453
|
+
* // Load tokenizer and text model
|
|
1454
|
+
* const tokenizer = await AutoTokenizer.from_pretrained('Xenova/clip-vit-base-patch16');
|
|
1455
|
+
* const text_model = await CLIPTextModelWithProjection.from_pretrained('Xenova/clip-vit-base-patch16');
|
|
1456
|
+
*
|
|
1457
|
+
* // Run tokenization
|
|
1458
|
+
* let texts = ['a photo of a car', 'a photo of a football match'];
|
|
1459
|
+
* let text_inputs = tokenizer(texts, { padding: true, truncation: true });
|
|
1460
|
+
*
|
|
1461
|
+
* // Compute embeddings
|
|
1462
|
+
* const { text_embeds } = await text_model(text_inputs);
|
|
1463
|
+
* // Tensor {
|
|
1464
|
+
* // dims: [ 2, 512 ],
|
|
1465
|
+
* // type: 'float32',
|
|
1466
|
+
* // data: Float32Array(1024) [ ... ],
|
|
1467
|
+
* // size: 1024
|
|
1468
|
+
* // }
|
|
1469
|
+
* ```
|
|
1470
|
+
*/
|
|
1471
|
+
export class CLIPTextModelWithProjection extends CLIPPreTrainedModel {
|
|
1472
|
+
}
|
|
1473
|
+
/**
|
|
1474
|
+
* The vision model from CLIP without any head or projection on top.
|
|
1475
|
+
*/
|
|
1476
|
+
export class CLIPVisionModel extends CLIPPreTrainedModel {
|
|
1477
|
+
}
|
|
1478
|
+
/**
|
|
1479
|
+
* CLIP Vision Model with a projection layer on top (a linear layer on top of the pooled output)
|
|
1480
|
+
*
|
|
1481
|
+
* **Example:** Compute vision embeddings with `CLIPVisionModelWithProjection`.
|
|
1482
|
+
*
|
|
1483
|
+
* ```javascript
|
|
1484
|
+
* import { AutoProcessor, CLIPVisionModelWithProjection, RawImage} from '@huggingface/transformers';
|
|
1485
|
+
*
|
|
1486
|
+
* // Load processor and vision model
|
|
1487
|
+
* const processor = await AutoProcessor.from_pretrained('Xenova/clip-vit-base-patch16');
|
|
1488
|
+
* const vision_model = await CLIPVisionModelWithProjection.from_pretrained('Xenova/clip-vit-base-patch16');
|
|
1489
|
+
*
|
|
1490
|
+
* // Read image and run processor
|
|
1491
|
+
* let image = await RawImage.read('https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/football-match.jpg');
|
|
1492
|
+
* let image_inputs = await processor(image);
|
|
1493
|
+
*
|
|
1494
|
+
* // Compute embeddings
|
|
1495
|
+
* const { image_embeds } = await vision_model(image_inputs);
|
|
1496
|
+
* // Tensor {
|
|
1497
|
+
* // dims: [ 1, 512 ],
|
|
1498
|
+
* // type: 'float32',
|
|
1499
|
+
* // data: Float32Array(512) [ ... ],
|
|
1500
|
+
* // size: 512
|
|
1501
|
+
* // }
|
|
1502
|
+
* ```
|
|
1503
|
+
*/
|
|
1504
|
+
export class CLIPVisionModelWithProjection extends CLIPPreTrainedModel {
|
|
1505
|
+
}
|
|
1506
|
+
export class SiglipPreTrainedModel extends PreTrainedModel {
|
|
1507
|
+
}
|
|
1508
|
+
/**
|
|
1509
|
+
* SigLIP Text and Vision Model with a projection layers on top
|
|
1510
|
+
*
|
|
1511
|
+
* **Example:** Perform zero-shot image classification with a `SiglipModel`.
|
|
1512
|
+
*
|
|
1513
|
+
* ```javascript
|
|
1514
|
+
* import { AutoTokenizer, AutoProcessor, SiglipModel, RawImage } from '@huggingface/transformers';
|
|
1515
|
+
*
|
|
1516
|
+
* // Load tokenizer, processor, and model
|
|
1517
|
+
* const tokenizer = await AutoTokenizer.from_pretrained('Xenova/siglip-base-patch16-224');
|
|
1518
|
+
* const processor = await AutoProcessor.from_pretrained('Xenova/siglip-base-patch16-224');
|
|
1519
|
+
* const model = await SiglipModel.from_pretrained('Xenova/siglip-base-patch16-224');
|
|
1520
|
+
*
|
|
1521
|
+
* // Run tokenization
|
|
1522
|
+
* const texts = ['a photo of 2 cats', 'a photo of 2 dogs'];
|
|
1523
|
+
* const text_inputs = tokenizer(texts, { padding: 'max_length', truncation: true });
|
|
1524
|
+
*
|
|
1525
|
+
* // Read image and run processor
|
|
1526
|
+
* const image = await RawImage.read('http://images.cocodataset.org/val2017/000000039769.jpg');
|
|
1527
|
+
* const image_inputs = await processor(image);
|
|
1528
|
+
*
|
|
1529
|
+
* // Run model with both text and pixel inputs
|
|
1530
|
+
* const output = await model({ ...text_inputs, ...image_inputs });
|
|
1531
|
+
* // {
|
|
1532
|
+
* // logits_per_image: Tensor {
|
|
1533
|
+
* // dims: [ 1, 2 ],
|
|
1534
|
+
* // data: Float32Array(2) [ -1.6019744873046875, -10.720091819763184 ],
|
|
1535
|
+
* // },
|
|
1536
|
+
* // logits_per_text: Tensor {
|
|
1537
|
+
* // dims: [ 2, 1 ],
|
|
1538
|
+
* // data: Float32Array(2) [ -1.6019744873046875, -10.720091819763184 ],
|
|
1539
|
+
* // },
|
|
1540
|
+
* // text_embeds: Tensor {
|
|
1541
|
+
* // dims: [ 2, 768 ],
|
|
1542
|
+
* // data: Float32Array(1536) [ ... ],
|
|
1543
|
+
* // },
|
|
1544
|
+
* // image_embeds: Tensor {
|
|
1545
|
+
* // dims: [ 1, 768 ],
|
|
1546
|
+
* // data: Float32Array(768) [ ... ],
|
|
1547
|
+
* // }
|
|
1548
|
+
* // }
|
|
1549
|
+
* ```
|
|
1550
|
+
*/
|
|
1551
|
+
export class SiglipModel extends SiglipPreTrainedModel {
|
|
1552
|
+
}
|
|
1553
|
+
/**
|
|
1554
|
+
* The text model from SigLIP without any head or projection on top.
|
|
1555
|
+
*
|
|
1556
|
+
* **Example:** Compute text embeddings with `SiglipTextModel`.
|
|
1557
|
+
*
|
|
1558
|
+
* ```javascript
|
|
1559
|
+
* import { AutoTokenizer, SiglipTextModel } from '@huggingface/transformers';
|
|
1560
|
+
*
|
|
1561
|
+
* // Load tokenizer and text model
|
|
1562
|
+
* const tokenizer = await AutoTokenizer.from_pretrained('Xenova/siglip-base-patch16-224');
|
|
1563
|
+
* const text_model = await SiglipTextModel.from_pretrained('Xenova/siglip-base-patch16-224');
|
|
1564
|
+
*
|
|
1565
|
+
* // Run tokenization
|
|
1566
|
+
* const texts = ['a photo of 2 cats', 'a photo of 2 dogs'];
|
|
1567
|
+
* const text_inputs = tokenizer(texts, { padding: 'max_length', truncation: true });
|
|
1568
|
+
*
|
|
1569
|
+
* // Compute embeddings
|
|
1570
|
+
* const { pooler_output } = await text_model(text_inputs);
|
|
1571
|
+
* // Tensor {
|
|
1572
|
+
* // dims: [ 2, 768 ],
|
|
1573
|
+
* // type: 'float32',
|
|
1574
|
+
* // data: Float32Array(1536) [ ... ],
|
|
1575
|
+
* // size: 1536
|
|
1576
|
+
* // }
|
|
1577
|
+
* ```
|
|
1578
|
+
*/
|
|
1579
|
+
export class SiglipTextModel extends SiglipPreTrainedModel {
|
|
1580
|
+
}
|
|
1581
|
+
/**
|
|
1582
|
+
* The vision model from SigLIP without any head or projection on top.
|
|
1583
|
+
*
|
|
1584
|
+
* **Example:** Compute vision embeddings with `SiglipVisionModel`.
|
|
1585
|
+
*
|
|
1586
|
+
* ```javascript
|
|
1587
|
+
* import { AutoProcessor, SiglipVisionModel, RawImage} from '@huggingface/transformers';
|
|
1588
|
+
*
|
|
1589
|
+
* // Load processor and vision model
|
|
1590
|
+
* const processor = await AutoProcessor.from_pretrained('Xenova/siglip-base-patch16-224');
|
|
1591
|
+
* const vision_model = await SiglipVisionModel.from_pretrained('Xenova/siglip-base-patch16-224');
|
|
1592
|
+
*
|
|
1593
|
+
* // Read image and run processor
|
|
1594
|
+
* const image = await RawImage.read('https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/football-match.jpg');
|
|
1595
|
+
* const image_inputs = await processor(image);
|
|
1596
|
+
*
|
|
1597
|
+
* // Compute embeddings
|
|
1598
|
+
* const { pooler_output } = await vision_model(image_inputs);
|
|
1599
|
+
* // Tensor {
|
|
1600
|
+
* // dims: [ 1, 768 ],
|
|
1601
|
+
* // type: 'float32',
|
|
1602
|
+
* // data: Float32Array(768) [ ... ],
|
|
1603
|
+
* // size: 768
|
|
1604
|
+
* // }
|
|
1605
|
+
* ```
|
|
1606
|
+
*/
|
|
1607
|
+
export class SiglipVisionModel extends CLIPPreTrainedModel {
|
|
1608
|
+
}
|
|
1609
|
+
export class ChineseCLIPPreTrainedModel extends PreTrainedModel {
|
|
1610
|
+
}
|
|
1611
|
+
export class ChineseCLIPModel extends ChineseCLIPPreTrainedModel {
|
|
1612
|
+
}
|
|
1613
|
+
export class JinaCLIPPreTrainedModel extends PreTrainedModel {
|
|
1614
|
+
}
|
|
1615
|
+
export class JinaCLIPModel extends JinaCLIPPreTrainedModel {
|
|
1616
|
+
forward(model_inputs: any): Promise<{
|
|
1617
|
+
text_embeddings: any;
|
|
1618
|
+
l2norm_text_embeddings: any;
|
|
1619
|
+
image_embeddings: any;
|
|
1620
|
+
l2norm_image_embeddings: any;
|
|
1621
|
+
}>;
|
|
1622
|
+
}
|
|
1623
|
+
export class JinaCLIPTextModel extends JinaCLIPPreTrainedModel {
|
|
1624
|
+
}
|
|
1625
|
+
export class JinaCLIPVisionModel extends JinaCLIPPreTrainedModel {
|
|
1626
|
+
}
|
|
1627
|
+
export class CLIPSegPreTrainedModel extends PreTrainedModel {
|
|
1628
|
+
}
|
|
1629
|
+
export class CLIPSegModel extends CLIPSegPreTrainedModel {
|
|
1630
|
+
}
|
|
1631
|
+
/**
|
|
1632
|
+
* CLIPSeg model with a Transformer-based decoder on top for zero-shot and one-shot image segmentation.
|
|
1633
|
+
*
|
|
1634
|
+
* **Example:** Perform zero-shot image segmentation with a `CLIPSegForImageSegmentation` model.
|
|
1635
|
+
*
|
|
1636
|
+
* ```javascript
|
|
1637
|
+
* import { AutoTokenizer, AutoProcessor, CLIPSegForImageSegmentation, RawImage } from '@huggingface/transformers';
|
|
1638
|
+
*
|
|
1639
|
+
* // Load tokenizer, processor, and model
|
|
1640
|
+
* const tokenizer = await AutoTokenizer.from_pretrained('Xenova/clipseg-rd64-refined');
|
|
1641
|
+
* const processor = await AutoProcessor.from_pretrained('Xenova/clipseg-rd64-refined');
|
|
1642
|
+
* const model = await CLIPSegForImageSegmentation.from_pretrained('Xenova/clipseg-rd64-refined');
|
|
1643
|
+
*
|
|
1644
|
+
* // Run tokenization
|
|
1645
|
+
* const texts = ['a glass', 'something to fill', 'wood', 'a jar'];
|
|
1646
|
+
* const text_inputs = tokenizer(texts, { padding: true, truncation: true });
|
|
1647
|
+
*
|
|
1648
|
+
* // Read image and run processor
|
|
1649
|
+
* const image = await RawImage.read('https://github.com/timojl/clipseg/blob/master/example_image.jpg?raw=true');
|
|
1650
|
+
* const image_inputs = await processor(image);
|
|
1651
|
+
*
|
|
1652
|
+
* // Run model with both text and pixel inputs
|
|
1653
|
+
* const { logits } = await model({ ...text_inputs, ...image_inputs });
|
|
1654
|
+
* // logits: Tensor {
|
|
1655
|
+
* // dims: [4, 352, 352],
|
|
1656
|
+
* // type: 'float32',
|
|
1657
|
+
* // data: Float32Array(495616) [ ... ],
|
|
1658
|
+
* // size: 495616
|
|
1659
|
+
* // }
|
|
1660
|
+
* ```
|
|
1661
|
+
*
|
|
1662
|
+
* You can visualize the predictions as follows:
|
|
1663
|
+
* ```javascript
|
|
1664
|
+
* const preds = logits
|
|
1665
|
+
* .unsqueeze_(1)
|
|
1666
|
+
* .sigmoid_()
|
|
1667
|
+
* .mul_(255)
|
|
1668
|
+
* .round_()
|
|
1669
|
+
* .to('uint8');
|
|
1670
|
+
*
|
|
1671
|
+
* for (let i = 0; i < preds.dims[0]; ++i) {
|
|
1672
|
+
* const img = RawImage.fromTensor(preds[i]);
|
|
1673
|
+
* img.save(`prediction_${i}.png`);
|
|
1674
|
+
* }
|
|
1675
|
+
* ```
|
|
1676
|
+
*/
|
|
1677
|
+
export class CLIPSegForImageSegmentation extends CLIPSegPreTrainedModel {
|
|
1678
|
+
}
|
|
1679
|
+
export class GPT2PreTrainedModel extends PreTrainedModel {
|
|
1680
|
+
}
|
|
1681
|
+
export class GPT2Model extends GPT2PreTrainedModel {
|
|
1682
|
+
}
|
|
1683
|
+
/**
|
|
1684
|
+
* GPT-2 language model head on top of the GPT-2 base model. This model is suitable for text generation tasks.
|
|
1685
|
+
*/
|
|
1686
|
+
export class GPT2LMHeadModel extends GPT2PreTrainedModel {
|
|
1687
|
+
}
|
|
1688
|
+
export class JAISPreTrainedModel extends PreTrainedModel {
|
|
1689
|
+
}
|
|
1690
|
+
/**
|
|
1691
|
+
* The bare JAIS Model transformer outputting raw hidden-states without any specific head on top.
|
|
1692
|
+
*/
|
|
1693
|
+
export class JAISModel extends JAISPreTrainedModel {
|
|
1694
|
+
}
|
|
1695
|
+
/**
|
|
1696
|
+
* The JAIS Model transformer with a language modeling head on top (linear layer with weights tied to the input embeddings).
|
|
1697
|
+
*/
|
|
1698
|
+
export class JAISLMHeadModel extends JAISPreTrainedModel {
|
|
1699
|
+
}
|
|
1700
|
+
export class GPTNeoPreTrainedModel extends PreTrainedModel {
|
|
1701
|
+
}
|
|
1702
|
+
export class GPTNeoModel extends GPTNeoPreTrainedModel {
|
|
1703
|
+
}
|
|
1704
|
+
export class GPTNeoForCausalLM extends GPTNeoPreTrainedModel {
|
|
1705
|
+
}
|
|
1706
|
+
export class GPTNeoXPreTrainedModel extends PreTrainedModel {
|
|
1707
|
+
}
|
|
1708
|
+
export class GPTNeoXModel extends GPTNeoXPreTrainedModel {
|
|
1709
|
+
}
|
|
1710
|
+
export class GPTNeoXForCausalLM extends GPTNeoXPreTrainedModel {
|
|
1711
|
+
}
|
|
1712
|
+
export class GPTJPreTrainedModel extends PreTrainedModel {
|
|
1713
|
+
}
|
|
1714
|
+
export class GPTJModel extends GPTJPreTrainedModel {
|
|
1715
|
+
}
|
|
1716
|
+
export class GPTJForCausalLM extends GPTJPreTrainedModel {
|
|
1717
|
+
}
|
|
1718
|
+
export class GPTBigCodePreTrainedModel extends PreTrainedModel {
|
|
1719
|
+
}
|
|
1720
|
+
export class GPTBigCodeModel extends GPTBigCodePreTrainedModel {
|
|
1721
|
+
}
|
|
1722
|
+
export class GPTBigCodeForCausalLM extends GPTBigCodePreTrainedModel {
|
|
1723
|
+
}
|
|
1724
|
+
export class CodeGenPreTrainedModel extends PreTrainedModel {
|
|
1725
|
+
}
|
|
1726
|
+
/**
|
|
1727
|
+
* CodeGenModel is a class representing a code generation model without a language model head.
|
|
1728
|
+
*/
|
|
1729
|
+
export class CodeGenModel extends CodeGenPreTrainedModel {
|
|
1730
|
+
}
|
|
1731
|
+
/**
|
|
1732
|
+
* CodeGenForCausalLM is a class that represents a code generation model based on the GPT-2 architecture. It extends the `CodeGenPreTrainedModel` class.
|
|
1733
|
+
*/
|
|
1734
|
+
export class CodeGenForCausalLM extends CodeGenPreTrainedModel {
|
|
1735
|
+
}
|
|
1736
|
+
/**
|
|
1737
|
+
* The bare LLama Model outputting raw hidden-states without any specific head on top.
|
|
1738
|
+
*/
|
|
1739
|
+
export class LlamaPreTrainedModel extends PreTrainedModel {
|
|
1740
|
+
}
|
|
1741
|
+
/**
|
|
1742
|
+
* The bare LLaMA Model outputting raw hidden-states without any specific head on top.
|
|
1743
|
+
*/
|
|
1744
|
+
export class LlamaModel extends LlamaPreTrainedModel {
|
|
1745
|
+
}
|
|
1746
|
+
export class LlamaForCausalLM extends LlamaPreTrainedModel {
|
|
1747
|
+
}
|
|
1748
|
+
export class Llama4PreTrainedModel extends PreTrainedModel {
|
|
1749
|
+
}
|
|
1750
|
+
export class Llama4ForCausalLM extends Llama4PreTrainedModel {
|
|
1751
|
+
}
|
|
1752
|
+
export class NanoChatPreTrainedModel extends PreTrainedModel {
|
|
1753
|
+
}
|
|
1754
|
+
export class NanoChatModel extends NanoChatPreTrainedModel {
|
|
1755
|
+
}
|
|
1756
|
+
export class NanoChatForCausalLM extends NanoChatPreTrainedModel {
|
|
1757
|
+
}
|
|
1758
|
+
export class ArceePreTrainedModel extends PreTrainedModel {
|
|
1759
|
+
}
|
|
1760
|
+
export class ArceeModel extends ArceePreTrainedModel {
|
|
1761
|
+
}
|
|
1762
|
+
export class ArceeForCausalLM extends ArceePreTrainedModel {
|
|
1763
|
+
}
|
|
1764
|
+
export class Lfm2PreTrainedModel extends PreTrainedModel {
|
|
1765
|
+
}
|
|
1766
|
+
export class Lfm2Model extends Lfm2PreTrainedModel {
|
|
1767
|
+
}
|
|
1768
|
+
export class Lfm2ForCausalLM extends Lfm2PreTrainedModel {
|
|
1769
|
+
}
|
|
1770
|
+
export class SmolLM3PreTrainedModel extends PreTrainedModel {
|
|
1771
|
+
}
|
|
1772
|
+
export class SmolLM3Model extends SmolLM3PreTrainedModel {
|
|
1773
|
+
}
|
|
1774
|
+
export class SmolLM3ForCausalLM extends SmolLM3PreTrainedModel {
|
|
1775
|
+
}
|
|
1776
|
+
export class HeliumPreTrainedModel extends PreTrainedModel {
|
|
1777
|
+
}
|
|
1778
|
+
export class HeliumModel extends HeliumPreTrainedModel {
|
|
1779
|
+
}
|
|
1780
|
+
export class HeliumForCausalLM extends HeliumPreTrainedModel {
|
|
1781
|
+
}
|
|
1782
|
+
export class GlmPreTrainedModel extends PreTrainedModel {
|
|
1783
|
+
}
|
|
1784
|
+
export class GlmModel extends GlmPreTrainedModel {
|
|
1785
|
+
}
|
|
1786
|
+
export class GlmForCausalLM extends GlmPreTrainedModel {
|
|
1787
|
+
}
|
|
1788
|
+
export class ExaonePreTrainedModel extends PreTrainedModel {
|
|
1789
|
+
}
|
|
1790
|
+
export class ExaoneModel extends ExaonePreTrainedModel {
|
|
1791
|
+
}
|
|
1792
|
+
export class ExaoneForCausalLM extends ExaonePreTrainedModel {
|
|
1793
|
+
}
|
|
1794
|
+
export class MobileLLMPreTrainedModel extends PreTrainedModel {
|
|
1795
|
+
}
|
|
1796
|
+
export class MobileLLMModel extends MobileLLMPreTrainedModel {
|
|
1797
|
+
}
|
|
1798
|
+
export class MobileLLMForCausalLM extends MobileLLMPreTrainedModel {
|
|
1799
|
+
}
|
|
1800
|
+
export class OlmoPreTrainedModel extends PreTrainedModel {
|
|
1801
|
+
}
|
|
1802
|
+
export class OlmoModel extends OlmoPreTrainedModel {
|
|
1803
|
+
}
|
|
1804
|
+
export class OlmoForCausalLM extends OlmoPreTrainedModel {
|
|
1805
|
+
}
|
|
1806
|
+
export class Olmo2PreTrainedModel extends PreTrainedModel {
|
|
1807
|
+
}
|
|
1808
|
+
export class Olmo2Model extends Olmo2PreTrainedModel {
|
|
1809
|
+
}
|
|
1810
|
+
export class Olmo2ForCausalLM extends Olmo2PreTrainedModel {
|
|
1811
|
+
}
|
|
1812
|
+
export class GranitePreTrainedModel extends PreTrainedModel {
|
|
1813
|
+
}
|
|
1814
|
+
export class GraniteModel extends GranitePreTrainedModel {
|
|
1815
|
+
}
|
|
1816
|
+
export class GraniteForCausalLM extends GranitePreTrainedModel {
|
|
1817
|
+
}
|
|
1818
|
+
export class GraniteMoeHybridPreTrainedModel extends PreTrainedModel {
|
|
1819
|
+
}
|
|
1820
|
+
export class GraniteMoeHybridModel extends GraniteMoeHybridPreTrainedModel {
|
|
1821
|
+
}
|
|
1822
|
+
export class GraniteMoeHybridForCausalLM extends GraniteMoeHybridPreTrainedModel {
|
|
1823
|
+
}
|
|
1824
|
+
/**
|
|
1825
|
+
* The bare Cohere Model outputting raw hidden-states without any specific head on top.
|
|
1826
|
+
*/
|
|
1827
|
+
export class CoherePreTrainedModel extends PreTrainedModel {
|
|
1828
|
+
}
|
|
1829
|
+
export class CohereModel extends CoherePreTrainedModel {
|
|
1830
|
+
}
|
|
1831
|
+
export class CohereForCausalLM extends CoherePreTrainedModel {
|
|
1832
|
+
}
|
|
1833
|
+
/**
|
|
1834
|
+
* The bare Gemma Model outputting raw hidden-states without any specific head on top.
|
|
1835
|
+
*/
|
|
1836
|
+
export class GemmaPreTrainedModel extends PreTrainedModel {
|
|
1837
|
+
}
|
|
1838
|
+
/**
|
|
1839
|
+
* The bare Gemma Model outputting raw hidden-states without any specific head on top.
|
|
1840
|
+
*/
|
|
1841
|
+
export class GemmaModel extends GemmaPreTrainedModel {
|
|
1842
|
+
}
|
|
1843
|
+
export class GemmaForCausalLM extends GemmaPreTrainedModel {
|
|
1844
|
+
}
|
|
1845
|
+
/**
|
|
1846
|
+
* The bare Gemma2 Model outputting raw hidden-states without any specific head on top.
|
|
1847
|
+
*/
|
|
1848
|
+
export class Gemma2PreTrainedModel extends PreTrainedModel {
|
|
1849
|
+
}
|
|
1850
|
+
/**
|
|
1851
|
+
* The bare Gemma2 Model outputting raw hidden-states without any specific head on top.
|
|
1852
|
+
*/
|
|
1853
|
+
export class Gemma2Model extends Gemma2PreTrainedModel {
|
|
1854
|
+
}
|
|
1855
|
+
export class Gemma2ForCausalLM extends Gemma2PreTrainedModel {
|
|
1856
|
+
}
|
|
1857
|
+
export class VaultGemmaPreTrainedModel extends PreTrainedModel {
|
|
1858
|
+
}
|
|
1859
|
+
export class VaultGemmaModel extends VaultGemmaPreTrainedModel {
|
|
1860
|
+
}
|
|
1861
|
+
export class VaultGemmaForCausalLM extends VaultGemmaPreTrainedModel {
|
|
1862
|
+
}
|
|
1863
|
+
/**
|
|
1864
|
+
* The bare Gemma3 Model outputting raw hidden-states without any specific head on top.
|
|
1865
|
+
*/
|
|
1866
|
+
export class Gemma3PreTrainedModel extends PreTrainedModel {
|
|
1867
|
+
}
|
|
1868
|
+
/**
|
|
1869
|
+
* The bare Gemma3 Model outputting raw hidden-states without any specific head on top.
|
|
1870
|
+
*/
|
|
1871
|
+
export class Gemma3Model extends Gemma3PreTrainedModel {
|
|
1872
|
+
}
|
|
1873
|
+
export class Gemma3ForCausalLM extends Gemma3PreTrainedModel {
|
|
1874
|
+
}
|
|
1875
|
+
export class OpenELMPreTrainedModel extends PreTrainedModel {
|
|
1876
|
+
}
|
|
1877
|
+
export class OpenELMModel extends OpenELMPreTrainedModel {
|
|
1878
|
+
}
|
|
1879
|
+
export class OpenELMForCausalLM extends OpenELMPreTrainedModel {
|
|
1880
|
+
}
|
|
1881
|
+
/**
|
|
1882
|
+
* The bare Qwen2 Model outputting raw hidden-states without any specific head on top.
|
|
1883
|
+
*/
|
|
1884
|
+
export class Qwen2PreTrainedModel extends PreTrainedModel {
|
|
1885
|
+
}
|
|
1886
|
+
/**
|
|
1887
|
+
* The bare Qwen2 Model outputting raw hidden-states without any specific head on top.
|
|
1888
|
+
*/
|
|
1889
|
+
export class Qwen2Model extends Qwen2PreTrainedModel {
|
|
1890
|
+
}
|
|
1891
|
+
export class Qwen2ForCausalLM extends Qwen2PreTrainedModel {
|
|
1892
|
+
}
|
|
1893
|
+
/**
|
|
1894
|
+
* The bare Qwen3 Model outputting raw hidden-states without any specific head on top.
|
|
1895
|
+
*/
|
|
1896
|
+
export class Qwen3PreTrainedModel extends PreTrainedModel {
|
|
1897
|
+
}
|
|
1898
|
+
/**
|
|
1899
|
+
* The bare Qwen3 Model outputting raw hidden-states without any specific head on top.
|
|
1900
|
+
*/
|
|
1901
|
+
export class Qwen3Model extends Qwen3PreTrainedModel {
|
|
1902
|
+
}
|
|
1903
|
+
export class Qwen3ForCausalLM extends Qwen3PreTrainedModel {
|
|
1904
|
+
}
|
|
1905
|
+
export class Qwen2VLPreTrainedModel extends PreTrainedModel {
|
|
1906
|
+
}
|
|
1907
|
+
export class Qwen2VLForConditionalGeneration extends Qwen2VLPreTrainedModel {
|
|
1908
|
+
/**
|
|
1909
|
+
* Calculate the 3D rope index based on image and video's temporal, height and width in LLM.
|
|
1910
|
+
*
|
|
1911
|
+
* Explanation:
|
|
1912
|
+
* Each embedding sequence contains vision embedding and text embedding or just contains text embedding.
|
|
1913
|
+
*
|
|
1914
|
+
* For pure text embedding sequence, the rotary position embedding has no difference with mordern LLMs.
|
|
1915
|
+
* Examples:
|
|
1916
|
+
* input_ids: [T T T T T], here T is for text.
|
|
1917
|
+
* temporal position_ids: [0, 1, 2, 3, 4]
|
|
1918
|
+
* height position_ids: [0, 1, 2, 3, 4]
|
|
1919
|
+
* width position_ids: [0, 1, 2, 3, 4]
|
|
1920
|
+
*
|
|
1921
|
+
* For vision and text embedding sequence, we calculate 3D rotary position embedding for vision part
|
|
1922
|
+
* and 1D rotary position embeddin for text part.
|
|
1923
|
+
* Examples:
|
|
1924
|
+
* Assume we have a video input with 3 temporal patches, 2 height patches and 2 width patches.
|
|
1925
|
+
* input_ids: [V V V V V V V V V V V V T T T T T], here V is for vision.
|
|
1926
|
+
* vision temporal position_ids: [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2]
|
|
1927
|
+
* vision height position_ids: [0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1]
|
|
1928
|
+
* vision width position_ids: [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]
|
|
1929
|
+
* text temporal position_ids: [3, 4, 5, 6, 7]
|
|
1930
|
+
* text height position_ids: [3, 4, 5, 6, 7]
|
|
1931
|
+
* text width position_ids: [3, 4, 5, 6, 7]
|
|
1932
|
+
* Here we calculate the text start position_ids as the max vision position_ids plus 1.
|
|
1933
|
+
*
|
|
1934
|
+
* @param {Tensor} input_ids Indices of input sequence tokens in the vocabulary. Tensor of shape `(batch_size, sequence_length)`.
|
|
1935
|
+
* @param {Tensor} image_grid_thw (Optional) The temporal, height and width of feature shape of each image in LLM. Tensor of shape `(num_images, 3)`.
|
|
1936
|
+
* @param {Tensor} video_grid_thw (Optional) The temporal, height and width of feature shape of each video in LLM. Tensor of shape `(num_videos, 3)`.
|
|
1937
|
+
* @param {Tensor} attention_mask (Optional) Mask to avoid performing attention on padding token indices. Tensor of shape `(batch_size, sequence_length)`. Mask values selected in `[0, 1]`:
|
|
1938
|
+
* - 1 for tokens that are **not masked**,
|
|
1939
|
+
* - 0 for tokens that are **masked**.
|
|
1940
|
+
* @returns {[Tensor, Tensor]} [position_ids, mrope_position_deltas] with:
|
|
1941
|
+
* - position_ids: Tensor of shape `(3, batch_size, sequence_length)`.
|
|
1942
|
+
* - mrope_position_deltas: Tensor of shape `(batch_size)`.
|
|
1943
|
+
*/
|
|
1944
|
+
get_rope_index(input_ids: Tensor, image_grid_thw: Tensor, video_grid_thw: Tensor, attention_mask: Tensor): [Tensor, Tensor];
|
|
1945
|
+
encode_image({ pixel_values, image_grid_thw }: {
|
|
1946
|
+
pixel_values: any;
|
|
1947
|
+
image_grid_thw: any;
|
|
1948
|
+
}): Promise<any>;
|
|
1949
|
+
_merge_input_ids_with_image_features(kwargs: any): {
|
|
1950
|
+
inputs_embeds: any;
|
|
1951
|
+
attention_mask: any;
|
|
1952
|
+
};
|
|
1953
|
+
prepare_inputs_for_generation(input_ids: any, model_inputs: any, generation_config: any): any;
|
|
1954
|
+
}
|
|
1955
|
+
export class PhiPreTrainedModel extends PreTrainedModel {
|
|
1956
|
+
}
|
|
1957
|
+
/**
|
|
1958
|
+
* The bare Phi Model outputting raw hidden-states without any specific head on top.
|
|
1959
|
+
*/
|
|
1960
|
+
export class PhiModel extends PhiPreTrainedModel {
|
|
1961
|
+
}
|
|
1962
|
+
export class PhiForCausalLM extends PhiPreTrainedModel {
|
|
1963
|
+
}
|
|
1964
|
+
export class Phi3PreTrainedModel extends PreTrainedModel {
|
|
1965
|
+
}
|
|
1966
|
+
/**
|
|
1967
|
+
* The bare Phi3 Model outputting raw hidden-states without any specific head on top.
|
|
1968
|
+
*/
|
|
1969
|
+
export class Phi3Model extends Phi3PreTrainedModel {
|
|
1970
|
+
}
|
|
1971
|
+
export class Phi3ForCausalLM extends Phi3PreTrainedModel {
|
|
1972
|
+
}
|
|
1973
|
+
/**
|
|
1974
|
+
* The Bloom Model transformer with a language modeling head on top (linear layer with weights tied to the input embeddings).
|
|
1975
|
+
*/
|
|
1976
|
+
export class BloomPreTrainedModel extends PreTrainedModel {
|
|
1977
|
+
}
|
|
1978
|
+
/**
|
|
1979
|
+
* The bare Bloom Model transformer outputting raw hidden-states without any specific head on top.
|
|
1980
|
+
*/
|
|
1981
|
+
export class BloomModel extends BloomPreTrainedModel {
|
|
1982
|
+
}
|
|
1983
|
+
/**
|
|
1984
|
+
* The Bloom Model transformer with a language modeling head on top (linear layer with weights tied to the input embeddings).
|
|
1985
|
+
*/
|
|
1986
|
+
export class BloomForCausalLM extends BloomPreTrainedModel {
|
|
1987
|
+
}
|
|
1988
|
+
export class MptPreTrainedModel extends PreTrainedModel {
|
|
1989
|
+
}
|
|
1990
|
+
/**
|
|
1991
|
+
* The bare Mpt Model transformer outputting raw hidden-states without any specific head on top.
|
|
1992
|
+
*/
|
|
1993
|
+
export class MptModel extends MptPreTrainedModel {
|
|
1994
|
+
}
|
|
1995
|
+
/**
|
|
1996
|
+
* The MPT Model transformer with a language modeling head on top (linear layer with weights tied to the input embeddings).
|
|
1997
|
+
*/
|
|
1998
|
+
export class MptForCausalLM extends MptPreTrainedModel {
|
|
1999
|
+
}
|
|
2000
|
+
export class OPTPreTrainedModel extends PreTrainedModel {
|
|
2001
|
+
}
|
|
2002
|
+
/**
|
|
2003
|
+
* The bare OPT Model outputting raw hidden-states without any specific head on top.
|
|
2004
|
+
*/
|
|
2005
|
+
export class OPTModel extends OPTPreTrainedModel {
|
|
2006
|
+
}
|
|
2007
|
+
/**
|
|
2008
|
+
* The OPT Model transformer with a language modeling head on top (linear layer with weights tied to the input embeddings).
|
|
2009
|
+
*/
|
|
2010
|
+
export class OPTForCausalLM extends OPTPreTrainedModel {
|
|
2011
|
+
}
|
|
2012
|
+
export class ViTPreTrainedModel extends PreTrainedModel {
|
|
2013
|
+
}
|
|
2014
|
+
export class ViTModel extends ViTPreTrainedModel {
|
|
2015
|
+
}
|
|
2016
|
+
export class ViTForImageClassification extends ViTPreTrainedModel {
|
|
2017
|
+
/**
|
|
2018
|
+
* @param {any} model_inputs
|
|
2019
|
+
*/
|
|
2020
|
+
_call(model_inputs: any): Promise<SequenceClassifierOutput>;
|
|
2021
|
+
}
|
|
2022
|
+
export class IJepaPreTrainedModel extends PreTrainedModel {
|
|
2023
|
+
}
|
|
2024
|
+
export class IJepaModel extends IJepaPreTrainedModel {
|
|
2025
|
+
}
|
|
2026
|
+
export class IJepaForImageClassification extends IJepaPreTrainedModel {
|
|
2027
|
+
/**
|
|
2028
|
+
* @param {any} model_inputs
|
|
2029
|
+
*/
|
|
2030
|
+
_call(model_inputs: any): Promise<SequenceClassifierOutput>;
|
|
2031
|
+
}
|
|
2032
|
+
export class VitPosePreTrainedModel extends PreTrainedModel {
|
|
2033
|
+
}
|
|
2034
|
+
/**
|
|
2035
|
+
* The VitPose model with a pose estimation head on top.
|
|
2036
|
+
*/
|
|
2037
|
+
export class VitPoseForPoseEstimation extends VitPosePreTrainedModel {
|
|
2038
|
+
}
|
|
2039
|
+
export class PvtPreTrainedModel extends PreTrainedModel {
|
|
2040
|
+
}
|
|
2041
|
+
export class PvtModel extends PvtPreTrainedModel {
|
|
2042
|
+
}
|
|
2043
|
+
export class PvtForImageClassification extends PvtPreTrainedModel {
|
|
2044
|
+
/**
|
|
2045
|
+
* @param {any} model_inputs
|
|
2046
|
+
*/
|
|
2047
|
+
_call(model_inputs: any): Promise<SequenceClassifierOutput>;
|
|
2048
|
+
}
|
|
2049
|
+
export class ViTMAEPreTrainedModel extends PreTrainedModel {
|
|
2050
|
+
}
|
|
2051
|
+
export class ViTMAEModel extends ViTMAEPreTrainedModel {
|
|
2052
|
+
}
|
|
2053
|
+
export class ViTMSNPreTrainedModel extends PreTrainedModel {
|
|
2054
|
+
}
|
|
2055
|
+
export class ViTMSNModel extends ViTMSNPreTrainedModel {
|
|
2056
|
+
}
|
|
2057
|
+
export class ViTMSNForImageClassification extends ViTMSNPreTrainedModel {
|
|
2058
|
+
/**
|
|
2059
|
+
* @param {any} model_inputs
|
|
2060
|
+
*/
|
|
2061
|
+
_call(model_inputs: any): Promise<SequenceClassifierOutput>;
|
|
2062
|
+
}
|
|
2063
|
+
export class GroupViTPreTrainedModel extends PreTrainedModel {
|
|
2064
|
+
}
|
|
2065
|
+
export class GroupViTModel extends GroupViTPreTrainedModel {
|
|
2066
|
+
}
|
|
2067
|
+
export class FastViTPreTrainedModel extends PreTrainedModel {
|
|
2068
|
+
}
|
|
2069
|
+
export class FastViTModel extends FastViTPreTrainedModel {
|
|
2070
|
+
}
|
|
2071
|
+
export class FastViTForImageClassification extends FastViTPreTrainedModel {
|
|
2072
|
+
/**
|
|
2073
|
+
* @param {any} model_inputs
|
|
2074
|
+
*/
|
|
2075
|
+
_call(model_inputs: any): Promise<SequenceClassifierOutput>;
|
|
2076
|
+
}
|
|
2077
|
+
export class VitMattePreTrainedModel extends PreTrainedModel {
|
|
2078
|
+
}
|
|
2079
|
+
/**
|
|
2080
|
+
* ViTMatte framework leveraging any vision backbone e.g. for ADE20k, CityScapes.
|
|
2081
|
+
*
|
|
2082
|
+
* **Example:** Perform image matting with a `VitMatteForImageMatting` model.
|
|
2083
|
+
* ```javascript
|
|
2084
|
+
* import { AutoProcessor, VitMatteForImageMatting, RawImage } from '@huggingface/transformers';
|
|
2085
|
+
*
|
|
2086
|
+
* // Load processor and model
|
|
2087
|
+
* const processor = await AutoProcessor.from_pretrained('Xenova/vitmatte-small-distinctions-646');
|
|
2088
|
+
* const model = await VitMatteForImageMatting.from_pretrained('Xenova/vitmatte-small-distinctions-646');
|
|
2089
|
+
*
|
|
2090
|
+
* // Load image and trimap
|
|
2091
|
+
* const image = await RawImage.fromURL('https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/vitmatte_image.png');
|
|
2092
|
+
* const trimap = await RawImage.fromURL('https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/vitmatte_trimap.png');
|
|
2093
|
+
*
|
|
2094
|
+
* // Prepare image + trimap for the model
|
|
2095
|
+
* const inputs = await processor(image, trimap);
|
|
2096
|
+
*
|
|
2097
|
+
* // Predict alpha matte
|
|
2098
|
+
* const { alphas } = await model(inputs);
|
|
2099
|
+
* // Tensor {
|
|
2100
|
+
* // dims: [ 1, 1, 640, 960 ],
|
|
2101
|
+
* // type: 'float32',
|
|
2102
|
+
* // size: 614400,
|
|
2103
|
+
* // data: Float32Array(614400) [ 0.9894027709960938, 0.9970508813858032, ... ]
|
|
2104
|
+
* // }
|
|
2105
|
+
* ```
|
|
2106
|
+
*
|
|
2107
|
+
* You can visualize the alpha matte as follows:
|
|
2108
|
+
* ```javascript
|
|
2109
|
+
* import { Tensor, cat } from '@huggingface/transformers';
|
|
2110
|
+
*
|
|
2111
|
+
* // Visualize predicted alpha matte
|
|
2112
|
+
* const imageTensor = image.toTensor();
|
|
2113
|
+
*
|
|
2114
|
+
* // Convert float (0-1) alpha matte to uint8 (0-255)
|
|
2115
|
+
* const alphaChannel = alphas
|
|
2116
|
+
* .squeeze(0)
|
|
2117
|
+
* .mul_(255)
|
|
2118
|
+
* .clamp_(0, 255)
|
|
2119
|
+
* .round_()
|
|
2120
|
+
* .to('uint8');
|
|
2121
|
+
*
|
|
2122
|
+
* // Concatenate original image with predicted alpha
|
|
2123
|
+
* const imageData = cat([imageTensor, alphaChannel], 0);
|
|
2124
|
+
*
|
|
2125
|
+
* // Save output image
|
|
2126
|
+
* const outputImage = RawImage.fromTensor(imageData);
|
|
2127
|
+
* outputImage.save('output.png');
|
|
2128
|
+
* ```
|
|
2129
|
+
*/
|
|
2130
|
+
export class VitMatteForImageMatting extends VitMattePreTrainedModel {
|
|
2131
|
+
/**
|
|
2132
|
+
* @param {any} model_inputs
|
|
2133
|
+
*/
|
|
2134
|
+
_call(model_inputs: any): Promise<ImageMattingOutput>;
|
|
2135
|
+
}
|
|
2136
|
+
export class MobileViTPreTrainedModel extends PreTrainedModel {
|
|
2137
|
+
}
|
|
2138
|
+
export class MobileViTModel extends MobileViTPreTrainedModel {
|
|
2139
|
+
}
|
|
2140
|
+
export class MobileViTForImageClassification extends MobileViTPreTrainedModel {
|
|
2141
|
+
/**
|
|
2142
|
+
* @param {any} model_inputs
|
|
2143
|
+
*/
|
|
2144
|
+
_call(model_inputs: any): Promise<SequenceClassifierOutput>;
|
|
2145
|
+
}
|
|
2146
|
+
export class MobileViTV2PreTrainedModel extends PreTrainedModel {
|
|
2147
|
+
}
|
|
2148
|
+
export class MobileViTV2Model extends MobileViTV2PreTrainedModel {
|
|
2149
|
+
}
|
|
2150
|
+
export class MobileViTV2ForImageClassification extends MobileViTV2PreTrainedModel {
|
|
2151
|
+
/**
|
|
2152
|
+
* @param {any} model_inputs
|
|
2153
|
+
*/
|
|
2154
|
+
_call(model_inputs: any): Promise<SequenceClassifierOutput>;
|
|
2155
|
+
}
|
|
2156
|
+
export class OwlViTPreTrainedModel extends PreTrainedModel {
|
|
2157
|
+
}
|
|
2158
|
+
export class OwlViTModel extends OwlViTPreTrainedModel {
|
|
2159
|
+
}
|
|
2160
|
+
export class OwlViTForObjectDetection extends OwlViTPreTrainedModel {
|
|
2161
|
+
}
|
|
2162
|
+
export class Owlv2PreTrainedModel extends PreTrainedModel {
|
|
2163
|
+
}
|
|
2164
|
+
export class Owlv2Model extends Owlv2PreTrainedModel {
|
|
2165
|
+
}
|
|
2166
|
+
export class Owlv2ForObjectDetection extends Owlv2PreTrainedModel {
|
|
2167
|
+
}
|
|
2168
|
+
export class BeitPreTrainedModel extends PreTrainedModel {
|
|
2169
|
+
}
|
|
2170
|
+
export class BeitModel extends BeitPreTrainedModel {
|
|
2171
|
+
}
|
|
2172
|
+
export class BeitForImageClassification extends BeitPreTrainedModel {
|
|
2173
|
+
/**
|
|
2174
|
+
* @param {any} model_inputs
|
|
2175
|
+
*/
|
|
2176
|
+
_call(model_inputs: any): Promise<SequenceClassifierOutput>;
|
|
2177
|
+
}
|
|
2178
|
+
export class DetrPreTrainedModel extends PreTrainedModel {
|
|
2179
|
+
}
|
|
2180
|
+
export class DetrModel extends DetrPreTrainedModel {
|
|
2181
|
+
}
|
|
2182
|
+
export class DetrForObjectDetection extends DetrPreTrainedModel {
|
|
2183
|
+
/**
|
|
2184
|
+
* @param {any} model_inputs
|
|
2185
|
+
*/
|
|
2186
|
+
_call(model_inputs: any): Promise<DetrObjectDetectionOutput>;
|
|
2187
|
+
}
|
|
2188
|
+
export class DetrForSegmentation extends DetrPreTrainedModel {
|
|
2189
|
+
/**
|
|
2190
|
+
* Runs the model with the provided inputs
|
|
2191
|
+
* @param {Object} model_inputs Model inputs
|
|
2192
|
+
* @returns {Promise<DetrSegmentationOutput>} Object containing segmentation outputs
|
|
2193
|
+
*/
|
|
2194
|
+
_call(model_inputs: any): Promise<DetrSegmentationOutput>;
|
|
2195
|
+
}
|
|
2196
|
+
export class DetrObjectDetectionOutput extends ModelOutput {
|
|
2197
|
+
/**
|
|
2198
|
+
* @param {Object} output The output of the model.
|
|
2199
|
+
* @param {Tensor} output.logits Classification logits (including no-object) for all queries.
|
|
2200
|
+
* @param {Tensor} output.pred_boxes Normalized boxes coordinates for all queries, represented as (center_x, center_y, width, height).
|
|
2201
|
+
* These values are normalized in [0, 1], relative to the size of each individual image in the batch (disregarding possible padding).
|
|
2202
|
+
*/
|
|
2203
|
+
constructor({ logits, pred_boxes }: {
|
|
2204
|
+
logits: Tensor;
|
|
2205
|
+
pred_boxes: Tensor;
|
|
2206
|
+
});
|
|
2207
|
+
logits: Tensor;
|
|
2208
|
+
pred_boxes: Tensor;
|
|
2209
|
+
}
|
|
2210
|
+
export class DetrSegmentationOutput extends ModelOutput {
|
|
2211
|
+
/**
|
|
2212
|
+
* @param {Object} output The output of the model.
|
|
2213
|
+
* @param {Tensor} output.logits The output logits of the model.
|
|
2214
|
+
* @param {Tensor} output.pred_boxes Predicted boxes.
|
|
2215
|
+
* @param {Tensor} output.pred_masks Predicted masks.
|
|
2216
|
+
*/
|
|
2217
|
+
constructor({ logits, pred_boxes, pred_masks }: {
|
|
2218
|
+
logits: Tensor;
|
|
2219
|
+
pred_boxes: Tensor;
|
|
2220
|
+
pred_masks: Tensor;
|
|
2221
|
+
});
|
|
2222
|
+
logits: Tensor;
|
|
2223
|
+
pred_boxes: Tensor;
|
|
2224
|
+
pred_masks: Tensor;
|
|
2225
|
+
}
|
|
2226
|
+
export class RTDetrPreTrainedModel extends PreTrainedModel {
|
|
2227
|
+
}
|
|
2228
|
+
export class RTDetrModel extends RTDetrPreTrainedModel {
|
|
2229
|
+
}
|
|
2230
|
+
export class RTDetrForObjectDetection extends RTDetrPreTrainedModel {
|
|
2231
|
+
/**
|
|
2232
|
+
* @param {any} model_inputs
|
|
2233
|
+
*/
|
|
2234
|
+
_call(model_inputs: any): Promise<RTDetrObjectDetectionOutput>;
|
|
2235
|
+
}
|
|
2236
|
+
export class RTDetrObjectDetectionOutput extends ModelOutput {
|
|
2237
|
+
/**
|
|
2238
|
+
* @param {Object} output The output of the model.
|
|
2239
|
+
* @param {Tensor} output.logits Classification logits (including no-object) for all queries.
|
|
2240
|
+
* @param {Tensor} output.pred_boxes Normalized boxes coordinates for all queries, represented as (center_x, center_y, width, height).
|
|
2241
|
+
* These values are normalized in [0, 1], relative to the size of each individual image in the batch (disregarding possible padding).
|
|
2242
|
+
*/
|
|
2243
|
+
constructor({ logits, pred_boxes }: {
|
|
2244
|
+
logits: Tensor;
|
|
2245
|
+
pred_boxes: Tensor;
|
|
2246
|
+
});
|
|
2247
|
+
logits: Tensor;
|
|
2248
|
+
pred_boxes: Tensor;
|
|
2249
|
+
}
|
|
2250
|
+
export class RTDetrV2PreTrainedModel extends PreTrainedModel {
|
|
2251
|
+
}
|
|
2252
|
+
export class RTDetrV2Model extends RTDetrV2PreTrainedModel {
|
|
2253
|
+
}
|
|
2254
|
+
export class RTDetrV2ForObjectDetection extends RTDetrV2PreTrainedModel {
|
|
2255
|
+
/**
|
|
2256
|
+
* @param {any} model_inputs
|
|
2257
|
+
*/
|
|
2258
|
+
_call(model_inputs: any): Promise<RTDetrV2ObjectDetectionOutput>;
|
|
2259
|
+
}
|
|
2260
|
+
export class RTDetrV2ObjectDetectionOutput extends RTDetrObjectDetectionOutput {
|
|
2261
|
+
}
|
|
2262
|
+
export class RFDetrPreTrainedModel extends PreTrainedModel {
|
|
2263
|
+
}
|
|
2264
|
+
export class RFDetrModel extends RFDetrPreTrainedModel {
|
|
2265
|
+
}
|
|
2266
|
+
export class RFDetrForObjectDetection extends RFDetrPreTrainedModel {
|
|
2267
|
+
/**
|
|
2268
|
+
* @param {any} model_inputs
|
|
2269
|
+
*/
|
|
2270
|
+
_call(model_inputs: any): Promise<RFDetrObjectDetectionOutput>;
|
|
2271
|
+
}
|
|
2272
|
+
export class RFDetrObjectDetectionOutput extends RTDetrObjectDetectionOutput {
|
|
2273
|
+
}
|
|
2274
|
+
export class DFinePreTrainedModel extends PreTrainedModel {
|
|
2275
|
+
}
|
|
2276
|
+
export class DFineModel extends DFinePreTrainedModel {
|
|
2277
|
+
}
|
|
2278
|
+
export class DFineForObjectDetection extends DFinePreTrainedModel {
|
|
2279
|
+
/**
|
|
2280
|
+
* @param {any} model_inputs
|
|
2281
|
+
*/
|
|
2282
|
+
_call(model_inputs: any): Promise<RTDetrObjectDetectionOutput>;
|
|
2283
|
+
}
|
|
2284
|
+
export class TableTransformerPreTrainedModel extends PreTrainedModel {
|
|
2285
|
+
}
|
|
2286
|
+
/**
|
|
2287
|
+
* The bare Table Transformer Model (consisting of a backbone and encoder-decoder Transformer)
|
|
2288
|
+
* outputting raw hidden-states without any specific head on top.
|
|
2289
|
+
*/
|
|
2290
|
+
export class TableTransformerModel extends TableTransformerPreTrainedModel {
|
|
2291
|
+
}
|
|
2292
|
+
/**
|
|
2293
|
+
* Table Transformer Model (consisting of a backbone and encoder-decoder Transformer)
|
|
2294
|
+
* with object detection heads on top, for tasks such as COCO detection.
|
|
2295
|
+
*/
|
|
2296
|
+
export class TableTransformerForObjectDetection extends TableTransformerPreTrainedModel {
|
|
2297
|
+
/**
|
|
2298
|
+
* @param {any} model_inputs
|
|
2299
|
+
*/
|
|
2300
|
+
_call(model_inputs: any): Promise<TableTransformerObjectDetectionOutput>;
|
|
2301
|
+
}
|
|
2302
|
+
export class TableTransformerObjectDetectionOutput extends DetrObjectDetectionOutput {
|
|
2303
|
+
}
|
|
2304
|
+
export class DeiTPreTrainedModel extends PreTrainedModel {
|
|
2305
|
+
}
|
|
2306
|
+
export class DeiTModel extends DeiTPreTrainedModel {
|
|
2307
|
+
}
|
|
2308
|
+
export class DeiTForImageClassification extends DeiTPreTrainedModel {
|
|
2309
|
+
/**
|
|
2310
|
+
* @param {any} model_inputs
|
|
2311
|
+
*/
|
|
2312
|
+
_call(model_inputs: any): Promise<SequenceClassifierOutput>;
|
|
2313
|
+
}
|
|
2314
|
+
export class HieraPreTrainedModel extends PreTrainedModel {
|
|
2315
|
+
}
|
|
2316
|
+
export class HieraModel extends HieraPreTrainedModel {
|
|
2317
|
+
}
|
|
2318
|
+
export class HieraForImageClassification extends HieraPreTrainedModel {
|
|
2319
|
+
/**
|
|
2320
|
+
* @param {any} model_inputs
|
|
2321
|
+
*/
|
|
2322
|
+
_call(model_inputs: any): Promise<SequenceClassifierOutput>;
|
|
2323
|
+
}
|
|
2324
|
+
/**
|
|
2325
|
+
* An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained models.
|
|
2326
|
+
*/
|
|
2327
|
+
export class ResNetPreTrainedModel extends PreTrainedModel {
|
|
2328
|
+
}
|
|
2329
|
+
/**
|
|
2330
|
+
* The bare ResNet model outputting raw features without any specific head on top.
|
|
2331
|
+
*/
|
|
2332
|
+
export class ResNetModel extends ResNetPreTrainedModel {
|
|
2333
|
+
}
|
|
2334
|
+
/**
|
|
2335
|
+
* ResNet Model with an image classification head on top (a linear layer on top of the pooled features), e.g. for ImageNet.
|
|
2336
|
+
*/
|
|
2337
|
+
export class ResNetForImageClassification extends ResNetPreTrainedModel {
|
|
2338
|
+
/**
|
|
2339
|
+
* @param {any} model_inputs
|
|
2340
|
+
*/
|
|
2341
|
+
_call(model_inputs: any): Promise<SequenceClassifierOutput>;
|
|
2342
|
+
}
|
|
2343
|
+
export class SwinPreTrainedModel extends PreTrainedModel {
|
|
2344
|
+
}
|
|
2345
|
+
export class SwinModel extends SwinPreTrainedModel {
|
|
2346
|
+
}
|
|
2347
|
+
export class SwinForImageClassification extends SwinPreTrainedModel {
|
|
2348
|
+
/**
|
|
2349
|
+
* @param {any} model_inputs
|
|
2350
|
+
*/
|
|
2351
|
+
_call(model_inputs: any): Promise<SequenceClassifierOutput>;
|
|
2352
|
+
}
|
|
2353
|
+
export class SwinForSemanticSegmentation extends SwinPreTrainedModel {
|
|
2354
|
+
}
|
|
2355
|
+
export class Swin2SRPreTrainedModel extends PreTrainedModel {
|
|
2356
|
+
}
|
|
2357
|
+
/**
|
|
2358
|
+
* The bare Swin2SR Model transformer outputting raw hidden-states without any specific head on top.
|
|
2359
|
+
*/
|
|
2360
|
+
export class Swin2SRModel extends Swin2SRPreTrainedModel {
|
|
2361
|
+
}
|
|
2362
|
+
/**
|
|
2363
|
+
* Swin2SR Model transformer with an upsampler head on top for image super resolution and restoration.
|
|
2364
|
+
*
|
|
2365
|
+
* **Example:** Super-resolution w/ `Xenova/swin2SR-classical-sr-x2-64`.
|
|
2366
|
+
*
|
|
2367
|
+
* ```javascript
|
|
2368
|
+
* import { AutoProcessor, Swin2SRForImageSuperResolution, RawImage } from '@huggingface/transformers';
|
|
2369
|
+
*
|
|
2370
|
+
* // Load processor and model
|
|
2371
|
+
* const model_id = 'Xenova/swin2SR-classical-sr-x2-64';
|
|
2372
|
+
* const processor = await AutoProcessor.from_pretrained(model_id);
|
|
2373
|
+
* const model = await Swin2SRForImageSuperResolution.from_pretrained(model_id);
|
|
2374
|
+
*
|
|
2375
|
+
* // Prepare model inputs
|
|
2376
|
+
* const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/butterfly.jpg';
|
|
2377
|
+
* const image = await RawImage.fromURL(url);
|
|
2378
|
+
* const inputs = await processor(image);
|
|
2379
|
+
*
|
|
2380
|
+
* // Run model
|
|
2381
|
+
* const outputs = await model(inputs);
|
|
2382
|
+
*
|
|
2383
|
+
* // Convert Tensor to RawImage
|
|
2384
|
+
* const output = outputs.reconstruction.squeeze().clamp_(0, 1).mul_(255).round_().to('uint8');
|
|
2385
|
+
* const outputImage = RawImage.fromTensor(output);
|
|
2386
|
+
* // RawImage {
|
|
2387
|
+
* // data: Uint8Array(786432) [ 41, 31, 24, ... ],
|
|
2388
|
+
* // width: 512,
|
|
2389
|
+
* // height: 512,
|
|
2390
|
+
* // channels: 3
|
|
2391
|
+
* // }
|
|
2392
|
+
* ```
|
|
2393
|
+
*/
|
|
2394
|
+
export class Swin2SRForImageSuperResolution extends Swin2SRPreTrainedModel {
|
|
2395
|
+
}
|
|
2396
|
+
export class DPTPreTrainedModel extends PreTrainedModel {
|
|
2397
|
+
}
|
|
2398
|
+
/**
|
|
2399
|
+
* The bare DPT Model transformer outputting raw hidden-states without any specific head on top.
|
|
2400
|
+
*/
|
|
2401
|
+
export class DPTModel extends DPTPreTrainedModel {
|
|
2402
|
+
}
|
|
2403
|
+
/**
 * DPT Model with a depth estimation head on top (consisting of 3 convolutional layers) e.g. for KITTI, NYUv2.
 *
 * **Example:** Depth estimation w/ `Xenova/dpt-hybrid-midas`.
 * ```javascript
 * import { DPTForDepthEstimation, AutoProcessor, RawImage, interpolate_4d } from '@huggingface/transformers';
 *
 * // Load model and processor
 * const model_id = 'Xenova/dpt-hybrid-midas';
 * const model = await DPTForDepthEstimation.from_pretrained(model_id);
 * const processor = await AutoProcessor.from_pretrained(model_id);
 *
 * // Load image from URL
 * const url = 'http://images.cocodataset.org/val2017/000000039769.jpg';
 * const image = await RawImage.read(url);
 *
 * // Prepare image for the model
 * const inputs = await processor(image);
 *
 * // Run model
 * const { predicted_depth } = await model(inputs);
 *
 * // Interpolate to original size
 * const prediction = (await interpolate_4d(predicted_depth.unsqueeze(1), {
 *   size: image.size.reverse(),
 *   mode: 'bilinear',
 * })).squeeze(1);
 *
 * // Visualize the prediction
 * const min = prediction.min().item();
 * const max = prediction.max().item();
 * const formatted = prediction.sub_(min).div_(max - min).mul_(255).to('uint8');
 * const depth = RawImage.fromTensor(formatted);
 * // RawImage {
 * //   data: Uint8Array(307200) [ 85, 85, 84, ... ],
 * //   width: 640,
 * //   height: 480,
 * //   channels: 1
 * // }
 * ```
 */
export class DPTForDepthEstimation extends DPTPreTrainedModel {
}
|
|
2446
|
+
/**
 * Base class for Depth Anything models (see `DepthAnythingForDepthEstimation`).
 */
export class DepthAnythingPreTrainedModel extends PreTrainedModel {
}
/**
 * Depth Anything Model with a depth estimation head on top (consisting of 3 convolutional layers) e.g. for KITTI, NYUv2.
 */
export class DepthAnythingForDepthEstimation extends DepthAnythingPreTrainedModel {
}
|
|
2453
|
+
/**
 * Base class for Sapiens models (see the `SapiensFor*` task heads below).
 */
export class SapiensPreTrainedModel extends PreTrainedModel {
}
/**
 * Sapiens model for semantic segmentation.
 */
export class SapiensForSemanticSegmentation extends SapiensPreTrainedModel {
}
/**
 * Sapiens model for depth estimation.
 */
export class SapiensForDepthEstimation extends SapiensPreTrainedModel {
}
/**
 * Sapiens model for surface normal estimation.
 */
export class SapiensForNormalEstimation extends SapiensPreTrainedModel {
}
|
|
2461
|
+
/**
 * Base class for DepthPro models.
 */
export class DepthProPreTrainedModel extends PreTrainedModel {
}
/**
 * DepthPro model for depth estimation.
 */
export class DepthProForDepthEstimation extends DepthProPreTrainedModel {
}
|
|
2465
|
+
/**
 * Base class for Metric3D models.
 */
export class Metric3DPreTrainedModel extends PreTrainedModel {
}
/**
 * Metric3D model for depth estimation.
 */
export class Metric3DForDepthEstimation extends Metric3DPreTrainedModel {
}
/**
 * Base class for Metric3D v2 models.
 */
export class Metric3Dv2PreTrainedModel extends PreTrainedModel {
}
/**
 * Metric3D v2 model for depth estimation.
 */
export class Metric3Dv2ForDepthEstimation extends Metric3Dv2PreTrainedModel {
}
|
|
2473
|
+
/**
 * Base class for MaskFormer models.
 */
export class MaskFormerPreTrainedModel extends PreTrainedModel {
}
/**
 * The bare MaskFormer Model outputting raw hidden-states without any specific head on top.
 */
export class MaskFormerModel extends MaskFormerPreTrainedModel {
}
/**
 * MaskFormer model for instance segmentation.
 */
export class MaskFormerForInstanceSegmentation extends MaskFormerPreTrainedModel {
}
|
|
2479
|
+
/**
 * Base class for GLPN models.
 */
export class GLPNPreTrainedModel extends PreTrainedModel {
}
/**
 * The bare GLPN encoder (Mix-Transformer) outputting raw hidden-states without any specific head on top.
 */
export class GLPNModel extends GLPNPreTrainedModel {
}
|
|
2486
|
+
/**
 * GLPN model with a depth estimation head on top.
 *
 * **Example:** Depth estimation w/ `Xenova/glpn-kitti`.
 * ```javascript
 * import { GLPNForDepthEstimation, AutoProcessor, RawImage, interpolate_4d } from '@huggingface/transformers';
 *
 * // Load model and processor
 * const model_id = 'Xenova/glpn-kitti';
 * const model = await GLPNForDepthEstimation.from_pretrained(model_id);
 * const processor = await AutoProcessor.from_pretrained(model_id);
 *
 * // Load image from URL
 * const url = 'http://images.cocodataset.org/val2017/000000039769.jpg';
 * const image = await RawImage.read(url);
 *
 * // Prepare image for the model
 * const inputs = await processor(image);
 *
 * // Run model
 * const { predicted_depth } = await model(inputs);
 *
 * // Interpolate to original size
 * const prediction = (await interpolate_4d(predicted_depth.unsqueeze(1), {
 *   size: image.size.reverse(),
 *   mode: 'bilinear',
 * })).squeeze(1);
 *
 * // Visualize the prediction
 * const min = prediction.min().item();
 * const max = prediction.max().item();
 * const formatted = prediction.sub_(min).div_(max - min).mul_(255).to('uint8');
 * const depth = RawImage.fromTensor(formatted);
 * // RawImage {
 * //   data: Uint8Array(307200) [ 85, 85, 84, ... ],
 * //   width: 640,
 * //   height: 480,
 * //   channels: 1
 * // }
 * ```
 */
export class GLPNForDepthEstimation extends GLPNPreTrainedModel {
}
|
|
2525
|
+
/**
 * Base class for Donut Swin models.
 */
export class DonutSwinPreTrainedModel extends PreTrainedModel {
}
/**
 * The bare Donut Swin Model transformer outputting raw hidden-states without any specific head on top.
 *
 * **Example:** Step-by-step Document Parsing.
 *
 * ```javascript
 * import { AutoProcessor, AutoTokenizer, AutoModelForVision2Seq, RawImage } from '@huggingface/transformers';
 *
 * // Choose model to use
 * const model_id = 'Xenova/donut-base-finetuned-cord-v2';
 *
 * // Prepare image inputs
 * const processor = await AutoProcessor.from_pretrained(model_id);
 * const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/receipt.png';
 * const image = await RawImage.read(url);
 * const image_inputs = await processor(image);
 *
 * // Prepare decoder inputs
 * const tokenizer = await AutoTokenizer.from_pretrained(model_id);
 * const task_prompt = '<s_cord-v2>';
 * const decoder_input_ids = tokenizer(task_prompt, {
 *   add_special_tokens: false,
 * }).input_ids;
 *
 * // Create the model
 * const model = await AutoModelForVision2Seq.from_pretrained(model_id);
 *
 * // Run inference
 * const output = await model.generate(image_inputs.pixel_values, {
 *   decoder_input_ids,
 *   max_length: model.config.decoder.max_position_embeddings,
 * });
 *
 * // Decode output
 * const decoded = tokenizer.batch_decode(output)[0];
 * // <s_cord-v2><s_menu><s_nm> CINNAMON SUGAR</s_nm><s_unitprice> 17,000</s_unitprice><s_cnt> 1 x</s_cnt><s_price> 17,000</s_price></s_menu><s_sub_total><s_subtotal_price> 17,000</s_subtotal_price></s_sub_total><s_total><s_total_price> 17,000</s_total_price><s_cashprice> 20,000</s_cashprice><s_changeprice> 3,000</s_changeprice></s_total></s>
 * ```
 *
 * **Example:** Step-by-step Document Visual Question Answering (DocVQA)
 *
 * ```javascript
 * import { AutoProcessor, AutoTokenizer, AutoModelForVision2Seq, RawImage } from '@huggingface/transformers';
 *
 * // Choose model to use
 * const model_id = 'Xenova/donut-base-finetuned-docvqa';
 *
 * // Prepare image inputs
 * const processor = await AutoProcessor.from_pretrained(model_id);
 * const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/invoice.png';
 * const image = await RawImage.read(url);
 * const image_inputs = await processor(image);
 *
 * // Prepare decoder inputs
 * const tokenizer = await AutoTokenizer.from_pretrained(model_id);
 * const question = 'What is the invoice number?';
 * const task_prompt = `<s_docvqa><s_question>${question}</s_question><s_answer>`;
 * const decoder_input_ids = tokenizer(task_prompt, {
 *   add_special_tokens: false,
 * }).input_ids;
 *
 * // Create the model
 * const model = await AutoModelForVision2Seq.from_pretrained(model_id);
 *
 * // Run inference
 * const output = await model.generate(image_inputs.pixel_values, {
 *   decoder_input_ids,
 *   max_length: model.config.decoder.max_position_embeddings,
 * });
 *
 * // Decode output
 * const decoded = tokenizer.batch_decode(output)[0];
 * // <s_docvqa><s_question> What is the invoice number?</s_question><s_answer> us-001</s_answer></s>
 * ```
 */
export class DonutSwinModel extends DonutSwinPreTrainedModel {
}
|
|
2603
|
+
/**
 * Base class for ConvNext models.
 */
export class ConvNextPreTrainedModel extends PreTrainedModel {
}
/**
 * The bare ConvNext model outputting raw features without any specific head on top.
 */
export class ConvNextModel extends ConvNextPreTrainedModel {
}
/**
 * ConvNext Model with an image classification head on top (a linear layer on top of the pooled features), e.g. for ImageNet.
 */
export class ConvNextForImageClassification extends ConvNextPreTrainedModel {
    /**
     * Calls the model on new inputs.
     * @param {any} model_inputs The inputs to the model.
     * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for image classification.
     */
    _call(model_inputs: any): Promise<SequenceClassifierOutput>;
}
|
|
2619
|
+
/**
 * Base class for ConvNextV2 models.
 */
export class ConvNextV2PreTrainedModel extends PreTrainedModel {
}
/**
 * The bare ConvNextV2 model outputting raw features without any specific head on top.
 */
export class ConvNextV2Model extends ConvNextV2PreTrainedModel {
}
/**
 * ConvNextV2 Model with an image classification head on top (a linear layer on top of the pooled features), e.g. for ImageNet.
 */
export class ConvNextV2ForImageClassification extends ConvNextV2PreTrainedModel {
    /**
     * Calls the model on new inputs.
     * @param {any} model_inputs The inputs to the model.
     * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for image classification.
     */
    _call(model_inputs: any): Promise<SequenceClassifierOutput>;
}
|
|
2635
|
+
/**
 * Base class for DINOv2 models.
 */
export class Dinov2PreTrainedModel extends PreTrainedModel {
}
/**
 * The bare DINOv2 Model transformer outputting raw hidden-states without any specific head on top.
 */
export class Dinov2Model extends Dinov2PreTrainedModel {
}
/**
 * Dinov2 Model transformer with an image classification head on top (a linear layer on top of the final hidden state of the [CLS] token) e.g. for ImageNet.
 */
export class Dinov2ForImageClassification extends Dinov2PreTrainedModel {
    /**
     * Calls the model on new inputs.
     * @param {any} model_inputs The inputs to the model.
     * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for image classification.
     */
    _call(model_inputs: any): Promise<SequenceClassifierOutput>;
}
|
|
2651
|
+
/**
 * Base class for Dinov2WithRegisters models.
 */
export class Dinov2WithRegistersPreTrainedModel extends PreTrainedModel {
}
/**
 * The bare Dinov2WithRegisters Model transformer outputting raw hidden-states without any specific head on top.
 */
export class Dinov2WithRegistersModel extends Dinov2WithRegistersPreTrainedModel {
}
/**
 * Dinov2WithRegisters Model transformer with an image classification head on top (a linear layer on top of the final hidden state of the [CLS] token) e.g. for ImageNet.
 */
export class Dinov2WithRegistersForImageClassification extends Dinov2WithRegistersPreTrainedModel {
    /**
     * Calls the model on new inputs.
     * @param {any} model_inputs The inputs to the model.
     * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for image classification.
     */
    _call(model_inputs: any): Promise<SequenceClassifierOutput>;
}
|
|
2667
|
+
/**
 * Base class for DINOv3 ViT models.
 */
export class DINOv3ViTPreTrainedModel extends PreTrainedModel {
}
/**
 * The bare DINOv3 ViT Model outputting raw hidden-states without any specific head on top.
 */
export class DINOv3ViTModel extends DINOv3ViTPreTrainedModel {
}
/**
 * Base class for DINOv3 ConvNext models.
 */
export class DINOv3ConvNextPreTrainedModel extends PreTrainedModel {
}
/**
 * The bare DINOv3 ConvNext Model outputting raw features without any specific head on top.
 */
export class DINOv3ConvNextModel extends DINOv3ConvNextPreTrainedModel {
}
|
|
2675
|
+
/**
 * Base class for Grounding DINO models.
 */
export class GroundingDinoPreTrainedModel extends PreTrainedModel {
}
/**
 * Grounding DINO model for (text-prompted) object detection.
 */
export class GroundingDinoForObjectDetection extends GroundingDinoPreTrainedModel {
}
|
|
2679
|
+
/**
 * Base class for YOLOS models.
 */
export class YolosPreTrainedModel extends PreTrainedModel {
}
/**
 * The bare YOLOS Model outputting raw hidden-states without any specific head on top.
 */
export class YolosModel extends YolosPreTrainedModel {
}
/**
 * YOLOS model with an object detection head on top.
 */
export class YolosForObjectDetection extends YolosPreTrainedModel {
    /**
     * Calls the model on new inputs.
     * @param {any} model_inputs The inputs to the model.
     * @returns {Promise<YolosObjectDetectionOutput>} An object containing the detection logits and predicted boxes.
     */
    _call(model_inputs: any): Promise<YolosObjectDetectionOutput>;
}
|
|
2689
|
+
/**
 * Output type of `YolosForObjectDetection`, containing classification logits and predicted boxes.
 */
export class YolosObjectDetectionOutput extends ModelOutput {
    /**
     * @param {Object} output The output of the model.
     * @param {Tensor} output.logits Classification logits (including no-object) for all queries.
     * @param {Tensor} output.pred_boxes Normalized boxes coordinates for all queries, represented as (center_x, center_y, width, height).
     * These values are normalized in [0, 1], relative to the size of each individual image in the batch (disregarding possible padding).
     */
    constructor({ logits, pred_boxes }: {
        logits: Tensor;
        pred_boxes: Tensor;
    });
    /** Classification logits (including no-object) for all queries. */
    logits: Tensor;
    /** Normalized box coordinates (center_x, center_y, width, height) for all queries. */
    pred_boxes: Tensor;
}
|
|
2703
|
+
/**
 * Base class for Segment Anything (SAM) models.
 */
export class SamPreTrainedModel extends PreTrainedModel {
}
/**
 * Segment Anything Model (SAM) for generating segmentation masks, given an input image
 * and optional 2D location and bounding boxes.
 *
 * **Example:** Perform mask generation w/ `Xenova/sam-vit-base`.
 * ```javascript
 * import { SamModel, AutoProcessor, RawImage } from '@huggingface/transformers';
 *
 * const model = await SamModel.from_pretrained('Xenova/sam-vit-base');
 * const processor = await AutoProcessor.from_pretrained('Xenova/sam-vit-base');
 *
 * const img_url = 'https://huggingface.co/ybelkada/segment-anything/resolve/main/assets/car.png';
 * const raw_image = await RawImage.read(img_url);
 * const input_points = [[[450, 600]]] // 2D localization of a window
 *
 * const inputs = await processor(raw_image, { input_points });
 * const outputs = await model(inputs);
 *
 * const masks = await processor.post_process_masks(outputs.pred_masks, inputs.original_sizes, inputs.reshaped_input_sizes);
 * // [
 * //   Tensor {
 * //     dims: [ 1, 3, 1764, 2646 ],
 * //     type: 'bool',
 * //     data: Uint8Array(14002632) [ ... ],
 * //     size: 14002632
 * //   }
 * // ]
 * const scores = outputs.iou_scores;
 * // Tensor {
 * //   dims: [ 1, 1, 3 ],
 * //   type: 'float32',
 * //   data: Float32Array(3) [
 * //     0.8892380595207214,
 * //     0.9311248064041138,
 * //     0.983696699142456
 * //   ],
 * //   size: 3
 * // }
 * ```
 */
export class SamModel extends SamPreTrainedModel {
    /**
     * Compute image embeddings and positional image embeddings, given the pixel values of an image.
     * @param {Object} model_inputs Object containing the model inputs.
     * @param {Tensor} model_inputs.pixel_values Pixel values obtained using a `SamProcessor`.
     * @returns {Promise<{ image_embeddings: Tensor, image_positional_embeddings: Tensor }>} The image embeddings and positional image embeddings.
     */
    get_image_embeddings({ pixel_values }: {
        pixel_values: Tensor;
    }): Promise<{
        image_embeddings: Tensor;
        image_positional_embeddings: Tensor;
    }>;
    /**
     * @typedef {Object} SamModelInputs Object containing the model inputs.
     * @property {Tensor} pixel_values Pixel values as a Tensor with shape `(batch_size, num_channels, height, width)`.
     * These can be obtained using a `SamProcessor`.
     * @property {Tensor} [input_points] Input 2D spatial points with shape `(batch_size, num_points, 2)`.
     * This is used by the prompt encoder to encode the prompt.
     * @property {Tensor} [input_labels] Input labels for the points, as a Tensor of shape `(batch_size, point_batch_size, num_points)`.
     * This is used by the prompt encoder to encode the prompt. There are 4 types of labels:
     *  - `1`: the point is a point that contains the object of interest
     *  - `0`: the point is a point that does not contain the object of interest
     *  - `-1`: the point corresponds to the background
     *  - `-10`: the point is a padding point, thus should be ignored by the prompt encoder
     * @property {Tensor} [input_boxes] Input bounding boxes with shape `(batch_size, num_boxes, 4)`.
     * @property {Tensor} [image_embeddings] Image embeddings used by the mask decoder.
     * @property {Tensor} [image_positional_embeddings] Image positional embeddings used by the mask decoder.
     */
    /**
     * @param {SamModelInputs} model_inputs Object containing the model inputs.
     * @returns {Promise<Object>} The output of the model.
     */
    forward(model_inputs: {
        /**
         * Pixel values as a Tensor with shape `(batch_size, num_channels, height, width)`.
         * These can be obtained using a `SamProcessor`.
         */
        pixel_values: Tensor;
        /**
         * Input 2D spatial points with shape `(batch_size, num_points, 2)`.
         * This is used by the prompt encoder to encode the prompt.
         */
        input_points?: Tensor;
        /**
         * Input labels for the points, as a Tensor of shape `(batch_size, point_batch_size, num_points)`.
         * This is used by the prompt encoder to encode the prompt. There are 4 types of labels:
         *  - `1`: the point is a point that contains the object of interest
         *  - `0`: the point is a point that does not contain the object of interest
         *  - `-1`: the point corresponds to the background
         *  - `-10`: the point is a padding point, thus should be ignored by the prompt encoder
         */
        input_labels?: Tensor;
        /**
         * Input bounding boxes with shape `(batch_size, num_boxes, 4)`.
         */
        input_boxes?: Tensor;
        /**
         * Image embeddings used by the mask decoder.
         */
        image_embeddings?: Tensor;
        /**
         * Image positional embeddings used by the mask decoder.
         */
        image_positional_embeddings?: Tensor;
    }): Promise<any>;
    /**
     * Runs the model with the provided inputs
     * @param {Object} model_inputs Model inputs
     * @returns {Promise<SamImageSegmentationOutput>} Object containing segmentation outputs
     */
    _call(model_inputs: any): Promise<SamImageSegmentationOutput>;
}
|
|
2818
|
+
/**
 * Base class for Segment-Anything model's output.
 */
export class SamImageSegmentationOutput extends ModelOutput {
    /**
     * @param {Object} output The output of the model.
     * @param {Tensor} output.iou_scores The output logits of the model.
     * @param {Tensor} output.pred_masks Predicted boxes.
     */
    constructor({ iou_scores, pred_masks }: {
        iou_scores: Tensor;
        pred_masks: Tensor;
    });
    /** The output IoU logits of the model. */
    iou_scores: Tensor;
    /** The predicted masks. */
    pred_masks: Tensor;
}
|
|
2834
|
+
/**
 * Base class for SAM 2 model outputs.
 */
export class Sam2ImageSegmentationOutput extends ModelOutput {
    /**
     * @param {Object} output The output of the model.
     * @param {Tensor} output.iou_scores The output logits of the model.
     * @param {Tensor} output.pred_masks Predicted boxes.
     * @param {Tensor} output.object_score_logits Logits for the object score, indicating if an object is present.
     */
    constructor({ iou_scores, pred_masks, object_score_logits }: {
        iou_scores: Tensor;
        pred_masks: Tensor;
        object_score_logits: Tensor;
    });
    /** The output IoU logits of the model. */
    iou_scores: Tensor;
    /** The predicted masks. */
    pred_masks: Tensor;
    /** Logits for the object score, indicating if an object is present. */
    object_score_logits: Tensor;
}
|
|
2850
|
+
/**
 * Base class for SAM 2 models.
 */
export class Sam2PreTrainedModel extends PreTrainedModel {
}
/**
 * Segment Anything Model 2 (SAM 2) for generating segmentation masks.
 */
export class Sam2Model extends Sam2PreTrainedModel {
    /**
     * Compute image embeddings and positional image embeddings, given the pixel values of an image.
     * @param {Object} model_inputs Object containing the model inputs.
     * @param {Tensor} model_inputs.pixel_values Pixel values obtained using a `Sam2Processor`.
     * @returns {Promise<Record<string, Tensor>>} The image embeddings.
     */
    get_image_embeddings({ pixel_values }: {
        pixel_values: Tensor;
    }): Promise<Record<string, Tensor>>;
    /**
     * Runs the forward pass of the model.
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<Object>} The output of the model.
     */
    forward(model_inputs: any): Promise<any>;
    /**
     * Runs the model with the provided inputs
     * @param {Object} model_inputs Model inputs
     * @returns {Promise<Sam2ImageSegmentationOutput>} Object containing segmentation outputs
     */
    _call(model_inputs: any): Promise<Sam2ImageSegmentationOutput>;
}
/**
 * EdgeTAM model; shares the `Sam2Model` interface.
 */
export class EdgeTamModel extends Sam2Model {
}
/**
 * SAM 3 tracker model; shares the `Sam2Model` interface.
 */
export class Sam3TrackerModel extends Sam2Model {
}
|
|
2874
|
+
/**
 * Base class for Marian models.
 */
export class MarianPreTrainedModel extends PreTrainedModel {
}
/**
 * The bare Marian Model outputting raw hidden-states without any specific head on top.
 */
export class MarianModel extends MarianPreTrainedModel {
}
/**
 * Marian Model with a language modeling head, e.g. for machine translation.
 */
export class MarianMTModel extends MarianPreTrainedModel {
}
/**
 * Base class for M2M100 models.
 */
export class M2M100PreTrainedModel extends PreTrainedModel {
}
/**
 * The bare M2M100 Model outputting raw hidden-states without any specific head on top.
 */
export class M2M100Model extends M2M100PreTrainedModel {
}
/**
 * M2M100 Model with a language modeling head for conditional generation, e.g. for translation.
 */
export class M2M100ForConditionalGeneration extends M2M100PreTrainedModel {
}
|
|
2886
|
+
/**
 * Base class for Wav2Vec2 models.
 */
export class Wav2Vec2PreTrainedModel extends PreTrainedModel {
}
/**
 * The bare Wav2Vec2 Model transformer outputting raw hidden-states without any specific head on top.
 *
 * **Example:** Load and run a `Wav2Vec2Model` for feature extraction.
 *
 * ```javascript
 * import { AutoProcessor, AutoModel, read_audio } from '@huggingface/transformers';
 *
 * // Read and preprocess audio
 * const processor = await AutoProcessor.from_pretrained('Xenova/mms-300m');
 * const audio = await read_audio('https://huggingface.co/datasets/Narsil/asr_dummy/resolve/main/mlk.flac', 16000);
 * const inputs = await processor(audio);
 *
 * // Run model with inputs
 * const model = await AutoModel.from_pretrained('Xenova/mms-300m');
 * const output = await model(inputs);
 * // {
 * //   last_hidden_state: Tensor {
 * //     dims: [ 1, 1144, 1024 ],
 * //     type: 'float32',
 * //     data: Float32Array(1171456) [ ... ],
 * //     size: 1171456
 * //   }
 * // }
 * ```
 */
export class Wav2Vec2Model extends Wav2Vec2PreTrainedModel {
}
/**
 * Wav2Vec2 Model with a `language modeling` head on top for Connectionist Temporal Classification (CTC).
 */
export class Wav2Vec2ForCTC extends Wav2Vec2PreTrainedModel {
    /**
     * Calls the model on new inputs.
     * @param {Object} model_inputs
     * @param {Tensor} model_inputs.input_values Float values of input raw speech waveform.
     * @param {Tensor} model_inputs.attention_mask Mask to avoid performing convolution and attention on padding token indices. Mask values selected in [0, 1]
     */
    _call(model_inputs: {
        input_values: Tensor;
        attention_mask: Tensor;
    }): Promise<CausalLMOutput>;
}
/**
 * Wav2Vec2 Model with a sequence classification head on top (a linear layer over the pooled output).
 */
export class Wav2Vec2ForSequenceClassification extends Wav2Vec2PreTrainedModel {
    /**
     * Calls the model on new inputs.
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
     */
    _call(model_inputs: any): Promise<SequenceClassifierOutput>;
}
|
|
2935
|
+
/**
 * Wav2Vec2 Model with a frame classification head on top for tasks like Speaker Diarization.
 */
export class Wav2Vec2ForAudioFrameClassification extends Wav2Vec2PreTrainedModel {
    /**
     * Calls the model on new inputs.
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for audio frame classification.
     */
    _call(model_inputs: any): Promise<TokenClassifierOutput>;
}
|
|
2946
|
+
/**
 * Base class for Parakeet models.
 */
export class ParakeetPreTrainedModel extends PreTrainedModel {
}
/**
 * Parakeet Model with a `language modeling` head on top for Connectionist Temporal Classification (CTC).
 */
export class ParakeetForCTC extends ParakeetPreTrainedModel {
    /**
     * Calls the model on new inputs.
     * @param {Object} model_inputs
     * @param {Tensor} model_inputs.input_values Float values of input raw speech waveform.
     * @param {Tensor} model_inputs.attention_mask Mask to avoid performing convolution and attention on padding token indices. Mask values selected in [0, 1]
     */
    _call(model_inputs: {
        input_values: Tensor;
        attention_mask: Tensor;
    }): Promise<CausalLMOutput>;
}
|
|
2959
|
+
/**
 * Base class for PyAnnote models.
 */
export class PyAnnotePreTrainedModel extends PreTrainedModel {
}
/**
 * The bare PyAnnote Model transformer outputting raw hidden-states without any specific head on top.
 */
export class PyAnnoteModel extends PyAnnotePreTrainedModel {
}
|
|
2966
|
+
/**
 * PyAnnote Model with a frame classification head on top for tasks like Speaker Diarization.
 *
 * **Example:** Load and run a `PyAnnoteForAudioFrameClassification` for speaker diarization.
 *
 * ```javascript
 * import { AutoProcessor, AutoModelForAudioFrameClassification, read_audio } from '@huggingface/transformers';
 *
 * // Load model and processor
 * const model_id = 'onnx-community/pyannote-segmentation-3.0';
 * const model = await AutoModelForAudioFrameClassification.from_pretrained(model_id);
 * const processor = await AutoProcessor.from_pretrained(model_id);
 *
 * // Read and preprocess audio
 * const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/mlk.wav';
 * const audio = await read_audio(url, processor.feature_extractor.config.sampling_rate);
 * const inputs = await processor(audio);
 *
 * // Run model with inputs
 * const { logits } = await model(inputs);
 * // {
 * //   logits: Tensor {
 * //     dims: [ 1, 767, 7 ],  // [batch_size, num_frames, num_classes]
 * //     type: 'float32',
 * //     data: Float32Array(5369) [ ... ],
 * //     size: 5369
 * //   }
 * // }
 *
 * const result = processor.post_process_speaker_diarization(logits, audio.length);
 * // [
 * //   [
 * //     { id: 0, start: 0, end: 1.0512535626298245, confidence: 0.8220156481664611 },
 * //     { id: 2, start: 1.0512535626298245, end: 2.3398869619825127, confidence: 0.9008811707860472 },
 * //     ...
 * //   ]
 * // ]
 *
 * // Display result
 * console.table(result[0], ['start', 'end', 'id', 'confidence']);
 * // ┌─────────┬────────────────────┬────────────────────┬────┬─────────────────────┐
 * // │ (index) │ start              │ end                │ id │ confidence          │
 * // ├─────────┼────────────────────┼────────────────────┼────┼─────────────────────┤
 * // │ 0       │ 0                  │ 1.0512535626298245 │ 0  │ 0.8220156481664611  │
 * // │ 1       │ 1.0512535626298245 │ 2.3398869619825127 │ 2  │ 0.9008811707860472  │
 * // │ 2       │ 2.3398869619825127 │ 3.5946089560890773 │ 0  │ 0.7521651315796233  │
 * // │ 3       │ 3.5946089560890773 │ 4.578039708226655  │ 2  │ 0.8491978128022479  │
 * // │ 4       │ 4.578039708226655  │ 4.594995410849717  │ 0  │ 0.2935352600416393  │
 * // │ 5       │ 4.594995410849717  │ 6.121008646925269  │ 3  │ 0.6788051309866024  │
 * // │ 6       │ 6.121008646925269  │ 6.256654267909762  │ 0  │ 0.37125512393851134 │
 * // │ 7       │ 6.256654267909762  │ 8.630452635138397  │ 2  │ 0.7467035186353542  │
 * // │ 8       │ 8.630452635138397  │ 10.088643060721703 │ 0  │ 0.7689364814666032  │
 * // │ 9       │ 10.088643060721703 │ 12.58113134631177  │ 2  │ 0.9123324509131324  │
 * // │ 10      │ 12.58113134631177  │ 13.005023911888312 │ 0  │ 0.4828358177572041  │
 * // └─────────┴────────────────────┴────────────────────┴────┴─────────────────────┘
 * ```
 */
export class PyAnnoteForAudioFrameClassification extends PyAnnotePreTrainedModel {
    /**
     * Calls the model on new inputs.
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for audio frame classification.
     */
    _call(model_inputs: any): Promise<TokenClassifierOutput>;
}
|
|
3031
|
+
/**
 * Base class for WeSpeaker ResNet models.
 */
export class WeSpeakerResNetPreTrainedModel extends PreTrainedModel {
}
/**
 * The bare WeSpeaker ResNet Model outputting raw features without any specific head on top.
 */
export class WeSpeakerResNetModel extends WeSpeakerResNetPreTrainedModel {
}
|
|
3035
|
+
/**
 * Base class for UniSpeech models.
 */
export class UniSpeechPreTrainedModel extends PreTrainedModel {
}
/**
 * The bare UniSpeech Model transformer outputting raw hidden-states without any specific head on top.
 */
export class UniSpeechModel extends UniSpeechPreTrainedModel {
}
/**
 * UniSpeech Model with a `language modeling` head on top for Connectionist Temporal Classification (CTC).
 */
export class UniSpeechForCTC extends UniSpeechPreTrainedModel {
    /**
     * Calls the model on new inputs.
     * @param {Object} model_inputs
     * @param {Tensor} model_inputs.input_values Float values of input raw speech waveform.
     * @param {Tensor} model_inputs.attention_mask Mask to avoid performing convolution and attention on padding token indices. Mask values selected in [0, 1]
     */
    _call(model_inputs: {
        input_values: Tensor;
        attention_mask: Tensor;
    }): Promise<CausalLMOutput>;
}
/**
 * UniSpeech Model with a sequence classification head on top (a linear layer over the pooled output).
 */
export class UniSpeechForSequenceClassification extends UniSpeechPreTrainedModel {
    /**
     * Calls the model on new inputs.
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
     */
    _call(model_inputs: any): Promise<SequenceClassifierOutput>;
}
|
|
3067
|
+
/**
 * Base class for UniSpeechSat models.
 */
export class UniSpeechSatPreTrainedModel extends PreTrainedModel {
}
/**
 * The bare UniSpeechSat Model transformer outputting raw hidden-states without any specific head on top.
 */
export class UniSpeechSatModel extends UniSpeechSatPreTrainedModel {
}
/**
 * UniSpeechSat Model with a `language modeling` head on top for Connectionist Temporal Classification (CTC).
 */
export class UniSpeechSatForCTC extends UniSpeechSatPreTrainedModel {
    /**
     * Calls the model on new inputs.
     * @param {Object} model_inputs
     * @param {Tensor} model_inputs.input_values Float values of input raw speech waveform.
     * @param {Tensor} model_inputs.attention_mask Mask to avoid performing convolution and attention on padding token indices. Mask values selected in [0, 1]
     */
    _call(model_inputs: {
        input_values: Tensor;
        attention_mask: Tensor;
    }): Promise<CausalLMOutput>;
}
/**
 * UniSpeechSat Model with a sequence classification head on top (a linear layer over the pooled output).
 */
export class UniSpeechSatForSequenceClassification extends UniSpeechSatPreTrainedModel {
    /**
     * Calls the model on new inputs.
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
     */
    _call(model_inputs: any): Promise<SequenceClassifierOutput>;
}
|
|
3099
|
+
/**
|
|
3100
|
+
* UniSpeechSat Model with a frame classification head on top for tasks like Speaker Diarization.
|
|
3101
|
+
*/
|
|
3102
|
+
export class UniSpeechSatForAudioFrameClassification extends UniSpeechSatPreTrainedModel {
|
|
3103
|
+
/**
|
|
3104
|
+
* Calls the model on new inputs.
|
|
3105
|
+
* @param {Object} model_inputs The inputs to the model.
|
|
3106
|
+
* @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for sequence classification.
|
|
3107
|
+
*/
|
|
3108
|
+
_call(model_inputs: any): Promise<TokenClassifierOutput>;
|
|
3109
|
+
}
|
|
3110
|
+
/**
 * An abstract class to handle weights initialization and a simple interface for
 * downloading and loading pretrained Wav2Vec2Bert models.
 */
export class Wav2Vec2BertPreTrainedModel extends PreTrainedModel {
}
/**
 * The bare Wav2Vec2Bert Model transformer outputting raw hidden-states without any specific head on top.
 */
export class Wav2Vec2BertModel extends Wav2Vec2BertPreTrainedModel {
}
/**
 * Wav2Vec2Bert Model with a `language modeling` head on top for Connectionist Temporal Classification (CTC).
 */
export class Wav2Vec2BertForCTC extends Wav2Vec2BertPreTrainedModel {
    /**
     * Calls the model on new inputs.
     * Note: unlike the other CTC models in this file, this one consumes `input_features`
     * (mel-spectrogram) rather than `input_values` (raw waveform).
     * @param {Object} model_inputs
     * @param {Tensor} model_inputs.input_features Float values of input mel-spectrogram.
     * @param {Tensor} model_inputs.attention_mask Mask to avoid performing convolution and attention on padding token indices. Mask values selected in [0, 1]
     * @returns {Promise<CausalLMOutput>} The model's output.
     */
    _call(model_inputs: {
        input_features: Tensor;
        attention_mask: Tensor;
    }): Promise<CausalLMOutput>;
}
/**
 * Wav2Vec2Bert Model with a sequence classification head on top (a linear layer over the pooled output).
 */
export class Wav2Vec2BertForSequenceClassification extends Wav2Vec2BertPreTrainedModel {
    /**
     * Calls the model on new inputs.
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
     */
    _call(model_inputs: any): Promise<SequenceClassifierOutput>;
}
|
|
3142
|
+
/**
 * An abstract class to handle weights initialization and a simple interface for
 * downloading and loading pretrained Hubert models.
 * NOTE(review): the Hubert classes below extend `Wav2Vec2PreTrainedModel` rather than
 * this class; this mirrors the upstream transformers.js source (Hubert reuses the
 * Wav2Vec2 base), so it is intentional — do not "fix" without checking upstream.
 */
export class HubertPreTrainedModel extends PreTrainedModel {
}
/**
 * The bare Hubert Model transformer outputting raw hidden-states without any specific head on top.
 *
 * **Example:** Load and run a `HubertModel` for feature extraction.
 *
 * ```javascript
 * import { AutoProcessor, AutoModel, read_audio } from '@huggingface/transformers';
 *
 * // Read and preprocess audio
 * const processor = await AutoProcessor.from_pretrained('Xenova/hubert-base-ls960');
 * const audio = await read_audio('https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/jfk.wav', 16000);
 * const inputs = await processor(audio);
 *
 * // Load and run model with inputs
 * const model = await AutoModel.from_pretrained('Xenova/hubert-base-ls960');
 * const output = await model(inputs);
 * // {
 * //   last_hidden_state: Tensor {
 * //     dims: [ 1, 549, 768 ],
 * //     type: 'float32',
 * //     data: Float32Array(421632) [0.0682469978928566, 0.08104046434164047, -0.4975186586380005, ...],
 * //     size: 421632
 * //   }
 * // }
 * ```
 */
export class HubertModel extends Wav2Vec2PreTrainedModel {
}
/**
 * Hubert Model with a `language modeling` head on top for Connectionist Temporal Classification (CTC).
 */
export class HubertForCTC extends Wav2Vec2PreTrainedModel {
    /**
     * Calls the model on new inputs.
     * @param {Object} model_inputs
     * @param {Tensor} model_inputs.input_values Float values of input raw speech waveform.
     * @param {Tensor} model_inputs.attention_mask Mask to avoid performing convolution and attention on padding token indices. Mask values selected in [0, 1]
     * @returns {Promise<CausalLMOutput>} The model's output.
     */
    _call(model_inputs: {
        input_values: Tensor;
        attention_mask: Tensor;
    }): Promise<CausalLMOutput>;
}
/**
 * Hubert Model with a sequence classification head on top (a linear layer over the pooled output) for tasks like SUPERB Keyword Spotting.
 */
export class HubertForSequenceClassification extends Wav2Vec2PreTrainedModel {
    /**
     * Calls the model on new inputs.
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
     */
    _call(model_inputs: any): Promise<SequenceClassifierOutput>;
}
|
|
3197
|
+
/**
 * An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained models.
 */
export class WavLMPreTrainedModel extends PreTrainedModel {
}
/**
 * The bare WavLM Model transformer outputting raw hidden-states without any specific head on top.
 *
 * **Example:** Load and run a `WavLMModel` for feature extraction.
 *
 * ```javascript
 * import { AutoProcessor, AutoModel, read_audio } from '@huggingface/transformers';
 *
 * // Read and preprocess audio
 * const processor = await AutoProcessor.from_pretrained('Xenova/wavlm-base');
 * const audio = await read_audio('https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/jfk.wav', 16000);
 * const inputs = await processor(audio);
 *
 * // Run model with inputs
 * const model = await AutoModel.from_pretrained('Xenova/wavlm-base');
 * const output = await model(inputs);
 * // {
 * //   last_hidden_state: Tensor {
 * //     dims: [ 1, 549, 768 ],
 * //     type: 'float32',
 * //     data: Float32Array(421632) [-0.349443256855011, -0.39341306686401367, 0.022836603224277496, ...],
 * //     size: 421632
 * //   }
 * // }
 * ```
 */
export class WavLMModel extends WavLMPreTrainedModel {
}
/**
 * WavLM Model with a `language modeling` head on top for Connectionist Temporal Classification (CTC).
 */
export class WavLMForCTC extends WavLMPreTrainedModel {
    /**
     * Calls the model on new inputs.
     * @param {Object} model_inputs
     * @param {Tensor} model_inputs.input_values Float values of input raw speech waveform.
     * @param {Tensor} model_inputs.attention_mask Mask to avoid performing convolution and attention on padding token indices. Mask values selected in [0, 1]
     * @returns {Promise<CausalLMOutput>} The model's output.
     */
    _call(model_inputs: {
        input_values: Tensor;
        attention_mask: Tensor;
    }): Promise<CausalLMOutput>;
}
/**
 * WavLM Model with a sequence classification head on top (a linear layer over the pooled output).
 */
export class WavLMForSequenceClassification extends WavLMPreTrainedModel {
    /**
     * Calls the model on new inputs.
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
     */
    _call(model_inputs: any): Promise<SequenceClassifierOutput>;
}
/**
 * WavLM Model with an XVector feature extraction head on top for tasks like Speaker Verification.
 *
 * **Example:** Extract speaker embeddings with `WavLMForXVector`.
 * ```javascript
 * import { AutoProcessor, AutoModel, read_audio } from '@huggingface/transformers';
 *
 * // Read and preprocess audio
 * const processor = await AutoProcessor.from_pretrained('Xenova/wavlm-base-plus-sv');
 * const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/jfk.wav';
 * const audio = await read_audio(url, 16000);
 * const inputs = await processor(audio);
 *
 * // Run model with inputs
 * const model = await AutoModel.from_pretrained('Xenova/wavlm-base-plus-sv');
 * const outputs = await model(inputs);
 * // {
 * //   logits: Tensor {
 * //     dims: [ 1, 512 ],
 * //     type: 'float32',
 * //     data: Float32Array(512) [0.5847219228744507, ...],
 * //     size: 512
 * //   },
 * //   embeddings: Tensor {
 * //     dims: [ 1, 512 ],
 * //     type: 'float32',
 * //     data: Float32Array(512) [-0.09079201519489288, ...],
 * //     size: 512
 * //   }
 * // }
 * ```
 */
export class WavLMForXVector extends WavLMPreTrainedModel {
    /**
     * Calls the model on new inputs.
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<XVectorOutput>} An object containing the model's output logits and speaker embeddings.
     */
    _call(model_inputs: any): Promise<XVectorOutput>;
}
/**
 * WavLM Model with a frame classification head on top for tasks like Speaker Diarization.
 *
 * **Example:** Perform speaker diarization with `WavLMForAudioFrameClassification`.
 * ```javascript
 * import { AutoProcessor, AutoModelForAudioFrameClassification, read_audio } from '@huggingface/transformers';
 *
 * // Read and preprocess audio
 * const processor = await AutoProcessor.from_pretrained('Xenova/wavlm-base-plus-sd');
 * const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/jfk.wav';
 * const audio = await read_audio(url, 16000);
 * const inputs = await processor(audio);
 *
 * // Run model with inputs
 * const model = await AutoModelForAudioFrameClassification.from_pretrained('Xenova/wavlm-base-plus-sd');
 * const { logits } = await model(inputs);
 * // {
 * //   logits: Tensor {
 * //     dims: [ 1, 549, 2 ],  // [batch_size, num_frames, num_speakers]
 * //     type: 'float32',
 * //     data: Float32Array(1098) [-3.5301010608673096, ...],
 * //     size: 1098
 * //   }
 * // }
 *
 * const labels = logits[0].sigmoid().tolist().map(
 *     frames => frames.map(speaker => speaker > 0.5 ? 1 : 0)
 * );
 * console.log(labels); // labels is a one-hot array of shape (num_frames, num_speakers)
 * // [
 * //     [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0],
 * //     [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0],
 * //     [0, 0], [0, 1], [0, 1], [0, 1], [0, 1], [0, 1],
 * //     ...
 * // ]
 * ```
 */
export class WavLMForAudioFrameClassification extends WavLMPreTrainedModel {
    /**
     * Calls the model on new inputs.
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for per-frame classification.
     */
    _call(model_inputs: any): Promise<TokenClassifierOutput>;
}
|
|
3340
|
+
/**
 * An abstract class to handle weights initialization and a simple interface for
 * downloading and loading pretrained StyleTextToSpeech2 models.
 */
export class StyleTextToSpeech2PreTrainedModel extends PreTrainedModel {
}
/**
 * Bare StyleTextToSpeech2 model with no task-specific head declared here.
 * NOTE(review): presumably a StyleTTS 2-style text-to-speech model — confirm against the implementation.
 */
export class StyleTextToSpeech2Model extends StyleTextToSpeech2PreTrainedModel {
}
|
|
3344
|
+
/**
 * An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained models.
 */
export class SpeechT5PreTrainedModel extends PreTrainedModel {
}
/**
 * The bare SpeechT5 Encoder-Decoder Model outputting raw hidden-states without any specific pre- or post-nets.
 */
export class SpeechT5Model extends SpeechT5PreTrainedModel {
}
/**
 * SpeechT5 Model with a speech encoder and a text decoder.
 *
 * **Example:** Generate speech from text with `SpeechT5ForTextToSpeech`.
 * (NOTE(review): upstream attaches this text-to-speech example here, on the
 * speech-to-text class; the heading originally mis-named the class used below.)
 * ```javascript
 * import { AutoTokenizer, AutoProcessor, SpeechT5ForTextToSpeech, SpeechT5HifiGan, Tensor } from '@huggingface/transformers';
 *
 * // Load the tokenizer and processor
 * const tokenizer = await AutoTokenizer.from_pretrained('Xenova/speecht5_tts');
 * const processor = await AutoProcessor.from_pretrained('Xenova/speecht5_tts');
 *
 * // Load the models
 * // NOTE: We use the full-precision versions as they are more accurate
 * const model = await SpeechT5ForTextToSpeech.from_pretrained('Xenova/speecht5_tts', { dtype: 'fp32' });
 * const vocoder = await SpeechT5HifiGan.from_pretrained('Xenova/speecht5_hifigan', { dtype: 'fp32' });
 *
 * // Load speaker embeddings from URL
 * const speaker_embeddings_data = new Float32Array(
 *     await (await fetch('https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/speaker_embeddings.bin')).arrayBuffer()
 * );
 * const speaker_embeddings = new Tensor(
 *     'float32',
 *     speaker_embeddings_data,
 *     [1, speaker_embeddings_data.length]
 * )
 *
 * // Run tokenization
 * const { input_ids } = tokenizer('Hello, my dog is cute');
 *
 * // Generate waveform
 * const { waveform } = await model.generate_speech(input_ids, speaker_embeddings, { vocoder });
 * console.log(waveform)
 * // Tensor {
 * //   dims: [ 26112 ],
 * //   type: 'float32',
 * //   size: 26112,
 * //   data: Float32Array(26112) [ -0.00043630177970044315, -0.00018082228780258447, ... ],
 * // }
 * ```
 */
export class SpeechT5ForSpeechToText extends SpeechT5PreTrainedModel {
}
/**
 * SpeechT5 Model with a text encoder and a speech decoder.
 */
export class SpeechT5ForTextToSpeech extends SpeechT5PreTrainedModel {
    /**
     * @typedef {Object} SpeechOutput
     * @property {Tensor} [spectrogram] The predicted log-mel spectrogram of shape
     * `(output_sequence_length, config.num_mel_bins)`. Returned when no `vocoder` is provided
     * @property {Tensor} [waveform] The predicted waveform of shape `(num_frames,)`. Returned when a `vocoder` is provided.
     * @property {Tensor} [cross_attentions] The outputs of the decoder's cross-attention layers of shape
     * `(config.decoder_layers, config.decoder_attention_heads, output_sequence_length, input_sequence_length)`. returned when `output_cross_attentions` is `true`.
     */
    /**
     * Converts a sequence of input tokens into a sequence of mel spectrograms, which are subsequently turned into a speech waveform using a vocoder.
     * @param {Tensor} input_values Indices of input sequence tokens in the vocabulary.
     * @param {Tensor} speaker_embeddings Tensor containing the speaker embeddings.
     * @param {Object} options Optional parameters for generating speech.
     * @param {number} [options.threshold=0.5] The generated sequence ends when the predicted stop token probability exceeds this value.
     * @param {number} [options.minlenratio=0.0] Used to calculate the minimum required length for the output sequence.
     * @param {number} [options.maxlenratio=20.0] Used to calculate the maximum allowed length for the output sequence.
     * @param {Object} [options.vocoder=null] The vocoder that converts the mel spectrogram into a speech waveform. If `null`, the output is the mel spectrogram.
     * @param {boolean} [options.output_cross_attentions=false] Whether or not to return the attentions tensors of the decoder's cross-attention layers.
     * @returns {Promise<SpeechOutput>} A promise which resolves to an object containing the spectrogram, waveform, and cross-attention tensors.
     */
    generate_speech(input_values: Tensor, speaker_embeddings: Tensor, { threshold, minlenratio, maxlenratio, vocoder, }?: {
        threshold?: number;
        minlenratio?: number;
        maxlenratio?: number;
        vocoder?: any;
        output_cross_attentions?: boolean;
    }): Promise<{
        /**
         * The predicted log-mel spectrogram of shape
         * `(output_sequence_length, config.num_mel_bins)`. Returned when no `vocoder` is provided
         */
        spectrogram?: Tensor;
        /**
         * The predicted waveform of shape `(num_frames,)`. Returned when a `vocoder` is provided.
         */
        waveform?: Tensor;
        /**
         * The outputs of the decoder's cross-attention layers of shape
         * `(config.decoder_layers, config.decoder_attention_heads, output_sequence_length, input_sequence_length)`. returned when `output_cross_attentions` is `true`.
         */
        cross_attentions?: Tensor;
    }>;
}
/**
 * HiFi-GAN vocoder.
 *
 * See [SpeechT5ForSpeechToText](./models#module_models.SpeechT5ForSpeechToText) for example usage.
 */
export class SpeechT5HifiGan extends PreTrainedModel {
}
|
|
3450
|
+
/**
 * An abstract class to handle weights initialization and a simple interface for
 * downloading and loading pretrained Supertonic models.
 */
export class SupertonicPreTrainedModel extends PreTrainedModel {
}
/**
 * Supertonic model for conditional generation of speech.
 */
export class SupertonicForConditionalGeneration extends SupertonicPreTrainedModel {
    /**
     * Generates a speech waveform from tokenized text input.
     * @param {Object} inputs
     * @param inputs.input_ids Tokenized input ids.
     * @param inputs.attention_mask Attention mask for the input ids.
     * @param inputs.style Style conditioning input (presumably a speaker/style embedding — TODO confirm).
     * @param {number} [inputs.num_inference_steps] Number of inference steps
     *   (suggests an iterative/diffusion-style decoder — confirm against implementation).
     * @param {number} [inputs.speed] Speech speed factor.
     * @returns A promise resolving to the generated waveform and per-token durations.
     */
    generate_speech({ input_ids, attention_mask, style, num_inference_steps, speed, }: {
        input_ids: any;
        attention_mask: any;
        style: any;
        num_inference_steps?: number;
        speed?: number;
    }): Promise<{
        waveform: any;
        durations: any;
    }>;
}
|
|
3464
|
+
/**
 * An abstract class to handle weights initialization and a simple interface for
 * downloading and loading pretrained TrOCR models.
 */
export class TrOCRPreTrainedModel extends PreTrainedModel {
}
/**
 * The TrOCR Decoder with a language modeling head.
 */
export class TrOCRForCausalLM extends TrOCRPreTrainedModel {
}
|
|
3471
|
+
/**
 * The bare Mistral Model outputting raw hidden-states without any specific head on top.
 */
export class MistralPreTrainedModel extends PreTrainedModel {
}
/**
 * The bare Mistral Model transformer (no head).
 */
export class MistralModel extends MistralPreTrainedModel {
}
/**
 * Mistral Model with a language-modeling head for causal (autoregressive) generation.
 */
export class MistralForCausalLM extends MistralPreTrainedModel {
}
|
|
3480
|
+
/**
 * An abstract class to handle weights initialization and a simple interface for
 * downloading and loading pretrained Ministral models.
 */
export class MinistralPreTrainedModel extends PreTrainedModel {
}
/**
 * The bare Ministral Model transformer (no head).
 */
export class MinistralModel extends MinistralPreTrainedModel {
}
/**
 * Ministral Model with a language-modeling head for causal (autoregressive) generation.
 */
export class MinistralForCausalLM extends MinistralPreTrainedModel {
}
|
|
3486
|
+
/**
 * An abstract class to handle weights initialization and a simple interface for
 * downloading and loading pretrained Ministral3 models.
 */
export class Ministral3PreTrainedModel extends PreTrainedModel {
}
/**
 * The bare Ministral3 Model transformer (no head).
 */
export class Ministral3Model extends Ministral3PreTrainedModel {
}
/**
 * Ministral3 Model with a language-modeling head for causal (autoregressive) generation.
 */
export class Ministral3ForCausalLM extends Ministral3PreTrainedModel {
}
|
|
3492
|
+
/**
 * An abstract class to handle weights initialization and a simple interface for
 * downloading and loading pretrained ERNIE 4.5 models.
 */
export class Ernie4_5PreTrainedModel extends PreTrainedModel {
}
/**
 * The bare ERNIE 4.5 Model transformer (no head).
 */
export class Ernie4_5Model extends Ernie4_5PreTrainedModel {
}
/**
 * ERNIE 4.5 Model with a language-modeling head for causal (autoregressive) generation.
 */
export class Ernie4_5ForCausalLM extends Ernie4_5PreTrainedModel {
}
|
|
3498
|
+
/**
 * The bare Starcoder2 Model outputting raw hidden-states without any specific head on top.
 */
export class Starcoder2PreTrainedModel extends PreTrainedModel {
}
/**
 * The bare Starcoder2 Model transformer (no head).
 */
export class Starcoder2Model extends Starcoder2PreTrainedModel {
}
/**
 * Starcoder2 Model with a language-modeling head for causal (autoregressive) generation.
 */
export class Starcoder2ForCausalLM extends Starcoder2PreTrainedModel {
}
|
|
3507
|
+
/**
 * The bare Falcon Model outputting raw hidden-states without any specific head on top.
 */
export class FalconPreTrainedModel extends PreTrainedModel {
}
/**
 * The bare Falcon Model transformer (no head).
 */
export class FalconModel extends FalconPreTrainedModel {
}
/**
 * Falcon Model with a language-modeling head for causal (autoregressive) generation.
 */
export class FalconForCausalLM extends FalconPreTrainedModel {
}
|
|
3516
|
+
/**
 * An abstract class to handle weights initialization and a simple interface for
 * downloading and loading pretrained CLAP models.
 */
export class ClapPreTrainedModel extends PreTrainedModel {
}
/**
 * The full CLAP (Contrastive Language-Audio Pretraining) model, combining the
 * text and audio branches declared below.
 */
export class ClapModel extends ClapPreTrainedModel {
}
/**
 * CLAP Text Model with a projection layer on top (a linear layer on top of the pooled output).
 *
 * **Example:** Compute text embeddings with `ClapTextModelWithProjection`.
 *
 * ```javascript
 * import { AutoTokenizer, ClapTextModelWithProjection } from '@huggingface/transformers';
 *
 * // Load tokenizer and text model
 * const tokenizer = await AutoTokenizer.from_pretrained('Xenova/clap-htsat-unfused');
 * const text_model = await ClapTextModelWithProjection.from_pretrained('Xenova/clap-htsat-unfused');
 *
 * // Run tokenization
 * const texts = ['a sound of a cat', 'a sound of a dog'];
 * const text_inputs = tokenizer(texts, { padding: true, truncation: true });
 *
 * // Compute embeddings
 * const { text_embeds } = await text_model(text_inputs);
 * // Tensor {
 * //   dims: [ 2, 512 ],
 * //   type: 'float32',
 * //   data: Float32Array(1024) [ ... ],
 * //   size: 1024
 * // }
 * ```
 */
export class ClapTextModelWithProjection extends ClapPreTrainedModel {
}
/**
 * CLAP Audio Model with a projection layer on top (a linear layer on top of the pooled output).
 *
 * **Example:** Compute audio embeddings with `ClapAudioModelWithProjection`.
 *
 * ```javascript
 * import { AutoProcessor, ClapAudioModelWithProjection, read_audio } from '@huggingface/transformers';
 *
 * // Load processor and audio model
 * const processor = await AutoProcessor.from_pretrained('Xenova/clap-htsat-unfused');
 * const audio_model = await ClapAudioModelWithProjection.from_pretrained('Xenova/clap-htsat-unfused');
 *
 * // Read audio and run processor
 * const audio = await read_audio('https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/cat_meow.wav');
 * const audio_inputs = await processor(audio);
 *
 * // Compute embeddings
 * const { audio_embeds } = await audio_model(audio_inputs);
 * // Tensor {
 * //   dims: [ 1, 512 ],
 * //   type: 'float32',
 * //   data: Float32Array(512) [ ... ],
 * //   size: 512
 * // }
 * ```
 */
export class ClapAudioModelWithProjection extends ClapPreTrainedModel {
}
|
|
3576
|
+
/**
 * An abstract class to handle weights initialization and a simple interface for
 * downloading and loading pretrained VITS models.
 */
export class VitsPreTrainedModel extends PreTrainedModel {
}
/**
 * The complete VITS model, for text-to-speech synthesis.
 *
 * **Example:** Generate speech from text with `VitsModel`.
 * ```javascript
 * import { AutoTokenizer, VitsModel } from '@huggingface/transformers';
 *
 * // Load the tokenizer and model
 * const tokenizer = await AutoTokenizer.from_pretrained('Xenova/mms-tts-eng');
 * const model = await VitsModel.from_pretrained('Xenova/mms-tts-eng');
 *
 * // Run tokenization
 * const inputs = tokenizer('I love transformers');
 *
 * // Generate waveform
 * const { waveform } = await model(inputs);
 * // Tensor {
 * //   dims: [ 1, 35328 ],
 * //   type: 'float32',
 * //   data: Float32Array(35328) [ ... ],
 * //   size: 35328,
 * // }
 * ```
 */
export class VitsModel extends VitsPreTrainedModel {
    /**
     * Calls the model on new inputs.
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<VitsModelOutput>} The outputs for the VITS model.
     */
    _call(model_inputs: any): Promise<VitsModelOutput>;
}
|
|
3610
|
+
/**
 * An abstract class to handle weights initialization and a simple interface for
 * downloading and loading pretrained SegFormer models.
 */
export class SegformerPreTrainedModel extends PreTrainedModel {
}
/**
 * The bare SegFormer encoder (Mix-Transformer) outputting raw hidden-states without any specific head on top.
 */
export class SegformerModel extends SegformerPreTrainedModel {
}
/**
 * SegFormer Model transformer with an image classification head on top (a linear layer on top of the final hidden states) e.g. for ImageNet.
 */
export class SegformerForImageClassification extends SegformerPreTrainedModel {
}
/**
 * SegFormer Model transformer with an all-MLP decode head on top e.g. for ADE20k, CityScapes.
 */
export class SegformerForSemanticSegmentation extends SegformerPreTrainedModel {
}
|
|
3627
|
+
/**
 * An abstract class to handle weights initialization and a simple interface for
 * downloading and loading pretrained StableLm models.
 */
export class StableLmPreTrainedModel extends PreTrainedModel {
}
/**
 * The bare StableLm Model transformer outputting raw hidden-states without any specific head on top.
 */
export class StableLmModel extends StableLmPreTrainedModel {
}
/**
 * StableLm Model with a `language modeling` head on top for Causal Language Modeling (with past).
 */
export class StableLmForCausalLM extends StableLmPreTrainedModel {
}
|
|
3639
|
+
/**
 * An abstract class to handle weights initialization and a simple interface for
 * downloading and loading pretrained EfficientNet models.
 */
export class EfficientNetPreTrainedModel extends PreTrainedModel {
}
/**
 * The bare EfficientNet model outputting raw features without any specific head on top.
 */
export class EfficientNetModel extends EfficientNetPreTrainedModel {
}
/**
 * EfficientNet Model with an image classification head on top (a linear layer on top of the pooled features).
 */
export class EfficientNetForImageClassification extends EfficientNetPreTrainedModel {
    /**
     * Calls the model on new inputs.
     * @param {any} model_inputs The inputs to the model.
     * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for image classification.
     */
    _call(model_inputs: any): Promise<SequenceClassifierOutput>;
}
|
|
3655
|
+
/**
 * An abstract class to handle weights initialization and a simple interface for
 * downloading and loading pretrained MusicGen models.
 */
export class MusicgenPreTrainedModel extends PreTrainedModel {
}
/**
 * The bare Musicgen decoder model outputting raw hidden-states without any specific head on top.
 */
export class MusicgenModel extends MusicgenPreTrainedModel {
}
/**
 * The MusicGen decoder model with a language modelling head on top.
 */
export class MusicgenForCausalLM extends MusicgenPreTrainedModel {
}
/**
 * The composite MusicGen model with a text encoder, audio encoder and Musicgen decoder,
 * for music generation tasks with one or both of text and audio prompts.
 *
 * **Example:** Generate music from text with `Xenova/musicgen-small`.
 * ```javascript
 * import { AutoTokenizer, MusicgenForConditionalGeneration } from '@huggingface/transformers';
 *
 * // Load tokenizer and model
 * const tokenizer = await AutoTokenizer.from_pretrained('Xenova/musicgen-small');
 * const model = await MusicgenForConditionalGeneration.from_pretrained(
 *   'Xenova/musicgen-small', { dtype: 'fp32' }
 * );
 *
 * // Prepare text input
 * const prompt = '80s pop track with bassy drums and synth';
 * const inputs = tokenizer(prompt);
 *
 * // Generate audio
 * const audio_values = await model.generate({
 *   ...inputs,
 *   max_new_tokens: 512,
 *   do_sample: true,
 *   guidance_scale: 3,
 * });
 *
 * // (Optional) Write the output to a WAV file
 * import wavefile from 'wavefile';
 * import fs from 'fs';
 *
 * const wav = new wavefile.WaveFile();
 * wav.fromScratch(1, model.config.audio_encoder.sampling_rate, '32f', audio_values.data);
 * fs.writeFileSync('musicgen_out.wav', wav.toBuffer());
 * ```
 */
export class MusicgenForConditionalGeneration extends PreTrainedModel {
    /**
     * Apply the pattern mask to the final ids,
     * then revert the pattern delay mask by filtering the pad token id in a single step.
     * @param {Tensor} outputs The output tensor from the model.
     * @returns {Tensor} The filtered output tensor.
     */
    _apply_and_filter_by_delay_pattern_mask(outputs: Tensor): Tensor;
    /**
     * Prepares the model inputs for a generation step.
     * NOTE(review): signature is fully `any`-typed in the generated declarations;
     * consult the implementation for the actual shapes.
     */
    prepare_inputs_for_generation(input_ids: any, model_inputs: any, generation_config: any): any;
}
|
|
3712
|
+
export class MobileNetV1PreTrainedModel extends PreTrainedModel {
|
|
3713
|
+
}
|
|
3714
|
+
/**
|
|
3715
|
+
* The bare MobileNetV1 model outputting raw hidden-states without any specific head on top.
|
|
3716
|
+
*/
|
|
3717
|
+
export class MobileNetV1Model extends MobileNetV1PreTrainedModel {
|
|
3718
|
+
}
|
|
3719
|
+
/**
|
|
3720
|
+
* MobileNetV1 model with an image classification head on top (a linear layer on top of the pooled features),
|
|
3721
|
+
* e.g. for ImageNet.
|
|
3722
|
+
*/
|
|
3723
|
+
export class MobileNetV1ForImageClassification extends MobileNetV1PreTrainedModel {
|
|
3724
|
+
/**
|
|
3725
|
+
* @param {any} model_inputs
|
|
3726
|
+
*/
|
|
3727
|
+
_call(model_inputs: any): Promise<SequenceClassifierOutput>;
|
|
3728
|
+
}
|
|
3729
|
+
export class MobileNetV1ForSemanticSegmentation extends MobileNetV1PreTrainedModel {
|
|
3730
|
+
}
|
|
3731
|
+
export class MobileNetV2PreTrainedModel extends PreTrainedModel {
|
|
3732
|
+
}
|
|
3733
|
+
/**
|
|
3734
|
+
* The bare MobileNetV2 model outputting raw hidden-states without any specific head on top.
|
|
3735
|
+
*/
|
|
3736
|
+
export class MobileNetV2Model extends MobileNetV2PreTrainedModel {
|
|
3737
|
+
}
|
|
3738
|
+
/**
|
|
3739
|
+
* MobileNetV2 model with an image classification head on top (a linear layer on top of the pooled features),
|
|
3740
|
+
* e.g. for ImageNet.
|
|
3741
|
+
*/
|
|
3742
|
+
export class MobileNetV2ForImageClassification extends MobileNetV2PreTrainedModel {
|
|
3743
|
+
/**
|
|
3744
|
+
* @param {any} model_inputs
|
|
3745
|
+
*/
|
|
3746
|
+
_call(model_inputs: any): Promise<SequenceClassifierOutput>;
|
|
3747
|
+
}
|
|
3748
|
+
export class MobileNetV2ForSemanticSegmentation extends MobileNetV2PreTrainedModel {
|
|
3749
|
+
}
|
|
3750
|
+
export class MobileNetV3PreTrainedModel extends PreTrainedModel {
|
|
3751
|
+
}
|
|
3752
|
+
/**
|
|
3753
|
+
* The bare MobileNetV3 model outputting raw hidden-states without any specific head on top.
|
|
3754
|
+
*/
|
|
3755
|
+
export class MobileNetV3Model extends MobileNetV3PreTrainedModel {
|
|
3756
|
+
}
|
|
3757
|
+
/**
|
|
3758
|
+
* MobileNetV3 model with an image classification head on top (a linear layer on top of the pooled features),
|
|
3759
|
+
* e.g. for ImageNet.
|
|
3760
|
+
*/
|
|
3761
|
+
export class MobileNetV3ForImageClassification extends MobileNetV3PreTrainedModel {
|
|
3762
|
+
/**
|
|
3763
|
+
* @param {any} model_inputs
|
|
3764
|
+
*/
|
|
3765
|
+
_call(model_inputs: any): Promise<SequenceClassifierOutput>;
|
|
3766
|
+
}
|
|
3767
|
+
export class MobileNetV3ForSemanticSegmentation extends MobileNetV3PreTrainedModel {
|
|
3768
|
+
}
|
|
3769
|
+
export class MobileNetV4PreTrainedModel extends PreTrainedModel {
|
|
3770
|
+
}
|
|
3771
|
+
/**
|
|
3772
|
+
* The bare MobileNetV4 model outputting raw hidden-states without any specific head on top.
|
|
3773
|
+
*/
|
|
3774
|
+
export class MobileNetV4Model extends MobileNetV4PreTrainedModel {
|
|
3775
|
+
}
|
|
3776
|
+
/**
|
|
3777
|
+
* MobileNetV4 model with an image classification head on top (a linear layer on top of the pooled features),
|
|
3778
|
+
* e.g. for ImageNet.
|
|
3779
|
+
*/
|
|
3780
|
+
export class MobileNetV4ForImageClassification extends MobileNetV4PreTrainedModel {
|
|
3781
|
+
/**
|
|
3782
|
+
* @param {any} model_inputs
|
|
3783
|
+
*/
|
|
3784
|
+
_call(model_inputs: any): Promise<SequenceClassifierOutput>;
|
|
3785
|
+
}
|
|
3786
|
+
export class MobileNetV4ForSemanticSegmentation extends MobileNetV4PreTrainedModel {
|
|
3787
|
+
}
|
|
3788
|
+
export class DecisionTransformerPreTrainedModel extends PreTrainedModel {
|
|
3789
|
+
}
|
|
3790
|
+
/**
|
|
3791
|
+
* The model builds upon the GPT2 architecture to perform autoregressive prediction of actions in an offline RL setting.
|
|
3792
|
+
* Refer to the paper for more details: https://huggingface.co/papers/2106.01345
|
|
3793
|
+
*/
|
|
3794
|
+
export class DecisionTransformerModel extends DecisionTransformerPreTrainedModel {
|
|
3795
|
+
}
|
|
3796
|
+
export class MultiModalityPreTrainedModel extends PreTrainedModel {
|
|
3797
|
+
}
|
|
3798
|
+
export class MultiModalityCausalLM extends MultiModalityPreTrainedModel {
|
|
3799
|
+
_generation_mode: string;
|
|
3800
|
+
forward(model_inputs: any): Promise<any>;
|
|
3801
|
+
/**
|
|
3802
|
+
* @param {import('./generation/parameters.js').GenerationFunctionParameters} options
|
|
3803
|
+
*/
|
|
3804
|
+
generate_images(options: import("./generation/parameters.js").GenerationFunctionParameters): Promise<RawImage[]>;
|
|
3805
|
+
}
|
|
3806
|
+
export class MgpstrModelOutput extends ModelOutput {
|
|
3807
|
+
constructor({ char_logits, bpe_logits, wp_logits }: {
|
|
3808
|
+
char_logits: any;
|
|
3809
|
+
bpe_logits: any;
|
|
3810
|
+
wp_logits: any;
|
|
3811
|
+
});
|
|
3812
|
+
char_logits: any;
|
|
3813
|
+
bpe_logits: any;
|
|
3814
|
+
wp_logits: any;
|
|
3815
|
+
get logits(): any[];
|
|
3816
|
+
}
|
|
3817
|
+
export class MgpstrPreTrainedModel extends PreTrainedModel {
|
|
3818
|
+
}
|
|
3819
|
+
/**
|
|
3820
|
+
* MGP-STR Model transformer with three classification heads on top
|
|
3821
|
+
* (three A^3 modules and three linear layer on top of the transformer encoder output) for scene text recognition (STR).
|
|
3822
|
+
*/
|
|
3823
|
+
export class MgpstrForSceneTextRecognition extends MgpstrPreTrainedModel {
|
|
3824
|
+
/**
|
|
3825
|
+
* @param {any} model_inputs
|
|
3826
|
+
*/
|
|
3827
|
+
_call(model_inputs: any): Promise<MgpstrModelOutput>;
|
|
3828
|
+
}
|
|
3829
|
+
export class PatchTSTPreTrainedModel extends PreTrainedModel {
|
|
3830
|
+
}
|
|
3831
|
+
/**
|
|
3832
|
+
* The bare PatchTST Model outputting raw hidden-states without any specific head.
|
|
3833
|
+
*/
|
|
3834
|
+
export class PatchTSTModel extends PatchTSTPreTrainedModel {
|
|
3835
|
+
}
|
|
3836
|
+
/**
|
|
3837
|
+
* The PatchTST for prediction model.
|
|
3838
|
+
*/
|
|
3839
|
+
export class PatchTSTForPrediction extends PatchTSTPreTrainedModel {
|
|
3840
|
+
}
|
|
3841
|
+
export class PatchTSMixerPreTrainedModel extends PreTrainedModel {
|
|
3842
|
+
}
|
|
3843
|
+
/**
|
|
3844
|
+
* The bare PatchTSMixer Model outputting raw hidden-states without any specific head.
|
|
3845
|
+
*/
|
|
3846
|
+
export class PatchTSMixerModel extends PatchTSMixerPreTrainedModel {
|
|
3847
|
+
}
|
|
3848
|
+
/**
|
|
3849
|
+
* The PatchTSMixer for prediction model.
|
|
3850
|
+
*/
|
|
3851
|
+
export class PatchTSMixerForPrediction extends PatchTSMixerPreTrainedModel {
|
|
3852
|
+
}
|
|
3853
|
+
export class UltravoxPreTrainedModel extends PreTrainedModel {
|
|
3854
|
+
}
|
|
3855
|
+
export class UltravoxModel extends UltravoxPreTrainedModel {
|
|
3856
|
+
_merge_input_ids_with_audio_features(kwargs: any): {
|
|
3857
|
+
inputs_embeds: any;
|
|
3858
|
+
attention_mask: any;
|
|
3859
|
+
};
|
|
3860
|
+
}
|
|
3861
|
+
export class VoxtralForConditionalGeneration extends UltravoxModel {
|
|
3862
|
+
}
|
|
3863
|
+
export class MimiPreTrainedModel extends PreTrainedModel {
|
|
3864
|
+
}
|
|
3865
|
+
export class MimiEncoderOutput extends ModelOutput {
|
|
3866
|
+
/**
|
|
3867
|
+
* @param {Object} output The output of the model.
|
|
3868
|
+
* @param {Tensor} output.audio_codes Discrete code embeddings, of shape `(batch_size, num_quantizers, codes_length)`.
|
|
3869
|
+
*/
|
|
3870
|
+
constructor({ audio_codes }: {
|
|
3871
|
+
audio_codes: Tensor;
|
|
3872
|
+
});
|
|
3873
|
+
audio_codes: Tensor;
|
|
3874
|
+
}
|
|
3875
|
+
export class MimiDecoderOutput extends ModelOutput {
|
|
3876
|
+
/**
|
|
3877
|
+
* @param {Object} output The output of the model.
|
|
3878
|
+
* @param {Tensor} output.audio_values Decoded audio values, of shape `(batch_size, num_channels, sequence_length)`.
|
|
3879
|
+
*/
|
|
3880
|
+
constructor({ audio_values }: {
|
|
3881
|
+
audio_values: Tensor;
|
|
3882
|
+
});
|
|
3883
|
+
audio_values: Tensor;
|
|
3884
|
+
}
|
|
3885
|
+
/**
|
|
3886
|
+
* The Mimi neural audio codec model.
|
|
3887
|
+
*/
|
|
3888
|
+
export class MimiModel extends MimiPreTrainedModel {
|
|
3889
|
+
/**
|
|
3890
|
+
* Encodes the input audio waveform into discrete codes.
|
|
3891
|
+
* @param {Object} inputs Model inputs
|
|
3892
|
+
* @param {Tensor} [inputs.input_values] Float values of the input audio waveform, of shape `(batch_size, channels, sequence_length)`).
|
|
3893
|
+
* @returns {Promise<MimiEncoderOutput>} The output tensor of shape `(batch_size, num_codebooks, sequence_length)`.
|
|
3894
|
+
*/
|
|
3895
|
+
encode(inputs: {
|
|
3896
|
+
input_values?: Tensor;
|
|
3897
|
+
}): Promise<MimiEncoderOutput>;
|
|
3898
|
+
/**
|
|
3899
|
+
* Decodes the given frames into an output audio waveform.
|
|
3900
|
+
* @param {MimiEncoderOutput} inputs The encoded audio codes.
|
|
3901
|
+
* @returns {Promise<MimiDecoderOutput>} The output tensor of shape `(batch_size, num_channels, sequence_length)`.
|
|
3902
|
+
*/
|
|
3903
|
+
decode(inputs: MimiEncoderOutput): Promise<MimiDecoderOutput>;
|
|
3904
|
+
}
|
|
3905
|
+
export class MimiEncoderModel extends MimiPreTrainedModel {
|
|
3906
|
+
}
|
|
3907
|
+
export class MimiDecoderModel extends MimiPreTrainedModel {
|
|
3908
|
+
}
|
|
3909
|
+
export class DacPreTrainedModel extends PreTrainedModel {
|
|
3910
|
+
}
|
|
3911
|
+
export class DacEncoderOutput extends ModelOutput {
|
|
3912
|
+
/**
|
|
3913
|
+
* @param {Object} output The output of the model.
|
|
3914
|
+
* @param {Tensor} output.audio_codes Discrete code embeddings, of shape `(batch_size, num_quantizers, codes_length)`.
|
|
3915
|
+
*/
|
|
3916
|
+
constructor({ audio_codes }: {
|
|
3917
|
+
audio_codes: Tensor;
|
|
3918
|
+
});
|
|
3919
|
+
audio_codes: Tensor;
|
|
3920
|
+
}
|
|
3921
|
+
export class DacDecoderOutput extends ModelOutput {
|
|
3922
|
+
/**
|
|
3923
|
+
* @param {Object} output The output of the model.
|
|
3924
|
+
* @param {Tensor} output.audio_values Decoded audio values, of shape `(batch_size, num_channels, sequence_length)`.
|
|
3925
|
+
*/
|
|
3926
|
+
constructor({ audio_values }: {
|
|
3927
|
+
audio_values: Tensor;
|
|
3928
|
+
});
|
|
3929
|
+
audio_values: Tensor;
|
|
3930
|
+
}
|
|
3931
|
+
/**
|
|
3932
|
+
* The DAC (Descript Audio Codec) model.
|
|
3933
|
+
*/
|
|
3934
|
+
export class DacModel extends DacPreTrainedModel {
|
|
3935
|
+
/**
|
|
3936
|
+
* Encodes the input audio waveform into discrete codes.
|
|
3937
|
+
* @param {Object} inputs Model inputs
|
|
3938
|
+
* @param {Tensor} [inputs.input_values] Float values of the input audio waveform, of shape `(batch_size, channels, sequence_length)`).
|
|
3939
|
+
* @returns {Promise<DacEncoderOutput>} The output tensor of shape `(batch_size, num_codebooks, sequence_length)`.
|
|
3940
|
+
*/
|
|
3941
|
+
encode(inputs: {
|
|
3942
|
+
input_values?: Tensor;
|
|
3943
|
+
}): Promise<DacEncoderOutput>;
|
|
3944
|
+
/**
|
|
3945
|
+
* Decodes the given frames into an output audio waveform.
|
|
3946
|
+
* @param {DacEncoderOutput} inputs The encoded audio codes.
|
|
3947
|
+
* @returns {Promise<DacDecoderOutput>} The output tensor of shape `(batch_size, num_channels, sequence_length)`.
|
|
3948
|
+
*/
|
|
3949
|
+
decode(inputs: DacEncoderOutput): Promise<DacDecoderOutput>;
|
|
3950
|
+
}
|
|
3951
|
+
export class DacEncoderModel extends DacPreTrainedModel {
|
|
3952
|
+
}
|
|
3953
|
+
export class DacDecoderModel extends DacPreTrainedModel {
|
|
3954
|
+
}
|
|
3955
|
+
export class SnacPreTrainedModel extends PreTrainedModel {
|
|
3956
|
+
}
|
|
3957
|
+
/**
|
|
3958
|
+
* The SNAC (Multi-Scale Neural Audio Codec) model.
|
|
3959
|
+
*/
|
|
3960
|
+
export class SnacModel extends SnacPreTrainedModel {
|
|
3961
|
+
/**
|
|
3962
|
+
* Encodes the input audio waveform into discrete codes.
|
|
3963
|
+
* @param {Object} inputs Model inputs
|
|
3964
|
+
* @param {Tensor} [inputs.input_values] Float values of the input audio waveform, of shape `(batch_size, channels, sequence_length)`).
|
|
3965
|
+
* @returns {Promise<Record<string, Tensor>>} The output tensors of shape `(batch_size, num_codebooks, sequence_length)`.
|
|
3966
|
+
*/
|
|
3967
|
+
encode(inputs: {
|
|
3968
|
+
input_values?: Tensor;
|
|
3969
|
+
}): Promise<Record<string, Tensor>>;
|
|
3970
|
+
/**
|
|
3971
|
+
* Decodes the given frames into an output audio waveform.
|
|
3972
|
+
* @param {Record<string, Tensor>} inputs The encoded audio codes.
|
|
3973
|
+
* @returns {Promise<{audio_values: Tensor}>} The output tensor of shape `(batch_size, num_channels, sequence_length)`.
|
|
3974
|
+
*/
|
|
3975
|
+
decode(inputs: Record<string, Tensor>): Promise<{
|
|
3976
|
+
audio_values: Tensor;
|
|
3977
|
+
}>;
|
|
3978
|
+
}
|
|
3979
|
+
export class SnacEncoderModel extends SnacPreTrainedModel {
|
|
3980
|
+
}
|
|
3981
|
+
export class SnacDecoderModel extends SnacPreTrainedModel {
|
|
3982
|
+
}
|
|
3983
|
+
/**
|
|
3984
|
+
* Base class of all AutoModels. Contains the `from_pretrained` function
|
|
3985
|
+
* which is used to instantiate pretrained models.
|
|
3986
|
+
*/
|
|
3987
|
+
export class PretrainedMixin {
|
|
3988
|
+
/**
|
|
3989
|
+
* Mapping from model type to model class.
|
|
3990
|
+
* @type {Map<string, Object>[]}
|
|
3991
|
+
*/
|
|
3992
|
+
static MODEL_CLASS_MAPPINGS: Map<string, any>[];
|
|
3993
|
+
/**
|
|
3994
|
+
* Whether to attempt to instantiate the base class (`PretrainedModel`) if
|
|
3995
|
+
* the model type is not found in the mapping.
|
|
3996
|
+
*/
|
|
3997
|
+
static BASE_IF_FAIL: boolean;
|
|
3998
|
+
/**
|
|
3999
|
+
* Instantiate one of the model classes of the library from a pretrained model.
|
|
4000
|
+
*
|
|
4001
|
+
* The model class to instantiate is selected based on the `model_type` property of the config object
|
|
4002
|
+
* (either passed as an argument or loaded from `pretrained_model_name_or_path` if possible)
|
|
4003
|
+
*
|
|
4004
|
+
* @param {string} pretrained_model_name_or_path The name or path of the pretrained model. Can be either:
|
|
4005
|
+
* - A string, the *model id* of a pretrained model hosted inside a model repo on huggingface.co.
|
|
4006
|
+
* Valid model ids can be located at the root-level, like `bert-base-uncased`, or namespaced under a
|
|
4007
|
+
* user or organization name, like `dbmdz/bert-base-german-cased`.
|
|
4008
|
+
* - A path to a *directory* containing model weights, e.g., `./my_model_directory/`.
|
|
4009
|
+
* @param {import('./utils/hub.js').PretrainedModelOptions} options Additional options for loading the model.
|
|
4010
|
+
*
|
|
4011
|
+
* @returns {Promise<PreTrainedModel>} A new instance of the `PreTrainedModel` class.
|
|
4012
|
+
*/
|
|
4013
|
+
static from_pretrained(pretrained_model_name_or_path: string, { progress_callback, config, cache_dir, local_files_only, revision, model_file_name, subfolder, device, dtype, use_external_data_format, session_options, }?: import("./utils/hub.js").PretrainedModelOptions): Promise<PreTrainedModel>;
|
|
4014
|
+
}
|
|
4015
|
+
/**
|
|
4016
|
+
* Helper class which is used to instantiate pretrained models with the `from_pretrained` function.
|
|
4017
|
+
* The chosen model class is determined by the type specified in the model config.
|
|
4018
|
+
*
|
|
4019
|
+
* @example
|
|
4020
|
+
* let model = await AutoModel.from_pretrained('Xenova/bert-base-uncased');
|
|
4021
|
+
*/
|
|
4022
|
+
export class AutoModel extends PretrainedMixin {
|
|
4023
|
+
}
|
|
4024
|
+
/**
|
|
4025
|
+
* Helper class which is used to instantiate pretrained sequence classification models with the `from_pretrained` function.
|
|
4026
|
+
* The chosen model class is determined by the type specified in the model config.
|
|
4027
|
+
*
|
|
4028
|
+
* @example
|
|
4029
|
+
* let model = await AutoModelForSequenceClassification.from_pretrained('Xenova/distilbert-base-uncased-finetuned-sst-2-english');
|
|
4030
|
+
*/
|
|
4031
|
+
export class AutoModelForSequenceClassification extends PretrainedMixin {
|
|
4032
|
+
static MODEL_CLASS_MAPPINGS: Map<string, (string | typeof BertForSequenceClassification)[]>[];
|
|
4033
|
+
}
|
|
4034
|
+
/**
|
|
4035
|
+
* Helper class which is used to instantiate pretrained token classification models with the `from_pretrained` function.
|
|
4036
|
+
* The chosen model class is determined by the type specified in the model config.
|
|
4037
|
+
*
|
|
4038
|
+
* @example
|
|
4039
|
+
* let model = await AutoModelForTokenClassification.from_pretrained('Xenova/distilbert-base-multilingual-cased-ner-hrl');
|
|
4040
|
+
*/
|
|
4041
|
+
export class AutoModelForTokenClassification extends PretrainedMixin {
|
|
4042
|
+
static MODEL_CLASS_MAPPINGS: Map<string, (string | typeof BertForTokenClassification)[]>[];
|
|
4043
|
+
}
|
|
4044
|
+
/**
|
|
4045
|
+
* Helper class which is used to instantiate pretrained sequence-to-sequence models with the `from_pretrained` function.
|
|
4046
|
+
* The chosen model class is determined by the type specified in the model config.
|
|
4047
|
+
*
|
|
4048
|
+
* @example
|
|
4049
|
+
* let model = await AutoModelForSeq2SeqLM.from_pretrained('Xenova/t5-small');
|
|
4050
|
+
*/
|
|
4051
|
+
export class AutoModelForSeq2SeqLM extends PretrainedMixin {
|
|
4052
|
+
static MODEL_CLASS_MAPPINGS: Map<string, (string | typeof T5ForConditionalGeneration)[]>[];
|
|
4053
|
+
}
|
|
4054
|
+
/**
|
|
4055
|
+
* Helper class which is used to instantiate pretrained sequence-to-sequence speech-to-text models with the `from_pretrained` function.
|
|
4056
|
+
* The chosen model class is determined by the type specified in the model config.
|
|
4057
|
+
*
|
|
4058
|
+
* @example
|
|
4059
|
+
* let model = await AutoModelForSpeechSeq2Seq.from_pretrained('openai/whisper-tiny.en');
|
|
4060
|
+
*/
|
|
4061
|
+
export class AutoModelForSpeechSeq2Seq extends PretrainedMixin {
|
|
4062
|
+
static MODEL_CLASS_MAPPINGS: Map<string, (string | typeof SpeechT5ForSpeechToText)[] | (string | typeof WhisperForConditionalGeneration)[]>[];
|
|
4063
|
+
}
|
|
4064
|
+
/**
|
|
4065
|
+
* Helper class which is used to instantiate pretrained sequence-to-sequence text-to-spectrogram models with the `from_pretrained` function.
|
|
4066
|
+
* The chosen model class is determined by the type specified in the model config.
|
|
4067
|
+
*
|
|
4068
|
+
* @example
|
|
4069
|
+
* let model = await AutoModelForTextToSpectrogram.from_pretrained('microsoft/speecht5_tts');
|
|
4070
|
+
*/
|
|
4071
|
+
export class AutoModelForTextToSpectrogram extends PretrainedMixin {
|
|
4072
|
+
static MODEL_CLASS_MAPPINGS: Map<string, (string | typeof SpeechT5ForTextToSpeech)[]>[];
|
|
4073
|
+
}
|
|
4074
|
+
/**
|
|
4075
|
+
* Helper class which is used to instantiate pretrained text-to-waveform models with the `from_pretrained` function.
|
|
4076
|
+
* The chosen model class is determined by the type specified in the model config.
|
|
4077
|
+
*
|
|
4078
|
+
* @example
|
|
4079
|
+
* let model = await AutoModelForTextToSpectrogram.from_pretrained('facebook/mms-tts-eng');
|
|
4080
|
+
*/
|
|
4081
|
+
export class AutoModelForTextToWaveform extends PretrainedMixin {
|
|
4082
|
+
static MODEL_CLASS_MAPPINGS: Map<string, (string | typeof VitsModel)[] | (string | typeof MusicgenForConditionalGeneration)[] | (string | typeof SupertonicForConditionalGeneration)[]>[];
|
|
4083
|
+
}
|
|
4084
|
+
/**
|
|
4085
|
+
* Helper class which is used to instantiate pretrained causal language models with the `from_pretrained` function.
|
|
4086
|
+
* The chosen model class is determined by the type specified in the model config.
|
|
4087
|
+
*
|
|
4088
|
+
* @example
|
|
4089
|
+
* let model = await AutoModelForCausalLM.from_pretrained('Xenova/gpt2');
|
|
4090
|
+
*/
|
|
4091
|
+
export class AutoModelForCausalLM extends PretrainedMixin {
|
|
4092
|
+
static MODEL_CLASS_MAPPINGS: Map<string, (string | typeof Phi3VForCausalLM)[]>[];
|
|
4093
|
+
}
|
|
4094
|
+
/**
|
|
4095
|
+
* Helper class which is used to instantiate pretrained masked language models with the `from_pretrained` function.
|
|
4096
|
+
* The chosen model class is determined by the type specified in the model config.
|
|
4097
|
+
*
|
|
4098
|
+
* @example
|
|
4099
|
+
* let model = await AutoModelForMaskedLM.from_pretrained('Xenova/bert-base-uncased');
|
|
4100
|
+
*/
|
|
4101
|
+
export class AutoModelForMaskedLM extends PretrainedMixin {
|
|
4102
|
+
static MODEL_CLASS_MAPPINGS: Map<string, (string | typeof BertForMaskedLM)[]>[];
|
|
4103
|
+
}
|
|
4104
|
+
/**
|
|
4105
|
+
* Helper class which is used to instantiate pretrained question answering models with the `from_pretrained` function.
|
|
4106
|
+
* The chosen model class is determined by the type specified in the model config.
|
|
4107
|
+
*
|
|
4108
|
+
* @example
|
|
4109
|
+
* let model = await AutoModelForQuestionAnswering.from_pretrained('Xenova/distilbert-base-cased-distilled-squad');
|
|
4110
|
+
*/
|
|
4111
|
+
export class AutoModelForQuestionAnswering extends PretrainedMixin {
|
|
4112
|
+
static MODEL_CLASS_MAPPINGS: Map<string, (string | typeof BertForQuestionAnswering)[]>[];
|
|
4113
|
+
}
|
|
4114
|
+
/**
|
|
4115
|
+
* Helper class which is used to instantiate pretrained vision-to-sequence models with the `from_pretrained` function.
|
|
4116
|
+
* The chosen model class is determined by the type specified in the model config.
|
|
4117
|
+
*
|
|
4118
|
+
* @example
|
|
4119
|
+
* let model = await AutoModelForVision2Seq.from_pretrained('Xenova/vit-gpt2-image-captioning');
|
|
4120
|
+
*/
|
|
4121
|
+
export class AutoModelForVision2Seq extends PretrainedMixin {
|
|
4122
|
+
static MODEL_CLASS_MAPPINGS: Map<string, (string | typeof VisionEncoderDecoderModel)[]>[];
|
|
4123
|
+
}
|
|
4124
|
+
/**
|
|
4125
|
+
* Helper class which is used to instantiate pretrained image classification models with the `from_pretrained` function.
|
|
4126
|
+
* The chosen model class is determined by the type specified in the model config.
|
|
4127
|
+
*
|
|
4128
|
+
* @example
|
|
4129
|
+
* let model = await AutoModelForImageClassification.from_pretrained('Xenova/vit-base-patch16-224');
|
|
4130
|
+
*/
|
|
4131
|
+
export class AutoModelForImageClassification extends PretrainedMixin {
|
|
4132
|
+
static MODEL_CLASS_MAPPINGS: Map<string, (string | typeof SegformerForImageClassification)[]>[];
|
|
4133
|
+
}
|
|
4134
|
+
/**
|
|
4135
|
+
* Helper class which is used to instantiate pretrained image segmentation models with the `from_pretrained` function.
|
|
4136
|
+
* The chosen model class is determined by the type specified in the model config.
|
|
4137
|
+
*
|
|
4138
|
+
* @example
|
|
4139
|
+
* let model = await AutoModelForImageSegmentation.from_pretrained('Xenova/detr-resnet-50-panoptic');
|
|
4140
|
+
*/
|
|
4141
|
+
export class AutoModelForImageSegmentation extends PretrainedMixin {
|
|
4142
|
+
static MODEL_CLASS_MAPPINGS: Map<string, (string | typeof CLIPSegForImageSegmentation)[]>[];
|
|
4143
|
+
}
|
|
4144
|
+
/**
|
|
4145
|
+
* Helper class which is used to instantiate pretrained image segmentation models with the `from_pretrained` function.
|
|
4146
|
+
* The chosen model class is determined by the type specified in the model config.
|
|
4147
|
+
*
|
|
4148
|
+
* @example
|
|
4149
|
+
* let model = await AutoModelForSemanticSegmentation.from_pretrained('nvidia/segformer-b3-finetuned-cityscapes-1024-1024');
|
|
4150
|
+
*/
|
|
4151
|
+
export class AutoModelForSemanticSegmentation extends PretrainedMixin {
|
|
4152
|
+
static MODEL_CLASS_MAPPINGS: Map<string, (string | typeof SegformerForSemanticSegmentation)[]>[];
|
|
4153
|
+
}
|
|
4154
|
+
/**
|
|
4155
|
+
* Helper class which is used to instantiate pretrained universal image segmentation models with the `from_pretrained` function.
|
|
4156
|
+
* The chosen model class is determined by the type specified in the model config.
|
|
4157
|
+
*
|
|
4158
|
+
* @example
|
|
4159
|
+
* let model = await AutoModelForUniversalSegmentation.from_pretrained('hf-internal-testing/tiny-random-MaskFormerForInstanceSegmentation');
|
|
4160
|
+
*/
|
|
4161
|
+
export class AutoModelForUniversalSegmentation extends PretrainedMixin {
|
|
4162
|
+
static MODEL_CLASS_MAPPINGS: Map<string, (string | typeof MaskFormerForInstanceSegmentation)[]>[];
|
|
4163
|
+
}
|
|
4164
|
+
/**
|
|
4165
|
+
* Helper class which is used to instantiate pretrained object detection models with the `from_pretrained` function.
|
|
4166
|
+
* The chosen model class is determined by the type specified in the model config.
|
|
4167
|
+
*
|
|
4168
|
+
* @example
|
|
4169
|
+
* let model = await AutoModelForObjectDetection.from_pretrained('Xenova/detr-resnet-50');
|
|
4170
|
+
*/
|
|
4171
|
+
export class AutoModelForObjectDetection extends PretrainedMixin {
|
|
4172
|
+
static MODEL_CLASS_MAPPINGS: Map<string, (string | typeof DetrForObjectDetection)[]>[];
|
|
4173
|
+
}
|
|
4174
|
+
export class AutoModelForZeroShotObjectDetection extends PretrainedMixin {
|
|
4175
|
+
static MODEL_CLASS_MAPPINGS: Map<string, (string | typeof OwlViTForObjectDetection)[]>[];
|
|
4176
|
+
}
|
|
4177
|
+
/**
|
|
4178
|
+
* Helper class which is used to instantiate pretrained mask generation models with the `from_pretrained` function.
|
|
4179
|
+
* The chosen model class is determined by the type specified in the model config.
|
|
4180
|
+
*
|
|
4181
|
+
* @example
|
|
4182
|
+
* let model = await AutoModelForMaskGeneration.from_pretrained('Xenova/sam-vit-base');
|
|
4183
|
+
*/
|
|
4184
|
+
export class AutoModelForMaskGeneration extends PretrainedMixin {
|
|
4185
|
+
static MODEL_CLASS_MAPPINGS: Map<string, (string | typeof SamModel)[] | (string | typeof Sam2Model)[]>[];
|
|
4186
|
+
}
|
|
4187
|
+
export class AutoModelForCTC extends PretrainedMixin {
|
|
4188
|
+
static MODEL_CLASS_MAPPINGS: Map<string, (string | typeof Wav2Vec2ForCTC)[] | (string | typeof Wav2Vec2BertForCTC)[]>[];
|
|
4189
|
+
}
|
|
4190
|
+
export class AutoModelForAudioClassification extends PretrainedMixin {
|
|
4191
|
+
static MODEL_CLASS_MAPPINGS: Map<string, (string | typeof ASTForAudioClassification)[]>[];
|
|
4192
|
+
}
|
|
4193
|
+
export class AutoModelForXVector extends PretrainedMixin {
|
|
4194
|
+
static MODEL_CLASS_MAPPINGS: Map<string, (string | typeof WavLMForXVector)[]>[];
|
|
4195
|
+
}
|
|
4196
|
+
export class AutoModelForAudioFrameClassification extends PretrainedMixin {
|
|
4197
|
+
static MODEL_CLASS_MAPPINGS: Map<string, (string | typeof UniSpeechSatForAudioFrameClassification)[]>[];
|
|
4198
|
+
}
|
|
4199
|
+
export class AutoModelForDocumentQuestionAnswering extends PretrainedMixin {
|
|
4200
|
+
static MODEL_CLASS_MAPPINGS: Map<string, (string | typeof VisionEncoderDecoderModel)[]>[];
|
|
4201
|
+
}
|
|
4202
|
+
export class AutoModelForImageMatting extends PretrainedMixin {
|
|
4203
|
+
static MODEL_CLASS_MAPPINGS: Map<string, (string | typeof VitMatteForImageMatting)[]>[];
|
|
4204
|
+
}
|
|
4205
|
+
export class AutoModelForImageToImage extends PretrainedMixin {
|
|
4206
|
+
static MODEL_CLASS_MAPPINGS: Map<string, (string | typeof Swin2SRForImageSuperResolution)[]>[];
|
|
4207
|
+
}
|
|
4208
|
+
export class AutoModelForDepthEstimation extends PretrainedMixin {
|
|
4209
|
+
static MODEL_CLASS_MAPPINGS: Map<string, (string | typeof DPTForDepthEstimation)[]>[];
|
|
4210
|
+
}
|
|
4211
|
+
export class AutoModelForNormalEstimation extends PretrainedMixin {
|
|
4212
|
+
static MODEL_CLASS_MAPPINGS: Map<string, (string | typeof SapiensForNormalEstimation)[]>[];
|
|
4213
|
+
}
|
|
4214
|
+
export class AutoModelForPoseEstimation extends PretrainedMixin {
|
|
4215
|
+
static MODEL_CLASS_MAPPINGS: Map<string, (string | typeof VitPoseForPoseEstimation)[]>[];
|
|
4216
|
+
}
|
|
4217
|
+
export class AutoModelForImageFeatureExtraction extends PretrainedMixin {
|
|
4218
|
+
static MODEL_CLASS_MAPPINGS: Map<string, (string | typeof CLIPVisionModelWithProjection)[]>[];
|
|
4219
|
+
}
|
|
4220
|
+
export class AutoModelForImageTextToText extends PretrainedMixin {
|
|
4221
|
+
static MODEL_CLASS_MAPPINGS: Map<string, (string | typeof Idefics3ForConditionalGeneration)[] | (string | typeof Florence2ForConditionalGeneration)[] | (string | typeof Gemma3nForConditionalGeneration)[]>[];
|
|
4222
|
+
}
|
|
4223
|
+
export class AutoModelForAudioTextToText extends PretrainedMixin {
|
|
4224
|
+
static MODEL_CLASS_MAPPINGS: Map<string, (string | typeof UltravoxModel)[]>[];
|
|
4225
|
+
}
|
|
4226
|
+
export class Seq2SeqLMOutput extends ModelOutput {
|
|
4227
|
+
/**
|
|
4228
|
+
* @param {Object} output The output of the model.
|
|
4229
|
+
* @param {Tensor} output.logits The output logits of the model.
|
|
4230
|
+
* @param {Tensor} output.past_key_values An tensor of key/value pairs that represent the previous state of the model.
|
|
4231
|
+
* @param {Tensor} output.encoder_outputs The output of the encoder in a sequence-to-sequence model.
|
|
4232
|
+
* @param {Tensor} [output.decoder_attentions] Attentions weights of the decoder, after the attention softmax, used to compute the weighted average in the self-attention heads.
|
|
4233
|
+
* @param {Tensor} [output.cross_attentions] Attentions weights of the decoder's cross-attention layer, after the attention softmax, used to compute the weighted average in the cross-attention heads.
|
|
4234
|
+
*/
|
|
4235
|
+
constructor({ logits, past_key_values, encoder_outputs, decoder_attentions, cross_attentions }: {
|
|
4236
|
+
logits: Tensor;
|
|
4237
|
+
past_key_values: Tensor;
|
|
4238
|
+
encoder_outputs: Tensor;
|
|
4239
|
+
decoder_attentions?: Tensor;
|
|
4240
|
+
cross_attentions?: Tensor;
|
|
4241
|
+
});
|
|
4242
|
+
logits: Tensor;
|
|
4243
|
+
past_key_values: Tensor;
|
|
4244
|
+
encoder_outputs: Tensor;
|
|
4245
|
+
decoder_attentions: Tensor;
|
|
4246
|
+
cross_attentions: Tensor;
|
|
4247
|
+
}
|
|
4248
|
+
/**
|
|
4249
|
+
* Base class for outputs of sentence classification models.
|
|
4250
|
+
*/
|
|
4251
|
+
export class SequenceClassifierOutput extends ModelOutput {
|
|
4252
|
+
/**
|
|
4253
|
+
* @param {Object} output The output of the model.
|
|
4254
|
+
* @param {Tensor} output.logits classification (or regression if config.num_labels==1) scores (before SoftMax).
|
|
4255
|
+
* @param {Record<string, Tensor>} [output.attentions] Object of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length, sequence_length)`.
|
|
4256
|
+
* Attentions weights after the attention softmax, used to compute the weighted average in the self-attention heads.
|
|
4257
|
+
*/
|
|
4258
|
+
constructor({ logits, ...attentions }: {
|
|
4259
|
+
logits: Tensor;
|
|
4260
|
+
attentions?: Record<string, Tensor>;
|
|
4261
|
+
});
|
|
4262
|
+
logits: Tensor;
|
|
4263
|
+
attentions: Record<string, Tensor>[];
|
|
4264
|
+
}
|
|
4265
|
+
/**
|
|
4266
|
+
* Base class for outputs of XVector models.
|
|
4267
|
+
*/
|
|
4268
|
+
export class XVectorOutput extends ModelOutput {
|
|
4269
|
+
/**
|
|
4270
|
+
* @param {Object} output The output of the model.
|
|
4271
|
+
* @param {Tensor} output.logits Classification hidden states before AMSoftmax, of shape `(batch_size, config.xvector_output_dim)`.
|
|
4272
|
+
* @param {Tensor} output.embeddings Utterance embeddings used for vector similarity-based retrieval, of shape `(batch_size, config.xvector_output_dim)`.
|
|
4273
|
+
*/
|
|
4274
|
+
constructor({ logits, embeddings }: {
|
|
4275
|
+
logits: Tensor;
|
|
4276
|
+
embeddings: Tensor;
|
|
4277
|
+
});
|
|
4278
|
+
logits: Tensor;
|
|
4279
|
+
embeddings: Tensor;
|
|
4280
|
+
}
|
|
4281
|
+
/**
|
|
4282
|
+
* Base class for outputs of token classification models.
|
|
4283
|
+
*/
|
|
4284
|
+
export class TokenClassifierOutput extends ModelOutput {
|
|
4285
|
+
/**
|
|
4286
|
+
* @param {Object} output The output of the model.
|
|
4287
|
+
* @param {Tensor} output.logits Classification scores (before SoftMax).
|
|
4288
|
+
*/
|
|
4289
|
+
constructor({ logits }: {
|
|
4290
|
+
logits: Tensor;
|
|
4291
|
+
});
|
|
4292
|
+
logits: Tensor;
|
|
4293
|
+
}
|
|
4294
|
+
/**
|
|
4295
|
+
* Base class for masked language models outputs.
|
|
4296
|
+
*/
|
|
4297
|
+
export class MaskedLMOutput extends ModelOutput {
|
|
4298
|
+
/**
|
|
4299
|
+
* @param {Object} output The output of the model.
|
|
4300
|
+
* @param {Tensor} output.logits Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
|
|
4301
|
+
*/
|
|
4302
|
+
constructor({ logits }: {
|
|
4303
|
+
logits: Tensor;
|
|
4304
|
+
});
|
|
4305
|
+
logits: Tensor;
|
|
4306
|
+
}
|
|
4307
|
+
/**
|
|
4308
|
+
* Base class for outputs of question answering models.
|
|
4309
|
+
*/
|
|
4310
|
+
export class QuestionAnsweringModelOutput extends ModelOutput {
|
|
4311
|
+
/**
|
|
4312
|
+
* @param {Object} output The output of the model.
|
|
4313
|
+
* @param {Tensor} output.start_logits Span-start scores (before SoftMax).
|
|
4314
|
+
* @param {Tensor} output.end_logits Span-end scores (before SoftMax).
|
|
4315
|
+
*/
|
|
4316
|
+
constructor({ start_logits, end_logits }: {
|
|
4317
|
+
start_logits: Tensor;
|
|
4318
|
+
end_logits: Tensor;
|
|
4319
|
+
});
|
|
4320
|
+
start_logits: Tensor;
|
|
4321
|
+
end_logits: Tensor;
|
|
4322
|
+
}
|
|
4323
|
+
/**
|
|
4324
|
+
* Base class for causal language model (or autoregressive) outputs.
|
|
4325
|
+
*/
|
|
4326
|
+
export class CausalLMOutput extends ModelOutput {
|
|
4327
|
+
/**
|
|
4328
|
+
* @param {Object} output The output of the model.
|
|
4329
|
+
* @param {Tensor} output.logits Prediction scores of the language modeling head (scores for each vocabulary token before softmax).
|
|
4330
|
+
*/
|
|
4331
|
+
constructor({ logits }: {
|
|
4332
|
+
logits: Tensor;
|
|
4333
|
+
});
|
|
4334
|
+
logits: Tensor;
|
|
4335
|
+
}
|
|
4336
|
+
/**
|
|
4337
|
+
* Base class for causal language model (or autoregressive) outputs.
|
|
4338
|
+
*/
|
|
4339
|
+
export class CausalLMOutputWithPast extends ModelOutput {
|
|
4340
|
+
/**
|
|
4341
|
+
* @param {Object} output The output of the model.
|
|
4342
|
+
* @param {Tensor} output.logits Prediction scores of the language modeling head (scores for each vocabulary token before softmax).
|
|
4343
|
+
* @param {Tensor} output.past_key_values Contains pre-computed hidden-states (key and values in the self-attention blocks)
|
|
4344
|
+
* that can be used (see `past_key_values` input) to speed up sequential decoding.
|
|
4345
|
+
*/
|
|
4346
|
+
constructor({ logits, past_key_values }: {
|
|
4347
|
+
logits: Tensor;
|
|
4348
|
+
past_key_values: Tensor;
|
|
4349
|
+
});
|
|
4350
|
+
logits: Tensor;
|
|
4351
|
+
past_key_values: Tensor;
|
|
4352
|
+
}
|
|
4353
|
+
export class ImageMattingOutput extends ModelOutput {
|
|
4354
|
+
/**
|
|
4355
|
+
* @param {Object} output The output of the model.
|
|
4356
|
+
* @param {Tensor} output.alphas Estimated alpha values, of shape `(batch_size, num_channels, height, width)`.
|
|
4357
|
+
*/
|
|
4358
|
+
constructor({ alphas }: {
|
|
4359
|
+
alphas: Tensor;
|
|
4360
|
+
});
|
|
4361
|
+
alphas: Tensor;
|
|
4362
|
+
}
|
|
4363
|
+
/**
|
|
4364
|
+
* Describes the outputs for the VITS model.
|
|
4365
|
+
*/
|
|
4366
|
+
export class VitsModelOutput extends ModelOutput {
|
|
4367
|
+
/**
|
|
4368
|
+
* @param {Object} output The output of the model.
|
|
4369
|
+
* @param {Tensor} output.waveform The final audio waveform predicted by the model, of shape `(batch_size, sequence_length)`.
|
|
4370
|
+
* @param {Tensor} output.spectrogram The log-mel spectrogram predicted at the output of the flow model.
|
|
4371
|
+
* This spectrogram is passed to the Hi-Fi GAN decoder model to obtain the final audio waveform.
|
|
4372
|
+
*/
|
|
4373
|
+
constructor({ waveform, spectrogram }: {
|
|
4374
|
+
waveform: Tensor;
|
|
4375
|
+
spectrogram: Tensor;
|
|
4376
|
+
});
|
|
4377
|
+
waveform: Tensor;
|
|
4378
|
+
spectrogram: Tensor;
|
|
4379
|
+
}
|
|
4380
|
+
/**
|
|
4381
|
+
* Forward pass of a decoder model.
|
|
4382
|
+
* @param {Object} self The decoder model.
|
|
4383
|
+
* @param {Object} model_inputs The input data to be used for the forward pass.
|
|
4384
|
+
* @returns {Promise<Object>} The logits and past key values.
|
|
4385
|
+
* @private
|
|
4386
|
+
*/
|
|
4387
|
+
declare function decoderForward(self: any, model_inputs: any, is_encoder_decoder?: boolean): Promise<any>;
|
|
4388
|
+
declare function autoEncoderForward(self: any, model_inputs: any): Promise<any>;
|
|
4389
|
+
declare function multimodal_text_to_text_prepare_inputs_for_generation(self: any, ...args: any[]): any;
|
|
4390
|
+
import { GenerationConfig } from './generation/configuration_utils.js';
|
|
4391
|
+
import { StoppingCriteriaList } from './generation/stopping_criteria.js';
|
|
4392
|
+
import { Tensor } from './utils/tensor.js';
|
|
4393
|
+
import { WhisperGenerationConfig } from './models/whisper/generation_whisper.js';
|
|
4394
|
+
import { RawImage } from './utils/image.js';
|
|
4395
|
+
export {};
|
|
4396
|
+
//# sourceMappingURL=models.d.ts.map
|