@huggingface/transformers 3.0.0-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. package/LICENSE +202 -0
  2. package/README.md +376 -0
  3. package/dist/ort-wasm-simd-threaded.jsep.wasm +0 -0
  4. package/dist/transformers.cjs +30741 -0
  5. package/dist/transformers.cjs.map +1 -0
  6. package/dist/transformers.js +33858 -0
  7. package/dist/transformers.js.map +1 -0
  8. package/dist/transformers.min.cjs +173 -0
  9. package/dist/transformers.min.cjs.map +1 -0
  10. package/dist/transformers.min.js +231 -0
  11. package/dist/transformers.min.js.map +1 -0
  12. package/package.json +92 -0
  13. package/src/backends/onnx.js +151 -0
  14. package/src/configs.js +360 -0
  15. package/src/env.js +152 -0
  16. package/src/generation/configuration_utils.js +381 -0
  17. package/src/generation/logits_process.js +716 -0
  18. package/src/generation/logits_sampler.js +204 -0
  19. package/src/generation/parameters.js +35 -0
  20. package/src/generation/stopping_criteria.js +156 -0
  21. package/src/generation/streamers.js +212 -0
  22. package/src/models/whisper/common_whisper.js +151 -0
  23. package/src/models/whisper/generation_whisper.js +89 -0
  24. package/src/models.js +7028 -0
  25. package/src/ops/registry.js +92 -0
  26. package/src/pipelines.js +3341 -0
  27. package/src/processors.js +2614 -0
  28. package/src/tokenizers.js +4395 -0
  29. package/src/transformers.js +28 -0
  30. package/src/utils/audio.js +704 -0
  31. package/src/utils/constants.js +2 -0
  32. package/src/utils/core.js +149 -0
  33. package/src/utils/data-structures.js +445 -0
  34. package/src/utils/devices.js +11 -0
  35. package/src/utils/dtypes.js +62 -0
  36. package/src/utils/generic.js +35 -0
  37. package/src/utils/hub.js +671 -0
  38. package/src/utils/image.js +745 -0
  39. package/src/utils/maths.js +1050 -0
  40. package/src/utils/tensor.js +1378 -0
  41. package/types/backends/onnx.d.ts +26 -0
  42. package/types/backends/onnx.d.ts.map +1 -0
  43. package/types/configs.d.ts +59 -0
  44. package/types/configs.d.ts.map +1 -0
  45. package/types/env.d.ts +106 -0
  46. package/types/env.d.ts.map +1 -0
  47. package/types/generation/configuration_utils.d.ts +320 -0
  48. package/types/generation/configuration_utils.d.ts.map +1 -0
  49. package/types/generation/logits_process.d.ts +354 -0
  50. package/types/generation/logits_process.d.ts.map +1 -0
  51. package/types/generation/logits_sampler.d.ts +51 -0
  52. package/types/generation/logits_sampler.d.ts.map +1 -0
  53. package/types/generation/parameters.d.ts +47 -0
  54. package/types/generation/parameters.d.ts.map +1 -0
  55. package/types/generation/stopping_criteria.d.ts +81 -0
  56. package/types/generation/stopping_criteria.d.ts.map +1 -0
  57. package/types/generation/streamers.d.ts +81 -0
  58. package/types/generation/streamers.d.ts.map +1 -0
  59. package/types/models/whisper/common_whisper.d.ts +8 -0
  60. package/types/models/whisper/common_whisper.d.ts.map +1 -0
  61. package/types/models/whisper/generation_whisper.d.ts +76 -0
  62. package/types/models/whisper/generation_whisper.d.ts.map +1 -0
  63. package/types/models.d.ts +3845 -0
  64. package/types/models.d.ts.map +1 -0
  65. package/types/ops/registry.d.ts +11 -0
  66. package/types/ops/registry.d.ts.map +1 -0
  67. package/types/pipelines.d.ts +2403 -0
  68. package/types/pipelines.d.ts.map +1 -0
  69. package/types/processors.d.ts +917 -0
  70. package/types/processors.d.ts.map +1 -0
  71. package/types/tokenizers.d.ts +999 -0
  72. package/types/tokenizers.d.ts.map +1 -0
  73. package/types/transformers.d.ts +13 -0
  74. package/types/transformers.d.ts.map +1 -0
  75. package/types/utils/audio.d.ts +130 -0
  76. package/types/utils/audio.d.ts.map +1 -0
  77. package/types/utils/constants.d.ts +2 -0
  78. package/types/utils/constants.d.ts.map +1 -0
  79. package/types/utils/core.d.ts +91 -0
  80. package/types/utils/core.d.ts.map +1 -0
  81. package/types/utils/data-structures.d.ts +236 -0
  82. package/types/utils/data-structures.d.ts.map +1 -0
  83. package/types/utils/devices.d.ts +8 -0
  84. package/types/utils/devices.d.ts.map +1 -0
  85. package/types/utils/dtypes.d.ts +22 -0
  86. package/types/utils/dtypes.d.ts.map +1 -0
  87. package/types/utils/generic.d.ts +11 -0
  88. package/types/utils/generic.d.ts.map +1 -0
  89. package/types/utils/hub.d.ts +191 -0
  90. package/types/utils/hub.d.ts.map +1 -0
  91. package/types/utils/image.d.ts +119 -0
  92. package/types/utils/image.d.ts.map +1 -0
  93. package/types/utils/maths.d.ts +280 -0
  94. package/types/utils/maths.d.ts.map +1 -0
  95. package/types/utils/tensor.d.ts +392 -0
  96. package/types/utils/tensor.d.ts.map +1 -0
@@ -0,0 +1,3845 @@
1
+ declare const PreTrainedModel_base: new () => {
2
+ (...args: any[]): any;
3
+ _call(...args: any[]): any;
4
+ };
5
+ /**
6
+ * A base class for pre-trained models that provides the model configuration and an ONNX session.
7
+ */
8
+ export class PreTrainedModel extends PreTrainedModel_base {
9
+ /**
10
+ * Instantiate one of the model classes of the library from a pretrained model.
11
+ *
12
+ * The model class to instantiate is selected based on the `model_type` property of the config object
13
+ * (either passed as an argument or loaded from `pretrained_model_name_or_path` if possible)
14
+ *
15
+ * @param {string} pretrained_model_name_or_path The name or path of the pretrained model. Can be either:
16
+ * - A string, the *model id* of a pretrained model hosted inside a model repo on huggingface.co.
17
+ * Valid model ids can be located at the root-level, like `bert-base-uncased`, or namespaced under a
18
+ * user or organization name, like `dbmdz/bert-base-german-cased`.
19
+ * - A path to a *directory* containing model weights, e.g., `./my_model_directory/`.
20
+ * @param {import('./utils/hub.js').PretrainedModelOptions} options Additional options for loading the model.
21
+ *
22
+ * @returns {Promise<PreTrainedModel>} A new instance of the `PreTrainedModel` class.
23
+ */
24
+ static from_pretrained(pretrained_model_name_or_path: string, { progress_callback, config, cache_dir, local_files_only, revision, model_file_name, subfolder, device, dtype, use_external_data_format, session_options, }?: import('./utils/hub.js').PretrainedModelOptions): Promise<PreTrainedModel>;
25
+ /**
26
+ * Creates a new instance of the `PreTrainedModel` class.
27
+ * @param {import('./configs.js').PretrainedConfig} config The model configuration.
28
+ * @param {Record<string, any>} sessions The inference sessions for the model.
29
+ */
30
+ constructor(config: import('./configs.js').PretrainedConfig, sessions: Record<string, any>);
31
+ main_input_name: string;
32
+ forward_params: string[];
33
+ config: import("./configs.js").PretrainedConfig;
34
+ sessions: Record<string, any>;
35
+ can_generate: boolean;
36
+ _forward: typeof decoderForward;
37
+ _prepare_inputs_for_generation: typeof image_text_to_text_prepare_inputs_for_generation;
38
+ /** @type {import('./configs.js').TransformersJSConfig} */
39
+ custom_config: import('./configs.js').TransformersJSConfig;
40
+ /**
41
+ * Disposes of all the ONNX sessions that were created during inference.
42
+ * @returns {Promise<unknown[]>} An array of promises, one for each ONNX session that is being disposed.
43
+ * @todo Use https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/FinalizationRegistry
44
+ */
45
+ dispose(): Promise<unknown[]>;
46
+ /**
47
+ * Runs the model with the provided inputs
48
+ * @param {Object} model_inputs Object containing input tensors
49
+ * @returns {Promise<Object>} Object containing output tensors
50
+ */
51
+ _call(model_inputs: any): Promise<any>;
52
+ /**
53
+ * Forward method for a pretrained model. If not overridden by a subclass, the correct forward method
54
+ * will be chosen based on the model type.
55
+ * @param {Object} model_inputs The input data to the model in the format specified in the ONNX model.
56
+ * @returns {Promise<Object>} The output data from the model in the format specified in the ONNX model.
57
+ * @throws {Error} This method must be implemented in subclasses.
58
+ */
59
+ forward(model_inputs: any): Promise<any>;
60
+ /**
61
+ * This function returns a [`LogitsProcessorList`] list object that contains all relevant [`LogitsWarper`]
62
+ * instances used for multinomial sampling.
63
+ * @param {GenerationConfig} generation_config The generation config.
64
+ * @returns {LogitsProcessorList} generation_config
65
+ */
66
+ _get_logits_warper(generation_config: GenerationConfig): LogitsProcessorList;
67
+ /**
68
+ * @param {GenerationConfig} generation_config
69
+ * @param {number} input_ids_seq_length The starting sequence length for the input ids.
70
+ * @returns {LogitsProcessorList}
71
+ * @private
72
+ */
73
+ private _get_logits_processor;
74
+ /**
75
+ * This function merges multiple generation configs together to form a final generation config to be used by the model for text generation.
76
+ * It first creates an empty `GenerationConfig` object, then it applies the model's own `generation_config` property to it. Finally, if a `generation_config` object was passed in the arguments, it overwrites the corresponding properties in the final config with those of the passed config object.
77
+ * @param {GenerationConfig|null} generation_config A `GenerationConfig` object containing generation parameters.
78
+ * @param {Object} kwargs Additional generation parameters to be used in place of those in the `generation_config` object.
79
+ * @returns {GenerationConfig} The final generation config object to be used by the model for text generation.
80
+ */
81
+ _prepare_generation_config(generation_config: GenerationConfig | null, kwargs: any, cls?: typeof GenerationConfig): GenerationConfig;
82
+ /**
83
+ *
84
+ * @param {GenerationConfig} generation_config
85
+ * @param {StoppingCriteriaList} [stopping_criteria=null]
86
+ */
87
+ _get_stopping_criteria(generation_config: GenerationConfig, stopping_criteria?: StoppingCriteriaList): StoppingCriteriaList;
88
+ /**
89
+ * Confirms that the model class is compatible with generation.
90
+ * If not, raises an exception that points to the right class to use.
91
+ */
92
+ _validate_model_class(): void;
93
+ prepare_inputs_for_generation(...args: any[]): any;
94
+ /**
95
+ *
96
+ * @param {Object} inputs
97
+ * @param {bigint[][]} inputs.generated_input_ids
98
+ * @param {Object} inputs.outputs
99
+ * @param {Object} inputs.model_inputs
100
+ * @param {boolean} inputs.is_encoder_decoder
101
+ * @returns {Object} The updated model inputs for the next generation iteration.
102
+ */
103
+ _update_model_kwargs_for_generation({ generated_input_ids, outputs, model_inputs, is_encoder_decoder }: {
104
+ generated_input_ids: bigint[][];
105
+ outputs: any;
106
+ model_inputs: any;
107
+ is_encoder_decoder: boolean;
108
+ }): any;
109
+ /**
110
+ * This function extracts the model-specific `inputs` for generation.
111
+ * @param {Object} params
112
+ * @param {Tensor} [params.inputs=null]
113
+ * @param {number} [params.bos_token_id=null]
114
+ * @param {Record<string, Tensor|number[]>} [params.model_kwargs]
115
+ * @returns {{inputs_tensor: Tensor, model_inputs: Record<string, Tensor>, model_input_name: string}} The model-specific inputs for generation.
116
+ */
117
+ _prepare_model_inputs({ inputs, bos_token_id, model_kwargs }: {
118
+ inputs?: Tensor;
119
+ bos_token_id?: number;
120
+ model_kwargs?: Record<string, Tensor | number[]>;
121
+ }): {
122
+ inputs_tensor: Tensor;
123
+ model_inputs: Record<string, Tensor>;
124
+ model_input_name: string;
125
+ };
126
+ _prepare_encoder_decoder_kwargs_for_generation({ inputs_tensor, model_inputs, model_input_name, generation_config }: {
127
+ inputs_tensor: any;
128
+ model_inputs: any;
129
+ model_input_name: any;
130
+ generation_config: any;
131
+ }): Promise<any>;
132
+ /**
133
+ * Prepares `decoder_input_ids` for generation with encoder-decoder models
134
+ * @param {*} param0
135
+ */
136
+ _prepare_decoder_input_ids_for_generation({ batch_size, model_input_name, model_kwargs, decoder_start_token_id, bos_token_id, generation_config }: any): {
137
+ input_ids: any;
138
+ model_inputs: any;
139
+ };
140
+ /**
141
+ * Generates sequences of token ids for models with a language modeling head.
142
+ * @param {import('./generation/parameters.js').GenerationFunctionParameters} options
143
+ * @returns {Promise<ModelOutput|Tensor>} The output of the model, which can contain the generated token ids, attentions, and scores.
144
+ */
145
+ generate({ inputs, generation_config, logits_processor, stopping_criteria, streamer, ...kwargs }: any): Promise<ModelOutput | Tensor>;
146
+ /**
147
+ * Returns an object containing past key values from the given decoder results object.
148
+ *
149
+ * @param {Object} decoderResults The decoder results object.
150
+ * @param {Object} pastKeyValues The previous past key values.
151
+ * @param {boolean} [dispose=true] Whether to dispose of the old gpu buffer.
152
+ * @returns {Object} An object containing past key values.
153
+ */
154
+ getPastKeyValues(decoderResults: any, pastKeyValues: any, dispose?: boolean): any;
155
+ /**
156
+ * Returns an object containing attentions from the given model output object.
157
+ *
158
+ * @param {Object} model_output The output of the model.
159
+ * @returns {{cross_attentions?: Tensor[]}} An object containing attentions.
160
+ */
161
+ getAttentions(model_output: any): {
162
+ cross_attentions?: Tensor[];
163
+ };
164
+ /**
165
+ * Adds past key values to the decoder feeds object. If pastKeyValues is null, creates new tensors for past key values.
166
+ *
167
+ * @param {Object} decoderFeeds The decoder feeds object to add past key values to.
168
+ * @param {Object} pastKeyValues An object containing past key values.
169
+ */
170
+ addPastKeyValues(decoderFeeds: any, pastKeyValues: any): void;
171
+ encode_image({ pixel_values }: {
172
+ pixel_values: any;
173
+ }): Promise<any>;
174
+ encode_text({ input_ids }: {
175
+ input_ids: any;
176
+ }): Promise<any>;
177
+ }
178
+ export class ModelOutput {
179
+ }
180
+ /**
181
+ * Base class for model's outputs, with potential hidden states and attentions.
182
+ */
183
+ export class BaseModelOutput extends ModelOutput {
184
+ /**
185
+ * @param {Object} output The output of the model.
186
+ * @param {Tensor} output.last_hidden_state Sequence of hidden-states at the output of the last layer of the model.
187
+ * @param {Tensor} [output.hidden_states] Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.
188
+ * @param {Tensor} [output.attentions] Attentions weights after the attention softmax, used to compute the weighted average in the self-attention heads.
189
+ */
190
+ constructor({ last_hidden_state, hidden_states, attentions }: {
191
+ last_hidden_state: Tensor;
192
+ hidden_states?: Tensor;
193
+ attentions?: Tensor;
194
+ });
195
+ last_hidden_state: Tensor;
196
+ hidden_states: Tensor;
197
+ attentions: Tensor;
198
+ }
199
+ export class BertPreTrainedModel extends PreTrainedModel {
200
+ }
201
+ export class BertModel extends BertPreTrainedModel {
202
+ }
203
+ /**
204
+ * BertForMaskedLM is a class representing a BERT model for masked language modeling.
205
+ */
206
+ export class BertForMaskedLM extends BertPreTrainedModel {
207
+ /**
208
+ * Calls the model on new inputs.
209
+ *
210
+ * @param {Object} model_inputs The inputs to the model.
211
+ * @returns {Promise<MaskedLMOutput>} An object containing the model's output logits for masked language modeling.
212
+ */
213
+ _call(model_inputs: any): Promise<MaskedLMOutput>;
214
+ }
215
+ /**
216
+ * BertForSequenceClassification is a class representing a BERT model for sequence classification.
217
+ */
218
+ export class BertForSequenceClassification extends BertPreTrainedModel {
219
+ /**
220
+ * Calls the model on new inputs.
221
+ *
222
+ * @param {Object} model_inputs The inputs to the model.
223
+ * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
224
+ */
225
+ _call(model_inputs: any): Promise<SequenceClassifierOutput>;
226
+ }
227
+ /**
228
+ * BertForTokenClassification is a class representing a BERT model for token classification.
229
+ */
230
+ export class BertForTokenClassification extends BertPreTrainedModel {
231
+ /**
232
+ * Calls the model on new inputs.
233
+ *
234
+ * @param {Object} model_inputs The inputs to the model.
235
+ * @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for token classification.
236
+ */
237
+ _call(model_inputs: any): Promise<TokenClassifierOutput>;
238
+ }
239
+ /**
240
+ * BertForQuestionAnswering is a class representing a BERT model for question answering.
241
+ */
242
+ export class BertForQuestionAnswering extends BertPreTrainedModel {
243
+ /**
244
+ * Calls the model on new inputs.
245
+ *
246
+ * @param {Object} model_inputs The inputs to the model.
247
+ * @returns {Promise<QuestionAnsweringModelOutput>} An object containing the model's output logits for question answering.
248
+ */
249
+ _call(model_inputs: any): Promise<QuestionAnsweringModelOutput>;
250
+ }
251
+ export class NomicBertPreTrainedModel extends PreTrainedModel {
252
+ }
253
+ export class NomicBertModel extends NomicBertPreTrainedModel {
254
+ }
255
+ export class RoFormerPreTrainedModel extends PreTrainedModel {
256
+ }
257
+ /**
258
+ * The bare RoFormer Model transformer outputting raw hidden-states without any specific head on top.
259
+ */
260
+ export class RoFormerModel extends RoFormerPreTrainedModel {
261
+ }
262
+ /**
263
+ * RoFormer Model with a `language modeling` head on top.
264
+ */
265
+ export class RoFormerForMaskedLM extends RoFormerPreTrainedModel {
266
+ /**
267
+ * Calls the model on new inputs.
268
+ *
269
+ * @param {Object} model_inputs The inputs to the model.
270
+ * @returns {Promise<MaskedLMOutput>} An object containing the model's output logits for masked language modeling.
271
+ */
272
+ _call(model_inputs: any): Promise<MaskedLMOutput>;
273
+ }
274
+ /**
275
+ * RoFormer Model transformer with a sequence classification/regression head on top (a linear layer on top of the pooled output)
276
+ */
277
+ export class RoFormerForSequenceClassification extends RoFormerPreTrainedModel {
278
+ /**
279
+ * Calls the model on new inputs.
280
+ *
281
+ * @param {Object} model_inputs The inputs to the model.
282
+ * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
283
+ */
284
+ _call(model_inputs: any): Promise<SequenceClassifierOutput>;
285
+ }
286
+ /**
287
+ * RoFormer Model with a token classification head on top (a linear layer on top of the hidden-states output)
288
+ * e.g. for Named-Entity-Recognition (NER) tasks.
289
+ */
290
+ export class RoFormerForTokenClassification extends RoFormerPreTrainedModel {
291
+ /**
292
+ * Calls the model on new inputs.
293
+ *
294
+ * @param {Object} model_inputs The inputs to the model.
295
+ * @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for token classification.
296
+ */
297
+ _call(model_inputs: any): Promise<TokenClassifierOutput>;
298
+ }
299
+ /**
300
+ * RoFormer Model with a span classification head on top for extractive question-answering tasks like SQuAD
301
+ * (linear layers on top of the hidden-states output to compute `span start logits` and `span end logits`).
302
+ */
303
+ export class RoFormerForQuestionAnswering extends RoFormerPreTrainedModel {
304
+ /**
305
+ * Calls the model on new inputs.
306
+ *
307
+ * @param {Object} model_inputs The inputs to the model.
308
+ * @returns {Promise<QuestionAnsweringModelOutput>} An object containing the model's output logits for question answering.
309
+ */
310
+ _call(model_inputs: any): Promise<QuestionAnsweringModelOutput>;
311
+ }
312
+ export class ConvBertPreTrainedModel extends PreTrainedModel {
313
+ }
314
+ /**
315
+ * The bare ConvBERT Model transformer outputting raw hidden-states without any specific head on top.
316
+ */
317
+ export class ConvBertModel extends ConvBertPreTrainedModel {
318
+ }
319
+ /**
320
+ * ConvBERT Model with a language modeling head on top.
321
+ */
322
+ export class ConvBertForMaskedLM extends ConvBertPreTrainedModel {
323
+ /**
324
+ * Calls the model on new inputs.
325
+ *
326
+ * @param {Object} model_inputs The inputs to the model.
327
+ * @returns {Promise<MaskedLMOutput>} An object containing the model's output logits for masked language modeling.
328
+ */
329
+ _call(model_inputs: any): Promise<MaskedLMOutput>;
330
+ }
331
+ /**
332
+ * ConvBERT Model transformer with a sequence classification/regression head on top (a linear layer on top of the pooled output)
333
+ */
334
+ export class ConvBertForSequenceClassification extends ConvBertPreTrainedModel {
335
+ /**
336
+ * Calls the model on new inputs.
337
+ *
338
+ * @param {Object} model_inputs The inputs to the model.
339
+ * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
340
+ */
341
+ _call(model_inputs: any): Promise<SequenceClassifierOutput>;
342
+ }
343
+ /**
344
+ * ConvBERT Model with a token classification head on top (a linear layer on top of the hidden-states output)
345
+ * e.g. for Named-Entity-Recognition (NER) tasks.
346
+ */
347
+ export class ConvBertForTokenClassification extends ConvBertPreTrainedModel {
348
+ /**
349
+ * Calls the model on new inputs.
350
+ *
351
+ * @param {Object} model_inputs The inputs to the model.
352
+ * @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for token classification.
353
+ */
354
+ _call(model_inputs: any): Promise<TokenClassifierOutput>;
355
+ }
356
+ /**
357
+ * ConvBERT Model with a span classification head on top for extractive question-answering tasks like SQuAD
358
+ * (linear layers on top of the hidden-states output to compute `span start logits` and `span end logits`)
359
+ */
360
+ export class ConvBertForQuestionAnswering extends ConvBertPreTrainedModel {
361
+ /**
362
+ * Calls the model on new inputs.
363
+ *
364
+ * @param {Object} model_inputs The inputs to the model.
365
+ * @returns {Promise<QuestionAnsweringModelOutput>} An object containing the model's output logits for question answering.
366
+ */
367
+ _call(model_inputs: any): Promise<QuestionAnsweringModelOutput>;
368
+ }
369
+ export class ElectraPreTrainedModel extends PreTrainedModel {
370
+ }
371
+ /**
372
+ * The bare Electra Model transformer outputting raw hidden-states without any specific head on top.
373
+ * Identical to the BERT model except that it uses an additional linear layer between the embedding
374
+ * layer and the encoder if the hidden size and embedding size are different.
375
+ */
376
+ export class ElectraModel extends ElectraPreTrainedModel {
377
+ }
378
+ /**
379
+ * Electra model with a language modeling head on top.
380
+ */
381
+ export class ElectraForMaskedLM extends ElectraPreTrainedModel {
382
+ /**
383
+ * Calls the model on new inputs.
384
+ *
385
+ * @param {Object} model_inputs The inputs to the model.
386
+ * @returns {Promise<MaskedLMOutput>} An object containing the model's output logits for masked language modeling.
387
+ */
388
+ _call(model_inputs: any): Promise<MaskedLMOutput>;
389
+ }
390
+ /**
391
+ * ELECTRA Model transformer with a sequence classification/regression head on top (a linear layer on top of the pooled output)
392
+ */
393
+ export class ElectraForSequenceClassification extends ElectraPreTrainedModel {
394
+ /**
395
+ * Calls the model on new inputs.
396
+ *
397
+ * @param {Object} model_inputs The inputs to the model.
398
+ * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
399
+ */
400
+ _call(model_inputs: any): Promise<SequenceClassifierOutput>;
401
+ }
402
+ /**
403
+ * Electra model with a token classification head on top.
404
+ */
405
+ export class ElectraForTokenClassification extends ElectraPreTrainedModel {
406
+ /**
407
+ * Calls the model on new inputs.
408
+ *
409
+ * @param {Object} model_inputs The inputs to the model.
410
+ * @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for token classification.
411
+ */
412
+ _call(model_inputs: any): Promise<TokenClassifierOutput>;
413
+ }
414
+ /**
415
+ * ELECTRA Model with a span classification head on top for extractive question-answering tasks like SQuAD
416
+ * (linear layers on top of the hidden-states output to compute `span start logits` and `span end logits`).
417
+ */
418
+ export class ElectraForQuestionAnswering extends ElectraPreTrainedModel {
419
+ /**
420
+ * Calls the model on new inputs.
421
+ *
422
+ * @param {Object} model_inputs The inputs to the model.
423
+ * @returns {Promise<QuestionAnsweringModelOutput>} An object containing the model's output logits for question answering.
424
+ */
425
+ _call(model_inputs: any): Promise<QuestionAnsweringModelOutput>;
426
+ }
427
+ export class CamembertPreTrainedModel extends PreTrainedModel {
428
+ }
429
+ /**
430
+ * The bare CamemBERT Model transformer outputting raw hidden-states without any specific head on top.
431
+ */
432
+ export class CamembertModel extends CamembertPreTrainedModel {
433
+ }
434
+ /**
435
+ * CamemBERT Model with a `language modeling` head on top.
436
+ */
437
+ export class CamembertForMaskedLM extends CamembertPreTrainedModel {
438
+ /**
439
+ * Calls the model on new inputs.
440
+ *
441
+ * @param {Object} model_inputs The inputs to the model.
442
+ * @returns {Promise<MaskedLMOutput>} An object containing the model's output logits for masked language modeling.
443
+ */
444
+ _call(model_inputs: any): Promise<MaskedLMOutput>;
445
+ }
446
+ /**
447
+ * CamemBERT Model transformer with a sequence classification/regression head on top (a linear layer on top of the pooled output) e.g. for GLUE tasks.
448
+ */
449
+ export class CamembertForSequenceClassification extends CamembertPreTrainedModel {
450
+ /**
451
+ * Calls the model on new inputs.
452
+ *
453
+ * @param {Object} model_inputs The inputs to the model.
454
+ * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
455
+ */
456
+ _call(model_inputs: any): Promise<SequenceClassifierOutput>;
457
+ }
458
+ /**
459
+ * CamemBERT Model with a token classification head on top (a linear layer on top of the hidden-states output) e.g. for Named-Entity-Recognition (NER) tasks.
460
+ */
461
+ export class CamembertForTokenClassification extends CamembertPreTrainedModel {
462
+ /**
463
+ * Calls the model on new inputs.
464
+ *
465
+ * @param {Object} model_inputs The inputs to the model.
466
+ * @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for token classification.
467
+ */
468
+ _call(model_inputs: any): Promise<TokenClassifierOutput>;
469
+ }
470
+ /**
471
+ * CamemBERT Model with a span classification head on top for extractive question-answering tasks
472
+ */
473
+ export class CamembertForQuestionAnswering extends CamembertPreTrainedModel {
474
+ /**
475
+ * Calls the model on new inputs.
476
+ *
477
+ * @param {Object} model_inputs The inputs to the model.
478
+ * @returns {Promise<QuestionAnsweringModelOutput>} An object containing the model's output logits for question answering.
479
+ */
480
+ _call(model_inputs: any): Promise<QuestionAnsweringModelOutput>;
481
+ }
482
+ export class DebertaPreTrainedModel extends PreTrainedModel {
483
+ }
484
+ /**
485
+ * The bare DeBERTa Model transformer outputting raw hidden-states without any specific head on top.
486
+ */
487
+ export class DebertaModel extends DebertaPreTrainedModel {
488
+ }
489
+ /**
490
+ * DeBERTa Model with a `language modeling` head on top.
491
+ */
492
+ export class DebertaForMaskedLM extends DebertaPreTrainedModel {
493
+ /**
494
+ * Calls the model on new inputs.
495
+ *
496
+ * @param {Object} model_inputs The inputs to the model.
497
+ * @returns {Promise<MaskedLMOutput>} An object containing the model's output logits for masked language modeling.
498
+ */
499
+ _call(model_inputs: any): Promise<MaskedLMOutput>;
500
+ }
501
+ /**
502
+ * DeBERTa Model transformer with a sequence classification/regression head on top (a linear layer on top of the pooled output)
503
+ */
504
+ export class DebertaForSequenceClassification extends DebertaPreTrainedModel {
505
+ /**
506
+ * Calls the model on new inputs.
507
+ *
508
+ * @param {Object} model_inputs The inputs to the model.
509
+ * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
510
+ */
511
+ _call(model_inputs: any): Promise<SequenceClassifierOutput>;
512
+ }
513
+ /**
514
+ * DeBERTa Model with a token classification head on top (a linear layer on top of the hidden-states output) e.g. for Named-Entity-Recognition (NER) tasks.
515
+ */
516
+ export class DebertaForTokenClassification extends DebertaPreTrainedModel {
517
+ /**
518
+ * Calls the model on new inputs.
519
+ *
520
+ * @param {Object} model_inputs The inputs to the model.
521
+ * @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for token classification.
522
+ */
523
+ _call(model_inputs: any): Promise<TokenClassifierOutput>;
524
+ }
525
+ /**
526
+ * DeBERTa Model with a span classification head on top for extractive question-answering tasks like SQuAD (a linear
527
+ * layers on top of the hidden-states output to compute `span start logits` and `span end logits`).
528
+ */
529
+ export class DebertaForQuestionAnswering extends DebertaPreTrainedModel {
530
+ /**
531
+ * Calls the model on new inputs.
532
+ *
533
+ * @param {Object} model_inputs The inputs to the model.
534
+ * @returns {Promise<QuestionAnsweringModelOutput>} An object containing the model's output logits for question answering.
535
+ */
536
+ _call(model_inputs: any): Promise<QuestionAnsweringModelOutput>;
537
+ }
538
+ export class DebertaV2PreTrainedModel extends PreTrainedModel {
539
+ }
540
+ /**
541
+ * The bare DeBERTa-V2 Model transformer outputting raw hidden-states without any specific head on top.
542
+ */
543
+ export class DebertaV2Model extends DebertaV2PreTrainedModel {
544
+ }
545
+ /**
546
+ * DeBERTa-V2 Model with a `language modeling` head on top.
547
+ */
548
+ export class DebertaV2ForMaskedLM extends DebertaV2PreTrainedModel {
549
+ /**
550
+ * Calls the model on new inputs.
551
+ *
552
+ * @param {Object} model_inputs The inputs to the model.
553
+ * @returns {Promise<MaskedLMOutput>} An object containing the model's output logits for masked language modeling.
554
+ */
555
+ _call(model_inputs: any): Promise<MaskedLMOutput>;
556
+ }
557
+ /**
558
+ * DeBERTa-V2 Model transformer with a sequence classification/regression head on top (a linear layer on top of the pooled output)
559
+ */
560
+ export class DebertaV2ForSequenceClassification extends DebertaV2PreTrainedModel {
561
+ /**
562
+ * Calls the model on new inputs.
563
+ *
564
+ * @param {Object} model_inputs The inputs to the model.
565
+ * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
566
+ */
567
+ _call(model_inputs: any): Promise<SequenceClassifierOutput>;
568
+ }
569
+ /**
570
+ * DeBERTa-V2 Model with a token classification head on top (a linear layer on top of the hidden-states output) e.g. for Named-Entity-Recognition (NER) tasks.
571
+ */
572
+ export class DebertaV2ForTokenClassification extends DebertaV2PreTrainedModel {
573
+ /**
574
+ * Calls the model on new inputs.
575
+ *
576
+ * @param {Object} model_inputs The inputs to the model.
577
+ * @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for token classification.
578
+ */
579
+ _call(model_inputs: any): Promise<TokenClassifierOutput>;
580
+ }
581
+ /**
582
+ * DeBERTa-V2 Model with a span classification head on top for extractive question-answering tasks like SQuAD (a linear
583
+ * layer on top of the hidden-states output to compute `span start logits` and `span end logits`).
584
+ */
585
+ export class DebertaV2ForQuestionAnswering extends DebertaV2PreTrainedModel {
586
+ /**
587
+ * Calls the model on new inputs.
588
+ *
589
+ * @param {Object} model_inputs The inputs to the model.
590
+ * @returns {Promise<QuestionAnsweringModelOutput>} An object containing the model's output logits for question answering.
591
+ */
592
+ _call(model_inputs: any): Promise<QuestionAnsweringModelOutput>;
593
+ }
594
+ export class DistilBertPreTrainedModel extends PreTrainedModel {
595
+ }
596
+ export class DistilBertModel extends DistilBertPreTrainedModel {
597
+ }
598
+ /**
599
+ * DistilBertForSequenceClassification is a class representing a DistilBERT model for sequence classification.
600
+ */
601
+ export class DistilBertForSequenceClassification extends DistilBertPreTrainedModel {
602
+ /**
603
+ * Calls the model on new inputs.
604
+ *
605
+ * @param {Object} model_inputs The inputs to the model.
606
+ * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
607
+ */
608
+ _call(model_inputs: any): Promise<SequenceClassifierOutput>;
609
+ }
610
+ /**
611
+ * DistilBertForTokenClassification is a class representing a DistilBERT model for token classification.
612
+ */
613
+ export class DistilBertForTokenClassification extends DistilBertPreTrainedModel {
614
+ /**
615
+ * Calls the model on new inputs.
616
+ *
617
+ * @param {Object} model_inputs The inputs to the model.
618
+ * @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for token classification.
619
+ */
620
+ _call(model_inputs: any): Promise<TokenClassifierOutput>;
621
+ }
622
+ /**
623
+ * DistilBertForQuestionAnswering is a class representing a DistilBERT model for question answering.
624
+ */
625
+ export class DistilBertForQuestionAnswering extends DistilBertPreTrainedModel {
626
+ /**
627
+ * Calls the model on new inputs.
628
+ *
629
+ * @param {Object} model_inputs The inputs to the model.
630
+ * @returns {Promise<QuestionAnsweringModelOutput>} An object containing the model's output logits for question answering.
631
+ */
632
+ _call(model_inputs: any): Promise<QuestionAnsweringModelOutput>;
633
+ }
634
+ /**
635
+ * DistilBertForMaskedLM is a class representing a DistilBERT model for masking task.
636
+ */
637
+ export class DistilBertForMaskedLM extends DistilBertPreTrainedModel {
638
+ /**
639
+ * Calls the model on new inputs.
640
+ *
641
+ * @param {Object} model_inputs The inputs to the model.
642
+ * @returns {Promise<MaskedLMOutput>} returned object
643
+ */
644
+ _call(model_inputs: any): Promise<MaskedLMOutput>;
645
+ }
646
+ export class EsmPreTrainedModel extends PreTrainedModel {
647
+ }
648
+ /**
649
+ * The bare ESM Model transformer outputting raw hidden-states without any specific head on top.
650
+ */
651
+ export class EsmModel extends EsmPreTrainedModel {
652
+ }
653
+ /**
654
+ * ESM Model with a `language modeling` head on top.
655
+ */
656
+ export class EsmForMaskedLM extends EsmPreTrainedModel {
657
+ /**
658
+ * Calls the model on new inputs.
659
+ *
660
+ * @param {Object} model_inputs The inputs to the model.
661
+ * @returns {Promise<MaskedLMOutput>} An object containing the model's output logits for masked language modeling.
662
+ */
663
+ _call(model_inputs: any): Promise<MaskedLMOutput>;
664
+ }
665
+ /**
666
+ * ESM Model transformer with a sequence classification/regression head on top (a linear layer on top of the pooled output)
667
+ */
668
+ export class EsmForSequenceClassification extends EsmPreTrainedModel {
669
+ /**
670
+ * Calls the model on new inputs.
671
+ *
672
+ * @param {Object} model_inputs The inputs to the model.
673
+ * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
674
+ */
675
+ _call(model_inputs: any): Promise<SequenceClassifierOutput>;
676
+ }
677
+ /**
678
+ * ESM Model with a token classification head on top (a linear layer on top of the hidden-states output)
679
+ * e.g. for Named-Entity-Recognition (NER) tasks.
680
+ */
681
+ export class EsmForTokenClassification extends EsmPreTrainedModel {
682
+ /**
683
+ * Calls the model on new inputs.
684
+ *
685
+ * @param {Object} model_inputs The inputs to the model.
686
+ * @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for token classification.
687
+ */
688
+ _call(model_inputs: any): Promise<TokenClassifierOutput>;
689
+ }
690
+ export class MobileBertPreTrainedModel extends PreTrainedModel {
691
+ }
692
+ export class MobileBertModel extends MobileBertPreTrainedModel {
693
+ }
694
+ /**
695
+ * MobileBertForMaskedLM is a class representing a MobileBERT model for masking task.
696
+ */
697
+ export class MobileBertForMaskedLM extends MobileBertPreTrainedModel {
698
+ /**
699
+ * Calls the model on new inputs.
700
+ *
701
+ * @param {Object} model_inputs The inputs to the model.
702
+ * @returns {Promise<MaskedLMOutput>} returned object
703
+ */
704
+ _call(model_inputs: any): Promise<MaskedLMOutput>;
705
+ }
706
+ /**
707
+ * MobileBert Model transformer with a sequence classification/regression head on top (a linear layer on top of the pooled output)
708
+ */
709
+ export class MobileBertForSequenceClassification extends MobileBertPreTrainedModel {
710
+ /**
711
+ * Calls the model on new inputs.
712
+ *
713
+ * @param {Object} model_inputs The inputs to the model.
714
+ * @returns {Promise<SequenceClassifierOutput>} returned object
715
+ */
716
+ _call(model_inputs: any): Promise<SequenceClassifierOutput>;
717
+ }
718
+ /**
719
+ * MobileBert Model with a span classification head on top for extractive question-answering tasks
720
+ */
721
+ export class MobileBertForQuestionAnswering extends MobileBertPreTrainedModel {
722
+ /**
723
+ * Calls the model on new inputs.
724
+ *
725
+ * @param {Object} model_inputs The inputs to the model.
726
+ * @returns {Promise<QuestionAnsweringModelOutput>} returned object
727
+ */
728
+ _call(model_inputs: any): Promise<QuestionAnsweringModelOutput>;
729
+ }
730
+ export class MPNetPreTrainedModel extends PreTrainedModel {
731
+ }
732
+ /**
733
+ * The bare MPNet Model transformer outputting raw hidden-states without any specific head on top.
734
+ */
735
+ export class MPNetModel extends MPNetPreTrainedModel {
736
+ }
737
+ /**
738
+ * MPNetForMaskedLM is a class representing a MPNet model for masked language modeling.
739
+ */
740
+ export class MPNetForMaskedLM extends MPNetPreTrainedModel {
741
+ /**
742
+ * Calls the model on new inputs.
743
+ *
744
+ * @param {Object} model_inputs The inputs to the model.
745
+ * @returns {Promise<MaskedLMOutput>} An object containing the model's output logits for masked language modeling.
746
+ */
747
+ _call(model_inputs: any): Promise<MaskedLMOutput>;
748
+ }
749
+ /**
750
+ * MPNetForSequenceClassification is a class representing a MPNet model for sequence classification.
751
+ */
752
+ export class MPNetForSequenceClassification extends MPNetPreTrainedModel {
753
+ /**
754
+ * Calls the model on new inputs.
755
+ *
756
+ * @param {Object} model_inputs The inputs to the model.
757
+ * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
758
+ */
759
+ _call(model_inputs: any): Promise<SequenceClassifierOutput>;
760
+ }
761
+ /**
762
+ * MPNetForTokenClassification is a class representing a MPNet model for token classification.
763
+ */
764
+ export class MPNetForTokenClassification extends MPNetPreTrainedModel {
765
+ /**
766
+ * Calls the model on new inputs.
767
+ *
768
+ * @param {Object} model_inputs The inputs to the model.
769
+ * @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for token classification.
770
+ */
771
+ _call(model_inputs: any): Promise<TokenClassifierOutput>;
772
+ }
773
+ /**
774
+ * MPNetForQuestionAnswering is a class representing a MPNet model for question answering.
775
+ */
776
+ export class MPNetForQuestionAnswering extends MPNetPreTrainedModel {
777
+ /**
778
+ * Calls the model on new inputs.
779
+ *
780
+ * @param {Object} model_inputs The inputs to the model.
781
+ * @returns {Promise<QuestionAnsweringModelOutput>} An object containing the model's output logits for question answering.
782
+ */
783
+ _call(model_inputs: any): Promise<QuestionAnsweringModelOutput>;
784
+ }
785
+ export class SqueezeBertPreTrainedModel extends PreTrainedModel {
786
+ }
787
+ export class SqueezeBertModel extends SqueezeBertPreTrainedModel {
788
+ }
789
+ export class SqueezeBertForMaskedLM extends SqueezeBertPreTrainedModel {
790
+ /**
791
+ * Calls the model on new inputs.
792
+ *
793
+ * @param {Object} model_inputs The inputs to the model.
794
+ * @returns {Promise<MaskedLMOutput>} returned object
795
+ */
796
+ _call(model_inputs: any): Promise<MaskedLMOutput>;
797
+ }
798
+ export class SqueezeBertForSequenceClassification extends SqueezeBertPreTrainedModel {
799
+ /**
800
+ * Calls the model on new inputs.
801
+ *
802
+ * @param {Object} model_inputs The inputs to the model.
803
+ * @returns {Promise<SequenceClassifierOutput>} returned object
804
+ */
805
+ _call(model_inputs: any): Promise<SequenceClassifierOutput>;
806
+ }
807
+ export class SqueezeBertForQuestionAnswering extends SqueezeBertPreTrainedModel {
808
+ /**
809
+ * Calls the model on new inputs.
810
+ *
811
+ * @param {Object} model_inputs The inputs to the model.
812
+ * @returns {Promise<QuestionAnsweringModelOutput>} returned object
813
+ */
814
+ _call(model_inputs: any): Promise<QuestionAnsweringModelOutput>;
815
+ }
816
+ export class AlbertPreTrainedModel extends PreTrainedModel {
817
+ }
818
+ export class AlbertModel extends AlbertPreTrainedModel {
819
+ }
820
+ export class AlbertForSequenceClassification extends AlbertPreTrainedModel {
821
+ /**
822
+ * Calls the model on new inputs.
823
+ *
824
+ * @param {Object} model_inputs The inputs to the model.
825
+ * @returns {Promise<SequenceClassifierOutput>} returned object
826
+ */
827
+ _call(model_inputs: any): Promise<SequenceClassifierOutput>;
828
+ }
829
+ export class AlbertForQuestionAnswering extends AlbertPreTrainedModel {
830
+ /**
831
+ * Calls the model on new inputs.
832
+ *
833
+ * @param {Object} model_inputs The inputs to the model.
834
+ * @returns {Promise<QuestionAnsweringModelOutput>} returned object
835
+ */
836
+ _call(model_inputs: any): Promise<QuestionAnsweringModelOutput>;
837
+ }
838
+ export class AlbertForMaskedLM extends AlbertPreTrainedModel {
839
+ /**
840
+ * Calls the model on new inputs.
841
+ *
842
+ * @param {Object} model_inputs The inputs to the model.
843
+ * @returns {Promise<MaskedLMOutput>} returned object
844
+ */
845
+ _call(model_inputs: any): Promise<MaskedLMOutput>;
846
+ }
847
+ export class T5PreTrainedModel extends PreTrainedModel {
848
+ /**
849
+ * Creates a new instance of the `T5PreTrainedModel` class.
850
+ * @param {Object} config The model configuration.
851
+ * @param {Record<string, any>} sessions The inference sessions for the model.
852
+ * @param {GenerationConfig} generation_config The generation configuration.
853
+ */
854
+ constructor(config: any, sessions: Record<string, any>, generation_config: GenerationConfig);
855
+ generation_config: GenerationConfig;
856
+ }
857
+ export class T5Model extends T5PreTrainedModel {
858
+ }
859
+ /**
860
+ * T5ForConditionalGeneration is a class representing a T5 model for conditional generation.
861
+ */
862
+ export class T5ForConditionalGeneration extends T5PreTrainedModel {
863
+ }
864
+ /**
865
+ * An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained models.
866
+ */
867
+ export class LongT5PreTrainedModel extends PreTrainedModel {
868
+ /**
869
+ * Creates a new instance of the `LongT5PreTrainedModel` class.
870
+ * @param {Object} config The model configuration.
871
+ * @param {Record<string, any>} sessions The inference sessions for the model.
872
+ * @param {GenerationConfig} generation_config The generation configuration.
873
+ */
874
+ constructor(config: any, sessions: Record<string, any>, generation_config: GenerationConfig);
875
+ generation_config: GenerationConfig;
876
+ }
877
+ /**
878
+ * The bare LONGT5 Model transformer outputting raw hidden-states without any specific head on top.
879
+ */
880
+ export class LongT5Model extends LongT5PreTrainedModel {
881
+ }
882
+ /**
883
+ * LONGT5 Model with a `language modeling` head on top.
884
+ */
885
+ export class LongT5ForConditionalGeneration extends LongT5PreTrainedModel {
886
+ }
887
+ export class MT5PreTrainedModel extends PreTrainedModel {
888
+ /**
889
+ * Creates a new instance of the `MT5PreTrainedModel` class.
890
+ * @param {Object} config The model configuration.
891
+ * @param {Record<string, any>} sessions The inference sessions for the model.
892
+ * @param {GenerationConfig} generation_config The generation configuration.
893
+ */
894
+ constructor(config: any, sessions: Record<string, any>, generation_config: GenerationConfig);
895
+ generation_config: GenerationConfig;
896
+ }
897
+ export class MT5Model extends MT5PreTrainedModel {
898
+ }
899
+ /**
900
+ * A class representing a conditional sequence-to-sequence model based on the MT5 architecture.
901
+ */
902
+ export class MT5ForConditionalGeneration extends MT5PreTrainedModel {
903
+ }
904
+ export class BartPretrainedModel extends PreTrainedModel {
905
+ /**
906
+ * Creates a new instance of the `BartPretrainedModel` class.
907
+ * @param {Object} config The model configuration.
908
+ * @param {Record<string, any>} sessions The inference sessions for the model.
909
+ * @param {GenerationConfig} generation_config The generation configuration.
910
+ */
911
+ constructor(config: any, sessions: Record<string, any>, generation_config: GenerationConfig);
912
+ generation_config: GenerationConfig;
913
+ }
914
+ /**
915
+ * The bare BART Model outputting raw hidden-states without any specific head on top.
916
+ */
917
+ export class BartModel extends BartPretrainedModel {
918
+ }
919
+ /**
920
+ * The BART Model with a language modeling head. Can be used for summarization.
921
+ */
922
+ export class BartForConditionalGeneration extends BartPretrainedModel {
923
+ }
924
+ /**
925
+ * Bart model with a sequence classification/head on top (a linear layer on top of the pooled output)
926
+ */
927
+ export class BartForSequenceClassification extends BartPretrainedModel {
928
+ /**
929
+ * Calls the model on new inputs.
930
+ *
931
+ * @param {Object} model_inputs The inputs to the model.
932
+ * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
933
+ */
934
+ _call(model_inputs: any): Promise<SequenceClassifierOutput>;
935
+ }
936
+ export class MBartPreTrainedModel extends PreTrainedModel {
937
+ /**
938
+ * Creates a new instance of the `MBartPreTrainedModel` class.
939
+ * @param {Object} config The model configuration.
940
+ * @param {Record<string, any>} sessions The inference sessions for the model.
941
+ * @param {GenerationConfig} generation_config The generation configuration.
942
+ */
943
+ constructor(config: any, sessions: Record<string, any>, generation_config: GenerationConfig);
944
+ generation_config: GenerationConfig;
945
+ }
946
+ /**
947
+ * The bare MBART Model outputting raw hidden-states without any specific head on top.
948
+ */
949
+ export class MBartModel extends MBartPreTrainedModel {
950
+ }
951
+ /**
952
+ * The MBART Model with a language modeling head. Can be used for summarization, after fine-tuning the pretrained models.
953
+ */
954
+ export class MBartForConditionalGeneration extends MBartPreTrainedModel {
955
+ }
956
+ /**
957
+ * MBart model with a sequence classification/head on top (a linear layer on top of the pooled output).
958
+ */
959
+ export class MBartForSequenceClassification extends MBartPreTrainedModel {
960
+ /**
961
+ * Calls the model on new inputs.
962
+ *
963
+ * @param {Object} model_inputs The inputs to the model.
964
+ * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
965
+ */
966
+ _call(model_inputs: any): Promise<SequenceClassifierOutput>;
967
+ }
968
+ export class MBartForCausalLM extends MBartPreTrainedModel {
969
+ }
970
+ export class BlenderbotPreTrainedModel extends PreTrainedModel {
971
+ /**
972
+ * Creates a new instance of the `BlenderbotPreTrainedModel` class.
973
+ * @param {Object} config The model configuration.
974
+ * @param {Record<string, any>} sessions The inference sessions for the model.
975
+ * @param {GenerationConfig} generation_config The generation configuration.
976
+ */
977
+ constructor(config: any, sessions: Record<string, any>, generation_config: GenerationConfig);
978
+ generation_config: GenerationConfig;
979
+ }
980
+ /**
981
+ * The bare Blenderbot Model outputting raw hidden-states without any specific head on top.
982
+ */
983
+ export class BlenderbotModel extends BlenderbotPreTrainedModel {
984
+ }
985
+ /**
986
+ * The Blenderbot Model with a language modeling head. Can be used for summarization.
987
+ */
988
+ export class BlenderbotForConditionalGeneration extends BlenderbotPreTrainedModel {
989
+ }
990
+ export class BlenderbotSmallPreTrainedModel extends PreTrainedModel {
991
+ /**
992
+ * Creates a new instance of the `BlenderbotSmallPreTrainedModel` class.
993
+ * @param {Object} config The model configuration.
994
+ * @param {Record<string, any>} sessions The inference sessions for the model.
995
+ * @param {GenerationConfig} generation_config The generation configuration.
996
+ */
997
+ constructor(config: any, sessions: Record<string, any>, generation_config: GenerationConfig);
998
+ generation_config: GenerationConfig;
999
+ }
1000
+ /**
1001
+ * The bare BlenderbotSmall Model outputting raw hidden-states without any specific head on top.
1002
+ */
1003
+ export class BlenderbotSmallModel extends BlenderbotSmallPreTrainedModel {
1004
+ }
1005
+ /**
1006
+ * The BlenderbotSmall Model with a language modeling head. Can be used for summarization.
1007
+ */
1008
+ export class BlenderbotSmallForConditionalGeneration extends BlenderbotSmallPreTrainedModel {
1009
+ }
1010
+ export class RobertaPreTrainedModel extends PreTrainedModel {
1011
+ }
1012
+ export class RobertaModel extends RobertaPreTrainedModel {
1013
+ }
1014
+ /**
1015
+ * RobertaForMaskedLM class for performing masked language modeling on Roberta models.
1016
+ */
1017
+ export class RobertaForMaskedLM extends RobertaPreTrainedModel {
1018
+ /**
1019
+ * Calls the model on new inputs.
1020
+ *
1021
+ * @param {Object} model_inputs The inputs to the model.
1022
+ * @returns {Promise<MaskedLMOutput>} returned object
1023
+ */
1024
+ _call(model_inputs: any): Promise<MaskedLMOutput>;
1025
+ }
1026
+ /**
1027
+ * RobertaForSequenceClassification class for performing sequence classification on Roberta models.
1028
+ */
1029
+ export class RobertaForSequenceClassification extends RobertaPreTrainedModel {
1030
+ /**
1031
+ * Calls the model on new inputs.
1032
+ *
1033
+ * @param {Object} model_inputs The inputs to the model.
1034
+ * @returns {Promise<SequenceClassifierOutput>} returned object
1035
+ */
1036
+ _call(model_inputs: any): Promise<SequenceClassifierOutput>;
1037
+ }
1038
+ /**
1039
+ * RobertaForTokenClassification class for performing token classification on Roberta models.
1040
+ */
1041
+ export class RobertaForTokenClassification extends RobertaPreTrainedModel {
1042
+ /**
1043
+ * Calls the model on new inputs.
1044
+ *
1045
+ * @param {Object} model_inputs The inputs to the model.
1046
+ * @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for token classification.
1047
+ */
1048
+ _call(model_inputs: any): Promise<TokenClassifierOutput>;
1049
+ }
1050
+ /**
1051
+ * RobertaForQuestionAnswering class for performing question answering on Roberta models.
1052
+ */
1053
+ export class RobertaForQuestionAnswering extends RobertaPreTrainedModel {
1054
+ /**
1055
+ * Calls the model on new inputs.
1056
+ *
1057
+ * @param {Object} model_inputs The inputs to the model.
1058
+ * @returns {Promise<QuestionAnsweringModelOutput>} returned object
1059
+ */
1060
+ _call(model_inputs: any): Promise<QuestionAnsweringModelOutput>;
1061
+ }
1062
+ /**
1063
+ * An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained models.
1064
+ */
1065
+ export class XLMPreTrainedModel extends PreTrainedModel {
1066
+ }
1067
+ /**
1068
+ * The bare XLM Model transformer outputting raw hidden-states without any specific head on top.
1069
+ */
1070
+ export class XLMModel extends XLMPreTrainedModel {
1071
+ }
1072
+ /**
1073
+ * The XLM Model transformer with a language modeling head on top (linear layer with weights tied to the input embeddings).
1074
+ */
1075
+ export class XLMWithLMHeadModel extends XLMPreTrainedModel {
1076
+ /**
1077
+ * Calls the model on new inputs.
1078
+ *
1079
+ * @param {Object} model_inputs The inputs to the model.
1080
+ * @returns {Promise<MaskedLMOutput>} returned object
1081
+ */
1082
+ _call(model_inputs: any): Promise<MaskedLMOutput>;
1083
+ }
1084
+ /**
1085
+ * XLM Model with a sequence classification/regression head on top (a linear layer on top of the pooled output)
1086
+ */
1087
+ export class XLMForSequenceClassification extends XLMPreTrainedModel {
1088
+ /**
1089
+ * Calls the model on new inputs.
1090
+ *
1091
+ * @param {Object} model_inputs The inputs to the model.
1092
+ * @returns {Promise<SequenceClassifierOutput>} returned object
1093
+ */
1094
+ _call(model_inputs: any): Promise<SequenceClassifierOutput>;
1095
+ }
1096
+ /**
1097
+ * XLM Model with a token classification head on top (a linear layer on top of the hidden-states output)
1098
+ */
1099
+ export class XLMForTokenClassification extends XLMPreTrainedModel {
1100
+ /**
1101
+ * Calls the model on new inputs.
1102
+ *
1103
+ * @param {Object} model_inputs The inputs to the model.
1104
+ * @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for token classification.
1105
+ */
1106
+ _call(model_inputs: any): Promise<TokenClassifierOutput>;
1107
+ }
1108
+ /**
1109
+ * XLM Model with a span classification head on top for extractive question-answering tasks
1110
+ */
1111
+ export class XLMForQuestionAnswering extends XLMPreTrainedModel {
1112
+ /**
1113
+ * Calls the model on new inputs.
1114
+ *
1115
+ * @param {Object} model_inputs The inputs to the model.
1116
+ * @returns {Promise<QuestionAnsweringModelOutput>} returned object
1117
+ */
1118
+ _call(model_inputs: any): Promise<QuestionAnsweringModelOutput>;
1119
+ }
1120
+ export class XLMRobertaPreTrainedModel extends PreTrainedModel {
1121
+ }
1122
+ export class XLMRobertaModel extends XLMRobertaPreTrainedModel {
1123
+ }
1124
+ /**
1125
+ * XLMRobertaForMaskedLM class for performing masked language modeling on XLMRoberta models.
1126
+ */
1127
+ export class XLMRobertaForMaskedLM extends XLMRobertaPreTrainedModel {
1128
+ /**
1129
+ * Calls the model on new inputs.
1130
+ *
1131
+ * @param {Object} model_inputs The inputs to the model.
1132
+ * @returns {Promise<MaskedLMOutput>} returned object
1133
+ */
1134
+ _call(model_inputs: any): Promise<MaskedLMOutput>;
1135
+ }
1136
+ /**
1137
+ * XLMRobertaForSequenceClassification class for performing sequence classification on XLMRoberta models.
1138
+ */
1139
+ export class XLMRobertaForSequenceClassification extends XLMRobertaPreTrainedModel {
1140
+ /**
1141
+ * Calls the model on new inputs.
1142
+ *
1143
+ * @param {Object} model_inputs The inputs to the model.
1144
+ * @returns {Promise<SequenceClassifierOutput>} returned object
1145
+ */
1146
+ _call(model_inputs: any): Promise<SequenceClassifierOutput>;
1147
+ }
1148
+ /**
1149
+ * XLMRobertaForTokenClassification class for performing token classification on XLMRoberta models.
1150
+ */
1151
+ export class XLMRobertaForTokenClassification extends XLMRobertaPreTrainedModel {
1152
+ /**
1153
+ * Calls the model on new inputs.
1154
+ *
1155
+ * @param {Object} model_inputs The inputs to the model.
1156
+ * @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for token classification.
1157
+ */
1158
+ _call(model_inputs: any): Promise<TokenClassifierOutput>;
1159
+ }
1160
+ /**
1161
+ * XLMRobertaForQuestionAnswering class for performing question answering on XLMRoberta models.
1162
+ */
1163
+ export class XLMRobertaForQuestionAnswering extends XLMRobertaPreTrainedModel {
1164
+ /**
1165
+ * Calls the model on new inputs.
1166
+ *
1167
+ * @param {Object} model_inputs The inputs to the model.
1168
+ * @returns {Promise<QuestionAnsweringModelOutput>} returned object
1169
+ */
1170
+ _call(model_inputs: any): Promise<QuestionAnsweringModelOutput>;
1171
+ }
1172
+ export class ASTPreTrainedModel extends PreTrainedModel {
1173
+ }
1174
+ /**
1175
+ * The bare AST Model transformer outputting raw hidden-states without any specific head on top.
1176
+ */
1177
+ export class ASTModel extends ASTPreTrainedModel {
1178
+ }
1179
+ /**
1180
+ * Audio Spectrogram Transformer model with an audio classification head on top
1181
+ * (a linear layer on top of the pooled output) e.g. for datasets like AudioSet, Speech Commands v2.
1182
+ */
1183
+ export class ASTForAudioClassification extends ASTPreTrainedModel {
1184
+ }
1185
+ export class WhisperPreTrainedModel extends PreTrainedModel {
1186
+ /**
1187
+ * Creates a new instance of the `WhisperPreTrainedModel` class.
1188
+ * @param {Object} config The model configuration.
1189
+ * @param {Record<string, any>} sessions The inference sessions for the model.
1190
+ * @param {GenerationConfig} generation_config The generation configuration.
1191
+ */
1192
+ constructor(config: any, sessions: Record<string, any>, generation_config: GenerationConfig);
1193
+ requires_attention_mask: boolean;
1194
+ generation_config: GenerationConfig;
1195
+ }
1196
+ /**
1197
+ * WhisperModel class for training Whisper models without a language model head.
1198
+ */
1199
+ export class WhisperModel extends WhisperPreTrainedModel {
1200
+ }
1201
+ /**
1202
+ * WhisperForConditionalGeneration class for generating conditional outputs from Whisper models.
1203
+ */
1204
+ export class WhisperForConditionalGeneration extends WhisperPreTrainedModel {
1205
+ _prepare_generation_config(generation_config: any, kwargs: any): WhisperGenerationConfig;
1206
+ /**
1207
+ *
1208
+ * @param {WhisperGenerationConfig} generation_config
1209
+ */
1210
+ _retrieve_init_tokens(generation_config: WhisperGenerationConfig): number[];
1211
+ /**
1212
+ * Calculates token-level timestamps using the encoder-decoder cross-attentions and
1213
+ * dynamic time-warping (DTW) to map each output token to a position in the input audio.
1214
+ * If `num_frames` is specified, the encoder-decoder cross-attentions will be cropped before applying DTW.
1215
+ * @param {Object} generate_outputs Outputs generated by the model
1216
+ * @param {Tensor[][]} generate_outputs.cross_attentions The cross attentions output by the model
1217
+ * @param {Tensor} generate_outputs.sequences The sequences output by the model
1218
+ * @param {number[][]} alignment_heads Alignment heads of the model
1219
+ * @param {number} [num_frames=null] Number of frames in the input audio.
1220
+ * @param {number} [time_precision=0.02] Precision of the timestamps in seconds
1221
+ * @returns {Tensor} tensor containing the timestamps in seconds for each predicted token
1222
+ */
1223
+ _extract_token_timestamps(generate_outputs: {
1224
+ cross_attentions: Tensor[][];
1225
+ sequences: Tensor;
1226
+ }, alignment_heads: number[][], num_frames?: number, time_precision?: number): Tensor;
1227
+ }
1228
+ /**
1229
+ * Vision Encoder-Decoder model based on OpenAI's GPT architecture for image captioning and other vision tasks
1230
+ */
1231
+ export class VisionEncoderDecoderModel extends PreTrainedModel {
1232
+ /**
1233
+ * Creates a new instance of the `VisionEncoderDecoderModel` class.
1234
+ * @param {Object} config The model configuration.
1235
+ * @param {Record<string, any>} sessions The inference sessions for the model.
1236
+ * @param {GenerationConfig} generation_config The generation configuration.
1237
+ */
1238
+ constructor(config: any, sessions: Record<string, any>, generation_config: GenerationConfig);
1239
+ generation_config: GenerationConfig;
1240
+ }
1241
+ export class LlavaPreTrainedModel extends PreTrainedModel {
1242
+ constructor(config: any, sessions: any, generation_config: any);
1243
+ generation_config: any;
1244
+ }
1245
+ /**
1246
+ * The LLAVA model which consists of a vision backbone and a language model.
1247
+ */
1248
+ export class LlavaForConditionalGeneration extends LlavaPreTrainedModel {
1249
+ _merge_input_ids_with_image_features({ inputs_embeds, image_features, input_ids, attention_mask, }: {
1250
+ inputs_embeds: any;
1251
+ image_features: any;
1252
+ input_ids: any;
1253
+ attention_mask: any;
1254
+ }): {
1255
+ inputs_embeds: any;
1256
+ attention_mask: any;
1257
+ };
1258
+ }
1259
+ export class Moondream1ForConditionalGeneration extends LlavaForConditionalGeneration {
1260
+ }
1261
+ export class Florence2PreTrainedModel extends PreTrainedModel {
1262
+ constructor(config: any, sessions: any, generation_config: any);
1263
+ generation_config: any;
1264
+ }
1265
+ export class Florence2ForConditionalGeneration extends Florence2PreTrainedModel {
1266
+ _merge_input_ids_with_image_features({ inputs_embeds, image_features, input_ids, attention_mask, }: {
1267
+ inputs_embeds: any;
1268
+ image_features: any;
1269
+ input_ids: any;
1270
+ attention_mask: any;
1271
+ }): {
1272
+ inputs_embeds: Tensor;
1273
+ attention_mask: Tensor;
1274
+ };
1275
+ _prepare_inputs_embeds({ input_ids, pixel_values, inputs_embeds, attention_mask }: {
1276
+ input_ids: any;
1277
+ pixel_values: any;
1278
+ inputs_embeds: any;
1279
+ attention_mask: any;
1280
+ }): Promise<{
1281
+ inputs_embeds: any;
1282
+ attention_mask: any;
1283
+ }>;
1284
+ forward({ input_ids, pixel_values, attention_mask, decoder_input_ids, decoder_attention_mask, encoder_outputs, past_key_values, inputs_embeds, decoder_inputs_embeds, }: {
1285
+ input_ids: any;
1286
+ pixel_values: any;
1287
+ attention_mask: any;
1288
+ decoder_input_ids: any;
1289
+ decoder_attention_mask: any;
1290
+ encoder_outputs: any;
1291
+ past_key_values: any;
1292
+ inputs_embeds: any;
1293
+ decoder_inputs_embeds: any;
1294
+ }): Promise<any>;
1295
+ }
1296
+ export class CLIPPreTrainedModel extends PreTrainedModel {
1297
+ }
1298
+ /**
1299
+ * CLIP Text and Vision Model with a projection layers on top
1300
+ *
1301
+ * **Example:** Perform zero-shot image classification with a `CLIPModel`.
1302
+ *
1303
+ * ```javascript
1304
+ * import { AutoTokenizer, AutoProcessor, CLIPModel, RawImage } from '@huggingface/transformers';
1305
+ *
1306
+ * // Load tokenizer, processor, and model
1307
+ * let tokenizer = await AutoTokenizer.from_pretrained('Xenova/clip-vit-base-patch16');
1308
+ * let processor = await AutoProcessor.from_pretrained('Xenova/clip-vit-base-patch16');
1309
+ * let model = await CLIPModel.from_pretrained('Xenova/clip-vit-base-patch16');
1310
+ *
1311
+ * // Run tokenization
1312
+ * let texts = ['a photo of a car', 'a photo of a football match']
1313
+ * let text_inputs = tokenizer(texts, { padding: true, truncation: true });
1314
+ *
1315
+ * // Read image and run processor
1316
+ * let image = await RawImage.read('https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/football-match.jpg');
1317
+ * let image_inputs = await processor(image);
1318
+ *
1319
+ * // Run model with both text and pixel inputs
1320
+ * let output = await model({ ...text_inputs, ...image_inputs });
1321
+ * // {
1322
+ * // logits_per_image: Tensor {
1323
+ * // dims: [ 1, 2 ],
1324
+ * // data: Float32Array(2) [ 18.579734802246094, 24.31830596923828 ],
1325
+ * // },
1326
+ * // logits_per_text: Tensor {
1327
+ * // dims: [ 2, 1 ],
1328
+ * // data: Float32Array(2) [ 18.579734802246094, 24.31830596923828 ],
1329
+ * // },
1330
+ * // text_embeds: Tensor {
1331
+ * // dims: [ 2, 512 ],
1332
+ * // data: Float32Array(1024) [ ... ],
1333
+ * // },
1334
+ * // image_embeds: Tensor {
1335
+ * // dims: [ 1, 512 ],
1336
+ * // data: Float32Array(512) [ ... ],
1337
+ * // }
1338
+ * // }
1339
+ * ```
1340
+ */
1341
+ export class CLIPModel extends CLIPPreTrainedModel {
1342
+ }
1343
+ /**
1344
+ * CLIP Text Model with a projection layer on top (a linear layer on top of the pooled output)
1345
+ *
1346
+ * **Example:** Compute text embeddings with `CLIPTextModelWithProjection`.
1347
+ *
1348
+ * ```javascript
1349
+ * import { AutoTokenizer, CLIPTextModelWithProjection } from '@huggingface/transformers';
1350
+ *
1351
+ * // Load tokenizer and text model
1352
+ * const tokenizer = await AutoTokenizer.from_pretrained('Xenova/clip-vit-base-patch16');
1353
+ * const text_model = await CLIPTextModelWithProjection.from_pretrained('Xenova/clip-vit-base-patch16');
1354
+ *
1355
+ * // Run tokenization
1356
+ * let texts = ['a photo of a car', 'a photo of a football match'];
1357
+ * let text_inputs = tokenizer(texts, { padding: true, truncation: true });
1358
+ *
1359
+ * // Compute embeddings
1360
+ * const { text_embeds } = await text_model(text_inputs);
1361
+ * // Tensor {
1362
+ * // dims: [ 2, 512 ],
1363
+ * // type: 'float32',
1364
+ * // data: Float32Array(1024) [ ... ],
1365
+ * // size: 1024
1366
+ * // }
1367
+ * ```
1368
+ */
1369
+ export class CLIPTextModelWithProjection extends CLIPPreTrainedModel {
1370
+ }
1371
+ /**
1372
+ * CLIP Vision Model with a projection layer on top (a linear layer on top of the pooled output)
1373
+ *
1374
+ * **Example:** Compute vision embeddings with `CLIPVisionModelWithProjection`.
1375
+ *
1376
+ * ```javascript
1377
+ * import { AutoProcessor, CLIPVisionModelWithProjection, RawImage} from '@huggingface/transformers';
1378
+ *
1379
+ * // Load processor and vision model
1380
+ * const processor = await AutoProcessor.from_pretrained('Xenova/clip-vit-base-patch16');
1381
+ * const vision_model = await CLIPVisionModelWithProjection.from_pretrained('Xenova/clip-vit-base-patch16');
1382
+ *
1383
+ * // Read image and run processor
1384
+ * let image = await RawImage.read('https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/football-match.jpg');
1385
+ * let image_inputs = await processor(image);
1386
+ *
1387
+ * // Compute embeddings
1388
+ * const { image_embeds } = await vision_model(image_inputs);
1389
+ * // Tensor {
1390
+ * // dims: [ 1, 512 ],
1391
+ * // type: 'float32',
1392
+ * // data: Float32Array(512) [ ... ],
1393
+ * // size: 512
1394
+ * // }
1395
+ * ```
1396
+ */
1397
+ export class CLIPVisionModelWithProjection extends CLIPPreTrainedModel {
1398
+ }
1399
+ export class SiglipPreTrainedModel extends PreTrainedModel {
1400
+ }
1401
+ /**
1402
+ * SigLIP Text and Vision Model with a projection layers on top
1403
+ *
1404
+ * **Example:** Perform zero-shot image classification with a `SiglipModel`.
1405
+ *
1406
+ * ```javascript
1407
+ * import { AutoTokenizer, AutoProcessor, SiglipModel, RawImage } from '@huggingface/transformers';
1408
+ *
1409
+ * // Load tokenizer, processor, and model
1410
+ * const tokenizer = await AutoTokenizer.from_pretrained('Xenova/siglip-base-patch16-224');
1411
+ * const processor = await AutoProcessor.from_pretrained('Xenova/siglip-base-patch16-224');
1412
+ * const model = await SiglipModel.from_pretrained('Xenova/siglip-base-patch16-224');
1413
+ *
1414
+ * // Run tokenization
1415
+ * const texts = ['a photo of 2 cats', 'a photo of 2 dogs'];
1416
+ * const text_inputs = tokenizer(texts, { padding: 'max_length', truncation: true });
1417
+ *
1418
+ * // Read image and run processor
1419
+ * const image = await RawImage.read('http://images.cocodataset.org/val2017/000000039769.jpg');
1420
+ * const image_inputs = await processor(image);
1421
+ *
1422
+ * // Run model with both text and pixel inputs
1423
+ * const output = await model({ ...text_inputs, ...image_inputs });
1424
+ * // {
1425
+ * // logits_per_image: Tensor {
1426
+ * // dims: [ 1, 2 ],
1427
+ * // data: Float32Array(2) [ -1.6019744873046875, -10.720091819763184 ],
1428
+ * // },
1429
+ * // logits_per_text: Tensor {
1430
+ * // dims: [ 2, 1 ],
1431
+ * // data: Float32Array(2) [ -1.6019744873046875, -10.720091819763184 ],
1432
+ * // },
1433
+ * // text_embeds: Tensor {
1434
+ * // dims: [ 2, 768 ],
1435
+ * // data: Float32Array(1536) [ ... ],
1436
+ * // },
1437
+ * // image_embeds: Tensor {
1438
+ * // dims: [ 1, 768 ],
1439
+ * // data: Float32Array(768) [ ... ],
1440
+ * // }
1441
+ * // }
1442
+ * ```
1443
+ */
1444
+ export class SiglipModel extends SiglipPreTrainedModel {
1445
+ }
1446
+ /**
1447
+ * The text model from SigLIP without any head or projection on top.
1448
+ *
1449
+ * **Example:** Compute text embeddings with `SiglipTextModel`.
1450
+ *
1451
+ * ```javascript
1452
+ * import { AutoTokenizer, SiglipTextModel } from '@huggingface/transformers';
1453
+ *
1454
+ * // Load tokenizer and text model
1455
+ * const tokenizer = await AutoTokenizer.from_pretrained('Xenova/siglip-base-patch16-224');
1456
+ * const text_model = await SiglipTextModel.from_pretrained('Xenova/siglip-base-patch16-224');
1457
+ *
1458
+ * // Run tokenization
1459
+ * const texts = ['a photo of 2 cats', 'a photo of 2 dogs'];
1460
+ * const text_inputs = tokenizer(texts, { padding: 'max_length', truncation: true });
1461
+ *
1462
+ * // Compute embeddings
1463
+ * const { pooler_output } = await text_model(text_inputs);
1464
+ * // Tensor {
1465
+ * // dims: [ 2, 768 ],
1466
+ * // type: 'float32',
1467
+ * // data: Float32Array(1536) [ ... ],
1468
+ * // size: 1536
1469
+ * // }
1470
+ * ```
1471
+ */
1472
+ export class SiglipTextModel extends SiglipPreTrainedModel {
1473
+ }
1474
+ /**
1475
+ * The vision model from SigLIP without any head or projection on top.
1476
+ *
1477
+ * **Example:** Compute vision embeddings with `SiglipVisionModel`.
1478
+ *
1479
+ * ```javascript
1480
+ * import { AutoProcessor, SiglipVisionModel, RawImage} from '@huggingface/transformers';
1481
+ *
1482
+ * // Load processor and vision model
1483
+ * const processor = await AutoProcessor.from_pretrained('Xenova/siglip-base-patch16-224');
1484
+ * const vision_model = await SiglipVisionModel.from_pretrained('Xenova/siglip-base-patch16-224');
1485
+ *
1486
+ * // Read image and run processor
1487
+ * const image = await RawImage.read('https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/football-match.jpg');
1488
+ * const image_inputs = await processor(image);
1489
+ *
1490
+ * // Compute embeddings
1491
+ * const { pooler_output } = await vision_model(image_inputs);
1492
+ * // Tensor {
1493
+ * // dims: [ 1, 768 ],
1494
+ * // type: 'float32',
1495
+ * // data: Float32Array(768) [ ... ],
1496
+ * // size: 768
1497
+ * // }
1498
+ * ```
1499
+ */
1500
+ export class SiglipVisionModel extends CLIPPreTrainedModel {
1501
+ }
1502
+ export class ChineseCLIPPreTrainedModel extends PreTrainedModel {
1503
+ }
1504
+ export class ChineseCLIPModel extends ChineseCLIPPreTrainedModel {
1505
+ }
1506
+ export class CLIPSegPreTrainedModel extends PreTrainedModel {
1507
+ }
1508
+ export class CLIPSegModel extends CLIPSegPreTrainedModel {
1509
+ }
1510
+ /**
1511
+ * CLIPSeg model with a Transformer-based decoder on top for zero-shot and one-shot image segmentation.
1512
+ *
1513
+ * **Example:** Perform zero-shot image segmentation with a `CLIPSegForImageSegmentation` model.
1514
+ *
1515
+ * ```javascript
1516
+ * import { AutoTokenizer, AutoProcessor, CLIPSegForImageSegmentation, RawImage } from '@huggingface/transformers';
1517
+ *
1518
+ * // Load tokenizer, processor, and model
1519
+ * const tokenizer = await AutoTokenizer.from_pretrained('Xenova/clipseg-rd64-refined');
1520
+ * const processor = await AutoProcessor.from_pretrained('Xenova/clipseg-rd64-refined');
1521
+ * const model = await CLIPSegForImageSegmentation.from_pretrained('Xenova/clipseg-rd64-refined');
1522
+ *
1523
+ * // Run tokenization
1524
+ * const texts = ['a glass', 'something to fill', 'wood', 'a jar'];
1525
+ * const text_inputs = tokenizer(texts, { padding: true, truncation: true });
1526
+ *
1527
+ * // Read image and run processor
1528
+ * const image = await RawImage.read('https://github.com/timojl/clipseg/blob/master/example_image.jpg?raw=true');
1529
+ * const image_inputs = await processor(image);
1530
+ *
1531
+ * // Run model with both text and pixel inputs
1532
+ * const { logits } = await model({ ...text_inputs, ...image_inputs });
1533
+ * // logits: Tensor {
1534
+ * // dims: [4, 352, 352],
1535
+ * // type: 'float32',
1536
+ * // data: Float32Array(495616) [ ... ],
1537
+ * // size: 495616
1538
+ * // }
1539
+ * ```
1540
+ *
1541
+ * You can visualize the predictions as follows:
1542
+ * ```javascript
1543
+ * const preds = logits
1544
+ * .unsqueeze_(1)
1545
+ * .sigmoid_()
1546
+ * .mul_(255)
1547
+ * .round_()
1548
+ * .to('uint8');
1549
+ *
1550
+ * for (let i = 0; i < preds.dims[0]; ++i) {
1551
+ * const img = RawImage.fromTensor(preds[i]);
1552
+ * img.save(`prediction_${i}.png`);
1553
+ * }
1554
+ * ```
1555
+ */
1556
+ export class CLIPSegForImageSegmentation extends CLIPSegPreTrainedModel {
1557
+ }
1558
+ export class GPT2PreTrainedModel extends PreTrainedModel {
1559
+ /**
1560
+ * Creates a new instance of the `GPT2PreTrainedModel` class.
1561
+ * @param {Object} config The model configuration.
1562
+ * @param {Record<string, any>} sessions The inference sessions for the model.
1563
+ * @param {GenerationConfig} generation_config The generation configuration.
1564
+ */
1565
+ constructor(config: any, sessions: Record<string, any>, generation_config: GenerationConfig);
1566
+ generation_config: GenerationConfig;
1567
+ }
1568
+ export class GPT2Model extends GPT2PreTrainedModel {
1569
+ }
1570
+ /**
1571
+ * GPT-2 language model head on top of the GPT-2 base model. This model is suitable for text generation tasks.
1572
+ */
1573
+ export class GPT2LMHeadModel extends GPT2PreTrainedModel {
1574
+ }
1575
+ export class GPTNeoPreTrainedModel extends PreTrainedModel {
1576
+ /**
1577
+ * Creates a new instance of the `GPTNeoPreTrainedModel` class.
1578
+ * @param {Object} config The model configuration.
1579
+ * @param {Record<string, any>} sessions The inference sessions for the model.
1580
+ * @param {GenerationConfig} generation_config The generation configuration.
1581
+ */
1582
+ constructor(config: any, sessions: Record<string, any>, generation_config: GenerationConfig);
1583
+ generation_config: GenerationConfig;
1584
+ }
1585
+ export class GPTNeoModel extends GPTNeoPreTrainedModel {
1586
+ }
1587
+ export class GPTNeoForCausalLM extends GPTNeoPreTrainedModel {
1588
+ }
1589
+ export class GPTNeoXPreTrainedModel extends PreTrainedModel {
1590
+ /**
1591
+ * Creates a new instance of the `GPTNeoXPreTrainedModel` class.
1592
+ * @param {Object} config The model configuration.
1593
+ * @param {Record<string, any>} sessions The inference sessions for the model.
1594
+ * @param {GenerationConfig} generation_config The generation configuration.
1595
+ */
1596
+ constructor(config: any, sessions: Record<string, any>, generation_config: GenerationConfig);
1597
+ generation_config: GenerationConfig;
1598
+ }
1599
+ export class GPTNeoXModel extends GPTNeoXPreTrainedModel {
1600
+ }
1601
+ export class GPTNeoXForCausalLM extends GPTNeoXPreTrainedModel {
1602
+ }
1603
+ export class GPTJPreTrainedModel extends PreTrainedModel {
1604
+ /**
1605
+ * Creates a new instance of the `GPTJPreTrainedModel` class.
1606
+ * @param {Object} config The model configuration.
1607
+ * @param {Record<string, any>} sessions The inference sessions for the model.
1608
+ * @param {GenerationConfig} generation_config The generation configuration.
1609
+ */
1610
+ constructor(config: any, sessions: Record<string, any>, generation_config: GenerationConfig);
1611
+ generation_config: GenerationConfig;
1612
+ }
1613
+ export class GPTJModel extends GPTJPreTrainedModel {
1614
+ }
1615
+ export class GPTJForCausalLM extends GPTJPreTrainedModel {
1616
+ }
1617
+ export class GPTBigCodePreTrainedModel extends PreTrainedModel {
1618
+ /**
1619
+ * Creates a new instance of the `GPTBigCodePreTrainedModel` class.
1620
+ * @param {Object} config The model configuration.
1621
+ * @param {Record<string, any>} sessions The inference sessions for the model.
1622
+ * @param {GenerationConfig} generation_config The generation configuration.
1623
+ */
1624
+ constructor(config: any, sessions: Record<string, any>, generation_config: GenerationConfig);
1625
+ generation_config: GenerationConfig;
1626
+ }
1627
+ export class GPTBigCodeModel extends GPTBigCodePreTrainedModel {
1628
+ }
1629
+ export class GPTBigCodeForCausalLM extends GPTBigCodePreTrainedModel {
1630
+ }
1631
+ export class CodeGenPreTrainedModel extends PreTrainedModel {
1632
+ /**
1633
+ * Creates a new instance of the `CodeGenPreTrainedModel` class.
1634
+ * @param {Object} config The model configuration.
1635
+ * @param {Record<string, any>} sessions The inference sessions for the model.
1636
+ * @param {GenerationConfig} generation_config The generation configuration.
1637
+ */
1638
+ constructor(config: any, sessions: Record<string, any>, generation_config: GenerationConfig);
1639
+ generation_config: GenerationConfig;
1640
+ }
1641
+ /**
1642
+ * CodeGenModel is a class representing a code generation model without a language model head.
1643
+ */
1644
+ export class CodeGenModel extends CodeGenPreTrainedModel {
1645
+ }
1646
+ /**
1647
+ * CodeGenForCausalLM is a class that represents a code generation model based on the GPT-2 architecture. It extends the `CodeGenPreTrainedModel` class.
1648
+ */
1649
+ export class CodeGenForCausalLM extends CodeGenPreTrainedModel {
1650
+ }
1651
+ /**
1652
+ * The bare LLama Model outputting raw hidden-states without any specific head on top.
1653
+ */
1654
+ export class LlamaPreTrainedModel extends PreTrainedModel {
1655
+ /**
1656
+ * Creates a new instance of the `LlamaPreTrainedModel` class.
1657
+ * @param {Object} config The model configuration.
1658
+ * @param {Record<string, any>} sessions The inference sessions for the model.
1659
+ * @param {GenerationConfig} generation_config The generation configuration.
1660
+ */
1661
+ constructor(config: any, sessions: Record<string, any>, generation_config: GenerationConfig);
1662
+ generation_config: GenerationConfig;
1663
+ }
1664
+ /**
1665
+ * The bare LLaMA Model outputting raw hidden-states without any specific head on top.
1666
+ */
1667
+ export class LlamaModel extends LlamaPreTrainedModel {
1668
+ }
1669
+ export class LlamaForCausalLM extends LlamaPreTrainedModel {
1670
+ }
1671
+ /**
1672
+ * The bare Cohere Model outputting raw hidden-states without any specific head on top.
1673
+ */
1674
+ export class CoherePreTrainedModel extends PreTrainedModel {
1675
+ /**
1676
+ * Creates a new instance of the `CoherePreTrainedModel` class.
1677
+ * @param {Object} config The model configuration.
1678
+ * @param {Record<string, any>} sessions The inference sessions for the model.
1679
+ * @param {GenerationConfig} generation_config The generation configuration.
1680
+ */
1681
+ constructor(config: any, sessions: Record<string, any>, generation_config: GenerationConfig);
1682
+ generation_config: GenerationConfig;
1683
+ }
1684
+ export class CohereModel extends CoherePreTrainedModel {
1685
+ }
1686
+ export class CohereForCausalLM extends CoherePreTrainedModel {
1687
+ }
1688
+ /**
1689
+ * The bare Gemma Model outputting raw hidden-states without any specific head on top.
1690
+ */
1691
+ export class GemmaPreTrainedModel extends PreTrainedModel {
1692
+ /**
1693
+ * Creates a new instance of the `GemmaPreTrainedModel` class.
1694
+ * @param {Object} config The model configuration.
1695
+ * @param {Record<string, any>} sessions The inference sessions for the model.
1696
+ * @param {GenerationConfig} generation_config The generation configuration.
1697
+ */
1698
+ constructor(config: any, sessions: Record<string, any>, generation_config: GenerationConfig);
1699
+ generation_config: GenerationConfig;
1700
+ }
1701
+ /**
1702
+ * The bare Gemma Model outputting raw hidden-states without any specific head on top.
1703
+ */
1704
+ export class GemmaModel extends GemmaPreTrainedModel {
1705
+ }
1706
+ export class GemmaForCausalLM extends GemmaPreTrainedModel {
1707
+ }
1708
+ /**
1709
+ * The bare Gemma2 Model outputting raw hidden-states without any specific head on top.
1710
+ */
1711
+ export class Gemma2PreTrainedModel extends PreTrainedModel {
1712
+ /**
1713
+ * Creates a new instance of the `Gemma2PreTrainedModel` class.
1714
+ * @param {Object} config The model configuration.
1715
+ * @param {Record<string, any>} sessions The inference sessions for the model.
1716
+ * @param {GenerationConfig} generation_config The generation configuration.
1717
+ */
1718
+ constructor(config: any, sessions: Record<string, any>, generation_config: GenerationConfig);
1719
+ generation_config: GenerationConfig;
1720
+ }
1721
+ /**
1722
+ * The bare Gemma2 Model outputting raw hidden-states without any specific head on top.
1723
+ */
1724
+ export class Gemma2Model extends Gemma2PreTrainedModel {
1725
+ }
1726
+ export class Gemma2ForCausalLM extends Gemma2PreTrainedModel {
1727
+ }
1728
+ export class OpenELMPreTrainedModel extends PreTrainedModel {
1729
+ /**
1730
+ * Creates a new instance of the `OpenELMPreTrainedModel` class.
1731
+ * @param {Object} config The model configuration.
1732
+ * @param {Record<string, any>} sessions The inference sessions for the model.
1733
+ * @param {GenerationConfig} generation_config The generation configuration.
1734
+ */
1735
+ constructor(config: any, sessions: Record<string, any>, generation_config: GenerationConfig);
1736
+ generation_config: GenerationConfig;
1737
+ }
1738
+ export class OpenELMModel extends OpenELMPreTrainedModel {
1739
+ }
1740
+ export class OpenELMForCausalLM extends OpenELMPreTrainedModel {
1741
+ }
1742
+ /**
1743
+ * The bare Qwen2 Model outputting raw hidden-states without any specific head on top.
1744
+ */
1745
+ export class Qwen2PreTrainedModel extends PreTrainedModel {
1746
+ /**
1747
+ * Creates a new instance of the `Qwen2PreTrainedModel` class.
1748
+ * @param {Object} config The model configuration.
1749
+ * @param {Record<string, any>} sessions The inference sessions for the model.
1750
+ * @param {GenerationConfig} generation_config The generation configuration.
1751
+ */
1752
+ constructor(config: any, sessions: Record<string, any>, generation_config: GenerationConfig);
1753
+ generation_config: GenerationConfig;
1754
+ }
1755
+ /**
1756
+ * The bare Qwen2 Model outputting raw hidden-states without any specific head on top.
1757
+ */
1758
+ export class Qwen2Model extends Qwen2PreTrainedModel {
1759
+ }
1760
+ export class Qwen2ForCausalLM extends Qwen2PreTrainedModel {
1761
+ }
1762
+ export class PhiPreTrainedModel extends PreTrainedModel {
1763
+ /**
1764
+ * Creates a new instance of the `PhiPreTrainedModel` class.
1765
+ * @param {Object} config The model configuration.
1766
+ * @param {Record<string, any>} sessions The inference sessions for the model.
1767
+ * @param {GenerationConfig} generation_config The generation configuration.
1768
+ */
1769
+ constructor(config: any, sessions: Record<string, any>, generation_config: GenerationConfig);
1770
+ generation_config: GenerationConfig;
1771
+ }
1772
+ /**
1773
+ * The bare Phi Model outputting raw hidden-states without any specific head on top.
1774
+ */
1775
+ export class PhiModel extends PhiPreTrainedModel {
1776
+ }
1777
+ export class PhiForCausalLM extends PhiPreTrainedModel {
1778
+ }
1779
+ export class Phi3PreTrainedModel extends PreTrainedModel {
1780
+ /**
1781
+ * Creates a new instance of the `Phi3PreTrainedModel` class.
1782
+ * @param {Object} config The model configuration.
1783
+ * @param {Record<string, any>} sessions The inference sessions for the model.
1784
+ * @param {GenerationConfig} generation_config The generation configuration.
1785
+ */
1786
+ constructor(config: any, sessions: Record<string, any>, generation_config: GenerationConfig);
1787
+ generation_config: GenerationConfig;
1788
+ }
1789
+ /**
1790
+ * The bare Phi3 Model outputting raw hidden-states without any specific head on top.
1791
+ */
1792
+ export class Phi3Model extends Phi3PreTrainedModel {
1793
+ }
1794
+ export class Phi3ForCausalLM extends Phi3PreTrainedModel {
1795
+ }
1796
+ /**
1797
+ * The Bloom Model transformer with a language modeling head on top (linear layer with weights tied to the input embeddings).
1798
+ */
1799
+ export class BloomPreTrainedModel extends PreTrainedModel {
1800
+ /**
1801
+ * Creates a new instance of the `BloomPreTrainedModel` class.
1802
+ * @param {Object} config The model configuration.
1803
+ * @param {Record<string, any>} sessions The inference sessions for the model.
1804
+ * @param {GenerationConfig} generation_config The generation configuration.
1805
+ */
1806
+ constructor(config: any, sessions: Record<string, any>, generation_config: GenerationConfig);
1807
+ generation_config: GenerationConfig;
1808
+ }
1809
+ /**
1810
+ * The bare Bloom Model transformer outputting raw hidden-states without any specific head on top.
1811
+ */
1812
+ export class BloomModel extends BloomPreTrainedModel {
1813
+ }
1814
+ /**
1815
+ * The Bloom Model transformer with a language modeling head on top (linear layer with weights tied to the input embeddings).
1816
+ */
1817
+ export class BloomForCausalLM extends BloomPreTrainedModel {
1818
+ }
1819
+ export class MptPreTrainedModel extends PreTrainedModel {
1820
+ /**
1821
+ * Creates a new instance of the `MptPreTrainedModel` class.
1822
+ * @param {Object} config The model configuration.
1823
+ * @param {Record<string, any>} sessions The inference sessions for the model.
1824
+ * @param {GenerationConfig} generation_config The generation configuration.
1825
+ */
1826
+ constructor(config: any, sessions: Record<string, any>, generation_config: GenerationConfig);
1827
+ generation_config: GenerationConfig;
1828
+ }
1829
+ /**
1830
+ * The bare Mpt Model transformer outputting raw hidden-states without any specific head on top.
1831
+ */
1832
+ export class MptModel extends MptPreTrainedModel {
1833
+ }
1834
+ /**
1835
+ * The MPT Model transformer with a language modeling head on top (linear layer with weights tied to the input embeddings).
1836
+ */
1837
+ export class MptForCausalLM extends MptPreTrainedModel {
1838
+ }
1839
+ export class OPTPreTrainedModel extends PreTrainedModel {
1840
+ /**
1841
+ * Creates a new instance of the `OPTPreTrainedModel` class.
1842
+ * @param {Object} config The model configuration.
1843
+ * @param {Record<string, any>} sessions The inference sessions for the model.
1844
+ * @param {GenerationConfig} generation_config The generation configuration.
1845
+ */
1846
+ constructor(config: any, sessions: Record<string, any>, generation_config: GenerationConfig);
1847
+ generation_config: GenerationConfig;
1848
+ }
1849
+ /**
1850
+ * The bare OPT Model outputting raw hidden-states without any specific head on top.
1851
+ */
1852
+ export class OPTModel extends OPTPreTrainedModel {
1853
+ }
1854
+ /**
1855
+ * The OPT Model transformer with a language modeling head on top (linear layer with weights tied to the input embeddings).
1856
+ */
1857
+ export class OPTForCausalLM extends OPTPreTrainedModel {
1858
+ }
1859
+ export class ViTPreTrainedModel extends PreTrainedModel {
1860
+ }
1861
+ export class ViTModel extends ViTPreTrainedModel {
1862
+ }
1863
+ export class ViTForImageClassification extends ViTPreTrainedModel {
1864
+ /**
1865
+ * @param {any} model_inputs
1866
+ */
1867
+ _call(model_inputs: any): Promise<SequenceClassifierOutput>;
1868
+ }
1869
+ export class FastViTPreTrainedModel extends PreTrainedModel {
1870
+ }
1871
+ export class FastViTModel extends FastViTPreTrainedModel {
1872
+ }
1873
+ export class FastViTForImageClassification extends FastViTPreTrainedModel {
1874
+ /**
1875
+ * @param {any} model_inputs
1876
+ */
1877
+ _call(model_inputs: any): Promise<SequenceClassifierOutput>;
1878
+ }
1879
+ export class VitMattePreTrainedModel extends PreTrainedModel {
1880
+ }
1881
+ /**
1882
+ * ViTMatte framework leveraging any vision backbone e.g. for ADE20k, CityScapes.
1883
+ *
1884
+ * **Example:** Perform image matting with a `VitMatteForImageMatting` model.
1885
+ * ```javascript
1886
+ * import { AutoProcessor, VitMatteForImageMatting, RawImage } from '@huggingface/transformers';
1887
+ *
1888
+ * // Load processor and model
1889
+ * const processor = await AutoProcessor.from_pretrained('Xenova/vitmatte-small-distinctions-646');
1890
+ * const model = await VitMatteForImageMatting.from_pretrained('Xenova/vitmatte-small-distinctions-646');
1891
+ *
1892
+ * // Load image and trimap
1893
+ * const image = await RawImage.fromURL('https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/vitmatte_image.png');
1894
+ * const trimap = await RawImage.fromURL('https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/vitmatte_trimap.png');
1895
+ *
1896
+ * // Prepare image + trimap for the model
1897
+ * const inputs = await processor(image, trimap);
1898
+ *
1899
+ * // Predict alpha matte
1900
+ * const { alphas } = await model(inputs);
1901
+ * // Tensor {
1902
+ * // dims: [ 1, 1, 640, 960 ],
1903
+ * // type: 'float32',
1904
+ * // size: 614400,
1905
+ * // data: Float32Array(614400) [ 0.9894027709960938, 0.9970508813858032, ... ]
1906
+ * // }
1907
+ * ```
1908
+ *
1909
+ * You can visualize the alpha matte as follows:
1910
+ * ```javascript
1911
+ * import { Tensor, cat } from '@huggingface/transformers';
1912
+ *
1913
+ * // Visualize predicted alpha matte
1914
+ * const imageTensor = image.toTensor();
1915
+ *
1916
+ * // Convert float (0-1) alpha matte to uint8 (0-255)
1917
+ * const alphaChannel = alphas
1918
+ * .squeeze(0)
1919
+ * .mul_(255)
1920
+ * .clamp_(0, 255)
1921
+ * .round_()
1922
+ * .to('uint8');
1923
+ *
1924
+ * // Concatenate original image with predicted alpha
1925
+ * const imageData = cat([imageTensor, alphaChannel], 0);
1926
+ *
1927
+ * // Save output image
1928
+ * const outputImage = RawImage.fromTensor(imageData);
1929
+ * outputImage.save('output.png');
1930
+ * ```
1931
+ */
1932
+ export class VitMatteForImageMatting extends VitMattePreTrainedModel {
1933
+ /**
1934
+ * @param {any} model_inputs
1935
+ */
1936
+ _call(model_inputs: any): Promise<ImageMattingOutput>;
1937
+ }
1938
+ export class MobileViTPreTrainedModel extends PreTrainedModel {
1939
+ }
1940
+ export class MobileViTModel extends MobileViTPreTrainedModel {
1941
+ }
1942
+ export class MobileViTForImageClassification extends MobileViTPreTrainedModel {
1943
+ /**
1944
+ * @param {any} model_inputs
1945
+ */
1946
+ _call(model_inputs: any): Promise<SequenceClassifierOutput>;
1947
+ }
1948
+ export class MobileViTV2PreTrainedModel extends PreTrainedModel {
1949
+ }
1950
+ export class MobileViTV2Model extends MobileViTV2PreTrainedModel {
1951
+ }
1952
+ export class MobileViTV2ForImageClassification extends MobileViTV2PreTrainedModel {
1953
+ /**
1954
+ * @param {any} model_inputs
1955
+ */
1956
+ _call(model_inputs: any): Promise<SequenceClassifierOutput>;
1957
+ }
1958
+ export class OwlViTPreTrainedModel extends PreTrainedModel {
1959
+ }
1960
+ export class OwlViTModel extends OwlViTPreTrainedModel {
1961
+ }
1962
+ export class OwlViTForObjectDetection extends OwlViTPreTrainedModel {
1963
+ }
1964
+ export class Owlv2PreTrainedModel extends PreTrainedModel {
1965
+ }
1966
+ export class Owlv2Model extends Owlv2PreTrainedModel {
1967
+ }
1968
+ export class Owlv2ForObjectDetection extends Owlv2PreTrainedModel {
1969
+ }
1970
+ export class BeitPreTrainedModel extends PreTrainedModel {
1971
+ }
1972
+ export class BeitModel extends BeitPreTrainedModel {
1973
+ }
1974
+ export class BeitForImageClassification extends BeitPreTrainedModel {
1975
+ /**
1976
+ * @param {any} model_inputs
1977
+ */
1978
+ _call(model_inputs: any): Promise<SequenceClassifierOutput>;
1979
+ }
1980
+ export class DetrPreTrainedModel extends PreTrainedModel {
1981
+ }
1982
+ export class DetrModel extends DetrPreTrainedModel {
1983
+ }
1984
+ export class DetrForObjectDetection extends DetrPreTrainedModel {
1985
+ /**
1986
+ * @param {any} model_inputs
1987
+ */
1988
+ _call(model_inputs: any): Promise<DetrObjectDetectionOutput>;
1989
+ }
1990
+ export class DetrForSegmentation extends DetrPreTrainedModel {
1991
+ /**
1992
+ * Runs the model with the provided inputs
1993
+ * @param {Object} model_inputs Model inputs
1994
+ * @returns {Promise<DetrSegmentationOutput>} Object containing segmentation outputs
1995
+ */
1996
+ _call(model_inputs: any): Promise<DetrSegmentationOutput>;
1997
+ }
1998
+ export class DetrObjectDetectionOutput extends ModelOutput {
1999
+ /**
2000
+ * @param {Object} output The output of the model.
2001
+ * @param {Tensor} output.logits Classification logits (including no-object) for all queries.
2002
+ * @param {Tensor} output.pred_boxes Normalized boxes coordinates for all queries, represented as (center_x, center_y, width, height).
2003
+ * These values are normalized in [0, 1], relative to the size of each individual image in the batch (disregarding possible padding).
2004
+ */
2005
+ constructor({ logits, pred_boxes }: {
2006
+ logits: Tensor;
2007
+ pred_boxes: Tensor;
2008
+ });
2009
+ logits: Tensor;
2010
+ pred_boxes: Tensor;
2011
+ }
2012
+ export class DetrSegmentationOutput extends ModelOutput {
2013
+ /**
2014
+ * @param {Object} output The output of the model.
2015
+ * @param {Tensor} output.logits The output logits of the model.
2016
+ * @param {Tensor} output.pred_boxes Predicted boxes.
2017
+ * @param {Tensor} output.pred_masks Predicted masks.
2018
+ */
2019
+ constructor({ logits, pred_boxes, pred_masks }: {
2020
+ logits: Tensor;
2021
+ pred_boxes: Tensor;
2022
+ pred_masks: Tensor;
2023
+ });
2024
+ logits: Tensor;
2025
+ pred_boxes: Tensor;
2026
+ pred_masks: Tensor;
2027
+ }
2028
+ export class RTDetrPreTrainedModel extends PreTrainedModel {
2029
+ }
2030
+ export class RTDetrModel extends RTDetrPreTrainedModel {
2031
+ }
2032
+ export class RTDetrForObjectDetection extends RTDetrPreTrainedModel {
2033
+ /**
2034
+ * @param {any} model_inputs
2035
+ */
2036
+ _call(model_inputs: any): Promise<RTDetrObjectDetectionOutput>;
2037
+ }
2038
+ export class RTDetrObjectDetectionOutput extends ModelOutput {
2039
+ /**
2040
+ * @param {Object} output The output of the model.
2041
+ * @param {Tensor} output.logits Classification logits (including no-object) for all queries.
2042
+ * @param {Tensor} output.pred_boxes Normalized boxes coordinates for all queries, represented as (center_x, center_y, width, height).
2043
+ * These values are normalized in [0, 1], relative to the size of each individual image in the batch (disregarding possible padding).
2044
+ */
2045
+ constructor({ logits, pred_boxes }: {
2046
+ logits: Tensor;
2047
+ pred_boxes: Tensor;
2048
+ });
2049
+ logits: Tensor;
2050
+ pred_boxes: Tensor;
2051
+ }
2052
+ export class TableTransformerPreTrainedModel extends PreTrainedModel {
2053
+ }
2054
+ /**
2055
+ * The bare Table Transformer Model (consisting of a backbone and encoder-decoder Transformer)
2056
+ * outputting raw hidden-states without any specific head on top.
2057
+ */
2058
+ export class TableTransformerModel extends TableTransformerPreTrainedModel {
2059
+ }
2060
+ /**
2061
+ * Table Transformer Model (consisting of a backbone and encoder-decoder Transformer)
2062
+ * with object detection heads on top, for tasks such as COCO detection.
2063
+ */
2064
+ export class TableTransformerForObjectDetection extends TableTransformerPreTrainedModel {
2065
+ /**
2066
+ * @param {any} model_inputs
2067
+ */
2068
+ _call(model_inputs: any): Promise<TableTransformerObjectDetectionOutput>;
2069
+ }
2070
+ export class TableTransformerObjectDetectionOutput extends DetrObjectDetectionOutput {
2071
+ }
2072
+ export class DeiTPreTrainedModel extends PreTrainedModel {
2073
+ }
2074
+ export class DeiTModel extends DeiTPreTrainedModel {
2075
+ }
2076
+ export class DeiTForImageClassification extends DeiTPreTrainedModel {
2077
+ /**
2078
+ * @param {any} model_inputs
2079
+ */
2080
+ _call(model_inputs: any): Promise<SequenceClassifierOutput>;
2081
+ }
2082
+ /**
2083
+ * An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained models.
2084
+ */
2085
+ export class ResNetPreTrainedModel extends PreTrainedModel {
2086
+ }
2087
+ /**
2088
+ * The bare ResNet model outputting raw features without any specific head on top.
2089
+ */
2090
+ export class ResNetModel extends ResNetPreTrainedModel {
2091
+ }
2092
+ /**
2093
+ * ResNet Model with an image classification head on top (a linear layer on top of the pooled features), e.g. for ImageNet.
2094
+ */
2095
+ export class ResNetForImageClassification extends ResNetPreTrainedModel {
2096
+ /**
2097
+ * @param {any} model_inputs
2098
+ */
2099
+ _call(model_inputs: any): Promise<SequenceClassifierOutput>;
2100
+ }
2101
+ export class SwinPreTrainedModel extends PreTrainedModel {
2102
+ }
2103
+ export class SwinModel extends SwinPreTrainedModel {
2104
+ }
2105
+ export class SwinForImageClassification extends SwinPreTrainedModel {
2106
+ /**
2107
+ * @param {any} model_inputs
2108
+ */
2109
+ _call(model_inputs: any): Promise<SequenceClassifierOutput>;
2110
+ }
2111
+ export class Swin2SRPreTrainedModel extends PreTrainedModel {
2112
+ }
2113
+ /**
2114
+ * The bare Swin2SR Model transformer outputting raw hidden-states without any specific head on top.
2115
+ */
2116
+ export class Swin2SRModel extends Swin2SRPreTrainedModel {
2117
+ }
2118
+ /**
2119
+ * Swin2SR Model transformer with an upsampler head on top for image super resolution and restoration.
2120
+ *
2121
+ * **Example:** Super-resolution w/ `Xenova/swin2SR-classical-sr-x2-64`.
2122
+ *
2123
+ * ```javascript
2124
+ * import { AutoProcessor, Swin2SRForImageSuperResolution, RawImage } from '@huggingface/transformers';
2125
+ *
2126
+ * // Load processor and model
2127
+ * const model_id = 'Xenova/swin2SR-classical-sr-x2-64';
2128
+ * const processor = await AutoProcessor.from_pretrained(model_id);
2129
+ * const model = await Swin2SRForImageSuperResolution.from_pretrained(model_id);
2130
+ *
2131
+ * // Prepare model inputs
2132
+ * const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/butterfly.jpg';
2133
+ * const image = await RawImage.fromURL(url);
2134
+ * const inputs = await processor(image);
2135
+ *
2136
+ * // Run model
2137
+ * const outputs = await model(inputs);
2138
+ *
2139
+ * // Convert Tensor to RawImage
2140
+ * const output = outputs.reconstruction.squeeze().clamp_(0, 1).mul_(255).round_().to('uint8');
2141
+ * const outputImage = RawImage.fromTensor(output);
2142
+ * // RawImage {
2143
+ * // data: Uint8Array(786432) [ 41, 31, 24, ... ],
2144
+ * // width: 512,
2145
+ * // height: 512,
2146
+ * // channels: 3
2147
+ * // }
2148
+ * ```
2149
+ */
2150
+ export class Swin2SRForImageSuperResolution extends Swin2SRPreTrainedModel {
2151
+ }
2152
+ export class DPTPreTrainedModel extends PreTrainedModel {
2153
+ }
2154
+ /**
2155
+ * The bare DPT Model transformer outputting raw hidden-states without any specific head on top.
2156
+ */
2157
+ export class DPTModel extends DPTPreTrainedModel {
2158
+ }
2159
+ /**
2160
+ * DPT Model with a depth estimation head on top (consisting of 3 convolutional layers) e.g. for KITTI, NYUv2.
2161
+ *
2162
+ * **Example:** Depth estimation w/ `Xenova/dpt-hybrid-midas`.
2163
+ * ```javascript
2164
+ * import { DPTForDepthEstimation, AutoProcessor, RawImage, interpolate, max } from '@huggingface/transformers';
2165
+ *
2166
+ * // Load model and processor
2167
+ * const model_id = 'Xenova/dpt-hybrid-midas';
2168
+ * const model = await DPTForDepthEstimation.from_pretrained(model_id);
2169
+ * const processor = await AutoProcessor.from_pretrained(model_id);
2170
+ *
2171
+ * // Load image from URL
2172
+ * const url = 'http://images.cocodataset.org/val2017/000000039769.jpg';
2173
+ * const image = await RawImage.fromURL(url);
2174
+ *
2175
+ * // Prepare image for the model
2176
+ * const inputs = await processor(image);
2177
+ *
2178
+ * // Run model
2179
+ * const { predicted_depth } = await model(inputs);
2180
+ *
2181
+ * // Interpolate to original size
2182
+ * const prediction = interpolate(predicted_depth, image.size.reverse(), 'bilinear', false);
2183
+ *
2184
+ * // Visualize the prediction
2185
+ * const formatted = prediction.mul_(255 / max(prediction.data)[0]).to('uint8');
2186
+ * const depth = RawImage.fromTensor(formatted);
2187
+ * // RawImage {
2188
+ * // data: Uint8Array(307200) [ 85, 85, 84, ... ],
2189
+ * // width: 640,
2190
+ * // height: 480,
2191
+ * // channels: 1
2192
+ * // }
2193
+ * ```
2194
+ */
2195
+ export class DPTForDepthEstimation extends DPTPreTrainedModel {
2196
+ }
2197
+ export class DepthAnythingPreTrainedModel extends PreTrainedModel {
2198
+ }
2199
+ /**
2200
+ * Depth Anything Model with a depth estimation head on top (consisting of 3 convolutional layers) e.g. for KITTI, NYUv2.
2201
+ */
2202
+ export class DepthAnythingForDepthEstimation extends DepthAnythingPreTrainedModel {
2203
+ }
2204
+ export class GLPNPreTrainedModel extends PreTrainedModel {
2205
+ }
2206
+ /**
2207
+ * The bare GLPN encoder (Mix-Transformer) outputting raw hidden-states without any specific head on top.
2208
+ */
2209
+ export class GLPNModel extends GLPNPreTrainedModel {
2210
+ }
2211
+ /**
2212
+ * GLPN Model transformer with a lightweight depth estimation head on top e.g. for KITTI, NYUv2.
2213
+ *
2214
+ * **Example:** Depth estimation w/ `Xenova/glpn-kitti`.
2215
+ * ```javascript
2216
+ * import { GLPNForDepthEstimation, AutoProcessor, RawImage, interpolate, max } from '@huggingface/transformers';
2217
+ *
2218
+ * // Load model and processor
2219
+ * const model_id = 'Xenova/glpn-kitti';
2220
+ * const model = await GLPNForDepthEstimation.from_pretrained(model_id);
2221
+ * const processor = await AutoProcessor.from_pretrained(model_id);
2222
+ *
2223
+ * // Load image from URL
2224
+ * const url = 'http://images.cocodataset.org/val2017/000000039769.jpg';
2225
+ * const image = await RawImage.fromURL(url);
2226
+ *
2227
+ * // Prepare image for the model
2228
+ * const inputs = await processor(image);
2229
+ *
2230
+ * // Run model
2231
+ * const { predicted_depth } = await model(inputs);
2232
+ *
2233
+ * // Interpolate to original size
2234
+ * const prediction = interpolate(predicted_depth, image.size.reverse(), 'bilinear', false);
2235
+ *
2236
+ * // Visualize the prediction
2237
+ * const formatted = prediction.mul_(255 / max(prediction.data)[0]).to('uint8');
2238
+ * const depth = RawImage.fromTensor(formatted);
2239
+ * // RawImage {
2240
+ * // data: Uint8Array(307200) [ 207, 169, 154, ... ],
2241
+ * // width: 640,
2242
+ * // height: 480,
2243
+ * // channels: 1
2244
+ * // }
2245
+ * ```
2246
+ */
2247
+ export class GLPNForDepthEstimation extends GLPNPreTrainedModel {
2248
+ }
2249
+ export class DonutSwinPreTrainedModel extends PreTrainedModel {
2250
+ }
2251
+ /**
2252
+ * The bare Donut Swin Model transformer outputting raw hidden-states without any specific head on top.
2253
+ *
2254
+ * **Example:** Step-by-step Document Parsing.
2255
+ *
2256
+ * ```javascript
2257
+ * import { AutoProcessor, AutoTokenizer, AutoModelForVision2Seq, RawImage } from '@huggingface/transformers';
2258
+ *
2259
+ * // Choose model to use
2260
+ * const model_id = 'Xenova/donut-base-finetuned-cord-v2';
2261
+ *
2262
+ * // Prepare image inputs
2263
+ * const processor = await AutoProcessor.from_pretrained(model_id);
2264
+ * const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/receipt.png';
2265
+ * const image = await RawImage.read(url);
2266
+ * const image_inputs = await processor(image);
2267
+ *
2268
+ * // Prepare decoder inputs
2269
+ * const tokenizer = await AutoTokenizer.from_pretrained(model_id);
2270
+ * const task_prompt = '<s_cord-v2>';
2271
+ * const decoder_input_ids = tokenizer(task_prompt, {
2272
+ * add_special_tokens: false,
2273
+ * }).input_ids;
2274
+ *
2275
+ * // Create the model
2276
+ * const model = await AutoModelForVision2Seq.from_pretrained(model_id);
2277
+ *
2278
+ * // Run inference
2279
+ * const output = await model.generate(image_inputs.pixel_values, {
2280
+ * decoder_input_ids,
2281
+ * max_length: model.config.decoder.max_position_embeddings,
2282
+ * });
2283
+ *
2284
+ * // Decode output
2285
+ * const decoded = tokenizer.batch_decode(output)[0];
2286
+ * // <s_cord-v2><s_menu><s_nm> CINNAMON SUGAR</s_nm><s_unitprice> 17,000</s_unitprice><s_cnt> 1 x</s_cnt><s_price> 17,000</s_price></s_menu><s_sub_total><s_subtotal_price> 17,000</s_subtotal_price></s_sub_total><s_total><s_total_price> 17,000</s_total_price><s_cashprice> 20,000</s_cashprice><s_changeprice> 3,000</s_changeprice></s_total></s>
2287
+ * ```
2288
+ *
2289
+ * **Example:** Step-by-step Document Visual Question Answering (DocVQA)
2290
+ *
2291
+ * ```javascript
2292
+ * import { AutoProcessor, AutoTokenizer, AutoModelForVision2Seq, RawImage } from '@huggingface/transformers';
2293
+ *
2294
+ * // Choose model to use
2295
+ * const model_id = 'Xenova/donut-base-finetuned-docvqa';
2296
+ *
2297
+ * // Prepare image inputs
2298
+ * const processor = await AutoProcessor.from_pretrained(model_id);
2299
+ * const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/invoice.png';
2300
+ * const image = await RawImage.read(url);
2301
+ * const image_inputs = await processor(image);
2302
+ *
2303
+ * // Prepare decoder inputs
2304
+ * const tokenizer = await AutoTokenizer.from_pretrained(model_id);
2305
+ * const question = 'What is the invoice number?';
2306
+ * const task_prompt = `<s_docvqa><s_question>${question}</s_question><s_answer>`;
2307
+ * const decoder_input_ids = tokenizer(task_prompt, {
2308
+ * add_special_tokens: false,
2309
+ * }).input_ids;
2310
+ *
2311
+ * // Create the model
2312
+ * const model = await AutoModelForVision2Seq.from_pretrained(model_id);
2313
+ *
2314
+ * // Run inference
2315
+ * const output = await model.generate(image_inputs.pixel_values, {
2316
+ * decoder_input_ids,
2317
+ * max_length: model.config.decoder.max_position_embeddings,
2318
+ * });
2319
+ *
2320
+ * // Decode output
2321
+ * const decoded = tokenizer.batch_decode(output)[0];
2322
+ * // <s_docvqa><s_question> What is the invoice number?</s_question><s_answer> us-001</s_answer></s>
2323
+ * ```
2324
+ */
2325
+ export class DonutSwinModel extends DonutSwinPreTrainedModel {
2326
+ }
2327
+ export class ConvNextPreTrainedModel extends PreTrainedModel {
2328
+ }
2329
+ /**
2330
+ * The bare ConvNext model outputting raw features without any specific head on top.
2331
+ */
2332
+ export class ConvNextModel extends ConvNextPreTrainedModel {
2333
+ }
2334
+ /**
2335
+ * ConvNext Model with an image classification head on top (a linear layer on top of the pooled features), e.g. for ImageNet.
2336
+ */
2337
+ export class ConvNextForImageClassification extends ConvNextPreTrainedModel {
2338
+ /**
2339
+ * @param {any} model_inputs
2340
+ */
2341
+ _call(model_inputs: any): Promise<SequenceClassifierOutput>;
2342
+ }
2343
+ export class ConvNextV2PreTrainedModel extends PreTrainedModel {
2344
+ }
2345
+ /**
2346
+ * The bare ConvNextV2 model outputting raw features without any specific head on top.
2347
+ */
2348
+ export class ConvNextV2Model extends ConvNextV2PreTrainedModel {
2349
+ }
2350
+ /**
2351
+ * ConvNextV2 Model with an image classification head on top (a linear layer on top of the pooled features), e.g. for ImageNet.
2352
+ */
2353
+ export class ConvNextV2ForImageClassification extends ConvNextV2PreTrainedModel {
2354
+ /**
2355
+ * @param {any} model_inputs
2356
+ */
2357
+ _call(model_inputs: any): Promise<SequenceClassifierOutput>;
2358
+ }
2359
+ export class Dinov2PreTrainedModel extends PreTrainedModel {
2360
+ }
2361
+ /**
2362
+ * The bare DINOv2 Model transformer outputting raw hidden-states without any specific head on top.
2363
+ */
2364
+ export class Dinov2Model extends Dinov2PreTrainedModel {
2365
+ }
2366
+ /**
2367
+ * Dinov2 Model transformer with an image classification head on top (a linear layer on top of the final hidden state of the [CLS] token) e.g. for ImageNet.
2368
+ */
2369
+ export class Dinov2ForImageClassification extends Dinov2PreTrainedModel {
2370
+ /**
2371
+ * @param {any} model_inputs
2372
+ */
2373
+ _call(model_inputs: any): Promise<SequenceClassifierOutput>;
2374
+ }
2375
+ export class YolosPreTrainedModel extends PreTrainedModel {
2376
+ }
2377
+ export class YolosModel extends YolosPreTrainedModel {
2378
+ }
2379
+ export class YolosForObjectDetection extends YolosPreTrainedModel {
2380
+ /**
2381
+ * @param {any} model_inputs
2382
+ */
2383
+ _call(model_inputs: any): Promise<YolosObjectDetectionOutput>;
2384
+ }
2385
+ export class YolosObjectDetectionOutput extends ModelOutput {
2386
+ /**
2387
+ * @param {Object} output The output of the model.
2388
+ * @param {Tensor} output.logits Classification logits (including no-object) for all queries.
2389
+ * @param {Tensor} output.pred_boxes Normalized boxes coordinates for all queries, represented as (center_x, center_y, width, height).
2390
+ * These values are normalized in [0, 1], relative to the size of each individual image in the batch (disregarding possible padding).
2391
+ */
2392
+ constructor({ logits, pred_boxes }: {
2393
+ logits: Tensor;
2394
+ pred_boxes: Tensor;
2395
+ });
2396
+ logits: Tensor;
2397
+ pred_boxes: Tensor;
2398
+ }
2399
+ export class SamPreTrainedModel extends PreTrainedModel {
2400
+ }
2401
+ /**
2402
+ * Segment Anything Model (SAM) for generating segmentation masks, given an input image
2403
+ * and optional 2D location and bounding boxes.
2404
+ *
2405
+ * **Example:** Perform mask generation w/ `Xenova/sam-vit-base`.
2406
+ * ```javascript
2407
+ * import { SamModel, AutoProcessor, RawImage } from '@huggingface/transformers';
2408
+ *
2409
+ * const model = await SamModel.from_pretrained('Xenova/sam-vit-base');
2410
+ * const processor = await AutoProcessor.from_pretrained('Xenova/sam-vit-base');
2411
+ *
2412
+ * const img_url = 'https://huggingface.co/ybelkada/segment-anything/resolve/main/assets/car.png';
2413
+ * const raw_image = await RawImage.read(img_url);
2414
+ * const input_points = [[[450, 600]]] // 2D localization of a window
2415
+ *
2416
+ * const inputs = await processor(raw_image, { input_points });
2417
+ * const outputs = await model(inputs);
2418
+ *
2419
+ * const masks = await processor.post_process_masks(outputs.pred_masks, inputs.original_sizes, inputs.reshaped_input_sizes);
2420
+ * // [
2421
+ * // Tensor {
2422
+ * // dims: [ 1, 3, 1764, 2646 ],
2423
+ * // type: 'bool',
2424
+ * // data: Uint8Array(14002632) [ ... ],
2425
+ * // size: 14002632
2426
+ * // }
2427
+ * // ]
2428
+ * const scores = outputs.iou_scores;
2429
+ * // Tensor {
2430
+ * // dims: [ 1, 1, 3 ],
2431
+ * // type: 'float32',
2432
+ * // data: Float32Array(3) [
2433
+ * // 0.8892380595207214,
2434
+ * // 0.9311248064041138,
2435
+ * // 0.983696699142456
2436
+ * // ],
2437
+ * // size: 3
2438
+ * // }
2439
+ * ```
2440
+ */
2441
+ export class SamModel extends SamPreTrainedModel {
2442
+ /**
2443
+ * Compute image embeddings and positional image embeddings, given the pixel values of an image.
2444
+ * @param {Object} model_inputs Object containing the model inputs.
2445
+ * @param {Tensor} model_inputs.pixel_values Pixel values obtained using a `SamProcessor`.
2446
+ * @returns {Promise<{ image_embeddings: Tensor, image_positional_embeddings: Tensor }>} The image embeddings and positional image embeddings.
2447
+ */
2448
+ get_image_embeddings({ pixel_values }: {
2449
+ pixel_values: Tensor;
2450
+ }): Promise<{
2451
+ image_embeddings: Tensor;
2452
+ image_positional_embeddings: Tensor;
2453
+ }>;
2454
+ /**
2455
+ * @typedef {Object} SamModelInputs Object containing the model inputs.
2456
+ * @property {Tensor} pixel_values Pixel values as a Tensor with shape `(batch_size, num_channels, height, width)`.
2457
+ * These can be obtained using a `SamProcessor`.
2458
+ * @property {Tensor} [input_points] Input 2D spatial points with shape `(batch_size, num_points, 2)`.
2459
+ * This is used by the prompt encoder to encode the prompt.
2460
+ * @property {Tensor} [input_labels] Input labels for the points, as a Tensor of shape `(batch_size, point_batch_size, num_points)`.
2461
+ * This is used by the prompt encoder to encode the prompt. There are 4 types of labels:
2462
+ * - `1`: the point is a point that contains the object of interest
2463
+ * - `0`: the point is a point that does not contain the object of interest
2464
+ * - `-1`: the point corresponds to the background
2465
+ * - `-10`: the point is a padding point, thus should be ignored by the prompt encoder
2466
+ * @property {Tensor} [input_boxes] Input bounding boxes with shape `(batch_size, num_boxes, 4)`.
2467
+ * @property {Tensor} [image_embeddings] Image embeddings used by the mask decoder.
2468
+ * @property {Tensor} [image_positional_embeddings] Image positional embeddings used by the mask decoder.
2469
+ */
2470
+ /**
2471
+ * @param {SamModelInputs} model_inputs Object containing the model inputs.
2472
+ * @returns {Promise<Object>} The output of the model.
2473
+ */
2474
+ forward(model_inputs: {
2475
+ /**
2476
+ * Pixel values as a Tensor with shape `(batch_size, num_channels, height, width)`.
2477
+ * These can be obtained using a `SamProcessor`.
2478
+ */
2479
+ pixel_values: Tensor;
2480
+ /**
2481
+ * Input 2D spatial points with shape `(batch_size, num_points, 2)`.
2482
+ * This is used by the prompt encoder to encode the prompt.
2483
+ */
2484
+ input_points?: Tensor;
2485
+ /**
2486
+ * Input labels for the points, as a Tensor of shape `(batch_size, point_batch_size, num_points)`.
2487
+ * This is used by the prompt encoder to encode the prompt. There are 4 types of labels:
2488
+ * - `1`: the point is a point that contains the object of interest
2489
+ * - `0`: the point is a point that does not contain the object of interest
2490
+ * - `-1`: the point corresponds to the background
2491
+ * - `-10`: the point is a padding point, thus should be ignored by the prompt encoder
2492
+ */
2493
+ input_labels?: Tensor;
2494
+ /**
2495
+ * Input bounding boxes with shape `(batch_size, num_boxes, 4)`.
2496
+ */
2497
+ input_boxes?: Tensor;
2498
+ /**
2499
+ * Image embeddings used by the mask decoder.
2500
+ */
2501
+ image_embeddings?: Tensor;
2502
+ /**
2503
+ * Image positional embeddings used by the mask decoder.
2504
+ */
2505
+ image_positional_embeddings?: Tensor;
2506
+ }): Promise<any>;
2507
+ /**
2508
+ * Runs the model with the provided inputs
2509
+ * @param {Object} model_inputs Model inputs
2510
+ * @returns {Promise<SamImageSegmentationOutput>} Object containing segmentation outputs
2511
+ */
2512
+ _call(model_inputs: any): Promise<SamImageSegmentationOutput>;
2513
+ }
2514
+ /**
2515
+ * Base class for Segment-Anything model's output.
2516
+ */
2517
+ export class SamImageSegmentationOutput extends ModelOutput {
2518
+ /**
2519
+ * @param {Object} output The output of the model.
2520
+ * @param {Tensor} output.iou_scores The predicted IoU (intersection-over-union) scores for each output mask.
2521
+ * @param {Tensor} output.pred_masks The predicted segmentation masks.
2522
+ */
2523
+ constructor({ iou_scores, pred_masks }: {
2524
+ iou_scores: Tensor;
2525
+ pred_masks: Tensor;
2526
+ });
2527
+ iou_scores: Tensor;
2528
+ pred_masks: Tensor;
2529
+ }
2530
+ export class MarianPreTrainedModel extends PreTrainedModel {
2531
+ /**
2532
+ * Creates a new instance of the `MarianMTModel` class.
2533
+ * @param {Object} config The model configuration.
2534
+ * @param {Record<string, any>} sessions The inference sessions for the model.
2535
+ * @param {GenerationConfig} generation_config The generation configuration.
2536
+ */
2537
+ constructor(config: any, sessions: Record<string, any>, generation_config: GenerationConfig);
2538
+ generation_config: GenerationConfig;
2539
+ }
2540
+ export class MarianModel extends MarianPreTrainedModel {
2541
+ }
2542
+ export class MarianMTModel extends MarianPreTrainedModel {
2543
+ }
2544
+ export class M2M100PreTrainedModel extends PreTrainedModel {
2545
+ /**
2546
+ * Creates a new instance of the `M2M100ForConditionalGeneration` class.
2547
+ * @param {Object} config The model configuration.
2548
+ * @param {Record<string, any>} sessions The inference sessions for the model.
2549
+ * @param {GenerationConfig} generation_config The generation configuration.
2550
+ */
2551
+ constructor(config: any, sessions: Record<string, any>, generation_config: GenerationConfig);
2552
+ generation_config: GenerationConfig;
2553
+ }
2554
+ export class M2M100Model extends M2M100PreTrainedModel {
2555
+ }
2556
+ export class M2M100ForConditionalGeneration extends M2M100PreTrainedModel {
2557
+ }
2558
+ export class Wav2Vec2PreTrainedModel extends PreTrainedModel {
2559
+ }
2560
+ /**
2561
+ * The bare Wav2Vec2 Model transformer outputting raw hidden-states without any specific head on top.
2562
+ *
2563
+ * **Example:** Load and run a `Wav2Vec2Model` for feature extraction.
2564
+ *
2565
+ * ```javascript
2566
+ * import { AutoProcessor, AutoModel, read_audio } from '@huggingface/transformers';
2567
+ *
2568
+ * // Read and preprocess audio
2569
+ * const processor = await AutoProcessor.from_pretrained('Xenova/mms-300m');
2570
+ * const audio = await read_audio('https://huggingface.co/datasets/Narsil/asr_dummy/resolve/main/mlk.flac', 16000);
2571
+ * const inputs = await processor(audio);
2572
+ *
2573
+ * // Run model with inputs
2574
+ * const model = await AutoModel.from_pretrained('Xenova/mms-300m');
2575
+ * const output = await model(inputs);
2576
+ * // {
2577
+ * // last_hidden_state: Tensor {
2578
+ * // dims: [ 1, 1144, 1024 ],
2579
+ * // type: 'float32',
2580
+ * // data: Float32Array(1171456) [ ... ],
2581
+ * // size: 1171456
2582
+ * // }
2583
+ * // }
2584
+ * ```
2585
+ */
2586
+ export class Wav2Vec2Model extends Wav2Vec2PreTrainedModel {
2587
+ }
2588
+ export class Wav2Vec2ForCTC extends Wav2Vec2PreTrainedModel {
2589
+ /**
2590
+ * @param {Object} model_inputs
2591
+ * @param {Tensor} model_inputs.input_values Float values of input raw speech waveform.
2592
+ * @param {Tensor} model_inputs.attention_mask Mask to avoid performing convolution and attention on padding token indices. Mask values selected in [0, 1]
2593
+ */
2594
+ _call(model_inputs: {
2595
+ input_values: Tensor;
2596
+ attention_mask: Tensor;
2597
+ }): Promise<CausalLMOutput>;
2598
+ }
2599
+ export class Wav2Vec2ForSequenceClassification extends Wav2Vec2PreTrainedModel {
2600
+ /**
2601
+ * Calls the model on new inputs.
2602
+ * @param {Object} model_inputs The inputs to the model.
2603
+ * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
2604
+ */
2605
+ _call(model_inputs: any): Promise<SequenceClassifierOutput>;
2606
+ }
2607
+ /**
2608
+ * Wav2Vec2 Model with a frame classification head on top for tasks like Speaker Diarization.
2609
+ */
2610
+ export class Wav2Vec2ForAudioFrameClassification extends Wav2Vec2PreTrainedModel {
2611
+ /**
2612
+ * Calls the model on new inputs.
2613
+ * @param {Object} model_inputs The inputs to the model.
2614
+ * @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for sequence classification.
2615
+ */
2616
+ _call(model_inputs: any): Promise<TokenClassifierOutput>;
2617
+ }
2618
+ export class PyAnnotePreTrainedModel extends PreTrainedModel {
2619
+ }
2620
+ /**
2621
+ * The bare PyAnnote Model transformer outputting raw hidden-states without any specific head on top.
2622
+ */
2623
+ export class PyAnnoteModel extends PyAnnotePreTrainedModel {
2624
+ }
2625
+ /**
2626
+ * PyAnnote Model with a frame classification head on top for tasks like Speaker Diarization.
2627
+ *
2628
+ * **Example:** Load and run a `PyAnnoteForAudioFrameClassification` for speaker diarization.
2629
+ *
2630
+ * ```javascript
2631
+ * import { AutoProcessor, AutoModelForAudioFrameClassification, read_audio } from '@huggingface/transformers';
2632
+ *
2633
+ * // Load model and processor
2634
+ * const model_id = 'onnx-community/pyannote-segmentation-3.0';
2635
+ * const model = await AutoModelForAudioFrameClassification.from_pretrained(model_id);
2636
+ * const processor = await AutoProcessor.from_pretrained(model_id);
2637
+ *
2638
+ * // Read and preprocess audio
2639
+ * const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/mlk.wav';
2640
+ * const audio = await read_audio(url, processor.feature_extractor.config.sampling_rate);
2641
+ * const inputs = await processor(audio);
2642
+ *
2643
+ * // Run model with inputs
2644
+ * const { logits } = await model(inputs);
2645
+ * // {
2646
+ * // logits: Tensor {
2647
+ * // dims: [ 1, 767, 7 ], // [batch_size, num_frames, num_classes]
2648
+ * // type: 'float32',
2649
+ * // data: Float32Array(5369) [ ... ],
2650
+ * // size: 5369
2651
+ * // }
2652
+ * // }
2653
+ *
2654
+ * const result = processor.post_process_speaker_diarization(logits, audio.length);
2655
+ * // [
2656
+ * // [
2657
+ * // { id: 0, start: 0, end: 1.0512535626298245, confidence: 0.8220156481664611 },
2658
+ * // { id: 2, start: 1.0512535626298245, end: 2.3398869619825127, confidence: 0.9008811707860472 },
2659
+ * // ...
2660
+ * // ]
2661
+ * // ]
2662
+ *
2663
+ * // Display result
2664
+ * console.table(result[0], ['start', 'end', 'id', 'confidence']);
2665
+ * // ┌─────────┬────────────────────┬────────────────────┬────┬─────────────────────┐
2666
+ * // │ (index) │ start │ end │ id │ confidence │
2667
+ * // ├─────────┼────────────────────┼────────────────────┼────┼─────────────────────┤
2668
+ * // │ 0 │ 0 │ 1.0512535626298245 │ 0 │ 0.8220156481664611 │
2669
+ * // │ 1 │ 1.0512535626298245 │ 2.3398869619825127 │ 2 │ 0.9008811707860472 │
2670
+ * // │ 2 │ 2.3398869619825127 │ 3.5946089560890773 │ 0 │ 0.7521651315796233 │
2671
+ * // │ 3 │ 3.5946089560890773 │ 4.578039708226655 │ 2 │ 0.8491978128022479 │
2672
+ * // │ 4 │ 4.578039708226655 │ 4.594995410849717 │ 0 │ 0.2935352600416393 │
2673
+ * // │ 5 │ 4.594995410849717 │ 6.121008646925269 │ 3 │ 0.6788051309866024 │
2674
+ * // │ 6 │ 6.121008646925269 │ 6.256654267909762 │ 0 │ 0.37125512393851134 │
2675
+ * // │ 7 │ 6.256654267909762 │ 8.630452635138397 │ 2 │ 0.7467035186353542 │
2676
+ * // │ 8 │ 8.630452635138397 │ 10.088643060721703 │ 0 │ 0.7689364814666032 │
2677
+ * // │ 9 │ 10.088643060721703 │ 12.58113134631177 │ 2 │ 0.9123324509131324 │
2678
+ * // │ 10 │ 12.58113134631177 │ 13.005023911888312 │ 0 │ 0.4828358177572041 │
2679
+ * // └─────────┴────────────────────┴────────────────────┴────┴─────────────────────┘
2680
+ * ```
2681
+ */
2682
+ export class PyAnnoteForAudioFrameClassification extends PyAnnotePreTrainedModel {
2683
+ /**
2684
+ * Calls the model on new inputs.
2685
+ * @param {Object} model_inputs The inputs to the model.
2686
+ * @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for sequence classification.
2687
+ */
2688
+ _call(model_inputs: any): Promise<TokenClassifierOutput>;
2689
+ }
2690
+ export class WeSpeakerResNetPreTrainedModel extends PreTrainedModel {
2691
+ }
2692
+ export class WeSpeakerResNetModel extends WeSpeakerResNetPreTrainedModel {
2693
+ }
2694
+ export class UniSpeechPreTrainedModel extends PreTrainedModel {
2695
+ }
2696
+ /**
2697
+ * The bare UniSpeech Model transformer outputting raw hidden-states without any specific head on top.
2698
+ */
2699
+ export class UniSpeechModel extends UniSpeechPreTrainedModel {
2700
+ }
2701
+ /**
2702
+ * UniSpeech Model with a `language modeling` head on top for Connectionist Temporal Classification (CTC).
2703
+ */
2704
+ export class UniSpeechForCTC extends UniSpeechPreTrainedModel {
2705
+ /**
2706
+ * @param {Object} model_inputs
2707
+ * @param {Tensor} model_inputs.input_values Float values of input raw speech waveform.
2708
+ * @param {Tensor} model_inputs.attention_mask Mask to avoid performing convolution and attention on padding token indices. Mask values selected in [0, 1]
2709
+ */
2710
+ _call(model_inputs: {
2711
+ input_values: Tensor;
2712
+ attention_mask: Tensor;
2713
+ }): Promise<CausalLMOutput>;
2714
+ }
2715
+ /**
2716
+ * UniSpeech Model with a sequence classification head on top (a linear layer over the pooled output).
2717
+ */
2718
+ export class UniSpeechForSequenceClassification extends UniSpeechPreTrainedModel {
2719
+ /**
2720
+ * Calls the model on new inputs.
2721
+ * @param {Object} model_inputs The inputs to the model.
2722
+ * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
2723
+ */
2724
+ _call(model_inputs: any): Promise<SequenceClassifierOutput>;
2725
+ }
2726
+ export class UniSpeechSatPreTrainedModel extends PreTrainedModel {
2727
+ }
2728
+ /**
2729
+ * The bare UniSpeechSat Model transformer outputting raw hidden-states without any specific head on top.
2730
+ */
2731
+ export class UniSpeechSatModel extends UniSpeechSatPreTrainedModel {
2732
+ }
2733
+ /**
2734
+ * UniSpeechSat Model with a `language modeling` head on top for Connectionist Temporal Classification (CTC).
2735
+ */
2736
+ export class UniSpeechSatForCTC extends UniSpeechSatPreTrainedModel {
2737
+ /**
2738
+ * @param {Object} model_inputs
2739
+ * @param {Tensor} model_inputs.input_values Float values of input raw speech waveform.
2740
+ * @param {Tensor} model_inputs.attention_mask Mask to avoid performing convolution and attention on padding token indices. Mask values selected in [0, 1]
2741
+ */
2742
+ _call(model_inputs: {
2743
+ input_values: Tensor;
2744
+ attention_mask: Tensor;
2745
+ }): Promise<CausalLMOutput>;
2746
+ }
2747
+ /**
2748
+ * UniSpeechSat Model with a sequence classification head on top (a linear layer over the pooled output).
2749
+ */
2750
+ export class UniSpeechSatForSequenceClassification extends UniSpeechSatPreTrainedModel {
2751
+ /**
2752
+ * Calls the model on new inputs.
2753
+ * @param {Object} model_inputs The inputs to the model.
2754
+ * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
2755
+ */
2756
+ _call(model_inputs: any): Promise<SequenceClassifierOutput>;
2757
+ }
2758
+ /**
2759
+ * UniSpeechSat Model with a frame classification head on top for tasks like Speaker Diarization.
2760
+ */
2761
+ export class UniSpeechSatForAudioFrameClassification extends UniSpeechSatPreTrainedModel {
2762
+ /**
2763
+ * Calls the model on new inputs.
2764
+ * @param {Object} model_inputs The inputs to the model.
2765
+ * @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for sequence classification.
2766
+ */
2767
+ _call(model_inputs: any): Promise<TokenClassifierOutput>;
2768
+ }
2769
+ export class Wav2Vec2BertPreTrainedModel extends PreTrainedModel {
2770
+ }
2771
+ /**
2772
+ * The bare Wav2Vec2Bert Model transformer outputting raw hidden-states without any specific head on top.
2773
+ */
2774
+ export class Wav2Vec2BertModel extends Wav2Vec2BertPreTrainedModel {
2775
+ }
2776
+ /**
2777
+ * Wav2Vec2Bert Model with a `language modeling` head on top for Connectionist Temporal Classification (CTC).
2778
+ */
2779
+ export class Wav2Vec2BertForCTC extends Wav2Vec2BertPreTrainedModel {
2780
+ /**
2781
+ * @param {Object} model_inputs
2782
+ * @param {Tensor} model_inputs.input_features Float values of input mel-spectrogram.
2783
+ * @param {Tensor} model_inputs.attention_mask Mask to avoid performing convolution and attention on padding token indices. Mask values selected in [0, 1]
2784
+ */
2785
+ _call(model_inputs: {
2786
+ input_features: Tensor;
2787
+ attention_mask: Tensor;
2788
+ }): Promise<CausalLMOutput>;
2789
+ }
2790
+ /**
2791
+ * Wav2Vec2Bert Model with a sequence classification head on top (a linear layer over the pooled output).
2792
+ */
2793
+ export class Wav2Vec2BertForSequenceClassification extends Wav2Vec2BertPreTrainedModel {
2794
+ /**
2795
+ * Calls the model on new inputs.
2796
+ * @param {Object} model_inputs The inputs to the model.
2797
+ * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
2798
+ */
2799
+ _call(model_inputs: any): Promise<SequenceClassifierOutput>;
2800
+ }
2801
+ export class HubertPreTrainedModel extends PreTrainedModel {
2802
+ }
2803
+ /**
2804
+ * The bare Hubert Model transformer outputting raw hidden-states without any specific head on top.
2805
+ *
2806
+ * **Example:** Load and run a `HubertModel` for feature extraction.
2807
+ *
2808
+ * ```javascript
2809
+ * import { AutoProcessor, AutoModel, read_audio } from '@huggingface/transformers';
2810
+ *
2811
+ * // Read and preprocess audio
2812
+ * const processor = await AutoProcessor.from_pretrained('Xenova/hubert-base-ls960');
2813
+ * const audio = await read_audio('https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/jfk.wav', 16000);
2814
+ * const inputs = await processor(audio);
2815
+ *
2816
+ * // Load and run model with inputs
2817
+ * const model = await AutoModel.from_pretrained('Xenova/hubert-base-ls960');
2818
+ * const output = await model(inputs);
2819
+ * // {
2820
+ * // last_hidden_state: Tensor {
2821
+ * // dims: [ 1, 549, 768 ],
2822
+ * // type: 'float32',
2823
+ * // data: Float32Array(421632) [0.0682469978928566, 0.08104046434164047, -0.4975186586380005, ...],
2824
+ * // size: 421632
2825
+ * // }
2826
+ * // }
2827
+ * ```
2828
+ */
2829
+ export class HubertModel extends Wav2Vec2PreTrainedModel {
2830
+ }
2831
+ /**
2832
+ * Hubert Model with a `language modeling` head on top for Connectionist Temporal Classification (CTC).
2833
+ */
2834
+ export class HubertForCTC extends Wav2Vec2PreTrainedModel {
2835
+ /**
2836
+ * @param {Object} model_inputs
2837
+ * @param {Tensor} model_inputs.input_values Float values of input raw speech waveform.
2838
+ * @param {Tensor} model_inputs.attention_mask Mask to avoid performing convolution and attention on padding token indices. Mask values selected in [0, 1]
2839
+ */
2840
+ _call(model_inputs: {
2841
+ input_values: Tensor;
2842
+ attention_mask: Tensor;
2843
+ }): Promise<CausalLMOutput>;
2844
+ }
2845
+ /**
2846
+ * Hubert Model with a sequence classification head on top (a linear layer over the pooled output) for tasks like SUPERB Keyword Spotting.
2847
+ */
2848
+ export class HubertForSequenceClassification extends Wav2Vec2PreTrainedModel {
2849
+ /**
2850
+ * Calls the model on new inputs.
2851
+ * @param {Object} model_inputs The inputs to the model.
2852
+ * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
2853
+ */
2854
+ _call(model_inputs: any): Promise<SequenceClassifierOutput>;
2855
+ }
2856
+ /**
2857
+ * An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained models.
2858
+ */
2859
+ export class WavLMPreTrainedModel extends PreTrainedModel {
2860
+ }
2861
+ /**
2862
+ * The bare WavLM Model transformer outputting raw hidden-states without any specific head on top.
2863
+ *
2864
+ * **Example:** Load and run a `WavLMModel` for feature extraction.
2865
+ *
2866
+ * ```javascript
2867
+ * import { AutoProcessor, AutoModel, read_audio } from '@huggingface/transformers';
2868
+ *
2869
+ * // Read and preprocess audio
2870
+ * const processor = await AutoProcessor.from_pretrained('Xenova/wavlm-base');
2871
+ * const audio = await read_audio('https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/jfk.wav', 16000);
2872
+ * const inputs = await processor(audio);
2873
+ *
2874
+ * // Run model with inputs
2875
+ * const model = await AutoModel.from_pretrained('Xenova/wavlm-base');
2876
+ * const output = await model(inputs);
2877
+ * // {
2878
+ * // last_hidden_state: Tensor {
2879
+ * // dims: [ 1, 549, 768 ],
2880
+ * // type: 'float32',
2881
+ * // data: Float32Array(421632) [-0.349443256855011, -0.39341306686401367, 0.022836603224277496, ...],
2882
+ * // size: 421632
2883
+ * // }
2884
+ * // }
2885
+ * ```
2886
+ */
2887
+ export class WavLMModel extends WavLMPreTrainedModel {
2888
+ }
2889
+ /**
2890
+ * WavLM Model with a `language modeling` head on top for Connectionist Temporal Classification (CTC).
2891
+ */
2892
+ export class WavLMForCTC extends WavLMPreTrainedModel {
2893
+ /**
2894
+ * @param {Object} model_inputs
2895
+ * @param {Tensor} model_inputs.input_values Float values of input raw speech waveform.
2896
+ * @param {Tensor} model_inputs.attention_mask Mask to avoid performing convolution and attention on padding token indices. Mask values selected in [0, 1]
2897
+ */
2898
+ _call(model_inputs: {
2899
+ input_values: Tensor;
2900
+ attention_mask: Tensor;
2901
+ }): Promise<CausalLMOutput>;
2902
+ }
2903
+ /**
2904
+ * WavLM Model with a sequence classification head on top (a linear layer over the pooled output).
2905
+ */
2906
+ export class WavLMForSequenceClassification extends WavLMPreTrainedModel {
2907
+ /**
2908
+ * Calls the model on new inputs.
2909
+ * @param {Object} model_inputs The inputs to the model.
2910
+ * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
2911
+ */
2912
+ _call(model_inputs: any): Promise<SequenceClassifierOutput>;
2913
+ }
2914
+ /**
2915
+ * WavLM Model with an XVector feature extraction head on top for tasks like Speaker Verification.
2916
+ *
2917
+ * **Example:** Extract speaker embeddings with `WavLMForXVector`.
2918
+ * ```javascript
2919
+ * import { AutoProcessor, AutoModel, read_audio } from '@huggingface/transformers';
2920
+ *
2921
+ * // Read and preprocess audio
2922
+ * const processor = await AutoProcessor.from_pretrained('Xenova/wavlm-base-plus-sv');
2923
+ * const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/jfk.wav';
2924
+ * const audio = await read_audio(url, 16000);
2925
+ * const inputs = await processor(audio);
2926
+ *
2927
+ * // Run model with inputs
2928
+ * const model = await AutoModel.from_pretrained('Xenova/wavlm-base-plus-sv');
2929
+ * const outputs = await model(inputs);
2930
+ * // {
2931
+ * // logits: Tensor {
2932
+ * // dims: [ 1, 512 ],
2933
+ * // type: 'float32',
2934
+ * // data: Float32Array(512) [0.5847219228744507, ...],
2935
+ * // size: 512
2936
+ * // },
2937
+ * // embeddings: Tensor {
2938
+ * // dims: [ 1, 512 ],
2939
+ * // type: 'float32',
2940
+ * // data: Float32Array(512) [-0.09079201519489288, ...],
2941
+ * // size: 512
2942
+ * // }
2943
+ * // }
2944
+ * ```
2945
+ */
2946
+ export class WavLMForXVector extends WavLMPreTrainedModel {
2947
+ /**
2948
+ * Calls the model on new inputs.
2949
+ * @param {Object} model_inputs The inputs to the model.
2950
+ * @returns {Promise<XVectorOutput>} An object containing the model's output logits and speaker embeddings.
2951
+ */
2952
+ _call(model_inputs: any): Promise<XVectorOutput>;
2953
+ }
2954
+ /**
2955
+ * WavLM Model with a frame classification head on top for tasks like Speaker Diarization.
2956
+ *
2957
+ * **Example:** Perform speaker diarization with `WavLMForAudioFrameClassification`.
2958
+ * ```javascript
2959
+ * import { AutoProcessor, AutoModelForAudioFrameClassification, read_audio } from '@huggingface/transformers';
2960
+ *
2961
+ * // Read and preprocess audio
2962
+ * const processor = await AutoProcessor.from_pretrained('Xenova/wavlm-base-plus-sd');
2963
+ * const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/jfk.wav';
2964
+ * const audio = await read_audio(url, 16000);
2965
+ * const inputs = await processor(audio);
2966
+ *
2967
+ * // Run model with inputs
2968
+ * const model = await AutoModelForAudioFrameClassification.from_pretrained('Xenova/wavlm-base-plus-sd');
2969
+ * const { logits } = await model(inputs);
2970
+ * // {
2971
+ * // logits: Tensor {
2972
+ * // dims: [ 1, 549, 2 ], // [batch_size, num_frames, num_speakers]
2973
+ * // type: 'float32',
2974
+ * // data: Float32Array(1098) [-3.5301010608673096, ...],
2975
+ * // size: 1098
2976
+ * // }
2977
+ * // }
2978
+ *
2979
+ * const labels = logits[0].sigmoid().tolist().map(
2980
+ * frames => frames.map(speaker => speaker > 0.5 ? 1 : 0)
2981
+ * );
2982
+ * console.log(labels); // labels is a one-hot array of shape (num_frames, num_speakers)
2983
+ * // [
2984
+ * // [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0],
2985
+ * // [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0],
2986
+ * // [0, 0], [0, 1], [0, 1], [0, 1], [0, 1], [0, 1],
2987
+ * // ...
2988
+ * // ]
2989
+ * ```
2990
+ */
2991
+ export class WavLMForAudioFrameClassification extends WavLMPreTrainedModel {
2992
+ /**
2993
+ * Calls the model on new inputs.
2994
+ * @param {Object} model_inputs The inputs to the model.
2995
+ * @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for sequence classification.
2996
+ */
2997
+ _call(model_inputs: any): Promise<TokenClassifierOutput>;
2998
+ }
2999
+ /**
3000
+ * An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained models.
3001
+ */
3002
+ export class SpeechT5PreTrainedModel extends PreTrainedModel {
3003
+ /**
3004
+ * Creates a new instance of the `SpeechT5ForTextToSpeech` class.
3005
+ * @param {Object} config The model configuration.
3006
+ * @param {Record<string, any>} sessions The inference sessions for the model.
3007
+ * @param {GenerationConfig} generation_config The generation configuration.
3008
+ */
3009
+ constructor(config: any, sessions: Record<string, any>, generation_config: GenerationConfig);
3010
+ generation_config: GenerationConfig;
3011
+ }
3012
+ /**
3013
+ * The bare SpeechT5 Encoder-Decoder Model outputting raw hidden-states without any specific pre- or post-nets.
3014
+ */
3015
+ export class SpeechT5Model extends SpeechT5PreTrainedModel {
3016
+ }
3017
+ /**
3018
+ * SpeechT5 Model with a speech encoder and a text decoder.
3019
+ *
3020
+ * **Example:** Generate speech from text with `SpeechT5ForSpeechToText`.
3021
+ * ```javascript
3022
+ * import { AutoTokenizer, AutoProcessor, SpeechT5ForTextToSpeech, SpeechT5HifiGan, Tensor } from '@huggingface/transformers';
3023
+ *
3024
+ * // Load the tokenizer and processor
3025
+ * const tokenizer = await AutoTokenizer.from_pretrained('Xenova/speecht5_tts');
3026
+ * const processor = await AutoProcessor.from_pretrained('Xenova/speecht5_tts');
3027
+ *
3028
+ * // Load the models
3029
+ * // NOTE: We use the full-precision versions as they are more accurate
3030
+ * const model = await SpeechT5ForTextToSpeech.from_pretrained('Xenova/speecht5_tts', { dtype: 'fp32' });
3031
+ * const vocoder = await SpeechT5HifiGan.from_pretrained('Xenova/speecht5_hifigan', { dtype: 'fp32' });
3032
+ *
3033
+ * // Load speaker embeddings from URL
3034
+ * const speaker_embeddings_data = new Float32Array(
3035
+ * await (await fetch('https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/speaker_embeddings.bin')).arrayBuffer()
3036
+ * );
3037
+ * const speaker_embeddings = new Tensor(
3038
+ * 'float32',
3039
+ * speaker_embeddings_data,
3040
+ * [1, speaker_embeddings_data.length]
3041
+ * )
3042
+ *
3043
+ * // Run tokenization
3044
+ * const { input_ids } = tokenizer('Hello, my dog is cute');
3045
+ *
3046
+ * // Generate waveform
3047
+ * const { waveform } = await model.generate_speech(input_ids, speaker_embeddings, { vocoder });
3048
+ * console.log(waveform)
3049
+ * // Tensor {
3050
+ * // dims: [ 26112 ],
3051
+ * // type: 'float32',
3052
+ * // size: 26112,
3053
+ * // data: Float32Array(26112) [ -0.00043630177970044315, -0.00018082228780258447, ... ],
3054
+ * // }
3055
+ * ```
3056
+ */
3057
+ export class SpeechT5ForSpeechToText extends SpeechT5PreTrainedModel {
3058
+ }
3059
+ /**
3060
+ * SpeechT5 Model with a text encoder and a speech decoder.
3061
+ */
3062
+ export class SpeechT5ForTextToSpeech extends SpeechT5PreTrainedModel {
3063
+ /**
3064
+ * @typedef {Object} SpeechOutput
3065
+ * @property {Tensor} [spectrogram] The predicted log-mel spectrogram of shape
3066
+ * `(output_sequence_length, config.num_mel_bins)`. Returned when no `vocoder` is provided
3067
+ * @property {Tensor} [waveform] The predicted waveform of shape `(num_frames,)`. Returned when a `vocoder` is provided.
3068
+ * @property {Tensor} [cross_attentions] The outputs of the decoder's cross-attention layers of shape
3069
+ * `(config.decoder_layers, config.decoder_attention_heads, output_sequence_length, input_sequence_length)`. returned when `output_cross_attentions` is `true`.
3070
+ */
3071
+ /**
3072
+ * Converts a sequence of input tokens into a sequence of mel spectrograms, which are subsequently turned into a speech waveform using a vocoder.
3073
+ * @param {Tensor} input_values Indices of input sequence tokens in the vocabulary.
3074
+ * @param {Tensor} speaker_embeddings Tensor containing the speaker embeddings.
3075
+ * @param {Object} options Optional parameters for generating speech.
3076
+ * @param {number} [options.threshold=0.5] The generated sequence ends when the predicted stop token probability exceeds this value.
3077
+ * @param {number} [options.minlenratio=0.0] Used to calculate the minimum required length for the output sequence.
3078
+ * @param {number} [options.maxlenratio=20.0] Used to calculate the maximum allowed length for the output sequence.
3079
+ * @param {Object} [options.vocoder=null] The vocoder that converts the mel spectrogram into a speech waveform. If `null`, the output is the mel spectrogram.
3080
+ * @param {boolean} [options.output_cross_attentions=false] Whether or not to return the attentions tensors of the decoder's cross-attention layers.
3081
+ * @returns {Promise<SpeechOutput>} A promise which resolves to an object containing the spectrogram, waveform, and cross-attention tensors.
3082
+ */
3083
+ generate_speech(input_values: Tensor, speaker_embeddings: Tensor, { threshold, minlenratio, maxlenratio, vocoder, }?: {
3084
+ threshold?: number;
3085
+ minlenratio?: number;
3086
+ maxlenratio?: number;
3087
+ vocoder?: any;
3088
+ output_cross_attentions?: boolean;
3089
+ }): Promise<{
3090
+ /**
3091
+ * The predicted log-mel spectrogram of shape
3092
+ * `(output_sequence_length, config.num_mel_bins)`. Returned when no `vocoder` is provided
3093
+ */
3094
+ spectrogram?: Tensor;
3095
+ /**
3096
+ * The predicted waveform of shape `(num_frames,)`. Returned when a `vocoder` is provided.
3097
+ */
3098
+ waveform?: Tensor;
3099
+ /**
3100
+ * The outputs of the decoder's cross-attention layers of shape
3101
+ * `(config.decoder_layers, config.decoder_attention_heads, output_sequence_length, input_sequence_length)`. returned when `output_cross_attentions` is `true`.
3102
+ */
3103
+ cross_attentions?: Tensor;
3104
+ }>;
3105
+ }
3106
+ /**
3107
+ * HiFi-GAN vocoder.
3108
+ *
3109
+ * See [SpeechT5ForSpeechToText](./models#module_models.SpeechT5ForSpeechToText) for example usage.
3110
+ */
3111
+ export class SpeechT5HifiGan extends PreTrainedModel {
3112
+ }
3113
+ export class TrOCRPreTrainedModel extends PreTrainedModel {
3114
+ /**
3115
+ * Creates a new instance of the `TrOCRPreTrainedModel` class.
3116
+ * @param {Object} config The configuration of the model.
3117
+ * @param {any} session The ONNX session containing the model weights.
3118
+ * @param {GenerationConfig} generation_config The generation configuration.
3119
+ */
3120
+ constructor(config: any, session: any, generation_config: GenerationConfig);
3121
+ generation_config: GenerationConfig;
3122
+ }
3123
+ /**
3124
+ * The TrOCR Decoder with a language modeling head.
3125
+ */
3126
+ export class TrOCRForCausalLM extends TrOCRPreTrainedModel {
3127
+ }
3128
+ /**
3129
+ * The bare Mistral Model outputting raw hidden-states without any specific head on top.
3130
+ */
3131
+ export class MistralPreTrainedModel extends PreTrainedModel {
3132
+ /**
3133
+ * Creates a new instance of the `MistralPreTrainedModel` class.
3134
+ * @param {Object} config The configuration of the model.
3135
+ * @param {any} session The ONNX session containing the model weights.
3136
+ * @param {GenerationConfig} generation_config The generation configuration.
3137
+ */
3138
+ constructor(config: any, session: any, generation_config: GenerationConfig);
3139
+ generation_config: GenerationConfig;
3140
+ }
3141
+ export class MistralModel extends MistralPreTrainedModel {
3142
+ }
3143
+ export class MistralForCausalLM extends MistralPreTrainedModel {
3144
+ }
3145
+ /**
3146
+ * The bare Starcoder2 Model outputting raw hidden-states without any specific head on top.
3147
+ */
3148
+ export class Starcoder2PreTrainedModel extends PreTrainedModel {
3149
+ /**
3150
+ * Creates a new instance of the `Starcoder2PreTrainedModel` class.
3151
+ * @param {Object} config The configuration of the model.
3152
+ * @param {any} session The ONNX session containing the model weights.
3153
+ * @param {GenerationConfig} generation_config The generation configuration.
3154
+ */
3155
+ constructor(config: any, session: any, generation_config: GenerationConfig);
3156
+ generation_config: GenerationConfig;
3157
+ }
3158
+ export class Starcoder2Model extends Starcoder2PreTrainedModel {
3159
+ }
3160
+ export class Starcoder2ForCausalLM extends Starcoder2PreTrainedModel {
3161
+ }
3162
+ /**
3163
+ * The bare Falcon Model outputting raw hidden-states without any specific head on top.
3164
+ */
3165
+ export class FalconPreTrainedModel extends PreTrainedModel {
3166
+ /**
3167
+ * Creates a new instance of the `FalconPreTrainedModel` class.
3168
+ * @param {Object} config The configuration of the model.
3169
+ * @param {any} session The ONNX session containing the model weights.
3170
+ * @param {GenerationConfig} generation_config The generation configuration.
3171
+ */
3172
+ constructor(config: any, session: any, generation_config: GenerationConfig);
3173
+ generation_config: GenerationConfig;
3174
+ }
3175
+ export class FalconModel extends FalconPreTrainedModel {
3176
+ }
3177
+ export class FalconForCausalLM extends FalconPreTrainedModel {
3178
+ }
3179
+ export class ClapPreTrainedModel extends PreTrainedModel {
3180
+ }
3181
+ export class ClapModel extends ClapPreTrainedModel {
3182
+ }
3183
+ /**
3184
+ * CLAP Text Model with a projection layer on top (a linear layer on top of the pooled output).
3185
+ *
3186
+ * **Example:** Compute text embeddings with `ClapTextModelWithProjection`.
3187
+ *
3188
+ * ```javascript
3189
+ * import { AutoTokenizer, ClapTextModelWithProjection } from '@huggingface/transformers';
3190
+ *
3191
+ * // Load tokenizer and text model
3192
+ * const tokenizer = await AutoTokenizer.from_pretrained('Xenova/clap-htsat-unfused');
3193
+ * const text_model = await ClapTextModelWithProjection.from_pretrained('Xenova/clap-htsat-unfused');
3194
+ *
3195
+ * // Run tokenization
3196
+ * const texts = ['a sound of a cat', 'a sound of a dog'];
3197
+ * const text_inputs = tokenizer(texts, { padding: true, truncation: true });
3198
+ *
3199
+ * // Compute embeddings
3200
+ * const { text_embeds } = await text_model(text_inputs);
3201
+ * // Tensor {
3202
+ * // dims: [ 2, 512 ],
3203
+ * // type: 'float32',
3204
+ * // data: Float32Array(1024) [ ... ],
3205
+ * // size: 1024
3206
+ * // }
3207
+ * ```
3208
+ */
3209
+ export class ClapTextModelWithProjection extends ClapPreTrainedModel {
3210
+ }
3211
+ /**
3212
+ * CLAP Audio Model with a projection layer on top (a linear layer on top of the pooled output).
3213
+ *
3214
+ * **Example:** Compute audio embeddings with `ClapAudioModelWithProjection`.
3215
+ *
3216
+ * ```javascript
3217
+ * import { AutoProcessor, ClapAudioModelWithProjection, read_audio } from '@huggingface/transformers';
3218
+ *
3219
+ * // Load processor and audio model
3220
+ * const processor = await AutoProcessor.from_pretrained('Xenova/clap-htsat-unfused');
3221
+ * const audio_model = await ClapAudioModelWithProjection.from_pretrained('Xenova/clap-htsat-unfused');
3222
+ *
3223
+ * // Read audio and run processor
3224
+ * const audio = await read_audio('https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/cat_meow.wav');
3225
+ * const audio_inputs = await processor(audio);
3226
+ *
3227
+ * // Compute embeddings
3228
+ * const { audio_embeds } = await audio_model(audio_inputs);
3229
+ * // Tensor {
3230
+ * // dims: [ 1, 512 ],
3231
+ * // type: 'float32',
3232
+ * // data: Float32Array(512) [ ... ],
3233
+ * // size: 512
3234
+ * // }
3235
+ * ```
3236
+ */
3237
+ export class ClapAudioModelWithProjection extends ClapPreTrainedModel {
3238
+ }
3239
+ export class VitsPreTrainedModel extends PreTrainedModel {
3240
+ }
3241
+ /**
3242
+ * The complete VITS model, for text-to-speech synthesis.
3243
+ *
3244
+ * **Example:** Generate speech from text with `VitsModel`.
3245
+ * ```javascript
3246
+ * import { AutoTokenizer, VitsModel } from '@huggingface/transformers';
3247
+ *
3248
+ * // Load the tokenizer and model
3249
+ * const tokenizer = await AutoTokenizer.from_pretrained('Xenova/mms-tts-eng');
3250
+ * const model = await VitsModel.from_pretrained('Xenova/mms-tts-eng');
3251
+ *
3252
+ * // Run tokenization
3253
+ * const inputs = tokenizer('I love transformers');
3254
+ *
3255
+ * // Generate waveform
3256
+ * const { waveform } = await model(inputs);
3257
+ * // Tensor {
3258
+ * // dims: [ 1, 35328 ],
3259
+ * // type: 'float32',
3260
+ * // data: Float32Array(35328) [ ... ],
3261
+ * // size: 35328,
3262
+ * // }
3263
+ * ```
3264
+ */
3265
+ export class VitsModel extends VitsPreTrainedModel {
3266
+ /**
3267
+ * Calls the model on new inputs.
3268
+ * @param {Object} model_inputs The inputs to the model.
3269
+ * @returns {Promise<VitsModelOutput>} The outputs for the VITS model.
3270
+ */
3271
+ _call(model_inputs: any): Promise<VitsModelOutput>;
3272
+ }
3273
+ export class SegformerPreTrainedModel extends PreTrainedModel {
3274
+ }
3275
+ /**
3276
+ * The bare SegFormer encoder (Mix-Transformer) outputting raw hidden-states without any specific head on top.
3277
+ */
3278
+ export class SegformerModel extends SegformerPreTrainedModel {
3279
+ }
3280
+ /**
3281
+ * SegFormer Model transformer with an image classification head on top (a linear layer on top of the final hidden states) e.g. for ImageNet.
3282
+ */
3283
+ export class SegformerForImageClassification extends SegformerPreTrainedModel {
3284
+ }
3285
+ /**
3286
+ * SegFormer Model transformer with an all-MLP decode head on top e.g. for ADE20k, CityScapes.
3287
+ */
3288
+ export class SegformerForSemanticSegmentation extends SegformerPreTrainedModel {
3289
+ }
3290
+ export class StableLmPreTrainedModel extends PreTrainedModel {
3291
+ /**
3292
+ * Creates a new instance of the `StableLmPreTrainedModel` class.
3293
+ * @param {Object} config The configuration of the model.
3294
+ * @param {any} session The ONNX session containing the model weights.
3295
+ * @param {GenerationConfig} generation_config The generation configuration.
3296
+ */
3297
+ constructor(config: any, session: any, generation_config: GenerationConfig);
3298
+ generation_config: GenerationConfig;
3299
+ }
3300
+ /**
3301
+ * The bare StableLm Model transformer outputting raw hidden-states without any specific head on top.
3302
+ */
3303
+ export class StableLmModel extends StableLmPreTrainedModel {
3304
+ }
3305
+ /**
3306
+ * StableLm Model with a `language modeling` head on top for Causal Language Modeling (with past).
3307
+ */
3308
+ export class StableLmForCausalLM extends StableLmPreTrainedModel {
3309
+ }
3310
+ export class EfficientNetPreTrainedModel extends PreTrainedModel {
3311
+ }
3312
+ /**
3313
+ * The bare EfficientNet model outputting raw features without any specific head on top.
3314
+ */
3315
+ export class EfficientNetModel extends EfficientNetPreTrainedModel {
3316
+ }
3317
+ /**
3318
+ * EfficientNet Model with an image classification head on top (a linear layer on top of the pooled features).
3319
+ */
3320
+ export class EfficientNetForImageClassification extends EfficientNetPreTrainedModel {
3321
+ /**
3322
+ * @param {any} model_inputs
3323
+ */
3324
+ _call(model_inputs: any): Promise<SequenceClassifierOutput>;
3325
+ }
3326
+ export class MusicgenPreTrainedModel extends PreTrainedModel {
3327
+ }
3328
+ /**
3329
+ * The bare Musicgen decoder model outputting raw hidden-states without any specific head on top.
3330
+ */
3331
+ export class MusicgenModel extends MusicgenPreTrainedModel {
3332
+ }
3333
+ /**
3334
+ * The MusicGen decoder model with a language modelling head on top.
3335
+ */
3336
+ export class MusicgenForCausalLM extends MusicgenPreTrainedModel {
3337
+ }
3338
+ /**
3339
+ * The composite MusicGen model with a text encoder, audio encoder and Musicgen decoder,
3340
+ * for music generation tasks with one or both of text and audio prompts.
3341
+ *
3342
+ * **Example:** Generate music from text with `Xenova/musicgen-small`.
3343
+ * ```javascript
3344
+ * import { AutoTokenizer, MusicgenForConditionalGeneration } from '@huggingface/transformers';
3345
+ *
3346
+ * // Load tokenizer and model
3347
+ * const tokenizer = await AutoTokenizer.from_pretrained('Xenova/musicgen-small');
3348
+ * const model = await MusicgenForConditionalGeneration.from_pretrained(
3349
+ * 'Xenova/musicgen-small', { dtype: 'fp32' }
3350
+ * );
3351
+ *
3352
+ * // Prepare text input
3353
+ * const prompt = '80s pop track with bassy drums and synth';
3354
+ * const inputs = tokenizer(prompt);
3355
+ *
3356
+ * // Generate audio
3357
+ * const audio_values = await model.generate({
3358
+ * ...inputs,
3359
+ * max_new_tokens: 512,
3360
+ * do_sample: true,
3361
+ * guidance_scale: 3,
3362
+ * });
3363
+ *
3364
+ * // (Optional) Write the output to a WAV file
3365
+ * import wavefile from 'wavefile';
3366
+ * import fs from 'fs';
3367
+ *
3368
+ * const wav = new wavefile.WaveFile();
3369
+ * wav.fromScratch(1, model.config.audio_encoder.sampling_rate, '32f', audio_values.data);
3370
+ * fs.writeFileSync('musicgen_out.wav', wav.toBuffer());
3371
+ * ```
3372
+ */
3373
+ export class MusicgenForConditionalGeneration extends PreTrainedModel {
3374
+ /**
3375
+ * Creates a new instance of the `MusicgenForConditionalGeneration` class.
3376
+ * @param {Object} config The model configuration.
3377
+ * @param {Record<string, any>} sessions The inference sessions for the model.
3378
+ * @param {GenerationConfig} generation_config The generation configuration.
3379
+ */
3380
+ constructor(config: any, sessions: Record<string, any>, generation_config: GenerationConfig);
3381
+ generation_config: GenerationConfig;
3382
+ /**
3383
+ * Apply the pattern mask to the final ids,
3384
+ * then revert the pattern delay mask by filtering the pad token id in a single step.
3385
+ * @param {Tensor} outputs The output tensor from the model.
3386
+ * @returns {Tensor} The filtered output tensor.
3387
+ */
3388
+ _apply_and_filter_by_delay_pattern_mask(outputs: Tensor): Tensor;
3389
+ prepare_inputs_for_generation(input_ids: any, model_inputs: any, generation_config: any): any;
3390
+ }
3391
+ export class MobileNetV1PreTrainedModel extends PreTrainedModel {
3392
+ }
3393
+ /**
3394
+ * The bare MobileNetV1 model outputting raw hidden-states without any specific head on top.
3395
+ */
3396
+ export class MobileNetV1Model extends MobileNetV1PreTrainedModel {
3397
+ }
3398
+ /**
3399
+ * MobileNetV1 model with an image classification head on top (a linear layer on top of the pooled features),
3400
+ * e.g. for ImageNet.
3401
+ */
3402
+ export class MobileNetV1ForImageClassification extends MobileNetV1PreTrainedModel {
3403
+ /**
3404
+ * @param {any} model_inputs
3405
+ */
3406
+ _call(model_inputs: any): Promise<SequenceClassifierOutput>;
3407
+ }
3408
+ export class MobileNetV2PreTrainedModel extends PreTrainedModel {
3409
+ }
3410
+ /**
3411
+ * The bare MobileNetV2 model outputting raw hidden-states without any specific head on top.
3412
+ */
3413
+ export class MobileNetV2Model extends MobileNetV2PreTrainedModel {
3414
+ }
3415
+ /**
3416
+ * MobileNetV2 model with an image classification head on top (a linear layer on top of the pooled features),
3417
+ * e.g. for ImageNet.
3418
+ */
3419
+ export class MobileNetV2ForImageClassification extends MobileNetV2PreTrainedModel {
3420
+ /**
3421
+ * @param {any} model_inputs
3422
+ */
3423
+ _call(model_inputs: any): Promise<SequenceClassifierOutput>;
3424
+ }
3425
+ export class MobileNetV3PreTrainedModel extends PreTrainedModel {
3426
+ }
3427
+ /**
3428
+ * The bare MobileNetV3 model outputting raw hidden-states without any specific head on top.
3429
+ */
3430
+ export class MobileNetV3Model extends MobileNetV3PreTrainedModel {
3431
+ }
3432
+ /**
3433
+ * MobileNetV3 model with an image classification head on top (a linear layer on top of the pooled features),
3434
+ * e.g. for ImageNet.
3435
+ */
3436
+ export class MobileNetV3ForImageClassification extends MobileNetV3PreTrainedModel {
3437
+ /**
3438
+ * @param {any} model_inputs
3439
+ */
3440
+ _call(model_inputs: any): Promise<SequenceClassifierOutput>;
3441
+ }
3442
+ export class MobileNetV4PreTrainedModel extends PreTrainedModel {
3443
+ }
3444
+ /**
3445
+ * The bare MobileNetV4 model outputting raw hidden-states without any specific head on top.
3446
+ */
3447
+ export class MobileNetV4Model extends MobileNetV4PreTrainedModel {
3448
+ }
3449
+ /**
3450
+ * MobileNetV4 model with an image classification head on top (a linear layer on top of the pooled features),
3451
+ * e.g. for ImageNet.
3452
+ */
3453
+ export class MobileNetV4ForImageClassification extends MobileNetV4PreTrainedModel {
3454
+ /**
3455
+ * @param {any} model_inputs
3456
+ */
3457
+ _call(model_inputs: any): Promise<SequenceClassifierOutput>;
3458
+ }
3459
+ /**
3460
+ * Base class of all AutoModels. Contains the `from_pretrained` function
3461
+ * which is used to instantiate pretrained models.
3462
+ */
3463
+ export class PretrainedMixin {
3464
+ /**
3465
+ * Mapping from model type to model class.
3466
+ * @type {Map<string, Object>[]}
3467
+ */
3468
+ static MODEL_CLASS_MAPPINGS: Map<string, any>[];
3469
+ /**
3470
+ * Whether to attempt to instantiate the base class (`PretrainedModel`) if
3471
+ * the model type is not found in the mapping.
3472
+ */
3473
+ static BASE_IF_FAIL: boolean;
3474
+ /**
3475
+ * Instantiate one of the model classes of the library from a pretrained model.
3476
+ *
3477
+ * The model class to instantiate is selected based on the `model_type` property of the config object
3478
+ * (either passed as an argument or loaded from `pretrained_model_name_or_path` if possible)
3479
+ *
3480
+ * @param {string} pretrained_model_name_or_path The name or path of the pretrained model. Can be either:
3481
+ * - A string, the *model id* of a pretrained model hosted inside a model repo on huggingface.co.
3482
+ * Valid model ids can be located at the root-level, like `bert-base-uncased`, or namespaced under a
3483
+ * user or organization name, like `dbmdz/bert-base-german-cased`.
3484
+ * - A path to a *directory* containing model weights, e.g., `./my_model_directory/`.
3485
+ * @param {import('./utils/hub.js').PretrainedModelOptions} options Additional options for loading the model.
3486
+ *
3487
+ * @returns {Promise<PreTrainedModel>} A new instance of the `PreTrainedModel` class.
3488
+ */
3489
+ static from_pretrained(pretrained_model_name_or_path: string, { progress_callback, config, cache_dir, local_files_only, revision, model_file_name, subfolder, device, dtype, use_external_data_format, session_options, }?: import("./utils/hub.js").PretrainedModelOptions): Promise<PreTrainedModel>;
3490
+ }
3491
+ /**
3492
+ * Helper class which is used to instantiate pretrained models with the `from_pretrained` function.
3493
+ * The chosen model class is determined by the type specified in the model config.
3494
+ *
3495
+ * @example
3496
+ * let model = await AutoModel.from_pretrained('Xenova/bert-base-uncased');
3497
+ */
3498
+ export class AutoModel extends PretrainedMixin {
3499
+ }
3500
+ /**
3501
+ * Helper class which is used to instantiate pretrained sequence classification models with the `from_pretrained` function.
3502
+ * The chosen model class is determined by the type specified in the model config.
3503
+ *
3504
+ * @example
3505
+ * let model = await AutoModelForSequenceClassification.from_pretrained('Xenova/distilbert-base-uncased-finetuned-sst-2-english');
3506
+ */
3507
+ export class AutoModelForSequenceClassification extends PretrainedMixin {
3508
+ static MODEL_CLASS_MAPPINGS: Map<string, (string | typeof BertForSequenceClassification)[] | (string | typeof BartForSequenceClassification)[]>[];
3509
+ }
3510
+ /**
3511
+ * Helper class which is used to instantiate pretrained token classification models with the `from_pretrained` function.
3512
+ * The chosen model class is determined by the type specified in the model config.
3513
+ *
3514
+ * @example
3515
+ * let model = await AutoModelForTokenClassification.from_pretrained('Xenova/distilbert-base-multilingual-cased-ner-hrl');
3516
+ */
3517
+ export class AutoModelForTokenClassification extends PretrainedMixin {
3518
+ static MODEL_CLASS_MAPPINGS: Map<string, (string | typeof BertForTokenClassification)[]>[];
3519
+ }
3520
+ /**
3521
+ * Helper class which is used to instantiate pretrained sequence-to-sequence models with the `from_pretrained` function.
3522
+ * The chosen model class is determined by the type specified in the model config.
3523
+ *
3524
+ * @example
3525
+ * let model = await AutoModelForSeq2SeqLM.from_pretrained('Xenova/t5-small');
3526
+ */
3527
+ export class AutoModelForSeq2SeqLM extends PretrainedMixin {
3528
+ static MODEL_CLASS_MAPPINGS: Map<string, (string | typeof T5ForConditionalGeneration)[]>[];
3529
+ }
3530
+ /**
3531
+ * Helper class which is used to instantiate pretrained sequence-to-sequence speech-to-text models with the `from_pretrained` function.
3532
+ * The chosen model class is determined by the type specified in the model config.
3533
+ *
3534
+ * @example
3535
+ * let model = await AutoModelForSpeechSeq2Seq.from_pretrained('openai/whisper-tiny.en');
3536
+ */
3537
+ export class AutoModelForSpeechSeq2Seq extends PretrainedMixin {
3538
+ static MODEL_CLASS_MAPPINGS: Map<string, (string | typeof SpeechT5ForSpeechToText)[] | (string | typeof WhisperForConditionalGeneration)[]>[];
3539
+ }
3540
+ /**
3541
+ * Helper class which is used to instantiate pretrained sequence-to-sequence text-to-spectrogram models with the `from_pretrained` function.
3542
+ * The chosen model class is determined by the type specified in the model config.
3543
+ *
3544
+ * @example
3545
+ * let model = await AutoModelForTextToSpectrogram.from_pretrained('microsoft/speecht5_tts');
3546
+ */
3547
+ export class AutoModelForTextToSpectrogram extends PretrainedMixin {
3548
+ static MODEL_CLASS_MAPPINGS: Map<string, (string | typeof SpeechT5ForTextToSpeech)[]>[];
3549
+ }
3550
+ /**
3551
+ * Helper class which is used to instantiate pretrained text-to-waveform models with the `from_pretrained` function.
3552
+ * The chosen model class is determined by the type specified in the model config.
3553
+ *
3554
+ * @example
3555
+ * let model = await AutoModelForTextToSpectrogram.from_pretrained('facebook/mms-tts-eng');
3556
+ */
3557
+ export class AutoModelForTextToWaveform extends PretrainedMixin {
3558
+ static MODEL_CLASS_MAPPINGS: Map<string, (string | typeof VitsModel)[] | (string | typeof MusicgenForConditionalGeneration)[]>[];
3559
+ }
3560
+ /**
3561
+ * Helper class which is used to instantiate pretrained causal language models with the `from_pretrained` function.
3562
+ * The chosen model class is determined by the type specified in the model config.
3563
+ *
3564
+ * @example
3565
+ * let model = await AutoModelForCausalLM.from_pretrained('Xenova/gpt2');
3566
+ */
3567
+ export class AutoModelForCausalLM extends PretrainedMixin {
3568
+ static MODEL_CLASS_MAPPINGS: Map<string, (string | typeof BloomForCausalLM)[]>[];
3569
+ }
3570
+ /**
3571
+ * Helper class which is used to instantiate pretrained masked language models with the `from_pretrained` function.
3572
+ * The chosen model class is determined by the type specified in the model config.
3573
+ *
3574
+ * @example
3575
+ * let model = await AutoModelForMaskedLM.from_pretrained('Xenova/bert-base-uncased');
3576
+ */
3577
+ export class AutoModelForMaskedLM extends PretrainedMixin {
3578
+ static MODEL_CLASS_MAPPINGS: Map<string, (string | typeof BertForMaskedLM)[]>[];
3579
+ }
3580
+ /**
3581
+ * Helper class which is used to instantiate pretrained question answering models with the `from_pretrained` function.
3582
+ * The chosen model class is determined by the type specified in the model config.
3583
+ *
3584
+ * @example
3585
+ * let model = await AutoModelForQuestionAnswering.from_pretrained('Xenova/distilbert-base-cased-distilled-squad');
3586
+ */
3587
+ export class AutoModelForQuestionAnswering extends PretrainedMixin {
3588
+ static MODEL_CLASS_MAPPINGS: Map<string, (string | typeof BertForQuestionAnswering)[]>[];
3589
+ }
3590
+ /**
3591
+ * Helper class which is used to instantiate pretrained vision-to-sequence models with the `from_pretrained` function.
3592
+ * The chosen model class is determined by the type specified in the model config.
3593
+ *
3594
+ * @example
3595
+ * let model = await AutoModelForVision2Seq.from_pretrained('Xenova/vit-gpt2-image-captioning');
3596
+ */
3597
+ export class AutoModelForVision2Seq extends PretrainedMixin {
3598
+ static MODEL_CLASS_MAPPINGS: Map<string, (string | typeof VisionEncoderDecoderModel)[]>[];
3599
+ }
3600
+ /**
3601
+ * Helper class which is used to instantiate pretrained image classification models with the `from_pretrained` function.
3602
+ * The chosen model class is determined by the type specified in the model config.
3603
+ *
3604
+ * @example
3605
+ * let model = await AutoModelForImageClassification.from_pretrained('Xenova/vit-base-patch16-224');
3606
+ */
3607
+ export class AutoModelForImageClassification extends PretrainedMixin {
3608
+ static MODEL_CLASS_MAPPINGS: Map<string, (string | typeof SegformerForImageClassification)[]>[];
3609
+ }
3610
+ /**
3611
+ * Helper class which is used to instantiate pretrained image segmentation models with the `from_pretrained` function.
3612
+ * The chosen model class is determined by the type specified in the model config.
3613
+ *
3614
+ * @example
3615
+ * let model = await AutoModelForImageSegmentation.from_pretrained('Xenova/detr-resnet-50-panoptic');
3616
+ */
3617
+ export class AutoModelForImageSegmentation extends PretrainedMixin {
3618
+ static MODEL_CLASS_MAPPINGS: Map<string, (string | typeof CLIPSegForImageSegmentation)[]>[];
3619
+ }
3620
+ /**
3621
+ * Helper class which is used to instantiate pretrained image segmentation models with the `from_pretrained` function.
3622
+ * The chosen model class is determined by the type specified in the model config.
3623
+ *
3624
+ * @example
3625
+ * let model = await AutoModelForSemanticSegmentation.from_pretrained('nvidia/segformer-b3-finetuned-cityscapes-1024-1024');
3626
+ */
3627
+ export class AutoModelForSemanticSegmentation extends PretrainedMixin {
3628
+ static MODEL_CLASS_MAPPINGS: Map<string, (string | typeof SegformerForSemanticSegmentation)[]>[];
3629
+ }
3630
+ /**
3631
+ * Helper class which is used to instantiate pretrained object detection models with the `from_pretrained` function.
3632
+ * The chosen model class is determined by the type specified in the model config.
3633
+ *
3634
+ * @example
3635
+ * let model = await AutoModelForObjectDetection.from_pretrained('Xenova/detr-resnet-50');
3636
+ */
3637
+ export class AutoModelForObjectDetection extends PretrainedMixin {
3638
+ static MODEL_CLASS_MAPPINGS: Map<string, (string | typeof DetrForObjectDetection)[]>[];
3639
+ }
3640
+ export class AutoModelForZeroShotObjectDetection extends PretrainedMixin {
3641
+ static MODEL_CLASS_MAPPINGS: Map<string, (string | typeof OwlViTForObjectDetection)[]>[];
3642
+ }
3643
+ /**
3644
+ * Helper class which is used to instantiate pretrained mask generation models with the `from_pretrained` function.
3645
+ * The chosen model class is determined by the type specified in the model config.
3646
+ *
3647
+ * @example
3648
+ * let model = await AutoModelForMaskGeneration.from_pretrained('Xenova/sam-vit-base');
3649
+ */
3650
+ export class AutoModelForMaskGeneration extends PretrainedMixin {
3651
+ static MODEL_CLASS_MAPPINGS: Map<string, (string | typeof SamModel)[]>[];
3652
+ }
3653
+ export class AutoModelForCTC extends PretrainedMixin {
3654
+ static MODEL_CLASS_MAPPINGS: Map<string, (string | typeof Wav2Vec2ForCTC)[] | (string | typeof Wav2Vec2BertForCTC)[]>[];
3655
+ }
3656
+ export class AutoModelForAudioClassification extends PretrainedMixin {
3657
+ static MODEL_CLASS_MAPPINGS: Map<string, (string | typeof ASTForAudioClassification)[]>[];
3658
+ }
3659
+ export class AutoModelForXVector extends PretrainedMixin {
3660
+ static MODEL_CLASS_MAPPINGS: Map<string, (string | typeof WavLMForXVector)[]>[];
3661
+ }
3662
+ export class AutoModelForAudioFrameClassification extends PretrainedMixin {
3663
+ static MODEL_CLASS_MAPPINGS: Map<string, (string | typeof UniSpeechSatForAudioFrameClassification)[]>[];
3664
+ }
3665
+ export class AutoModelForDocumentQuestionAnswering extends PretrainedMixin {
3666
+ static MODEL_CLASS_MAPPINGS: Map<string, (string | typeof VisionEncoderDecoderModel)[]>[];
3667
+ }
3668
+ export class AutoModelForImageMatting extends PretrainedMixin {
3669
+ static MODEL_CLASS_MAPPINGS: Map<string, (string | typeof VitMatteForImageMatting)[]>[];
3670
+ }
3671
+ export class AutoModelForImageToImage extends PretrainedMixin {
3672
+ static MODEL_CLASS_MAPPINGS: Map<string, (string | typeof Swin2SRForImageSuperResolution)[]>[];
3673
+ }
3674
+ export class AutoModelForDepthEstimation extends PretrainedMixin {
3675
+ static MODEL_CLASS_MAPPINGS: Map<string, (string | typeof DPTForDepthEstimation)[]>[];
3676
+ }
3677
+ export class AutoModelForImageFeatureExtraction extends PretrainedMixin {
3678
+ static MODEL_CLASS_MAPPINGS: Map<string, (string | typeof CLIPVisionModelWithProjection)[]>[];
3679
+ }
3680
+ export class Seq2SeqLMOutput extends ModelOutput {
3681
+ /**
3682
+ * @param {Object} output The output of the model.
3683
+ * @param {Tensor} output.logits The output logits of the model.
3684
+ * @param {Tensor} output.past_key_values An tensor of key/value pairs that represent the previous state of the model.
3685
+ * @param {Tensor} output.encoder_outputs The output of the encoder in a sequence-to-sequence model.
3686
+ * @param {Tensor} [output.decoder_attentions] Attentions weights of the decoder, after the attention softmax, used to compute the weighted average in the self-attention heads.
3687
+ * @param {Tensor} [output.cross_attentions] Attentions weights of the decoder's cross-attention layer, after the attention softmax, used to compute the weighted average in the cross-attention heads.
3688
+ */
3689
+ constructor({ logits, past_key_values, encoder_outputs, decoder_attentions, cross_attentions }: {
3690
+ logits: Tensor;
3691
+ past_key_values: Tensor;
3692
+ encoder_outputs: Tensor;
3693
+ decoder_attentions?: Tensor;
3694
+ cross_attentions?: Tensor;
3695
+ });
3696
+ logits: Tensor;
3697
+ past_key_values: Tensor;
3698
+ encoder_outputs: Tensor;
3699
+ decoder_attentions: Tensor;
3700
+ cross_attentions: Tensor;
3701
+ }
3702
+ /**
3703
+ * Base class for outputs of sentence classification models.
3704
+ */
3705
+ export class SequenceClassifierOutput extends ModelOutput {
3706
+ /**
3707
+ * @param {Object} output The output of the model.
3708
+ * @param {Tensor} output.logits classification (or regression if config.num_labels==1) scores (before SoftMax).
3709
+ */
3710
+ constructor({ logits }: {
3711
+ logits: Tensor;
3712
+ });
3713
+ logits: Tensor;
3714
+ }
3715
+ /**
3716
+ * Base class for outputs of XVector models.
3717
+ */
3718
+ export class XVectorOutput extends ModelOutput {
3719
+ /**
3720
+ * @param {Object} output The output of the model.
3721
+ * @param {Tensor} output.logits Classification hidden states before AMSoftmax, of shape `(batch_size, config.xvector_output_dim)`.
3722
+ * @param {Tensor} output.embeddings Utterance embeddings used for vector similarity-based retrieval, of shape `(batch_size, config.xvector_output_dim)`.
3723
+ */
3724
+ constructor({ logits, embeddings }: {
3725
+ logits: Tensor;
3726
+ embeddings: Tensor;
3727
+ });
3728
+ logits: Tensor;
3729
+ embeddings: Tensor;
3730
+ }
3731
+ /**
3732
+ * Base class for outputs of token classification models.
3733
+ */
3734
+ export class TokenClassifierOutput extends ModelOutput {
3735
+ /**
3736
+ * @param {Object} output The output of the model.
3737
+ * @param {Tensor} output.logits Classification scores (before SoftMax).
3738
+ */
3739
+ constructor({ logits }: {
3740
+ logits: Tensor;
3741
+ });
3742
+ logits: Tensor;
3743
+ }
3744
+ /**
3745
+ * Base class for masked language models outputs.
3746
+ */
3747
+ export class MaskedLMOutput extends ModelOutput {
3748
+ /**
3749
+ * @param {Object} output The output of the model.
3750
+ * @param {Tensor} output.logits Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
3751
+ */
3752
+ constructor({ logits }: {
3753
+ logits: Tensor;
3754
+ });
3755
+ logits: Tensor;
3756
+ }
3757
+ /**
3758
+ * Base class for outputs of question answering models.
3759
+ */
3760
+ export class QuestionAnsweringModelOutput extends ModelOutput {
3761
+ /**
3762
+ * @param {Object} output The output of the model.
3763
+ * @param {Tensor} output.start_logits Span-start scores (before SoftMax).
3764
+ * @param {Tensor} output.end_logits Span-end scores (before SoftMax).
3765
+ */
3766
+ constructor({ start_logits, end_logits }: {
3767
+ start_logits: Tensor;
3768
+ end_logits: Tensor;
3769
+ });
3770
+ start_logits: Tensor;
3771
+ end_logits: Tensor;
3772
+ }
3773
+ /**
3774
+ * Base class for causal language model (or autoregressive) outputs.
3775
+ */
3776
+ export class CausalLMOutput extends ModelOutput {
3777
+ /**
3778
+ * @param {Object} output The output of the model.
3779
+ * @param {Tensor} output.logits Prediction scores of the language modeling head (scores for each vocabulary token before softmax).
3780
+ */
3781
+ constructor({ logits }: {
3782
+ logits: Tensor;
3783
+ });
3784
+ logits: Tensor;
3785
+ }
3786
+ /**
3787
+ * Base class for causal language model (or autoregressive) outputs.
3788
+ */
3789
+ export class CausalLMOutputWithPast extends ModelOutput {
3790
+ /**
3791
+ * @param {Object} output The output of the model.
3792
+ * @param {Tensor} output.logits Prediction scores of the language modeling head (scores for each vocabulary token before softmax).
3793
+ * @param {Tensor} output.past_key_values Contains pre-computed hidden-states (key and values in the self-attention blocks)
3794
+ * that can be used (see `past_key_values` input) to speed up sequential decoding.
3795
+ */
3796
+ constructor({ logits, past_key_values }: {
3797
+ logits: Tensor;
3798
+ past_key_values: Tensor;
3799
+ });
3800
+ logits: Tensor;
3801
+ past_key_values: Tensor;
3802
+ }
3803
+ export class ImageMattingOutput extends ModelOutput {
3804
+ /**
3805
+ * @param {Object} output The output of the model.
3806
+ * @param {Tensor} output.alphas Estimated alpha values, of shape `(batch_size, num_channels, height, width)`.
3807
+ */
3808
+ constructor({ alphas }: {
3809
+ alphas: Tensor;
3810
+ });
3811
+ alphas: Tensor;
3812
+ }
3813
+ /**
3814
+ * Describes the outputs for the VITS model.
3815
+ */
3816
+ export class VitsModelOutput extends ModelOutput {
3817
+ /**
3818
+ * @param {Object} output The output of the model.
3819
+ * @param {Tensor} output.waveform The final audio waveform predicted by the model, of shape `(batch_size, sequence_length)`.
3820
+ * @param {Tensor} output.spectrogram The log-mel spectrogram predicted at the output of the flow model.
3821
+ * This spectrogram is passed to the Hi-Fi GAN decoder model to obtain the final audio waveform.
3822
+ */
3823
+ constructor({ waveform, spectrogram }: {
3824
+ waveform: Tensor;
3825
+ spectrogram: Tensor;
3826
+ });
3827
+ waveform: Tensor;
3828
+ spectrogram: Tensor;
3829
+ }
3830
+ /**
3831
+ * Forward pass of a decoder model.
3832
+ * @param {Object} self The decoder model.
3833
+ * @param {Object} model_inputs The input data to be used for the forward pass.
3834
+ * @returns {Promise<Object>} The logits and past key values.
3835
+ * @private
3836
+ */
3837
+ declare function decoderForward(self: any, model_inputs: any, is_encoder_decoder?: boolean): Promise<any>;
3838
+ declare function image_text_to_text_prepare_inputs_for_generation(self: any, ...args: any[]): any;
3839
+ import { GenerationConfig } from './generation/configuration_utils.js';
3840
+ import { LogitsProcessorList } from './generation/logits_process.js';
3841
+ import { StoppingCriteriaList } from './generation/stopping_criteria.js';
3842
+ import { Tensor } from './utils/tensor.js';
3843
+ import { WhisperGenerationConfig } from './models/whisper/generation_whisper.js';
3844
+ export {};
3845
+ //# sourceMappingURL=models.d.ts.map