@huggingface/transformers 3.4.0 → 3.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -2
- package/dist/transformers.js +528 -201
- package/dist/transformers.js.map +1 -1
- package/dist/transformers.min.js +1 -1
- package/dist/transformers.min.js.map +1 -1
- package/dist/transformers.node.cjs +508 -200
- package/dist/transformers.node.cjs.map +1 -1
- package/dist/transformers.node.min.cjs +1 -1
- package/dist/transformers.node.min.cjs.map +1 -1
- package/dist/transformers.node.min.mjs +1 -1
- package/dist/transformers.node.min.mjs.map +1 -1
- package/dist/transformers.node.mjs +528 -201
- package/dist/transformers.node.mjs.map +1 -1
- package/dist/transformers.web.js +528 -201
- package/dist/transformers.web.js.map +1 -1
- package/dist/transformers.web.min.js +1 -1
- package/dist/transformers.web.min.js.map +1 -1
- package/package.json +1 -1
- package/src/configs.js +2 -0
- package/src/env.js +1 -1
- package/src/models/feature_extractors.js +1 -0
- package/src/models/snac/feature_extraction_snac.js +3 -0
- package/src/models.js +125 -2
- package/src/pipelines.js +140 -135
- package/src/tokenizers.js +44 -34
- package/src/utils/data-structures.js +74 -0
- package/src/utils/hub.js +36 -15
- package/src/utils/image.js +9 -1
- package/src/utils/tensor.js +6 -2
- package/types/configs.d.ts.map +1 -1
- package/types/models/feature_extractors.d.ts +1 -0
- package/types/models/snac/feature_extraction_snac.d.ts +4 -0
- package/types/models/snac/feature_extraction_snac.d.ts.map +1 -0
- package/types/models.d.ts +72 -0
- package/types/models.d.ts.map +1 -1
- package/types/pipelines.d.ts +2 -2
- package/types/pipelines.d.ts.map +1 -1
- package/types/tokenizers.d.ts +4 -1
- package/types/tokenizers.d.ts.map +1 -1
- package/types/tsconfig.tsbuildinfo +1 -1
- package/types/utils/data-structures.d.ts +26 -0
- package/types/utils/data-structures.d.ts.map +1 -1
- package/types/utils/hub.d.ts.map +1 -1
- package/types/utils/image.d.ts +2 -2
- package/types/utils/image.d.ts.map +1 -1
- package/types/utils/tensor.d.ts.map +1 -1
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@huggingface/transformers",
|
|
3
|
-
"version": "3.4.
|
|
3
|
+
"version": "3.4.2",
|
|
4
4
|
"description": "State-of-the-art Machine Learning for the web. Run 🤗 Transformers directly in your browser, with no need for a server!",
|
|
5
5
|
"main": "./src/transformers.js",
|
|
6
6
|
"types": "./types/transformers.d.ts",
|
package/src/configs.js
CHANGED
|
@@ -67,6 +67,7 @@ function getNormalizedConfig(config) {
|
|
|
67
67
|
// Sub-configs
|
|
68
68
|
case 'llava':
|
|
69
69
|
case 'paligemma':
|
|
70
|
+
case 'gemma3':
|
|
70
71
|
case 'florence2':
|
|
71
72
|
case 'llava_onevision':
|
|
72
73
|
case 'idefics3':
|
|
@@ -126,6 +127,7 @@ function getNormalizedConfig(config) {
|
|
|
126
127
|
break;
|
|
127
128
|
case 'gemma':
|
|
128
129
|
case 'gemma2':
|
|
130
|
+
case 'gemma3_text':
|
|
129
131
|
case 'glm':
|
|
130
132
|
case 'helium':
|
|
131
133
|
mapping['num_heads'] = 'num_key_value_heads';
|
package/src/env.js
CHANGED
|
@@ -26,7 +26,7 @@ import fs from 'fs';
|
|
|
26
26
|
import path from 'path';
|
|
27
27
|
import url from 'url';
|
|
28
28
|
|
|
29
|
-
const VERSION = '3.4.0';
|
|
29
|
+
const VERSION = '3.4.2';
|
|
30
30
|
|
|
31
31
|
// Check if various APIs are available (depends on environment)
|
|
32
32
|
const IS_BROWSER_ENV = typeof window !== "undefined" && typeof window.document !== "undefined";
|
|
@@ -6,6 +6,7 @@ export * from './dac/feature_extraction_dac.js';
|
|
|
6
6
|
export * from './moonshine/feature_extraction_moonshine.js';
|
|
7
7
|
export * from './pyannote/feature_extraction_pyannote.js';
|
|
8
8
|
export * from './seamless_m4t/feature_extraction_seamless_m4t.js';
|
|
9
|
+
export * from './snac/feature_extraction_snac.js';
|
|
9
10
|
export * from './speecht5/feature_extraction_speecht5.js';
|
|
10
11
|
export * from './wav2vec2/feature_extraction_wav2vec2.js';
|
|
11
12
|
export * from './wespeaker/feature_extraction_wespeaker.js';
|
package/src/models.js
CHANGED
|
@@ -594,8 +594,8 @@ async function decoderForward(self, model_inputs, is_encoder_decoder = false) {
|
|
|
594
594
|
new_model_inputs.use_cache_branch = boolTensor(!!past_key_values);
|
|
595
595
|
}
|
|
596
596
|
if (session.inputNames.includes('position_ids') && new_model_inputs.attention_mask && !new_model_inputs.position_ids) {
|
|
597
|
-
// NOTE: Handle a special case for paligemma models, where positions are 1-indexed
|
|
598
|
-
const start_index = self.config.model_type === 'paligemma' ? 1 : 0;
|
|
597
|
+
// NOTE: Handle a special case for paligemma/gemma3 models, where positions are 1-indexed
|
|
598
|
+
const start_index = ['paligemma', 'gemma3_text', 'gemma3'].includes(self.config.model_type) ? 1 : 0;
|
|
599
599
|
new_model_inputs.position_ids = createPositionIds(new_model_inputs, past_key_values, start_index);
|
|
600
600
|
}
|
|
601
601
|
|
|
@@ -4520,6 +4520,23 @@ export class Gemma2Model extends Gemma2PreTrainedModel { }
|
|
|
4520
4520
|
export class Gemma2ForCausalLM extends Gemma2PreTrainedModel { }
|
|
4521
4521
|
//////////////////////////////////////////////////
|
|
4522
4522
|
|
|
4523
|
+
|
|
4524
|
+
//////////////////////////////////////////////////
|
|
4525
|
+
// Gemma3 models
|
|
4526
|
+
|
|
4527
|
+
/**
|
|
4528
|
+
* The bare Gemma3 Model outputting raw hidden-states without any specific head on top.
|
|
4529
|
+
*/
|
|
4530
|
+
export class Gemma3PreTrainedModel extends PreTrainedModel { }
|
|
4531
|
+
/**
|
|
4532
|
+
* The bare Gemma3 Model outputting raw hidden-states without any specific head on top.
|
|
4533
|
+
*/
|
|
4534
|
+
export class Gemma3Model extends Gemma3PreTrainedModel { }
|
|
4535
|
+
|
|
4536
|
+
export class Gemma3ForCausalLM extends Gemma3PreTrainedModel { }
|
|
4537
|
+
//////////////////////////////////////////////////
|
|
4538
|
+
|
|
4539
|
+
|
|
4523
4540
|
//////////////////////////////////////////////////
|
|
4524
4541
|
export class OpenELMPreTrainedModel extends PreTrainedModel { }
|
|
4525
4542
|
export class OpenELMModel extends OpenELMPreTrainedModel { }
|
|
@@ -5164,6 +5181,37 @@ export class RTDetrObjectDetectionOutput extends ModelOutput {
|
|
|
5164
5181
|
}
|
|
5165
5182
|
//////////////////////////////////////////////////
|
|
5166
5183
|
|
|
5184
|
+
|
|
5185
|
+
//////////////////////////////////////////////////
|
|
5186
|
+
export class RTDetrV2PreTrainedModel extends PreTrainedModel { }
|
|
5187
|
+
export class RTDetrV2Model extends RTDetrV2PreTrainedModel { }
|
|
5188
|
+
export class RTDetrV2ForObjectDetection extends RTDetrV2PreTrainedModel {
|
|
5189
|
+
/**
|
|
5190
|
+
* @param {any} model_inputs
|
|
5191
|
+
*/
|
|
5192
|
+
async _call(model_inputs) {
|
|
5193
|
+
return new RTDetrV2ObjectDetectionOutput(await super._call(model_inputs));
|
|
5194
|
+
}
|
|
5195
|
+
}
|
|
5196
|
+
|
|
5197
|
+
export class RTDetrV2ObjectDetectionOutput extends RTDetrObjectDetectionOutput {}
|
|
5198
|
+
//////////////////////////////////////////////////
|
|
5199
|
+
|
|
5200
|
+
//////////////////////////////////////////////////
|
|
5201
|
+
export class RFDetrPreTrainedModel extends PreTrainedModel { }
|
|
5202
|
+
export class RFDetrModel extends RFDetrPreTrainedModel { }
|
|
5203
|
+
export class RFDetrForObjectDetection extends RFDetrPreTrainedModel {
|
|
5204
|
+
/**
|
|
5205
|
+
* @param {any} model_inputs
|
|
5206
|
+
*/
|
|
5207
|
+
async _call(model_inputs) {
|
|
5208
|
+
return new RFDetrObjectDetectionOutput(await super._call(model_inputs));
|
|
5209
|
+
}
|
|
5210
|
+
}
|
|
5211
|
+
|
|
5212
|
+
export class RFDetrObjectDetectionOutput extends RTDetrObjectDetectionOutput {}
|
|
5213
|
+
//////////////////////////////////////////////////
|
|
5214
|
+
|
|
5167
5215
|
//////////////////////////////////////////////////
|
|
5168
5216
|
export class TableTransformerPreTrainedModel extends PreTrainedModel { }
|
|
5169
5217
|
|
|
@@ -5372,6 +5420,16 @@ export class DepthProPreTrainedModel extends PreTrainedModel { }
|
|
|
5372
5420
|
export class DepthProForDepthEstimation extends DepthProPreTrainedModel { }
|
|
5373
5421
|
//////////////////////////////////////////////////
|
|
5374
5422
|
|
|
5423
|
+
//////////////////////////////////////////////////
|
|
5424
|
+
export class Metric3DPreTrainedModel extends PreTrainedModel { }
|
|
5425
|
+
export class Metric3DForDepthEstimation extends Metric3DPreTrainedModel { }
|
|
5426
|
+
//////////////////////////////////////////////////
|
|
5427
|
+
|
|
5428
|
+
//////////////////////////////////////////////////
|
|
5429
|
+
export class Metric3Dv2PreTrainedModel extends PreTrainedModel { }
|
|
5430
|
+
export class Metric3Dv2ForDepthEstimation extends Metric3Dv2PreTrainedModel { }
|
|
5431
|
+
//////////////////////////////////////////////////
|
|
5432
|
+
|
|
5375
5433
|
//////////////////////////////////////////////////
|
|
5376
5434
|
export class MaskFormerPreTrainedModel extends PreTrainedModel { }
|
|
5377
5435
|
export class MaskFormerModel extends MaskFormerPreTrainedModel { }
|
|
@@ -7287,6 +7345,60 @@ export class DacDecoderModel extends DacPreTrainedModel {
|
|
|
7287
7345
|
}
|
|
7288
7346
|
//////////////////////////////////////////////////
|
|
7289
7347
|
|
|
7348
|
+
|
|
7349
|
+
//////////////////////////////////////////////////
|
|
7350
|
+
// Snac models
|
|
7351
|
+
export class SnacPreTrainedModel extends PreTrainedModel {
|
|
7352
|
+
main_input_name = 'input_values';
|
|
7353
|
+
forward_params = ['input_values'];
|
|
7354
|
+
}
|
|
7355
|
+
|
|
7356
|
+
/**
|
|
7357
|
+
* The SNAC (Multi-Scale Neural Audio Codec) model.
|
|
7358
|
+
*/
|
|
7359
|
+
export class SnacModel extends SnacPreTrainedModel {
|
|
7360
|
+
/**
|
|
7361
|
+
* Encodes the input audio waveform into discrete codes.
|
|
7362
|
+
* @param {Object} inputs Model inputs
|
|
7363
|
+
* @param {Tensor} [inputs.input_values] Float values of the input audio waveform, of shape `(batch_size, channels, sequence_length)`).
|
|
7364
|
+
* @returns {Promise<Record<string, Tensor>>} The output tensors of shape `(batch_size, num_codebooks, sequence_length)`.
|
|
7365
|
+
*/
|
|
7366
|
+
async encode(inputs) {
|
|
7367
|
+
return await sessionRun(this.sessions['encoder_model'], inputs);
|
|
7368
|
+
}
|
|
7369
|
+
|
|
7370
|
+
/**
|
|
7371
|
+
* Decodes the given frames into an output audio waveform.
|
|
7372
|
+
* @param {Record<string, Tensor>} inputs The encoded audio codes.
|
|
7373
|
+
* @returns {Promise<{audio_values: Tensor}>} The output tensor of shape `(batch_size, num_channels, sequence_length)`.
|
|
7374
|
+
*/
|
|
7375
|
+
async decode(inputs) {
|
|
7376
|
+
return await sessionRun(this.sessions['decoder_model'], inputs);
|
|
7377
|
+
}
|
|
7378
|
+
}
|
|
7379
|
+
|
|
7380
|
+
export class SnacEncoderModel extends SnacPreTrainedModel {
|
|
7381
|
+
/** @type {typeof PreTrainedModel.from_pretrained} */
|
|
7382
|
+
static async from_pretrained(pretrained_model_name_or_path, options = {}) {
|
|
7383
|
+
return super.from_pretrained(pretrained_model_name_or_path, {
|
|
7384
|
+
...options,
|
|
7385
|
+
// Update default model file name if not provided
|
|
7386
|
+
model_file_name: options.model_file_name ?? 'encoder_model',
|
|
7387
|
+
});
|
|
7388
|
+
}
|
|
7389
|
+
}
|
|
7390
|
+
export class SnacDecoderModel extends SnacPreTrainedModel {
|
|
7391
|
+
/** @type {typeof PreTrainedModel.from_pretrained} */
|
|
7392
|
+
static async from_pretrained(pretrained_model_name_or_path, options = {}) {
|
|
7393
|
+
return super.from_pretrained(pretrained_model_name_or_path, {
|
|
7394
|
+
...options,
|
|
7395
|
+
// Update default model file name if not provided
|
|
7396
|
+
model_file_name: options.model_file_name ?? 'decoder_model',
|
|
7397
|
+
});
|
|
7398
|
+
}
|
|
7399
|
+
}
|
|
7400
|
+
//////////////////////////////////////////////////
|
|
7401
|
+
|
|
7290
7402
|
//////////////////////////////////////////////////
|
|
7291
7403
|
// AutoModels, used to simplify construction of PreTrainedModels
|
|
7292
7404
|
// (uses config to instantiate correct class)
|
|
@@ -7407,6 +7519,8 @@ const MODEL_MAPPING_NAMES_ENCODER_ONLY = new Map([
|
|
|
7407
7519
|
|
|
7408
7520
|
['detr', ['DetrModel', DetrModel]],
|
|
7409
7521
|
['rt_detr', ['RTDetrModel', RTDetrModel]],
|
|
7522
|
+
['rt_detr_v2', ['RTDetrV2Model', RTDetrV2Model]],
|
|
7523
|
+
['rf_detr', ['RFDetrModel', RFDetrModel]],
|
|
7410
7524
|
['table-transformer', ['TableTransformerModel', TableTransformerModel]],
|
|
7411
7525
|
['vit', ['ViTModel', ViTModel]],
|
|
7412
7526
|
['ijepa', ['IJepaModel', IJepaModel]],
|
|
@@ -7468,6 +7582,7 @@ const MODEL_MAPPING_NAMES_ENCODER_DECODER = new Map([
|
|
|
7468
7582
|
const MODEL_MAPPING_NAMES_AUTO_ENCODER = new Map([
|
|
7469
7583
|
['mimi', ['MimiModel', MimiModel]],
|
|
7470
7584
|
['dac', ['DacModel', DacModel]],
|
|
7585
|
+
['snac', ['SnacModel', SnacModel]],
|
|
7471
7586
|
]);
|
|
7472
7587
|
|
|
7473
7588
|
const MODEL_MAPPING_NAMES_DECODER_ONLY = new Map([
|
|
@@ -7488,6 +7603,7 @@ const MODEL_MAPPING_NAMES_DECODER_ONLY = new Map([
|
|
|
7488
7603
|
['cohere', ['CohereModel', CohereModel]],
|
|
7489
7604
|
['gemma', ['GemmaModel', GemmaModel]],
|
|
7490
7605
|
['gemma2', ['Gemma2Model', Gemma2Model]],
|
|
7606
|
+
['gemma3_text', ['Gemma3Model', Gemma3Model]],
|
|
7491
7607
|
['helium', ['HeliumModel', HeliumModel]],
|
|
7492
7608
|
['glm', ['GlmModel', GlmModel]],
|
|
7493
7609
|
['openelm', ['OpenELMModel', OpenELMModel]],
|
|
@@ -7587,6 +7703,7 @@ const MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = new Map([
|
|
|
7587
7703
|
['cohere', ['CohereForCausalLM', CohereForCausalLM]],
|
|
7588
7704
|
['gemma', ['GemmaForCausalLM', GemmaForCausalLM]],
|
|
7589
7705
|
['gemma2', ['Gemma2ForCausalLM', Gemma2ForCausalLM]],
|
|
7706
|
+
['gemma3_text', ['Gemma3ForCausalLM', Gemma3ForCausalLM]],
|
|
7590
7707
|
['helium', ['HeliumForCausalLM', HeliumForCausalLM]],
|
|
7591
7708
|
['glm', ['GlmForCausalLM', GlmForCausalLM]],
|
|
7592
7709
|
['openelm', ['OpenELMForCausalLM', OpenELMForCausalLM]],
|
|
@@ -7703,6 +7820,8 @@ const MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES = new Map([
|
|
|
7703
7820
|
const MODEL_FOR_OBJECT_DETECTION_MAPPING_NAMES = new Map([
|
|
7704
7821
|
['detr', ['DetrForObjectDetection', DetrForObjectDetection]],
|
|
7705
7822
|
['rt_detr', ['RTDetrForObjectDetection', RTDetrForObjectDetection]],
|
|
7823
|
+
['rt_detr_v2', ['RTDetrV2ForObjectDetection', RTDetrV2ForObjectDetection]],
|
|
7824
|
+
['rf_detr', ['RFDetrForObjectDetection', RFDetrForObjectDetection]],
|
|
7706
7825
|
['table-transformer', ['TableTransformerForObjectDetection', TableTransformerForObjectDetection]],
|
|
7707
7826
|
['yolos', ['YolosForObjectDetection', YolosForObjectDetection]],
|
|
7708
7827
|
]);
|
|
@@ -7788,6 +7907,8 @@ const MODEL_FOR_DEPTH_ESTIMATION_MAPPING_NAMES = new Map([
|
|
|
7788
7907
|
['glpn', ['GLPNForDepthEstimation', GLPNForDepthEstimation]],
|
|
7789
7908
|
['sapiens', ['SapiensForDepthEstimation', SapiensForDepthEstimation]],
|
|
7790
7909
|
['depth_pro', ['DepthProForDepthEstimation', DepthProForDepthEstimation]],
|
|
7910
|
+
['metric3d', ['Metric3DForDepthEstimation', Metric3DForDepthEstimation]],
|
|
7911
|
+
['metric3dv2', ['Metric3Dv2ForDepthEstimation', Metric3Dv2ForDepthEstimation]],
|
|
7791
7912
|
])
|
|
7792
7913
|
|
|
7793
7914
|
const MODEL_FOR_NORMAL_ESTIMATION_MAPPING_NAMES = new Map([
|
|
@@ -7873,6 +7994,8 @@ const CUSTOM_MAPPING = [
|
|
|
7873
7994
|
['DacDecoderModel', DacDecoderModel, MODEL_TYPES.EncoderOnly],
|
|
7874
7995
|
['MimiEncoderModel', MimiEncoderModel, MODEL_TYPES.EncoderOnly],
|
|
7875
7996
|
['MimiDecoderModel', MimiDecoderModel, MODEL_TYPES.EncoderOnly],
|
|
7997
|
+
['SnacEncoderModel', SnacEncoderModel, MODEL_TYPES.EncoderOnly],
|
|
7998
|
+
['SnacDecoderModel', SnacDecoderModel, MODEL_TYPES.EncoderOnly],
|
|
7876
7999
|
]
|
|
7877
8000
|
for (const [name, model, type] of CUSTOM_MAPPING) {
|
|
7878
8001
|
MODEL_TYPE_MAPPING.set(name, type);
|