@huggingface/transformers 3.4.0 → 3.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. package/README.md +8 -2
  2. package/dist/transformers.js +528 -201
  3. package/dist/transformers.js.map +1 -1
  4. package/dist/transformers.min.js +1 -1
  5. package/dist/transformers.min.js.map +1 -1
  6. package/dist/transformers.node.cjs +508 -200
  7. package/dist/transformers.node.cjs.map +1 -1
  8. package/dist/transformers.node.min.cjs +1 -1
  9. package/dist/transformers.node.min.cjs.map +1 -1
  10. package/dist/transformers.node.min.mjs +1 -1
  11. package/dist/transformers.node.min.mjs.map +1 -1
  12. package/dist/transformers.node.mjs +528 -201
  13. package/dist/transformers.node.mjs.map +1 -1
  14. package/dist/transformers.web.js +528 -201
  15. package/dist/transformers.web.js.map +1 -1
  16. package/dist/transformers.web.min.js +1 -1
  17. package/dist/transformers.web.min.js.map +1 -1
  18. package/package.json +1 -1
  19. package/src/configs.js +2 -0
  20. package/src/env.js +1 -1
  21. package/src/models/feature_extractors.js +1 -0
  22. package/src/models/snac/feature_extraction_snac.js +3 -0
  23. package/src/models.js +125 -2
  24. package/src/pipelines.js +140 -135
  25. package/src/tokenizers.js +44 -34
  26. package/src/utils/data-structures.js +74 -0
  27. package/src/utils/hub.js +36 -15
  28. package/src/utils/image.js +9 -1
  29. package/src/utils/tensor.js +6 -2
  30. package/types/configs.d.ts.map +1 -1
  31. package/types/models/feature_extractors.d.ts +1 -0
  32. package/types/models/snac/feature_extraction_snac.d.ts +4 -0
  33. package/types/models/snac/feature_extraction_snac.d.ts.map +1 -0
  34. package/types/models.d.ts +72 -0
  35. package/types/models.d.ts.map +1 -1
  36. package/types/pipelines.d.ts +2 -2
  37. package/types/pipelines.d.ts.map +1 -1
  38. package/types/tokenizers.d.ts +4 -1
  39. package/types/tokenizers.d.ts.map +1 -1
  40. package/types/tsconfig.tsbuildinfo +1 -1
  41. package/types/utils/data-structures.d.ts +26 -0
  42. package/types/utils/data-structures.d.ts.map +1 -1
  43. package/types/utils/hub.d.ts.map +1 -1
  44. package/types/utils/image.d.ts +2 -2
  45. package/types/utils/image.d.ts.map +1 -1
  46. package/types/utils/tensor.d.ts.map +1 -1
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@huggingface/transformers",
3
- "version": "3.4.0",
3
+ "version": "3.4.2",
4
4
  "description": "State-of-the-art Machine Learning for the web. Run 🤗 Transformers directly in your browser, with no need for a server!",
5
5
  "main": "./src/transformers.js",
6
6
  "types": "./types/transformers.d.ts",
package/src/configs.js CHANGED
@@ -67,6 +67,7 @@ function getNormalizedConfig(config) {
67
67
  // Sub-configs
68
68
  case 'llava':
69
69
  case 'paligemma':
70
+ case 'gemma3':
70
71
  case 'florence2':
71
72
  case 'llava_onevision':
72
73
  case 'idefics3':
@@ -126,6 +127,7 @@ function getNormalizedConfig(config) {
126
127
  break;
127
128
  case 'gemma':
128
129
  case 'gemma2':
130
+ case 'gemma3_text':
129
131
  case 'glm':
130
132
  case 'helium':
131
133
  mapping['num_heads'] = 'num_key_value_heads';
package/src/env.js CHANGED
@@ -26,7 +26,7 @@ import fs from 'fs';
26
26
  import path from 'path';
27
27
  import url from 'url';
28
28
 
29
- const VERSION = '3.4.0';
29
+ const VERSION = '3.4.2';
30
30
 
31
31
  // Check if various APIs are available (depends on environment)
32
32
  const IS_BROWSER_ENV = typeof window !== "undefined" && typeof window.document !== "undefined";
package/src/models/feature_extractors.js CHANGED
@@ -6,6 +6,7 @@ export * from './dac/feature_extraction_dac.js';
6
6
  export * from './moonshine/feature_extraction_moonshine.js';
7
7
  export * from './pyannote/feature_extraction_pyannote.js';
8
8
  export * from './seamless_m4t/feature_extraction_seamless_m4t.js';
9
+ export * from './snac/feature_extraction_snac.js';
9
10
  export * from './speecht5/feature_extraction_speecht5.js';
10
11
  export * from './wav2vec2/feature_extraction_wav2vec2.js';
11
12
  export * from './wespeaker/feature_extraction_wespeaker.js';
package/src/models/snac/feature_extraction_snac.js ADDED
@@ -0,0 +1,3 @@
1
+ import { DacFeatureExtractor } from '../dac/feature_extraction_dac.js';
2
+
3
+ export class SnacFeatureExtractor extends DacFeatureExtractor { }
package/src/models.js CHANGED
@@ -594,8 +594,8 @@ async function decoderForward(self, model_inputs, is_encoder_decoder = false) {
594
594
  new_model_inputs.use_cache_branch = boolTensor(!!past_key_values);
595
595
  }
596
596
  if (session.inputNames.includes('position_ids') && new_model_inputs.attention_mask && !new_model_inputs.position_ids) {
597
- // NOTE: Handle a special case for paligemma models, where positions are 1-indexed
598
- const start_index = self.config.model_type === 'paligemma' ? 1 : 0;
597
+ // NOTE: Handle a special case for paligemma/gemma3 models, where positions are 1-indexed
598
+ const start_index = ['paligemma', 'gemma3_text', 'gemma3'].includes(self.config.model_type) ? 1 : 0;
599
599
  new_model_inputs.position_ids = createPositionIds(new_model_inputs, past_key_values, start_index);
600
600
  }
601
601
 
@@ -4520,6 +4520,23 @@ export class Gemma2Model extends Gemma2PreTrainedModel { }
4520
4520
  export class Gemma2ForCausalLM extends Gemma2PreTrainedModel { }
4521
4521
  //////////////////////////////////////////////////
4522
4522
 
4523
+
4524
+ //////////////////////////////////////////////////
4525
+ // Gemma3 models
4526
+
4527
+ /**
4528
+ * The bare Gemma3 Model outputting raw hidden-states without any specific head on top.
4529
+ */
4530
+ export class Gemma3PreTrainedModel extends PreTrainedModel { }
4531
+ /**
4532
+ * The bare Gemma3 Model outputting raw hidden-states without any specific head on top.
4533
+ */
4534
+ export class Gemma3Model extends Gemma3PreTrainedModel { }
4535
+
4536
+ export class Gemma3ForCausalLM extends Gemma3PreTrainedModel { }
4537
+ //////////////////////////////////////////////////
4538
+
4539
+
4523
4540
  //////////////////////////////////////////////////
4524
4541
  export class OpenELMPreTrainedModel extends PreTrainedModel { }
4525
4542
  export class OpenELMModel extends OpenELMPreTrainedModel { }
@@ -5164,6 +5181,37 @@ export class RTDetrObjectDetectionOutput extends ModelOutput {
5164
5181
  }
5165
5182
  //////////////////////////////////////////////////
5166
5183
 
5184
+
5185
+ //////////////////////////////////////////////////
5186
+ export class RTDetrV2PreTrainedModel extends PreTrainedModel { }
5187
+ export class RTDetrV2Model extends RTDetrV2PreTrainedModel { }
5188
+ export class RTDetrV2ForObjectDetection extends RTDetrV2PreTrainedModel {
5189
+ /**
5190
+ * @param {any} model_inputs
5191
+ */
5192
+ async _call(model_inputs) {
5193
+ return new RTDetrV2ObjectDetectionOutput(await super._call(model_inputs));
5194
+ }
5195
+ }
5196
+
5197
+ export class RTDetrV2ObjectDetectionOutput extends RTDetrObjectDetectionOutput {}
5198
+ //////////////////////////////////////////////////
5199
+
5200
+ //////////////////////////////////////////////////
5201
+ export class RFDetrPreTrainedModel extends PreTrainedModel { }
5202
+ export class RFDetrModel extends RFDetrPreTrainedModel { }
5203
+ export class RFDetrForObjectDetection extends RFDetrPreTrainedModel {
5204
+ /**
5205
+ * @param {any} model_inputs
5206
+ */
5207
+ async _call(model_inputs) {
5208
+ return new RFDetrObjectDetectionOutput(await super._call(model_inputs));
5209
+ }
5210
+ }
5211
+
5212
+ export class RFDetrObjectDetectionOutput extends RTDetrObjectDetectionOutput {}
5213
+ //////////////////////////////////////////////////
5214
+
5167
5215
  //////////////////////////////////////////////////
5168
5216
  export class TableTransformerPreTrainedModel extends PreTrainedModel { }
5169
5217
 
@@ -5372,6 +5420,16 @@ export class DepthProPreTrainedModel extends PreTrainedModel { }
5372
5420
  export class DepthProForDepthEstimation extends DepthProPreTrainedModel { }
5373
5421
  //////////////////////////////////////////////////
5374
5422
 
5423
+ //////////////////////////////////////////////////
5424
+ export class Metric3DPreTrainedModel extends PreTrainedModel { }
5425
+ export class Metric3DForDepthEstimation extends Metric3DPreTrainedModel { }
5426
+ //////////////////////////////////////////////////
5427
+
5428
+ //////////////////////////////////////////////////
5429
+ export class Metric3Dv2PreTrainedModel extends PreTrainedModel { }
5430
+ export class Metric3Dv2ForDepthEstimation extends Metric3Dv2PreTrainedModel { }
5431
+ //////////////////////////////////////////////////
5432
+
5375
5433
  //////////////////////////////////////////////////
5376
5434
  export class MaskFormerPreTrainedModel extends PreTrainedModel { }
5377
5435
  export class MaskFormerModel extends MaskFormerPreTrainedModel { }
@@ -7287,6 +7345,60 @@ export class DacDecoderModel extends DacPreTrainedModel {
7287
7345
  }
7288
7346
  //////////////////////////////////////////////////
7289
7347
 
7348
+
7349
+ //////////////////////////////////////////////////
7350
+ // Snac models
7351
+ export class SnacPreTrainedModel extends PreTrainedModel {
7352
+ main_input_name = 'input_values';
7353
+ forward_params = ['input_values'];
7354
+ }
7355
+
7356
+ /**
7357
+ * The SNAC (Multi-Scale Neural Audio Codec) model.
7358
+ */
7359
+ export class SnacModel extends SnacPreTrainedModel {
7360
+ /**
7361
+ * Encodes the input audio waveform into discrete codes.
7362
+ * @param {Object} inputs Model inputs
7363
+ * @param {Tensor} [inputs.input_values] Float values of the input audio waveform, of shape `(batch_size, channels, sequence_length)`).
7364
+ * @returns {Promise<Record<string, Tensor>>} The output tensors of shape `(batch_size, num_codebooks, sequence_length)`.
7365
+ */
7366
+ async encode(inputs) {
7367
+ return await sessionRun(this.sessions['encoder_model'], inputs);
7368
+ }
7369
+
7370
+ /**
7371
+ * Decodes the given frames into an output audio waveform.
7372
+ * @param {Record<string, Tensor>} inputs The encoded audio codes.
7373
+ * @returns {Promise<{audio_values: Tensor}>} The output tensor of shape `(batch_size, num_channels, sequence_length)`.
7374
+ */
7375
+ async decode(inputs) {
7376
+ return await sessionRun(this.sessions['decoder_model'], inputs);
7377
+ }
7378
+ }
7379
+
7380
+ export class SnacEncoderModel extends SnacPreTrainedModel {
7381
+ /** @type {typeof PreTrainedModel.from_pretrained} */
7382
+ static async from_pretrained(pretrained_model_name_or_path, options = {}) {
7383
+ return super.from_pretrained(pretrained_model_name_or_path, {
7384
+ ...options,
7385
+ // Update default model file name if not provided
7386
+ model_file_name: options.model_file_name ?? 'encoder_model',
7387
+ });
7388
+ }
7389
+ }
7390
+ export class SnacDecoderModel extends SnacPreTrainedModel {
7391
+ /** @type {typeof PreTrainedModel.from_pretrained} */
7392
+ static async from_pretrained(pretrained_model_name_or_path, options = {}) {
7393
+ return super.from_pretrained(pretrained_model_name_or_path, {
7394
+ ...options,
7395
+ // Update default model file name if not provided
7396
+ model_file_name: options.model_file_name ?? 'decoder_model',
7397
+ });
7398
+ }
7399
+ }
7400
+ //////////////////////////////////////////////////
7401
+
7290
7402
  //////////////////////////////////////////////////
7291
7403
  // AutoModels, used to simplify construction of PreTrainedModels
7292
7404
  // (uses config to instantiate correct class)
@@ -7407,6 +7519,8 @@ const MODEL_MAPPING_NAMES_ENCODER_ONLY = new Map([
7407
7519
 
7408
7520
  ['detr', ['DetrModel', DetrModel]],
7409
7521
  ['rt_detr', ['RTDetrModel', RTDetrModel]],
7522
+ ['rt_detr_v2', ['RTDetrV2Model', RTDetrV2Model]],
7523
+ ['rf_detr', ['RFDetrModel', RFDetrModel]],
7410
7524
  ['table-transformer', ['TableTransformerModel', TableTransformerModel]],
7411
7525
  ['vit', ['ViTModel', ViTModel]],
7412
7526
  ['ijepa', ['IJepaModel', IJepaModel]],
@@ -7468,6 +7582,7 @@ const MODEL_MAPPING_NAMES_ENCODER_DECODER = new Map([
7468
7582
  const MODEL_MAPPING_NAMES_AUTO_ENCODER = new Map([
7469
7583
  ['mimi', ['MimiModel', MimiModel]],
7470
7584
  ['dac', ['DacModel', DacModel]],
7585
+ ['snac', ['SnacModel', SnacModel]],
7471
7586
  ]);
7472
7587
 
7473
7588
  const MODEL_MAPPING_NAMES_DECODER_ONLY = new Map([
@@ -7488,6 +7603,7 @@ const MODEL_MAPPING_NAMES_DECODER_ONLY = new Map([
7488
7603
  ['cohere', ['CohereModel', CohereModel]],
7489
7604
  ['gemma', ['GemmaModel', GemmaModel]],
7490
7605
  ['gemma2', ['Gemma2Model', Gemma2Model]],
7606
+ ['gemma3_text', ['Gemma3Model', Gemma3Model]],
7491
7607
  ['helium', ['HeliumModel', HeliumModel]],
7492
7608
  ['glm', ['GlmModel', GlmModel]],
7493
7609
  ['openelm', ['OpenELMModel', OpenELMModel]],
@@ -7587,6 +7703,7 @@ const MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = new Map([
7587
7703
  ['cohere', ['CohereForCausalLM', CohereForCausalLM]],
7588
7704
  ['gemma', ['GemmaForCausalLM', GemmaForCausalLM]],
7589
7705
  ['gemma2', ['Gemma2ForCausalLM', Gemma2ForCausalLM]],
7706
+ ['gemma3_text', ['Gemma3ForCausalLM', Gemma3ForCausalLM]],
7590
7707
  ['helium', ['HeliumForCausalLM', HeliumForCausalLM]],
7591
7708
  ['glm', ['GlmForCausalLM', GlmForCausalLM]],
7592
7709
  ['openelm', ['OpenELMForCausalLM', OpenELMForCausalLM]],
@@ -7703,6 +7820,8 @@ const MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES = new Map([
7703
7820
  const MODEL_FOR_OBJECT_DETECTION_MAPPING_NAMES = new Map([
7704
7821
  ['detr', ['DetrForObjectDetection', DetrForObjectDetection]],
7705
7822
  ['rt_detr', ['RTDetrForObjectDetection', RTDetrForObjectDetection]],
7823
+ ['rt_detr_v2', ['RTDetrV2ForObjectDetection', RTDetrV2ForObjectDetection]],
7824
+ ['rf_detr', ['RFDetrForObjectDetection', RFDetrForObjectDetection]],
7706
7825
  ['table-transformer', ['TableTransformerForObjectDetection', TableTransformerForObjectDetection]],
7707
7826
  ['yolos', ['YolosForObjectDetection', YolosForObjectDetection]],
7708
7827
  ]);
@@ -7788,6 +7907,8 @@ const MODEL_FOR_DEPTH_ESTIMATION_MAPPING_NAMES = new Map([
7788
7907
  ['glpn', ['GLPNForDepthEstimation', GLPNForDepthEstimation]],
7789
7908
  ['sapiens', ['SapiensForDepthEstimation', SapiensForDepthEstimation]],
7790
7909
  ['depth_pro', ['DepthProForDepthEstimation', DepthProForDepthEstimation]],
7910
+ ['metric3d', ['Metric3DForDepthEstimation', Metric3DForDepthEstimation]],
7911
+ ['metric3dv2', ['Metric3Dv2ForDepthEstimation', Metric3Dv2ForDepthEstimation]],
7791
7912
  ])
7792
7913
 
7793
7914
  const MODEL_FOR_NORMAL_ESTIMATION_MAPPING_NAMES = new Map([
@@ -7873,6 +7994,8 @@ const CUSTOM_MAPPING = [
7873
7994
  ['DacDecoderModel', DacDecoderModel, MODEL_TYPES.EncoderOnly],
7874
7995
  ['MimiEncoderModel', MimiEncoderModel, MODEL_TYPES.EncoderOnly],
7875
7996
  ['MimiDecoderModel', MimiDecoderModel, MODEL_TYPES.EncoderOnly],
7997
+ ['SnacEncoderModel', SnacEncoderModel, MODEL_TYPES.EncoderOnly],
7998
+ ['SnacDecoderModel', SnacDecoderModel, MODEL_TYPES.EncoderOnly],
7876
7999
  ]
7877
8000
  for (const [name, model, type] of CUSTOM_MAPPING) {
7878
8001
  MODEL_TYPE_MAPPING.set(name, type);