@huggingface/transformers 3.4.0 → 3.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/README.md +6 -2
  2. package/dist/transformers.js +315 -152
  3. package/dist/transformers.js.map +1 -1
  4. package/dist/transformers.min.js +1 -1
  5. package/dist/transformers.min.js.map +1 -1
  6. package/dist/transformers.node.cjs +303 -151
  7. package/dist/transformers.node.cjs.map +1 -1
  8. package/dist/transformers.node.min.cjs +1 -1
  9. package/dist/transformers.node.min.cjs.map +1 -1
  10. package/dist/transformers.node.min.mjs +1 -1
  11. package/dist/transformers.node.min.mjs.map +1 -1
  12. package/dist/transformers.node.mjs +315 -152
  13. package/dist/transformers.node.mjs.map +1 -1
  14. package/dist/transformers.web.js +315 -152
  15. package/dist/transformers.web.js.map +1 -1
  16. package/dist/transformers.web.min.js +1 -1
  17. package/dist/transformers.web.min.js.map +1 -1
  18. package/package.json +1 -1
  19. package/src/configs.js +2 -0
  20. package/src/env.js +1 -1
  21. package/src/models/feature_extractors.js +1 -0
  22. package/src/models/snac/feature_extraction_snac.js +3 -0
  23. package/src/models.js +90 -2
  24. package/src/pipelines.js +140 -135
  25. package/src/utils/image.js +9 -1
  26. package/src/utils/tensor.js +6 -2
  27. package/types/configs.d.ts.map +1 -1
  28. package/types/models/feature_extractors.d.ts +1 -0
  29. package/types/models/snac/feature_extraction_snac.d.ts +4 -0
  30. package/types/models/snac/feature_extraction_snac.d.ts.map +1 -0
  31. package/types/models.d.ts +48 -0
  32. package/types/models.d.ts.map +1 -1
  33. package/types/pipelines.d.ts +2 -2
  34. package/types/pipelines.d.ts.map +1 -1
  35. package/types/tsconfig.tsbuildinfo +1 -1
  36. package/types/utils/image.d.ts +2 -2
  37. package/types/utils/image.d.ts.map +1 -1
  38. package/types/utils/tensor.d.ts.map +1 -1
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@huggingface/transformers",
3
- "version": "3.4.0",
3
+ "version": "3.4.1",
4
4
  "description": "State-of-the-art Machine Learning for the web. Run 🤗 Transformers directly in your browser, with no need for a server!",
5
5
  "main": "./src/transformers.js",
6
6
  "types": "./types/transformers.d.ts",
package/src/configs.js CHANGED
@@ -67,6 +67,7 @@ function getNormalizedConfig(config) {
67
67
  // Sub-configs
68
68
  case 'llava':
69
69
  case 'paligemma':
70
+ case 'gemma3':
70
71
  case 'florence2':
71
72
  case 'llava_onevision':
72
73
  case 'idefics3':
@@ -126,6 +127,7 @@ function getNormalizedConfig(config) {
126
127
  break;
127
128
  case 'gemma':
128
129
  case 'gemma2':
130
+ case 'gemma3_text':
129
131
  case 'glm':
130
132
  case 'helium':
131
133
  mapping['num_heads'] = 'num_key_value_heads';
package/src/env.js CHANGED
@@ -26,7 +26,7 @@ import fs from 'fs';
26
26
  import path from 'path';
27
27
  import url from 'url';
28
28
 
29
- const VERSION = '3.4.0';
29
+ const VERSION = '3.4.1';
30
30
 
31
31
  // Check if various APIs are available (depends on environment)
32
32
  const IS_BROWSER_ENV = typeof window !== "undefined" && typeof window.document !== "undefined";
package/src/models/feature_extractors.js CHANGED
@@ -6,6 +6,7 @@ export * from './dac/feature_extraction_dac.js';
6
6
  export * from './moonshine/feature_extraction_moonshine.js';
7
7
  export * from './pyannote/feature_extraction_pyannote.js';
8
8
  export * from './seamless_m4t/feature_extraction_seamless_m4t.js';
9
+ export * from './snac/feature_extraction_snac.js';
9
10
  export * from './speecht5/feature_extraction_speecht5.js';
10
11
  export * from './wav2vec2/feature_extraction_wav2vec2.js';
11
12
  export * from './wespeaker/feature_extraction_wespeaker.js';
package/src/models/snac/feature_extraction_snac.js ADDED
@@ -0,0 +1,3 @@
1
+ import { DacFeatureExtractor } from '../dac/feature_extraction_dac.js';
2
+
3
+ export class SnacFeatureExtractor extends DacFeatureExtractor { }
package/src/models.js CHANGED
@@ -594,8 +594,8 @@ async function decoderForward(self, model_inputs, is_encoder_decoder = false) {
594
594
  new_model_inputs.use_cache_branch = boolTensor(!!past_key_values);
595
595
  }
596
596
  if (session.inputNames.includes('position_ids') && new_model_inputs.attention_mask && !new_model_inputs.position_ids) {
597
- // NOTE: Handle a special case for paligemma models, where positions are 1-indexed
598
- const start_index = self.config.model_type === 'paligemma' ? 1 : 0;
597
+ // NOTE: Handle a special case for paligemma/gemma3 models, where positions are 1-indexed
598
+ const start_index = ['paligemma', 'gemma3_text', 'gemma3'].includes(self.config.model_type) ? 1 : 0;
599
599
  new_model_inputs.position_ids = createPositionIds(new_model_inputs, past_key_values, start_index);
600
600
  }
601
601
 
@@ -4520,6 +4520,23 @@ export class Gemma2Model extends Gemma2PreTrainedModel { }
4520
4520
  export class Gemma2ForCausalLM extends Gemma2PreTrainedModel { }
4521
4521
  //////////////////////////////////////////////////
4522
4522
 
4523
+
4524
+ //////////////////////////////////////////////////
4525
+ // Gemma3 models
4526
+
4527
+ /**
4528
+ * The bare Gemma3 Model outputting raw hidden-states without any specific head on top.
4529
+ */
4530
+ export class Gemma3PreTrainedModel extends PreTrainedModel { }
4531
+ /**
4532
+ * The bare Gemma3 Model outputting raw hidden-states without any specific head on top.
4533
+ */
4534
+ export class Gemma3Model extends Gemma3PreTrainedModel { }
4535
+
4536
+ export class Gemma3ForCausalLM extends Gemma3PreTrainedModel { }
4537
+ //////////////////////////////////////////////////
4538
+
4539
+
4523
4540
  //////////////////////////////////////////////////
4524
4541
  export class OpenELMPreTrainedModel extends PreTrainedModel { }
4525
4542
  export class OpenELMModel extends OpenELMPreTrainedModel { }
@@ -5372,6 +5389,16 @@ export class DepthProPreTrainedModel extends PreTrainedModel { }
5372
5389
  export class DepthProForDepthEstimation extends DepthProPreTrainedModel { }
5373
5390
  //////////////////////////////////////////////////
5374
5391
 
5392
+ //////////////////////////////////////////////////
5393
+ export class Metric3DPreTrainedModel extends PreTrainedModel { }
5394
+ export class Metric3DForDepthEstimation extends Metric3DPreTrainedModel { }
5395
+ //////////////////////////////////////////////////
5396
+
5397
+ //////////////////////////////////////////////////
5398
+ export class Metric3Dv2PreTrainedModel extends PreTrainedModel { }
5399
+ export class Metric3Dv2ForDepthEstimation extends Metric3Dv2PreTrainedModel { }
5400
+ //////////////////////////////////////////////////
5401
+
5375
5402
  //////////////////////////////////////////////////
5376
5403
  export class MaskFormerPreTrainedModel extends PreTrainedModel { }
5377
5404
  export class MaskFormerModel extends MaskFormerPreTrainedModel { }
@@ -7287,6 +7314,60 @@ export class DacDecoderModel extends DacPreTrainedModel {
7287
7314
  }
7288
7315
  //////////////////////////////////////////////////
7289
7316
 
7317
+
7318
+ //////////////////////////////////////////////////
7319
+ // Snac models
7320
+ export class SnacPreTrainedModel extends PreTrainedModel {
7321
+ main_input_name = 'input_values';
7322
+ forward_params = ['input_values'];
7323
+ }
7324
+
7325
+ /**
7326
+ * The SNAC (Multi-Scale Neural Audio Codec) model.
7327
+ */
7328
+ export class SnacModel extends SnacPreTrainedModel {
7329
+ /**
7330
+ * Encodes the input audio waveform into discrete codes.
7331
+ * @param {Object} inputs Model inputs
7332
+ * @param {Tensor} [inputs.input_values] Float values of the input audio waveform, of shape `(batch_size, channels, sequence_length)`.
7333
+ * @returns {Promise<Record<string, Tensor>>} The output tensors of shape `(batch_size, num_codebooks, sequence_length)`.
7334
+ */
7335
+ async encode(inputs) {
7336
+ return await sessionRun(this.sessions['encoder_model'], inputs);
7337
+ }
7338
+
7339
+ /**
7340
+ * Decodes the given frames into an output audio waveform.
7341
+ * @param {Record<string, Tensor>} inputs The encoded audio codes.
7342
+ * @returns {Promise<{audio_values: Tensor}>} The output tensor of shape `(batch_size, num_channels, sequence_length)`.
7343
+ */
7344
+ async decode(inputs) {
7345
+ return await sessionRun(this.sessions['decoder_model'], inputs);
7346
+ }
7347
+ }
7348
+
7349
+ export class SnacEncoderModel extends SnacPreTrainedModel {
7350
+ /** @type {typeof PreTrainedModel.from_pretrained} */
7351
+ static async from_pretrained(pretrained_model_name_or_path, options = {}) {
7352
+ return super.from_pretrained(pretrained_model_name_or_path, {
7353
+ ...options,
7354
+ // Update default model file name if not provided
7355
+ model_file_name: options.model_file_name ?? 'encoder_model',
7356
+ });
7357
+ }
7358
+ }
7359
+ export class SnacDecoderModel extends SnacPreTrainedModel {
7360
+ /** @type {typeof PreTrainedModel.from_pretrained} */
7361
+ static async from_pretrained(pretrained_model_name_or_path, options = {}) {
7362
+ return super.from_pretrained(pretrained_model_name_or_path, {
7363
+ ...options,
7364
+ // Update default model file name if not provided
7365
+ model_file_name: options.model_file_name ?? 'decoder_model',
7366
+ });
7367
+ }
7368
+ }
7369
+ //////////////////////////////////////////////////
7370
+
7290
7371
  //////////////////////////////////////////////////
7291
7372
  // AutoModels, used to simplify construction of PreTrainedModels
7292
7373
  // (uses config to instantiate correct class)
@@ -7468,6 +7549,7 @@ const MODEL_MAPPING_NAMES_ENCODER_DECODER = new Map([
7468
7549
  const MODEL_MAPPING_NAMES_AUTO_ENCODER = new Map([
7469
7550
  ['mimi', ['MimiModel', MimiModel]],
7470
7551
  ['dac', ['DacModel', DacModel]],
7552
+ ['snac', ['SnacModel', SnacModel]],
7471
7553
  ]);
7472
7554
 
7473
7555
  const MODEL_MAPPING_NAMES_DECODER_ONLY = new Map([
@@ -7488,6 +7570,7 @@ const MODEL_MAPPING_NAMES_DECODER_ONLY = new Map([
7488
7570
  ['cohere', ['CohereModel', CohereModel]],
7489
7571
  ['gemma', ['GemmaModel', GemmaModel]],
7490
7572
  ['gemma2', ['Gemma2Model', Gemma2Model]],
7573
+ ['gemma3_text', ['Gemma3Model', Gemma3Model]],
7491
7574
  ['helium', ['HeliumModel', HeliumModel]],
7492
7575
  ['glm', ['GlmModel', GlmModel]],
7493
7576
  ['openelm', ['OpenELMModel', OpenELMModel]],
@@ -7587,6 +7670,7 @@ const MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = new Map([
7587
7670
  ['cohere', ['CohereForCausalLM', CohereForCausalLM]],
7588
7671
  ['gemma', ['GemmaForCausalLM', GemmaForCausalLM]],
7589
7672
  ['gemma2', ['Gemma2ForCausalLM', Gemma2ForCausalLM]],
7673
+ ['gemma3_text', ['Gemma3ForCausalLM', Gemma3ForCausalLM]],
7590
7674
  ['helium', ['HeliumForCausalLM', HeliumForCausalLM]],
7591
7675
  ['glm', ['GlmForCausalLM', GlmForCausalLM]],
7592
7676
  ['openelm', ['OpenELMForCausalLM', OpenELMForCausalLM]],
@@ -7788,6 +7872,8 @@ const MODEL_FOR_DEPTH_ESTIMATION_MAPPING_NAMES = new Map([
7788
7872
  ['glpn', ['GLPNForDepthEstimation', GLPNForDepthEstimation]],
7789
7873
  ['sapiens', ['SapiensForDepthEstimation', SapiensForDepthEstimation]],
7790
7874
  ['depth_pro', ['DepthProForDepthEstimation', DepthProForDepthEstimation]],
7875
+ ['metric3d', ['Metric3DForDepthEstimation', Metric3DForDepthEstimation]],
7876
+ ['metric3dv2', ['Metric3Dv2ForDepthEstimation', Metric3Dv2ForDepthEstimation]],
7791
7877
  ])
7792
7878
 
7793
7879
  const MODEL_FOR_NORMAL_ESTIMATION_MAPPING_NAMES = new Map([
@@ -7873,6 +7959,8 @@ const CUSTOM_MAPPING = [
7873
7959
  ['DacDecoderModel', DacDecoderModel, MODEL_TYPES.EncoderOnly],
7874
7960
  ['MimiEncoderModel', MimiEncoderModel, MODEL_TYPES.EncoderOnly],
7875
7961
  ['MimiDecoderModel', MimiDecoderModel, MODEL_TYPES.EncoderOnly],
7962
+ ['SnacEncoderModel', SnacEncoderModel, MODEL_TYPES.EncoderOnly],
7963
+ ['SnacDecoderModel', SnacDecoderModel, MODEL_TYPES.EncoderOnly],
7876
7964
  ]
7877
7965
  for (const [name, model, type] of CUSTOM_MAPPING) {
7878
7966
  MODEL_TYPE_MAPPING.set(name, type);