@huggingface/transformers 3.4.0 → 3.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -2
- package/dist/transformers.js +315 -152
- package/dist/transformers.js.map +1 -1
- package/dist/transformers.min.js +1 -1
- package/dist/transformers.min.js.map +1 -1
- package/dist/transformers.node.cjs +303 -151
- package/dist/transformers.node.cjs.map +1 -1
- package/dist/transformers.node.min.cjs +1 -1
- package/dist/transformers.node.min.cjs.map +1 -1
- package/dist/transformers.node.min.mjs +1 -1
- package/dist/transformers.node.min.mjs.map +1 -1
- package/dist/transformers.node.mjs +315 -152
- package/dist/transformers.node.mjs.map +1 -1
- package/dist/transformers.web.js +315 -152
- package/dist/transformers.web.js.map +1 -1
- package/dist/transformers.web.min.js +1 -1
- package/dist/transformers.web.min.js.map +1 -1
- package/package.json +1 -1
- package/src/configs.js +2 -0
- package/src/env.js +1 -1
- package/src/models/feature_extractors.js +1 -0
- package/src/models/snac/feature_extraction_snac.js +3 -0
- package/src/models.js +90 -2
- package/src/pipelines.js +140 -135
- package/src/utils/image.js +9 -1
- package/src/utils/tensor.js +6 -2
- package/types/configs.d.ts.map +1 -1
- package/types/models/feature_extractors.d.ts +1 -0
- package/types/models/snac/feature_extraction_snac.d.ts +4 -0
- package/types/models/snac/feature_extraction_snac.d.ts.map +1 -0
- package/types/models.d.ts +48 -0
- package/types/models.d.ts.map +1 -1
- package/types/pipelines.d.ts +2 -2
- package/types/pipelines.d.ts.map +1 -1
- package/types/tsconfig.tsbuildinfo +1 -1
- package/types/utils/image.d.ts +2 -2
- package/types/utils/image.d.ts.map +1 -1
- package/types/utils/tensor.d.ts.map +1 -1
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@huggingface/transformers",
|
|
3
|
-
"version": "3.4.0",
|
|
3
|
+
"version": "3.4.1",
|
|
4
4
|
"description": "State-of-the-art Machine Learning for the web. Run 🤗 Transformers directly in your browser, with no need for a server!",
|
|
5
5
|
"main": "./src/transformers.js",
|
|
6
6
|
"types": "./types/transformers.d.ts",
|
package/src/configs.js
CHANGED
|
@@ -67,6 +67,7 @@ function getNormalizedConfig(config) {
|
|
|
67
67
|
// Sub-configs
|
|
68
68
|
case 'llava':
|
|
69
69
|
case 'paligemma':
|
|
70
|
+
case 'gemma3':
|
|
70
71
|
case 'florence2':
|
|
71
72
|
case 'llava_onevision':
|
|
72
73
|
case 'idefics3':
|
|
@@ -126,6 +127,7 @@ function getNormalizedConfig(config) {
|
|
|
126
127
|
break;
|
|
127
128
|
case 'gemma':
|
|
128
129
|
case 'gemma2':
|
|
130
|
+
case 'gemma3_text':
|
|
129
131
|
case 'glm':
|
|
130
132
|
case 'helium':
|
|
131
133
|
mapping['num_heads'] = 'num_key_value_heads';
|
package/src/env.js
CHANGED
|
@@ -26,7 +26,7 @@ import fs from 'fs';
|
|
|
26
26
|
import path from 'path';
|
|
27
27
|
import url from 'url';
|
|
28
28
|
|
|
29
|
-
const VERSION = '3.4.0';
|
|
29
|
+
const VERSION = '3.4.1';
|
|
30
30
|
|
|
31
31
|
// Check if various APIs are available (depends on environment)
|
|
32
32
|
const IS_BROWSER_ENV = typeof window !== "undefined" && typeof window.document !== "undefined";
|
|
package/src/models/feature_extractors.js
CHANGED
|
@@ -6,6 +6,7 @@ export * from './dac/feature_extraction_dac.js';
|
|
|
6
6
|
export * from './moonshine/feature_extraction_moonshine.js';
|
|
7
7
|
export * from './pyannote/feature_extraction_pyannote.js';
|
|
8
8
|
export * from './seamless_m4t/feature_extraction_seamless_m4t.js';
|
|
9
|
+
export * from './snac/feature_extraction_snac.js';
|
|
9
10
|
export * from './speecht5/feature_extraction_speecht5.js';
|
|
10
11
|
export * from './wav2vec2/feature_extraction_wav2vec2.js';
|
|
11
12
|
export * from './wespeaker/feature_extraction_wespeaker.js';
|
package/src/models.js
CHANGED
|
@@ -594,8 +594,8 @@ async function decoderForward(self, model_inputs, is_encoder_decoder = false) {
|
|
|
594
594
|
new_model_inputs.use_cache_branch = boolTensor(!!past_key_values);
|
|
595
595
|
}
|
|
596
596
|
if (session.inputNames.includes('position_ids') && new_model_inputs.attention_mask && !new_model_inputs.position_ids) {
|
|
597
|
-
// NOTE: Handle a special case for paligemma models, where positions are 1-indexed
|
|
598
|
-
const start_index = self.config.model_type === 'paligemma' ? 1 : 0;
|
|
597
|
+
// NOTE: Handle a special case for paligemma/gemma3 models, where positions are 1-indexed
|
|
598
|
+
const start_index = ['paligemma', 'gemma3_text', 'gemma3'].includes(self.config.model_type) ? 1 : 0;
|
|
599
599
|
new_model_inputs.position_ids = createPositionIds(new_model_inputs, past_key_values, start_index);
|
|
600
600
|
}
|
|
601
601
|
|
|
@@ -4520,6 +4520,23 @@ export class Gemma2Model extends Gemma2PreTrainedModel { }
|
|
|
4520
4520
|
export class Gemma2ForCausalLM extends Gemma2PreTrainedModel { }
|
|
4521
4521
|
//////////////////////////////////////////////////
|
|
4522
4522
|
|
|
4523
|
+
|
|
4524
|
+
//////////////////////////////////////////////////
|
|
4525
|
+
// Gemma3 models
|
|
4526
|
+
|
|
4527
|
+
/**
|
|
4528
|
+
* The bare Gemma3 Model outputting raw hidden-states without any specific head on top.
|
|
4529
|
+
*/
|
|
4530
|
+
export class Gemma3PreTrainedModel extends PreTrainedModel { }
|
|
4531
|
+
/**
|
|
4532
|
+
* The bare Gemma3 Model outputting raw hidden-states without any specific head on top.
|
|
4533
|
+
*/
|
|
4534
|
+
export class Gemma3Model extends Gemma3PreTrainedModel { }
|
|
4535
|
+
|
|
4536
|
+
export class Gemma3ForCausalLM extends Gemma3PreTrainedModel { }
|
|
4537
|
+
//////////////////////////////////////////////////
|
|
4538
|
+
|
|
4539
|
+
|
|
4523
4540
|
//////////////////////////////////////////////////
|
|
4524
4541
|
export class OpenELMPreTrainedModel extends PreTrainedModel { }
|
|
4525
4542
|
export class OpenELMModel extends OpenELMPreTrainedModel { }
|
|
@@ -5372,6 +5389,16 @@ export class DepthProPreTrainedModel extends PreTrainedModel { }
|
|
|
5372
5389
|
export class DepthProForDepthEstimation extends DepthProPreTrainedModel { }
|
|
5373
5390
|
//////////////////////////////////////////////////
|
|
5374
5391
|
|
|
5392
|
+
//////////////////////////////////////////////////
|
|
5393
|
+
export class Metric3DPreTrainedModel extends PreTrainedModel { }
|
|
5394
|
+
export class Metric3DForDepthEstimation extends Metric3DPreTrainedModel { }
|
|
5395
|
+
//////////////////////////////////////////////////
|
|
5396
|
+
|
|
5397
|
+
//////////////////////////////////////////////////
|
|
5398
|
+
export class Metric3Dv2PreTrainedModel extends PreTrainedModel { }
|
|
5399
|
+
export class Metric3Dv2ForDepthEstimation extends Metric3Dv2PreTrainedModel { }
|
|
5400
|
+
//////////////////////////////////////////////////
|
|
5401
|
+
|
|
5375
5402
|
//////////////////////////////////////////////////
|
|
5376
5403
|
export class MaskFormerPreTrainedModel extends PreTrainedModel { }
|
|
5377
5404
|
export class MaskFormerModel extends MaskFormerPreTrainedModel { }
|
|
@@ -7287,6 +7314,60 @@ export class DacDecoderModel extends DacPreTrainedModel {
|
|
|
7287
7314
|
}
|
|
7288
7315
|
//////////////////////////////////////////////////
|
|
7289
7316
|
|
|
7317
|
+
|
|
7318
|
+
//////////////////////////////////////////////////
|
|
7319
|
+
// Snac models
|
|
7320
|
+
export class SnacPreTrainedModel extends PreTrainedModel {
|
|
7321
|
+
main_input_name = 'input_values';
|
|
7322
|
+
forward_params = ['input_values'];
|
|
7323
|
+
}
|
|
7324
|
+
|
|
7325
|
+
/**
|
|
7326
|
+
* The SNAC (Multi-Scale Neural Audio Codec) model.
|
|
7327
|
+
*/
|
|
7328
|
+
export class SnacModel extends SnacPreTrainedModel {
|
|
7329
|
+
/**
|
|
7330
|
+
* Encodes the input audio waveform into discrete codes.
|
|
7331
|
+
* @param {Object} inputs Model inputs
|
|
7332
|
+
* @param {Tensor} [inputs.input_values] Float values of the input audio waveform, of shape `(batch_size, channels, sequence_length)`).
|
|
7333
|
+
* @returns {Promise<Record<string, Tensor>>} The output tensors of shape `(batch_size, num_codebooks, sequence_length)`.
|
|
7334
|
+
*/
|
|
7335
|
+
async encode(inputs) {
|
|
7336
|
+
return await sessionRun(this.sessions['encoder_model'], inputs);
|
|
7337
|
+
}
|
|
7338
|
+
|
|
7339
|
+
/**
|
|
7340
|
+
* Decodes the given frames into an output audio waveform.
|
|
7341
|
+
* @param {Record<string, Tensor>} inputs The encoded audio codes.
|
|
7342
|
+
* @returns {Promise<{audio_values: Tensor}>} The output tensor of shape `(batch_size, num_channels, sequence_length)`.
|
|
7343
|
+
*/
|
|
7344
|
+
async decode(inputs) {
|
|
7345
|
+
return await sessionRun(this.sessions['decoder_model'], inputs);
|
|
7346
|
+
}
|
|
7347
|
+
}
|
|
7348
|
+
|
|
7349
|
+
export class SnacEncoderModel extends SnacPreTrainedModel {
|
|
7350
|
+
/** @type {typeof PreTrainedModel.from_pretrained} */
|
|
7351
|
+
static async from_pretrained(pretrained_model_name_or_path, options = {}) {
|
|
7352
|
+
return super.from_pretrained(pretrained_model_name_or_path, {
|
|
7353
|
+
...options,
|
|
7354
|
+
// Update default model file name if not provided
|
|
7355
|
+
model_file_name: options.model_file_name ?? 'encoder_model',
|
|
7356
|
+
});
|
|
7357
|
+
}
|
|
7358
|
+
}
|
|
7359
|
+
export class SnacDecoderModel extends SnacPreTrainedModel {
|
|
7360
|
+
/** @type {typeof PreTrainedModel.from_pretrained} */
|
|
7361
|
+
static async from_pretrained(pretrained_model_name_or_path, options = {}) {
|
|
7362
|
+
return super.from_pretrained(pretrained_model_name_or_path, {
|
|
7363
|
+
...options,
|
|
7364
|
+
// Update default model file name if not provided
|
|
7365
|
+
model_file_name: options.model_file_name ?? 'decoder_model',
|
|
7366
|
+
});
|
|
7367
|
+
}
|
|
7368
|
+
}
|
|
7369
|
+
//////////////////////////////////////////////////
|
|
7370
|
+
|
|
7290
7371
|
//////////////////////////////////////////////////
|
|
7291
7372
|
// AutoModels, used to simplify construction of PreTrainedModels
|
|
7292
7373
|
// (uses config to instantiate correct class)
|
|
@@ -7468,6 +7549,7 @@ const MODEL_MAPPING_NAMES_ENCODER_DECODER = new Map([
|
|
|
7468
7549
|
const MODEL_MAPPING_NAMES_AUTO_ENCODER = new Map([
|
|
7469
7550
|
['mimi', ['MimiModel', MimiModel]],
|
|
7470
7551
|
['dac', ['DacModel', DacModel]],
|
|
7552
|
+
['snac', ['SnacModel', SnacModel]],
|
|
7471
7553
|
]);
|
|
7472
7554
|
|
|
7473
7555
|
const MODEL_MAPPING_NAMES_DECODER_ONLY = new Map([
|
|
@@ -7488,6 +7570,7 @@ const MODEL_MAPPING_NAMES_DECODER_ONLY = new Map([
|
|
|
7488
7570
|
['cohere', ['CohereModel', CohereModel]],
|
|
7489
7571
|
['gemma', ['GemmaModel', GemmaModel]],
|
|
7490
7572
|
['gemma2', ['Gemma2Model', Gemma2Model]],
|
|
7573
|
+
['gemma3_text', ['Gemma3Model', Gemma3Model]],
|
|
7491
7574
|
['helium', ['HeliumModel', HeliumModel]],
|
|
7492
7575
|
['glm', ['GlmModel', GlmModel]],
|
|
7493
7576
|
['openelm', ['OpenELMModel', OpenELMModel]],
|
|
@@ -7587,6 +7670,7 @@ const MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = new Map([
|
|
|
7587
7670
|
['cohere', ['CohereForCausalLM', CohereForCausalLM]],
|
|
7588
7671
|
['gemma', ['GemmaForCausalLM', GemmaForCausalLM]],
|
|
7589
7672
|
['gemma2', ['Gemma2ForCausalLM', Gemma2ForCausalLM]],
|
|
7673
|
+
['gemma3_text', ['Gemma3ForCausalLM', Gemma3ForCausalLM]],
|
|
7590
7674
|
['helium', ['HeliumForCausalLM', HeliumForCausalLM]],
|
|
7591
7675
|
['glm', ['GlmForCausalLM', GlmForCausalLM]],
|
|
7592
7676
|
['openelm', ['OpenELMForCausalLM', OpenELMForCausalLM]],
|
|
@@ -7788,6 +7872,8 @@ const MODEL_FOR_DEPTH_ESTIMATION_MAPPING_NAMES = new Map([
|
|
|
7788
7872
|
['glpn', ['GLPNForDepthEstimation', GLPNForDepthEstimation]],
|
|
7789
7873
|
['sapiens', ['SapiensForDepthEstimation', SapiensForDepthEstimation]],
|
|
7790
7874
|
['depth_pro', ['DepthProForDepthEstimation', DepthProForDepthEstimation]],
|
|
7875
|
+
['metric3d', ['Metric3DForDepthEstimation', Metric3DForDepthEstimation]],
|
|
7876
|
+
['metric3dv2', ['Metric3Dv2ForDepthEstimation', Metric3Dv2ForDepthEstimation]],
|
|
7791
7877
|
])
|
|
7792
7878
|
|
|
7793
7879
|
const MODEL_FOR_NORMAL_ESTIMATION_MAPPING_NAMES = new Map([
|
|
@@ -7873,6 +7959,8 @@ const CUSTOM_MAPPING = [
|
|
|
7873
7959
|
['DacDecoderModel', DacDecoderModel, MODEL_TYPES.EncoderOnly],
|
|
7874
7960
|
['MimiEncoderModel', MimiEncoderModel, MODEL_TYPES.EncoderOnly],
|
|
7875
7961
|
['MimiDecoderModel', MimiDecoderModel, MODEL_TYPES.EncoderOnly],
|
|
7962
|
+
['SnacEncoderModel', SnacEncoderModel, MODEL_TYPES.EncoderOnly],
|
|
7963
|
+
['SnacDecoderModel', SnacDecoderModel, MODEL_TYPES.EncoderOnly],
|
|
7876
7964
|
]
|
|
7877
7965
|
for (const [name, model, type] of CUSTOM_MAPPING) {
|
|
7878
7966
|
MODEL_TYPE_MAPPING.set(name, type);
|