@huggingface/transformers 3.4.0 → 3.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. package/README.md +8 -2
  2. package/dist/transformers.js +528 -201
  3. package/dist/transformers.js.map +1 -1
  4. package/dist/transformers.min.js +1 -1
  5. package/dist/transformers.min.js.map +1 -1
  6. package/dist/transformers.node.cjs +508 -200
  7. package/dist/transformers.node.cjs.map +1 -1
  8. package/dist/transformers.node.min.cjs +1 -1
  9. package/dist/transformers.node.min.cjs.map +1 -1
  10. package/dist/transformers.node.min.mjs +1 -1
  11. package/dist/transformers.node.min.mjs.map +1 -1
  12. package/dist/transformers.node.mjs +528 -201
  13. package/dist/transformers.node.mjs.map +1 -1
  14. package/dist/transformers.web.js +528 -201
  15. package/dist/transformers.web.js.map +1 -1
  16. package/dist/transformers.web.min.js +1 -1
  17. package/dist/transformers.web.min.js.map +1 -1
  18. package/package.json +1 -1
  19. package/src/configs.js +2 -0
  20. package/src/env.js +1 -1
  21. package/src/models/feature_extractors.js +1 -0
  22. package/src/models/snac/feature_extraction_snac.js +3 -0
  23. package/src/models.js +125 -2
  24. package/src/pipelines.js +140 -135
  25. package/src/tokenizers.js +44 -34
  26. package/src/utils/data-structures.js +74 -0
  27. package/src/utils/hub.js +36 -15
  28. package/src/utils/image.js +9 -1
  29. package/src/utils/tensor.js +6 -2
  30. package/types/configs.d.ts.map +1 -1
  31. package/types/models/feature_extractors.d.ts +1 -0
  32. package/types/models/snac/feature_extraction_snac.d.ts +4 -0
  33. package/types/models/snac/feature_extraction_snac.d.ts.map +1 -0
  34. package/types/models.d.ts +72 -0
  35. package/types/models.d.ts.map +1 -1
  36. package/types/pipelines.d.ts +2 -2
  37. package/types/pipelines.d.ts.map +1 -1
  38. package/types/tokenizers.d.ts +4 -1
  39. package/types/tokenizers.d.ts.map +1 -1
  40. package/types/tsconfig.tsbuildinfo +1 -1
  41. package/types/utils/data-structures.d.ts +26 -0
  42. package/types/utils/data-structures.d.ts.map +1 -1
  43. package/types/utils/hub.d.ts.map +1 -1
  44. package/types/utils/image.d.ts +2 -2
  45. package/types/utils/image.d.ts.map +1 -1
  46. package/types/utils/tensor.d.ts.map +1 -1
@@ -3684,6 +3684,7 @@ function getNormalizedConfig(config) {
3684
3684
  // Sub-configs
3685
3685
  case 'llava':
3686
3686
  case 'paligemma':
3687
+ case 'gemma3':
3687
3688
  case 'florence2':
3688
3689
  case 'llava_onevision':
3689
3690
  case 'idefics3':
@@ -3743,6 +3744,7 @@ function getNormalizedConfig(config) {
3743
3744
  break;
3744
3745
  case 'gemma':
3745
3746
  case 'gemma2':
3747
+ case 'gemma3_text':
3746
3748
  case 'glm':
3747
3749
  case 'helium':
3748
3750
  mapping['num_heads'] = 'num_key_value_heads';
@@ -4074,7 +4076,7 @@ __webpack_require__.r(__webpack_exports__);
4074
4076
 
4075
4077
 
4076
4078
 
4077
- const VERSION = '3.4.0';
4079
+ const VERSION = '3.4.2';
4078
4080
 
4079
4081
  // Check if various APIs are available (depends on environment)
4080
4082
  const IS_BROWSER_ENV = typeof window !== "undefined" && typeof window.document !== "undefined";
@@ -6218,6 +6220,9 @@ __webpack_require__.r(__webpack_exports__);
6218
6220
  /* harmony export */ Gemma2ForCausalLM: () => (/* binding */ Gemma2ForCausalLM),
6219
6221
  /* harmony export */ Gemma2Model: () => (/* binding */ Gemma2Model),
6220
6222
  /* harmony export */ Gemma2PreTrainedModel: () => (/* binding */ Gemma2PreTrainedModel),
6223
+ /* harmony export */ Gemma3ForCausalLM: () => (/* binding */ Gemma3ForCausalLM),
6224
+ /* harmony export */ Gemma3Model: () => (/* binding */ Gemma3Model),
6225
+ /* harmony export */ Gemma3PreTrainedModel: () => (/* binding */ Gemma3PreTrainedModel),
6221
6226
  /* harmony export */ GemmaForCausalLM: () => (/* binding */ GemmaForCausalLM),
6222
6227
  /* harmony export */ GemmaModel: () => (/* binding */ GemmaModel),
6223
6228
  /* harmony export */ GemmaPreTrainedModel: () => (/* binding */ GemmaPreTrainedModel),
@@ -6288,6 +6293,10 @@ __webpack_require__.r(__webpack_exports__);
6288
6293
  /* harmony export */ MaskFormerModel: () => (/* binding */ MaskFormerModel),
6289
6294
  /* harmony export */ MaskFormerPreTrainedModel: () => (/* binding */ MaskFormerPreTrainedModel),
6290
6295
  /* harmony export */ MaskedLMOutput: () => (/* binding */ MaskedLMOutput),
6296
+ /* harmony export */ Metric3DForDepthEstimation: () => (/* binding */ Metric3DForDepthEstimation),
6297
+ /* harmony export */ Metric3DPreTrainedModel: () => (/* binding */ Metric3DPreTrainedModel),
6298
+ /* harmony export */ Metric3Dv2ForDepthEstimation: () => (/* binding */ Metric3Dv2ForDepthEstimation),
6299
+ /* harmony export */ Metric3Dv2PreTrainedModel: () => (/* binding */ Metric3Dv2PreTrainedModel),
6291
6300
  /* harmony export */ MgpstrForSceneTextRecognition: () => (/* binding */ MgpstrForSceneTextRecognition),
6292
6301
  /* harmony export */ MgpstrModelOutput: () => (/* binding */ MgpstrModelOutput),
6293
6302
  /* harmony export */ MgpstrPreTrainedModel: () => (/* binding */ MgpstrPreTrainedModel),
@@ -6399,10 +6408,18 @@ __webpack_require__.r(__webpack_exports__);
6399
6408
  /* harmony export */ Qwen2PreTrainedModel: () => (/* binding */ Qwen2PreTrainedModel),
6400
6409
  /* harmony export */ Qwen2VLForConditionalGeneration: () => (/* binding */ Qwen2VLForConditionalGeneration),
6401
6410
  /* harmony export */ Qwen2VLPreTrainedModel: () => (/* binding */ Qwen2VLPreTrainedModel),
6411
+ /* harmony export */ RFDetrForObjectDetection: () => (/* binding */ RFDetrForObjectDetection),
6412
+ /* harmony export */ RFDetrModel: () => (/* binding */ RFDetrModel),
6413
+ /* harmony export */ RFDetrObjectDetectionOutput: () => (/* binding */ RFDetrObjectDetectionOutput),
6414
+ /* harmony export */ RFDetrPreTrainedModel: () => (/* binding */ RFDetrPreTrainedModel),
6402
6415
  /* harmony export */ RTDetrForObjectDetection: () => (/* binding */ RTDetrForObjectDetection),
6403
6416
  /* harmony export */ RTDetrModel: () => (/* binding */ RTDetrModel),
6404
6417
  /* harmony export */ RTDetrObjectDetectionOutput: () => (/* binding */ RTDetrObjectDetectionOutput),
6405
6418
  /* harmony export */ RTDetrPreTrainedModel: () => (/* binding */ RTDetrPreTrainedModel),
6419
+ /* harmony export */ RTDetrV2ForObjectDetection: () => (/* binding */ RTDetrV2ForObjectDetection),
6420
+ /* harmony export */ RTDetrV2Model: () => (/* binding */ RTDetrV2Model),
6421
+ /* harmony export */ RTDetrV2ObjectDetectionOutput: () => (/* binding */ RTDetrV2ObjectDetectionOutput),
6422
+ /* harmony export */ RTDetrV2PreTrainedModel: () => (/* binding */ RTDetrV2PreTrainedModel),
6406
6423
  /* harmony export */ ResNetForImageClassification: () => (/* binding */ ResNetForImageClassification),
6407
6424
  /* harmony export */ ResNetModel: () => (/* binding */ ResNetModel),
6408
6425
  /* harmony export */ ResNetPreTrainedModel: () => (/* binding */ ResNetPreTrainedModel),
@@ -6436,6 +6453,10 @@ __webpack_require__.r(__webpack_exports__);
6436
6453
  /* harmony export */ SiglipTextModel: () => (/* binding */ SiglipTextModel),
6437
6454
  /* harmony export */ SiglipVisionModel: () => (/* binding */ SiglipVisionModel),
6438
6455
  /* harmony export */ SmolVLMForConditionalGeneration: () => (/* binding */ SmolVLMForConditionalGeneration),
6456
+ /* harmony export */ SnacDecoderModel: () => (/* binding */ SnacDecoderModel),
6457
+ /* harmony export */ SnacEncoderModel: () => (/* binding */ SnacEncoderModel),
6458
+ /* harmony export */ SnacModel: () => (/* binding */ SnacModel),
6459
+ /* harmony export */ SnacPreTrainedModel: () => (/* binding */ SnacPreTrainedModel),
6439
6460
  /* harmony export */ SpeechT5ForSpeechToText: () => (/* binding */ SpeechT5ForSpeechToText),
6440
6461
  /* harmony export */ SpeechT5ForTextToSpeech: () => (/* binding */ SpeechT5ForTextToSpeech),
6441
6462
  /* harmony export */ SpeechT5HifiGan: () => (/* binding */ SpeechT5HifiGan),
@@ -7094,8 +7115,8 @@ async function decoderForward(self, model_inputs, is_encoder_decoder = false) {
7094
7115
  new_model_inputs.use_cache_branch = boolTensor(!!past_key_values);
7095
7116
  }
7096
7117
  if (session.inputNames.includes('position_ids') && new_model_inputs.attention_mask && !new_model_inputs.position_ids) {
7097
- // NOTE: Handle a special case for paligemma models, where positions are 1-indexed
7098
- const start_index = self.config.model_type === 'paligemma' ? 1 : 0;
7118
+ // NOTE: Handle a special case for paligemma/gemma3 models, where positions are 1-indexed
7119
+ const start_index = ['paligemma', 'gemma3_text', 'gemma3'].includes(self.config.model_type) ? 1 : 0;
7099
7120
  new_model_inputs.position_ids = createPositionIds(new_model_inputs, past_key_values, start_index);
7100
7121
  }
7101
7122
 
@@ -11020,6 +11041,23 @@ class Gemma2Model extends Gemma2PreTrainedModel { }
11020
11041
  class Gemma2ForCausalLM extends Gemma2PreTrainedModel { }
11021
11042
  //////////////////////////////////////////////////
11022
11043
 
11044
+
11045
+ //////////////////////////////////////////////////
11046
+ // Gemma3 models
11047
+
11048
+ /**
11049
+ * The bare Gemma3 Model outputting raw hidden-states without any specific head on top.
11050
+ */
11051
+ class Gemma3PreTrainedModel extends PreTrainedModel { }
11052
+ /**
11053
+ * The bare Gemma3 Model outputting raw hidden-states without any specific head on top.
11054
+ */
11055
+ class Gemma3Model extends Gemma3PreTrainedModel { }
11056
+
11057
+ class Gemma3ForCausalLM extends Gemma3PreTrainedModel { }
11058
+ //////////////////////////////////////////////////
11059
+
11060
+
11023
11061
  //////////////////////////////////////////////////
11024
11062
  class OpenELMPreTrainedModel extends PreTrainedModel { }
11025
11063
  class OpenELMModel extends OpenELMPreTrainedModel { }
@@ -11664,6 +11702,37 @@ class RTDetrObjectDetectionOutput extends ModelOutput {
11664
11702
  }
11665
11703
  //////////////////////////////////////////////////
11666
11704
 
11705
+
11706
+ //////////////////////////////////////////////////
11707
+ class RTDetrV2PreTrainedModel extends PreTrainedModel { }
11708
+ class RTDetrV2Model extends RTDetrV2PreTrainedModel { }
11709
+ class RTDetrV2ForObjectDetection extends RTDetrV2PreTrainedModel {
11710
+ /**
11711
+ * @param {any} model_inputs
11712
+ */
11713
+ async _call(model_inputs) {
11714
+ return new RTDetrV2ObjectDetectionOutput(await super._call(model_inputs));
11715
+ }
11716
+ }
11717
+
11718
+ class RTDetrV2ObjectDetectionOutput extends RTDetrObjectDetectionOutput {}
11719
+ //////////////////////////////////////////////////
11720
+
11721
+ //////////////////////////////////////////////////
11722
+ class RFDetrPreTrainedModel extends PreTrainedModel { }
11723
+ class RFDetrModel extends RFDetrPreTrainedModel { }
11724
+ class RFDetrForObjectDetection extends RFDetrPreTrainedModel {
11725
+ /**
11726
+ * @param {any} model_inputs
11727
+ */
11728
+ async _call(model_inputs) {
11729
+ return new RFDetrObjectDetectionOutput(await super._call(model_inputs));
11730
+ }
11731
+ }
11732
+
11733
+ class RFDetrObjectDetectionOutput extends RTDetrObjectDetectionOutput {}
11734
+ //////////////////////////////////////////////////
11735
+
11667
11736
  //////////////////////////////////////////////////
11668
11737
  class TableTransformerPreTrainedModel extends PreTrainedModel { }
11669
11738
 
@@ -11872,6 +11941,16 @@ class DepthProPreTrainedModel extends PreTrainedModel { }
11872
11941
  class DepthProForDepthEstimation extends DepthProPreTrainedModel { }
11873
11942
  //////////////////////////////////////////////////
11874
11943
 
11944
+ //////////////////////////////////////////////////
11945
+ class Metric3DPreTrainedModel extends PreTrainedModel { }
11946
+ class Metric3DForDepthEstimation extends Metric3DPreTrainedModel { }
11947
+ //////////////////////////////////////////////////
11948
+
11949
+ //////////////////////////////////////////////////
11950
+ class Metric3Dv2PreTrainedModel extends PreTrainedModel { }
11951
+ class Metric3Dv2ForDepthEstimation extends Metric3Dv2PreTrainedModel { }
11952
+ //////////////////////////////////////////////////
11953
+
11875
11954
  //////////////////////////////////////////////////
11876
11955
  class MaskFormerPreTrainedModel extends PreTrainedModel { }
11877
11956
  class MaskFormerModel extends MaskFormerPreTrainedModel { }
@@ -13787,6 +13866,60 @@ class DacDecoderModel extends DacPreTrainedModel {
13787
13866
  }
13788
13867
  //////////////////////////////////////////////////
13789
13868
 
13869
+
13870
+ //////////////////////////////////////////////////
13871
+ // Snac models
13872
+ class SnacPreTrainedModel extends PreTrainedModel {
13873
+ main_input_name = 'input_values';
13874
+ forward_params = ['input_values'];
13875
+ }
13876
+
13877
+ /**
13878
+ * The SNAC (Multi-Scale Neural Audio Codec) model.
13879
+ */
13880
+ class SnacModel extends SnacPreTrainedModel {
13881
+ /**
13882
+ * Encodes the input audio waveform into discrete codes.
13883
+ * @param {Object} inputs Model inputs
13884
+ * @param {Tensor} [inputs.input_values] Float values of the input audio waveform, of shape `(batch_size, channels, sequence_length)`).
13885
+ * @returns {Promise<Record<string, Tensor>>} The output tensors of shape `(batch_size, num_codebooks, sequence_length)`.
13886
+ */
13887
+ async encode(inputs) {
13888
+ return await sessionRun(this.sessions['encoder_model'], inputs);
13889
+ }
13890
+
13891
+ /**
13892
+ * Decodes the given frames into an output audio waveform.
13893
+ * @param {Record<string, Tensor>} inputs The encoded audio codes.
13894
+ * @returns {Promise<{audio_values: Tensor}>} The output tensor of shape `(batch_size, num_channels, sequence_length)`.
13895
+ */
13896
+ async decode(inputs) {
13897
+ return await sessionRun(this.sessions['decoder_model'], inputs);
13898
+ }
13899
+ }
13900
+
13901
+ class SnacEncoderModel extends SnacPreTrainedModel {
13902
+ /** @type {typeof PreTrainedModel.from_pretrained} */
13903
+ static async from_pretrained(pretrained_model_name_or_path, options = {}) {
13904
+ return super.from_pretrained(pretrained_model_name_or_path, {
13905
+ ...options,
13906
+ // Update default model file name if not provided
13907
+ model_file_name: options.model_file_name ?? 'encoder_model',
13908
+ });
13909
+ }
13910
+ }
13911
+ class SnacDecoderModel extends SnacPreTrainedModel {
13912
+ /** @type {typeof PreTrainedModel.from_pretrained} */
13913
+ static async from_pretrained(pretrained_model_name_or_path, options = {}) {
13914
+ return super.from_pretrained(pretrained_model_name_or_path, {
13915
+ ...options,
13916
+ // Update default model file name if not provided
13917
+ model_file_name: options.model_file_name ?? 'decoder_model',
13918
+ });
13919
+ }
13920
+ }
13921
+ //////////////////////////////////////////////////
13922
+
13790
13923
  //////////////////////////////////////////////////
13791
13924
  // AutoModels, used to simplify construction of PreTrainedModels
13792
13925
  // (uses config to instantiate correct class)
@@ -13907,6 +14040,8 @@ const MODEL_MAPPING_NAMES_ENCODER_ONLY = new Map([
13907
14040
 
13908
14041
  ['detr', ['DetrModel', DetrModel]],
13909
14042
  ['rt_detr', ['RTDetrModel', RTDetrModel]],
14043
+ ['rt_detr_v2', ['RTDetrV2Model', RTDetrV2Model]],
14044
+ ['rf_detr', ['RFDetrModel', RFDetrModel]],
13910
14045
  ['table-transformer', ['TableTransformerModel', TableTransformerModel]],
13911
14046
  ['vit', ['ViTModel', ViTModel]],
13912
14047
  ['ijepa', ['IJepaModel', IJepaModel]],
@@ -13968,6 +14103,7 @@ const MODEL_MAPPING_NAMES_ENCODER_DECODER = new Map([
13968
14103
  const MODEL_MAPPING_NAMES_AUTO_ENCODER = new Map([
13969
14104
  ['mimi', ['MimiModel', MimiModel]],
13970
14105
  ['dac', ['DacModel', DacModel]],
14106
+ ['snac', ['SnacModel', SnacModel]],
13971
14107
  ]);
13972
14108
 
13973
14109
  const MODEL_MAPPING_NAMES_DECODER_ONLY = new Map([
@@ -13988,6 +14124,7 @@ const MODEL_MAPPING_NAMES_DECODER_ONLY = new Map([
13988
14124
  ['cohere', ['CohereModel', CohereModel]],
13989
14125
  ['gemma', ['GemmaModel', GemmaModel]],
13990
14126
  ['gemma2', ['Gemma2Model', Gemma2Model]],
14127
+ ['gemma3_text', ['Gemma3Model', Gemma3Model]],
13991
14128
  ['helium', ['HeliumModel', HeliumModel]],
13992
14129
  ['glm', ['GlmModel', GlmModel]],
13993
14130
  ['openelm', ['OpenELMModel', OpenELMModel]],
@@ -14087,6 +14224,7 @@ const MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = new Map([
14087
14224
  ['cohere', ['CohereForCausalLM', CohereForCausalLM]],
14088
14225
  ['gemma', ['GemmaForCausalLM', GemmaForCausalLM]],
14089
14226
  ['gemma2', ['Gemma2ForCausalLM', Gemma2ForCausalLM]],
14227
+ ['gemma3_text', ['Gemma3ForCausalLM', Gemma3ForCausalLM]],
14090
14228
  ['helium', ['HeliumForCausalLM', HeliumForCausalLM]],
14091
14229
  ['glm', ['GlmForCausalLM', GlmForCausalLM]],
14092
14230
  ['openelm', ['OpenELMForCausalLM', OpenELMForCausalLM]],
@@ -14203,6 +14341,8 @@ const MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES = new Map([
14203
14341
  const MODEL_FOR_OBJECT_DETECTION_MAPPING_NAMES = new Map([
14204
14342
  ['detr', ['DetrForObjectDetection', DetrForObjectDetection]],
14205
14343
  ['rt_detr', ['RTDetrForObjectDetection', RTDetrForObjectDetection]],
14344
+ ['rt_detr_v2', ['RTDetrV2ForObjectDetection', RTDetrV2ForObjectDetection]],
14345
+ ['rf_detr', ['RFDetrForObjectDetection', RFDetrForObjectDetection]],
14206
14346
  ['table-transformer', ['TableTransformerForObjectDetection', TableTransformerForObjectDetection]],
14207
14347
  ['yolos', ['YolosForObjectDetection', YolosForObjectDetection]],
14208
14348
  ]);
@@ -14288,6 +14428,8 @@ const MODEL_FOR_DEPTH_ESTIMATION_MAPPING_NAMES = new Map([
14288
14428
  ['glpn', ['GLPNForDepthEstimation', GLPNForDepthEstimation]],
14289
14429
  ['sapiens', ['SapiensForDepthEstimation', SapiensForDepthEstimation]],
14290
14430
  ['depth_pro', ['DepthProForDepthEstimation', DepthProForDepthEstimation]],
14431
+ ['metric3d', ['Metric3DForDepthEstimation', Metric3DForDepthEstimation]],
14432
+ ['metric3dv2', ['Metric3Dv2ForDepthEstimation', Metric3Dv2ForDepthEstimation]],
14291
14433
  ])
14292
14434
 
14293
14435
  const MODEL_FOR_NORMAL_ESTIMATION_MAPPING_NAMES = new Map([
@@ -14373,6 +14515,8 @@ const CUSTOM_MAPPING = [
14373
14515
  ['DacDecoderModel', DacDecoderModel, MODEL_TYPES.EncoderOnly],
14374
14516
  ['MimiEncoderModel', MimiEncoderModel, MODEL_TYPES.EncoderOnly],
14375
14517
  ['MimiDecoderModel', MimiDecoderModel, MODEL_TYPES.EncoderOnly],
14518
+ ['SnacEncoderModel', SnacEncoderModel, MODEL_TYPES.EncoderOnly],
14519
+ ['SnacDecoderModel', SnacDecoderModel, MODEL_TYPES.EncoderOnly],
14376
14520
  ]
14377
14521
  for (const [name, model, type] of CUSTOM_MAPPING) {
14378
14522
  MODEL_TYPE_MAPPING.set(name, type);
@@ -15689,14 +15833,15 @@ __webpack_require__.r(__webpack_exports__);
15689
15833
  /* harmony export */ ClapFeatureExtractor: () => (/* reexport safe */ _clap_feature_extraction_clap_js__WEBPACK_IMPORTED_MODULE_2__.ClapFeatureExtractor),
15690
15834
  /* harmony export */ DacFeatureExtractor: () => (/* reexport safe */ _dac_feature_extraction_dac_js__WEBPACK_IMPORTED_MODULE_3__.DacFeatureExtractor),
15691
15835
  /* harmony export */ EncodecFeatureExtractor: () => (/* reexport safe */ _encodec_feature_extraction_encodec_js__WEBPACK_IMPORTED_MODULE_1__.EncodecFeatureExtractor),
15692
- /* harmony export */ ImageFeatureExtractor: () => (/* reexport safe */ _base_image_processors_utils_js__WEBPACK_IMPORTED_MODULE_11__.ImageProcessor),
15836
+ /* harmony export */ ImageFeatureExtractor: () => (/* reexport safe */ _base_image_processors_utils_js__WEBPACK_IMPORTED_MODULE_12__.ImageProcessor),
15693
15837
  /* harmony export */ MoonshineFeatureExtractor: () => (/* reexport safe */ _moonshine_feature_extraction_moonshine_js__WEBPACK_IMPORTED_MODULE_4__.MoonshineFeatureExtractor),
15694
15838
  /* harmony export */ PyAnnoteFeatureExtractor: () => (/* reexport safe */ _pyannote_feature_extraction_pyannote_js__WEBPACK_IMPORTED_MODULE_5__.PyAnnoteFeatureExtractor),
15695
15839
  /* harmony export */ SeamlessM4TFeatureExtractor: () => (/* reexport safe */ _seamless_m4t_feature_extraction_seamless_m4t_js__WEBPACK_IMPORTED_MODULE_6__.SeamlessM4TFeatureExtractor),
15696
- /* harmony export */ SpeechT5FeatureExtractor: () => (/* reexport safe */ _speecht5_feature_extraction_speecht5_js__WEBPACK_IMPORTED_MODULE_7__.SpeechT5FeatureExtractor),
15697
- /* harmony export */ Wav2Vec2FeatureExtractor: () => (/* reexport safe */ _wav2vec2_feature_extraction_wav2vec2_js__WEBPACK_IMPORTED_MODULE_8__.Wav2Vec2FeatureExtractor),
15698
- /* harmony export */ WeSpeakerFeatureExtractor: () => (/* reexport safe */ _wespeaker_feature_extraction_wespeaker_js__WEBPACK_IMPORTED_MODULE_9__.WeSpeakerFeatureExtractor),
15699
- /* harmony export */ WhisperFeatureExtractor: () => (/* reexport safe */ _whisper_feature_extraction_whisper_js__WEBPACK_IMPORTED_MODULE_10__.WhisperFeatureExtractor)
15840
+ /* harmony export */ SnacFeatureExtractor: () => (/* reexport safe */ _snac_feature_extraction_snac_js__WEBPACK_IMPORTED_MODULE_7__.SnacFeatureExtractor),
15841
+ /* harmony export */ SpeechT5FeatureExtractor: () => (/* reexport safe */ _speecht5_feature_extraction_speecht5_js__WEBPACK_IMPORTED_MODULE_8__.SpeechT5FeatureExtractor),
15842
+ /* harmony export */ Wav2Vec2FeatureExtractor: () => (/* reexport safe */ _wav2vec2_feature_extraction_wav2vec2_js__WEBPACK_IMPORTED_MODULE_9__.Wav2Vec2FeatureExtractor),
15843
+ /* harmony export */ WeSpeakerFeatureExtractor: () => (/* reexport safe */ _wespeaker_feature_extraction_wespeaker_js__WEBPACK_IMPORTED_MODULE_10__.WeSpeakerFeatureExtractor),
15844
+ /* harmony export */ WhisperFeatureExtractor: () => (/* reexport safe */ _whisper_feature_extraction_whisper_js__WEBPACK_IMPORTED_MODULE_11__.WhisperFeatureExtractor)
15700
15845
  /* harmony export */ });
15701
15846
  /* harmony import */ var _audio_spectrogram_transformer_feature_extraction_audio_spectrogram_transformer_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ./audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.js */ "./src/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.js");
15702
15847
  /* harmony import */ var _encodec_feature_extraction_encodec_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ./encodec/feature_extraction_encodec.js */ "./src/models/encodec/feature_extraction_encodec.js");
@@ -15705,11 +15850,13 @@ __webpack_require__.r(__webpack_exports__);
15705
15850
  /* harmony import */ var _moonshine_feature_extraction_moonshine_js__WEBPACK_IMPORTED_MODULE_4__ = __webpack_require__(/*! ./moonshine/feature_extraction_moonshine.js */ "./src/models/moonshine/feature_extraction_moonshine.js");
15706
15851
  /* harmony import */ var _pyannote_feature_extraction_pyannote_js__WEBPACK_IMPORTED_MODULE_5__ = __webpack_require__(/*! ./pyannote/feature_extraction_pyannote.js */ "./src/models/pyannote/feature_extraction_pyannote.js");
15707
15852
  /* harmony import */ var _seamless_m4t_feature_extraction_seamless_m4t_js__WEBPACK_IMPORTED_MODULE_6__ = __webpack_require__(/*! ./seamless_m4t/feature_extraction_seamless_m4t.js */ "./src/models/seamless_m4t/feature_extraction_seamless_m4t.js");
15708
- /* harmony import */ var _speecht5_feature_extraction_speecht5_js__WEBPACK_IMPORTED_MODULE_7__ = __webpack_require__(/*! ./speecht5/feature_extraction_speecht5.js */ "./src/models/speecht5/feature_extraction_speecht5.js");
15709
- /* harmony import */ var _wav2vec2_feature_extraction_wav2vec2_js__WEBPACK_IMPORTED_MODULE_8__ = __webpack_require__(/*! ./wav2vec2/feature_extraction_wav2vec2.js */ "./src/models/wav2vec2/feature_extraction_wav2vec2.js");
15710
- /* harmony import */ var _wespeaker_feature_extraction_wespeaker_js__WEBPACK_IMPORTED_MODULE_9__ = __webpack_require__(/*! ./wespeaker/feature_extraction_wespeaker.js */ "./src/models/wespeaker/feature_extraction_wespeaker.js");
15711
- /* harmony import */ var _whisper_feature_extraction_whisper_js__WEBPACK_IMPORTED_MODULE_10__ = __webpack_require__(/*! ./whisper/feature_extraction_whisper.js */ "./src/models/whisper/feature_extraction_whisper.js");
15712
- /* harmony import */ var _base_image_processors_utils_js__WEBPACK_IMPORTED_MODULE_11__ = __webpack_require__(/*! ../base/image_processors_utils.js */ "./src/base/image_processors_utils.js");
15853
+ /* harmony import */ var _snac_feature_extraction_snac_js__WEBPACK_IMPORTED_MODULE_7__ = __webpack_require__(/*! ./snac/feature_extraction_snac.js */ "./src/models/snac/feature_extraction_snac.js");
15854
+ /* harmony import */ var _speecht5_feature_extraction_speecht5_js__WEBPACK_IMPORTED_MODULE_8__ = __webpack_require__(/*! ./speecht5/feature_extraction_speecht5.js */ "./src/models/speecht5/feature_extraction_speecht5.js");
15855
+ /* harmony import */ var _wav2vec2_feature_extraction_wav2vec2_js__WEBPACK_IMPORTED_MODULE_9__ = __webpack_require__(/*! ./wav2vec2/feature_extraction_wav2vec2.js */ "./src/models/wav2vec2/feature_extraction_wav2vec2.js");
15856
+ /* harmony import */ var _wespeaker_feature_extraction_wespeaker_js__WEBPACK_IMPORTED_MODULE_10__ = __webpack_require__(/*! ./wespeaker/feature_extraction_wespeaker.js */ "./src/models/wespeaker/feature_extraction_wespeaker.js");
15857
+ /* harmony import */ var _whisper_feature_extraction_whisper_js__WEBPACK_IMPORTED_MODULE_11__ = __webpack_require__(/*! ./whisper/feature_extraction_whisper.js */ "./src/models/whisper/feature_extraction_whisper.js");
15858
+ /* harmony import */ var _base_image_processors_utils_js__WEBPACK_IMPORTED_MODULE_12__ = __webpack_require__(/*! ../base/image_processors_utils.js */ "./src/base/image_processors_utils.js");
15859
+
15713
15860
 
15714
15861
 
15715
15862
 
@@ -18742,6 +18889,25 @@ __webpack_require__.r(__webpack_exports__);
18742
18889
 
18743
18890
 
18744
18891
 
18892
+ /***/ }),
18893
+
18894
+ /***/ "./src/models/snac/feature_extraction_snac.js":
18895
+ /*!****************************************************!*\
18896
+ !*** ./src/models/snac/feature_extraction_snac.js ***!
18897
+ \****************************************************/
18898
+ /***/ ((__unused_webpack___webpack_module__, __webpack_exports__, __webpack_require__) => {
18899
+
18900
+ "use strict";
18901
+ __webpack_require__.r(__webpack_exports__);
18902
+ /* harmony export */ __webpack_require__.d(__webpack_exports__, {
18903
+ /* harmony export */ SnacFeatureExtractor: () => (/* binding */ SnacFeatureExtractor)
18904
+ /* harmony export */ });
18905
+ /* harmony import */ var _dac_feature_extraction_dac_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ../dac/feature_extraction_dac.js */ "./src/models/dac/feature_extraction_dac.js");
18906
+
18907
+
18908
+ class SnacFeatureExtractor extends _dac_feature_extraction_dac_js__WEBPACK_IMPORTED_MODULE_0__.DacFeatureExtractor { }
18909
+
18910
+
18745
18911
  /***/ }),
18746
18912
 
18747
18913
  /***/ "./src/models/speecht5/feature_extraction_speecht5.js":
@@ -19999,16 +20165,16 @@ __webpack_require__.r(__webpack_exports__);
19999
20165
  /* harmony import */ var _utils_image_js__WEBPACK_IMPORTED_MODULE_9__ = __webpack_require__(/*! ./utils/image.js */ "./src/utils/image.js");
20000
20166
  /**
20001
20167
  * @file Pipelines provide a high-level, easy to use, API for running machine learning models.
20002
- *
20168
+ *
20003
20169
  * **Example:** Instantiate pipeline using the `pipeline` function.
20004
20170
  * ```javascript
20005
20171
  * import { pipeline } from '@huggingface/transformers';
20006
- *
20172
+ *
20007
20173
  * const classifier = await pipeline('sentiment-analysis');
20008
20174
  * const output = await classifier('I love transformers!');
20009
20175
  * // [{'label': 'POSITIVE', 'score': 0.999817686}]
20010
20176
  * ```
20011
- *
20177
+ *
20012
20178
  * @module pipelines
20013
20179
  */
20014
20180
 
@@ -20027,7 +20193,7 @@ __webpack_require__.r(__webpack_exports__);
20027
20193
 
20028
20194
 
20029
20195
  /**
20030
- * @typedef {string | RawImage | URL} ImageInput
20196
+ * @typedef {string | RawImage | URL | Blob | HTMLCanvasElement | OffscreenCanvas} ImageInput
20031
20197
  * @typedef {ImageInput|ImageInput[]} ImagePipelineInputs
20032
20198
  */
20033
20199
 
@@ -20101,7 +20267,7 @@ function get_bounding_box(box, asInteger) {
20101
20267
  /**
20102
20268
  * @callback DisposeType Disposes the item.
20103
20269
  * @returns {Promise<void>} A promise that resolves when the item has been disposed.
20104
- *
20270
+ *
20105
20271
  * @typedef {Object} Disposable
20106
20272
  * @property {DisposeType} dispose A promise that resolves when the pipeline has been disposed.
20107
20273
  */
@@ -20138,7 +20304,7 @@ class Pipeline extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_4__.Callable {
20138
20304
  * @property {string} task The task of the pipeline. Useful for specifying subtasks.
20139
20305
  * @property {PreTrainedModel} model The model used by the pipeline.
20140
20306
  * @property {PreTrainedTokenizer} tokenizer The tokenizer used by the pipeline.
20141
- *
20307
+ *
20142
20308
  * @typedef {ModelTokenizerConstructorArgs} TextPipelineConstructorArgs An object used to instantiate a text-based pipeline.
20143
20309
  */
20144
20310
 
@@ -20147,7 +20313,7 @@ class Pipeline extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_4__.Callable {
20147
20313
  * @property {string} task The task of the pipeline. Useful for specifying subtasks.
20148
20314
  * @property {PreTrainedModel} model The model used by the pipeline.
20149
20315
  * @property {Processor} processor The processor used by the pipeline.
20150
- *
20316
+ *
20151
20317
  * @typedef {ModelProcessorConstructorArgs} AudioPipelineConstructorArgs An object used to instantiate an audio-based pipeline.
20152
20318
  * @typedef {ModelProcessorConstructorArgs} ImagePipelineConstructorArgs An object used to instantiate an image-based pipeline.
20153
20319
  */
@@ -20159,7 +20325,7 @@ class Pipeline extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_4__.Callable {
20159
20325
  * @property {PreTrainedModel} model The model used by the pipeline.
20160
20326
  * @property {PreTrainedTokenizer} tokenizer The tokenizer used by the pipeline.
20161
20327
  * @property {Processor} processor The processor used by the pipeline.
20162
- *
20328
+ *
20163
20329
  * @typedef {ModelTokenizerProcessorConstructorArgs} TextAudioPipelineConstructorArgs An object used to instantiate a text- and audio-based pipeline.
20164
20330
  * @typedef {ModelTokenizerProcessorConstructorArgs} TextImagePipelineConstructorArgs An object used to instantiate a text- and image-based pipeline.
20165
20331
  */
@@ -20169,15 +20335,15 @@ class Pipeline extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_4__.Callable {
20169
20335
  * @property {string} label The label predicted.
20170
20336
  * @property {number} score The corresponding probability.
20171
20337
  * @typedef {TextClassificationSingle[]} TextClassificationOutput
20172
- *
20338
+ *
20173
20339
  * @typedef {Object} TextClassificationPipelineOptions Parameters specific to text classification pipelines.
20174
20340
  * @property {number} [top_k=1] The number of top predictions to be returned.
20175
- *
20341
+ *
20176
20342
  * @callback TextClassificationPipelineCallback Classify the text(s) given as inputs.
20177
20343
  * @param {string|string[]} texts The input text(s) to be classified.
20178
20344
  * @param {TextClassificationPipelineOptions} [options] The options to use for text classification.
20179
20345
  * @returns {Promise<TextClassificationOutput|TextClassificationOutput[]>} An array or object containing the predicted labels and scores.
20180
- *
20346
+ *
20181
20347
  * @typedef {TextPipelineConstructorArgs & TextClassificationPipelineCallback & Disposable} TextClassificationPipelineType
20182
20348
  */
20183
20349
 
@@ -20190,7 +20356,7 @@ class Pipeline extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_4__.Callable {
20190
20356
  * const output = await classifier('I love transformers!');
20191
20357
  * // [{ label: 'POSITIVE', score: 0.999788761138916 }]
20192
20358
  * ```
20193
- *
20359
+ *
20194
20360
  * **Example:** Multilingual sentiment-analysis w/ `Xenova/bert-base-multilingual-uncased-sentiment` (and return top 5 classes).
20195
20361
  * ```javascript
20196
20362
  * const classifier = await pipeline('sentiment-analysis', 'Xenova/bert-base-multilingual-uncased-sentiment');
@@ -20203,7 +20369,7 @@ class Pipeline extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_4__.Callable {
20203
20369
  * // { label: '2 stars', score: 0.0009423971059732139 }
20204
20370
  * // ]
20205
20371
  * ```
20206
- *
20372
+ *
20207
20373
  * **Example:** Toxic comment classification w/ `Xenova/toxic-bert` (and return all classes).
20208
20374
  * ```javascript
20209
20375
  * const classifier = await pipeline('text-classification', 'Xenova/toxic-bert');
@@ -20288,21 +20454,21 @@ class TextClassificationPipeline extends (/** @type {new (options: TextPipelineC
20288
20454
  * @property {number} [start] The index of the start of the corresponding entity in the sentence.
20289
20455
  * @property {number} [end] The index of the end of the corresponding entity in the sentence.
20290
20456
  * @typedef {TokenClassificationSingle[]} TokenClassificationOutput
20291
- *
20457
+ *
20292
20458
  * @typedef {Object} TokenClassificationPipelineOptions Parameters specific to token classification pipelines.
20293
20459
  * @property {string[]} [ignore_labels] A list of labels to ignore.
20294
- *
20460
+ *
20295
20461
  * @callback TokenClassificationPipelineCallback Classify each token of the text(s) given as inputs.
20296
20462
  * @param {string|string[]} texts One or several texts (or one list of texts) for token classification.
20297
20463
  * @param {TokenClassificationPipelineOptions} [options] The options to use for token classification.
20298
20464
  * @returns {Promise<TokenClassificationOutput|TokenClassificationOutput[]>} The result.
20299
- *
20465
+ *
20300
20466
  * @typedef {TextPipelineConstructorArgs & TokenClassificationPipelineCallback & Disposable} TokenClassificationPipelineType
20301
20467
  */
20302
20468
 
20303
20469
  /**
20304
20470
  * Named Entity Recognition pipeline using any `ModelForTokenClassification`.
20305
- *
20471
+ *
20306
20472
  * **Example:** Perform named entity recognition with `Xenova/bert-base-NER`.
20307
20473
  * ```javascript
20308
20474
  * const classifier = await pipeline('token-classification', 'Xenova/bert-base-NER');
@@ -20312,7 +20478,7 @@ class TextClassificationPipeline extends (/** @type {new (options: TextPipelineC
20312
20478
  * // { entity: 'B-LOC', score: 0.9994474053382874, index: 9, word: 'London' }
20313
20479
  * // ]
20314
20480
  * ```
20315
- *
20481
+ *
20316
20482
  * **Example:** Perform named entity recognition with `Xenova/bert-base-NER` (and return all labels).
20317
20483
  * ```javascript
20318
20484
  * const classifier = await pipeline('token-classification', 'Xenova/bert-base-NER');
@@ -20408,22 +20574,22 @@ class TokenClassificationPipeline extends (/** @type {new (options: TextPipeline
20408
20574
  * @property {number} [start] The character start index of the answer (in the tokenized version of the input).
20409
20575
  * @property {number} [end] The character end index of the answer (in the tokenized version of the input).
20410
20576
  * @property {string} answer The answer to the question.
20411
- *
20577
+ *
20412
20578
  * @typedef {Object} QuestionAnsweringPipelineOptions Parameters specific to question answering pipelines.
20413
20579
  * @property {number} [top_k=1] The number of top answer predictions to be returned.
20414
- *
20580
+ *
20415
20581
  * @callback QuestionAnsweringPipelineCallback Answer the question(s) given as inputs by using the context(s).
20416
20582
  * @param {string|string[]} question One or several question(s) (must be used in conjunction with the `context` argument).
20417
20583
  * @param {string|string[]} context One or several context(s) associated with the question(s) (must be used in conjunction with the `question` argument).
20418
20584
  * @param {QuestionAnsweringPipelineOptions} [options] The options to use for question answering.
20419
20585
  * @returns {Promise<QuestionAnsweringOutput|QuestionAnsweringOutput[]>} An array or object containing the predicted answers and scores.
20420
- *
20586
+ *
20421
20587
  * @typedef {TextPipelineConstructorArgs & QuestionAnsweringPipelineCallback & Disposable} QuestionAnsweringPipelineType
20422
20588
  */
20423
20589
 
20424
20590
  /**
20425
20591
  * Question Answering pipeline using any `ModelForQuestionAnswering`.
20426
- *
20592
+ *
20427
20593
  * **Example:** Run question answering with `Xenova/distilbert-base-uncased-distilled-squad`.
20428
20594
  * ```javascript
20429
20595
  * const answerer = await pipeline('question-answering', 'Xenova/distilbert-base-uncased-distilled-squad');
@@ -20548,10 +20714,10 @@ class QuestionAnsweringPipeline extends (/** @type {new (options: TextPipelineCo
20548
20714
  * @property {number} token The predicted token id (to replace the masked one).
20549
20715
  * @property {string} token_str The predicted token (to replace the masked one).
20550
20716
  * @typedef {FillMaskSingle[]} FillMaskOutput
20551
- *
20717
+ *
20552
20718
  * @typedef {Object} FillMaskPipelineOptions Parameters specific to fill mask pipelines.
20553
20719
  * @property {number} [top_k=5] When passed, overrides the number of predictions to return.
20554
- *
20720
+ *
20555
20721
  * @callback FillMaskPipelineCallback Fill the masked token in the text(s) given as inputs.
20556
20722
  * @param {string|string[]} texts One or several texts (or one list of prompts) with masked tokens.
20557
20723
  * @param {FillMaskPipelineOptions} [options] The options to use for masked language modelling.
@@ -20559,13 +20725,13 @@ class QuestionAnsweringPipeline extends (/** @type {new (options: TextPipelineCo
20559
20725
  * and the sequence with the predicted token filled in, or an array of such arrays (one for each input text).
20560
20726
  * If only one input text is given, the output will be an array of objects.
20561
20727
  * @throws {Error} When the mask token is not found in the input text.
20562
- *
20728
+ *
20563
20729
  * @typedef {TextPipelineConstructorArgs & FillMaskPipelineCallback & Disposable} FillMaskPipelineType
20564
20730
  */
20565
20731
 
20566
20732
  /**
20567
20733
  * Masked language modeling prediction pipeline using any `ModelWithLMHead`.
20568
- *
20734
+ *
20569
20735
  * **Example:** Perform masked language modelling (a.k.a. "fill-mask") with `Xenova/bert-base-uncased`.
20570
20736
  * ```javascript
20571
20737
  * const unmasker = await pipeline('fill-mask', 'Xenova/bert-base-cased');
@@ -20578,7 +20744,7 @@ class QuestionAnsweringPipeline extends (/** @type {new (options: TextPipelineCo
20578
20744
  * // { token_str: 'life', score: 0.01859794743359089, token: 1297, sequence: 'The goal of life is life.' }
20579
20745
  * // ]
20580
20746
  * ```
20581
- *
20747
+ *
20582
20748
  * **Example:** Perform masked language modelling (a.k.a. "fill-mask") with `Xenova/bert-base-cased` (and return top result).
20583
20749
  * ```javascript
20584
20750
  * const unmasker = await pipeline('fill-mask', 'Xenova/bert-base-cased');
@@ -20655,18 +20821,18 @@ class FillMaskPipeline extends (/** @type {new (options: TextPipelineConstructor
20655
20821
  * @typedef {Object} Text2TextGenerationSingle
20656
20822
  * @property {string} generated_text The generated text.
20657
20823
  * @typedef {Text2TextGenerationSingle[]} Text2TextGenerationOutput
20658
- *
20824
+ *
20659
20825
  * @callback Text2TextGenerationPipelineCallback Generate the output text(s) using text(s) given as inputs.
20660
20826
  * @param {string|string[]} texts Input text for the encoder.
20661
20827
  * @param {Partial<import('./generation/configuration_utils.js').GenerationConfig>} [options] Additional keyword arguments to pass along to the generate method of the model.
20662
20828
  * @returns {Promise<Text2TextGenerationOutput|Text2TextGenerationOutput[]>}
20663
- *
20829
+ *
20664
20830
  * @typedef {TextPipelineConstructorArgs & Text2TextGenerationPipelineCallback & Disposable} Text2TextGenerationPipelineType
20665
20831
  */
20666
20832
 
20667
20833
  /**
20668
20834
  * Text2TextGenerationPipeline class for generating text using a model that performs text-to-text generation tasks.
20669
- *
20835
+ *
20670
20836
  * **Example:** Text-to-text generation w/ `Xenova/LaMini-Flan-T5-783M`.
20671
20837
  * ```javascript
20672
20838
  * const generator = await pipeline('text2text-generation', 'Xenova/LaMini-Flan-T5-783M');
@@ -20742,18 +20908,18 @@ class Text2TextGenerationPipeline extends (/** @type {new (options: TextPipeline
20742
20908
  * @typedef {Object} SummarizationSingle
20743
20909
  * @property {string} summary_text The summary text.
20744
20910
  * @typedef {SummarizationSingle[]} SummarizationOutput
20745
- *
20911
+ *
20746
20912
  * @callback SummarizationPipelineCallback Summarize the text(s) given as inputs.
20747
20913
  * @param {string|string[]} texts One or several articles (or one list of articles) to summarize.
20748
20914
  * @param {import('./generation/configuration_utils.js').GenerationConfig} [options] Additional keyword arguments to pass along to the generate method of the model.
20749
20915
  * @returns {Promise<SummarizationOutput|SummarizationOutput[]>}
20750
- *
20916
+ *
20751
20917
  * @typedef {TextPipelineConstructorArgs & SummarizationPipelineCallback & Disposable} SummarizationPipelineType
20752
20918
  */
20753
20919
 
20754
20920
  /**
20755
20921
  * A pipeline for summarization tasks, inheriting from Text2TextGenerationPipeline.
20756
- *
20922
+ *
20757
20923
  * **Example:** Summarization w/ `Xenova/distilbart-cnn-6-6`.
20758
20924
  * ```javascript
20759
20925
  * const generator = await pipeline('summarization', 'Xenova/distilbart-cnn-6-6');
@@ -20789,23 +20955,23 @@ class SummarizationPipeline extends (/** @type {new (options: TextPipelineConstr
20789
20955
  * @typedef {Object} TranslationSingle
20790
20956
  * @property {string} translation_text The translated text.
20791
20957
  * @typedef {TranslationSingle[]} TranslationOutput
20792
- *
20958
+ *
20793
20959
  * @callback TranslationPipelineCallback Translate the text(s) given as inputs.
20794
20960
  * @param {string|string[]} texts Texts to be translated.
20795
20961
  * @param {import('./generation/configuration_utils.js').GenerationConfig} [options] Additional keyword arguments to pass along to the generate method of the model.
20796
20962
  * @returns {Promise<TranslationOutput|TranslationOutput[]>}
20797
- *
20963
+ *
20798
20964
  * @typedef {TextPipelineConstructorArgs & TranslationPipelineCallback & Disposable} TranslationPipelineType
20799
20965
  */
20800
20966
 
20801
20967
  /**
20802
20968
  * Translates text from one language to another.
20803
- *
20969
+ *
20804
20970
  * **Example:** Multilingual translation w/ `Xenova/nllb-200-distilled-600M`.
20805
- *
20971
+ *
20806
20972
  * See [here](https://github.com/facebookresearch/flores/blob/main/flores200/README.md#languages-in-flores-200)
20807
20973
  * for the full list of languages and their corresponding codes.
20808
- *
20974
+ *
20809
20975
  * ```javascript
20810
20976
  * const translator = await pipeline('translation', 'Xenova/nllb-200-distilled-600M');
20811
20977
  * const output = await translator('जीवन एक चॉकलेट बॉक्स की तरह है।', {
@@ -20814,12 +20980,12 @@ class SummarizationPipeline extends (/** @type {new (options: TextPipelineConstr
20814
20980
  * });
20815
20981
  * // [{ translation_text: 'La vie est comme une boîte à chocolat.' }]
20816
20982
  * ```
20817
- *
20983
+ *
20818
20984
  * **Example:** Multilingual translation w/ `Xenova/m2m100_418M`.
20819
- *
20985
+ *
20820
20986
  * See [here](https://huggingface.co/facebook/m2m100_418M#languages-covered)
20821
20987
  * for the full list of languages and their corresponding codes.
20822
- *
20988
+ *
20823
20989
  * ```javascript
20824
20990
  * const translator = await pipeline('translation', 'Xenova/m2m100_418M');
20825
20991
  * const output = await translator('生活就像一盒巧克力。', {
@@ -20828,12 +20994,12 @@ class SummarizationPipeline extends (/** @type {new (options: TextPipelineConstr
20828
20994
  * });
20829
20995
  * // [{ translation_text: 'Life is like a box of chocolate.' }]
20830
20996
  * ```
20831
- *
20997
+ *
20832
20998
  * **Example:** Multilingual translation w/ `Xenova/mbart-large-50-many-to-many-mmt`.
20833
- *
20999
+ *
20834
21000
  * See [here](https://huggingface.co/facebook/mbart-large-50-many-to-many-mmt#languages-covered)
20835
21001
  * for the full list of languages and their corresponding codes.
20836
- *
21002
+ *
20837
21003
  * ```javascript
20838
21004
  * const translator = await pipeline('translation', 'Xenova/mbart-large-50-many-to-many-mmt');
20839
21005
  * const output = await translator('संयुक्त राष्ट्र के प्रमुख का कहना है कि सीरिया में कोई सैन्य समाधान नहीं है', {
@@ -20862,21 +21028,21 @@ function isChat(x) {
20862
21028
 
20863
21029
  /**
20864
21030
  * @typedef {import('./tokenizers.js').Message[]} Chat
20865
- *
21031
+ *
20866
21032
  * @typedef {Object} TextGenerationSingle
20867
21033
  * @property {string|Chat} generated_text The generated text.
20868
21034
  * @typedef {TextGenerationSingle[]} TextGenerationOutput
20869
- *
21035
+ *
20870
21036
  * @typedef {Object} TextGenerationSpecificParams Parameters specific to text-generation pipelines.
20871
21037
  * @property {boolean} [add_special_tokens] Whether or not to add special tokens when tokenizing the sequences.
20872
21038
  * @property {boolean} [return_full_text=true] If set to `false` only added text is returned, otherwise the full text is returned.
20873
21039
  * @typedef {import('./generation/configuration_utils.js').GenerationConfig & TextGenerationSpecificParams} TextGenerationConfig
20874
- *
21040
+ *
20875
21041
  * @callback TextGenerationPipelineCallback Complete the prompt(s) given as inputs.
20876
21042
  * @param {string|string[]|Chat|Chat[]} texts One or several prompts (or one list of prompts) to complete.
20877
21043
  * @param {Partial<TextGenerationConfig>} [options] Additional keyword arguments to pass along to the generate method of the model.
20878
21044
  * @returns {Promise<TextGenerationOutput|TextGenerationOutput[]>} An array or object containing the generated texts.
20879
- *
21045
+ *
20880
21046
  * @typedef {TextPipelineConstructorArgs & TextGenerationPipelineCallback & Disposable} TextGenerationPipelineType
20881
21047
  */
20882
21048
 
@@ -20884,7 +21050,7 @@ function isChat(x) {
20884
21050
  * Language generation pipeline using any `ModelWithLMHead` or `ModelForCausalLM`.
20885
21051
  * This pipeline predicts the words that will follow a specified text prompt.
20886
21052
  * NOTE: For the full list of generation parameters, see [`GenerationConfig`](./utils/generation#module_utils/generation.GenerationConfig).
20887
- *
21053
+ *
20888
21054
  * **Example:** Text generation with `Xenova/distilgpt2` (default settings).
20889
21055
  * ```javascript
20890
21056
  * const generator = await pipeline('text-generation', 'Xenova/distilgpt2');
@@ -20892,7 +21058,7 @@ function isChat(x) {
20892
21058
  * const output = await generator(text);
20893
21059
  * // [{ generated_text: "I enjoy walking with my cute dog, and I love to play with the other dogs." }]
20894
21060
  * ```
20895
- *
21061
+ *
20896
21062
  * **Example:** Text generation with `Xenova/distilgpt2` (custom settings).
20897
21063
  * ```javascript
20898
21064
  * const generator = await pipeline('text-generation', 'Xenova/distilgpt2');
@@ -20911,7 +21077,7 @@ function isChat(x) {
20911
21077
  * // "generated_text": "Once upon a time, there was an abundance of information about the most important and influential"
20912
21078
  * // }]
20913
21079
  * ```
20914
- *
21080
+ *
20915
21081
  * **Example:** Run code generation with `Xenova/codegen-350M-mono`.
20916
21082
  * ```javascript
20917
21083
  * const generator = await pipeline('text-generation', 'Xenova/codegen-350M-mono');
@@ -21030,7 +21196,7 @@ class TextGenerationPipeline extends (/** @type {new (options: TextPipelineConst
21030
21196
  * @property {string} sequence The sequence for which this is the output.
21031
21197
  * @property {string[]} labels The labels sorted by order of likelihood.
21032
21198
  * @property {number[]} scores The probabilities for each of the labels.
21033
- *
21199
+ *
21034
21200
  * @typedef {Object} ZeroShotClassificationPipelineOptions Parameters specific to zero-shot classification pipelines.
21035
21201
  * @property {string} [hypothesis_template="This example is {}."] The template used to turn each
21036
21202
  * candidate label into an NLI-style hypothesis. The candidate label will replace the {} placeholder.
@@ -21038,14 +21204,14 @@ class TextGenerationPipeline extends (/** @type {new (options: TextPipelineConst
21038
21204
  * If `false`, the scores are normalized such that the sum of the label likelihoods for each sequence
21039
21205
  * is 1. If `true`, the labels are considered independent and probabilities are normalized for each
21040
21206
  * candidate by doing a softmax of the entailment score vs. the contradiction score.
21041
- *
21207
+ *
21042
21208
  * @callback ZeroShotClassificationPipelineCallback Classify the sequence(s) given as inputs.
21043
21209
  * @param {string|string[]} texts The sequence(s) to classify, will be truncated if the model input is too large.
21044
21210
  * @param {string|string[]} candidate_labels The set of possible class labels to classify each sequence into.
21045
21211
  * Can be a single label, a string of comma-separated labels, or a list of labels.
21046
21212
  * @param {ZeroShotClassificationPipelineOptions} [options] The options to use for zero-shot classification.
21047
21213
  * @returns {Promise<ZeroShotClassificationOutput|ZeroShotClassificationOutput[]>} An array or object containing the predicted labels and scores.
21048
- *
21214
+ *
21049
21215
  * @typedef {TextPipelineConstructorArgs & ZeroShotClassificationPipelineCallback & Disposable} ZeroShotClassificationPipelineType
21050
21216
  */
21051
21217
 
@@ -21054,7 +21220,7 @@ class TextGenerationPipeline extends (/** @type {new (options: TextPipelineConst
21054
21220
  * trained on NLI (natural language inference) tasks. Equivalent of `text-classification`
21055
21221
  * pipelines, but these models don't require a hardcoded number of potential classes, they
21056
21222
  * can be chosen at runtime. It usually means it's slower but it is **much** more flexible.
21057
- *
21223
+ *
21058
21224
  * **Example:** Zero shot classification with `Xenova/mobilebert-uncased-mnli`.
21059
21225
  * ```javascript
21060
21226
  * const classifier = await pipeline('zero-shot-classification', 'Xenova/mobilebert-uncased-mnli');
@@ -21067,7 +21233,7 @@ class TextGenerationPipeline extends (/** @type {new (options: TextPipelineConst
21067
21233
  * // scores: [ 0.5562091040482018, 0.1843621307860853, 0.13942646639336376, 0.12000229877234923 ]
21068
21234
  * // }
21069
21235
  * ```
21070
- *
21236
+ *
21071
21237
  * **Example:** Zero shot classification with `Xenova/nli-deberta-v3-xsmall` (multi-label).
21072
21238
  * ```javascript
21073
21239
  * const classifier = await pipeline('zero-shot-classification', 'Xenova/nli-deberta-v3-xsmall');
@@ -21181,20 +21347,20 @@ class ZeroShotClassificationPipeline extends (/** @type {new (options: TextPipel
21181
21347
  * @property {'none'|'mean'|'cls'} [pooling="none"] The pooling method to use.
21182
21348
  * @property {boolean} [normalize=false] Whether or not to normalize the embeddings in the last dimension.
21183
21349
  * @property {boolean} [quantize=false] Whether or not to quantize the embeddings.
21184
- * @property {'binary'|'ubinary'} [precision='binary'] The precision to use for quantization.
21185
- *
21350
+ * @property {'binary'|'ubinary'} [precision='binary'] The precision to use for quantization.
21351
+ *
21186
21352
  * @callback FeatureExtractionPipelineCallback Extract the features of the input(s).
21187
21353
  * @param {string|string[]} texts One or several texts (or one list of texts) to get the features of.
21188
21354
  * @param {FeatureExtractionPipelineOptions} [options] The options to use for feature extraction.
21189
21355
  * @returns {Promise<Tensor>} The features computed by the model.
21190
- *
21356
+ *
21191
21357
  * @typedef {TextPipelineConstructorArgs & FeatureExtractionPipelineCallback & Disposable} FeatureExtractionPipelineType
21192
21358
  */
21193
21359
 
21194
21360
  /**
21195
21361
  * Feature extraction pipeline using no model head. This pipeline extracts the hidden
21196
21362
  * states from the base transformer, which can be used as features in downstream tasks.
21197
- *
21363
+ *
21198
21364
  * **Example:** Run feature extraction with `bert-base-uncased` (without pooling/normalization).
21199
21365
  * ```javascript
21200
21366
  * const extractor = await pipeline('feature-extraction', 'Xenova/bert-base-uncased', { revision: 'default' });
@@ -21205,7 +21371,7 @@ class ZeroShotClassificationPipeline extends (/** @type {new (options: TextPipel
21205
21371
  * // dims: [1, 8, 768]
21206
21372
  * // }
21207
21373
  * ```
21208
- *
21374
+ *
21209
21375
  * **Example:** Run feature extraction with `bert-base-uncased` (with pooling/normalization).
21210
21376
  * ```javascript
21211
21377
  * const extractor = await pipeline('feature-extraction', 'Xenova/bert-base-uncased', { revision: 'default' });
@@ -21216,7 +21382,7 @@ class ZeroShotClassificationPipeline extends (/** @type {new (options: TextPipel
21216
21382
  * // dims: [1, 768]
21217
21383
  * // }
21218
21384
  * ```
21219
- *
21385
+ *
21220
21386
  * **Example:** Calculating embeddings with `sentence-transformers` models.
21221
21387
  * ```javascript
21222
21388
  * const extractor = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2');
@@ -21297,19 +21463,19 @@ class FeatureExtractionPipeline extends (/** @type {new (options: TextPipelineCo
21297
21463
  /**
21298
21464
  * @typedef {Object} ImageFeatureExtractionPipelineOptions Parameters specific to image feature extraction pipelines.
21299
21465
  * @property {boolean} [pool=null] Whether or not to return the pooled output. If set to `false`, the model will return the raw hidden states.
21300
- *
21466
+ *
21301
21467
  * @callback ImageFeatureExtractionPipelineCallback Extract the features of the input(s).
21302
21468
  * @param {ImagePipelineInputs} images One or several images (or one list of images) to get the features of.
21303
21469
  * @param {ImageFeatureExtractionPipelineOptions} [options] The options to use for image feature extraction.
21304
21470
  * @returns {Promise<Tensor>} The image features computed by the model.
21305
- *
21471
+ *
21306
21472
  * @typedef {ImagePipelineConstructorArgs & ImageFeatureExtractionPipelineCallback & Disposable} ImageFeatureExtractionPipelineType
21307
21473
  */
21308
21474
 
21309
21475
  /**
21310
21476
  * Image feature extraction pipeline using no model head. This pipeline extracts the hidden
21311
21477
  * states from the base transformer, which can be used as features in downstream tasks.
21312
- *
21478
+ *
21313
21479
  * **Example:** Perform image feature extraction with `Xenova/vit-base-patch16-224-in21k`.
21314
21480
  * ```javascript
21315
21481
  * const image_feature_extractor = await pipeline('image-feature-extraction', 'Xenova/vit-base-patch16-224-in21k');
@@ -21322,7 +21488,7 @@ class FeatureExtractionPipeline extends (/** @type {new (options: TextPipelineCo
21322
21488
  * // size: 151296
21323
21489
  * // }
21324
21490
  * ```
21325
- *
21491
+ *
21326
21492
  * **Example:** Compute image embeddings with `Xenova/clip-vit-base-patch32`.
21327
21493
  * ```javascript
21328
21494
  * const image_feature_extractor = await pipeline('image-feature-extraction', 'Xenova/clip-vit-base-patch32');
@@ -21378,12 +21544,12 @@ class ImageFeatureExtractionPipeline extends (/** @type {new (options: ImagePipe
21378
21544
  * @property {string} label The label predicted.
21379
21545
  * @property {number} score The corresponding probability.
21380
21546
  * @typedef {AudioClassificationSingle[]} AudioClassificationOutput
21381
- *
21547
+ *
21382
21548
  * @typedef {Object} AudioClassificationPipelineOptions Parameters specific to audio classification pipelines.
21383
21549
  * @property {number} [top_k=5] The number of top labels that will be returned by the pipeline.
21384
21550
  * If the provided number is `null` or higher than the number of labels available in the model configuration,
21385
21551
  * it will default to the number of labels.
21386
- *
21552
+ *
21387
21553
  * @callback AudioClassificationPipelineCallback Classify the sequence(s) given as inputs.
21388
21554
  * @param {AudioPipelineInputs} audio The input audio file(s) to be classified. The input is either:
21389
21555
  * - `string` or `URL` that is the filename/URL of the audio file, the file will be read at the processor's sampling rate
@@ -21392,14 +21558,14 @@ class ImageFeatureExtractionPipeline extends (/** @type {new (options: ImagePipe
21392
21558
  * - `Float32Array` or `Float64Array` of shape `(n, )`, representing the raw audio at the correct sampling rate (no further check will be done).
21393
21559
  * @param {AudioClassificationPipelineOptions} [options] The options to use for audio classification.
21394
21560
  * @returns {Promise<AudioClassificationOutput|AudioClassificationOutput[]>} An array or object containing the predicted labels and scores.
21395
- *
21561
+ *
21396
21562
  * @typedef {AudioPipelineConstructorArgs & AudioClassificationPipelineCallback & Disposable} AudioClassificationPipelineType
21397
21563
  */
21398
21564
 
21399
21565
  /**
21400
21566
  * Audio classification pipeline using any `AutoModelForAudioClassification`.
21401
21567
  * This pipeline predicts the class of a raw waveform or an audio file.
21402
- *
21568
+ *
21403
21569
  * **Example:** Perform audio classification with `Xenova/wav2vec2-large-xlsr-53-gender-recognition-librispeech`.
21404
21570
  * ```javascript
21405
21571
  * const classifier = await pipeline('audio-classification', 'Xenova/wav2vec2-large-xlsr-53-gender-recognition-librispeech');
@@ -21410,7 +21576,7 @@ class ImageFeatureExtractionPipeline extends (/** @type {new (options: ImagePipe
21410
21576
  * // { label: 'female', score: 0.001845747814513743 }
21411
21577
  * // ]
21412
21578
  * ```
21413
- *
21579
+ *
21414
21580
  * **Example:** Perform audio classification with `Xenova/ast-finetuned-audioset-10-10-0.4593` and return top 4 results.
21415
21581
  * ```javascript
21416
21582
  * const classifier = await pipeline('audio-classification', 'Xenova/ast-finetuned-audioset-10-10-0.4593');
@@ -21475,12 +21641,12 @@ class AudioClassificationPipeline extends (/** @type {new (options: AudioPipelin
21475
21641
  * @typedef {Object} ZeroShotAudioClassificationOutput
21476
21642
  * @property {string} label The label identified by the model. It is one of the suggested `candidate_label`.
21477
21643
  * @property {number} score The score attributed by the model for that label (between 0 and 1).
21478
- *
21644
+ *
21479
21645
  * @typedef {Object} ZeroShotAudioClassificationPipelineOptions Parameters specific to zero-shot audio classification pipelines.
21480
21646
  * @property {string} [hypothesis_template="This is a sound of {}."] The sentence used in conjunction with `candidate_labels`
21481
21647
  * to attempt the audio classification by replacing the placeholder with the candidate_labels.
21482
21648
  * Then likelihood is estimated by using `logits_per_audio`.
21483
- *
21649
+ *
21484
21650
  * @callback ZeroShotAudioClassificationPipelineCallback Classify the sequence(s) given as inputs.
21485
21651
  * @param {AudioPipelineInputs} audio The input audio file(s) to be classified. The input is either:
21486
21652
  * - `string` or `URL` that is the filename/URL of the audio file, the file will be read at the processor's sampling rate
@@ -21490,14 +21656,14 @@ class AudioClassificationPipeline extends (/** @type {new (options: AudioPipelin
21490
21656
  * @param {string[]} candidate_labels The candidate labels for this audio.
21491
21657
  * @param {ZeroShotAudioClassificationPipelineOptions} [options] The options to use for zero-shot audio classification.
21492
21658
  * @returns {Promise<ZeroShotAudioClassificationOutput[]|ZeroShotAudioClassificationOutput[][]>} An array of objects containing the predicted labels and scores.
21493
- *
21659
+ *
21494
21660
  * @typedef {TextAudioPipelineConstructorArgs & ZeroShotAudioClassificationPipelineCallback & Disposable} ZeroShotAudioClassificationPipelineType
21495
21661
  */
21496
21662
 
21497
21663
  /**
21498
21664
  * Zero shot audio classification pipeline using `ClapModel`. This pipeline predicts the class of an audio when you
21499
21665
  * provide an audio and a set of `candidate_labels`.
21500
- *
21666
+ *
21501
21667
  * **Example**: Perform zero-shot audio classification with `Xenova/clap-htsat-unfused`.
21502
21668
  * ```javascript
21503
21669
  * const classifier = await pipeline('zero-shot-audio-classification', 'Xenova/clap-htsat-unfused');
@@ -21530,7 +21696,7 @@ class ZeroShotAudioClassificationPipeline extends (/** @type {new (options: Text
21530
21696
  audio = [/** @type {AudioInput} */ (audio)];
21531
21697
  }
21532
21698
 
21533
- // Insert label into hypothesis template
21699
+ // Insert label into hypothesis template
21534
21700
  const texts = candidate_labels.map(
21535
21701
  x => hypothesis_template.replace('{}', x)
21536
21702
  );
@@ -21574,7 +21740,7 @@ class ZeroShotAudioClassificationPipeline extends (/** @type {new (options: Text
21574
21740
  * @property {string} text The recognized text.
21575
21741
  * @property {Chunk[]} [chunks] When using `return_timestamps`, the `chunks` will become a list
21576
21742
  * containing all the various text chunks identified by the model.
21577
- *
21743
+ *
21578
21744
  * @typedef {Object} AutomaticSpeechRecognitionSpecificParams Parameters specific to automatic-speech-recognition pipelines.
21579
21745
  * @property {boolean|'word'} [return_timestamps] Whether to return timestamps or not. Default is `false`.
21580
21746
  * @property {number} [chunk_length_s] The length of audio chunks to process in seconds. Default is 0 (no chunking).
@@ -21584,7 +21750,7 @@ class ZeroShotAudioClassificationPipeline extends (/** @type {new (options: Text
21584
21750
  * @property {string} [task] The task to perform. Default is `null`, meaning it should be auto-detected.
21585
21751
  * @property {number} [num_frames] The number of frames in the input audio.
21586
21752
  * @typedef {import('./generation/configuration_utils.js').GenerationConfig & AutomaticSpeechRecognitionSpecificParams} AutomaticSpeechRecognitionConfig
21587
- *
21753
+ *
21588
21754
  * @callback AutomaticSpeechRecognitionPipelineCallback Transcribe the audio sequence(s) given as inputs to text.
21589
21755
  * @param {AudioPipelineInputs} audio The input audio file(s) to be transcribed. The input is either:
21590
21756
  * - `string` or `URL` that is the filename/URL of the audio file, the file will be read at the processor's sampling rate
@@ -21593,7 +21759,7 @@ class ZeroShotAudioClassificationPipeline extends (/** @type {new (options: Text
21593
21759
  * - `Float32Array` or `Float64Array` of shape `(n, )`, representing the raw audio at the correct sampling rate (no further check will be done).
21594
21760
  * @param {Partial<AutomaticSpeechRecognitionConfig>} [options] Additional keyword arguments to pass along to the generate method of the model.
21595
21761
  * @returns {Promise<AutomaticSpeechRecognitionOutput|AutomaticSpeechRecognitionOutput[]>} An object containing the transcription text and optionally timestamps if `return_timestamps` is `true`.
21596
- *
21762
+ *
21597
21763
  * @typedef {TextAudioPipelineConstructorArgs & AutomaticSpeechRecognitionPipelineCallback & Disposable} AutomaticSpeechRecognitionPipelineType
21598
21764
  */
21599
21765
 
@@ -21607,7 +21773,7 @@ class ZeroShotAudioClassificationPipeline extends (/** @type {new (options: Text
21607
21773
  * const output = await transcriber(url);
21608
21774
  * // { text: " And so my fellow Americans ask not what your country can do for you, ask what you can do for your country." }
21609
21775
  * ```
21610
- *
21776
+ *
21611
21777
  * **Example:** Transcribe English w/ timestamps.
21612
21778
  * ```javascript
21613
21779
  * const transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en');
@@ -21621,7 +21787,7 @@ class ZeroShotAudioClassificationPipeline extends (/** @type {new (options: Text
21621
21787
  * // ]
21622
21788
  * // }
21623
21789
  * ```
21624
- *
21790
+ *
21625
21791
  * **Example:** Transcribe English w/ word-level timestamps.
21626
21792
  * ```javascript
21627
21793
  * const transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en');
@@ -21640,7 +21806,7 @@ class ZeroShotAudioClassificationPipeline extends (/** @type {new (options: Text
21640
21806
  * // ]
21641
21807
  * // }
21642
21808
  * ```
21643
- *
21809
+ *
21644
21810
  * **Example:** Transcribe French.
21645
21811
  * ```javascript
21646
21812
  * const transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-small');
@@ -21648,7 +21814,7 @@ class ZeroShotAudioClassificationPipeline extends (/** @type {new (options: Text
21648
21814
  * const output = await transcriber(url, { language: 'french', task: 'transcribe' });
21649
21815
  * // { text: " J'adore, j'aime, je n'aime pas, je déteste." }
21650
21816
  * ```
21651
- *
21817
+ *
21652
21818
  * **Example:** Translate French to English.
21653
21819
  * ```javascript
21654
21820
  * const transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-small');
@@ -21656,7 +21822,7 @@ class ZeroShotAudioClassificationPipeline extends (/** @type {new (options: Text
21656
21822
  * const output = await transcriber(url, { language: 'french', task: 'translate' });
21657
21823
  * // { text: " I love, I like, I don't like, I hate." }
21658
21824
  * ```
21659
- *
21825
+ *
21660
21826
  * **Example:** Transcribe/translate audio longer than 30 seconds.
21661
21827
  * ```javascript
21662
21828
  * const transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en');
@@ -21879,18 +22045,18 @@ class AutomaticSpeechRecognitionPipeline extends (/** @type {new (options: TextA
21879
22045
  * @typedef {Object} ImageToTextSingle
21880
22046
  * @property {string} generated_text The generated text.
21881
22047
  * @typedef {ImageToTextSingle[]} ImageToTextOutput
21882
- *
22048
+ *
21883
22049
  * @callback ImageToTextPipelineCallback Assign labels to the image(s) passed as inputs.
21884
22050
  * @param {ImagePipelineInputs} texts The images to be captioned.
21885
22051
  * @param {Partial<import('./generation/configuration_utils.js').GenerationConfig>} [options] Additional keyword arguments to pass along to the generate method of the model.
21886
22052
  * @returns {Promise<ImageToTextOutput|ImageToTextOutput[]>} An object (or array of objects) containing the generated text(s).
21887
- *
22053
+ *
21888
22054
  * @typedef {TextImagePipelineConstructorArgs & ImageToTextPipelineCallback & Disposable} ImageToTextPipelineType
21889
22055
  */
21890
22056
 
21891
22057
  /**
21892
22058
  * Image To Text pipeline using a `AutoModelForVision2Seq`. This pipeline predicts a caption for a given image.
21893
- *
22059
+ *
21894
22060
  * **Example:** Generate a caption for an image w/ `Xenova/vit-gpt2-image-captioning`.
21895
22061
  * ```javascript
21896
22062
  * const captioner = await pipeline('image-to-text', 'Xenova/vit-gpt2-image-captioning');
@@ -21898,7 +22064,7 @@ class AutomaticSpeechRecognitionPipeline extends (/** @type {new (options: TextA
21898
22064
  * const output = await captioner(url);
21899
22065
  * // [{ generated_text: 'a cat laying on a couch with another cat' }]
21900
22066
  * ```
21901
- *
22067
+ *
21902
22068
  * **Example:** Optical Character Recognition (OCR) w/ `Xenova/trocr-small-handwritten`.
21903
22069
  * ```javascript
21904
22070
  * const captioner = await pipeline('image-to-text', 'Xenova/trocr-small-handwritten');
@@ -21944,22 +22110,22 @@ class ImageToTextPipeline extends (/** @type {new (options: TextImagePipelineCon
21944
22110
  * @property {string} label The label identified by the model.
21945
22111
  * @property {number} score The score attributed by the model for that label.
21946
22112
  * @typedef {ImageClassificationSingle[]} ImageClassificationOutput
21947
- *
22113
+ *
21948
22114
  * @typedef {Object} ImageClassificationPipelineOptions Parameters specific to image classification pipelines.
21949
- * @property {number} [top_k=1] The number of top labels that will be returned by the pipeline.
21950
- *
22115
+ * @property {number} [top_k=1] The number of top labels that will be returned by the pipeline.
22116
+ *
21951
22117
  * @callback ImageClassificationPipelineCallback Assign labels to the image(s) passed as inputs.
21952
22118
  * @param {ImagePipelineInputs} images The input images(s) to be classified.
21953
22119
  * @param {ImageClassificationPipelineOptions} [options] The options to use for image classification.
21954
22120
  * @returns {Promise<ImageClassificationOutput|ImageClassificationOutput[]>} An array or object containing the predicted labels and scores.
21955
- *
22121
+ *
21956
22122
  * @typedef {ImagePipelineConstructorArgs & ImageClassificationPipelineCallback & Disposable} ImageClassificationPipelineType
21957
22123
  */
21958
22124
 
21959
22125
  /**
21960
22126
  * Image classification pipeline using any `AutoModelForImageClassification`.
21961
22127
  * This pipeline predicts the class of an image.
21962
- *
22128
+ *
21963
22129
  * **Example:** Classify an image.
21964
22130
  * ```javascript
21965
22131
  * const classifier = await pipeline('image-classification', 'Xenova/vit-base-patch16-224');
@@ -21969,7 +22135,7 @@ class ImageToTextPipeline extends (/** @type {new (options: TextImagePipelineCon
21969
22135
  * // { label: 'tiger, Panthera tigris', score: 0.632695734500885 },
21970
22136
  * // ]
21971
22137
  * ```
21972
- *
22138
+ *
21973
22139
  * **Example:** Classify an image and return top `n` classes.
21974
22140
  * ```javascript
21975
22141
  * const classifier = await pipeline('image-classification', 'Xenova/vit-base-patch16-224');
@@ -21981,7 +22147,7 @@ class ImageToTextPipeline extends (/** @type {new (options: TextImagePipelineCon
21981
22147
  * // { label: 'lion, king of beasts, Panthera leo', score: 0.00045060308184474707 },
21982
22148
  * // ]
21983
22149
  * ```
21984
- *
22150
+ *
21985
22151
  * **Example:** Classify an image and return all classes.
21986
22152
  * ```javascript
21987
22153
  * const classifier = await pipeline('image-classification', 'Xenova/vit-base-patch16-224');
@@ -22048,7 +22214,7 @@ class ImageClassificationPipeline extends (/** @type {new (options: ImagePipelin
22048
22214
  * @property {string|null} label The label of the segment.
22049
22215
  * @property {number|null} score The score of the segment.
22050
22216
  * @property {RawImage} mask The mask of the segment.
22051
- *
22217
+ *
22052
22218
  * @typedef {Object} ImageSegmentationPipelineOptions Parameters specific to image segmentation pipelines.
22053
22219
  * @property {number} [threshold=0.5] Probability threshold to filter out predicted masks.
22054
22220
  * @property {number} [mask_threshold=0.5] Threshold to use when turning the predicted masks into binary values.
@@ -22057,19 +22223,19 @@ class ImageClassificationPipeline extends (/** @type {new (options: ImagePipelin
22057
22223
  * depending on model capabilities. If not set, the pipeline will attempt to resolve (in that order).
22058
22224
  * @property {number[]} [label_ids_to_fuse=null] List of label ids to fuse. If not set, do not fuse any labels.
22059
22225
  * @property {number[][]} [target_sizes=null] List of target sizes for the input images. If not set, use the original image sizes.
22060
- *
22226
+ *
22061
22227
  * @callback ImageSegmentationPipelineCallback Segment the input images.
22062
22228
  * @param {ImagePipelineInputs} images The input images.
22063
22229
  * @param {ImageSegmentationPipelineOptions} [options] The options to use for image segmentation.
22064
22230
  * @returns {Promise<ImageSegmentationPipelineOutput[]>} The annotated segments.
22065
- *
22231
+ *
22066
22232
  * @typedef {ImagePipelineConstructorArgs & ImageSegmentationPipelineCallback & Disposable} ImageSegmentationPipelineType
22067
22233
  */
22068
22234
 
22069
22235
  /**
22070
22236
  * Image segmentation pipeline using any `AutoModelForXXXSegmentation`.
22071
22237
  * This pipeline predicts masks of objects and their classes.
22072
- *
22238
+ *
22073
22239
  * **Example:** Perform image segmentation with `Xenova/detr-resnet-50-panoptic`.
22074
22240
  * ```javascript
22075
22241
  * const segmenter = await pipeline('image-segmentation', 'Xenova/detr-resnet-50-panoptic');
@@ -22153,12 +22319,17 @@ class ImageSegmentationPipeline extends (/** @type {new (options: ImagePipelineC
22153
22319
  /** @type {ImageSegmentationPipelineOutput[]} */
22154
22320
  const annotation = [];
22155
22321
  if (!subtask) {
22322
+ // We define an epsilon to safeguard against numerical/precision issues when detecting
22323
+ // the normalization mode of the output (i.e., sigmoid already applied, or not).
22324
+ // See https://github.com/microsoft/onnxruntime/issues/23943 for more information.
22325
+ const epsilon = 1e-5;
22326
+
22156
22327
  // Perform standard image segmentation
22157
22328
  const result = output[outputNames[0]];
22158
22329
  for (let i = 0; i < imageSizes.length; ++i) {
22159
22330
  const size = imageSizes[i];
22160
22331
  const item = result[i];
22161
- if (item.data.some(x => x < 0 || x > 1)) {
22332
+ if (item.data.some(x => x < -epsilon || x > 1 + epsilon)) {
22162
22333
  item.sigmoid_();
22163
22334
  }
22164
22335
  const mask = await _utils_image_js__WEBPACK_IMPORTED_MODULE_9__.RawImage.fromTensor(item.mul_(255).to('uint8')).resize(size[1], size[0]);
@@ -22227,19 +22398,19 @@ class ImageSegmentationPipeline extends (/** @type {new (options: ImagePipelineC
22227
22398
 
22228
22399
  /**
22229
22400
  * @typedef {Object} BackgroundRemovalPipelineOptions Parameters specific to image segmentation pipelines.
22230
- *
22401
+ *
22231
22402
  * @callback BackgroundRemovalPipelineCallback Segment the input images.
22232
22403
  * @param {ImagePipelineInputs} images The input images.
22233
22404
  * @param {BackgroundRemovalPipelineOptions} [options] The options to use for image segmentation.
22234
22405
  * @returns {Promise<RawImage[]>} The images with the background removed.
22235
- *
22406
+ *
22236
22407
  * @typedef {ImagePipelineConstructorArgs & BackgroundRemovalPipelineCallback & Disposable} BackgroundRemovalPipelineType
22237
22408
  */
22238
22409
 
22239
22410
  /**
22240
22411
  * Background removal pipeline using certain `AutoModelForXXXSegmentation`.
22241
22412
  * This pipeline removes the backgrounds of images.
22242
- *
22413
+ *
22243
22414
  * **Example:** Perform background removal with `Xenova/modnet`.
22244
22415
  * ```javascript
22245
22416
  * const segmenter = await pipeline('background-removal', 'Xenova/modnet');
@@ -22250,7 +22421,7 @@ class ImageSegmentationPipeline extends (/** @type {new (options: ImagePipelineC
22250
22421
  * // ]
22251
22422
  * ```
22252
22423
  */
22253
- class BackgroundRemovalPipeline extends (/** @type {new (options: ImagePipelineConstructorArgs) => ImageSegmentationPipelineType} */ (ImageSegmentationPipeline)) {
22424
+ class BackgroundRemovalPipeline extends (/** @type {new (options: ImagePipelineConstructorArgs) => BackgroundRemovalPipelineType} */ (/** @type {any} */(ImageSegmentationPipeline))) {
22254
22425
  /**
22255
22426
  * Create a new BackgroundRemovalPipeline.
22256
22427
  * @param {ImagePipelineConstructorArgs} options An object used to instantiate the pipeline.
@@ -22285,25 +22456,25 @@ class BackgroundRemovalPipeline extends (/** @type {new (options: ImagePipelineC
22285
22456
  * @typedef {Object} ZeroShotImageClassificationOutput
22286
22457
  * @property {string} label The label identified by the model. It is one of the suggested `candidate_label`.
22287
22458
  * @property {number} score The score attributed by the model for that label (between 0 and 1).
22288
- *
22459
+ *
22289
22460
  * @typedef {Object} ZeroShotImageClassificationPipelineOptions Parameters specific to zero-shot image classification pipelines.
22290
22461
  * @property {string} [hypothesis_template="This is a photo of {}"] The sentence used in conjunction with `candidate_labels`
22291
22462
  * to attempt the image classification by replacing the placeholder with the candidate_labels.
22292
22463
  * Then likelihood is estimated by using `logits_per_image`.
22293
- *
22464
+ *
22294
22465
  * @callback ZeroShotImageClassificationPipelineCallback Assign labels to the image(s) passed as inputs.
22295
22466
  * @param {ImagePipelineInputs} images The input images.
22296
22467
  * @param {string[]} candidate_labels The candidate labels for this image.
22297
22468
  * @param {ZeroShotImageClassificationPipelineOptions} [options] The options to use for zero-shot image classification.
22298
22469
  * @returns {Promise<ZeroShotImageClassificationOutput[]|ZeroShotImageClassificationOutput[][]>} An array of objects containing the predicted labels and scores.
22299
- *
22470
+ *
22300
22471
  * @typedef {TextImagePipelineConstructorArgs & ZeroShotImageClassificationPipelineCallback & Disposable} ZeroShotImageClassificationPipelineType
22301
22472
  */
22302
22473
 
22303
22474
  /**
22304
22475
  * Zero shot image classification pipeline. This pipeline predicts the class of
22305
22476
  * an image when you provide an image and a set of `candidate_labels`.
22306
- *
22477
+ *
22307
22478
  * **Example:** Zero shot image classification w/ `Xenova/clip-vit-base-patch32`.
22308
22479
  * ```javascript
22309
22480
  * const classifier = await pipeline('zero-shot-image-classification', 'Xenova/clip-vit-base-patch32');
@@ -22333,7 +22504,7 @@ class ZeroShotImageClassificationPipeline extends (/** @type {new (options: Text
22333
22504
  const isBatched = Array.isArray(images);
22334
22505
  const preparedImages = await prepareImages(images);
22335
22506
 
22336
- // Insert label into hypothesis template
22507
+ // Insert label into hypothesis template
22337
22508
  const texts = candidate_labels.map(
22338
22509
  x => hypothesis_template.replace('{}', x)
22339
22510
  );
@@ -22380,23 +22551,23 @@ class ZeroShotImageClassificationPipeline extends (/** @type {new (options: Text
22380
22551
  * @property {number} score The score attributed by the model for that label.
22381
22552
  * @property {BoundingBox} box The bounding box of detected object in image's original size, or as a percentage if `percentage` is set to true.
22382
22553
  * @typedef {ObjectDetectionPipelineSingle[]} ObjectDetectionPipelineOutput
22383
- *
22554
+ *
22384
22555
  * @typedef {Object} ObjectDetectionPipelineOptions Parameters specific to object detection pipelines.
22385
22556
  * @property {number} [threshold=0.9] The threshold used to filter boxes by score.
22386
22557
  * @property {boolean} [percentage=false] Whether to return the boxes coordinates in percentage (true) or in pixels (false).
22387
- *
22558
+ *
22388
22559
  * @callback ObjectDetectionPipelineCallback Detect objects (bounding boxes & classes) in the image(s) passed as inputs.
22389
22560
  * @param {ImagePipelineInputs} images The input images.
22390
22561
  * @param {ObjectDetectionPipelineOptions} [options] The options to use for object detection.
22391
- * @returns {Promise<ObjectDetectionPipelineOutput|ObjectDetectionPipelineOutput[]>} A list of objects or a list of list of objects.
22392
- *
22562
+ * @returns {Promise<ObjectDetectionPipelineOutput|ObjectDetectionPipelineOutput[]>} A list of objects or a list of list of objects.
22563
+ *
22393
22564
  * @typedef {ImagePipelineConstructorArgs & ObjectDetectionPipelineCallback & Disposable} ObjectDetectionPipelineType
22394
22565
  */
22395
22566
 
22396
22567
  /**
22397
22568
  * Object detection pipeline using any `AutoModelForObjectDetection`.
22398
22569
  * This pipeline predicts bounding boxes of objects and their classes.
22399
- *
22570
+ *
22400
22571
  * **Example:** Run object-detection with `Xenova/detr-resnet-50`.
22401
22572
  * ```javascript
22402
22573
  * const detector = await pipeline('object-detection', 'Xenova/detr-resnet-50');
@@ -22470,27 +22641,27 @@ class ObjectDetectionPipeline extends (/** @type {new (options: ImagePipelineCon
22470
22641
  * @property {string} label Text query corresponding to the found object.
22471
22642
  * @property {number} score Score corresponding to the object (between 0 and 1).
22472
22643
  * @property {BoundingBox} box Bounding box of the detected object in image's original size, or as a percentage if `percentage` is set to true.
22473
- *
22644
+ *
22474
22645
  * @typedef {Object} ZeroShotObjectDetectionPipelineOptions Parameters specific to zero-shot object detection pipelines.
22475
22646
  * @property {number} [threshold=0.1] The probability necessary to make a prediction.
22476
22647
  * @property {number} [top_k=null] The number of top predictions that will be returned by the pipeline.
22477
22648
  * If the provided number is `null` or higher than the number of predictions available, it will default
22478
22649
  * to the number of predictions.
22479
22650
  * @property {boolean} [percentage=false] Whether to return the boxes coordinates in percentage (true) or in pixels (false).
22480
- *
22651
+ *
22481
22652
  * @callback ZeroShotObjectDetectionPipelineCallback Detect objects (bounding boxes & classes) in the image(s) passed as inputs.
22482
22653
  * @param {ImagePipelineInputs} images The input images.
22483
22654
  * @param {string[]} candidate_labels What the model should recognize in the image.
22484
22655
  * @param {ZeroShotObjectDetectionPipelineOptions} [options] The options to use for zero-shot object detection.
22485
22656
  * @returns {Promise<ZeroShotObjectDetectionOutput[]|ZeroShotObjectDetectionOutput[][]>} An array of objects containing the predicted labels, scores, and bounding boxes.
22486
- *
22657
+ *
22487
22658
  * @typedef {TextImagePipelineConstructorArgs & ZeroShotObjectDetectionPipelineCallback & Disposable} ZeroShotObjectDetectionPipelineType
22488
22659
  */
22489
22660
 
22490
22661
  /**
22491
22662
  * Zero-shot object detection pipeline. This pipeline predicts bounding boxes of
22492
22663
  * objects when you provide an image and a set of `candidate_labels`.
22493
- *
22664
+ *
22494
22665
  * **Example:** Zero-shot object detection w/ `Xenova/owlvit-base-patch32`.
22495
22666
  * ```javascript
22496
22667
  * const detector = await pipeline('zero-shot-object-detection', 'Xenova/owlvit-base-patch32');
@@ -22520,7 +22691,7 @@ class ObjectDetectionPipeline extends (/** @type {new (options: ImagePipelineCon
22520
22691
  * // }
22521
22692
  * // ]
22522
22693
  * ```
22523
- *
22694
+ *
22524
22695
  * **Example:** Zero-shot object detection w/ `Xenova/owlvit-base-patch32` (returning top 4 matches and setting a threshold).
22525
22696
  * ```javascript
22526
22697
  * const detector = await pipeline('zero-shot-object-detection', 'Xenova/owlvit-base-patch32');
@@ -22635,13 +22806,13 @@ class ZeroShotObjectDetectionPipeline extends (/** @type {new (options: TextImag
22635
22806
  * @typedef {Object} DocumentQuestionAnsweringSingle
22636
22807
  * @property {string} answer The generated text.
22637
22808
  * @typedef {DocumentQuestionAnsweringSingle[]} DocumentQuestionAnsweringOutput
22638
- *
22809
+ *
22639
22810
  * @callback DocumentQuestionAnsweringPipelineCallback Answer the question given as input by using the document.
22640
22811
  * @param {ImageInput} image The image of the document to use.
22641
22812
  * @param {string} question A question to ask of the document.
22642
22813
  * @param {Partial<import('./generation/configuration_utils.js').GenerationConfig>} [options] Additional keyword arguments to pass along to the generate method of the model.
22643
22814
  * @returns {Promise<DocumentQuestionAnsweringOutput|DocumentQuestionAnsweringOutput[]>} An object (or array of objects) containing the answer(s).
22644
- *
22815
+ *
22645
22816
  * @typedef {TextImagePipelineConstructorArgs & DocumentQuestionAnsweringPipelineCallback & Disposable} DocumentQuestionAnsweringPipelineType
22646
22817
  */
22647
22818
 
@@ -22649,7 +22820,7 @@ class ZeroShotObjectDetectionPipeline extends (/** @type {new (options: TextImag
22649
22820
  * Document Question Answering pipeline using any `AutoModelForDocumentQuestionAnswering`.
22650
22821
  * The inputs/outputs are similar to the (extractive) question answering pipeline; however,
22651
22822
  * the pipeline takes an image (and optional OCR'd words/boxes) as input instead of text context.
22652
- *
22823
+ *
22653
22824
  * **Example:** Answer questions about a document with `Xenova/donut-base-finetuned-docvqa`.
22654
22825
  * ```javascript
22655
22826
  * const qa_pipeline = await pipeline('document-question-answering', 'Xenova/donut-base-finetuned-docvqa');
@@ -22719,22 +22890,22 @@ class DocumentQuestionAnsweringPipeline extends (/** @type {new (options: TextIm
22719
22890
  * @typedef {Object} TextToAudioOutput
22720
22891
  * @property {Float32Array} audio The generated audio waveform.
22721
22892
  * @property {number} sampling_rate The sampling rate of the generated audio waveform.
22722
- *
22893
+ *
22723
22894
  * @typedef {Object} TextToAudioPipelineOptions Parameters specific to text-to-audio pipelines.
22724
22895
  * @property {Tensor|Float32Array|string|URL} [speaker_embeddings=null] The speaker embeddings (if the model requires it).
22725
- *
22896
+ *
22726
22897
  * @callback TextToAudioPipelineCallback Generates speech/audio from the inputs.
22727
22898
  * @param {string|string[]} texts The text(s) to generate.
22728
22899
  * @param {TextToAudioPipelineOptions} options Parameters passed to the model generation/forward method.
22729
22900
  * @returns {Promise<TextToAudioOutput>} An object containing the generated audio and sampling rate.
22730
- *
22901
+ *
22731
22902
  * @typedef {TextToAudioPipelineConstructorArgs & TextToAudioPipelineCallback & Disposable} TextToAudioPipelineType
22732
22903
  */
22733
22904
 
22734
22905
  /**
22735
22906
  * Text-to-audio generation pipeline using any `AutoModelForTextToWaveform` or `AutoModelForTextToSpectrogram`.
22736
22907
  * This pipeline generates an audio file from an input text and optional other conditional inputs.
22737
- *
22908
+ *
22738
22909
  * **Example:** Generate audio from text with `Xenova/speecht5_tts`.
22739
22910
  * ```javascript
22740
22911
  * const synthesizer = await pipeline('text-to-speech', 'Xenova/speecht5_tts', { quantized: false });
@@ -22745,17 +22916,17 @@ class DocumentQuestionAnsweringPipeline extends (/** @type {new (options: TextIm
22745
22916
  * // sampling_rate: 16000
22746
22917
  * // }
22747
22918
  * ```
22748
- *
22919
+ *
22749
22920
  * You can then save the audio to a .wav file with the `wavefile` package:
22750
22921
  * ```javascript
22751
22922
  * import wavefile from 'wavefile';
22752
22923
  * import fs from 'fs';
22753
- *
22924
+ *
22754
22925
  * const wav = new wavefile.WaveFile();
22755
22926
  * wav.fromScratch(1, out.sampling_rate, '32f', out.audio);
22756
22927
  * fs.writeFileSync('out.wav', wav.toBuffer());
22757
22928
  * ```
22758
- *
22929
+ *
22759
22930
  * **Example:** Multilingual speech generation with `Xenova/mms-tts-fra`. See [here](https://huggingface.co/models?pipeline_tag=text-to-speech&other=vits&sort=trending) for the full list of available languages (1107).
22760
22931
  * ```javascript
22761
22932
  * const synthesizer = await pipeline('text-to-speech', 'Xenova/mms-tts-fra');
@@ -22861,13 +23032,13 @@ class TextToAudioPipeline extends (/** @type {new (options: TextToAudioPipelineC
22861
23032
  * @callback ImageToImagePipelineCallback Transform the image(s) passed as inputs.
22862
23033
  * @param {ImagePipelineInputs} images The images to transform.
22863
23034
  * @returns {Promise<RawImage|RawImage[]>} The transformed image or list of images.
22864
- *
23035
+ *
22865
23036
  * @typedef {ImagePipelineConstructorArgs & ImageToImagePipelineCallback & Disposable} ImageToImagePipelineType
22866
23037
  */
22867
23038
 
22868
23039
  /**
22869
23040
  * Image to Image pipeline using any `AutoModelForImageToImage`. This pipeline generates an image based on a previous image input.
22870
- *
23041
+ *
22871
23042
  * **Example:** Super-resolution w/ `Xenova/swin2SR-classical-sr-x2-64`
22872
23043
  * ```javascript
22873
23044
  * const upscaler = await pipeline('image-to-image', 'Xenova/swin2SR-classical-sr-x2-64');
@@ -22912,17 +23083,17 @@ class ImageToImagePipeline extends (/** @type {new (options: ImagePipelineConstr
22912
23083
  * @typedef {Object} DepthEstimationPipelineOutput
22913
23084
  * @property {Tensor} predicted_depth The raw depth map predicted by the model.
22914
23085
  * @property {RawImage} depth The processed depth map as an image (with the same size as the input image).
22915
- *
23086
+ *
22916
23087
  * @callback DepthEstimationPipelineCallback Predicts the depth for the image(s) passed as inputs.
22917
23088
  * @param {ImagePipelineInputs} images The images to compute depth for.
22918
23089
  * @returns {Promise<DepthEstimationPipelineOutput|DepthEstimationPipelineOutput[]>} An image or a list of images containing result(s).
22919
- *
23090
+ *
22920
23091
  * @typedef {ImagePipelineConstructorArgs & DepthEstimationPipelineCallback & Disposable} DepthEstimationPipelineType
22921
23092
  */
22922
23093
 
22923
23094
  /**
22924
23095
  * Depth estimation pipeline using any `AutoModelForDepthEstimation`. This pipeline predicts the depth of an image.
22925
- *
23096
+ *
22926
23097
  * **Example:** Depth estimation w/ `Xenova/dpt-hybrid-midas`
22927
23098
  * ```javascript
22928
23099
  * const depth_estimator = await pipeline('depth-estimation', 'Xenova/dpt-hybrid-midas');
@@ -23307,7 +23478,7 @@ const TASK_ALIASES = Object.freeze({
23307
23478
 
23308
23479
  /**
23309
23480
  * Utility factory method to build a `Pipeline` object.
23310
- *
23481
+ *
23311
23482
  * @template {PipelineType} T The type of pipeline to return.
23312
23483
  * @param {T} task The task defining which pipeline will be returned. Currently accepted tasks are:
23313
23484
  * - `"audio-classification"`: will return a `AudioClassificationPipeline`.
@@ -26126,13 +26297,12 @@ class PreTrainedTokenizer extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_0__
26126
26297
  this.decoder.end_of_word_suffix = this.model.end_of_word_suffix;
26127
26298
  }
26128
26299
 
26129
- this.added_tokens_regex = this.added_tokens.length > 0 ? new RegExp(
26130
- this.added_tokens.slice()
26131
- // Sort by length (desc) to avoid early partial matches
26132
- .sort((a, b) => b.content.length - a.content.length)
26133
- .map(x => `${x.lstrip ? '\\s*' : ''}(${(0,_utils_core_js__WEBPACK_IMPORTED_MODULE_1__.escapeRegExp)(x.content)})${x.rstrip ? '\\s*' : ''}`)
26134
- .join('|')
26135
- ) : null;
26300
+ this.added_tokens_splitter = new _utils_data_structures_js__WEBPACK_IMPORTED_MODULE_5__.DictionarySplitter(
26301
+ this.added_tokens.map(x => x.content),
26302
+ );
26303
+
26304
+ /** @type {Map<string, AddedToken>} */
26305
+ this.added_tokens_map = new Map(this.added_tokens.map(x => [x.content, x]))
26136
26306
 
26137
26307
  // Set mask token if present (otherwise will be undefined, which is fine)
26138
26308
  this.mask_token = this.getToken('mask_token');
@@ -26427,40 +26597,50 @@ class PreTrainedTokenizer extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_0__
26427
26597
  // Actual function which does encoding, for a single text
26428
26598
  // First, we take care of special tokens. Needed to avoid issues arising from
26429
26599
  // normalization and/or pretokenization (which may not preserve special tokens)
26430
- const sections = this.added_tokens_regex ? text.split(this.added_tokens_regex).filter(x => x) : [text];
26431
-
26432
- const tokens = sections.map((x, section_index) => {
26433
- const addedToken = this.added_tokens.find(t => t.content === x);
26434
- if (addedToken !== undefined) {
26435
- // Ignore added tokens
26436
- return x
26437
- } else {
26438
- if (this.remove_space === true) {
26439
- x = x.trim().split(/\s+/).join(' ');
26440
- }
26441
- if (this.do_lowercase_and_remove_accent) {
26442
- x = lowercase_and_remove_accent(x);
26600
+ const sections = this.added_tokens_splitter.split(text);
26601
+
26602
+ // Process left/right stripping of added tokens
26603
+ for (let i = 0; i < sections.length; ++i) {
26604
+ const addedToken = this.added_tokens_map.get(sections[i]);
26605
+ if (addedToken) {
26606
+ if (addedToken.lstrip && i > 0) {
26607
+ sections[i - 1] = sections[i - 1].trimEnd();
26443
26608
  }
26444
-
26445
- if (this.normalizer !== null) {
26446
- x = this.normalizer(x);
26609
+ if (addedToken.rstrip && i < sections.length - 1) {
26610
+ sections[i + 1] = sections[i + 1].trimStart();
26447
26611
  }
26612
+ }
26613
+ }
26448
26614
 
26449
- // If, after normalization, this section is empty (e.g., trimming whitespace),
26450
- // we return an empty array
26451
- if (x.length === 0) {
26452
- return [];
26453
- }
26615
+ const tokens = sections.flatMap((x, section_index) => {
26616
+ if (x.length === 0) return [];
26617
+ if (this.added_tokens_map.has(x)) return [x]; // Return added tokens unchanged
26454
26618
 
26455
- const sectionTokens = (this.pre_tokenizer !== null) ? this.pre_tokenizer(x, {
26456
- section_index,
26457
- }) : [x];
26619
+ if (this.remove_space === true) {
26620
+ x = x.trim().split(/\s+/).join(' ');
26621
+ }
26622
+ if (this.do_lowercase_and_remove_accent) {
26623
+ x = lowercase_and_remove_accent(x);
26624
+ }
26458
26625
 
26459
- const tokens = this.model(sectionTokens);
26626
+ if (this.normalizer !== null) {
26627
+ x = this.normalizer(x);
26628
+ }
26460
26629
 
26461
- return tokens;
26630
+ // If, after normalization, this section is empty (e.g., trimming whitespace),
26631
+ // we return an empty array
26632
+ if (x.length === 0) {
26633
+ return [];
26462
26634
  }
26463
- }).flat();
26635
+
26636
+ const sectionTokens = (this.pre_tokenizer !== null) ? this.pre_tokenizer(x, {
26637
+ section_index,
26638
+ }) : [x];
26639
+
26640
+ const tokens = this.model(sectionTokens);
26641
+
26642
+ return tokens;
26643
+ });
26464
26644
 
26465
26645
  return tokens;
26466
26646
  }
@@ -29089,6 +29269,7 @@ function count(arr, value) {
29089
29269
  __webpack_require__.r(__webpack_exports__);
29090
29270
  /* harmony export */ __webpack_require__.d(__webpack_exports__, {
29091
29271
  /* harmony export */ CharTrie: () => (/* binding */ CharTrie),
29272
+ /* harmony export */ DictionarySplitter: () => (/* binding */ DictionarySplitter),
29092
29273
  /* harmony export */ PriorityQueue: () => (/* binding */ PriorityQueue),
29093
29274
  /* harmony export */ TokenLattice: () => (/* binding */ TokenLattice)
29094
29275
  /* harmony export */ });
@@ -29540,6 +29721,80 @@ class TokenLatticeNode {
29540
29721
  }
29541
29722
  }
29542
29723
 
29724
+ /**
29725
+ * A data structure which uses a trie to split a string into tokens based on a dictionary.
29726
+ * It can also use a regular expression to preprocess the input text before splitting.
29727
+ *
29728
+ * NOTE: To ensure multi-byte characters are handled correctly, we operate at byte-level instead of character-level.
29729
+ */
29730
+ class DictionarySplitter {
29731
+ /**
29732
+ * @param {string[]} dictionary The dictionary of words to use for splitting.
29733
+ */
29734
+ constructor(dictionary) {
29735
+ this.trie = this._buildTrie(dictionary);
29736
+ }
29737
+
29738
+ /**
29739
+ * Builds a trie from the given dictionary.
29740
+ * @param {string[]} dictionary The dictionary of words to build the trie from.
29741
+ * @returns {Object} The root node of the trie.
29742
+ * @private
29743
+ */
29744
+ _buildTrie(dictionary) {
29745
+ const trie = Object.create(null);
29746
+ for (const word of dictionary) {
29747
+ let node = trie;
29748
+ for (let i = 0; i < word.length; ++i) {
29749
+ node = (node[word[i]] ??= Object.create(null));
29750
+ }
29751
+ node.end = word;
29752
+ }
29753
+ return trie;
29754
+ }
29755
+
29756
+ /**
29757
+ * Splits the input text into tokens based on the dictionary.
29758
+ * @param {string} text The input text to split.
29759
+ * @returns {string[]} An array of tokens.
29760
+ */
29761
+ split(text) {
29762
+ const result = [];
29763
+ const n = text.length;
29764
+ let start = 0;
29765
+ let i = 0;
29766
+
29767
+ while (i < n) {
29768
+ let node = this.trie;
29769
+ let match = null;
29770
+ let j = i;
29771
+
29772
+ while (j < n && (node = node[text[j]])) {
29773
+ if (node.end) {
29774
+ // Always keep the last (i.e., longest) match.
29775
+ match = node.end;
29776
+ }
29777
+ ++j;
29778
+ }
29779
+
29780
+ if (match) {
29781
+ if (i > start) {
29782
+ result.push(text.slice(start, i));
29783
+ }
29784
+ result.push(match);
29785
+ i += match.length;
29786
+ start = i;
29787
+ } else {
29788
+ ++i;
29789
+ }
29790
+ }
29791
+ if (start < n) {
29792
+ result.push(text.slice(start));
29793
+ }
29794
+ return result;
29795
+ }
29796
+ }
29797
+
29543
29798
 
29544
29799
  /***/ }),
29545
29800
 
@@ -29923,6 +30178,22 @@ function isValidUrl(string, protocols = null, validHosts = null) {
29923
30178
  return true;
29924
30179
  }
29925
30180
 
30181
const REPO_ID_REGEX = /^(\b[\w\-.]+\b\/)?\b[\w\-.]{1,96}\b$/;

/**
 * Tests whether a string is a valid Hugging Face model ID or not.
 * Adapted from https://github.com/huggingface/huggingface_hub/blob/6378820ebb03f071988a96c7f3268f5bdf8f9449/src/huggingface_hub/utils/_validators.py#L119-L170
 *
 * @param {string} string The string to test
 * @returns {boolean} True if the string is a valid model ID, false otherwise.
 */
function isValidHfModelId(string) {
    // Must look like "name" or "namespace/name" (word chars, dashes, dots).
    if (!REPO_ID_REGEX.test(string)) {
        return false;
    }
    // Reject reserved character sequences and file-like suffixes.
    const hasForbiddenSequence = ["..", "--"].some((seq) => string.includes(seq));
    const hasForbiddenSuffix = [".git", ".ipynb"].some((suffix) => string.endsWith(suffix));
    return !hasForbiddenSequence && !hasForbiddenSuffix;
}
30196
+
29926
30197
  /**
29927
30198
  * Helper function to get a file, using either the Fetch API or FileSystem API.
29928
30199
  *
@@ -30175,12 +30446,13 @@ async function getModelFile(path_or_repo_id, filename, fatal = true, options = {
30175
30446
  }
30176
30447
 
30177
30448
  const revision = options.revision ?? 'main';
30449
+ const requestURL = pathJoin(path_or_repo_id, filename);
30178
30450
 
30179
- let requestURL = pathJoin(path_or_repo_id, filename);
30180
- let cachePath = pathJoin(_env_js__WEBPACK_IMPORTED_MODULE_2__.env.localModelPath, requestURL);
30181
-
30182
- let localPath = requestURL;
30183
- let remoteURL = pathJoin(
30451
+ const validModelId = isValidHfModelId(path_or_repo_id);
30452
+ const localPath = validModelId
30453
+ ? pathJoin(_env_js__WEBPACK_IMPORTED_MODULE_2__.env.localModelPath, requestURL)
30454
+ : requestURL;
30455
+ const remoteURL = pathJoin(
30184
30456
  _env_js__WEBPACK_IMPORTED_MODULE_2__.env.remoteHost,
30185
30457
  _env_js__WEBPACK_IMPORTED_MODULE_2__.env.remotePathTemplate
30186
30458
  .replaceAll('{model}', path_or_repo_id)
@@ -30188,14 +30460,14 @@ async function getModelFile(path_or_repo_id, filename, fatal = true, options = {
30188
30460
  filename
30189
30461
  );
30190
30462
 
30191
- // Choose cache key for filesystem cache
30192
- // When using the main revision (default), we use the request URL as the cache key.
30193
- // If a specific revision is requested, we account for this in the cache key.
30194
- let fsCacheKey = revision === 'main' ? requestURL : pathJoin(path_or_repo_id, revision, filename);
30195
-
30196
30463
  /** @type {string} */
30197
30464
  let cacheKey;
30198
- let proposedCacheKey = cache instanceof FileCache ? fsCacheKey : remoteURL;
30465
+ const proposedCacheKey = cache instanceof FileCache
30466
+ // Choose cache key for filesystem cache
30467
+ // When using the main revision (default), we use the request URL as the cache key.
30468
+ // If a specific revision is requested, we account for this in the cache key.
30469
+ ? revision === 'main' ? requestURL : pathJoin(path_or_repo_id, revision, filename)
30470
+ : remoteURL;
30199
30471
 
30200
30472
  // Whether to cache the final response in the end.
30201
30473
  let toCacheResponse = false;
@@ -30208,11 +30480,10 @@ async function getModelFile(path_or_repo_id, filename, fatal = true, options = {
30208
30480
  // 1. We first try to get from cache using the local path. In some environments (like deno),
30209
30481
  // non-URL cache keys are not allowed. In these cases, `response` will be undefined.
30210
30482
  // 2. If no response is found, we try to get from cache using the remote URL or file system cache.
30211
- response = await tryCache(cache, cachePath, proposedCacheKey);
30483
+ response = await tryCache(cache, localPath, proposedCacheKey);
30212
30484
  }
30213
30485
 
30214
30486
  const cacheHit = response !== undefined;
30215
-
30216
30487
  if (response === undefined) {
30217
30488
  // Caching not available, or file is not cached, so we perform the request
30218
30489
 
@@ -30230,9 +30501,9 @@ async function getModelFile(path_or_repo_id, filename, fatal = true, options = {
30230
30501
  console.warn(`Unable to load from local path "${localPath}": "${e}"`);
30231
30502
  }
30232
30503
  } else if (options.local_files_only) {
30233
- throw new Error(`\`local_files_only=true\`, but attempted to load a remote file from: ${localPath}.`);
30504
+ throw new Error(`\`local_files_only=true\`, but attempted to load a remote file from: ${requestURL}.`);
30234
30505
  } else if (!_env_js__WEBPACK_IMPORTED_MODULE_2__.env.allowRemoteModels) {
30235
- throw new Error(`\`env.allowRemoteModels=false\`, but attempted to load a remote file from: ${localPath}.`);
30506
+ throw new Error(`\`env.allowRemoteModels=false\`, but attempted to load a remote file from: ${requestURL}.`);
30236
30507
  }
30237
30508
  }
30238
30509
 
@@ -30252,6 +30523,11 @@ async function getModelFile(path_or_repo_id, filename, fatal = true, options = {
30252
30523
  return null;
30253
30524
  }
30254
30525
  }
30526
+ if (!validModelId) {
30527
+ // Before making any requests to the remote server, we check if the model ID is valid.
30528
+ // This prevents unnecessary network requests for invalid model IDs.
30529
+ throw Error(`Local file missing at "${localPath}" and download aborted due to invalid model ID "${path_or_repo_id}".`);
30530
+ }
30255
30531
 
30256
30532
  // File not found locally, so we try to download it from the remote server
30257
30533
  response = await getFile(remoteURL);
@@ -30583,7 +30859,7 @@ class RawImage {
30583
30859
 
30584
30860
  /**
30585
30861
  * Helper method for reading an image from a variety of input types.
30586
- * @param {RawImage|string|URL} input
30862
+ * @param {RawImage|string|URL|Blob|HTMLCanvasElement|OffscreenCanvas} input
30587
30863
  * @returns The image object.
30588
30864
  *
30589
30865
  * **Example:** Read image from a URL.
@@ -30602,6 +30878,14 @@ class RawImage {
30602
30878
  return input;
30603
30879
  } else if (typeof input === 'string' || input instanceof URL) {
30604
30880
  return await this.fromURL(input);
30881
+ } else if (input instanceof Blob) {
30882
+ return await this.fromBlob(input);
30883
+ } else if (
30884
+ (typeof HTMLCanvasElement !== "undefined" && input instanceof HTMLCanvasElement)
30885
+ ||
30886
+ (typeof OffscreenCanvas !== "undefined" && input instanceof OffscreenCanvas)
30887
+ ) {
30888
+ return this.fromCanvas(input);
30605
30889
  } else {
30606
30890
  throw new Error(`Unsupported input type: ${typeof input}`);
30607
30891
  }
@@ -33613,8 +33897,12 @@ function calc_unsqueeze_dims(dims, dim) {
33613
33897
  * @private
33614
33898
  */
33615
33899
  function safeIndex(index, size, dimension = null, boundsCheck = true) {
33616
- if (boundsCheck && (index < -size || index >= size)) {
33617
- throw new Error(`IndexError: index ${index} is out of bounds for dimension${dimension === null ? '' : ' ' + dimension} with size ${size}`);
33900
+ if (index < -size || index >= size) {
33901
+ if (boundsCheck) {
33902
+ throw new Error(`IndexError: index ${index} is out of bounds for dimension${dimension === null ? '' : ' ' + dimension} with size ${size}`);
33903
+ } else {
33904
+ return index < -size ? 0 : size;
33905
+ }
33618
33906
  }
33619
33907
 
33620
33908
  if (index < 0) {
@@ -34471,6 +34759,9 @@ __webpack_require__.r(__webpack_exports__);
34471
34759
  /* harmony export */ Gemma2ForCausalLM: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Gemma2ForCausalLM),
34472
34760
  /* harmony export */ Gemma2Model: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Gemma2Model),
34473
34761
  /* harmony export */ Gemma2PreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Gemma2PreTrainedModel),
34762
+ /* harmony export */ Gemma3ForCausalLM: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Gemma3ForCausalLM),
34763
+ /* harmony export */ Gemma3Model: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Gemma3Model),
34764
+ /* harmony export */ Gemma3PreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Gemma3PreTrainedModel),
34474
34765
  /* harmony export */ GemmaForCausalLM: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.GemmaForCausalLM),
34475
34766
  /* harmony export */ GemmaModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.GemmaModel),
34476
34767
  /* harmony export */ GemmaPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.GemmaPreTrainedModel),
@@ -34572,6 +34863,10 @@ __webpack_require__.r(__webpack_exports__);
34572
34863
  /* harmony export */ MaskFormerPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MaskFormerPreTrainedModel),
34573
34864
  /* harmony export */ MaskedLMOutput: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MaskedLMOutput),
34574
34865
  /* harmony export */ MaxLengthCriteria: () => (/* reexport safe */ _generation_stopping_criteria_js__WEBPACK_IMPORTED_MODULE_20__.MaxLengthCriteria),
34866
+ /* harmony export */ Metric3DForDepthEstimation: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Metric3DForDepthEstimation),
34867
+ /* harmony export */ Metric3DPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Metric3DPreTrainedModel),
34868
+ /* harmony export */ Metric3Dv2ForDepthEstimation: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Metric3Dv2ForDepthEstimation),
34869
+ /* harmony export */ Metric3Dv2PreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Metric3Dv2PreTrainedModel),
34575
34870
  /* harmony export */ MgpstrForSceneTextRecognition: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MgpstrForSceneTextRecognition),
34576
34871
  /* harmony export */ MgpstrModelOutput: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MgpstrModelOutput),
34577
34872
  /* harmony export */ MgpstrPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MgpstrPreTrainedModel),
@@ -34724,11 +35019,19 @@ __webpack_require__.r(__webpack_exports__);
34724
35019
  /* harmony export */ Qwen2VLImageProcessor: () => (/* reexport safe */ _models_image_processors_js__WEBPACK_IMPORTED_MODULE_14__.Qwen2VLImageProcessor),
34725
35020
  /* harmony export */ Qwen2VLPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Qwen2VLPreTrainedModel),
34726
35021
  /* harmony export */ Qwen2VLProcessor: () => (/* reexport safe */ _models_processors_js__WEBPACK_IMPORTED_MODULE_17__.Qwen2VLProcessor),
35022
+ /* harmony export */ RFDetrForObjectDetection: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.RFDetrForObjectDetection),
35023
+ /* harmony export */ RFDetrModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.RFDetrModel),
35024
+ /* harmony export */ RFDetrObjectDetectionOutput: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.RFDetrObjectDetectionOutput),
35025
+ /* harmony export */ RFDetrPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.RFDetrPreTrainedModel),
34727
35026
  /* harmony export */ RTDetrForObjectDetection: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.RTDetrForObjectDetection),
34728
35027
  /* harmony export */ RTDetrImageProcessor: () => (/* reexport safe */ _models_image_processors_js__WEBPACK_IMPORTED_MODULE_14__.RTDetrImageProcessor),
34729
35028
  /* harmony export */ RTDetrModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.RTDetrModel),
34730
35029
  /* harmony export */ RTDetrObjectDetectionOutput: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.RTDetrObjectDetectionOutput),
34731
35030
  /* harmony export */ RTDetrPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.RTDetrPreTrainedModel),
35031
+ /* harmony export */ RTDetrV2ForObjectDetection: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.RTDetrV2ForObjectDetection),
35032
+ /* harmony export */ RTDetrV2Model: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.RTDetrV2Model),
35033
+ /* harmony export */ RTDetrV2ObjectDetectionOutput: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.RTDetrV2ObjectDetectionOutput),
35034
+ /* harmony export */ RTDetrV2PreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.RTDetrV2PreTrainedModel),
34732
35035
  /* harmony export */ RawAudio: () => (/* reexport safe */ _utils_audio_js__WEBPACK_IMPORTED_MODULE_5__.RawAudio),
34733
35036
  /* harmony export */ RawImage: () => (/* reexport safe */ _utils_image_js__WEBPACK_IMPORTED_MODULE_6__.RawImage),
34734
35037
  /* harmony export */ RawVideo: () => (/* reexport safe */ _utils_video_js__WEBPACK_IMPORTED_MODULE_7__.RawVideo),
@@ -34778,6 +35081,11 @@ __webpack_require__.r(__webpack_exports__);
34778
35081
  /* harmony export */ SmolVLMForConditionalGeneration: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.SmolVLMForConditionalGeneration),
34779
35082
  /* harmony export */ SmolVLMImageProcessor: () => (/* reexport safe */ _models_image_processors_js__WEBPACK_IMPORTED_MODULE_14__.SmolVLMImageProcessor),
34780
35083
  /* harmony export */ SmolVLMProcessor: () => (/* reexport safe */ _models_processors_js__WEBPACK_IMPORTED_MODULE_17__.SmolVLMProcessor),
35084
+ /* harmony export */ SnacDecoderModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.SnacDecoderModel),
35085
+ /* harmony export */ SnacEncoderModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.SnacEncoderModel),
35086
+ /* harmony export */ SnacFeatureExtractor: () => (/* reexport safe */ _models_feature_extractors_js__WEBPACK_IMPORTED_MODULE_11__.SnacFeatureExtractor),
35087
+ /* harmony export */ SnacModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.SnacModel),
35088
+ /* harmony export */ SnacPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.SnacPreTrainedModel),
34781
35089
  /* harmony export */ SpeechT5FeatureExtractor: () => (/* reexport safe */ _models_feature_extractors_js__WEBPACK_IMPORTED_MODULE_11__.SpeechT5FeatureExtractor),
34782
35090
  /* harmony export */ SpeechT5ForSpeechToText: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.SpeechT5ForSpeechToText),
34783
35091
  /* harmony export */ SpeechT5ForTextToSpeech: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.SpeechT5ForTextToSpeech),