@huggingface/transformers 3.1.2 → 3.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. package/README.md +7 -3
  2. package/dist/transformers.cjs +835 -144
  3. package/dist/transformers.cjs.map +1 -1
  4. package/dist/transformers.js +850 -144
  5. package/dist/transformers.js.map +1 -1
  6. package/dist/transformers.min.cjs +1 -1
  7. package/dist/transformers.min.cjs.map +1 -1
  8. package/dist/transformers.min.js +1 -1
  9. package/dist/transformers.min.js.map +1 -1
  10. package/dist/transformers.min.mjs +1 -1
  11. package/dist/transformers.min.mjs.map +1 -1
  12. package/dist/transformers.mjs +850 -144
  13. package/dist/transformers.mjs.map +1 -1
  14. package/package.json +1 -1
  15. package/src/base/image_processors_utils.js +3 -1
  16. package/src/configs.js +10 -2
  17. package/src/env.js +1 -1
  18. package/src/models/feature_extractors.js +1 -0
  19. package/src/models/idefics3/image_processing_idefics3.js +24 -13
  20. package/src/models/image_processors.js +1 -0
  21. package/src/models/moonshine/feature_extraction_moonshine.js +26 -0
  22. package/src/models/moonshine/processing_moonshine.js +20 -0
  23. package/src/models/phi3_v/image_processing_phi3_v.js +163 -0
  24. package/src/models/phi3_v/processing_phi3_v.js +53 -0
  25. package/src/models/processors.js +2 -0
  26. package/src/models/pyannote/feature_extraction_pyannote.js +56 -0
  27. package/src/models/pyannote/processing_pyannote.js +7 -54
  28. package/src/models.js +223 -30
  29. package/src/ops/registry.js +11 -0
  30. package/src/pipelines.js +31 -1
  31. package/src/utils/tensor.js +51 -1
  32. package/types/base/image_processors_utils.d.ts +2 -2
  33. package/types/base/image_processors_utils.d.ts.map +1 -1
  34. package/types/configs.d.ts.map +1 -1
  35. package/types/models/auto/image_processing_auto.d.ts.map +1 -1
  36. package/types/models/feature_extractors.d.ts +1 -0
  37. package/types/models/idefics3/image_processing_idefics3.d.ts.map +1 -1
  38. package/types/models/image_processors.d.ts +1 -0
  39. package/types/models/moonshine/feature_extraction_moonshine.d.ts +13 -0
  40. package/types/models/moonshine/feature_extraction_moonshine.d.ts.map +1 -0
  41. package/types/models/moonshine/processing_moonshine.d.ts +17 -0
  42. package/types/models/moonshine/processing_moonshine.d.ts.map +1 -0
  43. package/types/models/phi3_v/image_processing_phi3_v.d.ts +17 -0
  44. package/types/models/phi3_v/image_processing_phi3_v.d.ts.map +1 -0
  45. package/types/models/phi3_v/processing_phi3_v.d.ts +17 -0
  46. package/types/models/phi3_v/processing_phi3_v.d.ts.map +1 -0
  47. package/types/models/processors.d.ts +2 -0
  48. package/types/models/pyannote/feature_extraction_pyannote.d.ts +18 -0
  49. package/types/models/pyannote/feature_extraction_pyannote.d.ts.map +1 -1
  50. package/types/models/pyannote/processing_pyannote.d.ts +4 -15
  51. package/types/models/pyannote/processing_pyannote.d.ts.map +1 -1
  52. package/types/models.d.ts +64 -1
  53. package/types/models.d.ts.map +1 -1
  54. package/types/ops/registry.d.ts +1 -0
  55. package/types/ops/registry.d.ts.map +1 -1
  56. package/types/pipelines.d.ts +5 -0
  57. package/types/pipelines.d.ts.map +1 -1
  58. package/types/utils/tensor.d.ts +16 -0
  59. package/types/utils/tensor.d.ts.map +1 -1
package/src/models.js CHANGED
@@ -131,6 +131,7 @@ const MODEL_TYPES = {
131
131
  ImageTextToText: 6,
132
132
  Musicgen: 7,
133
133
  MultiModality: 8,
134
+ Phi3V: 9,
134
135
  }
135
136
  //////////////////////////////////////////////////
136
137
 
@@ -906,6 +907,10 @@ export class PreTrainedModel extends Callable {
906
907
  this._forward = imageTextToTextForward;
907
908
  this._prepare_inputs_for_generation = image_text_to_text_prepare_inputs_for_generation;
908
909
  break;
910
+ case MODEL_TYPES.Phi3V:
911
+ this.can_generate = true;
912
+ this._prepare_inputs_for_generation = image_text_to_text_prepare_inputs_for_generation;
913
+ break;
909
914
 
910
915
  case MODEL_TYPES.MultiModality:
911
916
  this.can_generate = true;
@@ -1070,6 +1075,18 @@ export class PreTrainedModel extends Callable {
1070
1075
  }, options),
1071
1076
  ]);
1072
1077
 
1078
+ } else if (modelType === MODEL_TYPES.Phi3V) {
1079
+ info = await Promise.all([
1080
+ constructSessions(pretrained_model_name_or_path, {
1081
+ prepare_inputs_embeds: 'prepare_inputs_embeds',
1082
+ model: 'model',
1083
+ vision_encoder: 'vision_encoder',
1084
+ }, options),
1085
+ getOptionalConfigs(pretrained_model_name_or_path, {
1086
+ generation_config: 'generation_config.json',
1087
+ }, options),
1088
+ ]);
1089
+
1073
1090
  } else { // should be MODEL_TYPES.EncoderOnly
1074
1091
  if (modelType !== MODEL_TYPES.EncoderOnly) {
1075
1092
  const type = modelName ?? config?.model_type;
@@ -1934,6 +1951,49 @@ export class BertForQuestionAnswering extends BertPreTrainedModel {
1934
1951
  }
1935
1952
  //////////////////////////////////////////////////
1936
1953
 
1954
//////////////////////////////////////////////////
// ModernBert models
export class ModernBertPreTrainedModel extends PreTrainedModel { }
export class ModernBertModel extends ModernBertPreTrainedModel { }

export class ModernBertForMaskedLM extends ModernBertPreTrainedModel {
    /**
     * Runs the underlying model on the given inputs and wraps the result
     * as a masked language modeling output.
     *
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<MaskedLMOutput>} An object containing the model's output logits for masked language modeling.
     */
    async _call(model_inputs) {
        const outputs = await super._call(model_inputs);
        return new MaskedLMOutput(outputs);
    }
}

export class ModernBertForSequenceClassification extends ModernBertPreTrainedModel {
    /**
     * Runs the underlying model on the given inputs and wraps the result
     * as a sequence classification output.
     *
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
     */
    async _call(model_inputs) {
        const outputs = await super._call(model_inputs);
        return new SequenceClassifierOutput(outputs);
    }
}

export class ModernBertForTokenClassification extends ModernBertPreTrainedModel {
    /**
     * Runs the underlying model on the given inputs and wraps the result
     * as a token classification output.
     *
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for token classification.
     */
    async _call(model_inputs) {
        const outputs = await super._call(model_inputs);
        return new TokenClassifierOutput(outputs);
    }
}
//////////////////////////////////////////////////
1995
+
1996
+
1937
1997
  //////////////////////////////////////////////////
1938
1998
  // NomicBert models
1939
1999
  export class NomicBertPreTrainedModel extends PreTrainedModel { }
@@ -3342,6 +3402,29 @@ export class WhisperForConditionalGeneration extends WhisperPreTrainedModel {
3342
3402
  }
3343
3403
  //////////////////////////////////////////////////
3344
3404
 
3405
+
3406
//////////////////////////////////////////////////
// Moonshine models
export class MoonshinePreTrainedModel extends PreTrainedModel {

    // Moonshine takes raw audio samples as `input_values`; the flag below
    // tells the base class not to construct an attention mask for them.
    requires_attention_mask = false;
    main_input_name = 'input_values';
    forward_params = [
        'input_values',
        'decoder_input_ids',
        'past_key_values',
    ];
}

/**
 * MoonshineModel class for training Moonshine models without a language model head.
 */
export class MoonshineModel extends MoonshinePreTrainedModel { }

export class MoonshineForConditionalGeneration extends MoonshinePreTrainedModel { }
//////////////////////////////////////////////////
3426
+
3427
+
3345
3428
  //////////////////////////////////////////////////
3346
3429
  /**
3347
3430
  * Vision Encoder-Decoder model based on OpenAI's GPT architecture for image captioning and other vision tasks
@@ -3612,6 +3695,77 @@ export class Idefics3ForConditionalGeneration extends Idefics3PreTrainedModel {
3612
3695
  }
3613
3696
  //////////////////////////////////////////////////
3614
3697
 
3698
// Phi-3-Vision: a multimodal causal LM split across three ONNX sessions
// (`vision_encoder`, `prepare_inputs_embeds`, and the decoder `model`).
export class Phi3VPreTrainedModel extends PreTrainedModel {
    // Inputs accepted by the forward pass (superset across the sessions).
    forward_params = [
        'input_ids',
        'inputs_embeds',
        'attention_mask',
        'position_ids',
        'pixel_values',
        'image_sizes',
        'past_key_values',
    ];
}
export class Phi3VForCausalLM extends Phi3VPreTrainedModel {

    /**
     * Forward pass: if `inputs_embeds` is not already provided, first encode any
     * images and merge them with the text tokens, then run the decoder.
     */
    async forward({
        // Produced by the tokenizer/processor:
        input_ids = null,
        attention_mask = null,
        pixel_values = null,
        image_sizes = null,

        // Used during generation:
        position_ids = null,
        inputs_embeds = null,
        past_key_values = null,

        // Generic generation parameters
        generation_config = null,
        logits_processor = null,

        // TODO: needed?
        ...kwargs
    }) {
        if (!inputs_embeds) {
            let image_features;
            // Only run the vision encoder when pixel values are present and the
            // sequence length is > 1 (presumably the prefill step; during
            // incremental decoding input_ids has length 1 — TODO confirm).
            if (pixel_values && input_ids.dims[1] !== 1) {
                if (!image_sizes) {
                    throw new Error('`image_sizes` must be provided when `pixel_values` is provided.');
                }

                // Encode the image
                ({ image_features } = await sessionRun(this.sessions['vision_encoder'], {
                    pixel_values,
                    image_sizes,
                }));
            } else {
                // No image to encode: use an empty (0 x hidden_size) feature tensor
                // so `prepare_inputs_embeds` still receives a well-formed input.
                const hidden_size = this.config.normalized_config.hidden_size;
                image_features = new Tensor(
                    'float32',
                    [],
                    [0, hidden_size],
                );
            }

            // Merge token embeddings with image features into decoder embeddings.
            ({ inputs_embeds } = await sessionRun(this.sessions['prepare_inputs_embeds'], {
                input_ids,
                image_features,
            }));
        }

        // Run the decoder on the (possibly image-augmented) embeddings.
        const outputs = await decoderForward(this, {
            inputs_embeds,
            past_key_values,
            attention_mask,
            position_ids,
            generation_config,
            logits_processor,
        }, false);
        return outputs;
    }
}
3768
+
3615
3769
  //////////////////////////////////////////////////
3616
3770
  export class CLIPPreTrainedModel extends PreTrainedModel { }
3617
3771
 
@@ -3666,9 +3820,11 @@ export class CLIPModel extends CLIPPreTrainedModel { }
3666
3820
  export class CLIPTextModel extends CLIPPreTrainedModel {
3667
3821
  /** @type {typeof PreTrainedModel.from_pretrained} */
3668
3822
  static async from_pretrained(pretrained_model_name_or_path, options = {}) {
3669
- // Update default model file name if not provided
3670
- options.model_file_name ??= 'text_model';
3671
- return super.from_pretrained(pretrained_model_name_or_path, options);
3823
+ return super.from_pretrained(pretrained_model_name_or_path, {
3824
+ // Update default model file name if not provided
3825
+ model_file_name: 'text_model',
3826
+ ...options,
3827
+ });
3672
3828
  }
3673
3829
  }
3674
3830
 
@@ -3701,9 +3857,11 @@ export class CLIPTextModel extends CLIPPreTrainedModel {
3701
3857
  export class CLIPTextModelWithProjection extends CLIPPreTrainedModel {
3702
3858
  /** @type {typeof PreTrainedModel.from_pretrained} */
3703
3859
  static async from_pretrained(pretrained_model_name_or_path, options = {}) {
3704
- // Update default model file name if not provided
3705
- options.model_file_name ??= 'text_model';
3706
- return super.from_pretrained(pretrained_model_name_or_path, options);
3860
+ return super.from_pretrained(pretrained_model_name_or_path, {
3861
+ // Update default model file name if not provided
3862
+ model_file_name: 'text_model',
3863
+ ...options,
3864
+ });
3707
3865
  }
3708
3866
  }
3709
3867
 
@@ -3713,9 +3871,11 @@ export class CLIPTextModelWithProjection extends CLIPPreTrainedModel {
3713
3871
  export class CLIPVisionModel extends CLIPPreTrainedModel {
3714
3872
  /** @type {typeof PreTrainedModel.from_pretrained} */
3715
3873
  static async from_pretrained(pretrained_model_name_or_path, options = {}) {
3716
- // Update default model file name if not provided
3717
- options.model_file_name ??= 'vision_model';
3718
- return super.from_pretrained(pretrained_model_name_or_path, options);
3874
+ return super.from_pretrained(pretrained_model_name_or_path, {
3875
+ // Update default model file name if not provided
3876
+ model_file_name: 'vision_model',
3877
+ ...options,
3878
+ });
3719
3879
  }
3720
3880
  }
3721
3881
 
@@ -3748,9 +3908,11 @@ export class CLIPVisionModel extends CLIPPreTrainedModel {
3748
3908
  export class CLIPVisionModelWithProjection extends CLIPPreTrainedModel {
3749
3909
  /** @type {typeof PreTrainedModel.from_pretrained} */
3750
3910
  static async from_pretrained(pretrained_model_name_or_path, options = {}) {
3751
- // Update default model file name if not provided
3752
- options.model_file_name ??= 'vision_model';
3753
- return super.from_pretrained(pretrained_model_name_or_path, options);
3911
+ return super.from_pretrained(pretrained_model_name_or_path, {
3912
+ // Update default model file name if not provided
3913
+ model_file_name: 'vision_model',
3914
+ ...options,
3915
+ });
3754
3916
  }
3755
3917
  }
3756
3918
  //////////////////////////////////////////////////
@@ -3834,9 +3996,11 @@ export class SiglipModel extends SiglipPreTrainedModel { }
3834
3996
  export class SiglipTextModel extends SiglipPreTrainedModel {
3835
3997
  /** @type {typeof PreTrainedModel.from_pretrained} */
3836
3998
  static async from_pretrained(pretrained_model_name_or_path, options = {}) {
3837
- // Update default model file name if not provided
3838
- options.model_file_name ??= 'text_model';
3839
- return super.from_pretrained(pretrained_model_name_or_path, options);
3999
+ return super.from_pretrained(pretrained_model_name_or_path, {
4000
+ // Update default model file name if not provided
4001
+ model_file_name: 'text_model',
4002
+ ...options,
4003
+ });
3840
4004
  }
3841
4005
  }
3842
4006
 
@@ -3869,9 +4033,11 @@ export class SiglipTextModel extends SiglipPreTrainedModel {
3869
4033
  export class SiglipVisionModel extends CLIPPreTrainedModel {
3870
4034
  /** @type {typeof PreTrainedModel.from_pretrained} */
3871
4035
  static async from_pretrained(pretrained_model_name_or_path, options = {}) {
3872
- // Update default model file name if not provided
3873
- options.model_file_name ??= 'vision_model';
3874
- return super.from_pretrained(pretrained_model_name_or_path, options);
4036
+ return super.from_pretrained(pretrained_model_name_or_path, {
4037
+ // Update default model file name if not provided
4038
+ model_file_name: 'vision_model',
4039
+ ...options,
4040
+ });
3875
4041
  }
3876
4042
  }
3877
4043
  //////////////////////////////////////////////////
@@ -3926,18 +4092,22 @@ export class JinaCLIPModel extends JinaCLIPPreTrainedModel {
3926
4092
  export class JinaCLIPTextModel extends JinaCLIPPreTrainedModel {
3927
4093
  /** @type {typeof PreTrainedModel.from_pretrained} */
3928
4094
  static async from_pretrained(pretrained_model_name_or_path, options = {}) {
3929
- // Update default model file name if not provided
3930
- options.model_file_name ??= 'text_model';
3931
- return super.from_pretrained(pretrained_model_name_or_path, options);
4095
+ return super.from_pretrained(pretrained_model_name_or_path, {
4096
+ // Update default model file name if not provided
4097
+ model_file_name: 'text_model',
4098
+ ...options,
4099
+ });
3932
4100
  }
3933
4101
  }
3934
4102
 
3935
4103
  export class JinaCLIPVisionModel extends JinaCLIPPreTrainedModel {
3936
4104
  /** @type {typeof PreTrainedModel.from_pretrained} */
3937
4105
  static async from_pretrained(pretrained_model_name_or_path, options = {}) {
3938
- // Update default model file name if not provided
3939
- options.model_file_name ??= 'vision_model';
3940
- return super.from_pretrained(pretrained_model_name_or_path, options);
4106
+ return super.from_pretrained(pretrained_model_name_or_path, {
4107
+ // Update default model file name if not provided
4108
+ model_file_name: 'vision_model',
4109
+ ...options,
4110
+ });
3941
4111
  }
3942
4112
  }
3943
4113
  //////////////////////////////////////////////////
@@ -4097,6 +4267,14 @@ export class LlamaForCausalLM extends LlamaPreTrainedModel { }
4097
4267
  //////////////////////////////////////////////////
4098
4268
 
4099
4269
 
4270
//////////////////////////////////////////////////
// EXAONE models
// Minimal class hierarchy for EXAONE decoder-only language models;
// all behavior is inherited from PreTrainedModel.
export class ExaonePreTrainedModel extends PreTrainedModel { }
export class ExaoneModel extends ExaonePreTrainedModel { }
export class ExaoneForCausalLM extends ExaonePreTrainedModel { }
//////////////////////////////////////////////////
4276
+
4277
+
4100
4278
  //////////////////////////////////////////////////
4101
4279
  // MobileLLM models
4102
4280
  export class MobileLLMPreTrainedModel extends PreTrainedModel { }
@@ -6159,9 +6337,11 @@ export class ClapModel extends ClapPreTrainedModel { }
6159
6337
  export class ClapTextModelWithProjection extends ClapPreTrainedModel {
6160
6338
  /** @type {typeof PreTrainedModel.from_pretrained} */
6161
6339
  static async from_pretrained(pretrained_model_name_or_path, options = {}) {
6162
- // Update default model file name if not provided
6163
- options.model_file_name ??= 'text_model';
6164
- return super.from_pretrained(pretrained_model_name_or_path, options);
6340
+ return super.from_pretrained(pretrained_model_name_or_path, {
6341
+ // Update default model file name if not provided
6342
+ model_file_name: 'text_model',
6343
+ ...options,
6344
+ });
6165
6345
  }
6166
6346
  }
6167
6347
 
@@ -6194,9 +6374,11 @@ export class ClapTextModelWithProjection extends ClapPreTrainedModel {
6194
6374
  export class ClapAudioModelWithProjection extends ClapPreTrainedModel {
6195
6375
  /** @type {typeof PreTrainedModel.from_pretrained} */
6196
6376
  static async from_pretrained(pretrained_model_name_or_path, options = {}) {
6197
- // Update default model file name if not provided
6198
- options.model_file_name ??= 'audio_model';
6199
- return super.from_pretrained(pretrained_model_name_or_path, options);
6377
+ return super.from_pretrained(pretrained_model_name_or_path, {
6378
+ // Update default model file name if not provided
6379
+ model_file_name: 'audio_model',
6380
+ ...options,
6381
+ });
6200
6382
  }
6201
6383
  }
6202
6384
  //////////////////////////////////////////////////
@@ -6782,6 +6964,7 @@ export class PretrainedMixin {
6782
6964
 
6783
6965
  const MODEL_MAPPING_NAMES_ENCODER_ONLY = new Map([
6784
6966
  ['bert', ['BertModel', BertModel]],
6967
+ ['modernbert', ['ModernBertModel', ModernBertModel]],
6785
6968
  ['nomic_bert', ['NomicBertModel', NomicBertModel]],
6786
6969
  ['roformer', ['RoFormerModel', RoFormerModel]],
6787
6970
  ['electra', ['ElectraModel', ElectraModel]],
@@ -6883,6 +7066,7 @@ const MODEL_MAPPING_NAMES_DECODER_ONLY = new Map([
6883
7066
  ['gpt_neox', ['GPTNeoXModel', GPTNeoXModel]],
6884
7067
  ['codegen', ['CodeGenModel', CodeGenModel]],
6885
7068
  ['llama', ['LlamaModel', LlamaModel]],
7069
+ ['exaone', ['ExaoneModel', ExaoneModel]],
6886
7070
  ['olmo', ['OlmoModel', OlmoModel]],
6887
7071
  ['olmo2', ['Olmo2Model', Olmo2Model]],
6888
7072
  ['mobilellm', ['MobileLLMModel', MobileLLMModel]],
@@ -6905,6 +7089,7 @@ const MODEL_MAPPING_NAMES_DECODER_ONLY = new Map([
6905
7089
  const MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING_NAMES = new Map([
6906
7090
  ['speecht5', ['SpeechT5ForSpeechToText', SpeechT5ForSpeechToText]],
6907
7091
  ['whisper', ['WhisperForConditionalGeneration', WhisperForConditionalGeneration]],
7092
+ ['moonshine', ['MoonshineForConditionalGeneration', MoonshineForConditionalGeneration]],
6908
7093
  ]);
6909
7094
 
6910
7095
  const MODEL_FOR_TEXT_TO_SPECTROGRAM_MAPPING_NAMES = new Map([
@@ -6918,6 +7103,7 @@ const MODEL_FOR_TEXT_TO_WAVEFORM_MAPPING_NAMES = new Map([
6918
7103
 
6919
7104
  const MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES = new Map([
6920
7105
  ['bert', ['BertForSequenceClassification', BertForSequenceClassification]],
7106
+ ['modernbert', ['ModernBertForSequenceClassification', ModernBertForSequenceClassification]],
6921
7107
  ['roformer', ['RoFormerForSequenceClassification', RoFormerForSequenceClassification]],
6922
7108
  ['electra', ['ElectraForSequenceClassification', ElectraForSequenceClassification]],
6923
7109
  ['esm', ['EsmForSequenceClassification', EsmForSequenceClassification]],
@@ -6939,6 +7125,7 @@ const MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES = new Map([
6939
7125
 
6940
7126
  const MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES = new Map([
6941
7127
  ['bert', ['BertForTokenClassification', BertForTokenClassification]],
7128
+ ['modernbert', ['ModernBertForTokenClassification', ModernBertForTokenClassification]],
6942
7129
  ['roformer', ['RoFormerForTokenClassification', RoFormerForTokenClassification]],
6943
7130
  ['electra', ['ElectraForTokenClassification', ElectraForTokenClassification]],
6944
7131
  ['esm', ['EsmForTokenClassification', EsmForTokenClassification]],
@@ -6975,6 +7162,7 @@ const MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = new Map([
6975
7162
  ['gpt_neox', ['GPTNeoXForCausalLM', GPTNeoXForCausalLM]],
6976
7163
  ['codegen', ['CodeGenForCausalLM', CodeGenForCausalLM]],
6977
7164
  ['llama', ['LlamaForCausalLM', LlamaForCausalLM]],
7165
+ ['exaone', ['ExaoneForCausalLM', ExaoneForCausalLM]],
6978
7166
  ['olmo', ['OlmoForCausalLM', OlmoForCausalLM]],
6979
7167
  ['olmo2', ['Olmo2ForCausalLM', Olmo2ForCausalLM]],
6980
7168
  ['mobilellm', ['MobileLLMForCausalLM', MobileLLMForCausalLM]],
@@ -6994,6 +7182,9 @@ const MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = new Map([
6994
7182
  ['falcon', ['FalconForCausalLM', FalconForCausalLM]],
6995
7183
  ['trocr', ['TrOCRForCausalLM', TrOCRForCausalLM]],
6996
7184
  ['stablelm', ['StableLmForCausalLM', StableLmForCausalLM]],
7185
+
7186
+ // Also image-text-to-text
7187
+ ['phi3_v', ['Phi3VForCausalLM', Phi3VForCausalLM]],
6997
7188
  ]);
6998
7189
 
6999
7190
  const MODEL_FOR_MULTIMODALITY_MAPPING_NAMES = new Map([
@@ -7003,6 +7194,7 @@ const MODEL_FOR_MULTIMODALITY_MAPPING_NAMES = new Map([
7003
7194
 
7004
7195
  const MODEL_FOR_MASKED_LM_MAPPING_NAMES = new Map([
7005
7196
  ['bert', ['BertForMaskedLM', BertForMaskedLM]],
7197
+ ['modernbert', ['ModernBertForMaskedLM', ModernBertForMaskedLM]],
7006
7198
  ['roformer', ['RoFormerForMaskedLM', RoFormerForMaskedLM]],
7007
7199
  ['electra', ['ElectraForMaskedLM', ElectraForMaskedLM]],
7008
7200
  ['esm', ['EsmForMaskedLM', EsmForMaskedLM]],
@@ -7231,6 +7423,7 @@ const CUSTOM_MAPPING = [
7231
7423
  // OVERRIDE:
7232
7424
  // TODO: Refactor to allow class to specify model
7233
7425
  ['MusicgenForConditionalGeneration', MusicgenForConditionalGeneration, MODEL_TYPES.Musicgen],
7426
+ ['Phi3VForCausalLM', Phi3VForCausalLM, MODEL_TYPES.Phi3V],
7234
7427
 
7235
7428
  ['CLIPTextModelWithProjection', CLIPTextModelWithProjection, MODEL_TYPES.EncoderOnly],
7236
7429
  ['SiglipTextModel', SiglipTextModel, MODEL_TYPES.EncoderOnly],
@@ -100,4 +100,15 @@ export class TensorOpRegistry {
100
100
  }
101
101
  return this._top_k;
102
102
  }
103
+
104
    // Lazily builds (and caches) a tiny pre-serialized graph containing a single
    // `Slice` op. The byte array embeds the input names x (data), s (starts),
    // e (ends), a (axes), t (steps) and the output name 'y' (the ASCII codes are
    // visible in the payload); `wrap` turns it into a callable session.
    static get slice() {
        if (!this._slice) {
            this._slice = wrap(
                [8, 7, 18, 0, 58, 96, 10, 25, 10, 1, 120, 10, 1, 115, 10, 1, 101, 10, 1, 97, 10, 1, 116, 18, 1, 121, 34, 5, 83, 108, 105, 99, 101, 18, 1, 114, 90, 9, 10, 1, 120, 18, 4, 10, 2, 8, 1, 90, 9, 10, 1, 115, 18, 4, 10, 2, 8, 7, 90, 9, 10, 1, 101, 18, 4, 10, 2, 8, 7, 90, 9, 10, 1, 97, 18, 4, 10, 2, 8, 7, 90, 9, 10, 1, 116, 18, 4, 10, 2, 8, 7, 98, 9, 10, 1, 121, 18, 4, 10, 2, 8, 1, 66, 2, 16, 13],
                this.session_options,
                'y',
            )
        }
        return this._slice;
    }
103
114
  }
package/src/pipelines.js CHANGED
@@ -688,7 +688,7 @@ export class FillMaskPipeline extends (/** @type {new (options: TextPipelineCons
688
688
  return {
689
689
  score: values[i],
690
690
  token: Number(x),
691
- token_str: this.tokenizer.model.vocab[x],
691
+ token_str: this.tokenizer.decode([x]),
692
692
  sequence: this.tokenizer.decode(sequence, { skip_special_tokens: true }),
693
693
  }
694
694
  }));
@@ -1729,6 +1729,8 @@ export class AutomaticSpeechRecognitionPipeline extends (/** @type {new (options
1729
1729
  case 'unispeech-sat':
1730
1730
  case 'hubert':
1731
1731
  return this._call_wav2vec2(audio, kwargs)
1732
+ case 'moonshine':
1733
+ return this._call_moonshine(audio, kwargs)
1732
1734
  default:
1733
1735
  throw new Error(`AutomaticSpeechRecognitionPipeline does not support model type '${this.model.config.model_type}'.`)
1734
1736
  }
@@ -1882,6 +1884,34 @@ export class AutomaticSpeechRecognitionPipeline extends (/** @type {new (options
1882
1884
  }
1883
1885
  return single ? toReturn[0] : toReturn;
1884
1886
  }
1887
+
1888
+ /**
1889
+ * @type {AutomaticSpeechRecognitionPipelineCallback}
1890
+ * @private
1891
+ */
1892
+ async _call_moonshine(audio, kwargs) {
1893
+ const single = !Array.isArray(audio);
1894
+ if (single) {
1895
+ audio = [/** @type {AudioInput} */ (audio)];
1896
+ }
1897
+ const sampling_rate = this.processor.feature_extractor.config.sampling_rate;
1898
+ const preparedAudios = await prepareAudios(audio, sampling_rate);
1899
+ const toReturn = [];
1900
+ for (const aud of preparedAudios) {
1901
+ const inputs = await this.processor(aud);
1902
+
1903
+ // According to the [paper](https://arxiv.org/pdf/2410.15608):
1904
+ // "We use greedy decoding, with a heuristic limit of 6 output tokens
1905
+ // per second of audio to avoid repeated output sequences."
1906
+ const max_new_tokens = Math.floor(aud.length / sampling_rate) * 6;
1907
+ const outputs = await this.model.generate({ max_new_tokens, ...kwargs, ...inputs });
1908
+
1909
+ const text = this.processor.batch_decode(outputs, { skip_special_tokens: true })[0];
1910
+ toReturn.push({ text });
1911
+ }
1912
+ return single ? toReturn[0] : toReturn;
1913
+ }
1914
+
1885
1915
  }
1886
1916
 
1887
1917
  /**
@@ -772,8 +772,21 @@ export class Tensor {
772
772
  if (!DataTypeMap.hasOwnProperty(type)) {
773
773
  throw new Error(`Unsupported type: ${type}`);
774
774
  }
775
+
776
+ // Handle special cases where a mapping function is needed (e.g., where one type is a bigint and the other is a number)
777
+ let map_fn;
778
+ const is_source_bigint = ['int64', 'uint64'].includes(this.type);
779
+ const is_dest_bigint = ['int64', 'uint64'].includes(type);
780
+ if (is_source_bigint && !is_dest_bigint) {
781
+ // TypeError: Cannot convert a BigInt value to a number
782
+ map_fn = Number;
783
+ } else if (!is_source_bigint && is_dest_bigint) {
784
+ // TypeError: Cannot convert [x] to a BigInt
785
+ map_fn = BigInt;
786
+ }
787
+
775
788
  // @ts-ignore
776
- return new Tensor(type, DataTypeMap[type].from(this.data), this.dims);
789
+ return new Tensor(type, DataTypeMap[type].from(this.data, map_fn), this.dims);
777
790
  }
778
791
  }
779
792
 
@@ -971,6 +984,29 @@ export async function topk(x, k) {
971
984
  });
972
985
  }
973
986
 
987
+
988
const arrayToIndexTensor = (array) => new Tensor('int64', array, [array.length]);
/**
 * Slice a multidimensional float32 tensor.
 * @param {Tensor} data Tensor of data to extract slices from.
 * @param {number[]} starts 1-D array of starting indices for each axis in `axes`.
 * @param {number[]} ends 1-D array of (exclusive) end indices for each axis in `axes`.
 * @param {number[]} axes 1-D array of axes that `starts` and `ends` apply to.
 * @param {number[]} [steps] 1-D array of slice steps per axis in `axes` (defaults to 1 everywhere).
 * @returns {Promise<Tensor>} Sliced data tensor.
 */
export async function slice(data, starts, ends, axes, steps) {
    const sliceOp = await TensorOpRegistry.slice;
    // Default to a step of 1 along every requested axis.
    const stepValues = steps ?? new Array(axes.length).fill(1);
    return await sliceOp({
        x: data,
        s: arrayToIndexTensor(starts),
        e: arrayToIndexTensor(ends),
        a: arrayToIndexTensor(axes),
        t: arrayToIndexTensor(stepValues),
    });
}
1008
+
1009
+
974
1010
  /**
975
1011
  * Perform mean pooling of the last hidden state followed by a normalization step.
976
1012
  * @param {Tensor} last_hidden_state Tensor of shape [batchSize, seqLength, embedDim]
@@ -1417,6 +1453,20 @@ export function zeros_like(tensor) {
1417
1453
  return zeros(tensor.dims);
1418
1454
  }
1419
1455
 
1456
/**
 * Returns a tensor filled with random numbers from a uniform distribution on the interval [0, 1)
 * @param {number[]} size A sequence of integers defining the shape of the output tensor.
 * @returns {Tensor} The random tensor.
 */
export function rand(size) {
    // Total number of elements is the product of all dimensions.
    const numElements = size.reduce((total, dim) => total * dim, 1);
    const values = Float32Array.from({ length: numElements }, () => Math.random());
    return new Tensor("float32", values, size);
}
1469
+
1420
1470
  /**
1421
1471
  * Quantizes the embeddings tensor to binary or unsigned binary precision.
1422
1472
  * @param {Tensor} tensor The tensor to quantize.
@@ -149,7 +149,7 @@ export class ImageProcessor extends ImageProcessor_base {
149
149
  * Pad the image by a certain amount.
150
150
  * @param {Float32Array} pixelData The pixel data to pad.
151
151
  * @param {number[]} imgDims The dimensions of the image (height, width, channels).
152
- * @param {{width:number; height:number}|number} padSize The dimensions of the padded image.
152
+ * @param {{width:number; height:number}|number|'square'} padSize The dimensions of the padded image.
153
153
  * @param {Object} options The options for padding.
154
154
  * @param {'constant'|'symmetric'} [options.mode='constant'] The type of padding to add.
155
155
  * @param {boolean} [options.center=false] Whether to center the image.
@@ -159,7 +159,7 @@ export class ImageProcessor extends ImageProcessor_base {
159
159
  pad_image(pixelData: Float32Array, imgDims: number[], padSize: {
160
160
  width: number;
161
161
  height: number;
162
- } | number, { mode, center, constant_values, }?: {
162
+ } | number | "square", { mode, center, constant_values, }?: {
163
163
  mode?: "constant" | "symmetric";
164
164
  center?: boolean;
165
165
  constant_values?: number | number[];
@@ -1 +1 @@
1
- {"version":3,"file":"image_processors_utils.d.ts","sourceRoot":"","sources":["../../src/base/image_processors_utils.js"],"names":[],"mappings":"AA+EA;;;;;;;;;GASG;AACH,uDAPG;IAAwB,MAAM,EAAtB,MAAM;IACU,UAAU,EAA1B,MAAM;CACd,cAAQ,MAAM,iBACN,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,iBAClB,OAAO,GACN,KAAQ,CAwEnB;AAGD;;;;;;GAMG;AACH,4DALW,GAAC,iBACD,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,GAEhB;IAAC,YAAY,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,EAAE,CAAA;CAAC,EAAE,CAwDtD;AAkPD;;;;;;;;;GASG;AACH,4DARW,GAAC,cACD,MAAM,mBACN,MAAM,gCACN,MAAM,sBACN,GAAG,CAAC,MAAM,CAAC,iBACX,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,GAChB,KAAK,CAAC;IAAE,YAAY,EAAE,MAAM,CAAC;IAAC,aAAa,EAAE,KAAK,CAAC;QAAC,EAAE,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAC,CAAC,CAAA;CAAC,CAAC,CAuE/G;AAGD;;;;;;;GAOG;AACH,4DANW,GAAC,cACD,MAAM,iBACN,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,GAEhB,KAAK,CAAC;IAAE,YAAY,EAAE,MAAM,CAAC;IAAC,aAAa,EAAE,KAAK,CAAC;QAAC,EAAE,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAC,CAAC,CAAA;CAAC,CAAC,CAI/G;;KA3iBsC,GAAG;UAAyB,GACnE;;AA6iBA;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AAEH;IAmeI;;;;;;;;;;;;;;OAcG;IACH,sDATW,MAAM,WAKN,OAAO,iBAAiB,EAAE,iBAAiB,GAEzC,OAAO,CAAC,cAAc,CAAC,CAKnC;IAnfD;;;OAGG;IACH,oBAFW,oBAAoB,EAmC9B;IA9BG,qBAAkD;IAClD,oBAA+C;IAE/C,iBAAoC;IACpC,oBAA2C;IAC3C,uBAAwD;IACxD,sBAAuC;IAEvC,sBAAuC;IACvC,UAA4C;IAC5C,mBAA8D;IAC9D,uBAAwE;IAExE,wBAA2C;IAC3C,eAAiC;IACjC,oBAAmD;IACnD,oBAA2C;IAE3C,cAA+B;IAC/B,YAA2B;IAQ3B,+BAAkE;IAElE,6BAAoB;IAGxB;;;;;;;OAOG;IACH,iBALW,QAAQ,QACR;QAAC,MAAM,EAAC,MAAM,CAAC;QAAC,KAAK,EAAC,MAAM,CAAA;KAAC,aAC7B,MAAM,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,GAC5B,OAAO,CAAC,QAAQ,CAAC,CAsB7B;IAGD;;;;;OAKG;IACH,mBAJW,QAAQ,mBACR,MAAM,GACJ,OAAO,CAAC,QAAQ,CAAC,CAiC7B;IAED;;;;;;;;;;OAUG;IACH,qBATW,YAAY,WACZ,MAAM,EAAE,WACR;QAAC,KAAK,EAAC,MAAM,CAAC;QAAC,MAAM,EAAC,MAAM,CAAA;KAAC,GAAC,MAAM,uCAE5C;QAAyC,IAAI,GAArC,UAAU,GAAC,WAAW;QACJ,MAAM,GAAxB,OAAO;QACmB,eAAe,GAAzC,MAAM,GAAC,MAAM,EAAE;KACvB,GAAU,CAAC,YAAY,EAAE,MAAM,EAAE,CAAC,CA6EpC;IAED;;;;OAIG;IACH,mBAHW,YAAY,GAC
V,IAAI,CAMhB;IAED;;;;;;OAMG;IACH,oCAJW,QAAQ,QACR,GAAG,GACD,CAAC,MAAM,EAAE,MAAM,CAAC,CA4F5B;IAED;;;;OAIG;IACH,cAHW,QAAQ,GACN,OAAO,CAAC,QAAQ,CAAC,CAO7B;IAED;;;;;OAKG;IAEH;;;;;;OAMG;IACH,kBAJW,QAAQ,iGAEN,OAAO;;;;uBAVN,WAAW;;;;6BACX,WAAW;;;;sBACX,MAAM;MAQmB,CAsHtC;IAED;;;;;;;OAOG;IACH,cAJW,QAAQ,EAAE,WACP,GAAG,EAAA,GACJ,OAAO,CAAC,oBAAoB,CAAC,CAqBzC;CAsBJ;;;;;0BArjCY,CAAC,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,CAAC;;;;;kBAM9B,MAAM;;;;oBACN,WAAW,EAAE;;;;0BACb,WAAW,EAAE;;;;;;;;;;;;;iBAgiBb,MAAM,EAAE;;;;gBACR,MAAM,EAAE;;;;iBACR,OAAO;;;;qBACP,MAAM;;;;mBACN,OAAO;;;;gBACP,OAAO;;;;eACP,MAAM;;;;WACN,MAAM,MAAO;;;;iBACb,MAAM,MAAO;;;;;4BACb,OAAO;;;;;qBAEP,OAAO;;;;mBAEP,OAAO;;;;;wBACP,OAAO;;;;;yBAEP,MAAM;;;;WAGN,MAAM,EAAE;;;;UACR,MAAM,EAAE;;uBAtkBqB,oBAAoB;yBAEtC,mBAAmB"}
1
+ {"version":3,"file":"image_processors_utils.d.ts","sourceRoot":"","sources":["../../src/base/image_processors_utils.js"],"names":[],"mappings":"AA+EA;;;;;;;;;GASG;AACH,uDAPG;IAAwB,MAAM,EAAtB,MAAM;IACU,UAAU,EAA1B,MAAM;CACd,cAAQ,MAAM,iBACN,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,iBAClB,OAAO,GACN,KAAQ,CAwEnB;AAGD;;;;;;GAMG;AACH,4DALW,GAAC,iBACD,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,GAEhB;IAAC,YAAY,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,EAAE,CAAA;CAAC,EAAE,CAwDtD;AAkPD;;;;;;;;;GASG;AACH,4DARW,GAAC,cACD,MAAM,mBACN,MAAM,gCACN,MAAM,sBACN,GAAG,CAAC,MAAM,CAAC,iBACX,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,GAChB,KAAK,CAAC;IAAE,YAAY,EAAE,MAAM,CAAC;IAAC,aAAa,EAAE,KAAK,CAAC;QAAC,EAAE,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAC,CAAC,CAAA;CAAC,CAAC,CAuE/G;AAGD;;;;;;;GAOG;AACH,4DANW,GAAC,cACD,MAAM,iBACN,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,GAEhB,KAAK,CAAC;IAAE,YAAY,EAAE,MAAM,CAAC;IAAC,aAAa,EAAE,KAAK,CAAC;QAAC,EAAE,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAC,CAAC,CAAA;CAAC,CAAC,CAI/G;;KA3iBsC,GAAG;UAAyB,GACnE;;AA6iBA;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AAEH;IAqeI;;;;;;;;;;;;;;OAcG;IACH,sDATW,MAAM,WAKN,OAAO,iBAAiB,EAAE,iBAAiB,GAEzC,OAAO,CAAC,cAAc,CAAC,CAKnC;IArfD;;;OAGG;IACH,oBAFW,oBAAoB,EAmC9B;IA9BG,qBAAkD;IAClD,oBAA+C;IAE/C,iBAAoC;IACpC,oBAA2C;IAC3C,uBAAwD;IACxD,sBAAuC;IAEvC,sBAAuC;IACvC,UAA4C;IAC5C,mBAA8D;IAC9D,uBAAwE;IAExE,wBAA2C;IAC3C,eAAiC;IACjC,oBAAmD;IACnD,oBAA2C;IAE3C,cAA+B;IAC/B,YAA2B;IAQ3B,+BAAkE;IAElE,6BAAoB;IAGxB;;;;;;;OAOG;IACH,iBALW,QAAQ,QACR;QAAC,MAAM,EAAC,MAAM,CAAC;QAAC,KAAK,EAAC,MAAM,CAAA;KAAC,aAC7B,MAAM,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,GAC5B,OAAO,CAAC,QAAQ,CAAC,CAsB7B;IAGD;;;;;OAKG;IACH,mBAJW,QAAQ,mBACR,MAAM,GACJ,OAAO,CAAC,QAAQ,CAAC,CAiC7B;IAED;;;;;;;;;;OAUG;IACH,qBATW,YAAY,WACZ,MAAM,EAAE,WACR;QAAC,KAAK,EAAC,MAAM,CAAC;QAAC,MAAM,EAAC,MAAM,CAAA;KAAC,GAAC,MAAM,GAAC,QAAQ,uCAErD;QAAyC,IAAI,GAArC,UAAU,GAAC,WAAW;QACJ,MAAM,GAAxB,OAAO;QACmB,eAAe,GAAzC,MAAM,GAAC,MAAM,EAAE;KACvB,GAAU,CAAC,YAAY,EAAE,MAAM,EAAE,CAAC,CA+EpC;IAED;;;;OAIG;IACH,mBAH
W,YAAY,GACV,IAAI,CAMhB;IAED;;;;;;OAMG;IACH,oCAJW,QAAQ,QACR,GAAG,GACD,CAAC,MAAM,EAAE,MAAM,CAAC,CA4F5B;IAED;;;;OAIG;IACH,cAHW,QAAQ,GACN,OAAO,CAAC,QAAQ,CAAC,CAO7B;IAED;;;;;OAKG;IAEH;;;;;;OAMG;IACH,kBAJW,QAAQ,iGAEN,OAAO;;;;uBAVN,WAAW;;;;6BACX,WAAW;;;;sBACX,MAAM;MAQmB,CAsHtC;IAED;;;;;;;OAOG;IACH,cAJW,QAAQ,EAAE,WACP,GAAG,EAAA,GACJ,OAAO,CAAC,oBAAoB,CAAC,CAqBzC;CAsBJ;;;;;0BAvjCY,CAAC,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,CAAC;;;;;kBAM9B,MAAM;;;;oBACN,WAAW,EAAE;;;;0BACb,WAAW,EAAE;;;;;;;;;;;;;iBAgiBb,MAAM,EAAE;;;;gBACR,MAAM,EAAE;;;;iBACR,OAAO;;;;qBACP,MAAM;;;;mBACN,OAAO;;;;gBACP,OAAO;;;;eACP,MAAM;;;;WACN,MAAM,MAAO;;;;iBACb,MAAM,MAAO;;;;;4BACb,OAAO;;;;;qBAEP,OAAO;;;;mBAEP,OAAO;;;;;wBACP,OAAO;;;;;yBAEP,MAAM;;;;WAGN,MAAM,EAAE;;;;UACR,MAAM,EAAE;;uBAtkBqB,oBAAoB;yBAEtC,mBAAmB"}
@@ -1 +1 @@
1
- {"version":3,"file":"configs.d.ts","sourceRoot":"","sources":["../src/configs.js"],"names":[],"mappings":"AAmOA;;;;GAIG;AACH,0CAHW,gBAAgB;;;IACd,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,CAAC,CA2EpC;AACD;;;GAGG;AACH;IAwBI;;;;;;;;OAQG;IACH,sDANW,MAAM,0EACN,iBAAiB,GAGf,OAAO,CAAC,gBAAgB,CAAC,CAqBrC;IArCD;;;OAGG;IACH,6BAGC;IAnBD,0BAA0B;IAC1B,YADW,MAAM,GAAC,IAAI,CACJ;IAElB,sBAAsB;IACtB,oBADW,OAAO,CACS;IAE3B,qBAAqB;IACrB,yBADW,MAAM,CACO;IAExB,mCAAmC;IACnC,0BADW,oBAAoB,CACN;IAQrB,uBAAkD;CAgCzD;AAED;;;;;GAKG;AACH;IArCI;;;;;;;;OAQG;IACH,sDANW,MAAM,0EACN,iBAAiB,GAGf,OAAO,CAAC,gBAAgB,CAAC,CAqBrC;CAcJ;gCArVY,OAAO,gBAAgB,EAAE,iBAAiB;+BAI1C,OAAO,iBAAiB,EAAE,gBAAgB;2BAI1C,OAAO,iBAAiB,EAAE,YAAY;;;;;;;;qBAkVrC,OAAO,mBAAmB,EAAE,QAAQ,GAAC,MAAM,CAAC,OAAO,mBAAmB,EAAE,QAAQ,EAAE,OAAO,mBAAmB,EAAE,QAAQ,CAAC;;;;;;+BACvH,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC;;;;aAGtB,OAAO,oBAAoB,EAAE,UAAU;;;;YACvC,OAAO,mBAAmB,EAAE,QAAQ,GAAC,MAAM,CAAC,MAAM,EAAE,OAAO,mBAAmB,EAAE,QAAQ,CAAC;;;;+BACzF,OAAO,GAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC"}
1
+ {"version":3,"file":"configs.d.ts","sourceRoot":"","sources":["../src/configs.js"],"names":[],"mappings":"AA2OA;;;;GAIG;AACH,0CAHW,gBAAgB;;;IACd,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,CAAC,CA2EpC;AACD;;;GAGG;AACH;IAwBI;;;;;;;;OAQG;IACH,sDANW,MAAM,0EACN,iBAAiB,GAGf,OAAO,CAAC,gBAAgB,CAAC,CAqBrC;IArCD;;;OAGG;IACH,6BAGC;IAnBD,0BAA0B;IAC1B,YADW,MAAM,GAAC,IAAI,CACJ;IAElB,sBAAsB;IACtB,oBADW,OAAO,CACS;IAE3B,qBAAqB;IACrB,yBADW,MAAM,CACO;IAExB,mCAAmC;IACnC,0BADW,oBAAoB,CACN;IAQrB,uBAAkD;CAgCzD;AAED;;;;;GAKG;AACH;IArCI;;;;;;;;OAQG;IACH,sDANW,MAAM,0EACN,iBAAiB,GAGf,OAAO,CAAC,gBAAgB,CAAC,CAqBrC;CAcJ;gCA7VY,OAAO,gBAAgB,EAAE,iBAAiB;+BAI1C,OAAO,iBAAiB,EAAE,gBAAgB;2BAI1C,OAAO,iBAAiB,EAAE,YAAY;;;;;;;;qBA0VrC,OAAO,mBAAmB,EAAE,QAAQ,GAAC,MAAM,CAAC,OAAO,mBAAmB,EAAE,QAAQ,EAAE,OAAO,mBAAmB,EAAE,QAAQ,CAAC;;;;;;+BACvH,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC;;;;aAGtB,OAAO,oBAAoB,EAAE,UAAU;;;;YACvC,OAAO,mBAAmB,EAAE,QAAQ,GAAC,MAAM,CAAC,MAAM,EAAE,OAAO,mBAAmB,EAAE,QAAQ,CAAC;;;;+BACzF,OAAO,GAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"image_processing_auto.d.ts","sourceRoot":"","sources":["../../../src/models/auto/image_processing_auto.js"],"names":[],"mappings":"AAMA;kFAuBk7xC,oBAAiB;CADl8xC;+BAzB8B,sCAAsC"}
1
+ {"version":3,"file":"image_processing_auto.d.ts","sourceRoot":"","sources":["../../../src/models/auto/image_processing_auto.js"],"names":[],"mappings":"AAMA;kFAuB4jyC,oBAAiB;CAD5kyC;+BAzB8B,sCAAsC"}
@@ -1,5 +1,6 @@
1
1
  export * from "./audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.js";
2
2
  export * from "./clap/feature_extraction_clap.js";
3
+ export * from "./moonshine/feature_extraction_moonshine.js";
3
4
  export * from "./pyannote/feature_extraction_pyannote.js";
4
5
  export * from "./seamless_m4t/feature_extraction_seamless_m4t.js";
5
6
  export * from "./speecht5/feature_extraction_speecht5.js";
@@ -1 +1 @@
1
- {"version":3,"file":"image_processing_idefics3.d.ts","sourceRoot":"","sources":["../../../src/models/idefics3/image_processing_idefics3.js"],"names":[],"mappings":"AAOA;IACI,yBAKC;IAFG,wBAA2D;IAC3D,oBAA2C;IAG/C;;;OAGG;IAEH;;;;;OAKG;IACH,6GAHW,MAAM;;;MAiBhB;IAED,uDAAuD;IACvD,cADY,0CAAS,yCAAU,GAAC,yCAAU,EAAE;;;;;;;;;;OA4H3C;IAED;;;;;;OAiDC;CACJ;+BAtNM,sCAAsC"}
1
+ {"version":3,"file":"image_processing_idefics3.d.ts","sourceRoot":"","sources":["../../../src/models/idefics3/image_processing_idefics3.js"],"names":[],"mappings":"AAOA;IACI,yBAKC;IAFG,wBAA2D;IAC3D,oBAA2C;IAG/C;;;OAGG;IAEH;;;;;OAKG;IACH,6GAHW,MAAM;;;MAiBhB;IAED,uDAAuD;IACvD,cADY,0CAAS,yCAAU,GAAC,yCAAU,EAAE;;;;;;;;;;OA4H3C;IAED;;;;;;OA4DC;CACJ;+BAjOM,sCAAsC"}
@@ -23,6 +23,7 @@ export * from "./mobilevit/image_processing_mobilevit.js";
23
23
  export * from "./nougat/image_processing_nougat.js";
24
24
  export * from "./owlv2/image_processing_owlv2.js";
25
25
  export * from "./owlvit/image_processing_owlvit.js";
26
+ export * from "./phi3_v/image_processing_phi3_v.js";
26
27
  export * from "./pvt/image_processing_pvt.js";
27
28
  export * from "./qwen2_vl/image_processing_qwen2_vl.js";
28
29
  export * from "./rt_detr/image_processing_rt_detr.js";