@huggingface/transformers 3.1.2 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. package/README.md +6 -3
  2. package/dist/transformers.cjs +777 -143
  3. package/dist/transformers.cjs.map +1 -1
  4. package/dist/transformers.js +787 -143
  5. package/dist/transformers.js.map +1 -1
  6. package/dist/transformers.min.cjs +1 -1
  7. package/dist/transformers.min.cjs.map +1 -1
  8. package/dist/transformers.min.js +1 -1
  9. package/dist/transformers.min.js.map +1 -1
  10. package/dist/transformers.min.mjs +1 -1
  11. package/dist/transformers.min.mjs.map +1 -1
  12. package/dist/transformers.mjs +787 -143
  13. package/dist/transformers.mjs.map +1 -1
  14. package/package.json +1 -1
  15. package/src/base/image_processors_utils.js +3 -1
  16. package/src/configs.js +10 -2
  17. package/src/env.js +1 -1
  18. package/src/models/feature_extractors.js +1 -0
  19. package/src/models/idefics3/image_processing_idefics3.js +24 -13
  20. package/src/models/image_processors.js +1 -0
  21. package/src/models/moonshine/feature_extraction_moonshine.js +26 -0
  22. package/src/models/moonshine/processing_moonshine.js +20 -0
  23. package/src/models/phi3_v/image_processing_phi3_v.js +163 -0
  24. package/src/models/phi3_v/processing_phi3_v.js +53 -0
  25. package/src/models/processors.js +2 -0
  26. package/src/models/pyannote/feature_extraction_pyannote.js +56 -0
  27. package/src/models/pyannote/processing_pyannote.js +7 -54
  28. package/src/models.js +176 -30
  29. package/src/ops/registry.js +11 -0
  30. package/src/pipelines.js +30 -0
  31. package/src/utils/tensor.js +51 -1
  32. package/types/base/image_processors_utils.d.ts +2 -2
  33. package/types/base/image_processors_utils.d.ts.map +1 -1
  34. package/types/configs.d.ts.map +1 -1
  35. package/types/models/auto/image_processing_auto.d.ts.map +1 -1
  36. package/types/models/feature_extractors.d.ts +1 -0
  37. package/types/models/idefics3/image_processing_idefics3.d.ts.map +1 -1
  38. package/types/models/image_processors.d.ts +1 -0
  39. package/types/models/moonshine/feature_extraction_moonshine.d.ts +13 -0
  40. package/types/models/moonshine/feature_extraction_moonshine.d.ts.map +1 -0
  41. package/types/models/moonshine/processing_moonshine.d.ts +17 -0
  42. package/types/models/moonshine/processing_moonshine.d.ts.map +1 -0
  43. package/types/models/phi3_v/image_processing_phi3_v.d.ts +17 -0
  44. package/types/models/phi3_v/image_processing_phi3_v.d.ts.map +1 -0
  45. package/types/models/phi3_v/processing_phi3_v.d.ts +17 -0
  46. package/types/models/phi3_v/processing_phi3_v.d.ts.map +1 -0
  47. package/types/models/processors.d.ts +2 -0
  48. package/types/models/pyannote/feature_extraction_pyannote.d.ts +18 -0
  49. package/types/models/pyannote/feature_extraction_pyannote.d.ts.map +1 -1
  50. package/types/models/pyannote/processing_pyannote.d.ts +4 -15
  51. package/types/models/pyannote/processing_pyannote.d.ts.map +1 -1
  52. package/types/models.d.ts +33 -1
  53. package/types/models.d.ts.map +1 -1
  54. package/types/ops/registry.d.ts +1 -0
  55. package/types/ops/registry.d.ts.map +1 -1
  56. package/types/pipelines.d.ts +5 -0
  57. package/types/pipelines.d.ts.map +1 -1
  58. package/types/utils/tensor.d.ts +16 -0
  59. package/types/utils/tensor.d.ts.map +1 -1
@@ -4920,7 +4920,7 @@ class ImageProcessor extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_0__.Call
4920
4920
  * Pad the image by a certain amount.
4921
4921
  * @param {Float32Array} pixelData The pixel data to pad.
4922
4922
  * @param {number[]} imgDims The dimensions of the image (height, width, channels).
4923
- * @param {{width:number; height:number}|number} padSize The dimensions of the padded image.
4923
+ * @param {{width:number; height:number}|number|'square'} padSize The dimensions of the padded image.
4924
4924
  * @param {Object} options The options for padding.
4925
4925
  * @param {'constant'|'symmetric'} [options.mode='constant'] The type of padding to add.
4926
4926
  * @param {boolean} [options.center=false] Whether to center the image.
@@ -4938,6 +4938,8 @@ class ImageProcessor extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_0__.Call
4938
4938
  if (typeof padSize === 'number') {
4939
4939
  paddedImageWidth = padSize;
4940
4940
  paddedImageHeight = padSize;
4941
+ } else if (padSize === 'square') {
4942
+ paddedImageWidth = paddedImageHeight = Math.max(imageHeight, imageWidth);
4941
4943
  } else {
4942
4944
  paddedImageWidth = padSize.width;
4943
4945
  paddedImageHeight = padSize.height;
@@ -5583,8 +5585,6 @@ function getNormalizedConfig(config) {
5583
5585
  case 'gpt_neox':
5584
5586
  case 'stablelm':
5585
5587
  case 'opt':
5586
- case 'phi':
5587
- case 'phi3':
5588
5588
  case 'falcon':
5589
5589
  mapping['num_heads'] = 'num_attention_heads';
5590
5590
  mapping['num_layers'] = 'num_hidden_layers';
@@ -5600,6 +5600,9 @@ function getNormalizedConfig(config) {
5600
5600
  case 'starcoder2':
5601
5601
  case 'qwen2':
5602
5602
  case 'qwen2_vl':
5603
+ case 'phi':
5604
+ case 'phi3':
5605
+ case 'phi3_v':
5603
5606
  mapping['num_heads'] = 'num_key_value_heads';
5604
5607
  mapping['num_layers'] = 'num_hidden_layers';
5605
5608
  mapping['hidden_size'] = 'hidden_size';
@@ -5632,6 +5635,12 @@ function getNormalizedConfig(config) {
5632
5635
  mapping['num_layers'] = 'n_layers';
5633
5636
  mapping['hidden_size'] = 'd_model';
5634
5637
  break;
5638
+ case 'exaone':
5639
+ mapping['num_heads'] = 'num_key_value_heads';
5640
+ mapping['num_layers'] = 'num_layers';
5641
+ mapping['dim_kv'] = 'head_dim';
5642
+ mapping['num_attention_heads'] = 'num_attention_heads';
5643
+ break;
5635
5644
 
5636
5645
  // Encoder-decoder models
5637
5646
  case 't5':
@@ -5673,6 +5682,7 @@ function getNormalizedConfig(config) {
5673
5682
  mapping['encoder_hidden_size'] = mapping['decoder_hidden_size'] = 'd_model';
5674
5683
  break;
5675
5684
  case 'musicgen_decoder':
5685
+ case 'moonshine':
5676
5686
  mapping['num_encoder_layers'] = mapping['num_decoder_layers'] = 'num_hidden_layers';
5677
5687
  mapping['num_encoder_heads'] = mapping['num_decoder_heads'] = 'num_attention_heads';
5678
5688
  mapping['encoder_hidden_size'] = mapping['decoder_hidden_size'] = 'hidden_size';
@@ -5922,7 +5932,7 @@ __webpack_require__.r(__webpack_exports__);
5922
5932
 
5923
5933
 
5924
5934
 
5925
- const VERSION = '3.1.2';
5935
+ const VERSION = '3.2.0';
5926
5936
 
5927
5937
  // Check if various APIs are available (depends on environment)
5928
5938
  const IS_BROWSER_ENV = typeof window !== "undefined" && typeof window.document !== "undefined";
@@ -8020,6 +8030,9 @@ __webpack_require__.r(__webpack_exports__);
8020
8030
  /* harmony export */ EsmForTokenClassification: () => (/* binding */ EsmForTokenClassification),
8021
8031
  /* harmony export */ EsmModel: () => (/* binding */ EsmModel),
8022
8032
  /* harmony export */ EsmPreTrainedModel: () => (/* binding */ EsmPreTrainedModel),
8033
+ /* harmony export */ ExaoneForCausalLM: () => (/* binding */ ExaoneForCausalLM),
8034
+ /* harmony export */ ExaoneModel: () => (/* binding */ ExaoneModel),
8035
+ /* harmony export */ ExaonePreTrainedModel: () => (/* binding */ ExaonePreTrainedModel),
8023
8036
  /* harmony export */ FalconForCausalLM: () => (/* binding */ FalconForCausalLM),
8024
8037
  /* harmony export */ FalconModel: () => (/* binding */ FalconModel),
8025
8038
  /* harmony export */ FalconPreTrainedModel: () => (/* binding */ FalconPreTrainedModel),
@@ -8144,6 +8157,9 @@ __webpack_require__.r(__webpack_exports__);
8144
8157
  /* harmony export */ MobileViTV2PreTrainedModel: () => (/* binding */ MobileViTV2PreTrainedModel),
8145
8158
  /* harmony export */ ModelOutput: () => (/* binding */ ModelOutput),
8146
8159
  /* harmony export */ Moondream1ForConditionalGeneration: () => (/* binding */ Moondream1ForConditionalGeneration),
8160
+ /* harmony export */ MoonshineForConditionalGeneration: () => (/* binding */ MoonshineForConditionalGeneration),
8161
+ /* harmony export */ MoonshineModel: () => (/* binding */ MoonshineModel),
8162
+ /* harmony export */ MoonshinePreTrainedModel: () => (/* binding */ MoonshinePreTrainedModel),
8147
8163
  /* harmony export */ MptForCausalLM: () => (/* binding */ MptForCausalLM),
8148
8164
  /* harmony export */ MptModel: () => (/* binding */ MptModel),
8149
8165
  /* harmony export */ MptPreTrainedModel: () => (/* binding */ MptPreTrainedModel),
@@ -8184,6 +8200,8 @@ __webpack_require__.r(__webpack_exports__);
8184
8200
  /* harmony export */ Phi3ForCausalLM: () => (/* binding */ Phi3ForCausalLM),
8185
8201
  /* harmony export */ Phi3Model: () => (/* binding */ Phi3Model),
8186
8202
  /* harmony export */ Phi3PreTrainedModel: () => (/* binding */ Phi3PreTrainedModel),
8203
+ /* harmony export */ Phi3VForCausalLM: () => (/* binding */ Phi3VForCausalLM),
8204
+ /* harmony export */ Phi3VPreTrainedModel: () => (/* binding */ Phi3VPreTrainedModel),
8187
8205
  /* harmony export */ PhiForCausalLM: () => (/* binding */ PhiForCausalLM),
8188
8206
  /* harmony export */ PhiModel: () => (/* binding */ PhiModel),
8189
8207
  /* harmony export */ PhiPreTrainedModel: () => (/* binding */ PhiPreTrainedModel),
@@ -8429,6 +8447,7 @@ const MODEL_TYPES = {
8429
8447
  ImageTextToText: 6,
8430
8448
  Musicgen: 7,
8431
8449
  MultiModality: 8,
8450
+ Phi3V: 9,
8432
8451
  }
8433
8452
  //////////////////////////////////////////////////
8434
8453
 
@@ -9204,6 +9223,10 @@ class PreTrainedModel extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_3__.Cal
9204
9223
  this._forward = imageTextToTextForward;
9205
9224
  this._prepare_inputs_for_generation = image_text_to_text_prepare_inputs_for_generation;
9206
9225
  break;
9226
+ case MODEL_TYPES.Phi3V:
9227
+ this.can_generate = true;
9228
+ this._prepare_inputs_for_generation = image_text_to_text_prepare_inputs_for_generation;
9229
+ break;
9207
9230
 
9208
9231
  case MODEL_TYPES.MultiModality:
9209
9232
  this.can_generate = true;
@@ -9368,6 +9391,18 @@ class PreTrainedModel extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_3__.Cal
9368
9391
  }, options),
9369
9392
  ]);
9370
9393
 
9394
+ } else if (modelType === MODEL_TYPES.Phi3V) {
9395
+ info = await Promise.all([
9396
+ constructSessions(pretrained_model_name_or_path, {
9397
+ prepare_inputs_embeds: 'prepare_inputs_embeds',
9398
+ model: 'model',
9399
+ vision_encoder: 'vision_encoder',
9400
+ }, options),
9401
+ getOptionalConfigs(pretrained_model_name_or_path, {
9402
+ generation_config: 'generation_config.json',
9403
+ }, options),
9404
+ ]);
9405
+
9371
9406
  } else { // should be MODEL_TYPES.EncoderOnly
9372
9407
  if (modelType !== MODEL_TYPES.EncoderOnly) {
9373
9408
  const type = modelName ?? config?.model_type;
@@ -11640,6 +11675,29 @@ class WhisperForConditionalGeneration extends WhisperPreTrainedModel {
11640
11675
  }
11641
11676
  //////////////////////////////////////////////////
11642
11677
 
11678
+
11679
+ //////////////////////////////////////////////////
11680
+ // Moonshine models
11681
+ class MoonshinePreTrainedModel extends PreTrainedModel {
11682
+
11683
+ requires_attention_mask = false;
11684
+ main_input_name = 'input_values';
11685
+ forward_params = [
11686
+ 'input_values',
11687
+ 'decoder_input_ids',
11688
+ 'past_key_values',
11689
+ ];
11690
+ };
11691
+
11692
+ /**
11693
+ * MoonshineModel class for training Moonshine models without a language model head.
11694
+ */
11695
+ class MoonshineModel extends MoonshinePreTrainedModel { }
11696
+
11697
+ class MoonshineForConditionalGeneration extends MoonshinePreTrainedModel { }
11698
+ //////////////////////////////////////////////////
11699
+
11700
+
11643
11701
  //////////////////////////////////////////////////
11644
11702
  /**
11645
11703
  * Vision Encoder-Decoder model based on OpenAI's GPT architecture for image captioning and other vision tasks
@@ -11910,6 +11968,77 @@ class Idefics3ForConditionalGeneration extends Idefics3PreTrainedModel {
11910
11968
  }
11911
11969
  //////////////////////////////////////////////////
11912
11970
 
11971
+ class Phi3VPreTrainedModel extends PreTrainedModel {
11972
+ forward_params = [
11973
+ 'input_ids',
11974
+ 'inputs_embeds',
11975
+ 'attention_mask',
11976
+ 'position_ids',
11977
+ 'pixel_values',
11978
+ 'image_sizes',
11979
+ 'past_key_values',
11980
+ ];
11981
+ }
11982
+ class Phi3VForCausalLM extends Phi3VPreTrainedModel {
11983
+
11984
+ async forward({
11985
+ // Produced by the tokenizer/processor:
11986
+ input_ids = null,
11987
+ attention_mask = null,
11988
+ pixel_values = null,
11989
+ image_sizes = null,
11990
+
11991
+ // Used during generation:
11992
+ position_ids = null,
11993
+ inputs_embeds = null,
11994
+ past_key_values = null,
11995
+
11996
+ // Generic generation parameters
11997
+ generation_config = null,
11998
+ logits_processor = null,
11999
+
12000
+ // TODO: needed?
12001
+ ...kwargs
12002
+ }) {
12003
+ if (!inputs_embeds) {
12004
+ let image_features;
12005
+ if (pixel_values && input_ids.dims[1] !== 1) {
12006
+ if (!image_sizes) {
12007
+ throw new Error('`image_sizes` must be provided when `pixel_values` is provided.');
12008
+ }
12009
+
12010
+ // Encode the image
12011
+ ({ image_features } = await sessionRun(this.sessions['vision_encoder'], {
12012
+ pixel_values,
12013
+ image_sizes,
12014
+ }));
12015
+ } else {
12016
+ const hidden_size = this.config.normalized_config.hidden_size;
12017
+ image_features = new _utils_tensor_js__WEBPACK_IMPORTED_MODULE_9__.Tensor(
12018
+ 'float32',
12019
+ [],
12020
+ [0, hidden_size],
12021
+ );
12022
+ }
12023
+
12024
+ ({ inputs_embeds } = await sessionRun(this.sessions['prepare_inputs_embeds'], {
12025
+ input_ids,
12026
+ image_features,
12027
+ }));
12028
+ }
12029
+
12030
+ const outputs = await decoderForward(this, {
12031
+ inputs_embeds,
12032
+ past_key_values,
12033
+ attention_mask,
12034
+ position_ids,
12035
+ generation_config,
12036
+ logits_processor,
12037
+ }, false);
12038
+ return outputs;
12039
+ }
12040
+ }
12041
+
11913
12042
  //////////////////////////////////////////////////
11914
12043
  class CLIPPreTrainedModel extends PreTrainedModel { }
11915
12044
 
@@ -11964,9 +12093,11 @@ class CLIPModel extends CLIPPreTrainedModel { }
11964
12093
  class CLIPTextModel extends CLIPPreTrainedModel {
11965
12094
  /** @type {typeof PreTrainedModel.from_pretrained} */
11966
12095
  static async from_pretrained(pretrained_model_name_or_path, options = {}) {
11967
- // Update default model file name if not provided
11968
- options.model_file_name ??= 'text_model';
11969
- return super.from_pretrained(pretrained_model_name_or_path, options);
12096
+ return super.from_pretrained(pretrained_model_name_or_path, {
12097
+ // Update default model file name if not provided
12098
+ model_file_name: 'text_model',
12099
+ ...options,
12100
+ });
11970
12101
  }
11971
12102
  }
11972
12103
 
@@ -11999,9 +12130,11 @@ class CLIPTextModel extends CLIPPreTrainedModel {
11999
12130
  class CLIPTextModelWithProjection extends CLIPPreTrainedModel {
12000
12131
  /** @type {typeof PreTrainedModel.from_pretrained} */
12001
12132
  static async from_pretrained(pretrained_model_name_or_path, options = {}) {
12002
- // Update default model file name if not provided
12003
- options.model_file_name ??= 'text_model';
12004
- return super.from_pretrained(pretrained_model_name_or_path, options);
12133
+ return super.from_pretrained(pretrained_model_name_or_path, {
12134
+ // Update default model file name if not provided
12135
+ model_file_name: 'text_model',
12136
+ ...options,
12137
+ });
12005
12138
  }
12006
12139
  }
12007
12140
 
@@ -12011,9 +12144,11 @@ class CLIPTextModelWithProjection extends CLIPPreTrainedModel {
12011
12144
  class CLIPVisionModel extends CLIPPreTrainedModel {
12012
12145
  /** @type {typeof PreTrainedModel.from_pretrained} */
12013
12146
  static async from_pretrained(pretrained_model_name_or_path, options = {}) {
12014
- // Update default model file name if not provided
12015
- options.model_file_name ??= 'vision_model';
12016
- return super.from_pretrained(pretrained_model_name_or_path, options);
12147
+ return super.from_pretrained(pretrained_model_name_or_path, {
12148
+ // Update default model file name if not provided
12149
+ model_file_name: 'vision_model',
12150
+ ...options,
12151
+ });
12017
12152
  }
12018
12153
  }
12019
12154
 
@@ -12046,9 +12181,11 @@ class CLIPVisionModel extends CLIPPreTrainedModel {
12046
12181
  class CLIPVisionModelWithProjection extends CLIPPreTrainedModel {
12047
12182
  /** @type {typeof PreTrainedModel.from_pretrained} */
12048
12183
  static async from_pretrained(pretrained_model_name_or_path, options = {}) {
12049
- // Update default model file name if not provided
12050
- options.model_file_name ??= 'vision_model';
12051
- return super.from_pretrained(pretrained_model_name_or_path, options);
12184
+ return super.from_pretrained(pretrained_model_name_or_path, {
12185
+ // Update default model file name if not provided
12186
+ model_file_name: 'vision_model',
12187
+ ...options,
12188
+ });
12052
12189
  }
12053
12190
  }
12054
12191
  //////////////////////////////////////////////////
@@ -12132,9 +12269,11 @@ class SiglipModel extends SiglipPreTrainedModel { }
12132
12269
  class SiglipTextModel extends SiglipPreTrainedModel {
12133
12270
  /** @type {typeof PreTrainedModel.from_pretrained} */
12134
12271
  static async from_pretrained(pretrained_model_name_or_path, options = {}) {
12135
- // Update default model file name if not provided
12136
- options.model_file_name ??= 'text_model';
12137
- return super.from_pretrained(pretrained_model_name_or_path, options);
12272
+ return super.from_pretrained(pretrained_model_name_or_path, {
12273
+ // Update default model file name if not provided
12274
+ model_file_name: 'text_model',
12275
+ ...options,
12276
+ });
12138
12277
  }
12139
12278
  }
12140
12279
 
@@ -12167,9 +12306,11 @@ class SiglipTextModel extends SiglipPreTrainedModel {
12167
12306
  class SiglipVisionModel extends CLIPPreTrainedModel {
12168
12307
  /** @type {typeof PreTrainedModel.from_pretrained} */
12169
12308
  static async from_pretrained(pretrained_model_name_or_path, options = {}) {
12170
- // Update default model file name if not provided
12171
- options.model_file_name ??= 'vision_model';
12172
- return super.from_pretrained(pretrained_model_name_or_path, options);
12309
+ return super.from_pretrained(pretrained_model_name_or_path, {
12310
+ // Update default model file name if not provided
12311
+ model_file_name: 'vision_model',
12312
+ ...options,
12313
+ });
12173
12314
  }
12174
12315
  }
12175
12316
  //////////////////////////////////////////////////
@@ -12224,18 +12365,22 @@ class JinaCLIPModel extends JinaCLIPPreTrainedModel {
12224
12365
  class JinaCLIPTextModel extends JinaCLIPPreTrainedModel {
12225
12366
  /** @type {typeof PreTrainedModel.from_pretrained} */
12226
12367
  static async from_pretrained(pretrained_model_name_or_path, options = {}) {
12227
- // Update default model file name if not provided
12228
- options.model_file_name ??= 'text_model';
12229
- return super.from_pretrained(pretrained_model_name_or_path, options);
12368
+ return super.from_pretrained(pretrained_model_name_or_path, {
12369
+ // Update default model file name if not provided
12370
+ model_file_name: 'text_model',
12371
+ ...options,
12372
+ });
12230
12373
  }
12231
12374
  }
12232
12375
 
12233
12376
  class JinaCLIPVisionModel extends JinaCLIPPreTrainedModel {
12234
12377
  /** @type {typeof PreTrainedModel.from_pretrained} */
12235
12378
  static async from_pretrained(pretrained_model_name_or_path, options = {}) {
12236
- // Update default model file name if not provided
12237
- options.model_file_name ??= 'vision_model';
12238
- return super.from_pretrained(pretrained_model_name_or_path, options);
12379
+ return super.from_pretrained(pretrained_model_name_or_path, {
12380
+ // Update default model file name if not provided
12381
+ model_file_name: 'vision_model',
12382
+ ...options,
12383
+ });
12239
12384
  }
12240
12385
  }
12241
12386
  //////////////////////////////////////////////////
@@ -12395,6 +12540,14 @@ class LlamaForCausalLM extends LlamaPreTrainedModel { }
12395
12540
  //////////////////////////////////////////////////
12396
12541
 
12397
12542
 
12543
+ //////////////////////////////////////////////////
12544
+ // EXAONE models
12545
+ class ExaonePreTrainedModel extends PreTrainedModel { }
12546
+ class ExaoneModel extends ExaonePreTrainedModel { }
12547
+ class ExaoneForCausalLM extends ExaonePreTrainedModel { }
12548
+ //////////////////////////////////////////////////
12549
+
12550
+
12398
12551
  //////////////////////////////////////////////////
12399
12552
  // MobileLLM models
12400
12553
  class MobileLLMPreTrainedModel extends PreTrainedModel { }
@@ -14457,9 +14610,11 @@ class ClapModel extends ClapPreTrainedModel { }
14457
14610
  class ClapTextModelWithProjection extends ClapPreTrainedModel {
14458
14611
  /** @type {typeof PreTrainedModel.from_pretrained} */
14459
14612
  static async from_pretrained(pretrained_model_name_or_path, options = {}) {
14460
- // Update default model file name if not provided
14461
- options.model_file_name ??= 'text_model';
14462
- return super.from_pretrained(pretrained_model_name_or_path, options);
14613
+ return super.from_pretrained(pretrained_model_name_or_path, {
14614
+ // Update default model file name if not provided
14615
+ model_file_name: 'text_model',
14616
+ ...options,
14617
+ });
14463
14618
  }
14464
14619
  }
14465
14620
 
@@ -14492,9 +14647,11 @@ class ClapTextModelWithProjection extends ClapPreTrainedModel {
14492
14647
  class ClapAudioModelWithProjection extends ClapPreTrainedModel {
14493
14648
  /** @type {typeof PreTrainedModel.from_pretrained} */
14494
14649
  static async from_pretrained(pretrained_model_name_or_path, options = {}) {
14495
- // Update default model file name if not provided
14496
- options.model_file_name ??= 'audio_model';
14497
- return super.from_pretrained(pretrained_model_name_or_path, options);
14650
+ return super.from_pretrained(pretrained_model_name_or_path, {
14651
+ // Update default model file name if not provided
14652
+ model_file_name: 'audio_model',
14653
+ ...options,
14654
+ });
14498
14655
  }
14499
14656
  }
14500
14657
  //////////////////////////////////////////////////
@@ -15181,6 +15338,7 @@ const MODEL_MAPPING_NAMES_DECODER_ONLY = new Map([
15181
15338
  ['gpt_neox', ['GPTNeoXModel', GPTNeoXModel]],
15182
15339
  ['codegen', ['CodeGenModel', CodeGenModel]],
15183
15340
  ['llama', ['LlamaModel', LlamaModel]],
15341
+ ['exaone', ['ExaoneModel', ExaoneModel]],
15184
15342
  ['olmo', ['OlmoModel', OlmoModel]],
15185
15343
  ['olmo2', ['Olmo2Model', Olmo2Model]],
15186
15344
  ['mobilellm', ['MobileLLMModel', MobileLLMModel]],
@@ -15203,6 +15361,7 @@ const MODEL_MAPPING_NAMES_DECODER_ONLY = new Map([
15203
15361
  const MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING_NAMES = new Map([
15204
15362
  ['speecht5', ['SpeechT5ForSpeechToText', SpeechT5ForSpeechToText]],
15205
15363
  ['whisper', ['WhisperForConditionalGeneration', WhisperForConditionalGeneration]],
15364
+ ['moonshine', ['MoonshineForConditionalGeneration', MoonshineForConditionalGeneration]],
15206
15365
  ]);
15207
15366
 
15208
15367
  const MODEL_FOR_TEXT_TO_SPECTROGRAM_MAPPING_NAMES = new Map([
@@ -15273,6 +15432,7 @@ const MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = new Map([
15273
15432
  ['gpt_neox', ['GPTNeoXForCausalLM', GPTNeoXForCausalLM]],
15274
15433
  ['codegen', ['CodeGenForCausalLM', CodeGenForCausalLM]],
15275
15434
  ['llama', ['LlamaForCausalLM', LlamaForCausalLM]],
15435
+ ['exaone', ['ExaoneForCausalLM', ExaoneForCausalLM]],
15276
15436
  ['olmo', ['OlmoForCausalLM', OlmoForCausalLM]],
15277
15437
  ['olmo2', ['Olmo2ForCausalLM', Olmo2ForCausalLM]],
15278
15438
  ['mobilellm', ['MobileLLMForCausalLM', MobileLLMForCausalLM]],
@@ -15292,6 +15452,9 @@ const MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = new Map([
15292
15452
  ['falcon', ['FalconForCausalLM', FalconForCausalLM]],
15293
15453
  ['trocr', ['TrOCRForCausalLM', TrOCRForCausalLM]],
15294
15454
  ['stablelm', ['StableLmForCausalLM', StableLmForCausalLM]],
15455
+
15456
+ // Also image-text-to-text
15457
+ ['phi3_v', ['Phi3VForCausalLM', Phi3VForCausalLM]],
15295
15458
  ]);
15296
15459
 
15297
15460
  const MODEL_FOR_MULTIMODALITY_MAPPING_NAMES = new Map([
@@ -15529,6 +15692,7 @@ const CUSTOM_MAPPING = [
15529
15692
  // OVERRIDE:
15530
15693
  // TODO: Refactor to allow class to specify model
15531
15694
  ['MusicgenForConditionalGeneration', MusicgenForConditionalGeneration, MODEL_TYPES.Musicgen],
15695
+ ['Phi3VForCausalLM', Phi3VForCausalLM, MODEL_TYPES.Phi3V],
15532
15696
 
15533
15697
  ['CLIPTextModelWithProjection', CLIPTextModelWithProjection, MODEL_TYPES.EncoderOnly],
15534
15698
  ['SiglipTextModel', SiglipTextModel, MODEL_TYPES.EncoderOnly],
@@ -16783,23 +16947,26 @@ __webpack_require__.r(__webpack_exports__);
16783
16947
  /* harmony export */ __webpack_require__.d(__webpack_exports__, {
16784
16948
  /* harmony export */ ASTFeatureExtractor: () => (/* reexport safe */ _audio_spectrogram_transformer_feature_extraction_audio_spectrogram_transformer_js__WEBPACK_IMPORTED_MODULE_0__.ASTFeatureExtractor),
16785
16949
  /* harmony export */ ClapFeatureExtractor: () => (/* reexport safe */ _clap_feature_extraction_clap_js__WEBPACK_IMPORTED_MODULE_1__.ClapFeatureExtractor),
16786
- /* harmony export */ ImageFeatureExtractor: () => (/* reexport safe */ _base_image_processors_utils_js__WEBPACK_IMPORTED_MODULE_8__.ImageProcessor),
16787
- /* harmony export */ PyAnnoteFeatureExtractor: () => (/* reexport safe */ _pyannote_feature_extraction_pyannote_js__WEBPACK_IMPORTED_MODULE_2__.PyAnnoteFeatureExtractor),
16788
- /* harmony export */ SeamlessM4TFeatureExtractor: () => (/* reexport safe */ _seamless_m4t_feature_extraction_seamless_m4t_js__WEBPACK_IMPORTED_MODULE_3__.SeamlessM4TFeatureExtractor),
16789
- /* harmony export */ SpeechT5FeatureExtractor: () => (/* reexport safe */ _speecht5_feature_extraction_speecht5_js__WEBPACK_IMPORTED_MODULE_4__.SpeechT5FeatureExtractor),
16790
- /* harmony export */ Wav2Vec2FeatureExtractor: () => (/* reexport safe */ _wav2vec2_feature_extraction_wav2vec2_js__WEBPACK_IMPORTED_MODULE_5__.Wav2Vec2FeatureExtractor),
16791
- /* harmony export */ WeSpeakerFeatureExtractor: () => (/* reexport safe */ _wespeaker_feature_extraction_wespeaker_js__WEBPACK_IMPORTED_MODULE_6__.WeSpeakerFeatureExtractor),
16792
- /* harmony export */ WhisperFeatureExtractor: () => (/* reexport safe */ _whisper_feature_extraction_whisper_js__WEBPACK_IMPORTED_MODULE_7__.WhisperFeatureExtractor)
16950
+ /* harmony export */ ImageFeatureExtractor: () => (/* reexport safe */ _base_image_processors_utils_js__WEBPACK_IMPORTED_MODULE_9__.ImageProcessor),
16951
+ /* harmony export */ MoonshineFeatureExtractor: () => (/* reexport safe */ _moonshine_feature_extraction_moonshine_js__WEBPACK_IMPORTED_MODULE_2__.MoonshineFeatureExtractor),
16952
+ /* harmony export */ PyAnnoteFeatureExtractor: () => (/* reexport safe */ _pyannote_feature_extraction_pyannote_js__WEBPACK_IMPORTED_MODULE_3__.PyAnnoteFeatureExtractor),
16953
+ /* harmony export */ SeamlessM4TFeatureExtractor: () => (/* reexport safe */ _seamless_m4t_feature_extraction_seamless_m4t_js__WEBPACK_IMPORTED_MODULE_4__.SeamlessM4TFeatureExtractor),
16954
+ /* harmony export */ SpeechT5FeatureExtractor: () => (/* reexport safe */ _speecht5_feature_extraction_speecht5_js__WEBPACK_IMPORTED_MODULE_5__.SpeechT5FeatureExtractor),
16955
+ /* harmony export */ Wav2Vec2FeatureExtractor: () => (/* reexport safe */ _wav2vec2_feature_extraction_wav2vec2_js__WEBPACK_IMPORTED_MODULE_6__.Wav2Vec2FeatureExtractor),
16956
+ /* harmony export */ WeSpeakerFeatureExtractor: () => (/* reexport safe */ _wespeaker_feature_extraction_wespeaker_js__WEBPACK_IMPORTED_MODULE_7__.WeSpeakerFeatureExtractor),
16957
+ /* harmony export */ WhisperFeatureExtractor: () => (/* reexport safe */ _whisper_feature_extraction_whisper_js__WEBPACK_IMPORTED_MODULE_8__.WhisperFeatureExtractor)
16793
16958
  /* harmony export */ });
16794
16959
  /* harmony import */ var _audio_spectrogram_transformer_feature_extraction_audio_spectrogram_transformer_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ./audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.js */ "./src/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.js");
16795
16960
  /* harmony import */ var _clap_feature_extraction_clap_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ./clap/feature_extraction_clap.js */ "./src/models/clap/feature_extraction_clap.js");
16796
- /* harmony import */ var _pyannote_feature_extraction_pyannote_js__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! ./pyannote/feature_extraction_pyannote.js */ "./src/models/pyannote/feature_extraction_pyannote.js");
16797
- /* harmony import */ var _seamless_m4t_feature_extraction_seamless_m4t_js__WEBPACK_IMPORTED_MODULE_3__ = __webpack_require__(/*! ./seamless_m4t/feature_extraction_seamless_m4t.js */ "./src/models/seamless_m4t/feature_extraction_seamless_m4t.js");
16798
- /* harmony import */ var _speecht5_feature_extraction_speecht5_js__WEBPACK_IMPORTED_MODULE_4__ = __webpack_require__(/*! ./speecht5/feature_extraction_speecht5.js */ "./src/models/speecht5/feature_extraction_speecht5.js");
16799
- /* harmony import */ var _wav2vec2_feature_extraction_wav2vec2_js__WEBPACK_IMPORTED_MODULE_5__ = __webpack_require__(/*! ./wav2vec2/feature_extraction_wav2vec2.js */ "./src/models/wav2vec2/feature_extraction_wav2vec2.js");
16800
- /* harmony import */ var _wespeaker_feature_extraction_wespeaker_js__WEBPACK_IMPORTED_MODULE_6__ = __webpack_require__(/*! ./wespeaker/feature_extraction_wespeaker.js */ "./src/models/wespeaker/feature_extraction_wespeaker.js");
16801
- /* harmony import */ var _whisper_feature_extraction_whisper_js__WEBPACK_IMPORTED_MODULE_7__ = __webpack_require__(/*! ./whisper/feature_extraction_whisper.js */ "./src/models/whisper/feature_extraction_whisper.js");
16802
- /* harmony import */ var _base_image_processors_utils_js__WEBPACK_IMPORTED_MODULE_8__ = __webpack_require__(/*! ../base/image_processors_utils.js */ "./src/base/image_processors_utils.js");
16961
+ /* harmony import */ var _moonshine_feature_extraction_moonshine_js__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! ./moonshine/feature_extraction_moonshine.js */ "./src/models/moonshine/feature_extraction_moonshine.js");
16962
+ /* harmony import */ var _pyannote_feature_extraction_pyannote_js__WEBPACK_IMPORTED_MODULE_3__ = __webpack_require__(/*! ./pyannote/feature_extraction_pyannote.js */ "./src/models/pyannote/feature_extraction_pyannote.js");
16963
+ /* harmony import */ var _seamless_m4t_feature_extraction_seamless_m4t_js__WEBPACK_IMPORTED_MODULE_4__ = __webpack_require__(/*! ./seamless_m4t/feature_extraction_seamless_m4t.js */ "./src/models/seamless_m4t/feature_extraction_seamless_m4t.js");
16964
+ /* harmony import */ var _speecht5_feature_extraction_speecht5_js__WEBPACK_IMPORTED_MODULE_5__ = __webpack_require__(/*! ./speecht5/feature_extraction_speecht5.js */ "./src/models/speecht5/feature_extraction_speecht5.js");
16965
+ /* harmony import */ var _wav2vec2_feature_extraction_wav2vec2_js__WEBPACK_IMPORTED_MODULE_6__ = __webpack_require__(/*! ./wav2vec2/feature_extraction_wav2vec2.js */ "./src/models/wav2vec2/feature_extraction_wav2vec2.js");
16966
+ /* harmony import */ var _wespeaker_feature_extraction_wespeaker_js__WEBPACK_IMPORTED_MODULE_7__ = __webpack_require__(/*! ./wespeaker/feature_extraction_wespeaker.js */ "./src/models/wespeaker/feature_extraction_wespeaker.js");
16967
+ /* harmony import */ var _whisper_feature_extraction_whisper_js__WEBPACK_IMPORTED_MODULE_8__ = __webpack_require__(/*! ./whisper/feature_extraction_whisper.js */ "./src/models/whisper/feature_extraction_whisper.js");
16968
+ /* harmony import */ var _base_image_processors_utils_js__WEBPACK_IMPORTED_MODULE_9__ = __webpack_require__(/*! ../base/image_processors_utils.js */ "./src/base/image_processors_utils.js");
16969
+
16803
16970
 
16804
16971
 
16805
16972
 
@@ -17180,18 +17347,29 @@ class Idefics3ImageProcessor extends _base_image_processors_utils_js__WEBPACK_IM
17180
17347
  const optimal_width = Math.ceil(width / num_splits_w);
17181
17348
 
17182
17349
  // Iterate through each row and column
17183
- for (let r = 0; r < num_splits_h; r++) {
17184
- for (let c = 0; c < num_splits_w; c++) {
17185
- // Calculate the starting point of the crop
17186
- const start_x = c * optimal_width;
17187
- const start_y = r * optimal_height;
17188
-
17189
- // Calculate the ending point of the crop
17190
- const end_x = Math.min(start_x + optimal_width, width);
17191
- const end_y = Math.min(start_y + optimal_height, height);
17192
-
17193
- // Crop the image
17194
- frames.push(pixel_values.slice(null, null, [start_y, end_y], [start_x, end_x]));
17350
+ for (let r = 0; r < num_splits_h; ++r) {
17351
+ for (let c = 0; c < num_splits_w; ++c) {
17352
+ let start_x, start_y, end_x, end_y;
17353
+ if (r === num_splits_h - 1) { // At bottom
17354
+ start_y = height - optimal_height;
17355
+ end_y = height;
17356
+ } else {
17357
+ start_y = r * optimal_height;
17358
+ end_y = (r + 1) * optimal_height;
17359
+ }
17360
+ if (c === num_splits_w - 1) { // At right
17361
+ start_x = width - optimal_width;
17362
+ end_x = width;
17363
+ } else {
17364
+ start_x = c * optimal_width;
17365
+ end_x = (c + 1) * optimal_width;
17366
+ }
17367
+
17368
+ const starts = [start_y, start_x];
17369
+ const ends = [end_y, end_x];
17370
+
17371
+ const patch = await (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.slice)(pixel_values, starts, ends, [2, 3]);
17372
+ frames.push(patch);
17195
17373
  }
17196
17374
  }
17197
17375
 
@@ -17417,21 +17595,22 @@ __webpack_require__.r(__webpack_exports__);
17417
17595
  /* harmony export */ OwlViTFeatureExtractor: () => (/* reexport safe */ _owlvit_image_processing_owlvit_js__WEBPACK_IMPORTED_MODULE_24__.OwlViTFeatureExtractor),
17418
17596
  /* harmony export */ OwlViTImageProcessor: () => (/* reexport safe */ _owlvit_image_processing_owlvit_js__WEBPACK_IMPORTED_MODULE_24__.OwlViTImageProcessor),
17419
17597
  /* harmony export */ Owlv2ImageProcessor: () => (/* reexport safe */ _owlv2_image_processing_owlv2_js__WEBPACK_IMPORTED_MODULE_23__.Owlv2ImageProcessor),
17420
- /* harmony export */ PvtImageProcessor: () => (/* reexport safe */ _pvt_image_processing_pvt_js__WEBPACK_IMPORTED_MODULE_25__.PvtImageProcessor),
17421
- /* harmony export */ Qwen2VLImageProcessor: () => (/* reexport safe */ _qwen2_vl_image_processing_qwen2_vl_js__WEBPACK_IMPORTED_MODULE_26__.Qwen2VLImageProcessor),
17422
- /* harmony export */ RTDetrImageProcessor: () => (/* reexport safe */ _rt_detr_image_processing_rt_detr_js__WEBPACK_IMPORTED_MODULE_27__.RTDetrImageProcessor),
17423
- /* harmony export */ SamImageProcessor: () => (/* reexport safe */ _sam_image_processing_sam_js__WEBPACK_IMPORTED_MODULE_28__.SamImageProcessor),
17424
- /* harmony export */ SegformerFeatureExtractor: () => (/* reexport safe */ _segformer_image_processing_segformer_js__WEBPACK_IMPORTED_MODULE_29__.SegformerFeatureExtractor),
17425
- /* harmony export */ SegformerImageProcessor: () => (/* reexport safe */ _segformer_image_processing_segformer_js__WEBPACK_IMPORTED_MODULE_29__.SegformerImageProcessor),
17426
- /* harmony export */ SiglipImageProcessor: () => (/* reexport safe */ _siglip_image_processing_siglip_js__WEBPACK_IMPORTED_MODULE_30__.SiglipImageProcessor),
17427
- /* harmony export */ Swin2SRImageProcessor: () => (/* reexport safe */ _swin2sr_image_processing_swin2sr_js__WEBPACK_IMPORTED_MODULE_31__.Swin2SRImageProcessor),
17598
+ /* harmony export */ Phi3VImageProcessor: () => (/* reexport safe */ _phi3_v_image_processing_phi3_v_js__WEBPACK_IMPORTED_MODULE_25__.Phi3VImageProcessor),
17599
+ /* harmony export */ PvtImageProcessor: () => (/* reexport safe */ _pvt_image_processing_pvt_js__WEBPACK_IMPORTED_MODULE_26__.PvtImageProcessor),
17600
+ /* harmony export */ Qwen2VLImageProcessor: () => (/* reexport safe */ _qwen2_vl_image_processing_qwen2_vl_js__WEBPACK_IMPORTED_MODULE_27__.Qwen2VLImageProcessor),
17601
+ /* harmony export */ RTDetrImageProcessor: () => (/* reexport safe */ _rt_detr_image_processing_rt_detr_js__WEBPACK_IMPORTED_MODULE_28__.RTDetrImageProcessor),
17602
+ /* harmony export */ SamImageProcessor: () => (/* reexport safe */ _sam_image_processing_sam_js__WEBPACK_IMPORTED_MODULE_29__.SamImageProcessor),
17603
+ /* harmony export */ SegformerFeatureExtractor: () => (/* reexport safe */ _segformer_image_processing_segformer_js__WEBPACK_IMPORTED_MODULE_30__.SegformerFeatureExtractor),
17604
+ /* harmony export */ SegformerImageProcessor: () => (/* reexport safe */ _segformer_image_processing_segformer_js__WEBPACK_IMPORTED_MODULE_30__.SegformerImageProcessor),
17605
+ /* harmony export */ SiglipImageProcessor: () => (/* reexport safe */ _siglip_image_processing_siglip_js__WEBPACK_IMPORTED_MODULE_31__.SiglipImageProcessor),
17606
+ /* harmony export */ Swin2SRImageProcessor: () => (/* reexport safe */ _swin2sr_image_processing_swin2sr_js__WEBPACK_IMPORTED_MODULE_32__.Swin2SRImageProcessor),
17428
17607
  /* harmony export */ VLMImageProcessor: () => (/* reexport safe */ _janus_image_processing_janus_js__WEBPACK_IMPORTED_MODULE_12__.VLMImageProcessor),
17429
- /* harmony export */ ViTFeatureExtractor: () => (/* reexport safe */ _vit_image_processing_vit_js__WEBPACK_IMPORTED_MODULE_32__.ViTFeatureExtractor),
17430
- /* harmony export */ ViTImageProcessor: () => (/* reexport safe */ _vit_image_processing_vit_js__WEBPACK_IMPORTED_MODULE_32__.ViTImageProcessor),
17431
- /* harmony export */ VitMatteImageProcessor: () => (/* reexport safe */ _vitmatte_image_processing_vitmatte_js__WEBPACK_IMPORTED_MODULE_33__.VitMatteImageProcessor),
17432
- /* harmony export */ VitPoseImageProcessor: () => (/* reexport safe */ _vitpose_image_processing_vitpose_js__WEBPACK_IMPORTED_MODULE_34__.VitPoseImageProcessor),
17433
- /* harmony export */ YolosFeatureExtractor: () => (/* reexport safe */ _yolos_image_processing_yolos_js__WEBPACK_IMPORTED_MODULE_35__.YolosFeatureExtractor),
17434
- /* harmony export */ YolosImageProcessor: () => (/* reexport safe */ _yolos_image_processing_yolos_js__WEBPACK_IMPORTED_MODULE_35__.YolosImageProcessor)
17608
+ /* harmony export */ ViTFeatureExtractor: () => (/* reexport safe */ _vit_image_processing_vit_js__WEBPACK_IMPORTED_MODULE_33__.ViTFeatureExtractor),
17609
+ /* harmony export */ ViTImageProcessor: () => (/* reexport safe */ _vit_image_processing_vit_js__WEBPACK_IMPORTED_MODULE_33__.ViTImageProcessor),
17610
+ /* harmony export */ VitMatteImageProcessor: () => (/* reexport safe */ _vitmatte_image_processing_vitmatte_js__WEBPACK_IMPORTED_MODULE_34__.VitMatteImageProcessor),
17611
+ /* harmony export */ VitPoseImageProcessor: () => (/* reexport safe */ _vitpose_image_processing_vitpose_js__WEBPACK_IMPORTED_MODULE_35__.VitPoseImageProcessor),
17612
+ /* harmony export */ YolosFeatureExtractor: () => (/* reexport safe */ _yolos_image_processing_yolos_js__WEBPACK_IMPORTED_MODULE_36__.YolosFeatureExtractor),
17613
+ /* harmony export */ YolosImageProcessor: () => (/* reexport safe */ _yolos_image_processing_yolos_js__WEBPACK_IMPORTED_MODULE_36__.YolosImageProcessor)
17435
17614
  /* harmony export */ });
17436
17615
  /* harmony import */ var _beit_image_processing_beit_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ./beit/image_processing_beit.js */ "./src/models/beit/image_processing_beit.js");
17437
17616
  /* harmony import */ var _bit_image_processing_bit_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ./bit/image_processing_bit.js */ "./src/models/bit/image_processing_bit.js");
@@ -17458,17 +17637,19 @@ __webpack_require__.r(__webpack_exports__);
17458
17637
  /* harmony import */ var _nougat_image_processing_nougat_js__WEBPACK_IMPORTED_MODULE_22__ = __webpack_require__(/*! ./nougat/image_processing_nougat.js */ "./src/models/nougat/image_processing_nougat.js");
17459
17638
  /* harmony import */ var _owlv2_image_processing_owlv2_js__WEBPACK_IMPORTED_MODULE_23__ = __webpack_require__(/*! ./owlv2/image_processing_owlv2.js */ "./src/models/owlv2/image_processing_owlv2.js");
17460
17639
  /* harmony import */ var _owlvit_image_processing_owlvit_js__WEBPACK_IMPORTED_MODULE_24__ = __webpack_require__(/*! ./owlvit/image_processing_owlvit.js */ "./src/models/owlvit/image_processing_owlvit.js");
17461
- /* harmony import */ var _pvt_image_processing_pvt_js__WEBPACK_IMPORTED_MODULE_25__ = __webpack_require__(/*! ./pvt/image_processing_pvt.js */ "./src/models/pvt/image_processing_pvt.js");
17462
- /* harmony import */ var _qwen2_vl_image_processing_qwen2_vl_js__WEBPACK_IMPORTED_MODULE_26__ = __webpack_require__(/*! ./qwen2_vl/image_processing_qwen2_vl.js */ "./src/models/qwen2_vl/image_processing_qwen2_vl.js");
17463
- /* harmony import */ var _rt_detr_image_processing_rt_detr_js__WEBPACK_IMPORTED_MODULE_27__ = __webpack_require__(/*! ./rt_detr/image_processing_rt_detr.js */ "./src/models/rt_detr/image_processing_rt_detr.js");
17464
- /* harmony import */ var _sam_image_processing_sam_js__WEBPACK_IMPORTED_MODULE_28__ = __webpack_require__(/*! ./sam/image_processing_sam.js */ "./src/models/sam/image_processing_sam.js");
17465
- /* harmony import */ var _segformer_image_processing_segformer_js__WEBPACK_IMPORTED_MODULE_29__ = __webpack_require__(/*! ./segformer/image_processing_segformer.js */ "./src/models/segformer/image_processing_segformer.js");
17466
- /* harmony import */ var _siglip_image_processing_siglip_js__WEBPACK_IMPORTED_MODULE_30__ = __webpack_require__(/*! ./siglip/image_processing_siglip.js */ "./src/models/siglip/image_processing_siglip.js");
17467
- /* harmony import */ var _swin2sr_image_processing_swin2sr_js__WEBPACK_IMPORTED_MODULE_31__ = __webpack_require__(/*! ./swin2sr/image_processing_swin2sr.js */ "./src/models/swin2sr/image_processing_swin2sr.js");
17468
- /* harmony import */ var _vit_image_processing_vit_js__WEBPACK_IMPORTED_MODULE_32__ = __webpack_require__(/*! ./vit/image_processing_vit.js */ "./src/models/vit/image_processing_vit.js");
17469
- /* harmony import */ var _vitmatte_image_processing_vitmatte_js__WEBPACK_IMPORTED_MODULE_33__ = __webpack_require__(/*! ./vitmatte/image_processing_vitmatte.js */ "./src/models/vitmatte/image_processing_vitmatte.js");
17470
- /* harmony import */ var _vitpose_image_processing_vitpose_js__WEBPACK_IMPORTED_MODULE_34__ = __webpack_require__(/*! ./vitpose/image_processing_vitpose.js */ "./src/models/vitpose/image_processing_vitpose.js");
17471
- /* harmony import */ var _yolos_image_processing_yolos_js__WEBPACK_IMPORTED_MODULE_35__ = __webpack_require__(/*! ./yolos/image_processing_yolos.js */ "./src/models/yolos/image_processing_yolos.js");
17640
+ /* harmony import */ var _phi3_v_image_processing_phi3_v_js__WEBPACK_IMPORTED_MODULE_25__ = __webpack_require__(/*! ./phi3_v/image_processing_phi3_v.js */ "./src/models/phi3_v/image_processing_phi3_v.js");
17641
+ /* harmony import */ var _pvt_image_processing_pvt_js__WEBPACK_IMPORTED_MODULE_26__ = __webpack_require__(/*! ./pvt/image_processing_pvt.js */ "./src/models/pvt/image_processing_pvt.js");
17642
+ /* harmony import */ var _qwen2_vl_image_processing_qwen2_vl_js__WEBPACK_IMPORTED_MODULE_27__ = __webpack_require__(/*! ./qwen2_vl/image_processing_qwen2_vl.js */ "./src/models/qwen2_vl/image_processing_qwen2_vl.js");
17643
+ /* harmony import */ var _rt_detr_image_processing_rt_detr_js__WEBPACK_IMPORTED_MODULE_28__ = __webpack_require__(/*! ./rt_detr/image_processing_rt_detr.js */ "./src/models/rt_detr/image_processing_rt_detr.js");
17644
+ /* harmony import */ var _sam_image_processing_sam_js__WEBPACK_IMPORTED_MODULE_29__ = __webpack_require__(/*! ./sam/image_processing_sam.js */ "./src/models/sam/image_processing_sam.js");
17645
+ /* harmony import */ var _segformer_image_processing_segformer_js__WEBPACK_IMPORTED_MODULE_30__ = __webpack_require__(/*! ./segformer/image_processing_segformer.js */ "./src/models/segformer/image_processing_segformer.js");
17646
+ /* harmony import */ var _siglip_image_processing_siglip_js__WEBPACK_IMPORTED_MODULE_31__ = __webpack_require__(/*! ./siglip/image_processing_siglip.js */ "./src/models/siglip/image_processing_siglip.js");
17647
+ /* harmony import */ var _swin2sr_image_processing_swin2sr_js__WEBPACK_IMPORTED_MODULE_32__ = __webpack_require__(/*! ./swin2sr/image_processing_swin2sr.js */ "./src/models/swin2sr/image_processing_swin2sr.js");
17648
+ /* harmony import */ var _vit_image_processing_vit_js__WEBPACK_IMPORTED_MODULE_33__ = __webpack_require__(/*! ./vit/image_processing_vit.js */ "./src/models/vit/image_processing_vit.js");
17649
+ /* harmony import */ var _vitmatte_image_processing_vitmatte_js__WEBPACK_IMPORTED_MODULE_34__ = __webpack_require__(/*! ./vitmatte/image_processing_vitmatte.js */ "./src/models/vitmatte/image_processing_vitmatte.js");
17650
+ /* harmony import */ var _vitpose_image_processing_vitpose_js__WEBPACK_IMPORTED_MODULE_35__ = __webpack_require__(/*! ./vitpose/image_processing_vitpose.js */ "./src/models/vitpose/image_processing_vitpose.js");
17651
+ /* harmony import */ var _yolos_image_processing_yolos_js__WEBPACK_IMPORTED_MODULE_36__ = __webpack_require__(/*! ./yolos/image_processing_yolos.js */ "./src/models/yolos/image_processing_yolos.js");
17652
+
17472
17653
 
17473
17654
 
17474
17655
 
@@ -18143,6 +18324,87 @@ class MobileViTImageProcessor extends _base_image_processors_utils_js__WEBPACK_I
18143
18324
  class MobileViTFeatureExtractor extends MobileViTImageProcessor { }
18144
18325
 
18145
18326
 
18327
+ /***/ }),
18328
+
18329
+ /***/ "./src/models/moonshine/feature_extraction_moonshine.js":
18330
+ /*!**************************************************************!*\
18331
+ !*** ./src/models/moonshine/feature_extraction_moonshine.js ***!
18332
+ \**************************************************************/
18333
+ /***/ ((__unused_webpack___webpack_module__, __webpack_exports__, __webpack_require__) => {
18334
+
18335
+ "use strict";
18336
+ __webpack_require__.r(__webpack_exports__);
18337
+ /* harmony export */ __webpack_require__.d(__webpack_exports__, {
18338
+ /* harmony export */ MoonshineFeatureExtractor: () => (/* binding */ MoonshineFeatureExtractor)
18339
+ /* harmony export */ });
18340
+ /* harmony import */ var _base_feature_extraction_utils_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ../../base/feature_extraction_utils.js */ "./src/base/feature_extraction_utils.js");
18341
+ /* harmony import */ var _utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ../../utils/tensor.js */ "./src/utils/tensor.js");
18342
+
18343
+
18344
+
18345
+
18346
+ class MoonshineFeatureExtractor extends _base_feature_extraction_utils_js__WEBPACK_IMPORTED_MODULE_0__.FeatureExtractor {
18347
+ /**
18348
+ * Asynchronously extracts input values from a given audio using the provided configuration.
18349
+ * @param {Float32Array|Float64Array} audio The audio data as a Float32Array/Float64Array.
18350
+ * @returns {Promise<{ input_values: Tensor; }>} The extracted input values.
18351
+ */
18352
+ async _call(audio) {
18353
+ (0,_base_feature_extraction_utils_js__WEBPACK_IMPORTED_MODULE_0__.validate_audio_inputs)(audio, 'MoonshineFeatureExtractor');
18354
+
18355
+ if (audio instanceof Float64Array) {
18356
+ audio = new Float32Array(audio);
18357
+ }
18358
+
18359
+ const shape = [
18360
+ 1, /* batch_size */
18361
+ audio.length, /* num_samples */
18362
+ ];
18363
+ return {
18364
+ input_values: new _utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.Tensor('float32', audio, shape),
18365
+ };
18366
+ }
18367
+ }
18368
+
18369
+
18370
+ /***/ }),
18371
+
18372
+ /***/ "./src/models/moonshine/processing_moonshine.js":
18373
+ /*!******************************************************!*\
18374
+ !*** ./src/models/moonshine/processing_moonshine.js ***!
18375
+ \******************************************************/
18376
+ /***/ ((__unused_webpack___webpack_module__, __webpack_exports__, __webpack_require__) => {
18377
+
18378
+ "use strict";
18379
+ __webpack_require__.r(__webpack_exports__);
18380
+ /* harmony export */ __webpack_require__.d(__webpack_exports__, {
18381
+ /* harmony export */ MoonshineProcessor: () => (/* binding */ MoonshineProcessor)
18382
+ /* harmony export */ });
18383
+ /* harmony import */ var _auto_feature_extraction_auto_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ../auto/feature_extraction_auto.js */ "./src/models/auto/feature_extraction_auto.js");
18384
+ /* harmony import */ var _tokenizers_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ../../tokenizers.js */ "./src/tokenizers.js");
18385
+ /* harmony import */ var _base_processing_utils_js__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! ../../base/processing_utils.js */ "./src/base/processing_utils.js");
18386
+
18387
+
18388
+
18389
+
18390
+ /**
18391
+ * Represents a MoonshineProcessor that extracts features from an audio input.
18392
+ */
18393
+ class MoonshineProcessor extends _base_processing_utils_js__WEBPACK_IMPORTED_MODULE_2__.Processor {
18394
+ static tokenizer_class = _tokenizers_js__WEBPACK_IMPORTED_MODULE_1__.AutoTokenizer
18395
+ static feature_extractor_class = _auto_feature_extraction_auto_js__WEBPACK_IMPORTED_MODULE_0__.AutoFeatureExtractor
18396
+
18397
+ /**
18398
+ * Calls the feature_extractor function with the given audio input.
18399
+ * @param {any} audio The audio input to extract features from.
18400
+ * @returns {Promise<any>} A Promise that resolves with the extracted features.
18401
+ */
18402
+ async _call(audio) {
18403
+ return await this.feature_extractor(audio);
18404
+ }
18405
+ }
18406
+
18407
+
18146
18408
  /***/ }),
18147
18409
 
18148
18410
  /***/ "./src/models/nougat/image_processing_nougat.js":
@@ -18336,6 +18598,256 @@ class PaliGemmaProcessor extends _base_processing_utils_js__WEBPACK_IMPORTED_MOD
18336
18598
  }
18337
18599
 
18338
18600
 
18601
+ /***/ }),
18602
+
18603
+ /***/ "./src/models/phi3_v/image_processing_phi3_v.js":
18604
+ /*!******************************************************!*\
18605
+ !*** ./src/models/phi3_v/image_processing_phi3_v.js ***!
18606
+ \******************************************************/
18607
+ /***/ ((__unused_webpack___webpack_module__, __webpack_exports__, __webpack_require__) => {
18608
+
18609
+ "use strict";
18610
+ __webpack_require__.r(__webpack_exports__);
18611
+ /* harmony export */ __webpack_require__.d(__webpack_exports__, {
18612
+ /* harmony export */ Phi3VImageProcessor: () => (/* binding */ Phi3VImageProcessor)
18613
+ /* harmony export */ });
18614
+ /* harmony import */ var _base_image_processors_utils_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ../../base/image_processors_utils.js */ "./src/base/image_processors_utils.js");
18615
+ /* harmony import */ var _utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ../../utils/tensor.js */ "./src/utils/tensor.js");
18616
+
18617
+
18618
+
18619
+ const IMAGE_SIZE = 336;
18620
+ const SLICE_AXES = [2, 3]; // axes to slice on
18621
+ const { ceil, floor, sqrt } = Math;
18622
+
18623
+ class Phi3VImageProcessor extends _base_image_processors_utils_js__WEBPACK_IMPORTED_MODULE_0__.ImageProcessor {
18624
+ constructor(config) {
18625
+ super({
18626
+ ...config,
18627
+ do_normalize: true,
18628
+ do_pad: true,
18629
+ pad_size: 'custom',
18630
+ do_convert_rgb: true,
18631
+ do_resize: true, // Smart resizing "hd_transform"
18632
+ });
18633
+
18634
+ this._num_crops = config.num_crops;
18635
+ }
18636
+ calc_num_image_tokens_from_image_size(width, height) {
18637
+ // @ts-expect-error
18638
+ const { num_img_tokens } = this.config;
18639
+ return floor(((floor((height / IMAGE_SIZE)) * floor((width / IMAGE_SIZE)) + 1) * num_img_tokens) + 1 + (floor(height / IMAGE_SIZE) + 1) * sqrt(num_img_tokens));
18640
+ }
18641
+
18642
+ /** @type {ImageProcessor['get_resize_output_image_size']} */
18643
+ get_resize_output_image_size(image, size) {
18644
+ const hd_num = this._num_crops;
18645
+ const [width, height] = image.size
18646
+
18647
+ let ratio = width / height;
18648
+ let scale = 1;
18649
+
18650
+ // Calculate the scaling factor
18651
+ while (scale * Math.ceil(scale / ratio) <= hd_num) {
18652
+ scale += 1;
18653
+ }
18654
+ scale -= 1;
18655
+
18656
+ // Compute the new dimensions
18657
+ const new_w = Math.floor(scale * 336);
18658
+ const new_h = Math.floor(new_w / ratio);
18659
+
18660
+ return [new_w, new_h]
18661
+ }
18662
+
18663
+
18664
+ /** @type {ImageProcessor['pad_image']} */
18665
+ pad_image(pixelData, imgDims, padSize, options = {}) {
18666
+ // Phi3V uses a custom padding strategy:
18667
+ // - Pad to a multiple of 336
18668
+ // - Pad with white pixels
18669
+ const [imageHeight, imageWidth] = imgDims;
18670
+ const height = IMAGE_SIZE * ceil(imageHeight / IMAGE_SIZE);
18671
+ const width = IMAGE_SIZE * ceil(imageWidth / IMAGE_SIZE);
18672
+
18673
+ // NOTE: Since padding is done after normalization, we need to fill with the normalized values
18674
+ const constant_values = [1, 1, 1].map((x, i) => (x - this.image_mean[i]) / this.image_std[i]);
18675
+ return super.pad_image(pixelData, imgDims, { width, height }, {
18676
+ center: true,
18677
+ constant_values,
18678
+ ...options,
18679
+ });
18680
+ }
18681
+
18682
+ async _call(images, {
18683
+ num_crops = null,
18684
+ } = {}) {
18685
+ // @ts-expect-error
18686
+ this._num_crops = num_crops ??= this.config.num_crops;
18687
+ if (num_crops < 4 || sqrt(num_crops) % 1 !== 0) {
18688
+ throw new Error("num_crops must be a square number >= 4");
18689
+ }
18690
+
18691
+ if (!Array.isArray(images)) {
18692
+ images = [images];
18693
+ }
18694
+
18695
+ const num_images = images.length;
18696
+ const imageData = await Promise.all(images.map(x => this.preprocess(x)));
18697
+
18698
+ const original_sizes = imageData.map(x => x.original_size);
18699
+ const reshaped_input_sizes = imageData.map(x => x.reshaped_input_size);
18700
+
18701
+ // Process each image in batch
18702
+ const all_pixel_values = [];
18703
+ for (const { pixel_values } of imageData) {
18704
+ pixel_values.unsqueeze_(0); // Easier processing as 4D tensor
18705
+
18706
+ const [height, width] = pixel_values.dims.slice(-2);
18707
+
18708
+ // Global image (Tensor of shape [num_channels, height, width])
18709
+ const batch_pixel_values = await (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.interpolate_4d)(pixel_values, {
18710
+ size: [IMAGE_SIZE, IMAGE_SIZE],
18711
+ mode: 'bicubic',
18712
+ });
18713
+
18714
+ if (num_crops > 0) {
18715
+ const patches = [];
18716
+ const sqrt_patches = sqrt(num_crops);
18717
+ const patch_width = floor(width / sqrt_patches);
18718
+ const patch_height = floor(height / sqrt_patches);
18719
+ for (let y = 0; y < sqrt_patches; ++y) {
18720
+ for (let x = 0; x < sqrt_patches; ++x) {
18721
+ let start_x, start_y, end_x, end_y;
18722
+ if (y === sqrt_patches - 1) { // At bottom
18723
+ start_y = height - patch_height;
18724
+ end_y = height;
18725
+ } else {
18726
+ start_y = y * patch_height;
18727
+ end_y = (y + 1) * patch_height;
18728
+ }
18729
+ if (x === sqrt_patches - 1) { // At right
18730
+ start_x = width - patch_width;
18731
+ end_x = width;
18732
+ } else {
18733
+ start_x = x * patch_width;
18734
+ end_x = (x + 1) * patch_width;
18735
+ }
18736
+
18737
+ const starts = [start_y, start_x];
18738
+ const ends = [end_y, end_x];
18739
+ const patch = await (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.slice)(pixel_values, starts, ends, SLICE_AXES);
18740
+ patches.push(patch);
18741
+ }
18742
+ }
18743
+
18744
+ const resized_tensors = await (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.interpolate_4d)((0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.cat)(patches, 0), {
18745
+ size: [IMAGE_SIZE, IMAGE_SIZE],
18746
+ mode: 'bicubic',
18747
+ }); // [num_crops, 3, 336, 336]
18748
+
18749
+ // Concatenate the global image with the patches
18750
+ all_pixel_values.push((0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.cat)([batch_pixel_values, resized_tensors], 0));
18751
+ } else {
18752
+ // Only use the global image
18753
+ // NOTE: Not currently supported in modelling code
18754
+ all_pixel_values.push(batch_pixel_values);
18755
+ }
18756
+ }
18757
+
18758
+ // [num_images, 1 + num_crops, num_channels=3, height, width]
18759
+ const pixel_values = (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.stack)(all_pixel_values, 0);
18760
+
18761
+ // Calculate padded image sizes
18762
+ const sizes = reshaped_input_sizes.map(x => x.map(y => IMAGE_SIZE * ceil(y / IMAGE_SIZE)));
18763
+
18764
+ const image_sizes = new _utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.Tensor(
18765
+ 'int64',
18766
+ sizes.flat(),
18767
+ [num_images, 2],
18768
+ );
18769
+
18770
+ const num_img_tokens = sizes.map(
18771
+ ([height, width]) => this.calc_num_image_tokens_from_image_size(width, height),
18772
+ );
18773
+
18774
+ return { pixel_values, original_sizes, reshaped_input_sizes, image_sizes, num_img_tokens };
18775
+ }
18776
+ }
18777
+
18778
+
18779
+ /***/ }),
18780
+
18781
+ /***/ "./src/models/phi3_v/processing_phi3_v.js":
18782
+ /*!************************************************!*\
18783
+ !*** ./src/models/phi3_v/processing_phi3_v.js ***!
18784
+ \************************************************/
18785
+ /***/ ((__unused_webpack___webpack_module__, __webpack_exports__, __webpack_require__) => {
18786
+
18787
+ "use strict";
18788
+ __webpack_require__.r(__webpack_exports__);
18789
+ /* harmony export */ __webpack_require__.d(__webpack_exports__, {
18790
+ /* harmony export */ Phi3VProcessor: () => (/* binding */ Phi3VProcessor)
18791
+ /* harmony export */ });
18792
+ /* harmony import */ var _base_processing_utils_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ../../base/processing_utils.js */ "./src/base/processing_utils.js");
18793
+ /* harmony import */ var _auto_image_processing_auto_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ../auto/image_processing_auto.js */ "./src/models/auto/image_processing_auto.js");
18794
+ /* harmony import */ var _tokenizers_js__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! ../../tokenizers.js */ "./src/tokenizers.js");
18795
+ /* harmony import */ var _utils_image_js__WEBPACK_IMPORTED_MODULE_3__ = __webpack_require__(/*! ../../utils/image.js */ "./src/utils/image.js");
18796
+
18797
+
18798
+
18799
+
18800
+
18801
+ const IMAGE_TOKEN = "<|image|>";
18802
+ const IMAGE_TOKEN_PATTERN = /<\|image_\d+\|>/g;
18803
+
18804
+ class Phi3VProcessor extends _base_processing_utils_js__WEBPACK_IMPORTED_MODULE_0__.Processor {
18805
+ static image_processor_class = _auto_image_processing_auto_js__WEBPACK_IMPORTED_MODULE_1__.AutoImageProcessor
18806
+ static tokenizer_class = _tokenizers_js__WEBPACK_IMPORTED_MODULE_2__.AutoTokenizer
18807
+
18808
+ /**
18809
+ *
18810
+ * @param {string|string[]} text
18811
+ * @param {RawImage|RawImage[]} images
18812
+ * @param {...any} args
18813
+ * @returns {Promise<any>}
18814
+ */
18815
+ async _call(text, images = null, {
18816
+ padding = true,
18817
+ truncation = true,
18818
+ num_crops = null,
18819
+ } = {}) {
18820
+
18821
+ if (!Array.isArray(text)) {
18822
+ text = [text];
18823
+ }
18824
+
18825
+ let text_inputs, image_inputs;
18826
+ if (images) {
18827
+ image_inputs = await this.image_processor(images, { num_crops });
18828
+ const { num_img_tokens } = image_inputs;
18829
+
18830
+ // The original implementation adds a bos_token before the image tokens
18831
+ // TODO: Check if this affects performance, since it looks like a bug in the original implementation
18832
+ const prompt_chunks = text.map((t, i) => t.split(IMAGE_TOKEN_PATTERN).join(IMAGE_TOKEN.repeat(num_img_tokens[i])));
18833
+
18834
+ text_inputs = this.tokenizer(prompt_chunks, { padding, truncation });
18835
+
18836
+ // The model expects image tokens to be negative, so we negate the image token ids
18837
+ const image_token_id = this.tokenizer.model.convert_tokens_to_ids([IMAGE_TOKEN])[0];
18838
+ text_inputs.input_ids.map_(id => (id == image_token_id) ? -id : id);
18839
+ } else {
18840
+ text_inputs = this.tokenizer(text);
18841
+ }
18842
+
18843
+ return {
18844
+ ...text_inputs,
18845
+ ...image_inputs,
18846
+ }
18847
+ }
18848
+ }
18849
+
18850
+
18339
18851
  /***/ }),
18340
18852
 
18341
18853
  /***/ "./src/models/processors.js":
@@ -18348,32 +18860,38 @@ class PaliGemmaProcessor extends _base_processing_utils_js__WEBPACK_IMPORTED_MOD
18348
18860
  __webpack_require__.r(__webpack_exports__);
18349
18861
  /* harmony export */ __webpack_require__.d(__webpack_exports__, {
18350
18862
  /* harmony export */ Florence2Processor: () => (/* reexport safe */ _florence2_processing_florence2_js__WEBPACK_IMPORTED_MODULE_0__.Florence2Processor),
18351
- /* harmony export */ Idefics3Processor: () => (/* reexport safe */ _idefics3_processing_idefics3_js__WEBPACK_IMPORTED_MODULE_2__.Idefics3Processor),
18352
- /* harmony export */ JinaCLIPProcessor: () => (/* reexport safe */ _jina_clip_processing_jina_clip_js__WEBPACK_IMPORTED_MODULE_4__.JinaCLIPProcessor),
18863
+ /* harmony export */ Idefics3Processor: () => (/* reexport safe */ _idefics3_processing_idefics3_js__WEBPACK_IMPORTED_MODULE_3__.Idefics3Processor),
18864
+ /* harmony export */ JinaCLIPProcessor: () => (/* reexport safe */ _jina_clip_processing_jina_clip_js__WEBPACK_IMPORTED_MODULE_5__.JinaCLIPProcessor),
18353
18865
  /* harmony export */ MgpstrProcessor: () => (/* reexport safe */ _mgp_str_processing_mgp_str_js__WEBPACK_IMPORTED_MODULE_1__.MgpstrProcessor),
18354
- /* harmony export */ OwlViTProcessor: () => (/* reexport safe */ _owlvit_processing_owlvit_js__WEBPACK_IMPORTED_MODULE_5__.OwlViTProcessor),
18355
- /* harmony export */ PaliGemmaProcessor: () => (/* reexport safe */ _paligemma_processing_paligemma_js__WEBPACK_IMPORTED_MODULE_6__.PaliGemmaProcessor),
18356
- /* harmony export */ PyAnnoteProcessor: () => (/* reexport safe */ _pyannote_processing_pyannote_js__WEBPACK_IMPORTED_MODULE_7__.PyAnnoteProcessor),
18357
- /* harmony export */ Qwen2VLProcessor: () => (/* reexport safe */ _qwen2_vl_processing_qwen2_vl_js__WEBPACK_IMPORTED_MODULE_8__.Qwen2VLProcessor),
18358
- /* harmony export */ SamProcessor: () => (/* reexport safe */ _sam_processing_sam_js__WEBPACK_IMPORTED_MODULE_9__.SamProcessor),
18359
- /* harmony export */ SpeechT5Processor: () => (/* reexport safe */ _speecht5_processing_speecht5_js__WEBPACK_IMPORTED_MODULE_10__.SpeechT5Processor),
18360
- /* harmony export */ VLChatProcessor: () => (/* reexport safe */ _janus_processing_janus_js__WEBPACK_IMPORTED_MODULE_3__.VLChatProcessor),
18361
- /* harmony export */ Wav2Vec2ProcessorWithLM: () => (/* reexport safe */ _wav2vec2_processing_wav2vec2_js__WEBPACK_IMPORTED_MODULE_11__.Wav2Vec2ProcessorWithLM),
18362
- /* harmony export */ WhisperProcessor: () => (/* reexport safe */ _whisper_processing_whisper_js__WEBPACK_IMPORTED_MODULE_12__.WhisperProcessor)
18866
+ /* harmony export */ MoonshineProcessor: () => (/* reexport safe */ _moonshine_processing_moonshine_js__WEBPACK_IMPORTED_MODULE_2__.MoonshineProcessor),
18867
+ /* harmony export */ OwlViTProcessor: () => (/* reexport safe */ _owlvit_processing_owlvit_js__WEBPACK_IMPORTED_MODULE_6__.OwlViTProcessor),
18868
+ /* harmony export */ PaliGemmaProcessor: () => (/* reexport safe */ _paligemma_processing_paligemma_js__WEBPACK_IMPORTED_MODULE_8__.PaliGemmaProcessor),
18869
+ /* harmony export */ Phi3VProcessor: () => (/* reexport safe */ _phi3_v_processing_phi3_v_js__WEBPACK_IMPORTED_MODULE_7__.Phi3VProcessor),
18870
+ /* harmony export */ PyAnnoteProcessor: () => (/* reexport safe */ _pyannote_processing_pyannote_js__WEBPACK_IMPORTED_MODULE_9__.PyAnnoteProcessor),
18871
+ /* harmony export */ Qwen2VLProcessor: () => (/* reexport safe */ _qwen2_vl_processing_qwen2_vl_js__WEBPACK_IMPORTED_MODULE_10__.Qwen2VLProcessor),
18872
+ /* harmony export */ SamProcessor: () => (/* reexport safe */ _sam_processing_sam_js__WEBPACK_IMPORTED_MODULE_11__.SamProcessor),
18873
+ /* harmony export */ SpeechT5Processor: () => (/* reexport safe */ _speecht5_processing_speecht5_js__WEBPACK_IMPORTED_MODULE_12__.SpeechT5Processor),
18874
+ /* harmony export */ VLChatProcessor: () => (/* reexport safe */ _janus_processing_janus_js__WEBPACK_IMPORTED_MODULE_4__.VLChatProcessor),
18875
+ /* harmony export */ Wav2Vec2ProcessorWithLM: () => (/* reexport safe */ _wav2vec2_processing_wav2vec2_js__WEBPACK_IMPORTED_MODULE_13__.Wav2Vec2ProcessorWithLM),
18876
+ /* harmony export */ WhisperProcessor: () => (/* reexport safe */ _whisper_processing_whisper_js__WEBPACK_IMPORTED_MODULE_14__.WhisperProcessor)
18363
18877
  /* harmony export */ });
18364
18878
  /* harmony import */ var _florence2_processing_florence2_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ./florence2/processing_florence2.js */ "./src/models/florence2/processing_florence2.js");
18365
18879
  /* harmony import */ var _mgp_str_processing_mgp_str_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ./mgp_str/processing_mgp_str.js */ "./src/models/mgp_str/processing_mgp_str.js");
18366
- /* harmony import */ var _idefics3_processing_idefics3_js__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! ./idefics3/processing_idefics3.js */ "./src/models/idefics3/processing_idefics3.js");
18367
- /* harmony import */ var _janus_processing_janus_js__WEBPACK_IMPORTED_MODULE_3__ = __webpack_require__(/*! ./janus/processing_janus.js */ "./src/models/janus/processing_janus.js");
18368
- /* harmony import */ var _jina_clip_processing_jina_clip_js__WEBPACK_IMPORTED_MODULE_4__ = __webpack_require__(/*! ./jina_clip/processing_jina_clip.js */ "./src/models/jina_clip/processing_jina_clip.js");
18369
- /* harmony import */ var _owlvit_processing_owlvit_js__WEBPACK_IMPORTED_MODULE_5__ = __webpack_require__(/*! ./owlvit/processing_owlvit.js */ "./src/models/owlvit/processing_owlvit.js");
18370
- /* harmony import */ var _paligemma_processing_paligemma_js__WEBPACK_IMPORTED_MODULE_6__ = __webpack_require__(/*! ./paligemma/processing_paligemma.js */ "./src/models/paligemma/processing_paligemma.js");
18371
- /* harmony import */ var _pyannote_processing_pyannote_js__WEBPACK_IMPORTED_MODULE_7__ = __webpack_require__(/*! ./pyannote/processing_pyannote.js */ "./src/models/pyannote/processing_pyannote.js");
18372
- /* harmony import */ var _qwen2_vl_processing_qwen2_vl_js__WEBPACK_IMPORTED_MODULE_8__ = __webpack_require__(/*! ./qwen2_vl/processing_qwen2_vl.js */ "./src/models/qwen2_vl/processing_qwen2_vl.js");
18373
- /* harmony import */ var _sam_processing_sam_js__WEBPACK_IMPORTED_MODULE_9__ = __webpack_require__(/*! ./sam/processing_sam.js */ "./src/models/sam/processing_sam.js");
18374
- /* harmony import */ var _speecht5_processing_speecht5_js__WEBPACK_IMPORTED_MODULE_10__ = __webpack_require__(/*! ./speecht5/processing_speecht5.js */ "./src/models/speecht5/processing_speecht5.js");
18375
- /* harmony import */ var _wav2vec2_processing_wav2vec2_js__WEBPACK_IMPORTED_MODULE_11__ = __webpack_require__(/*! ./wav2vec2/processing_wav2vec2.js */ "./src/models/wav2vec2/processing_wav2vec2.js");
18376
- /* harmony import */ var _whisper_processing_whisper_js__WEBPACK_IMPORTED_MODULE_12__ = __webpack_require__(/*! ./whisper/processing_whisper.js */ "./src/models/whisper/processing_whisper.js");
18880
+ /* harmony import */ var _moonshine_processing_moonshine_js__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! ./moonshine/processing_moonshine.js */ "./src/models/moonshine/processing_moonshine.js");
18881
+ /* harmony import */ var _idefics3_processing_idefics3_js__WEBPACK_IMPORTED_MODULE_3__ = __webpack_require__(/*! ./idefics3/processing_idefics3.js */ "./src/models/idefics3/processing_idefics3.js");
18882
+ /* harmony import */ var _janus_processing_janus_js__WEBPACK_IMPORTED_MODULE_4__ = __webpack_require__(/*! ./janus/processing_janus.js */ "./src/models/janus/processing_janus.js");
18883
+ /* harmony import */ var _jina_clip_processing_jina_clip_js__WEBPACK_IMPORTED_MODULE_5__ = __webpack_require__(/*! ./jina_clip/processing_jina_clip.js */ "./src/models/jina_clip/processing_jina_clip.js");
18884
+ /* harmony import */ var _owlvit_processing_owlvit_js__WEBPACK_IMPORTED_MODULE_6__ = __webpack_require__(/*! ./owlvit/processing_owlvit.js */ "./src/models/owlvit/processing_owlvit.js");
18885
+ /* harmony import */ var _phi3_v_processing_phi3_v_js__WEBPACK_IMPORTED_MODULE_7__ = __webpack_require__(/*! ./phi3_v/processing_phi3_v.js */ "./src/models/phi3_v/processing_phi3_v.js");
18886
+ /* harmony import */ var _paligemma_processing_paligemma_js__WEBPACK_IMPORTED_MODULE_8__ = __webpack_require__(/*! ./paligemma/processing_paligemma.js */ "./src/models/paligemma/processing_paligemma.js");
18887
+ /* harmony import */ var _pyannote_processing_pyannote_js__WEBPACK_IMPORTED_MODULE_9__ = __webpack_require__(/*! ./pyannote/processing_pyannote.js */ "./src/models/pyannote/processing_pyannote.js");
18888
+ /* harmony import */ var _qwen2_vl_processing_qwen2_vl_js__WEBPACK_IMPORTED_MODULE_10__ = __webpack_require__(/*! ./qwen2_vl/processing_qwen2_vl.js */ "./src/models/qwen2_vl/processing_qwen2_vl.js");
18889
+ /* harmony import */ var _sam_processing_sam_js__WEBPACK_IMPORTED_MODULE_11__ = __webpack_require__(/*! ./sam/processing_sam.js */ "./src/models/sam/processing_sam.js");
18890
+ /* harmony import */ var _speecht5_processing_speecht5_js__WEBPACK_IMPORTED_MODULE_12__ = __webpack_require__(/*! ./speecht5/processing_speecht5.js */ "./src/models/speecht5/processing_speecht5.js");
18891
+ /* harmony import */ var _wav2vec2_processing_wav2vec2_js__WEBPACK_IMPORTED_MODULE_13__ = __webpack_require__(/*! ./wav2vec2/processing_wav2vec2.js */ "./src/models/wav2vec2/processing_wav2vec2.js");
18892
+ /* harmony import */ var _whisper_processing_whisper_js__WEBPACK_IMPORTED_MODULE_14__ = __webpack_require__(/*! ./whisper/processing_whisper.js */ "./src/models/whisper/processing_whisper.js");
18893
+
18894
+
18377
18895
 
18378
18896
 
18379
18897
 
@@ -18423,6 +18941,8 @@ __webpack_require__.r(__webpack_exports__);
18423
18941
  /* harmony export */ });
18424
18942
  /* harmony import */ var _base_feature_extraction_utils_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ../../base/feature_extraction_utils.js */ "./src/base/feature_extraction_utils.js");
18425
18943
  /* harmony import */ var _utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ../../utils/tensor.js */ "./src/utils/tensor.js");
18944
+ /* harmony import */ var _utils_maths_js__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! ../../utils/maths.js */ "./src/utils/maths.js");
18945
+
18426
18946
 
18427
18947
 
18428
18948
 
@@ -18450,41 +18970,6 @@ class PyAnnoteFeatureExtractor extends _base_feature_extraction_utils_js__WEBPAC
18450
18970
  };
18451
18971
  }
18452
18972
 
18453
- }
18454
-
18455
-
18456
- /***/ }),
18457
-
18458
- /***/ "./src/models/pyannote/processing_pyannote.js":
18459
- /*!****************************************************!*\
18460
- !*** ./src/models/pyannote/processing_pyannote.js ***!
18461
- \****************************************************/
18462
- /***/ ((__unused_webpack___webpack_module__, __webpack_exports__, __webpack_require__) => {
18463
-
18464
- "use strict";
18465
- __webpack_require__.r(__webpack_exports__);
18466
- /* harmony export */ __webpack_require__.d(__webpack_exports__, {
18467
- /* harmony export */ PyAnnoteProcessor: () => (/* binding */ PyAnnoteProcessor)
18468
- /* harmony export */ });
18469
- /* harmony import */ var _base_processing_utils_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ../../base/processing_utils.js */ "./src/base/processing_utils.js");
18470
- /* harmony import */ var _auto_feature_extraction_auto_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ../auto/feature_extraction_auto.js */ "./src/models/auto/feature_extraction_auto.js");
18471
- /* harmony import */ var _utils_maths_js__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! ../../utils/maths.js */ "./src/utils/maths.js");
18472
-
18473
-
18474
-
18475
-
18476
- class PyAnnoteProcessor extends _base_processing_utils_js__WEBPACK_IMPORTED_MODULE_0__.Processor {
18477
- static feature_extractor_class = _auto_feature_extraction_auto_js__WEBPACK_IMPORTED_MODULE_1__.AutoFeatureExtractor
18478
-
18479
- /**
18480
- * Calls the feature_extractor function with the given audio input.
18481
- * @param {any} audio The audio input to extract features from.
18482
- * @returns {Promise<any>} A Promise that resolves with the extracted features.
18483
- */
18484
- async _call(audio) {
18485
- return await this.feature_extractor(audio)
18486
- }
18487
-
18488
18973
  /**
18489
18974
  * NOTE: Can return fractional values. `Math.ceil` will ensure correct value.
18490
18975
  * @param {number} samples The number of frames in the audio.
@@ -18539,6 +19024,48 @@ class PyAnnoteProcessor extends _base_processing_utils_js__WEBPACK_IMPORTED_MODU
18539
19024
  }
18540
19025
  return results;
18541
19026
  }
19027
+
19028
+ }
19029
+
19030
+
19031
+ /***/ }),
19032
+
19033
+ /***/ "./src/models/pyannote/processing_pyannote.js":
19034
+ /*!****************************************************!*\
19035
+ !*** ./src/models/pyannote/processing_pyannote.js ***!
19036
+ \****************************************************/
19037
+ /***/ ((__unused_webpack___webpack_module__, __webpack_exports__, __webpack_require__) => {
19038
+
19039
+ "use strict";
19040
+ __webpack_require__.r(__webpack_exports__);
19041
+ /* harmony export */ __webpack_require__.d(__webpack_exports__, {
19042
+ /* harmony export */ PyAnnoteProcessor: () => (/* binding */ PyAnnoteProcessor)
19043
+ /* harmony export */ });
19044
+ /* harmony import */ var _base_processing_utils_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ../../base/processing_utils.js */ "./src/base/processing_utils.js");
19045
+ /* harmony import */ var _feature_extraction_pyannote_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ./feature_extraction_pyannote.js */ "./src/models/pyannote/feature_extraction_pyannote.js");
19046
+
19047
+
19048
+
19049
+ class PyAnnoteProcessor extends _base_processing_utils_js__WEBPACK_IMPORTED_MODULE_0__.Processor {
19050
+ static feature_extractor_class = _feature_extraction_pyannote_js__WEBPACK_IMPORTED_MODULE_1__.PyAnnoteFeatureExtractor
19051
+
19052
+ /**
19053
+ * Calls the feature_extractor function with the given audio input.
19054
+ * @param {any} audio The audio input to extract features from.
19055
+ * @returns {Promise<any>} A Promise that resolves with the extracted features.
19056
+ */
19057
+ async _call(audio) {
19058
+ return await this.feature_extractor(audio)
19059
+ }
19060
+
19061
+ /** @type {PyAnnoteFeatureExtractor['post_process_speaker_diarization']} */
19062
+ post_process_speaker_diarization(...args) {
19063
+ return /** @type {PyAnnoteFeatureExtractor} */(this.feature_extractor).post_process_speaker_diarization(...args);
19064
+ }
19065
+
19066
+ get sampling_rate() {
19067
+ return this.feature_extractor.config.sampling_rate;
19068
+ }
18542
19069
  }
18543
19070
 
18544
19071
 
@@ -20288,6 +20815,17 @@ class TensorOpRegistry {
20288
20815
  }
20289
20816
  return this._top_k;
20290
20817
  }
20818
+
20819
+ static get slice() {
20820
+ if (!this._slice) {
20821
+ this._slice = wrap(
20822
+ [8, 7, 18, 0, 58, 96, 10, 25, 10, 1, 120, 10, 1, 115, 10, 1, 101, 10, 1, 97, 10, 1, 116, 18, 1, 121, 34, 5, 83, 108, 105, 99, 101, 18, 1, 114, 90, 9, 10, 1, 120, 18, 4, 10, 2, 8, 1, 90, 9, 10, 1, 115, 18, 4, 10, 2, 8, 7, 90, 9, 10, 1, 101, 18, 4, 10, 2, 8, 7, 90, 9, 10, 1, 97, 18, 4, 10, 2, 8, 7, 90, 9, 10, 1, 116, 18, 4, 10, 2, 8, 7, 98, 9, 10, 1, 121, 18, 4, 10, 2, 8, 1, 66, 2, 16, 13],
20823
+ this.session_options,
20824
+ 'y',
20825
+ )
20826
+ }
20827
+ return this._slice;
20828
+ }
20291
20829
  }
20292
20830
 
20293
20831
 
@@ -22021,6 +22559,8 @@ class AutomaticSpeechRecognitionPipeline extends (/** @type {new (options: TextA
22021
22559
  case 'unispeech-sat':
22022
22560
  case 'hubert':
22023
22561
  return this._call_wav2vec2(audio, kwargs)
22562
+ case 'moonshine':
22563
+ return this._call_moonshine(audio, kwargs)
22024
22564
  default:
22025
22565
  throw new Error(`AutomaticSpeechRecognitionPipeline does not support model type '${this.model.config.model_type}'.`)
22026
22566
  }
@@ -22174,6 +22714,34 @@ class AutomaticSpeechRecognitionPipeline extends (/** @type {new (options: TextA
22174
22714
  }
22175
22715
  return single ? toReturn[0] : toReturn;
22176
22716
  }
22717
+
22718
+ /**
22719
+ * @type {AutomaticSpeechRecognitionPipelineCallback}
22720
+ * @private
22721
+ */
22722
+ async _call_moonshine(audio, kwargs) {
22723
+ const single = !Array.isArray(audio);
22724
+ if (single) {
22725
+ audio = [/** @type {AudioInput} */ (audio)];
22726
+ }
22727
+ const sampling_rate = this.processor.feature_extractor.config.sampling_rate;
22728
+ const preparedAudios = await prepareAudios(audio, sampling_rate);
22729
+ const toReturn = [];
22730
+ for (const aud of preparedAudios) {
22731
+ const inputs = await this.processor(aud);
22732
+
22733
+ // According to the [paper](https://arxiv.org/pdf/2410.15608):
22734
+ // "We use greedy decoding, with a heuristic limit of 6 output tokens
22735
+ // per second of audio to avoid repeated output sequences."
22736
+ const max_new_tokens = Math.floor(aud.length / sampling_rate) * 6;
22737
+ const outputs = await this.model.generate({ max_new_tokens, ...kwargs, ...inputs });
22738
+
22739
+ const text = this.processor.batch_decode(outputs, { skip_special_tokens: true })[0];
22740
+ toReturn.push({ text });
22741
+ }
22742
+ return single ? toReturn[0] : toReturn;
22743
+ }
22744
+
22177
22745
  }
22178
22746
 
22179
22747
  /**
@@ -32372,7 +32940,9 @@ __webpack_require__.r(__webpack_exports__);
32372
32940
  /* harmony export */ ones_like: () => (/* binding */ ones_like),
32373
32941
  /* harmony export */ permute: () => (/* binding */ permute),
32374
32942
  /* harmony export */ quantize_embeddings: () => (/* binding */ quantize_embeddings),
32943
+ /* harmony export */ rand: () => (/* binding */ rand),
32375
32944
  /* harmony export */ rfft: () => (/* binding */ rfft),
32945
+ /* harmony export */ slice: () => (/* binding */ slice),
32376
32946
  /* harmony export */ stack: () => (/* binding */ stack),
32377
32947
  /* harmony export */ std_mean: () => (/* binding */ std_mean),
32378
32948
  /* harmony export */ topk: () => (/* binding */ topk),
@@ -33151,8 +33721,21 @@ class Tensor {
33151
33721
  if (!DataTypeMap.hasOwnProperty(type)) {
33152
33722
  throw new Error(`Unsupported type: ${type}`);
33153
33723
  }
33724
+
33725
+ // Handle special cases where a mapping function is needed (e.g., where one type is a bigint and the other is a number)
33726
+ let map_fn;
33727
+ const is_source_bigint = ['int64', 'uint64'].includes(this.type);
33728
+ const is_dest_bigint = ['int64', 'uint64'].includes(type);
33729
+ if (is_source_bigint && !is_dest_bigint) {
33730
+ // TypeError: Cannot convert a BigInt value to a number
33731
+ map_fn = Number;
33732
+ } else if (!is_source_bigint && is_dest_bigint) {
33733
+ // TypeError: Cannot convert [x] to a BigInt
33734
+ map_fn = BigInt;
33735
+ }
33736
+
33154
33737
  // @ts-ignore
33155
- return new Tensor(type, DataTypeMap[type].from(this.data), this.dims);
33738
+ return new Tensor(type, DataTypeMap[type].from(this.data, map_fn), this.dims);
33156
33739
  }
33157
33740
  }
33158
33741
 
@@ -33350,6 +33933,29 @@ async function topk(x, k) {
33350
33933
  });
33351
33934
  }
33352
33935
 
33936
+
33937
+ const arrayToIndexTensor = (array) => new Tensor('int64', array, [array.length]);
33938
+ /**
33939
+ * Slice a multidimensional float32 tensor.
33940
+ * @param {Tensor} data: Tensor of data to extract slices from
33941
+ * @param {number[]} starts: 1-D array of starting indices of corresponding axis in axes
33942
+ * @param {number[]} ends: 1-D array of ending indices (exclusive) of corresponding axis in axes
33943
+ * @param {number[]} axes: 1-D array of axes that starts and ends apply to
33944
+ * @param {number[]} [steps]: 1-D array of slice step of corresponding axis in axes.
33945
+ * @returns {Promise<Tensor>} Sliced data tensor.
33946
+ */
33947
+ async function slice(data, starts, ends, axes, steps) {
33948
+ const op = await _ops_registry_js__WEBPACK_IMPORTED_MODULE_2__.TensorOpRegistry.slice;
33949
+ return await op({
33950
+ x: data,
33951
+ s: arrayToIndexTensor(starts),
33952
+ e: arrayToIndexTensor(ends),
33953
+ a: arrayToIndexTensor(axes),
33954
+ t: arrayToIndexTensor(steps ?? new Array(axes.length).fill(1)),
33955
+ });
33956
+ }
33957
+
33958
+
33353
33959
  /**
33354
33960
  * Perform mean pooling of the last hidden state followed by a normalization step.
33355
33961
  * @param {Tensor} last_hidden_state Tensor of shape [batchSize, seqLength, embedDim]
@@ -33796,6 +34402,20 @@ function zeros_like(tensor) {
33796
34402
  return zeros(tensor.dims);
33797
34403
  }
33798
34404
 
34405
+ /**
34406
+ * Returns a tensor filled with random numbers from a uniform distribution on the interval [0, 1)
34407
+ * @param {number[]} size A sequence of integers defining the shape of the output tensor.
34408
+ * @returns {Tensor} The random tensor.
34409
+ */
34410
+ function rand(size) {
34411
+ const length = size.reduce((a, b) => a * b, 1);
34412
+ return new Tensor(
34413
+ "float32",
34414
+ Float32Array.from({ length }, () => Math.random()),
34415
+ size,
34416
+ )
34417
+ }
34418
+
33799
34419
  /**
33800
34420
  * Quantizes the embeddings tensor to binary or unsigned binary precision.
33801
34421
  * @param {Tensor} tensor The tensor to quantize.
@@ -34141,6 +34761,9 @@ __webpack_require__.r(__webpack_exports__);
34141
34761
  /* harmony export */ EsmModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.EsmModel),
34142
34762
  /* harmony export */ EsmPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.EsmPreTrainedModel),
34143
34763
  /* harmony export */ EsmTokenizer: () => (/* reexport safe */ _tokenizers_js__WEBPACK_IMPORTED_MODULE_3__.EsmTokenizer),
34764
+ /* harmony export */ ExaoneForCausalLM: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.ExaoneForCausalLM),
34765
+ /* harmony export */ ExaoneModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.ExaoneModel),
34766
+ /* harmony export */ ExaonePreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.ExaonePreTrainedModel),
34144
34767
  /* harmony export */ FFT: () => (/* reexport safe */ _utils_maths_js__WEBPACK_IMPORTED_MODULE_8__.FFT),
34145
34768
  /* harmony export */ FalconForCausalLM: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.FalconForCausalLM),
34146
34769
  /* harmony export */ FalconModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.FalconModel),
@@ -34320,6 +34943,11 @@ __webpack_require__.r(__webpack_exports__);
34320
34943
  /* harmony export */ MobileViTV2PreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MobileViTV2PreTrainedModel),
34321
34944
  /* harmony export */ ModelOutput: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.ModelOutput),
34322
34945
  /* harmony export */ Moondream1ForConditionalGeneration: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Moondream1ForConditionalGeneration),
34946
+ /* harmony export */ MoonshineFeatureExtractor: () => (/* reexport safe */ _models_feature_extractors_js__WEBPACK_IMPORTED_MODULE_10__.MoonshineFeatureExtractor),
34947
+ /* harmony export */ MoonshineForConditionalGeneration: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MoonshineForConditionalGeneration),
34948
+ /* harmony export */ MoonshineModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MoonshineModel),
34949
+ /* harmony export */ MoonshinePreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MoonshinePreTrainedModel),
34950
+ /* harmony export */ MoonshineProcessor: () => (/* reexport safe */ _models_processors_js__WEBPACK_IMPORTED_MODULE_16__.MoonshineProcessor),
34323
34951
  /* harmony export */ MptForCausalLM: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MptForCausalLM),
34324
34952
  /* harmony export */ MptModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MptModel),
34325
34953
  /* harmony export */ MptPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MptPreTrainedModel),
@@ -34371,6 +34999,10 @@ __webpack_require__.r(__webpack_exports__);
34371
34999
  /* harmony export */ Phi3ForCausalLM: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Phi3ForCausalLM),
34372
35000
  /* harmony export */ Phi3Model: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Phi3Model),
34373
35001
  /* harmony export */ Phi3PreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Phi3PreTrainedModel),
35002
+ /* harmony export */ Phi3VForCausalLM: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Phi3VForCausalLM),
35003
+ /* harmony export */ Phi3VImageProcessor: () => (/* reexport safe */ _models_image_processors_js__WEBPACK_IMPORTED_MODULE_13__.Phi3VImageProcessor),
35004
+ /* harmony export */ Phi3VPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Phi3VPreTrainedModel),
35005
+ /* harmony export */ Phi3VProcessor: () => (/* reexport safe */ _models_processors_js__WEBPACK_IMPORTED_MODULE_16__.Phi3VProcessor),
34374
35006
  /* harmony export */ PhiForCausalLM: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.PhiForCausalLM),
34375
35007
  /* harmony export */ PhiModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.PhiModel),
34376
35008
  /* harmony export */ PhiPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.PhiPreTrainedModel),
@@ -34619,9 +35251,11 @@ __webpack_require__.r(__webpack_exports__);
34619
35251
  /* harmony export */ permute_data: () => (/* reexport safe */ _utils_maths_js__WEBPACK_IMPORTED_MODULE_8__.permute_data),
34620
35252
  /* harmony export */ pipeline: () => (/* reexport safe */ _pipelines_js__WEBPACK_IMPORTED_MODULE_1__.pipeline),
34621
35253
  /* harmony export */ quantize_embeddings: () => (/* reexport safe */ _utils_tensor_js__WEBPACK_IMPORTED_MODULE_7__.quantize_embeddings),
35254
+ /* harmony export */ rand: () => (/* reexport safe */ _utils_tensor_js__WEBPACK_IMPORTED_MODULE_7__.rand),
34622
35255
  /* harmony export */ read_audio: () => (/* reexport safe */ _utils_audio_js__WEBPACK_IMPORTED_MODULE_5__.read_audio),
34623
35256
  /* harmony export */ rfft: () => (/* reexport safe */ _utils_tensor_js__WEBPACK_IMPORTED_MODULE_7__.rfft),
34624
35257
  /* harmony export */ round: () => (/* reexport safe */ _utils_maths_js__WEBPACK_IMPORTED_MODULE_8__.round),
35258
+ /* harmony export */ slice: () => (/* reexport safe */ _utils_tensor_js__WEBPACK_IMPORTED_MODULE_7__.slice),
34625
35259
  /* harmony export */ softmax: () => (/* reexport safe */ _utils_maths_js__WEBPACK_IMPORTED_MODULE_8__.softmax),
34626
35260
  /* harmony export */ spectrogram: () => (/* reexport safe */ _utils_audio_js__WEBPACK_IMPORTED_MODULE_5__.spectrogram),
34627
35261
  /* harmony export */ stack: () => (/* reexport safe */ _utils_tensor_js__WEBPACK_IMPORTED_MODULE_7__.stack),