@huggingface/transformers 3.1.1 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (144) hide show
  1. package/README.md +10 -4
  2. package/dist/ort-wasm-simd-threaded.jsep.wasm +0 -0
  3. package/dist/transformers.cjs +1062 -183
  4. package/dist/transformers.cjs.map +1 -1
  5. package/dist/transformers.js +2239 -1232
  6. package/dist/transformers.js.map +1 -1
  7. package/dist/transformers.min.cjs +1 -358
  8. package/dist/transformers.min.cjs.map +1 -1
  9. package/dist/transformers.min.js +1 -421
  10. package/dist/transformers.min.js.map +1 -1
  11. package/dist/transformers.min.mjs +1 -358
  12. package/dist/transformers.min.mjs.map +1 -1
  13. package/dist/transformers.mjs +1082 -181
  14. package/dist/transformers.mjs.map +1 -1
  15. package/package.json +11 -16
  16. package/src/backends/onnx.js +2 -7
  17. package/src/base/image_processors_utils.js +3 -1
  18. package/src/configs.js +11 -2
  19. package/src/env.js +1 -1
  20. package/src/models/feature_extractors.js +1 -0
  21. package/src/models/idefics3/image_processing_idefics3.js +24 -13
  22. package/src/models/image_processors.js +1 -0
  23. package/src/models/moonshine/feature_extraction_moonshine.js +26 -0
  24. package/src/models/moonshine/processing_moonshine.js +20 -0
  25. package/src/models/paligemma/processing_paligemma.js +82 -0
  26. package/src/models/phi3_v/image_processing_phi3_v.js +163 -0
  27. package/src/models/phi3_v/processing_phi3_v.js +53 -0
  28. package/src/models/processors.js +3 -0
  29. package/src/models/pyannote/feature_extraction_pyannote.js +56 -0
  30. package/src/models/pyannote/processing_pyannote.js +7 -54
  31. package/src/models.js +233 -35
  32. package/src/ops/registry.js +11 -0
  33. package/src/pipelines.js +30 -0
  34. package/src/tokenizers.js +12 -1
  35. package/src/utils/core.js +39 -9
  36. package/src/utils/hub.js +8 -12
  37. package/src/utils/image.js +40 -0
  38. package/src/utils/tensor.js +51 -1
  39. package/types/backends/onnx.d.ts +2 -2
  40. package/types/backends/onnx.d.ts.map +1 -1
  41. package/types/base/feature_extraction_utils.d.ts +1 -1
  42. package/types/base/feature_extraction_utils.d.ts.map +1 -1
  43. package/types/base/image_processors_utils.d.ts +4 -4
  44. package/types/base/image_processors_utils.d.ts.map +1 -1
  45. package/types/base/processing_utils.d.ts +4 -4
  46. package/types/base/processing_utils.d.ts.map +1 -1
  47. package/types/configs.d.ts +7 -7
  48. package/types/configs.d.ts.map +1 -1
  49. package/types/env.d.ts +1 -1
  50. package/types/env.d.ts.map +1 -1
  51. package/types/generation/configuration_utils.d.ts +2 -2
  52. package/types/generation/logits_process.d.ts +2 -2
  53. package/types/generation/logits_process.d.ts.map +1 -1
  54. package/types/generation/logits_sampler.d.ts.map +1 -1
  55. package/types/generation/parameters.d.ts +5 -5
  56. package/types/generation/stopping_criteria.d.ts +1 -1
  57. package/types/generation/stopping_criteria.d.ts.map +1 -1
  58. package/types/generation/streamers.d.ts +2 -2
  59. package/types/generation/streamers.d.ts.map +1 -1
  60. package/types/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.d.ts +1 -1
  61. package/types/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.d.ts.map +1 -1
  62. package/types/models/auto/feature_extraction_auto.d.ts.map +1 -1
  63. package/types/models/auto/image_processing_auto.d.ts.map +1 -1
  64. package/types/models/auto/processing_auto.d.ts +1 -1
  65. package/types/models/auto/processing_auto.d.ts.map +1 -1
  66. package/types/models/clap/feature_extraction_clap.d.ts +1 -1
  67. package/types/models/clap/feature_extraction_clap.d.ts.map +1 -1
  68. package/types/models/detr/image_processing_detr.d.ts +11 -11
  69. package/types/models/detr/image_processing_detr.d.ts.map +1 -1
  70. package/types/models/donut/image_processing_donut.d.ts +1 -1
  71. package/types/models/donut/image_processing_donut.d.ts.map +1 -1
  72. package/types/models/feature_extractors.d.ts +1 -0
  73. package/types/models/florence2/processing_florence2.d.ts.map +1 -1
  74. package/types/models/idefics3/image_processing_idefics3.d.ts.map +1 -1
  75. package/types/models/idefics3/processing_idefics3.d.ts.map +1 -1
  76. package/types/models/image_processors.d.ts +1 -0
  77. package/types/models/janus/image_processing_janus.d.ts +1 -1
  78. package/types/models/janus/image_processing_janus.d.ts.map +1 -1
  79. package/types/models/janus/processing_janus.d.ts.map +1 -1
  80. package/types/models/maskformer/image_processing_maskformer.d.ts +8 -8
  81. package/types/models/maskformer/image_processing_maskformer.d.ts.map +1 -1
  82. package/types/models/mgp_str/processing_mgp_str.d.ts +2 -2
  83. package/types/models/mgp_str/processing_mgp_str.d.ts.map +1 -1
  84. package/types/models/moonshine/feature_extraction_moonshine.d.ts +13 -0
  85. package/types/models/moonshine/feature_extraction_moonshine.d.ts.map +1 -0
  86. package/types/models/moonshine/processing_moonshine.d.ts +17 -0
  87. package/types/models/moonshine/processing_moonshine.d.ts.map +1 -0
  88. package/types/models/owlvit/image_processing_owlvit.d.ts.map +1 -1
  89. package/types/models/paligemma/processing_paligemma.d.ts +12 -0
  90. package/types/models/paligemma/processing_paligemma.d.ts.map +1 -0
  91. package/types/models/phi3_v/image_processing_phi3_v.d.ts +17 -0
  92. package/types/models/phi3_v/image_processing_phi3_v.d.ts.map +1 -0
  93. package/types/models/phi3_v/processing_phi3_v.d.ts +17 -0
  94. package/types/models/phi3_v/processing_phi3_v.d.ts.map +1 -0
  95. package/types/models/processors.d.ts +3 -0
  96. package/types/models/pyannote/feature_extraction_pyannote.d.ts +18 -0
  97. package/types/models/pyannote/feature_extraction_pyannote.d.ts.map +1 -1
  98. package/types/models/pyannote/processing_pyannote.d.ts +4 -15
  99. package/types/models/pyannote/processing_pyannote.d.ts.map +1 -1
  100. package/types/models/qwen2_vl/processing_qwen2_vl.d.ts.map +1 -1
  101. package/types/models/rt_detr/image_processing_rt_detr.d.ts.map +1 -1
  102. package/types/models/sam/image_processing_sam.d.ts.map +1 -1
  103. package/types/models/seamless_m4t/feature_extraction_seamless_m4t.d.ts +1 -1
  104. package/types/models/seamless_m4t/feature_extraction_seamless_m4t.d.ts.map +1 -1
  105. package/types/models/segformer/image_processing_segformer.d.ts.map +1 -1
  106. package/types/models/speecht5/processing_speecht5.d.ts.map +1 -1
  107. package/types/models/swin2sr/image_processing_swin2sr.d.ts +1 -1
  108. package/types/models/swin2sr/image_processing_swin2sr.d.ts.map +1 -1
  109. package/types/models/vitmatte/image_processing_vitmatte.d.ts.map +1 -1
  110. package/types/models/vitpose/image_processing_vitpose.d.ts +1 -1
  111. package/types/models/vitpose/image_processing_vitpose.d.ts.map +1 -1
  112. package/types/models/wav2vec2/feature_extraction_wav2vec2.d.ts.map +1 -1
  113. package/types/models/wav2vec2/processing_wav2vec2.d.ts.map +1 -1
  114. package/types/models/wespeaker/feature_extraction_wespeaker.d.ts +1 -1
  115. package/types/models/wespeaker/feature_extraction_wespeaker.d.ts.map +1 -1
  116. package/types/models/whisper/feature_extraction_whisper.d.ts +1 -1
  117. package/types/models/whisper/feature_extraction_whisper.d.ts.map +1 -1
  118. package/types/models/whisper/generation_whisper.d.ts.map +1 -1
  119. package/types/models/whisper/processing_whisper.d.ts.map +1 -1
  120. package/types/models/yolos/image_processing_yolos.d.ts.map +1 -1
  121. package/types/models.d.ts +61 -5
  122. package/types/models.d.ts.map +1 -1
  123. package/types/ops/registry.d.ts +1 -0
  124. package/types/ops/registry.d.ts.map +1 -1
  125. package/types/pipelines.d.ts +31 -51
  126. package/types/pipelines.d.ts.map +1 -1
  127. package/types/tokenizers.d.ts +10 -6
  128. package/types/tokenizers.d.ts.map +1 -1
  129. package/types/utils/audio.d.ts.map +1 -1
  130. package/types/utils/constants.d.ts.map +1 -1
  131. package/types/utils/core.d.ts +87 -22
  132. package/types/utils/core.d.ts.map +1 -1
  133. package/types/utils/data-structures.d.ts.map +1 -1
  134. package/types/utils/devices.d.ts.map +1 -1
  135. package/types/utils/dtypes.d.ts.map +1 -1
  136. package/types/utils/generic.d.ts.map +1 -1
  137. package/types/utils/hub.d.ts +3 -3
  138. package/types/utils/hub.d.ts.map +1 -1
  139. package/types/utils/image.d.ts +10 -1
  140. package/types/utils/image.d.ts.map +1 -1
  141. package/types/utils/maths.d.ts +10 -10
  142. package/types/utils/maths.d.ts.map +1 -1
  143. package/types/utils/tensor.d.ts +22 -6
  144. package/types/utils/tensor.d.ts.map +1 -1
@@ -56,10 +56,10 @@ module.exports = require("url");
56
56
 
57
57
  /***/ }),
58
58
 
59
- /***/ "?cb4d":
60
- /*!*************************************!*\
61
- !*** #onnxruntime-webgpu (ignored) ***!
62
- \*************************************/
59
+ /***/ "?8b6b":
60
+ /*!*********************************!*\
61
+ !*** onnxruntime-web (ignored) ***!
62
+ \*********************************/
63
63
  /***/ (() => {
64
64
 
65
65
  /* (ignored) */
@@ -3896,7 +3896,7 @@ const version = '1.20.1';
3896
3896
 
3897
3897
  "use strict";
3898
3898
  var onnxruntime_node__WEBPACK_IMPORTED_MODULE_1___namespace_cache;
3899
- var _onnxruntime_webgpu__WEBPACK_IMPORTED_MODULE_2___namespace_cache;
3899
+ var onnxruntime_web__WEBPACK_IMPORTED_MODULE_2___namespace_cache;
3900
3900
  __webpack_require__.r(__webpack_exports__);
3901
3901
  /* harmony export */ __webpack_require__.d(__webpack_exports__, {
3902
3902
  /* harmony export */ Tensor: () => (/* reexport safe */ onnxruntime_common__WEBPACK_IMPORTED_MODULE_3__.Tensor),
@@ -3907,7 +3907,7 @@ __webpack_require__.r(__webpack_exports__);
3907
3907
  /* harmony export */ });
3908
3908
  /* harmony import */ var _env_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ../env.js */ "./src/env.js");
3909
3909
  /* harmony import */ var onnxruntime_node__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! onnxruntime-node */ "onnxruntime-node");
3910
- /* harmony import */ var _onnxruntime_webgpu__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! #onnxruntime-webgpu */ "?cb4d");
3910
+ /* harmony import */ var onnxruntime_web__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! onnxruntime-web */ "?8b6b");
3911
3911
  /* harmony import */ var onnxruntime_common__WEBPACK_IMPORTED_MODULE_3__ = __webpack_require__(/*! onnxruntime-common */ "./node_modules/onnxruntime-common/dist/esm/index.js");
3912
3912
  /**
3913
3913
  * @file Handler file for choosing the correct version of ONNX Runtime, based on the environment.
@@ -3933,11 +3933,6 @@ __webpack_require__.r(__webpack_exports__);
3933
3933
  // In either case, we select the default export if it exists, otherwise we use the named export.
3934
3934
 
3935
3935
 
3936
- // Use subpath-imports to ensure Node.js and browser interoperability.
3937
- // See package.json and https://nodejs.org/api/packages.html#subpath-imports
3938
- // for more information.
3939
- // @ts-ignore
3940
-
3941
3936
 
3942
3937
 
3943
3938
 
@@ -3979,7 +3974,7 @@ if (ORT_SYMBOL in globalThis) {
3979
3974
  } else if (_env_js__WEBPACK_IMPORTED_MODULE_0__.apis.IS_NODE_ENV) {
3980
3975
  ONNX = onnxruntime_node__WEBPACK_IMPORTED_MODULE_1__ ?? /*#__PURE__*/ (onnxruntime_node__WEBPACK_IMPORTED_MODULE_1___namespace_cache || (onnxruntime_node__WEBPACK_IMPORTED_MODULE_1___namespace_cache = __webpack_require__.t(onnxruntime_node__WEBPACK_IMPORTED_MODULE_1__, 2)));
3981
3976
 
3982
- // Updated as of ONNX Runtime 1.18.0
3977
+ // Updated as of ONNX Runtime 1.20.1
3983
3978
  // The following table lists the supported versions of ONNX Runtime Node.js binding provided with pre-built binaries.
3984
3979
  // | EPs/Platforms | Windows x64 | Windows arm64 | Linux x64 | Linux arm64 | MacOS x64 | MacOS arm64 |
3985
3980
  // | ------------- | ----------- | ------------- | ----------------- | ----------- | --------- | ----------- |
@@ -4002,7 +3997,7 @@ if (ORT_SYMBOL in globalThis) {
4002
3997
  supportedDevices.push('cpu');
4003
3998
  defaultDevices = ['cpu'];
4004
3999
  } else {
4005
- ONNX = /*#__PURE__*/ (_onnxruntime_webgpu__WEBPACK_IMPORTED_MODULE_2___namespace_cache || (_onnxruntime_webgpu__WEBPACK_IMPORTED_MODULE_2___namespace_cache = __webpack_require__.t(_onnxruntime_webgpu__WEBPACK_IMPORTED_MODULE_2__, 2)));
4000
+ ONNX = /*#__PURE__*/ (onnxruntime_web__WEBPACK_IMPORTED_MODULE_2___namespace_cache || (onnxruntime_web__WEBPACK_IMPORTED_MODULE_2___namespace_cache = __webpack_require__.t(onnxruntime_web__WEBPACK_IMPORTED_MODULE_2__, 2)));
4006
4001
 
4007
4002
  if (_env_js__WEBPACK_IMPORTED_MODULE_0__.apis.IS_WEBNN_AVAILABLE) {
4008
4003
  // TODO: Only push supported providers (depending on available hardware)
@@ -4925,7 +4920,7 @@ class ImageProcessor extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_0__.Call
4925
4920
  * Pad the image by a certain amount.
4926
4921
  * @param {Float32Array} pixelData The pixel data to pad.
4927
4922
  * @param {number[]} imgDims The dimensions of the image (height, width, channels).
4928
- * @param {{width:number; height:number}|number} padSize The dimensions of the padded image.
4923
+ * @param {{width:number; height:number}|number|'square'} padSize The dimensions of the padded image.
4929
4924
  * @param {Object} options The options for padding.
4930
4925
  * @param {'constant'|'symmetric'} [options.mode='constant'] The type of padding to add.
4931
4926
  * @param {boolean} [options.center=false] Whether to center the image.
@@ -4943,6 +4938,8 @@ class ImageProcessor extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_0__.Call
4943
4938
  if (typeof padSize === 'number') {
4944
4939
  paddedImageWidth = padSize;
4945
4940
  paddedImageHeight = padSize;
4941
+ } else if (padSize === 'square') {
4942
+ paddedImageWidth = paddedImageHeight = Math.max(imageHeight, imageWidth);
4946
4943
  } else {
4947
4944
  paddedImageWidth = padSize.width;
4948
4945
  paddedImageHeight = padSize.height;
@@ -5588,8 +5585,6 @@ function getNormalizedConfig(config) {
5588
5585
  case 'gpt_neox':
5589
5586
  case 'stablelm':
5590
5587
  case 'opt':
5591
- case 'phi':
5592
- case 'phi3':
5593
5588
  case 'falcon':
5594
5589
  mapping['num_heads'] = 'num_attention_heads';
5595
5590
  mapping['num_layers'] = 'num_hidden_layers';
@@ -5597,6 +5592,7 @@ function getNormalizedConfig(config) {
5597
5592
  break;
5598
5593
  case 'llama':
5599
5594
  case 'olmo':
5595
+ case 'olmo2':
5600
5596
  case 'mobilellm':
5601
5597
  case 'granite':
5602
5598
  case 'cohere':
@@ -5604,6 +5600,9 @@ function getNormalizedConfig(config) {
5604
5600
  case 'starcoder2':
5605
5601
  case 'qwen2':
5606
5602
  case 'qwen2_vl':
5603
+ case 'phi':
5604
+ case 'phi3':
5605
+ case 'phi3_v':
5607
5606
  mapping['num_heads'] = 'num_key_value_heads';
5608
5607
  mapping['num_layers'] = 'num_hidden_layers';
5609
5608
  mapping['hidden_size'] = 'hidden_size';
@@ -5636,6 +5635,12 @@ function getNormalizedConfig(config) {
5636
5635
  mapping['num_layers'] = 'n_layers';
5637
5636
  mapping['hidden_size'] = 'd_model';
5638
5637
  break;
5638
+ case 'exaone':
5639
+ mapping['num_heads'] = 'num_key_value_heads';
5640
+ mapping['num_layers'] = 'num_layers';
5641
+ mapping['dim_kv'] = 'head_dim';
5642
+ mapping['num_attention_heads'] = 'num_attention_heads';
5643
+ break;
5639
5644
 
5640
5645
  // Encoder-decoder models
5641
5646
  case 't5':
@@ -5677,6 +5682,7 @@ function getNormalizedConfig(config) {
5677
5682
  mapping['encoder_hidden_size'] = mapping['decoder_hidden_size'] = 'd_model';
5678
5683
  break;
5679
5684
  case 'musicgen_decoder':
5685
+ case 'moonshine':
5680
5686
  mapping['num_encoder_layers'] = mapping['num_decoder_layers'] = 'num_hidden_layers';
5681
5687
  mapping['num_encoder_heads'] = mapping['num_decoder_heads'] = 'num_attention_heads';
5682
5688
  mapping['encoder_hidden_size'] = mapping['decoder_hidden_size'] = 'hidden_size';
@@ -5926,7 +5932,7 @@ __webpack_require__.r(__webpack_exports__);
5926
5932
 
5927
5933
 
5928
5934
 
5929
- const VERSION = '3.1.1';
5935
+ const VERSION = '3.2.0';
5930
5936
 
5931
5937
  // Check if various APIs are available (depends on environment)
5932
5938
  const IS_BROWSER_ENV = typeof window !== "undefined" && typeof window.document !== "undefined";
@@ -8024,6 +8030,9 @@ __webpack_require__.r(__webpack_exports__);
8024
8030
  /* harmony export */ EsmForTokenClassification: () => (/* binding */ EsmForTokenClassification),
8025
8031
  /* harmony export */ EsmModel: () => (/* binding */ EsmModel),
8026
8032
  /* harmony export */ EsmPreTrainedModel: () => (/* binding */ EsmPreTrainedModel),
8033
+ /* harmony export */ ExaoneForCausalLM: () => (/* binding */ ExaoneForCausalLM),
8034
+ /* harmony export */ ExaoneModel: () => (/* binding */ ExaoneModel),
8035
+ /* harmony export */ ExaonePreTrainedModel: () => (/* binding */ ExaonePreTrainedModel),
8027
8036
  /* harmony export */ FalconForCausalLM: () => (/* binding */ FalconForCausalLM),
8028
8037
  /* harmony export */ FalconModel: () => (/* binding */ FalconModel),
8029
8038
  /* harmony export */ FalconPreTrainedModel: () => (/* binding */ FalconPreTrainedModel),
@@ -8068,6 +8077,9 @@ __webpack_require__.r(__webpack_exports__);
8068
8077
  /* harmony export */ HubertForSequenceClassification: () => (/* binding */ HubertForSequenceClassification),
8069
8078
  /* harmony export */ HubertModel: () => (/* binding */ HubertModel),
8070
8079
  /* harmony export */ HubertPreTrainedModel: () => (/* binding */ HubertPreTrainedModel),
8080
+ /* harmony export */ IJepaForImageClassification: () => (/* binding */ IJepaForImageClassification),
8081
+ /* harmony export */ IJepaModel: () => (/* binding */ IJepaModel),
8082
+ /* harmony export */ IJepaPreTrainedModel: () => (/* binding */ IJepaPreTrainedModel),
8071
8083
  /* harmony export */ Idefics3ForConditionalGeneration: () => (/* binding */ Idefics3ForConditionalGeneration),
8072
8084
  /* harmony export */ Idefics3PreTrainedModel: () => (/* binding */ Idefics3PreTrainedModel),
8073
8085
  /* harmony export */ ImageMattingOutput: () => (/* binding */ ImageMattingOutput),
@@ -8145,6 +8157,9 @@ __webpack_require__.r(__webpack_exports__);
8145
8157
  /* harmony export */ MobileViTV2PreTrainedModel: () => (/* binding */ MobileViTV2PreTrainedModel),
8146
8158
  /* harmony export */ ModelOutput: () => (/* binding */ ModelOutput),
8147
8159
  /* harmony export */ Moondream1ForConditionalGeneration: () => (/* binding */ Moondream1ForConditionalGeneration),
8160
+ /* harmony export */ MoonshineForConditionalGeneration: () => (/* binding */ MoonshineForConditionalGeneration),
8161
+ /* harmony export */ MoonshineModel: () => (/* binding */ MoonshineModel),
8162
+ /* harmony export */ MoonshinePreTrainedModel: () => (/* binding */ MoonshinePreTrainedModel),
8148
8163
  /* harmony export */ MptForCausalLM: () => (/* binding */ MptForCausalLM),
8149
8164
  /* harmony export */ MptModel: () => (/* binding */ MptModel),
8150
8165
  /* harmony export */ MptPreTrainedModel: () => (/* binding */ MptPreTrainedModel),
@@ -8159,6 +8174,9 @@ __webpack_require__.r(__webpack_exports__);
8159
8174
  /* harmony export */ OPTForCausalLM: () => (/* binding */ OPTForCausalLM),
8160
8175
  /* harmony export */ OPTModel: () => (/* binding */ OPTModel),
8161
8176
  /* harmony export */ OPTPreTrainedModel: () => (/* binding */ OPTPreTrainedModel),
8177
+ /* harmony export */ Olmo2ForCausalLM: () => (/* binding */ Olmo2ForCausalLM),
8178
+ /* harmony export */ Olmo2Model: () => (/* binding */ Olmo2Model),
8179
+ /* harmony export */ Olmo2PreTrainedModel: () => (/* binding */ Olmo2PreTrainedModel),
8162
8180
  /* harmony export */ OlmoForCausalLM: () => (/* binding */ OlmoForCausalLM),
8163
8181
  /* harmony export */ OlmoModel: () => (/* binding */ OlmoModel),
8164
8182
  /* harmony export */ OlmoPreTrainedModel: () => (/* binding */ OlmoPreTrainedModel),
@@ -8171,6 +8189,8 @@ __webpack_require__.r(__webpack_exports__);
8171
8189
  /* harmony export */ Owlv2ForObjectDetection: () => (/* binding */ Owlv2ForObjectDetection),
8172
8190
  /* harmony export */ Owlv2Model: () => (/* binding */ Owlv2Model),
8173
8191
  /* harmony export */ Owlv2PreTrainedModel: () => (/* binding */ Owlv2PreTrainedModel),
8192
+ /* harmony export */ PaliGemmaForConditionalGeneration: () => (/* binding */ PaliGemmaForConditionalGeneration),
8193
+ /* harmony export */ PaliGemmaPreTrainedModel: () => (/* binding */ PaliGemmaPreTrainedModel),
8174
8194
  /* harmony export */ PatchTSMixerForPrediction: () => (/* binding */ PatchTSMixerForPrediction),
8175
8195
  /* harmony export */ PatchTSMixerModel: () => (/* binding */ PatchTSMixerModel),
8176
8196
  /* harmony export */ PatchTSMixerPreTrainedModel: () => (/* binding */ PatchTSMixerPreTrainedModel),
@@ -8180,6 +8200,8 @@ __webpack_require__.r(__webpack_exports__);
8180
8200
  /* harmony export */ Phi3ForCausalLM: () => (/* binding */ Phi3ForCausalLM),
8181
8201
  /* harmony export */ Phi3Model: () => (/* binding */ Phi3Model),
8182
8202
  /* harmony export */ Phi3PreTrainedModel: () => (/* binding */ Phi3PreTrainedModel),
8203
+ /* harmony export */ Phi3VForCausalLM: () => (/* binding */ Phi3VForCausalLM),
8204
+ /* harmony export */ Phi3VPreTrainedModel: () => (/* binding */ Phi3VPreTrainedModel),
8183
8205
  /* harmony export */ PhiForCausalLM: () => (/* binding */ PhiForCausalLM),
8184
8206
  /* harmony export */ PhiModel: () => (/* binding */ PhiModel),
8185
8207
  /* harmony export */ PhiPreTrainedModel: () => (/* binding */ PhiPreTrainedModel),
@@ -8425,6 +8447,7 @@ const MODEL_TYPES = {
8425
8447
  ImageTextToText: 6,
8426
8448
  Musicgen: 7,
8427
8449
  MultiModality: 8,
8450
+ Phi3V: 9,
8428
8451
  }
8429
8452
  //////////////////////////////////////////////////
8430
8453
 
@@ -8852,7 +8875,9 @@ async function decoderForward(self, model_inputs, is_encoder_decoder = false) {
8852
8875
  new_model_inputs.use_cache_branch = boolTensor(!!past_key_values);
8853
8876
  }
8854
8877
  if (session.inputNames.includes('position_ids') && new_model_inputs.attention_mask && !new_model_inputs.position_ids) {
8855
- new_model_inputs.position_ids = createPositionIds(new_model_inputs, past_key_values);
8878
+ // NOTE: Handle a special case for paligemma models, where positions are 1-indexed
8879
+ const start_index = self.config.model_type === 'paligemma' ? 1 : 0;
8880
+ new_model_inputs.position_ids = createPositionIds(new_model_inputs, past_key_values, start_index);
8856
8881
  }
8857
8882
 
8858
8883
  // Unpack the `past_key_values` object into model inputs
@@ -8988,14 +9013,14 @@ async function imageTextToTextForward(self, {
8988
9013
  * @param {Tensor} attention_mask
8989
9014
  * @returns {{data: BigInt64Array, dims: number[]}}
8990
9015
  */
8991
- function cumsum_masked_fill(attention_mask) {
9016
+ function cumsum_masked_fill(attention_mask, start_index = 0) {
8992
9017
  const [bz, seq_len] = attention_mask.dims;
8993
9018
  const attn_mask_data = attention_mask.data;
8994
9019
 
8995
9020
  const data = new BigInt64Array(attn_mask_data.length);
8996
9021
  for (let i = 0; i < bz; ++i) {
8997
9022
  const start = i * seq_len;
8998
- let sum = BigInt(0);
9023
+ let sum = BigInt(start_index);
8999
9024
  for (let j = 0; j < seq_len; ++j) {
9000
9025
  const index = start + j;
9001
9026
  if (attn_mask_data[index] === 0n) {
@@ -9022,10 +9047,10 @@ function cumsum_masked_fill(attention_mask) {
9022
9047
  * position_ids = position_ids[:, -input_ids.shape[1] :]
9023
9048
  * ```
9024
9049
  */
9025
- function createPositionIds(model_inputs, past_key_values = null) {
9050
+ function createPositionIds(model_inputs, past_key_values = null, start_index = 0) {
9026
9051
  const { input_ids, inputs_embeds, attention_mask } = model_inputs;
9027
9052
 
9028
- const { data, dims } = cumsum_masked_fill(attention_mask);
9053
+ const { data, dims } = cumsum_masked_fill(attention_mask, start_index);
9029
9054
  let position_ids = new _utils_tensor_js__WEBPACK_IMPORTED_MODULE_9__.Tensor('int64', data, dims);
9030
9055
  if (past_key_values) {
9031
9056
  const offset = -(input_ids ?? inputs_embeds).dims.at(1);
@@ -9198,6 +9223,10 @@ class PreTrainedModel extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_3__.Cal
9198
9223
  this._forward = imageTextToTextForward;
9199
9224
  this._prepare_inputs_for_generation = image_text_to_text_prepare_inputs_for_generation;
9200
9225
  break;
9226
+ case MODEL_TYPES.Phi3V:
9227
+ this.can_generate = true;
9228
+ this._prepare_inputs_for_generation = image_text_to_text_prepare_inputs_for_generation;
9229
+ break;
9201
9230
 
9202
9231
  case MODEL_TYPES.MultiModality:
9203
9232
  this.can_generate = true;
@@ -9362,6 +9391,18 @@ class PreTrainedModel extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_3__.Cal
9362
9391
  }, options),
9363
9392
  ]);
9364
9393
 
9394
+ } else if (modelType === MODEL_TYPES.Phi3V) {
9395
+ info = await Promise.all([
9396
+ constructSessions(pretrained_model_name_or_path, {
9397
+ prepare_inputs_embeds: 'prepare_inputs_embeds',
9398
+ model: 'model',
9399
+ vision_encoder: 'vision_encoder',
9400
+ }, options),
9401
+ getOptionalConfigs(pretrained_model_name_or_path, {
9402
+ generation_config: 'generation_config.json',
9403
+ }, options),
9404
+ ]);
9405
+
9365
9406
  } else { // should be MODEL_TYPES.EncoderOnly
9366
9407
  if (modelType !== MODEL_TYPES.EncoderOnly) {
9367
9408
  const type = modelName ?? config?.model_type;
@@ -11634,6 +11675,29 @@ class WhisperForConditionalGeneration extends WhisperPreTrainedModel {
11634
11675
  }
11635
11676
  //////////////////////////////////////////////////
11636
11677
 
11678
+
11679
+ //////////////////////////////////////////////////
11680
+ // Moonshine models
11681
+ class MoonshinePreTrainedModel extends PreTrainedModel {
11682
+
11683
+ requires_attention_mask = false;
11684
+ main_input_name = 'input_values';
11685
+ forward_params = [
11686
+ 'input_values',
11687
+ 'decoder_input_ids',
11688
+ 'past_key_values',
11689
+ ];
11690
+ };
11691
+
11692
+ /**
11693
+ * MoonshineModel class for training Moonshine models without a language model head.
11694
+ */
11695
+ class MoonshineModel extends MoonshinePreTrainedModel { }
11696
+
11697
+ class MoonshineForConditionalGeneration extends MoonshinePreTrainedModel { }
11698
+ //////////////////////////////////////////////////
11699
+
11700
+
11637
11701
  //////////////////////////////////////////////////
11638
11702
  /**
11639
11703
  * Vision Encoder-Decoder model based on OpenAI's GPT architecture for image captioning and other vision tasks
@@ -11842,6 +11906,30 @@ class Florence2ForConditionalGeneration extends Florence2PreTrainedModel {
11842
11906
  }
11843
11907
  }
11844
11908
 
11909
+ class PaliGemmaPreTrainedModel extends PreTrainedModel {
11910
+ forward_params = [
11911
+ 'input_ids',
11912
+ // 'inputs_embeds',
11913
+ 'attention_mask',
11914
+ 'pixel_values',
11915
+ 'position_ids',
11916
+ 'past_key_values',
11917
+ ];
11918
+ }
11919
+
11920
+ class PaliGemmaForConditionalGeneration extends PaliGemmaPreTrainedModel {
11921
+ _merge_input_ids_with_image_features(kwargs) {
11922
+ const vision_hidden_size = kwargs.image_features.dims.at(-1);
11923
+ const reshaped_image_hidden_states = kwargs.image_features.view(-1, vision_hidden_size);
11924
+
11925
+ return default_merge_input_ids_with_image_features({
11926
+ // @ts-ignore
11927
+ image_token_id: this.config.image_token_index,
11928
+ ...kwargs,
11929
+ image_features: reshaped_image_hidden_states,
11930
+ })
11931
+ }
11932
+ }
11845
11933
 
11846
11934
  //////////////////////////////////////////////////
11847
11935
  // Idefics3 Models
@@ -11880,6 +11968,77 @@ class Idefics3ForConditionalGeneration extends Idefics3PreTrainedModel {
11880
11968
  }
11881
11969
  //////////////////////////////////////////////////
11882
11970
 
11971
+ class Phi3VPreTrainedModel extends PreTrainedModel {
11972
+ forward_params = [
11973
+ 'input_ids',
11974
+ 'inputs_embeds',
11975
+ 'attention_mask',
11976
+ 'position_ids',
11977
+ 'pixel_values',
11978
+ 'image_sizes',
11979
+ 'past_key_values',
11980
+ ];
11981
+ }
11982
+ class Phi3VForCausalLM extends Phi3VPreTrainedModel {
11983
+
11984
+ async forward({
11985
+ // Produced by the tokenizer/processor:
11986
+ input_ids = null,
11987
+ attention_mask = null,
11988
+ pixel_values = null,
11989
+ image_sizes = null,
11990
+
11991
+ // Used during generation:
11992
+ position_ids = null,
11993
+ inputs_embeds = null,
11994
+ past_key_values = null,
11995
+
11996
+ // Generic generation parameters
11997
+ generation_config = null,
11998
+ logits_processor = null,
11999
+
12000
+ // TODO: needed?
12001
+ ...kwargs
12002
+ }) {
12003
+ if (!inputs_embeds) {
12004
+ let image_features;
12005
+ if (pixel_values && input_ids.dims[1] !== 1) {
12006
+ if (!image_sizes) {
12007
+ throw new Error('`image_sizes` must be provided when `pixel_values` is provided.');
12008
+ }
12009
+
12010
+ // Encode the image
12011
+ ({ image_features } = await sessionRun(this.sessions['vision_encoder'], {
12012
+ pixel_values,
12013
+ image_sizes,
12014
+ }));
12015
+ } else {
12016
+ const hidden_size = this.config.normalized_config.hidden_size;
12017
+ image_features = new _utils_tensor_js__WEBPACK_IMPORTED_MODULE_9__.Tensor(
12018
+ 'float32',
12019
+ [],
12020
+ [0, hidden_size],
12021
+ );
12022
+ }
12023
+
12024
+ ({ inputs_embeds } = await sessionRun(this.sessions['prepare_inputs_embeds'], {
12025
+ input_ids,
12026
+ image_features,
12027
+ }));
12028
+ }
12029
+
12030
+ const outputs = await decoderForward(this, {
12031
+ inputs_embeds,
12032
+ past_key_values,
12033
+ attention_mask,
12034
+ position_ids,
12035
+ generation_config,
12036
+ logits_processor,
12037
+ }, false);
12038
+ return outputs;
12039
+ }
12040
+ }
12041
+
11883
12042
  //////////////////////////////////////////////////
11884
12043
  class CLIPPreTrainedModel extends PreTrainedModel { }
11885
12044
 
@@ -11934,9 +12093,11 @@ class CLIPModel extends CLIPPreTrainedModel { }
11934
12093
  class CLIPTextModel extends CLIPPreTrainedModel {
11935
12094
  /** @type {typeof PreTrainedModel.from_pretrained} */
11936
12095
  static async from_pretrained(pretrained_model_name_or_path, options = {}) {
11937
- // Update default model file name if not provided
11938
- options.model_file_name ??= 'text_model';
11939
- return super.from_pretrained(pretrained_model_name_or_path, options);
12096
+ return super.from_pretrained(pretrained_model_name_or_path, {
12097
+ // Update default model file name if not provided
12098
+ model_file_name: 'text_model',
12099
+ ...options,
12100
+ });
11940
12101
  }
11941
12102
  }
11942
12103
 
@@ -11969,9 +12130,11 @@ class CLIPTextModel extends CLIPPreTrainedModel {
11969
12130
  class CLIPTextModelWithProjection extends CLIPPreTrainedModel {
11970
12131
  /** @type {typeof PreTrainedModel.from_pretrained} */
11971
12132
  static async from_pretrained(pretrained_model_name_or_path, options = {}) {
11972
- // Update default model file name if not provided
11973
- options.model_file_name ??= 'text_model';
11974
- return super.from_pretrained(pretrained_model_name_or_path, options);
12133
+ return super.from_pretrained(pretrained_model_name_or_path, {
12134
+ // Update default model file name if not provided
12135
+ model_file_name: 'text_model',
12136
+ ...options,
12137
+ });
11975
12138
  }
11976
12139
  }
11977
12140
 
@@ -11981,9 +12144,11 @@ class CLIPTextModelWithProjection extends CLIPPreTrainedModel {
11981
12144
  class CLIPVisionModel extends CLIPPreTrainedModel {
11982
12145
  /** @type {typeof PreTrainedModel.from_pretrained} */
11983
12146
  static async from_pretrained(pretrained_model_name_or_path, options = {}) {
11984
- // Update default model file name if not provided
11985
- options.model_file_name ??= 'vision_model';
11986
- return super.from_pretrained(pretrained_model_name_or_path, options);
12147
+ return super.from_pretrained(pretrained_model_name_or_path, {
12148
+ // Update default model file name if not provided
12149
+ model_file_name: 'vision_model',
12150
+ ...options,
12151
+ });
11987
12152
  }
11988
12153
  }
11989
12154
 
@@ -12016,9 +12181,11 @@ class CLIPVisionModel extends CLIPPreTrainedModel {
12016
12181
  class CLIPVisionModelWithProjection extends CLIPPreTrainedModel {
12017
12182
  /** @type {typeof PreTrainedModel.from_pretrained} */
12018
12183
  static async from_pretrained(pretrained_model_name_or_path, options = {}) {
12019
- // Update default model file name if not provided
12020
- options.model_file_name ??= 'vision_model';
12021
- return super.from_pretrained(pretrained_model_name_or_path, options);
12184
+ return super.from_pretrained(pretrained_model_name_or_path, {
12185
+ // Update default model file name if not provided
12186
+ model_file_name: 'vision_model',
12187
+ ...options,
12188
+ });
12022
12189
  }
12023
12190
  }
12024
12191
  //////////////////////////////////////////////////
@@ -12102,9 +12269,11 @@ class SiglipModel extends SiglipPreTrainedModel { }
12102
12269
  class SiglipTextModel extends SiglipPreTrainedModel {
12103
12270
  /** @type {typeof PreTrainedModel.from_pretrained} */
12104
12271
  static async from_pretrained(pretrained_model_name_or_path, options = {}) {
12105
- // Update default model file name if not provided
12106
- options.model_file_name ??= 'text_model';
12107
- return super.from_pretrained(pretrained_model_name_or_path, options);
12272
+ return super.from_pretrained(pretrained_model_name_or_path, {
12273
+ // Update default model file name if not provided
12274
+ model_file_name: 'text_model',
12275
+ ...options,
12276
+ });
12108
12277
  }
12109
12278
  }
12110
12279
 
@@ -12137,9 +12306,11 @@ class SiglipTextModel extends SiglipPreTrainedModel {
12137
12306
  class SiglipVisionModel extends CLIPPreTrainedModel {
12138
12307
  /** @type {typeof PreTrainedModel.from_pretrained} */
12139
12308
  static async from_pretrained(pretrained_model_name_or_path, options = {}) {
12140
- // Update default model file name if not provided
12141
- options.model_file_name ??= 'vision_model';
12142
- return super.from_pretrained(pretrained_model_name_or_path, options);
12309
+ return super.from_pretrained(pretrained_model_name_or_path, {
12310
+ // Update default model file name if not provided
12311
+ model_file_name: 'vision_model',
12312
+ ...options,
12313
+ });
12143
12314
  }
12144
12315
  }
12145
12316
  //////////////////////////////////////////////////
@@ -12194,18 +12365,22 @@ class JinaCLIPModel extends JinaCLIPPreTrainedModel {
12194
12365
  class JinaCLIPTextModel extends JinaCLIPPreTrainedModel {
12195
12366
  /** @type {typeof PreTrainedModel.from_pretrained} */
12196
12367
  static async from_pretrained(pretrained_model_name_or_path, options = {}) {
12197
- // Update default model file name if not provided
12198
- options.model_file_name ??= 'text_model';
12199
- return super.from_pretrained(pretrained_model_name_or_path, options);
12368
+ return super.from_pretrained(pretrained_model_name_or_path, {
12369
+ // Update default model file name if not provided
12370
+ model_file_name: 'text_model',
12371
+ ...options,
12372
+ });
12200
12373
  }
12201
12374
  }
12202
12375
 
12203
12376
  class JinaCLIPVisionModel extends JinaCLIPPreTrainedModel {
12204
12377
  /** @type {typeof PreTrainedModel.from_pretrained} */
12205
12378
  static async from_pretrained(pretrained_model_name_or_path, options = {}) {
12206
- // Update default model file name if not provided
12207
- options.model_file_name ??= 'vision_model';
12208
- return super.from_pretrained(pretrained_model_name_or_path, options);
12379
+ return super.from_pretrained(pretrained_model_name_or_path, {
12380
+ // Update default model file name if not provided
12381
+ model_file_name: 'vision_model',
12382
+ ...options,
12383
+ });
12209
12384
  }
12210
12385
  }
12211
12386
  //////////////////////////////////////////////////
@@ -12365,6 +12540,14 @@ class LlamaForCausalLM extends LlamaPreTrainedModel { }
12365
12540
  //////////////////////////////////////////////////
12366
12541
 
12367
12542
 
12543
+ //////////////////////////////////////////////////
12544
+ // EXAONE models
12545
+ class ExaonePreTrainedModel extends PreTrainedModel { }
12546
+ class ExaoneModel extends ExaonePreTrainedModel { }
12547
+ class ExaoneForCausalLM extends ExaonePreTrainedModel { }
12548
+ //////////////////////////////////////////////////
12549
+
12550
+
12368
12551
  //////////////////////////////////////////////////
12369
12552
  // MobileLLM models
12370
12553
  class MobileLLMPreTrainedModel extends PreTrainedModel { }
@@ -12380,6 +12563,13 @@ class OlmoModel extends OlmoPreTrainedModel { }
12380
12563
  class OlmoForCausalLM extends OlmoPreTrainedModel { }
12381
12564
  //////////////////////////////////////////////////
12382
12565
 
12566
+ //////////////////////////////////////////////////
12567
+ // OLMo2 models
12568
+ class Olmo2PreTrainedModel extends PreTrainedModel { }
12569
+ class Olmo2Model extends Olmo2PreTrainedModel { }
12570
+ class Olmo2ForCausalLM extends Olmo2PreTrainedModel { }
12571
+ //////////////////////////////////////////////////
12572
+
12383
12573
 
12384
12574
  //////////////////////////////////////////////////
12385
12575
  // Granite models
@@ -12796,6 +12986,20 @@ class ViTForImageClassification extends ViTPreTrainedModel {
12796
12986
  //////////////////////////////////////////////////
12797
12987
 
12798
12988
 
12989
+ //////////////////////////////////////////////////
12990
+ class IJepaPreTrainedModel extends PreTrainedModel { }
12991
+ class IJepaModel extends IJepaPreTrainedModel { }
12992
+ class IJepaForImageClassification extends IJepaPreTrainedModel {
12993
+ /**
12994
+ * @param {any} model_inputs
12995
+ */
12996
+ async _call(model_inputs) {
12997
+ return new SequenceClassifierOutput(await super._call(model_inputs));
12998
+ }
12999
+ }
13000
+ //////////////////////////////////////////////////
13001
+
13002
+
12799
13003
  //////////////////////////////////////////////////
12800
13004
  class VitPosePreTrainedModel extends PreTrainedModel { }
12801
13005
 
@@ -14406,9 +14610,11 @@ class ClapModel extends ClapPreTrainedModel { }
14406
14610
  class ClapTextModelWithProjection extends ClapPreTrainedModel {
14407
14611
  /** @type {typeof PreTrainedModel.from_pretrained} */
14408
14612
  static async from_pretrained(pretrained_model_name_or_path, options = {}) {
14409
- // Update default model file name if not provided
14410
- options.model_file_name ??= 'text_model';
14411
- return super.from_pretrained(pretrained_model_name_or_path, options);
14613
+ return super.from_pretrained(pretrained_model_name_or_path, {
14614
+ // Update default model file name if not provided
14615
+ model_file_name: 'text_model',
14616
+ ...options,
14617
+ });
14412
14618
  }
14413
14619
  }
14414
14620
 
@@ -14441,9 +14647,11 @@ class ClapTextModelWithProjection extends ClapPreTrainedModel {
14441
14647
  class ClapAudioModelWithProjection extends ClapPreTrainedModel {
14442
14648
  /** @type {typeof PreTrainedModel.from_pretrained} */
14443
14649
  static async from_pretrained(pretrained_model_name_or_path, options = {}) {
14444
- // Update default model file name if not provided
14445
- options.model_file_name ??= 'audio_model';
14446
- return super.from_pretrained(pretrained_model_name_or_path, options);
14650
+ return super.from_pretrained(pretrained_model_name_or_path, {
14651
+ // Update default model file name if not provided
14652
+ model_file_name: 'audio_model',
14653
+ ...options,
14654
+ });
14447
14655
  }
14448
14656
  }
14449
14657
  //////////////////////////////////////////////////
@@ -15066,6 +15274,7 @@ const MODEL_MAPPING_NAMES_ENCODER_ONLY = new Map([
15066
15274
  ['rt_detr', ['RTDetrModel', RTDetrModel]],
15067
15275
  ['table-transformer', ['TableTransformerModel', TableTransformerModel]],
15068
15276
  ['vit', ['ViTModel', ViTModel]],
15277
+ ['ijepa', ['IJepaModel', IJepaModel]],
15069
15278
  ['pvt', ['PvtModel', PvtModel]],
15070
15279
  ['vit_msn', ['ViTMSNModel', ViTMSNModel]],
15071
15280
  ['vit_mae', ['ViTMAEModel', ViTMAEModel]],
@@ -15129,7 +15338,9 @@ const MODEL_MAPPING_NAMES_DECODER_ONLY = new Map([
15129
15338
  ['gpt_neox', ['GPTNeoXModel', GPTNeoXModel]],
15130
15339
  ['codegen', ['CodeGenModel', CodeGenModel]],
15131
15340
  ['llama', ['LlamaModel', LlamaModel]],
15341
+ ['exaone', ['ExaoneModel', ExaoneModel]],
15132
15342
  ['olmo', ['OlmoModel', OlmoModel]],
15343
+ ['olmo2', ['Olmo2Model', Olmo2Model]],
15133
15344
  ['mobilellm', ['MobileLLMModel', MobileLLMModel]],
15134
15345
  ['granite', ['GraniteModel', GraniteModel]],
15135
15346
  ['cohere', ['CohereModel', CohereModel]],
@@ -15150,6 +15361,7 @@ const MODEL_MAPPING_NAMES_DECODER_ONLY = new Map([
15150
15361
  const MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING_NAMES = new Map([
15151
15362
  ['speecht5', ['SpeechT5ForSpeechToText', SpeechT5ForSpeechToText]],
15152
15363
  ['whisper', ['WhisperForConditionalGeneration', WhisperForConditionalGeneration]],
15364
+ ['moonshine', ['MoonshineForConditionalGeneration', MoonshineForConditionalGeneration]],
15153
15365
  ]);
15154
15366
 
15155
15367
  const MODEL_FOR_TEXT_TO_SPECTROGRAM_MAPPING_NAMES = new Map([
@@ -15220,7 +15432,9 @@ const MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = new Map([
15220
15432
  ['gpt_neox', ['GPTNeoXForCausalLM', GPTNeoXForCausalLM]],
15221
15433
  ['codegen', ['CodeGenForCausalLM', CodeGenForCausalLM]],
15222
15434
  ['llama', ['LlamaForCausalLM', LlamaForCausalLM]],
15435
+ ['exaone', ['ExaoneForCausalLM', ExaoneForCausalLM]],
15223
15436
  ['olmo', ['OlmoForCausalLM', OlmoForCausalLM]],
15437
+ ['olmo2', ['Olmo2ForCausalLM', Olmo2ForCausalLM]],
15224
15438
  ['mobilellm', ['MobileLLMForCausalLM', MobileLLMForCausalLM]],
15225
15439
  ['granite', ['GraniteForCausalLM', GraniteForCausalLM]],
15226
15440
  ['cohere', ['CohereForCausalLM', CohereForCausalLM]],
@@ -15238,6 +15452,9 @@ const MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = new Map([
15238
15452
  ['falcon', ['FalconForCausalLM', FalconForCausalLM]],
15239
15453
  ['trocr', ['TrOCRForCausalLM', TrOCRForCausalLM]],
15240
15454
  ['stablelm', ['StableLmForCausalLM', StableLmForCausalLM]],
15455
+
15456
+ // Also image-text-to-text
15457
+ ['phi3_v', ['Phi3VForCausalLM', Phi3VForCausalLM]],
15241
15458
  ]);
15242
15459
 
15243
15460
  const MODEL_FOR_MULTIMODALITY_MAPPING_NAMES = new Map([
@@ -15294,6 +15511,7 @@ const MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMES = new Map([
15294
15511
  ['florence2', ['Florence2ForConditionalGeneration', Florence2ForConditionalGeneration]],
15295
15512
  ['qwen2-vl', ['Qwen2VLForConditionalGeneration', Qwen2VLForConditionalGeneration]],
15296
15513
  ['idefics3', ['Idefics3ForConditionalGeneration', Idefics3ForConditionalGeneration]],
15514
+ ['paligemma', ['PaliGemmaForConditionalGeneration', PaliGemmaForConditionalGeneration]],
15297
15515
  ]);
15298
15516
 
15299
15517
  const MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING_NAMES = new Map([
@@ -15302,6 +15520,7 @@ const MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING_NAMES = new Map([
15302
15520
 
15303
15521
  const MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES = new Map([
15304
15522
  ['vit', ['ViTForImageClassification', ViTForImageClassification]],
15523
+ ['ijepa', ['IJepaForImageClassification', IJepaForImageClassification]],
15305
15524
  ['pvt', ['PvtForImageClassification', PvtForImageClassification]],
15306
15525
  ['vit_msn', ['ViTMSNForImageClassification', ViTMSNForImageClassification]],
15307
15526
  ['fastvit', ['FastViTForImageClassification', FastViTForImageClassification]],
@@ -15473,6 +15692,7 @@ const CUSTOM_MAPPING = [
15473
15692
  // OVERRIDE:
15474
15693
  // TODO: Refactor to allow class to specify model
15475
15694
  ['MusicgenForConditionalGeneration', MusicgenForConditionalGeneration, MODEL_TYPES.Musicgen],
15695
+ ['Phi3VForCausalLM', Phi3VForCausalLM, MODEL_TYPES.Phi3V],
15476
15696
 
15477
15697
  ['CLIPTextModelWithProjection', CLIPTextModelWithProjection, MODEL_TYPES.EncoderOnly],
15478
15698
  ['SiglipTextModel', SiglipTextModel, MODEL_TYPES.EncoderOnly],
@@ -16727,23 +16947,26 @@ __webpack_require__.r(__webpack_exports__);
16727
16947
  /* harmony export */ __webpack_require__.d(__webpack_exports__, {
16728
16948
  /* harmony export */ ASTFeatureExtractor: () => (/* reexport safe */ _audio_spectrogram_transformer_feature_extraction_audio_spectrogram_transformer_js__WEBPACK_IMPORTED_MODULE_0__.ASTFeatureExtractor),
16729
16949
  /* harmony export */ ClapFeatureExtractor: () => (/* reexport safe */ _clap_feature_extraction_clap_js__WEBPACK_IMPORTED_MODULE_1__.ClapFeatureExtractor),
16730
- /* harmony export */ ImageFeatureExtractor: () => (/* reexport safe */ _base_image_processors_utils_js__WEBPACK_IMPORTED_MODULE_8__.ImageProcessor),
16731
- /* harmony export */ PyAnnoteFeatureExtractor: () => (/* reexport safe */ _pyannote_feature_extraction_pyannote_js__WEBPACK_IMPORTED_MODULE_2__.PyAnnoteFeatureExtractor),
16732
- /* harmony export */ SeamlessM4TFeatureExtractor: () => (/* reexport safe */ _seamless_m4t_feature_extraction_seamless_m4t_js__WEBPACK_IMPORTED_MODULE_3__.SeamlessM4TFeatureExtractor),
16733
- /* harmony export */ SpeechT5FeatureExtractor: () => (/* reexport safe */ _speecht5_feature_extraction_speecht5_js__WEBPACK_IMPORTED_MODULE_4__.SpeechT5FeatureExtractor),
16734
- /* harmony export */ Wav2Vec2FeatureExtractor: () => (/* reexport safe */ _wav2vec2_feature_extraction_wav2vec2_js__WEBPACK_IMPORTED_MODULE_5__.Wav2Vec2FeatureExtractor),
16735
- /* harmony export */ WeSpeakerFeatureExtractor: () => (/* reexport safe */ _wespeaker_feature_extraction_wespeaker_js__WEBPACK_IMPORTED_MODULE_6__.WeSpeakerFeatureExtractor),
16736
- /* harmony export */ WhisperFeatureExtractor: () => (/* reexport safe */ _whisper_feature_extraction_whisper_js__WEBPACK_IMPORTED_MODULE_7__.WhisperFeatureExtractor)
16950
+ /* harmony export */ ImageFeatureExtractor: () => (/* reexport safe */ _base_image_processors_utils_js__WEBPACK_IMPORTED_MODULE_9__.ImageProcessor),
16951
+ /* harmony export */ MoonshineFeatureExtractor: () => (/* reexport safe */ _moonshine_feature_extraction_moonshine_js__WEBPACK_IMPORTED_MODULE_2__.MoonshineFeatureExtractor),
16952
+ /* harmony export */ PyAnnoteFeatureExtractor: () => (/* reexport safe */ _pyannote_feature_extraction_pyannote_js__WEBPACK_IMPORTED_MODULE_3__.PyAnnoteFeatureExtractor),
16953
+ /* harmony export */ SeamlessM4TFeatureExtractor: () => (/* reexport safe */ _seamless_m4t_feature_extraction_seamless_m4t_js__WEBPACK_IMPORTED_MODULE_4__.SeamlessM4TFeatureExtractor),
16954
+ /* harmony export */ SpeechT5FeatureExtractor: () => (/* reexport safe */ _speecht5_feature_extraction_speecht5_js__WEBPACK_IMPORTED_MODULE_5__.SpeechT5FeatureExtractor),
16955
+ /* harmony export */ Wav2Vec2FeatureExtractor: () => (/* reexport safe */ _wav2vec2_feature_extraction_wav2vec2_js__WEBPACK_IMPORTED_MODULE_6__.Wav2Vec2FeatureExtractor),
16956
+ /* harmony export */ WeSpeakerFeatureExtractor: () => (/* reexport safe */ _wespeaker_feature_extraction_wespeaker_js__WEBPACK_IMPORTED_MODULE_7__.WeSpeakerFeatureExtractor),
16957
+ /* harmony export */ WhisperFeatureExtractor: () => (/* reexport safe */ _whisper_feature_extraction_whisper_js__WEBPACK_IMPORTED_MODULE_8__.WhisperFeatureExtractor)
16737
16958
  /* harmony export */ });
16738
16959
  /* harmony import */ var _audio_spectrogram_transformer_feature_extraction_audio_spectrogram_transformer_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ./audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.js */ "./src/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.js");
16739
16960
  /* harmony import */ var _clap_feature_extraction_clap_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ./clap/feature_extraction_clap.js */ "./src/models/clap/feature_extraction_clap.js");
16740
- /* harmony import */ var _pyannote_feature_extraction_pyannote_js__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! ./pyannote/feature_extraction_pyannote.js */ "./src/models/pyannote/feature_extraction_pyannote.js");
16741
- /* harmony import */ var _seamless_m4t_feature_extraction_seamless_m4t_js__WEBPACK_IMPORTED_MODULE_3__ = __webpack_require__(/*! ./seamless_m4t/feature_extraction_seamless_m4t.js */ "./src/models/seamless_m4t/feature_extraction_seamless_m4t.js");
16742
- /* harmony import */ var _speecht5_feature_extraction_speecht5_js__WEBPACK_IMPORTED_MODULE_4__ = __webpack_require__(/*! ./speecht5/feature_extraction_speecht5.js */ "./src/models/speecht5/feature_extraction_speecht5.js");
16743
- /* harmony import */ var _wav2vec2_feature_extraction_wav2vec2_js__WEBPACK_IMPORTED_MODULE_5__ = __webpack_require__(/*! ./wav2vec2/feature_extraction_wav2vec2.js */ "./src/models/wav2vec2/feature_extraction_wav2vec2.js");
16744
- /* harmony import */ var _wespeaker_feature_extraction_wespeaker_js__WEBPACK_IMPORTED_MODULE_6__ = __webpack_require__(/*! ./wespeaker/feature_extraction_wespeaker.js */ "./src/models/wespeaker/feature_extraction_wespeaker.js");
16745
- /* harmony import */ var _whisper_feature_extraction_whisper_js__WEBPACK_IMPORTED_MODULE_7__ = __webpack_require__(/*! ./whisper/feature_extraction_whisper.js */ "./src/models/whisper/feature_extraction_whisper.js");
16746
- /* harmony import */ var _base_image_processors_utils_js__WEBPACK_IMPORTED_MODULE_8__ = __webpack_require__(/*! ../base/image_processors_utils.js */ "./src/base/image_processors_utils.js");
16961
+ /* harmony import */ var _moonshine_feature_extraction_moonshine_js__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! ./moonshine/feature_extraction_moonshine.js */ "./src/models/moonshine/feature_extraction_moonshine.js");
16962
+ /* harmony import */ var _pyannote_feature_extraction_pyannote_js__WEBPACK_IMPORTED_MODULE_3__ = __webpack_require__(/*! ./pyannote/feature_extraction_pyannote.js */ "./src/models/pyannote/feature_extraction_pyannote.js");
16963
+ /* harmony import */ var _seamless_m4t_feature_extraction_seamless_m4t_js__WEBPACK_IMPORTED_MODULE_4__ = __webpack_require__(/*! ./seamless_m4t/feature_extraction_seamless_m4t.js */ "./src/models/seamless_m4t/feature_extraction_seamless_m4t.js");
16964
+ /* harmony import */ var _speecht5_feature_extraction_speecht5_js__WEBPACK_IMPORTED_MODULE_5__ = __webpack_require__(/*! ./speecht5/feature_extraction_speecht5.js */ "./src/models/speecht5/feature_extraction_speecht5.js");
16965
+ /* harmony import */ var _wav2vec2_feature_extraction_wav2vec2_js__WEBPACK_IMPORTED_MODULE_6__ = __webpack_require__(/*! ./wav2vec2/feature_extraction_wav2vec2.js */ "./src/models/wav2vec2/feature_extraction_wav2vec2.js");
16966
+ /* harmony import */ var _wespeaker_feature_extraction_wespeaker_js__WEBPACK_IMPORTED_MODULE_7__ = __webpack_require__(/*! ./wespeaker/feature_extraction_wespeaker.js */ "./src/models/wespeaker/feature_extraction_wespeaker.js");
16967
+ /* harmony import */ var _whisper_feature_extraction_whisper_js__WEBPACK_IMPORTED_MODULE_8__ = __webpack_require__(/*! ./whisper/feature_extraction_whisper.js */ "./src/models/whisper/feature_extraction_whisper.js");
16968
+ /* harmony import */ var _base_image_processors_utils_js__WEBPACK_IMPORTED_MODULE_9__ = __webpack_require__(/*! ../base/image_processors_utils.js */ "./src/base/image_processors_utils.js");
16969
+
16747
16970
 
16748
16971
 
16749
16972
 
@@ -17124,18 +17347,29 @@ class Idefics3ImageProcessor extends _base_image_processors_utils_js__WEBPACK_IM
17124
17347
  const optimal_width = Math.ceil(width / num_splits_w);
17125
17348
 
17126
17349
  // Iterate through each row and column
17127
- for (let r = 0; r < num_splits_h; r++) {
17128
- for (let c = 0; c < num_splits_w; c++) {
17129
- // Calculate the starting point of the crop
17130
- const start_x = c * optimal_width;
17131
- const start_y = r * optimal_height;
17132
-
17133
- // Calculate the ending point of the crop
17134
- const end_x = Math.min(start_x + optimal_width, width);
17135
- const end_y = Math.min(start_y + optimal_height, height);
17136
-
17137
- // Crop the image
17138
- frames.push(pixel_values.slice(null, null, [start_y, end_y], [start_x, end_x]));
17350
+ for (let r = 0; r < num_splits_h; ++r) {
17351
+ for (let c = 0; c < num_splits_w; ++c) {
17352
+ let start_x, start_y, end_x, end_y;
17353
+ if (r === num_splits_h - 1) { // At bottom
17354
+ start_y = height - optimal_height;
17355
+ end_y = height;
17356
+ } else {
17357
+ start_y = r * optimal_height;
17358
+ end_y = (r + 1) * optimal_height;
17359
+ }
17360
+ if (c === num_splits_w - 1) { // At right
17361
+ start_x = width - optimal_width;
17362
+ end_x = width;
17363
+ } else {
17364
+ start_x = c * optimal_width;
17365
+ end_x = (c + 1) * optimal_width;
17366
+ }
17367
+
17368
+ const starts = [start_y, start_x];
17369
+ const ends = [end_y, end_x];
17370
+
17371
+ const patch = await (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.slice)(pixel_values, starts, ends, [2, 3]);
17372
+ frames.push(patch);
17139
17373
  }
17140
17374
  }
17141
17375
 
@@ -17361,21 +17595,22 @@ __webpack_require__.r(__webpack_exports__);
17361
17595
  /* harmony export */ OwlViTFeatureExtractor: () => (/* reexport safe */ _owlvit_image_processing_owlvit_js__WEBPACK_IMPORTED_MODULE_24__.OwlViTFeatureExtractor),
17362
17596
  /* harmony export */ OwlViTImageProcessor: () => (/* reexport safe */ _owlvit_image_processing_owlvit_js__WEBPACK_IMPORTED_MODULE_24__.OwlViTImageProcessor),
17363
17597
  /* harmony export */ Owlv2ImageProcessor: () => (/* reexport safe */ _owlv2_image_processing_owlv2_js__WEBPACK_IMPORTED_MODULE_23__.Owlv2ImageProcessor),
17364
- /* harmony export */ PvtImageProcessor: () => (/* reexport safe */ _pvt_image_processing_pvt_js__WEBPACK_IMPORTED_MODULE_25__.PvtImageProcessor),
17365
- /* harmony export */ Qwen2VLImageProcessor: () => (/* reexport safe */ _qwen2_vl_image_processing_qwen2_vl_js__WEBPACK_IMPORTED_MODULE_26__.Qwen2VLImageProcessor),
17366
- /* harmony export */ RTDetrImageProcessor: () => (/* reexport safe */ _rt_detr_image_processing_rt_detr_js__WEBPACK_IMPORTED_MODULE_27__.RTDetrImageProcessor),
17367
- /* harmony export */ SamImageProcessor: () => (/* reexport safe */ _sam_image_processing_sam_js__WEBPACK_IMPORTED_MODULE_28__.SamImageProcessor),
17368
- /* harmony export */ SegformerFeatureExtractor: () => (/* reexport safe */ _segformer_image_processing_segformer_js__WEBPACK_IMPORTED_MODULE_29__.SegformerFeatureExtractor),
17369
- /* harmony export */ SegformerImageProcessor: () => (/* reexport safe */ _segformer_image_processing_segformer_js__WEBPACK_IMPORTED_MODULE_29__.SegformerImageProcessor),
17370
- /* harmony export */ SiglipImageProcessor: () => (/* reexport safe */ _siglip_image_processing_siglip_js__WEBPACK_IMPORTED_MODULE_30__.SiglipImageProcessor),
17371
- /* harmony export */ Swin2SRImageProcessor: () => (/* reexport safe */ _swin2sr_image_processing_swin2sr_js__WEBPACK_IMPORTED_MODULE_31__.Swin2SRImageProcessor),
17598
+ /* harmony export */ Phi3VImageProcessor: () => (/* reexport safe */ _phi3_v_image_processing_phi3_v_js__WEBPACK_IMPORTED_MODULE_25__.Phi3VImageProcessor),
17599
+ /* harmony export */ PvtImageProcessor: () => (/* reexport safe */ _pvt_image_processing_pvt_js__WEBPACK_IMPORTED_MODULE_26__.PvtImageProcessor),
17600
+ /* harmony export */ Qwen2VLImageProcessor: () => (/* reexport safe */ _qwen2_vl_image_processing_qwen2_vl_js__WEBPACK_IMPORTED_MODULE_27__.Qwen2VLImageProcessor),
17601
+ /* harmony export */ RTDetrImageProcessor: () => (/* reexport safe */ _rt_detr_image_processing_rt_detr_js__WEBPACK_IMPORTED_MODULE_28__.RTDetrImageProcessor),
17602
+ /* harmony export */ SamImageProcessor: () => (/* reexport safe */ _sam_image_processing_sam_js__WEBPACK_IMPORTED_MODULE_29__.SamImageProcessor),
17603
+ /* harmony export */ SegformerFeatureExtractor: () => (/* reexport safe */ _segformer_image_processing_segformer_js__WEBPACK_IMPORTED_MODULE_30__.SegformerFeatureExtractor),
17604
+ /* harmony export */ SegformerImageProcessor: () => (/* reexport safe */ _segformer_image_processing_segformer_js__WEBPACK_IMPORTED_MODULE_30__.SegformerImageProcessor),
17605
+ /* harmony export */ SiglipImageProcessor: () => (/* reexport safe */ _siglip_image_processing_siglip_js__WEBPACK_IMPORTED_MODULE_31__.SiglipImageProcessor),
17606
+ /* harmony export */ Swin2SRImageProcessor: () => (/* reexport safe */ _swin2sr_image_processing_swin2sr_js__WEBPACK_IMPORTED_MODULE_32__.Swin2SRImageProcessor),
17372
17607
  /* harmony export */ VLMImageProcessor: () => (/* reexport safe */ _janus_image_processing_janus_js__WEBPACK_IMPORTED_MODULE_12__.VLMImageProcessor),
17373
- /* harmony export */ ViTFeatureExtractor: () => (/* reexport safe */ _vit_image_processing_vit_js__WEBPACK_IMPORTED_MODULE_32__.ViTFeatureExtractor),
17374
- /* harmony export */ ViTImageProcessor: () => (/* reexport safe */ _vit_image_processing_vit_js__WEBPACK_IMPORTED_MODULE_32__.ViTImageProcessor),
17375
- /* harmony export */ VitMatteImageProcessor: () => (/* reexport safe */ _vitmatte_image_processing_vitmatte_js__WEBPACK_IMPORTED_MODULE_33__.VitMatteImageProcessor),
17376
- /* harmony export */ VitPoseImageProcessor: () => (/* reexport safe */ _vitpose_image_processing_vitpose_js__WEBPACK_IMPORTED_MODULE_34__.VitPoseImageProcessor),
17377
- /* harmony export */ YolosFeatureExtractor: () => (/* reexport safe */ _yolos_image_processing_yolos_js__WEBPACK_IMPORTED_MODULE_35__.YolosFeatureExtractor),
17378
- /* harmony export */ YolosImageProcessor: () => (/* reexport safe */ _yolos_image_processing_yolos_js__WEBPACK_IMPORTED_MODULE_35__.YolosImageProcessor)
17608
+ /* harmony export */ ViTFeatureExtractor: () => (/* reexport safe */ _vit_image_processing_vit_js__WEBPACK_IMPORTED_MODULE_33__.ViTFeatureExtractor),
17609
+ /* harmony export */ ViTImageProcessor: () => (/* reexport safe */ _vit_image_processing_vit_js__WEBPACK_IMPORTED_MODULE_33__.ViTImageProcessor),
17610
+ /* harmony export */ VitMatteImageProcessor: () => (/* reexport safe */ _vitmatte_image_processing_vitmatte_js__WEBPACK_IMPORTED_MODULE_34__.VitMatteImageProcessor),
17611
+ /* harmony export */ VitPoseImageProcessor: () => (/* reexport safe */ _vitpose_image_processing_vitpose_js__WEBPACK_IMPORTED_MODULE_35__.VitPoseImageProcessor),
17612
+ /* harmony export */ YolosFeatureExtractor: () => (/* reexport safe */ _yolos_image_processing_yolos_js__WEBPACK_IMPORTED_MODULE_36__.YolosFeatureExtractor),
17613
+ /* harmony export */ YolosImageProcessor: () => (/* reexport safe */ _yolos_image_processing_yolos_js__WEBPACK_IMPORTED_MODULE_36__.YolosImageProcessor)
17379
17614
  /* harmony export */ });
17380
17615
  /* harmony import */ var _beit_image_processing_beit_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ./beit/image_processing_beit.js */ "./src/models/beit/image_processing_beit.js");
17381
17616
  /* harmony import */ var _bit_image_processing_bit_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ./bit/image_processing_bit.js */ "./src/models/bit/image_processing_bit.js");
@@ -17402,17 +17637,19 @@ __webpack_require__.r(__webpack_exports__);
17402
17637
  /* harmony import */ var _nougat_image_processing_nougat_js__WEBPACK_IMPORTED_MODULE_22__ = __webpack_require__(/*! ./nougat/image_processing_nougat.js */ "./src/models/nougat/image_processing_nougat.js");
17403
17638
  /* harmony import */ var _owlv2_image_processing_owlv2_js__WEBPACK_IMPORTED_MODULE_23__ = __webpack_require__(/*! ./owlv2/image_processing_owlv2.js */ "./src/models/owlv2/image_processing_owlv2.js");
17404
17639
  /* harmony import */ var _owlvit_image_processing_owlvit_js__WEBPACK_IMPORTED_MODULE_24__ = __webpack_require__(/*! ./owlvit/image_processing_owlvit.js */ "./src/models/owlvit/image_processing_owlvit.js");
17405
- /* harmony import */ var _pvt_image_processing_pvt_js__WEBPACK_IMPORTED_MODULE_25__ = __webpack_require__(/*! ./pvt/image_processing_pvt.js */ "./src/models/pvt/image_processing_pvt.js");
17406
- /* harmony import */ var _qwen2_vl_image_processing_qwen2_vl_js__WEBPACK_IMPORTED_MODULE_26__ = __webpack_require__(/*! ./qwen2_vl/image_processing_qwen2_vl.js */ "./src/models/qwen2_vl/image_processing_qwen2_vl.js");
17407
- /* harmony import */ var _rt_detr_image_processing_rt_detr_js__WEBPACK_IMPORTED_MODULE_27__ = __webpack_require__(/*! ./rt_detr/image_processing_rt_detr.js */ "./src/models/rt_detr/image_processing_rt_detr.js");
17408
- /* harmony import */ var _sam_image_processing_sam_js__WEBPACK_IMPORTED_MODULE_28__ = __webpack_require__(/*! ./sam/image_processing_sam.js */ "./src/models/sam/image_processing_sam.js");
17409
- /* harmony import */ var _segformer_image_processing_segformer_js__WEBPACK_IMPORTED_MODULE_29__ = __webpack_require__(/*! ./segformer/image_processing_segformer.js */ "./src/models/segformer/image_processing_segformer.js");
17410
- /* harmony import */ var _siglip_image_processing_siglip_js__WEBPACK_IMPORTED_MODULE_30__ = __webpack_require__(/*! ./siglip/image_processing_siglip.js */ "./src/models/siglip/image_processing_siglip.js");
17411
- /* harmony import */ var _swin2sr_image_processing_swin2sr_js__WEBPACK_IMPORTED_MODULE_31__ = __webpack_require__(/*! ./swin2sr/image_processing_swin2sr.js */ "./src/models/swin2sr/image_processing_swin2sr.js");
17412
- /* harmony import */ var _vit_image_processing_vit_js__WEBPACK_IMPORTED_MODULE_32__ = __webpack_require__(/*! ./vit/image_processing_vit.js */ "./src/models/vit/image_processing_vit.js");
17413
- /* harmony import */ var _vitmatte_image_processing_vitmatte_js__WEBPACK_IMPORTED_MODULE_33__ = __webpack_require__(/*! ./vitmatte/image_processing_vitmatte.js */ "./src/models/vitmatte/image_processing_vitmatte.js");
17414
- /* harmony import */ var _vitpose_image_processing_vitpose_js__WEBPACK_IMPORTED_MODULE_34__ = __webpack_require__(/*! ./vitpose/image_processing_vitpose.js */ "./src/models/vitpose/image_processing_vitpose.js");
17415
- /* harmony import */ var _yolos_image_processing_yolos_js__WEBPACK_IMPORTED_MODULE_35__ = __webpack_require__(/*! ./yolos/image_processing_yolos.js */ "./src/models/yolos/image_processing_yolos.js");
17640
+ /* harmony import */ var _phi3_v_image_processing_phi3_v_js__WEBPACK_IMPORTED_MODULE_25__ = __webpack_require__(/*! ./phi3_v/image_processing_phi3_v.js */ "./src/models/phi3_v/image_processing_phi3_v.js");
17641
+ /* harmony import */ var _pvt_image_processing_pvt_js__WEBPACK_IMPORTED_MODULE_26__ = __webpack_require__(/*! ./pvt/image_processing_pvt.js */ "./src/models/pvt/image_processing_pvt.js");
17642
+ /* harmony import */ var _qwen2_vl_image_processing_qwen2_vl_js__WEBPACK_IMPORTED_MODULE_27__ = __webpack_require__(/*! ./qwen2_vl/image_processing_qwen2_vl.js */ "./src/models/qwen2_vl/image_processing_qwen2_vl.js");
17643
+ /* harmony import */ var _rt_detr_image_processing_rt_detr_js__WEBPACK_IMPORTED_MODULE_28__ = __webpack_require__(/*! ./rt_detr/image_processing_rt_detr.js */ "./src/models/rt_detr/image_processing_rt_detr.js");
17644
+ /* harmony import */ var _sam_image_processing_sam_js__WEBPACK_IMPORTED_MODULE_29__ = __webpack_require__(/*! ./sam/image_processing_sam.js */ "./src/models/sam/image_processing_sam.js");
17645
+ /* harmony import */ var _segformer_image_processing_segformer_js__WEBPACK_IMPORTED_MODULE_30__ = __webpack_require__(/*! ./segformer/image_processing_segformer.js */ "./src/models/segformer/image_processing_segformer.js");
17646
+ /* harmony import */ var _siglip_image_processing_siglip_js__WEBPACK_IMPORTED_MODULE_31__ = __webpack_require__(/*! ./siglip/image_processing_siglip.js */ "./src/models/siglip/image_processing_siglip.js");
17647
+ /* harmony import */ var _swin2sr_image_processing_swin2sr_js__WEBPACK_IMPORTED_MODULE_32__ = __webpack_require__(/*! ./swin2sr/image_processing_swin2sr.js */ "./src/models/swin2sr/image_processing_swin2sr.js");
17648
+ /* harmony import */ var _vit_image_processing_vit_js__WEBPACK_IMPORTED_MODULE_33__ = __webpack_require__(/*! ./vit/image_processing_vit.js */ "./src/models/vit/image_processing_vit.js");
17649
+ /* harmony import */ var _vitmatte_image_processing_vitmatte_js__WEBPACK_IMPORTED_MODULE_34__ = __webpack_require__(/*! ./vitmatte/image_processing_vitmatte.js */ "./src/models/vitmatte/image_processing_vitmatte.js");
17650
+ /* harmony import */ var _vitpose_image_processing_vitpose_js__WEBPACK_IMPORTED_MODULE_35__ = __webpack_require__(/*! ./vitpose/image_processing_vitpose.js */ "./src/models/vitpose/image_processing_vitpose.js");
17651
+ /* harmony import */ var _yolos_image_processing_yolos_js__WEBPACK_IMPORTED_MODULE_36__ = __webpack_require__(/*! ./yolos/image_processing_yolos.js */ "./src/models/yolos/image_processing_yolos.js");
17652
+
17416
17653
 
17417
17654
 
17418
17655
 
@@ -18087,6 +18324,87 @@ class MobileViTImageProcessor extends _base_image_processors_utils_js__WEBPACK_I
18087
18324
  class MobileViTFeatureExtractor extends MobileViTImageProcessor { }
18088
18325
 
18089
18326
 
18327
+ /***/ }),
18328
+
18329
+ /***/ "./src/models/moonshine/feature_extraction_moonshine.js":
18330
+ /*!**************************************************************!*\
18331
+ !*** ./src/models/moonshine/feature_extraction_moonshine.js ***!
18332
+ \**************************************************************/
18333
+ /***/ ((__unused_webpack___webpack_module__, __webpack_exports__, __webpack_require__) => {
18334
+
18335
+ "use strict";
18336
+ __webpack_require__.r(__webpack_exports__);
18337
+ /* harmony export */ __webpack_require__.d(__webpack_exports__, {
18338
+ /* harmony export */ MoonshineFeatureExtractor: () => (/* binding */ MoonshineFeatureExtractor)
18339
+ /* harmony export */ });
18340
+ /* harmony import */ var _base_feature_extraction_utils_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ../../base/feature_extraction_utils.js */ "./src/base/feature_extraction_utils.js");
18341
+ /* harmony import */ var _utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ../../utils/tensor.js */ "./src/utils/tensor.js");
18342
+
18343
+
18344
+
18345
+
18346
/**
 * Feature extractor for Moonshine: the raw waveform is handed to the model
 * unchanged, wrapped in a float32 tensor with a leading batch dimension of 1.
 */
class MoonshineFeatureExtractor extends _base_feature_extraction_utils_js__WEBPACK_IMPORTED_MODULE_0__.FeatureExtractor {
    /**
     * Wraps the given audio samples in a `[1, num_samples]` float32 tensor.
     * @param {Float32Array|Float64Array} audio The audio samples; a Float64Array is copied into a Float32Array first.
     * @returns {Promise<{ input_values: Tensor; }>} The tensor holding the samples.
     */
    async _call(audio) {
        (0,_base_feature_extraction_utils_js__WEBPACK_IMPORTED_MODULE_0__.validate_audio_inputs)(audio, 'MoonshineFeatureExtractor');

        // The tensor is declared as float32, so double-precision input is narrowed up front.
        const samples = audio instanceof Float64Array ? new Float32Array(audio) : audio;

        const batch_size = 1;
        const num_samples = samples.length;
        const input_values = new _utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.Tensor('float32', samples, [batch_size, num_samples]);

        return { input_values };
    }
}
18368
+
18369
+
18370
+ /***/ }),
18371
+
18372
+ /***/ "./src/models/moonshine/processing_moonshine.js":
18373
+ /*!******************************************************!*\
18374
+ !*** ./src/models/moonshine/processing_moonshine.js ***!
18375
+ \******************************************************/
18376
+ /***/ ((__unused_webpack___webpack_module__, __webpack_exports__, __webpack_require__) => {
18377
+
18378
+ "use strict";
18379
+ __webpack_require__.r(__webpack_exports__);
18380
+ /* harmony export */ __webpack_require__.d(__webpack_exports__, {
18381
+ /* harmony export */ MoonshineProcessor: () => (/* binding */ MoonshineProcessor)
18382
+ /* harmony export */ });
18383
+ /* harmony import */ var _auto_feature_extraction_auto_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ../auto/feature_extraction_auto.js */ "./src/models/auto/feature_extraction_auto.js");
18384
+ /* harmony import */ var _tokenizers_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ../../tokenizers.js */ "./src/tokenizers.js");
18385
+ /* harmony import */ var _base_processing_utils_js__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! ../../base/processing_utils.js */ "./src/base/processing_utils.js");
18386
+
18387
+
18388
+
18389
+
18390
/**
 * Processor for Moonshine models. It declares an auto tokenizer and an auto
 * feature extractor; calling it forwards the audio to the feature extractor.
 */
class MoonshineProcessor extends _base_processing_utils_js__WEBPACK_IMPORTED_MODULE_2__.Processor {
    static tokenizer_class = _tokenizers_js__WEBPACK_IMPORTED_MODULE_1__.AutoTokenizer;
    static feature_extractor_class = _auto_feature_extraction_auto_js__WEBPACK_IMPORTED_MODULE_0__.AutoFeatureExtractor;

    /**
     * Runs the feature extractor on the given audio.
     * @param {any} audio The audio input to extract features from.
     * @returns {Promise<any>} Resolves with whatever the feature extractor produces.
     */
    async _call(audio) {
        const features = await this.feature_extractor(audio);
        return features;
    }
}
18406
+
18407
+
18090
18408
  /***/ }),
18091
18409
 
18092
18410
  /***/ "./src/models/nougat/image_processing_nougat.js":
@@ -18180,6 +18498,356 @@ class OwlViTProcessor extends _base_processing_utils_js__WEBPACK_IMPORTED_MODULE
18180
18498
  }
18181
18499
 
18182
18500
 
18501
+ /***/ }),
18502
+
18503
+ /***/ "./src/models/paligemma/processing_paligemma.js":
18504
+ /*!******************************************************!*\
18505
+ !*** ./src/models/paligemma/processing_paligemma.js ***!
18506
+ \******************************************************/
18507
+ /***/ ((__unused_webpack___webpack_module__, __webpack_exports__, __webpack_require__) => {
18508
+
18509
+ "use strict";
18510
+ __webpack_require__.r(__webpack_exports__);
18511
+ /* harmony export */ __webpack_require__.d(__webpack_exports__, {
18512
+ /* harmony export */ PaliGemmaProcessor: () => (/* binding */ PaliGemmaProcessor)
18513
+ /* harmony export */ });
18514
+ /* harmony import */ var _base_processing_utils_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ../../base/processing_utils.js */ "./src/base/processing_utils.js");
18515
+ /* harmony import */ var _auto_image_processing_auto_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ../auto/image_processing_auto.js */ "./src/models/auto/image_processing_auto.js");
18516
+ /* harmony import */ var _tokenizers_js__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! ../../tokenizers.js */ "./src/tokenizers.js");
18517
+
18518
+
18519
+
18520
+
18521
const IMAGE_TOKEN = "<image>";

/**
 * Builds a prompt of the form `<image tokens><bos><prompt>\n`.
 * @param {string} prompt The text prompt.
 * @param {string} bos_token The beginning-of-sequence token inserted after the image tokens.
 * @param {number} image_seq_len How many image tokens represent a single image.
 * @param {string} image_token The image placeholder token.
 * @param {number} num_images How many images accompany the prompt.
 * @returns {string} The assembled input string, terminated by a newline.
 */
function build_string_from_input(prompt, bos_token, image_seq_len, image_token, num_images) {
    const total_image_tokens = image_seq_len * num_images;
    const image_prefix = image_token.repeat(total_image_tokens);
    return `${image_prefix}${bos_token}${prompt}\n`;
}
18532
+
18533
/**
 * Processor for PaliGemma: expands `<image>` placeholders in the prompt(s),
 * inserts the tokenizer's BOS token after the image tokens, and merges the
 * tokenizer output with the image processor output.
 */
class PaliGemmaProcessor extends _base_processing_utils_js__WEBPACK_IMPORTED_MODULE_0__.Processor {
    static tokenizer_class = _tokenizers_js__WEBPACK_IMPORTED_MODULE_2__.AutoTokenizer;
    static image_processor_class = _auto_image_processing_auto_js__WEBPACK_IMPORTED_MODULE_1__.AutoImageProcessor;
    static uses_processor_config = false;

    /**
     * @typedef {import('../../utils/image.js').RawImage} RawImage
     */

    /**
     * Prepares text and image inputs for the model.
     *
     * `images` is required, `text` is optional. A missing/empty `text` is replaced by an
     * empty prompt (with a warning). Both arguments are normalized to arrays.
     *
     * - If at least one prompt contains `<image>`, every `<image>` occurrence in every prompt is
     *   repeated `image_seq_length` times and the BOS token is inserted right after the last
     *   image token (or at the very start of a prompt that has none).
     * - Otherwise a warning is logged and each prompt is prefixed with
     *   `image_seq_length * images.length` image tokens followed by the BOS token.
     *
     * Every resulting string ends with a newline.
     *
     * @param {string|string[]|null} [text=null] The prompt(s).
     * @param {Object} [kwargs={}] Passed through to both the tokenizer and the image processor.
     * @returns {Promise<Object>} The image processor output overlaid with the tokenizer output.
     */
    async _call(/** @type {RawImage|RawImage[]} */ images, text = null, kwargs = {}) {
        if (!text) {
            console.warn(
                "You are using PaliGemma without a text prefix. It will perform as a picture-captioning model."
            );
            text = "";
        }

        const image_list = Array.isArray(images) ? images : [images];
        const prompts = Array.isArray(text) ? text : [text];

        const bos_token = this.tokenizer.bos_token;
        const image_seq_length = this.image_processor.config.image_seq_length;

        const has_image_token = prompts.some((prompt) => prompt.includes(IMAGE_TOKEN));

        let input_strings;
        if (!has_image_token) {
            console.warn(
                "You are passing both `text` and `images` to `PaliGemmaProcessor`. The processor expects special " +
                "image tokens in the text, as many tokens as there are images per each text. It is recommended to " +
                "add `<image>` tokens in the very beginning of your text. For this call, we will infer how many images " +
                "each text has and add special tokens."
            );

            input_strings = prompts.map((prompt) => build_string_from_input(
                prompt,
                bos_token,
                image_seq_length,
                IMAGE_TOKEN,
                image_list.length,
            ));
        } else {
            input_strings = prompts.map((prompt) => {
                const expanded = prompt.replaceAll(IMAGE_TOKEN, IMAGE_TOKEN.repeat(image_seq_length));

                // The BOS token goes directly after the final image token, or first if there is none.
                const last_image_at = expanded.lastIndexOf(IMAGE_TOKEN);
                const bos_at = last_image_at === -1 ? 0 : last_image_at + IMAGE_TOKEN.length;

                return expanded.slice(0, bos_at) + bos_token + expanded.slice(bos_at) + "\n";
            });
        }

        const text_inputs = this.tokenizer(input_strings, kwargs);
        const image_inputs = await this.image_processor(image_list, kwargs);

        // Tokenizer keys are spread last, so they win on any key collision.
        return {
            ...image_inputs,
            ...text_inputs,
        };
    }
}
18599
+
18600
+
18601
+ /***/ }),
18602
+
18603
+ /***/ "./src/models/phi3_v/image_processing_phi3_v.js":
18604
+ /*!******************************************************!*\
18605
+ !*** ./src/models/phi3_v/image_processing_phi3_v.js ***!
18606
+ \******************************************************/
18607
+ /***/ ((__unused_webpack___webpack_module__, __webpack_exports__, __webpack_require__) => {
18608
+
18609
+ "use strict";
18610
+ __webpack_require__.r(__webpack_exports__);
18611
+ /* harmony export */ __webpack_require__.d(__webpack_exports__, {
18612
+ /* harmony export */ Phi3VImageProcessor: () => (/* binding */ Phi3VImageProcessor)
18613
+ /* harmony export */ });
18614
+ /* harmony import */ var _base_image_processors_utils_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ../../base/image_processors_utils.js */ "./src/base/image_processors_utils.js");
18615
+ /* harmony import */ var _utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ../../utils/tensor.js */ "./src/utils/tensor.js");
18616
+
18617
+
18618
+
18619
const IMAGE_SIZE = 336;
const SLICE_AXES = [2, 3]; // axes to slice on
const { ceil, floor, sqrt } = Math;

/**
 * Image processor for Phi3-V. Each image is resized so that it fits within
 * `num_crops` tiles of `IMAGE_SIZE` x `IMAGE_SIZE`, padded with white to a
 * multiple of `IMAGE_SIZE`, and then emitted as one downscaled global view
 * plus a `sqrt(num_crops)` x `sqrt(num_crops)` grid of crops.
 */
class Phi3VImageProcessor extends _base_image_processors_utils_js__WEBPACK_IMPORTED_MODULE_0__.ImageProcessor {
    /**
     * @param {Object} config Image processor configuration. The normalize/pad/convert-rgb/resize
     * flags and `pad_size` are forced regardless of what `config` contains; `config.num_crops`
     * becomes the initial crop budget.
     */
    constructor(config) {
        super({
            ...config,
            do_normalize: true,
            do_pad: true,
            pad_size: 'custom',
            do_convert_rgb: true,
            do_resize: true, // Smart resizing "hd_transform"
        });

        this._num_crops = config.num_crops;
    }

    /**
     * Computes how many image tokens an image of the given size occupies.
     * @param {number} width Image width in pixels.
     * @param {number} height Image height in pixels.
     * @returns {number} The (floored) number of image tokens.
     */
    calc_num_image_tokens_from_image_size(width, height) {
        // @ts-expect-error
        const { num_img_tokens } = this.config;
        const tile_rows = floor(height / IMAGE_SIZE);
        const tile_cols = floor(width / IMAGE_SIZE);
        return floor((tile_rows * tile_cols + 1) * num_img_tokens + 1 + (tile_rows + 1) * sqrt(num_img_tokens));
    }

    /**
     * Picks the resize target: the largest width (a multiple of `IMAGE_SIZE`) for which
     * `scale * ceil(scale / aspect_ratio)` stays within the current crop budget, with the
     * height derived from the original aspect ratio.
     * @type {ImageProcessor['get_resize_output_image_size']}
     */
    get_resize_output_image_size(image, size) {
        const hd_num = this._num_crops;
        const [width, height] = image.size;
        const ratio = width / height;

        // Grow the scale until the crop budget is exceeded, then step back once.
        let scale = 1;
        while (scale * ceil(scale / ratio) <= hd_num) {
            scale += 1;
        }
        scale -= 1;

        const new_w = floor(scale * IMAGE_SIZE);
        const new_h = floor(new_w / ratio);

        return [new_w, new_h];
    }

    /**
     * Phi3V uses a custom padding strategy:
     *  - Pad to a multiple of `IMAGE_SIZE`
     *  - Pad with white pixels
     * The `padSize` argument is ignored; caller-supplied `options` override the defaults used here.
     * @type {ImageProcessor['pad_image']}
     */
    pad_image(pixelData, imgDims, padSize, options = {}) {
        const [imageHeight, imageWidth] = imgDims;
        const height = IMAGE_SIZE * ceil(imageHeight / IMAGE_SIZE);
        const width = IMAGE_SIZE * ceil(imageWidth / IMAGE_SIZE);

        // NOTE: Since padding is done after normalization, we need to fill with the normalized values
        const constant_values = [1, 1, 1].map((x, i) => (x - this.image_mean[i]) / this.image_std[i]);
        return super.pad_image(pixelData, imgDims, { width, height }, {
            center: true,
            constant_values,
            ...options,
        });
    }

    /**
     * Preprocesses one or more images into global-view + crop tensors.
     * @param {any|any[]} images A single image or an array of images.
     * @param {Object} [options]
     * @param {number|null} [options.num_crops=null] Crop budget; falls back to `config.num_crops`.
     * Must be a square number >= 4. The chosen value is stored on the instance because
     * `get_resize_output_image_size` reads it during preprocessing.
     * @returns {Promise<{pixel_values: any, original_sizes: any[], reshaped_input_sizes: any[], image_sizes: any, num_img_tokens: number[]}>}
     * @throws {Error} If `num_crops` is not a square number >= 4.
     */
    async _call(images, {
        num_crops = null,
    } = {}) {
        // @ts-expect-error
        this._num_crops = num_crops ??= this.config.num_crops;
        if (num_crops < 4 || sqrt(num_crops) % 1 !== 0) {
            throw new Error("num_crops must be a square number >= 4");
        }

        if (!Array.isArray(images)) {
            images = [images];
        }

        const num_images = images.length;
        const imageData = await Promise.all(images.map(x => this.preprocess(x)));

        const original_sizes = imageData.map(x => x.original_size);
        const reshaped_input_sizes = imageData.map(x => x.reshaped_input_size);

        // The crop grid is the same for every image, so compute its side length once.
        // NOTE: a "global image only" mode (no crops) is not supported; validation above
        // guarantees at least a 2x2 grid.
        const sqrt_patches = sqrt(num_crops);

        // Process each image in batch
        const all_pixel_values = [];
        for (const { pixel_values } of imageData) {
            pixel_values.unsqueeze_(0); // Easier processing as 4D tensor

            const [height, width] = pixel_values.dims.slice(-2);

            // Global view: the whole image scaled to IMAGE_SIZE x IMAGE_SIZE
            const global_pixel_values = await (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.interpolate_4d)(pixel_values, {
                size: [IMAGE_SIZE, IMAGE_SIZE],
                mode: 'bicubic',
            });

            const patch_width = floor(width / sqrt_patches);
            const patch_height = floor(height / sqrt_patches);

            const patches = [];
            for (let y = 0; y < sqrt_patches; ++y) {
                // The last row is anchored to the bottom edge so that it always spans a full patch.
                const at_bottom = y === sqrt_patches - 1;
                const start_y = at_bottom ? height - patch_height : y * patch_height;
                const end_y = at_bottom ? height : (y + 1) * patch_height;

                for (let x = 0; x < sqrt_patches; ++x) {
                    // Likewise, the last column is anchored to the right edge.
                    const at_right = x === sqrt_patches - 1;
                    const start_x = at_right ? width - patch_width : x * patch_width;
                    const end_x = at_right ? width : (x + 1) * patch_width;

                    const patch = await (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.slice)(pixel_values, [start_y, start_x], [end_y, end_x], SLICE_AXES);
                    patches.push(patch);
                }
            }

            const resized_tensors = await (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.interpolate_4d)((0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.cat)(patches, 0), {
                size: [IMAGE_SIZE, IMAGE_SIZE],
                mode: 'bicubic',
            }); // [num_crops, 3, 336, 336]

            // Concatenate the global image with the patches
            all_pixel_values.push((0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.cat)([global_pixel_values, resized_tensors], 0));
        }

        // [num_images, 1 + num_crops, num_channels=3, height, width]
        const pixel_values = (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.stack)(all_pixel_values, 0);

        // Calculate padded image sizes
        const sizes = reshaped_input_sizes.map(x => x.map(y => IMAGE_SIZE * ceil(y / IMAGE_SIZE)));

        const image_sizes = new _utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.Tensor(
            'int64',
            sizes.flat(),
            [num_images, 2],
        );

        const num_img_tokens = sizes.map(
            ([height, width]) => this.calc_num_image_tokens_from_image_size(width, height),
        );

        return { pixel_values, original_sizes, reshaped_input_sizes, image_sizes, num_img_tokens };
    }
}
18777
+
18778
+
18779
+ /***/ }),
18780
+
18781
+ /***/ "./src/models/phi3_v/processing_phi3_v.js":
18782
+ /*!************************************************!*\
18783
+ !*** ./src/models/phi3_v/processing_phi3_v.js ***!
18784
+ \************************************************/
18785
+ /***/ ((__unused_webpack___webpack_module__, __webpack_exports__, __webpack_require__) => {
18786
+
18787
+ "use strict";
18788
+ __webpack_require__.r(__webpack_exports__);
18789
+ /* harmony export */ __webpack_require__.d(__webpack_exports__, {
18790
+ /* harmony export */ Phi3VProcessor: () => (/* binding */ Phi3VProcessor)
18791
+ /* harmony export */ });
18792
+ /* harmony import */ var _base_processing_utils_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ../../base/processing_utils.js */ "./src/base/processing_utils.js");
18793
+ /* harmony import */ var _auto_image_processing_auto_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ../auto/image_processing_auto.js */ "./src/models/auto/image_processing_auto.js");
18794
+ /* harmony import */ var _tokenizers_js__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! ../../tokenizers.js */ "./src/tokenizers.js");
18795
+ /* harmony import */ var _utils_image_js__WEBPACK_IMPORTED_MODULE_3__ = __webpack_require__(/*! ../../utils/image.js */ "./src/utils/image.js");
18796
+
18797
+
18798
+
18799
+
18800
+
18801
const IMAGE_TOKEN = "<|image|>";
const IMAGE_TOKEN_PATTERN = /<\|image_\d+\|>/g;

/**
 * Processor for Phi3-V: expands numbered image placeholders (e.g. `<|image_1|>`)
 * in the prompt(s) into runs of `<|image|>` tokens and tokenizes the result.
 */
class Phi3VProcessor extends _base_processing_utils_js__WEBPACK_IMPORTED_MODULE_0__.Processor {
    static image_processor_class = _auto_image_processing_auto_js__WEBPACK_IMPORTED_MODULE_1__.AutoImageProcessor;
    static tokenizer_class = _tokenizers_js__WEBPACK_IMPORTED_MODULE_2__.AutoTokenizer;

    /**
     * Prepares text (and optionally image) inputs for the model.
     * @param {string|string[]} text The prompt(s).
     * @param {RawImage|RawImage[]} [images=null] The image(s); when falsy, only the text is tokenized.
     * @param {Object} [options]
     * @param {boolean} [options.padding=true] Forwarded to the tokenizer (image path only).
     * @param {boolean} [options.truncation=true] Forwarded to the tokenizer (image path only).
     * @param {number|null} [options.num_crops=null] Forwarded to the image processor.
     * @returns {Promise<any>} The tokenizer output overlaid with the image processor output (if any).
     */
    async _call(text, images = null, {
        padding = true,
        truncation = true,
        num_crops = null,
    } = {}) {
        const prompts = Array.isArray(text) ? text : [text];

        // Text-only: plain tokenization with the tokenizer's defaults.
        if (!images) {
            return { ...this.tokenizer(prompts) };
        }

        const image_inputs = await this.image_processor(images, { num_crops });
        const { num_img_tokens } = image_inputs;

        // The original implementation adds a bos_token before the image tokens
        // TODO: Check if this affects performance, since it looks like a bug in the original implementation
        // NOTE(review): every placeholder in prompt `i` is expanded with `num_img_tokens[i]`,
        // i.e. this presumably assumes one image per prompt — confirm against callers.
        const expanded_prompts = prompts.map((prompt, i) => {
            const pieces = prompt.split(IMAGE_TOKEN_PATTERN);
            return pieces.join(IMAGE_TOKEN.repeat(num_img_tokens[i]));
        });

        const text_inputs = this.tokenizer(expanded_prompts, { padding, truncation });

        // The model expects image tokens to be negative, so we negate the image token ids.
        // Loose equality is kept on purpose: the tensor's ids and the vocabulary id may differ
        // in numeric type (presumably BigInt vs Number for an int64 tensor — confirm).
        const image_token_id = this.tokenizer.model.convert_tokens_to_ids([IMAGE_TOKEN])[0];
        text_inputs.input_ids.map_(id => (id == image_token_id) ? -id : id);

        return {
            ...text_inputs,
            ...image_inputs,
        };
    }
}
18849
+
18850
+
18183
18851
  /***/ }),
18184
18852
 
18185
18853
  /***/ "./src/models/processors.js":
@@ -18192,30 +18860,39 @@ class OwlViTProcessor extends _base_processing_utils_js__WEBPACK_IMPORTED_MODULE
18192
18860
  __webpack_require__.r(__webpack_exports__);
18193
18861
  /* harmony export */ __webpack_require__.d(__webpack_exports__, {
18194
18862
  /* harmony export */ Florence2Processor: () => (/* reexport safe */ _florence2_processing_florence2_js__WEBPACK_IMPORTED_MODULE_0__.Florence2Processor),
18195
- /* harmony export */ Idefics3Processor: () => (/* reexport safe */ _idefics3_processing_idefics3_js__WEBPACK_IMPORTED_MODULE_2__.Idefics3Processor),
18196
- /* harmony export */ JinaCLIPProcessor: () => (/* reexport safe */ _jina_clip_processing_jina_clip_js__WEBPACK_IMPORTED_MODULE_4__.JinaCLIPProcessor),
18863
+ /* harmony export */ Idefics3Processor: () => (/* reexport safe */ _idefics3_processing_idefics3_js__WEBPACK_IMPORTED_MODULE_3__.Idefics3Processor),
18864
+ /* harmony export */ JinaCLIPProcessor: () => (/* reexport safe */ _jina_clip_processing_jina_clip_js__WEBPACK_IMPORTED_MODULE_5__.JinaCLIPProcessor),
18197
18865
  /* harmony export */ MgpstrProcessor: () => (/* reexport safe */ _mgp_str_processing_mgp_str_js__WEBPACK_IMPORTED_MODULE_1__.MgpstrProcessor),
18198
- /* harmony export */ OwlViTProcessor: () => (/* reexport safe */ _owlvit_processing_owlvit_js__WEBPACK_IMPORTED_MODULE_5__.OwlViTProcessor),
18199
- /* harmony export */ PyAnnoteProcessor: () => (/* reexport safe */ _pyannote_processing_pyannote_js__WEBPACK_IMPORTED_MODULE_6__.PyAnnoteProcessor),
18200
- /* harmony export */ Qwen2VLProcessor: () => (/* reexport safe */ _qwen2_vl_processing_qwen2_vl_js__WEBPACK_IMPORTED_MODULE_7__.Qwen2VLProcessor),
18201
- /* harmony export */ SamProcessor: () => (/* reexport safe */ _sam_processing_sam_js__WEBPACK_IMPORTED_MODULE_8__.SamProcessor),
18202
- /* harmony export */ SpeechT5Processor: () => (/* reexport safe */ _speecht5_processing_speecht5_js__WEBPACK_IMPORTED_MODULE_9__.SpeechT5Processor),
18203
- /* harmony export */ VLChatProcessor: () => (/* reexport safe */ _janus_processing_janus_js__WEBPACK_IMPORTED_MODULE_3__.VLChatProcessor),
18204
- /* harmony export */ Wav2Vec2ProcessorWithLM: () => (/* reexport safe */ _wav2vec2_processing_wav2vec2_js__WEBPACK_IMPORTED_MODULE_10__.Wav2Vec2ProcessorWithLM),
18205
- /* harmony export */ WhisperProcessor: () => (/* reexport safe */ _whisper_processing_whisper_js__WEBPACK_IMPORTED_MODULE_11__.WhisperProcessor)
18866
+ /* harmony export */ MoonshineProcessor: () => (/* reexport safe */ _moonshine_processing_moonshine_js__WEBPACK_IMPORTED_MODULE_2__.MoonshineProcessor),
18867
+ /* harmony export */ OwlViTProcessor: () => (/* reexport safe */ _owlvit_processing_owlvit_js__WEBPACK_IMPORTED_MODULE_6__.OwlViTProcessor),
18868
+ /* harmony export */ PaliGemmaProcessor: () => (/* reexport safe */ _paligemma_processing_paligemma_js__WEBPACK_IMPORTED_MODULE_8__.PaliGemmaProcessor),
18869
+ /* harmony export */ Phi3VProcessor: () => (/* reexport safe */ _phi3_v_processing_phi3_v_js__WEBPACK_IMPORTED_MODULE_7__.Phi3VProcessor),
18870
+ /* harmony export */ PyAnnoteProcessor: () => (/* reexport safe */ _pyannote_processing_pyannote_js__WEBPACK_IMPORTED_MODULE_9__.PyAnnoteProcessor),
18871
+ /* harmony export */ Qwen2VLProcessor: () => (/* reexport safe */ _qwen2_vl_processing_qwen2_vl_js__WEBPACK_IMPORTED_MODULE_10__.Qwen2VLProcessor),
18872
+ /* harmony export */ SamProcessor: () => (/* reexport safe */ _sam_processing_sam_js__WEBPACK_IMPORTED_MODULE_11__.SamProcessor),
18873
+ /* harmony export */ SpeechT5Processor: () => (/* reexport safe */ _speecht5_processing_speecht5_js__WEBPACK_IMPORTED_MODULE_12__.SpeechT5Processor),
18874
+ /* harmony export */ VLChatProcessor: () => (/* reexport safe */ _janus_processing_janus_js__WEBPACK_IMPORTED_MODULE_4__.VLChatProcessor),
18875
+ /* harmony export */ Wav2Vec2ProcessorWithLM: () => (/* reexport safe */ _wav2vec2_processing_wav2vec2_js__WEBPACK_IMPORTED_MODULE_13__.Wav2Vec2ProcessorWithLM),
18876
+ /* harmony export */ WhisperProcessor: () => (/* reexport safe */ _whisper_processing_whisper_js__WEBPACK_IMPORTED_MODULE_14__.WhisperProcessor)
18206
18877
  /* harmony export */ });
18207
18878
  /* harmony import */ var _florence2_processing_florence2_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ./florence2/processing_florence2.js */ "./src/models/florence2/processing_florence2.js");
18208
18879
  /* harmony import */ var _mgp_str_processing_mgp_str_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ./mgp_str/processing_mgp_str.js */ "./src/models/mgp_str/processing_mgp_str.js");
18209
- /* harmony import */ var _idefics3_processing_idefics3_js__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! ./idefics3/processing_idefics3.js */ "./src/models/idefics3/processing_idefics3.js");
18210
- /* harmony import */ var _janus_processing_janus_js__WEBPACK_IMPORTED_MODULE_3__ = __webpack_require__(/*! ./janus/processing_janus.js */ "./src/models/janus/processing_janus.js");
18211
- /* harmony import */ var _jina_clip_processing_jina_clip_js__WEBPACK_IMPORTED_MODULE_4__ = __webpack_require__(/*! ./jina_clip/processing_jina_clip.js */ "./src/models/jina_clip/processing_jina_clip.js");
18212
- /* harmony import */ var _owlvit_processing_owlvit_js__WEBPACK_IMPORTED_MODULE_5__ = __webpack_require__(/*! ./owlvit/processing_owlvit.js */ "./src/models/owlvit/processing_owlvit.js");
18213
- /* harmony import */ var _pyannote_processing_pyannote_js__WEBPACK_IMPORTED_MODULE_6__ = __webpack_require__(/*! ./pyannote/processing_pyannote.js */ "./src/models/pyannote/processing_pyannote.js");
18214
- /* harmony import */ var _qwen2_vl_processing_qwen2_vl_js__WEBPACK_IMPORTED_MODULE_7__ = __webpack_require__(/*! ./qwen2_vl/processing_qwen2_vl.js */ "./src/models/qwen2_vl/processing_qwen2_vl.js");
18215
- /* harmony import */ var _sam_processing_sam_js__WEBPACK_IMPORTED_MODULE_8__ = __webpack_require__(/*! ./sam/processing_sam.js */ "./src/models/sam/processing_sam.js");
18216
- /* harmony import */ var _speecht5_processing_speecht5_js__WEBPACK_IMPORTED_MODULE_9__ = __webpack_require__(/*! ./speecht5/processing_speecht5.js */ "./src/models/speecht5/processing_speecht5.js");
18217
- /* harmony import */ var _wav2vec2_processing_wav2vec2_js__WEBPACK_IMPORTED_MODULE_10__ = __webpack_require__(/*! ./wav2vec2/processing_wav2vec2.js */ "./src/models/wav2vec2/processing_wav2vec2.js");
18218
- /* harmony import */ var _whisper_processing_whisper_js__WEBPACK_IMPORTED_MODULE_11__ = __webpack_require__(/*! ./whisper/processing_whisper.js */ "./src/models/whisper/processing_whisper.js");
18880
+ /* harmony import */ var _moonshine_processing_moonshine_js__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! ./moonshine/processing_moonshine.js */ "./src/models/moonshine/processing_moonshine.js");
18881
+ /* harmony import */ var _idefics3_processing_idefics3_js__WEBPACK_IMPORTED_MODULE_3__ = __webpack_require__(/*! ./idefics3/processing_idefics3.js */ "./src/models/idefics3/processing_idefics3.js");
18882
+ /* harmony import */ var _janus_processing_janus_js__WEBPACK_IMPORTED_MODULE_4__ = __webpack_require__(/*! ./janus/processing_janus.js */ "./src/models/janus/processing_janus.js");
18883
+ /* harmony import */ var _jina_clip_processing_jina_clip_js__WEBPACK_IMPORTED_MODULE_5__ = __webpack_require__(/*! ./jina_clip/processing_jina_clip.js */ "./src/models/jina_clip/processing_jina_clip.js");
18884
+ /* harmony import */ var _owlvit_processing_owlvit_js__WEBPACK_IMPORTED_MODULE_6__ = __webpack_require__(/*! ./owlvit/processing_owlvit.js */ "./src/models/owlvit/processing_owlvit.js");
18885
+ /* harmony import */ var _phi3_v_processing_phi3_v_js__WEBPACK_IMPORTED_MODULE_7__ = __webpack_require__(/*! ./phi3_v/processing_phi3_v.js */ "./src/models/phi3_v/processing_phi3_v.js");
18886
+ /* harmony import */ var _paligemma_processing_paligemma_js__WEBPACK_IMPORTED_MODULE_8__ = __webpack_require__(/*! ./paligemma/processing_paligemma.js */ "./src/models/paligemma/processing_paligemma.js");
18887
+ /* harmony import */ var _pyannote_processing_pyannote_js__WEBPACK_IMPORTED_MODULE_9__ = __webpack_require__(/*! ./pyannote/processing_pyannote.js */ "./src/models/pyannote/processing_pyannote.js");
18888
+ /* harmony import */ var _qwen2_vl_processing_qwen2_vl_js__WEBPACK_IMPORTED_MODULE_10__ = __webpack_require__(/*! ./qwen2_vl/processing_qwen2_vl.js */ "./src/models/qwen2_vl/processing_qwen2_vl.js");
18889
+ /* harmony import */ var _sam_processing_sam_js__WEBPACK_IMPORTED_MODULE_11__ = __webpack_require__(/*! ./sam/processing_sam.js */ "./src/models/sam/processing_sam.js");
18890
+ /* harmony import */ var _speecht5_processing_speecht5_js__WEBPACK_IMPORTED_MODULE_12__ = __webpack_require__(/*! ./speecht5/processing_speecht5.js */ "./src/models/speecht5/processing_speecht5.js");
18891
+ /* harmony import */ var _wav2vec2_processing_wav2vec2_js__WEBPACK_IMPORTED_MODULE_13__ = __webpack_require__(/*! ./wav2vec2/processing_wav2vec2.js */ "./src/models/wav2vec2/processing_wav2vec2.js");
18892
+ /* harmony import */ var _whisper_processing_whisper_js__WEBPACK_IMPORTED_MODULE_14__ = __webpack_require__(/*! ./whisper/processing_whisper.js */ "./src/models/whisper/processing_whisper.js");
18893
+
18894
+
18895
+
18219
18896
 
18220
18897
 
18221
18898
 
@@ -18264,6 +18941,8 @@ __webpack_require__.r(__webpack_exports__);
18264
18941
  /* harmony export */ });
18265
18942
  /* harmony import */ var _base_feature_extraction_utils_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ../../base/feature_extraction_utils.js */ "./src/base/feature_extraction_utils.js");
18266
18943
  /* harmony import */ var _utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ../../utils/tensor.js */ "./src/utils/tensor.js");
18944
+ /* harmony import */ var _utils_maths_js__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! ../../utils/maths.js */ "./src/utils/maths.js");
18945
+
18267
18946
 
18268
18947
 
18269
18948
 
@@ -18291,41 +18970,6 @@ class PyAnnoteFeatureExtractor extends _base_feature_extraction_utils_js__WEBPAC
18291
18970
  };
18292
18971
  }
18293
18972
 
18294
- }
18295
-
18296
-
18297
- /***/ }),
18298
-
18299
- /***/ "./src/models/pyannote/processing_pyannote.js":
18300
- /*!****************************************************!*\
18301
- !*** ./src/models/pyannote/processing_pyannote.js ***!
18302
- \****************************************************/
18303
- /***/ ((__unused_webpack___webpack_module__, __webpack_exports__, __webpack_require__) => {
18304
-
18305
- "use strict";
18306
- __webpack_require__.r(__webpack_exports__);
18307
- /* harmony export */ __webpack_require__.d(__webpack_exports__, {
18308
- /* harmony export */ PyAnnoteProcessor: () => (/* binding */ PyAnnoteProcessor)
18309
- /* harmony export */ });
18310
- /* harmony import */ var _base_processing_utils_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ../../base/processing_utils.js */ "./src/base/processing_utils.js");
18311
- /* harmony import */ var _auto_feature_extraction_auto_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ../auto/feature_extraction_auto.js */ "./src/models/auto/feature_extraction_auto.js");
18312
- /* harmony import */ var _utils_maths_js__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! ../../utils/maths.js */ "./src/utils/maths.js");
18313
-
18314
-
18315
-
18316
-
18317
- class PyAnnoteProcessor extends _base_processing_utils_js__WEBPACK_IMPORTED_MODULE_0__.Processor {
18318
- static feature_extractor_class = _auto_feature_extraction_auto_js__WEBPACK_IMPORTED_MODULE_1__.AutoFeatureExtractor
18319
-
18320
- /**
18321
- * Calls the feature_extractor function with the given audio input.
18322
- * @param {any} audio The audio input to extract features from.
18323
- * @returns {Promise<any>} A Promise that resolves with the extracted features.
18324
- */
18325
- async _call(audio) {
18326
- return await this.feature_extractor(audio)
18327
- }
18328
-
18329
18973
  /**
18330
18974
  * NOTE: Can return fractional values. `Math.ceil` will ensure correct value.
18331
18975
  * @param {number} samples The number of frames in the audio.
@@ -18380,6 +19024,48 @@ class PyAnnoteProcessor extends _base_processing_utils_js__WEBPACK_IMPORTED_MODU
18380
19024
  }
18381
19025
  return results;
18382
19026
  }
19027
+
19028
+ }
19029
+
19030
+
19031
+ /***/ }),
19032
+
19033
+ /***/ "./src/models/pyannote/processing_pyannote.js":
19034
+ /*!****************************************************!*\
19035
+ !*** ./src/models/pyannote/processing_pyannote.js ***!
19036
+ \****************************************************/
19037
+ /***/ ((__unused_webpack___webpack_module__, __webpack_exports__, __webpack_require__) => {
19038
+
19039
+ "use strict";
19040
+ __webpack_require__.r(__webpack_exports__);
19041
+ /* harmony export */ __webpack_require__.d(__webpack_exports__, {
19042
+ /* harmony export */ PyAnnoteProcessor: () => (/* binding */ PyAnnoteProcessor)
19043
+ /* harmony export */ });
19044
+ /* harmony import */ var _base_processing_utils_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ../../base/processing_utils.js */ "./src/base/processing_utils.js");
19045
+ /* harmony import */ var _feature_extraction_pyannote_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ./feature_extraction_pyannote.js */ "./src/models/pyannote/feature_extraction_pyannote.js");
19046
+
19047
+
19048
+
19049
/**
 * Processor for PyAnnote models.
 *
 * Declares `PyAnnoteFeatureExtractor` as its feature extractor class and
 * forwards both audio pre-processing and speaker-diarization post-processing
 * to `this.feature_extractor`.
 */
class PyAnnoteProcessor extends _base_processing_utils_js__WEBPACK_IMPORTED_MODULE_0__.Processor {
    static feature_extractor_class = _feature_extraction_pyannote_js__WEBPACK_IMPORTED_MODULE_1__.PyAnnoteFeatureExtractor;

    /**
     * Runs the wrapped feature extractor on the given audio input.
     * @param {any} audio The audio input to extract features from.
     * @returns {Promise<any>} A Promise that resolves with the extracted features.
     */
    async _call(audio) {
        const features = await this.feature_extractor(audio);
        return features;
    }

    /** @type {PyAnnoteFeatureExtractor['post_process_speaker_diarization']} */
    post_process_speaker_diarization(...params) {
        // Pure delegation: every argument is passed through untouched.
        const extractor = /** @type {PyAnnoteFeatureExtractor} */ (this.feature_extractor);
        return extractor.post_process_speaker_diarization(...params);
    }

    /** Sampling rate as stored in the wrapped feature extractor's config. */
    get sampling_rate() {
        const { config } = this.feature_extractor;
        return config.sampling_rate;
    }
}
18384
19070
 
18385
19071
 
@@ -20129,6 +20815,17 @@ class TensorOpRegistry {
20129
20815
  }
20130
20816
  return this._top_k;
20131
20817
  }
20818
+
20819
+ static get slice() {
20820
+ if (!this._slice) {
20821
+ this._slice = wrap(
20822
+ [8, 7, 18, 0, 58, 96, 10, 25, 10, 1, 120, 10, 1, 115, 10, 1, 101, 10, 1, 97, 10, 1, 116, 18, 1, 121, 34, 5, 83, 108, 105, 99, 101, 18, 1, 114, 90, 9, 10, 1, 120, 18, 4, 10, 2, 8, 1, 90, 9, 10, 1, 115, 18, 4, 10, 2, 8, 7, 90, 9, 10, 1, 101, 18, 4, 10, 2, 8, 7, 90, 9, 10, 1, 97, 18, 4, 10, 2, 8, 7, 90, 9, 10, 1, 116, 18, 4, 10, 2, 8, 7, 98, 9, 10, 1, 121, 18, 4, 10, 2, 8, 1, 66, 2, 16, 13],
20823
+ this.session_options,
20824
+ 'y',
20825
+ )
20826
+ }
20827
+ return this._slice;
20828
+ }
20132
20829
  }
20133
20830
 
20134
20831
 
@@ -21862,6 +22559,8 @@ class AutomaticSpeechRecognitionPipeline extends (/** @type {new (options: TextA
21862
22559
  case 'unispeech-sat':
21863
22560
  case 'hubert':
21864
22561
  return this._call_wav2vec2(audio, kwargs)
22562
+ case 'moonshine':
22563
+ return this._call_moonshine(audio, kwargs)
21865
22564
  default:
21866
22565
  throw new Error(`AutomaticSpeechRecognitionPipeline does not support model type '${this.model.config.model_type}'.`)
21867
22566
  }
@@ -22015,6 +22714,34 @@ class AutomaticSpeechRecognitionPipeline extends (/** @type {new (options: TextA
22015
22714
  }
22016
22715
  return single ? toReturn[0] : toReturn;
22017
22716
  }
22717
+
22718
+ /**
22719
+ * @type {AutomaticSpeechRecognitionPipelineCallback}
22720
+ * @private
22721
+ */
22722
+ async _call_moonshine(audio, kwargs) {
22723
+ const single = !Array.isArray(audio);
22724
+ if (single) {
22725
+ audio = [/** @type {AudioInput} */ (audio)];
22726
+ }
22727
+ const sampling_rate = this.processor.feature_extractor.config.sampling_rate;
22728
+ const preparedAudios = await prepareAudios(audio, sampling_rate);
22729
+ const toReturn = [];
22730
+ for (const aud of preparedAudios) {
22731
+ const inputs = await this.processor(aud);
22732
+
22733
+ // According to the [paper](https://arxiv.org/pdf/2410.15608):
22734
+ // "We use greedy decoding, with a heuristic limit of 6 output tokens
22735
+ // per second of audio to avoid repeated output sequences."
22736
+ const max_new_tokens = Math.floor(aud.length / sampling_rate) * 6;
22737
+ const outputs = await this.model.generate({ max_new_tokens, ...kwargs, ...inputs });
22738
+
22739
+ const text = this.processor.batch_decode(outputs, { skip_special_tokens: true })[0];
22740
+ toReturn.push({ text });
22741
+ }
22742
+ return single ? toReturn[0] : toReturn;
22743
+ }
22744
+
22018
22745
  }
22019
22746
 
22020
22747
  /**
@@ -26133,6 +26860,12 @@ class PreTrainedTokenizer extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_0__
26133
26860
  this.unk_token = this.getToken('unk_token');
26134
26861
  this.unk_token_id = this.model.tokens_to_ids.get(this.unk_token);
26135
26862
 
26863
+ this.bos_token = this.getToken('bos_token');
26864
+ this.bos_token_id = this.model.tokens_to_ids.get(this.bos_token);
26865
+
26866
+ this.eos_token = this.getToken('eos_token');
26867
+ this.eos_token_id = this.model.tokens_to_ids.get(this.eos_token);
26868
+
26136
26869
  this.model_max_length = tokenizerConfig.model_max_length;
26137
26870
 
26138
26871
  /** @type {boolean} Whether or not to strip the text when tokenizing (removing excess spaces before and after the string). */
@@ -27105,6 +27838,11 @@ class WhisperTokenizer extends PreTrainedTokenizer {
27105
27838
  let chunk = new_chunk();
27106
27839
  let time_offset = 0.0;
27107
27840
  const timestamp_begin = this.timestamp_begin;
27841
+ // Whisper timestamp tokens start from 0.00 and go to timestamp 30.00 in 0.02 increments.
27842
+ // We can calculate the last timestamp token as timestamp_begin plus the number of
27843
+ // 0.02-second increments from 0.00 to 30.00, which is 1500.
27844
+ const total_timestamp_tokens = 1500; // (30.00 - 0.00) / 0.02
27845
+ const timestamp_end = timestamp_begin + total_timestamp_tokens;
27108
27846
 
27109
27847
  let previous_tokens = [];
27110
27848
  let previous_token_timestamps = [];
@@ -27192,7 +27930,7 @@ class WhisperTokenizer extends PreTrainedTokenizer {
27192
27930
  } else {
27193
27931
  // 2/ This is a regular special token, ignoring it
27194
27932
  }
27195
- } else if (token >= timestamp_begin) {
27933
+ } else if (token >= timestamp_begin && token <= timestamp_end) {
27196
27934
  // 3/ Timestamp token
27197
27935
  const time = (token - timestamp_begin) * time_precision + time_offset;
27198
27936
  const rounded_time = (0,_utils_maths_js__WEBPACK_IMPORTED_MODULE_3__.round)(time, 2);
@@ -28684,15 +29422,45 @@ __webpack_require__.r(__webpack_exports__);
28684
29422
  */
28685
29423
 
28686
29424
  /**
28687
- * @typedef {Object} ProgressInfo
28688
- * @property {'initiate' | 'download' | 'progress' | 'done'} status The status of the progress item.
28689
- * @property {string} name This can be either:
28690
- * - a string, the *model id* of a model repo on huggingface.co.
28691
- * - a path to a *directory* potentially containing the file.
28692
- * @property {string} file The name of the file
28693
- * @property {number} [progress] A number between 0 and 100. Only available for the 'progress' status.
28694
- * @property {number} [loaded] The number of bytes loaded. Only available for the 'progress' status.
28695
- * @property {number} [total] The total number of bytes to be loaded. Only available for the 'progress' status.
29425
+ * @typedef {Object} InitiateProgressInfo
29426
+ * @property {'initiate'} status
29427
+ * @property {string} name The model id or directory path.
29428
+ * @property {string} file The name of the file.
29429
+ */
29430
+
29431
+ /**
29432
+ * @typedef {Object} DownloadProgressInfo
29433
+ * @property {'download'} status
29434
+ * @property {string} name The model id or directory path.
29435
+ * @property {string} file The name of the file.
29436
+ */
29437
+
29438
+ /**
29439
+ * @typedef {Object} ProgressStatusInfo
29440
+ * @property {'progress'} status
29441
+ * @property {string} name The model id or directory path.
29442
+ * @property {string} file The name of the file.
29443
+ * @property {number} progress A number between 0 and 100.
29444
+ * @property {number} loaded The number of bytes loaded.
29445
+ * @property {number} total The total number of bytes to be loaded.
29446
+ */
29447
+
29448
+ /**
29449
+ * @typedef {Object} DoneProgressInfo
29450
+ * @property {'done'} status
29451
+ * @property {string} name The model id or directory path.
29452
+ * @property {string} file The name of the file.
29453
+ */
29454
+
29455
+ /**
29456
+ * @typedef {Object} ReadyProgressInfo
29457
+ * @property {'ready'} status
29458
+ * @property {string} task The loaded task.
29459
+ * @property {string} model The loaded model.
29460
+ */
29461
+
29462
+ /**
29463
+ * @typedef {InitiateProgressInfo | DownloadProgressInfo | ProgressStatusInfo | DoneProgressInfo | ReadyProgressInfo} ProgressInfo
28696
29464
  */
28697
29465
 
28698
29466
  /**
@@ -30035,13 +30803,6 @@ async function getModelFile(path_or_repo_id, filename, fatal = true, options = {
30035
30803
  file: filename
30036
30804
  })
30037
30805
 
30038
- /** @type {import('./core.js').ProgressInfo} */
30039
- const progressInfo = {
30040
- status: 'progress',
30041
- name: path_or_repo_id,
30042
- file: filename
30043
- }
30044
-
30045
30806
  /** @type {Uint8Array} */
30046
30807
  let buffer;
30047
30808
 
@@ -30061,7 +30822,9 @@ async function getModelFile(path_or_repo_id, filename, fatal = true, options = {
30061
30822
 
30062
30823
  // For completeness, we still fire the final progress callback
30063
30824
  (0,_core_js__WEBPACK_IMPORTED_MODULE_3__.dispatchCallback)(options.progress_callback, {
30064
- ...progressInfo,
30825
+ status: 'progress',
30826
+ name: path_or_repo_id,
30827
+ file: filename,
30065
30828
  progress: 100,
30066
30829
  loaded: buffer.length,
30067
30830
  total: buffer.length,
@@ -30069,7 +30832,9 @@ async function getModelFile(path_or_repo_id, filename, fatal = true, options = {
30069
30832
  } else {
30070
30833
  buffer = await readResponse(response, data => {
30071
30834
  (0,_core_js__WEBPACK_IMPORTED_MODULE_3__.dispatchCallback)(options.progress_callback, {
30072
- ...progressInfo,
30835
+ status: 'progress',
30836
+ name: path_or_repo_id,
30837
+ file: filename,
30073
30838
  ...data,
30074
30839
  })
30075
30840
  })
@@ -30126,12 +30891,11 @@ async function getModelJSON(modelPath, fileName, fatal = true, options = {}) {
30126
30891
 
30127
30892
  return JSON.parse(jsonData);
30128
30893
  }
30129
-
30130
30894
  /**
30131
30895
  * Read and track progress when reading a Response object
30132
30896
  *
30133
- * @param {any} response The Response object to read
30134
- * @param {function} progress_callback The function to call with progress updates
30897
+ * @param {Response|FileResponse} response The Response object to read
30898
+ * @param {(data: {progress: number, loaded: number, total: number}) => void} progress_callback The function to call with progress updates
30135
30899
  * @returns {Promise<Uint8Array>} A Promise that resolves with the Uint8Array buffer
30136
30900
  */
30137
30901
  async function readResponse(response, progress_callback) {
@@ -30528,6 +31292,46 @@ class RawImage {
30528
31292
  return this._update(newData, this.width, this.height, 4);
30529
31293
  }
30530
31294
 
31295
+ /**
31296
+ * Apply an alpha mask to the image. Operates in place.
31297
+ * @param {RawImage} mask The mask to apply. It should have a single channel.
31298
+ * @returns {RawImage} The masked image.
31299
+ * @throws {Error} If the mask is not the same size as the image.
31300
+ * @throws {Error} If the image does not have 4 channels.
31301
+ * @throws {Error} If the mask is not a single channel.
31302
+ */
31303
+ putAlpha(mask) {
31304
+ if (mask.width !== this.width || mask.height !== this.height) {
31305
+ throw new Error(`Expected mask size to be ${this.width}x${this.height}, but got ${mask.width}x${mask.height}`);
31306
+ }
31307
+ if (mask.channels !== 1) {
31308
+ throw new Error(`Expected mask to have 1 channel, but got ${mask.channels}`);
31309
+ }
31310
+
31311
+ const this_data = this.data;
31312
+ const mask_data = mask.data;
31313
+ const num_pixels = this.width * this.height;
31314
+ if (this.channels === 3) {
31315
+ // Convert to RGBA and simultaneously apply mask to alpha channel
31316
+ const newData = new Uint8ClampedArray(num_pixels * 4);
31317
+ for (let i = 0, in_offset = 0, out_offset = 0; i < num_pixels; ++i) {
31318
+ newData[out_offset++] = this_data[in_offset++];
31319
+ newData[out_offset++] = this_data[in_offset++];
31320
+ newData[out_offset++] = this_data[in_offset++];
31321
+ newData[out_offset++] = mask_data[i];
31322
+ }
31323
+ return this._update(newData, this.width, this.height, 4);
31324
+
31325
+ } else if (this.channels === 4) {
31326
+ // Apply mask to alpha channel in place
31327
+ for (let i = 0; i < num_pixels; ++i) {
31328
+ this_data[4 * i + 3] = mask_data[i];
31329
+ }
31330
+ return this;
31331
+ }
31332
+ throw new Error(`Expected image to have 3 or 4 channels, but got ${this.channels}`);
31333
+ }
31334
+
30531
31335
  /**
30532
31336
  * Resize the image to the given dimensions. This method uses the canvas API to perform the resizing.
30533
31337
  * @param {number} width The width of the new image. `null` or `-1` will preserve the aspect ratio.
@@ -32136,7 +32940,9 @@ __webpack_require__.r(__webpack_exports__);
32136
32940
  /* harmony export */ ones_like: () => (/* binding */ ones_like),
32137
32941
  /* harmony export */ permute: () => (/* binding */ permute),
32138
32942
  /* harmony export */ quantize_embeddings: () => (/* binding */ quantize_embeddings),
32943
+ /* harmony export */ rand: () => (/* binding */ rand),
32139
32944
  /* harmony export */ rfft: () => (/* binding */ rfft),
32945
+ /* harmony export */ slice: () => (/* binding */ slice),
32140
32946
  /* harmony export */ stack: () => (/* binding */ stack),
32141
32947
  /* harmony export */ std_mean: () => (/* binding */ std_mean),
32142
32948
  /* harmony export */ topk: () => (/* binding */ topk),
@@ -32915,8 +33721,21 @@ class Tensor {
32915
33721
  if (!DataTypeMap.hasOwnProperty(type)) {
32916
33722
  throw new Error(`Unsupported type: ${type}`);
32917
33723
  }
33724
+
33725
+ // Handle special cases where a mapping function is needed (e.g., where one type is a bigint and the other is a number)
33726
+ let map_fn;
33727
+ const is_source_bigint = ['int64', 'uint64'].includes(this.type);
33728
+ const is_dest_bigint = ['int64', 'uint64'].includes(type);
33729
+ if (is_source_bigint && !is_dest_bigint) {
33730
+ // TypeError: Cannot convert a BigInt value to a number
33731
+ map_fn = Number;
33732
+ } else if (!is_source_bigint && is_dest_bigint) {
33733
+ // TypeError: Cannot convert [x] to a BigInt
33734
+ map_fn = BigInt;
33735
+ }
33736
+
32918
33737
  // @ts-ignore
32919
- return new Tensor(type, DataTypeMap[type].from(this.data), this.dims);
33738
+ return new Tensor(type, DataTypeMap[type].from(this.data, map_fn), this.dims);
32920
33739
  }
32921
33740
  }
32922
33741
 
@@ -33114,6 +33933,29 @@ async function topk(x, k) {
33114
33933
  });
33115
33934
  }
33116
33935
 
33936
+
33937
/**
 * Wraps a plain array of indices in a 1-D `int64` tensor of the same length.
 * @param {number[]} array The index values.
 * @returns {Tensor} A tensor with dims `[array.length]`.
 */
const arrayToIndexTensor = (array) => {
    const dims = [array.length];
    return new Tensor('int64', array, dims);
};

/**
 * Slice a multidimensional float32 tensor.
 * @param {Tensor} data Tensor of data to extract slices from.
 * @param {number[]} starts 1-D array of starting indices of corresponding axis in `axes`.
 * @param {number[]} ends 1-D array of ending indices (exclusive) of corresponding axis in `axes`.
 * @param {number[]} axes 1-D array of axes that `starts` and `ends` apply to.
 * @param {number[]} [steps] 1-D array of slice step of corresponding axis in `axes`.
 * When omitted (or nullish), a step of 1 is used for every entry of `axes`.
 * @returns {Promise<Tensor>} Sliced data tensor.
 */
async function slice(data, starts, ends, axes, steps) {
    const run_slice = await _ops_registry_js__WEBPACK_IMPORTED_MODULE_2__.TensorOpRegistry.slice;

    // Index tensors are built in the same order as the operator's inputs.
    const start_tensor = arrayToIndexTensor(starts);
    const end_tensor = arrayToIndexTensor(ends);
    const axis_tensor = arrayToIndexTensor(axes);
    const step_tensor = arrayToIndexTensor(steps ?? new Array(axes.length).fill(1));

    return await run_slice({
        x: data,
        s: start_tensor,
        e: end_tensor,
        a: axis_tensor,
        t: step_tensor,
    });
}
33957
+
33958
+
33117
33959
  /**
33118
33960
  * Perform mean pooling of the last hidden state followed by a normalization step.
33119
33961
  * @param {Tensor} last_hidden_state Tensor of shape [batchSize, seqLength, embedDim]
@@ -33560,6 +34402,20 @@ function zeros_like(tensor) {
33560
34402
  return zeros(tensor.dims);
33561
34403
  }
33562
34404
 
34405
+ /**
34406
+ * Returns a tensor filled with random numbers from a uniform distribution on the interval [0, 1)
34407
+ * @param {number[]} size A sequence of integers defining the shape of the output tensor.
34408
+ * @returns {Tensor} The random tensor.
34409
+ */
34410
+ function rand(size) {
34411
+ const length = size.reduce((a, b) => a * b, 1);
34412
+ return new Tensor(
34413
+ "float32",
34414
+ Float32Array.from({ length }, () => Math.random()),
34415
+ size,
34416
+ )
34417
+ }
34418
+
33563
34419
  /**
33564
34420
  * Quantizes the embeddings tensor to binary or unsigned binary precision.
33565
34421
  * @param {Tensor} tensor The tensor to quantize.
@@ -33694,7 +34550,7 @@ function quantize_embeddings(tensor, precision) {
33694
34550
  /******/
33695
34551
  /************************************************************************/
33696
34552
  var __webpack_exports__ = {};
33697
- // This entry need to be wrapped in an IIFE because it need to be in strict mode.
34553
+ // This entry needs to be wrapped in an IIFE because it needs to be in strict mode.
33698
34554
  (() => {
33699
34555
  "use strict";
33700
34556
  /*!*****************************!*\
@@ -33905,6 +34761,9 @@ __webpack_require__.r(__webpack_exports__);
33905
34761
  /* harmony export */ EsmModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.EsmModel),
33906
34762
  /* harmony export */ EsmPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.EsmPreTrainedModel),
33907
34763
  /* harmony export */ EsmTokenizer: () => (/* reexport safe */ _tokenizers_js__WEBPACK_IMPORTED_MODULE_3__.EsmTokenizer),
34764
+ /* harmony export */ ExaoneForCausalLM: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.ExaoneForCausalLM),
34765
+ /* harmony export */ ExaoneModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.ExaoneModel),
34766
+ /* harmony export */ ExaonePreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.ExaonePreTrainedModel),
33908
34767
  /* harmony export */ FFT: () => (/* reexport safe */ _utils_maths_js__WEBPACK_IMPORTED_MODULE_8__.FFT),
33909
34768
  /* harmony export */ FalconForCausalLM: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.FalconForCausalLM),
33910
34769
  /* harmony export */ FalconModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.FalconModel),
@@ -33963,6 +34822,9 @@ __webpack_require__.r(__webpack_exports__);
33963
34822
  /* harmony export */ HubertForSequenceClassification: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.HubertForSequenceClassification),
33964
34823
  /* harmony export */ HubertModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.HubertModel),
33965
34824
  /* harmony export */ HubertPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.HubertPreTrainedModel),
34825
+ /* harmony export */ IJepaForImageClassification: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.IJepaForImageClassification),
34826
+ /* harmony export */ IJepaModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.IJepaModel),
34827
+ /* harmony export */ IJepaPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.IJepaPreTrainedModel),
33966
34828
  /* harmony export */ Idefics3ForConditionalGeneration: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Idefics3ForConditionalGeneration),
33967
34829
  /* harmony export */ Idefics3ImageProcessor: () => (/* reexport safe */ _models_image_processors_js__WEBPACK_IMPORTED_MODULE_13__.Idefics3ImageProcessor),
33968
34830
  /* harmony export */ Idefics3PreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Idefics3PreTrainedModel),
@@ -34081,6 +34943,11 @@ __webpack_require__.r(__webpack_exports__);
34081
34943
  /* harmony export */ MobileViTV2PreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MobileViTV2PreTrainedModel),
34082
34944
  /* harmony export */ ModelOutput: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.ModelOutput),
34083
34945
  /* harmony export */ Moondream1ForConditionalGeneration: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Moondream1ForConditionalGeneration),
34946
+ /* harmony export */ MoonshineFeatureExtractor: () => (/* reexport safe */ _models_feature_extractors_js__WEBPACK_IMPORTED_MODULE_10__.MoonshineFeatureExtractor),
34947
+ /* harmony export */ MoonshineForConditionalGeneration: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MoonshineForConditionalGeneration),
34948
+ /* harmony export */ MoonshineModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MoonshineModel),
34949
+ /* harmony export */ MoonshinePreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MoonshinePreTrainedModel),
34950
+ /* harmony export */ MoonshineProcessor: () => (/* reexport safe */ _models_processors_js__WEBPACK_IMPORTED_MODULE_16__.MoonshineProcessor),
34084
34951
  /* harmony export */ MptForCausalLM: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MptForCausalLM),
34085
34952
  /* harmony export */ MptModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MptModel),
34086
34953
  /* harmony export */ MptPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MptPreTrainedModel),
@@ -34101,6 +34968,9 @@ __webpack_require__.r(__webpack_exports__);
34101
34968
  /* harmony export */ OPTModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.OPTModel),
34102
34969
  /* harmony export */ OPTPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.OPTPreTrainedModel),
34103
34970
  /* harmony export */ ObjectDetectionPipeline: () => (/* reexport safe */ _pipelines_js__WEBPACK_IMPORTED_MODULE_1__.ObjectDetectionPipeline),
34971
+ /* harmony export */ Olmo2ForCausalLM: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Olmo2ForCausalLM),
34972
+ /* harmony export */ Olmo2Model: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Olmo2Model),
34973
+ /* harmony export */ Olmo2PreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Olmo2PreTrainedModel),
34104
34974
  /* harmony export */ OlmoForCausalLM: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.OlmoForCausalLM),
34105
34975
  /* harmony export */ OlmoModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.OlmoModel),
34106
34976
  /* harmony export */ OlmoPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.OlmoPreTrainedModel),
@@ -34117,6 +34987,9 @@ __webpack_require__.r(__webpack_exports__);
34117
34987
  /* harmony export */ Owlv2ImageProcessor: () => (/* reexport safe */ _models_image_processors_js__WEBPACK_IMPORTED_MODULE_13__.Owlv2ImageProcessor),
34118
34988
  /* harmony export */ Owlv2Model: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Owlv2Model),
34119
34989
  /* harmony export */ Owlv2PreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Owlv2PreTrainedModel),
34990
+ /* harmony export */ PaliGemmaForConditionalGeneration: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.PaliGemmaForConditionalGeneration),
34991
+ /* harmony export */ PaliGemmaPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.PaliGemmaPreTrainedModel),
34992
+ /* harmony export */ PaliGemmaProcessor: () => (/* reexport safe */ _models_processors_js__WEBPACK_IMPORTED_MODULE_16__.PaliGemmaProcessor),
34120
34993
  /* harmony export */ PatchTSMixerForPrediction: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.PatchTSMixerForPrediction),
34121
34994
  /* harmony export */ PatchTSMixerModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.PatchTSMixerModel),
34122
34995
  /* harmony export */ PatchTSMixerPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.PatchTSMixerPreTrainedModel),
@@ -34126,6 +34999,10 @@ __webpack_require__.r(__webpack_exports__);
34126
34999
  /* harmony export */ Phi3ForCausalLM: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Phi3ForCausalLM),
34127
35000
  /* harmony export */ Phi3Model: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Phi3Model),
34128
35001
  /* harmony export */ Phi3PreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Phi3PreTrainedModel),
35002
+ /* harmony export */ Phi3VForCausalLM: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Phi3VForCausalLM),
35003
+ /* harmony export */ Phi3VImageProcessor: () => (/* reexport safe */ _models_image_processors_js__WEBPACK_IMPORTED_MODULE_13__.Phi3VImageProcessor),
35004
+ /* harmony export */ Phi3VPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Phi3VPreTrainedModel),
35005
+ /* harmony export */ Phi3VProcessor: () => (/* reexport safe */ _models_processors_js__WEBPACK_IMPORTED_MODULE_16__.Phi3VProcessor),
34129
35006
  /* harmony export */ PhiForCausalLM: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.PhiForCausalLM),
34130
35007
  /* harmony export */ PhiModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.PhiModel),
34131
35008
  /* harmony export */ PhiPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.PhiPreTrainedModel),
@@ -34374,9 +35251,11 @@ __webpack_require__.r(__webpack_exports__);
34374
35251
  /* harmony export */ permute_data: () => (/* reexport safe */ _utils_maths_js__WEBPACK_IMPORTED_MODULE_8__.permute_data),
34375
35252
  /* harmony export */ pipeline: () => (/* reexport safe */ _pipelines_js__WEBPACK_IMPORTED_MODULE_1__.pipeline),
34376
35253
  /* harmony export */ quantize_embeddings: () => (/* reexport safe */ _utils_tensor_js__WEBPACK_IMPORTED_MODULE_7__.quantize_embeddings),
35254
+ /* harmony export */ rand: () => (/* reexport safe */ _utils_tensor_js__WEBPACK_IMPORTED_MODULE_7__.rand),
34377
35255
  /* harmony export */ read_audio: () => (/* reexport safe */ _utils_audio_js__WEBPACK_IMPORTED_MODULE_5__.read_audio),
34378
35256
  /* harmony export */ rfft: () => (/* reexport safe */ _utils_tensor_js__WEBPACK_IMPORTED_MODULE_7__.rfft),
34379
35257
  /* harmony export */ round: () => (/* reexport safe */ _utils_maths_js__WEBPACK_IMPORTED_MODULE_8__.round),
35258
+ /* harmony export */ slice: () => (/* reexport safe */ _utils_tensor_js__WEBPACK_IMPORTED_MODULE_7__.slice),
34380
35259
  /* harmony export */ softmax: () => (/* reexport safe */ _utils_maths_js__WEBPACK_IMPORTED_MODULE_8__.softmax),
34381
35260
  /* harmony export */ spectrogram: () => (/* reexport safe */ _utils_audio_js__WEBPACK_IMPORTED_MODULE_5__.spectrogram),
34382
35261
  /* harmony export */ stack: () => (/* reexport safe */ _utils_tensor_js__WEBPACK_IMPORTED_MODULE_7__.stack),
@@ -34452,7 +35331,7 @@ __webpack_require__.r(__webpack_exports__);
34452
35331
  })();
34453
35332
 
34454
35333
  var __webpack_export_target__ = exports;
34455
- for(var i in __webpack_exports__) __webpack_export_target__[i] = __webpack_exports__[i];
35334
+ for(var __webpack_i__ in __webpack_exports__) __webpack_export_target__[__webpack_i__] = __webpack_exports__[__webpack_i__];
34456
35335
  if(__webpack_exports__.__esModule) Object.defineProperty(__webpack_export_target__, "__esModule", { value: true });
34457
35336
  /******/ })()
34458
35337
  ;