npm - @huggingface/transformers - Versions diffs - 3.1.1 → 3.2.0 - Mend

@huggingface/transformers 3.1.1 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (144)

package/README.md +10 -4
package/dist/ort-wasm-simd-threaded.jsep.wasm +0 -0
package/dist/transformers.cjs +1062 -183
package/dist/transformers.cjs.map +1 -1
package/dist/transformers.js +2239 -1232
package/dist/transformers.js.map +1 -1
package/dist/transformers.min.cjs +1 -358
package/dist/transformers.min.cjs.map +1 -1
package/dist/transformers.min.js +1 -421
package/dist/transformers.min.js.map +1 -1
package/dist/transformers.min.mjs +1 -358
package/dist/transformers.min.mjs.map +1 -1
package/dist/transformers.mjs +1082 -181
package/dist/transformers.mjs.map +1 -1
package/package.json +11 -16
package/src/backends/onnx.js +2 -7
package/src/base/image_processors_utils.js +3 -1
package/src/configs.js +11 -2
package/src/env.js +1 -1
package/src/models/feature_extractors.js +1 -0
package/src/models/idefics3/image_processing_idefics3.js +24 -13
package/src/models/image_processors.js +1 -0
package/src/models/moonshine/feature_extraction_moonshine.js +26 -0
package/src/models/moonshine/processing_moonshine.js +20 -0
package/src/models/paligemma/processing_paligemma.js +82 -0
package/src/models/phi3_v/image_processing_phi3_v.js +163 -0
package/src/models/phi3_v/processing_phi3_v.js +53 -0
package/src/models/processors.js +3 -0
package/src/models/pyannote/feature_extraction_pyannote.js +56 -0
package/src/models/pyannote/processing_pyannote.js +7 -54
package/src/models.js +233 -35
package/src/ops/registry.js +11 -0
package/src/pipelines.js +30 -0
package/src/tokenizers.js +12 -1
package/src/utils/core.js +39 -9
package/src/utils/hub.js +8 -12
package/src/utils/image.js +40 -0
package/src/utils/tensor.js +51 -1
package/types/backends/onnx.d.ts +2 -2
package/types/backends/onnx.d.ts.map +1 -1
package/types/base/feature_extraction_utils.d.ts +1 -1
package/types/base/feature_extraction_utils.d.ts.map +1 -1
package/types/base/image_processors_utils.d.ts +4 -4
package/types/base/image_processors_utils.d.ts.map +1 -1
package/types/base/processing_utils.d.ts +4 -4
package/types/base/processing_utils.d.ts.map +1 -1
package/types/configs.d.ts +7 -7
package/types/configs.d.ts.map +1 -1
package/types/env.d.ts +1 -1
package/types/env.d.ts.map +1 -1
package/types/generation/configuration_utils.d.ts +2 -2
package/types/generation/logits_process.d.ts +2 -2
package/types/generation/logits_process.d.ts.map +1 -1
package/types/generation/logits_sampler.d.ts.map +1 -1
package/types/generation/parameters.d.ts +5 -5
package/types/generation/stopping_criteria.d.ts +1 -1
package/types/generation/stopping_criteria.d.ts.map +1 -1
package/types/generation/streamers.d.ts +2 -2
package/types/generation/streamers.d.ts.map +1 -1
package/types/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.d.ts +1 -1
package/types/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.d.ts.map +1 -1
package/types/models/auto/feature_extraction_auto.d.ts.map +1 -1
package/types/models/auto/image_processing_auto.d.ts.map +1 -1
package/types/models/auto/processing_auto.d.ts +1 -1
package/types/models/auto/processing_auto.d.ts.map +1 -1
package/types/models/clap/feature_extraction_clap.d.ts +1 -1
package/types/models/clap/feature_extraction_clap.d.ts.map +1 -1
package/types/models/detr/image_processing_detr.d.ts +11 -11
package/types/models/detr/image_processing_detr.d.ts.map +1 -1
package/types/models/donut/image_processing_donut.d.ts +1 -1
package/types/models/donut/image_processing_donut.d.ts.map +1 -1
package/types/models/feature_extractors.d.ts +1 -0
package/types/models/florence2/processing_florence2.d.ts.map +1 -1
package/types/models/idefics3/image_processing_idefics3.d.ts.map +1 -1
package/types/models/idefics3/processing_idefics3.d.ts.map +1 -1
package/types/models/image_processors.d.ts +1 -0
package/types/models/janus/image_processing_janus.d.ts +1 -1
package/types/models/janus/image_processing_janus.d.ts.map +1 -1
package/types/models/janus/processing_janus.d.ts.map +1 -1
package/types/models/maskformer/image_processing_maskformer.d.ts +8 -8
package/types/models/maskformer/image_processing_maskformer.d.ts.map +1 -1
package/types/models/mgp_str/processing_mgp_str.d.ts +2 -2
package/types/models/mgp_str/processing_mgp_str.d.ts.map +1 -1
package/types/models/moonshine/feature_extraction_moonshine.d.ts +13 -0
package/types/models/moonshine/feature_extraction_moonshine.d.ts.map +1 -0
package/types/models/moonshine/processing_moonshine.d.ts +17 -0
package/types/models/moonshine/processing_moonshine.d.ts.map +1 -0
package/types/models/owlvit/image_processing_owlvit.d.ts.map +1 -1
package/types/models/paligemma/processing_paligemma.d.ts +12 -0
package/types/models/paligemma/processing_paligemma.d.ts.map +1 -0
package/types/models/phi3_v/image_processing_phi3_v.d.ts +17 -0
package/types/models/phi3_v/image_processing_phi3_v.d.ts.map +1 -0
package/types/models/phi3_v/processing_phi3_v.d.ts +17 -0
package/types/models/phi3_v/processing_phi3_v.d.ts.map +1 -0
package/types/models/processors.d.ts +3 -0
package/types/models/pyannote/feature_extraction_pyannote.d.ts +18 -0
package/types/models/pyannote/feature_extraction_pyannote.d.ts.map +1 -1
package/types/models/pyannote/processing_pyannote.d.ts +4 -15
package/types/models/pyannote/processing_pyannote.d.ts.map +1 -1
package/types/models/qwen2_vl/processing_qwen2_vl.d.ts.map +1 -1
package/types/models/rt_detr/image_processing_rt_detr.d.ts.map +1 -1
package/types/models/sam/image_processing_sam.d.ts.map +1 -1
package/types/models/seamless_m4t/feature_extraction_seamless_m4t.d.ts +1 -1
package/types/models/seamless_m4t/feature_extraction_seamless_m4t.d.ts.map +1 -1
package/types/models/segformer/image_processing_segformer.d.ts.map +1 -1
package/types/models/speecht5/processing_speecht5.d.ts.map +1 -1
package/types/models/swin2sr/image_processing_swin2sr.d.ts +1 -1
package/types/models/swin2sr/image_processing_swin2sr.d.ts.map +1 -1
package/types/models/vitmatte/image_processing_vitmatte.d.ts.map +1 -1
package/types/models/vitpose/image_processing_vitpose.d.ts +1 -1
package/types/models/vitpose/image_processing_vitpose.d.ts.map +1 -1
package/types/models/wav2vec2/feature_extraction_wav2vec2.d.ts.map +1 -1
package/types/models/wav2vec2/processing_wav2vec2.d.ts.map +1 -1
package/types/models/wespeaker/feature_extraction_wespeaker.d.ts +1 -1
package/types/models/wespeaker/feature_extraction_wespeaker.d.ts.map +1 -1
package/types/models/whisper/feature_extraction_whisper.d.ts +1 -1
package/types/models/whisper/feature_extraction_whisper.d.ts.map +1 -1
package/types/models/whisper/generation_whisper.d.ts.map +1 -1
package/types/models/whisper/processing_whisper.d.ts.map +1 -1
package/types/models/yolos/image_processing_yolos.d.ts.map +1 -1
package/types/models.d.ts +61 -5
package/types/models.d.ts.map +1 -1
package/types/ops/registry.d.ts +1 -0
package/types/ops/registry.d.ts.map +1 -1
package/types/pipelines.d.ts +31 -51
package/types/pipelines.d.ts.map +1 -1
package/types/tokenizers.d.ts +10 -6
package/types/tokenizers.d.ts.map +1 -1
package/types/utils/audio.d.ts.map +1 -1
package/types/utils/constants.d.ts.map +1 -1
package/types/utils/core.d.ts +87 -22
package/types/utils/core.d.ts.map +1 -1
package/types/utils/data-structures.d.ts.map +1 -1
package/types/utils/devices.d.ts.map +1 -1
package/types/utils/dtypes.d.ts.map +1 -1
package/types/utils/generic.d.ts.map +1 -1
package/types/utils/hub.d.ts +3 -3
package/types/utils/hub.d.ts.map +1 -1
package/types/utils/image.d.ts +10 -1
package/types/utils/image.d.ts.map +1 -1
package/types/utils/maths.d.ts +10 -10
package/types/utils/maths.d.ts.map +1 -1
package/types/utils/tensor.d.ts +22 -6
package/types/utils/tensor.d.ts.map +1 -1

package/dist/transformers.cjs CHANGED

@@ -56,10 +56,10 @@ module.exports = require("url");
 /***/ }),
-/***/ "?cb4d":
-/*!*************************************!*\
-  !*** #onnxruntime-webgpu (ignored) ***!
-  \*************************************/
+/***/ "?8b6b":
+/*!*********************************!*\
+  !*** onnxruntime-web (ignored) ***!
+  \*********************************/
 /***/ (() => {
 /* (ignored) */
@@ -3896,7 +3896,7 @@ const version = '1.20.1';
 "use strict";
 var onnxruntime_node__WEBPACK_IMPORTED_MODULE_1___namespace_cache;
-var _onnxruntime_webgpu__WEBPACK_IMPORTED_MODULE_2___namespace_cache;
+var onnxruntime_web__WEBPACK_IMPORTED_MODULE_2___namespace_cache;
 __webpack_require__.r(__webpack_exports__);
 /* harmony export */ __webpack_require__.d(__webpack_exports__, {
 /* harmony export */   Tensor: () => (/* reexport safe */ onnxruntime_common__WEBPACK_IMPORTED_MODULE_3__.Tensor),
@@ -3907,7 +3907,7 @@ __webpack_require__.r(__webpack_exports__);
 /* harmony export */ });
 /* harmony import */ var _env_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ../env.js */ "./src/env.js");
 /* harmony import */ var onnxruntime_node__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! onnxruntime-node */ "onnxruntime-node");
-/* harmony import */ var _onnxruntime_webgpu__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! #onnxruntime-webgpu */ "?cb4d");
+/* harmony import */ var onnxruntime_web__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! onnxruntime-web */ "?8b6b");
 /* harmony import */ var onnxruntime_common__WEBPACK_IMPORTED_MODULE_3__ = __webpack_require__(/*! onnxruntime-common */ "./node_modules/onnxruntime-common/dist/esm/index.js");
 /**
  * @file Handler file for choosing the correct version of ONNX Runtime, based on the environment.
@@ -3933,11 +3933,6 @@ __webpack_require__.r(__webpack_exports__);
 // In either case, we select the default export if it exists, otherwise we use the named export.
-// Use subpath-imports to ensure Node.js and browser interoperability.
-// See package.json and https://nodejs.org/api/packages.html#subpath-imports
-// for more information.
-// @ts-ignore
@@ -3979,7 +3974,7 @@ if (ORT_SYMBOL in globalThis) {
 } else if (_env_js__WEBPACK_IMPORTED_MODULE_0__.apis.IS_NODE_ENV) {
     ONNX = onnxruntime_node__WEBPACK_IMPORTED_MODULE_1__ ?? /*#__PURE__*/ (onnxruntime_node__WEBPACK_IMPORTED_MODULE_1___namespace_cache || (onnxruntime_node__WEBPACK_IMPORTED_MODULE_1___namespace_cache = __webpack_require__.t(onnxruntime_node__WEBPACK_IMPORTED_MODULE_1__, 2)));
-    // Updated as of ONNX Runtime 1.18.0
+    // Updated as of ONNX Runtime 1.20.1
     // The following table lists the supported versions of ONNX Runtime Node.js binding provided with pre-built binaries.
     // | EPs/Platforms | Windows x64 | Windows arm64 | Linux x64         | Linux arm64 | MacOS x64 | MacOS arm64 |
     // | ------------- | ----------- | ------------- | ----------------- | ----------- | --------- | ----------- |
@@ -4002,7 +3997,7 @@ if (ORT_SYMBOL in globalThis) {
     supportedDevices.push('cpu');
     defaultDevices = ['cpu'];
 } else {
-    ONNX = /*#__PURE__*/ (_onnxruntime_webgpu__WEBPACK_IMPORTED_MODULE_2___namespace_cache || (_onnxruntime_webgpu__WEBPACK_IMPORTED_MODULE_2___namespace_cache = __webpack_require__.t(_onnxruntime_webgpu__WEBPACK_IMPORTED_MODULE_2__, 2)));
+    ONNX = /*#__PURE__*/ (onnxruntime_web__WEBPACK_IMPORTED_MODULE_2___namespace_cache || (onnxruntime_web__WEBPACK_IMPORTED_MODULE_2___namespace_cache = __webpack_require__.t(onnxruntime_web__WEBPACK_IMPORTED_MODULE_2__, 2)));
     if (_env_js__WEBPACK_IMPORTED_MODULE_0__.apis.IS_WEBNN_AVAILABLE) {
         // TODO: Only push supported providers (depending on available hardware)
@@ -4925,7 +4920,7 @@ class ImageProcessor extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_0__.Call
      * Pad the image by a certain amount.
      * @param {Float32Array} pixelData The pixel data to pad.
      * @param {number[]} imgDims The dimensions of the image (height, width, channels).
-     * @param {{width:number; height:number}|number} padSize The dimensions of the padded image.
+     * @param {{width:number; height:number}|number|'square'} padSize The dimensions of the padded image.
      * @param {Object} options The options for padding.
      * @param {'constant'|'symmetric'} [options.mode='constant'] The type of padding to add.
      * @param {boolean} [options.center=false] Whether to center the image.
@@ -4943,6 +4938,8 @@ class ImageProcessor extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_0__.Call
         if (typeof padSize === 'number') {
             paddedImageWidth = padSize;
             paddedImageHeight = padSize;
+        } else if (padSize === 'square') {
+            paddedImageWidth = paddedImageHeight = Math.max(imageHeight, imageWidth);
         } else {
             paddedImageWidth = padSize.width;
             paddedImageHeight = padSize.height;
@@ -5588,8 +5585,6 @@ function getNormalizedConfig(config) {
         case 'gpt_neox':
         case 'stablelm':
         case 'opt':
-        case 'phi':
-        case 'phi3':
         case 'falcon':
             mapping['num_heads'] = 'num_attention_heads';
             mapping['num_layers'] = 'num_hidden_layers';
@@ -5597,6 +5592,7 @@ function getNormalizedConfig(config) {
             break;
         case 'llama':
         case 'olmo':
+        case 'olmo2':
         case 'mobilellm':
         case 'granite':
         case 'cohere':
@@ -5604,6 +5600,9 @@ function getNormalizedConfig(config) {
         case 'starcoder2':
         case 'qwen2':
         case 'qwen2_vl':
+        case 'phi':
+        case 'phi3':
+        case 'phi3_v':
             mapping['num_heads'] = 'num_key_value_heads';
             mapping['num_layers'] = 'num_hidden_layers';
             mapping['hidden_size'] = 'hidden_size';
@@ -5636,6 +5635,12 @@ function getNormalizedConfig(config) {
             mapping['num_layers'] = 'n_layers';
             mapping['hidden_size'] = 'd_model';
             break;
+        case 'exaone':
+            mapping['num_heads'] = 'num_key_value_heads';
+            mapping['num_layers'] = 'num_layers';
+            mapping['dim_kv'] = 'head_dim';
+            mapping['num_attention_heads'] = 'num_attention_heads';
+            break;
         // Encoder-decoder models
         case 't5':
@@ -5677,6 +5682,7 @@ function getNormalizedConfig(config) {
             mapping['encoder_hidden_size'] = mapping['decoder_hidden_size'] = 'd_model';
             break;
         case 'musicgen_decoder':
+        case 'moonshine':
             mapping['num_encoder_layers'] = mapping['num_decoder_layers'] = 'num_hidden_layers';
             mapping['num_encoder_heads'] = mapping['num_decoder_heads'] = 'num_attention_heads';
             mapping['encoder_hidden_size'] = mapping['decoder_hidden_size'] = 'hidden_size';
@@ -5926,7 +5932,7 @@ __webpack_require__.r(__webpack_exports__);
-const VERSION = '3.1.1';
+const VERSION = '3.2.0';
 // Check if various APIs are available (depends on environment)
 const IS_BROWSER_ENV = typeof window !== "undefined" && typeof window.document !== "undefined";
@@ -8024,6 +8030,9 @@ __webpack_require__.r(__webpack_exports__);
 /* harmony export */   EsmForTokenClassification: () => (/* binding */ EsmForTokenClassification),
 /* harmony export */   EsmModel: () => (/* binding */ EsmModel),
 /* harmony export */   EsmPreTrainedModel: () => (/* binding */ EsmPreTrainedModel),
+/* harmony export */   ExaoneForCausalLM: () => (/* binding */ ExaoneForCausalLM),
+/* harmony export */   ExaoneModel: () => (/* binding */ ExaoneModel),
+/* harmony export */   ExaonePreTrainedModel: () => (/* binding */ ExaonePreTrainedModel),
 /* harmony export */   FalconForCausalLM: () => (/* binding */ FalconForCausalLM),
 /* harmony export */   FalconModel: () => (/* binding */ FalconModel),
 /* harmony export */   FalconPreTrainedModel: () => (/* binding */ FalconPreTrainedModel),
@@ -8068,6 +8077,9 @@ __webpack_require__.r(__webpack_exports__);
 /* harmony export */   HubertForSequenceClassification: () => (/* binding */ HubertForSequenceClassification),
 /* harmony export */   HubertModel: () => (/* binding */ HubertModel),
 /* harmony export */   HubertPreTrainedModel: () => (/* binding */ HubertPreTrainedModel),
+/* harmony export */   IJepaForImageClassification: () => (/* binding */ IJepaForImageClassification),
+/* harmony export */   IJepaModel: () => (/* binding */ IJepaModel),
+/* harmony export */   IJepaPreTrainedModel: () => (/* binding */ IJepaPreTrainedModel),
 /* harmony export */   Idefics3ForConditionalGeneration: () => (/* binding */ Idefics3ForConditionalGeneration),
 /* harmony export */   Idefics3PreTrainedModel: () => (/* binding */ Idefics3PreTrainedModel),
 /* harmony export */   ImageMattingOutput: () => (/* binding */ ImageMattingOutput),
@@ -8145,6 +8157,9 @@ __webpack_require__.r(__webpack_exports__);
 /* harmony export */   MobileViTV2PreTrainedModel: () => (/* binding */ MobileViTV2PreTrainedModel),
 /* harmony export */   ModelOutput: () => (/* binding */ ModelOutput),
 /* harmony export */   Moondream1ForConditionalGeneration: () => (/* binding */ Moondream1ForConditionalGeneration),
+/* harmony export */   MoonshineForConditionalGeneration: () => (/* binding */ MoonshineForConditionalGeneration),
+/* harmony export */   MoonshineModel: () => (/* binding */ MoonshineModel),
+/* harmony export */   MoonshinePreTrainedModel: () => (/* binding */ MoonshinePreTrainedModel),
 /* harmony export */   MptForCausalLM: () => (/* binding */ MptForCausalLM),
 /* harmony export */   MptModel: () => (/* binding */ MptModel),
 /* harmony export */   MptPreTrainedModel: () => (/* binding */ MptPreTrainedModel),
@@ -8159,6 +8174,9 @@ __webpack_require__.r(__webpack_exports__);
 /* harmony export */   OPTForCausalLM: () => (/* binding */ OPTForCausalLM),
 /* harmony export */   OPTModel: () => (/* binding */ OPTModel),
 /* harmony export */   OPTPreTrainedModel: () => (/* binding */ OPTPreTrainedModel),
+/* harmony export */   Olmo2ForCausalLM: () => (/* binding */ Olmo2ForCausalLM),
+/* harmony export */   Olmo2Model: () => (/* binding */ Olmo2Model),
+/* harmony export */   Olmo2PreTrainedModel: () => (/* binding */ Olmo2PreTrainedModel),
 /* harmony export */   OlmoForCausalLM: () => (/* binding */ OlmoForCausalLM),
 /* harmony export */   OlmoModel: () => (/* binding */ OlmoModel),
 /* harmony export */   OlmoPreTrainedModel: () => (/* binding */ OlmoPreTrainedModel),
@@ -8171,6 +8189,8 @@ __webpack_require__.r(__webpack_exports__);
 /* harmony export */   Owlv2ForObjectDetection: () => (/* binding */ Owlv2ForObjectDetection),
 /* harmony export */   Owlv2Model: () => (/* binding */ Owlv2Model),
 /* harmony export */   Owlv2PreTrainedModel: () => (/* binding */ Owlv2PreTrainedModel),
+/* harmony export */   PaliGemmaForConditionalGeneration: () => (/* binding */ PaliGemmaForConditionalGeneration),
+/* harmony export */   PaliGemmaPreTrainedModel: () => (/* binding */ PaliGemmaPreTrainedModel),
 /* harmony export */   PatchTSMixerForPrediction: () => (/* binding */ PatchTSMixerForPrediction),
 /* harmony export */   PatchTSMixerModel: () => (/* binding */ PatchTSMixerModel),
 /* harmony export */   PatchTSMixerPreTrainedModel: () => (/* binding */ PatchTSMixerPreTrainedModel),
@@ -8180,6 +8200,8 @@ __webpack_require__.r(__webpack_exports__);
 /* harmony export */   Phi3ForCausalLM: () => (/* binding */ Phi3ForCausalLM),
 /* harmony export */   Phi3Model: () => (/* binding */ Phi3Model),
 /* harmony export */   Phi3PreTrainedModel: () => (/* binding */ Phi3PreTrainedModel),
+/* harmony export */   Phi3VForCausalLM: () => (/* binding */ Phi3VForCausalLM),
+/* harmony export */   Phi3VPreTrainedModel: () => (/* binding */ Phi3VPreTrainedModel),
 /* harmony export */   PhiForCausalLM: () => (/* binding */ PhiForCausalLM),
 /* harmony export */   PhiModel: () => (/* binding */ PhiModel),
 /* harmony export */   PhiPreTrainedModel: () => (/* binding */ PhiPreTrainedModel),
@@ -8425,6 +8447,7 @@ const MODEL_TYPES = {
     ImageTextToText: 6,
     Musicgen: 7,
     MultiModality: 8,
+    Phi3V: 9,
 }
 //////////////////////////////////////////////////
@@ -8852,7 +8875,9 @@ async function decoderForward(self, model_inputs, is_encoder_decoder = false) {
         new_model_inputs.use_cache_branch = boolTensor(!!past_key_values);
     }
     if (session.inputNames.includes('position_ids') && new_model_inputs.attention_mask && !new_model_inputs.position_ids) {
-        new_model_inputs.position_ids = createPositionIds(new_model_inputs, past_key_values);
+        // NOTE: Handle a special case for paligemma models, where positions are 1-indexed
+        const start_index = self.config.model_type === 'paligemma' ? 1 : 0;
+        new_model_inputs.position_ids = createPositionIds(new_model_inputs, past_key_values, start_index);
     }
     // Unpack the `past_key_values` object into model inputs
@@ -8988,14 +9013,14 @@ async function imageTextToTextForward(self, {
  * @param {Tensor} attention_mask
  * @returns {{data: BigInt64Array, dims: number[]}}
  */
-function cumsum_masked_fill(attention_mask) {
+function cumsum_masked_fill(attention_mask, start_index = 0) {
     const [bz, seq_len] = attention_mask.dims;
     const attn_mask_data = attention_mask.data;
     const data = new BigInt64Array(attn_mask_data.length);
     for (let i = 0; i < bz; ++i) {
         const start = i * seq_len;
-        let sum = BigInt(0);
+        let sum = BigInt(start_index);
         for (let j = 0; j < seq_len; ++j) {
             const index = start + j;
             if (attn_mask_data[index] === 0n) {
@@ -9022,10 +9047,10 @@ function cumsum_masked_fill(attention_mask) {
  *     position_ids = position_ids[:, -input_ids.shape[1] :]
  * ```
  */
-function createPositionIds(model_inputs, past_key_values = null) {
+function createPositionIds(model_inputs, past_key_values = null, start_index = 0) {
     const { input_ids, inputs_embeds, attention_mask } = model_inputs;
-    const { data, dims } = cumsum_masked_fill(attention_mask);
+    const { data, dims } = cumsum_masked_fill(attention_mask, start_index);
     let position_ids = new _utils_tensor_js__WEBPACK_IMPORTED_MODULE_9__.Tensor('int64', data, dims);
     if (past_key_values) {
         const offset = -(input_ids ?? inputs_embeds).dims.at(1);
@@ -9198,6 +9223,10 @@ class PreTrainedModel extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_3__.Cal
                 this._forward = imageTextToTextForward;
                 this._prepare_inputs_for_generation = image_text_to_text_prepare_inputs_for_generation;
                 break;
+            case MODEL_TYPES.Phi3V:
+                this.can_generate = true;
+                this._prepare_inputs_for_generation = image_text_to_text_prepare_inputs_for_generation;
+                break;
             case MODEL_TYPES.MultiModality:
                 this.can_generate = true;
@@ -9362,6 +9391,18 @@ class PreTrainedModel extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_3__.Cal
                 }, options),
             ]);
+        } else if (modelType === MODEL_TYPES.Phi3V) {
+            info = await Promise.all([
+                constructSessions(pretrained_model_name_or_path, {
+                    prepare_inputs_embeds: 'prepare_inputs_embeds',
+                    model: 'model',
+                    vision_encoder: 'vision_encoder',
+                }, options),
+                getOptionalConfigs(pretrained_model_name_or_path, {
+                    generation_config: 'generation_config.json',
+                }, options),
+            ]);
         } else { // should be MODEL_TYPES.EncoderOnly
             if (modelType !== MODEL_TYPES.EncoderOnly) {
                 const type = modelName ?? config?.model_type;
@@ -11634,6 +11675,29 @@ class WhisperForConditionalGeneration extends WhisperPreTrainedModel {
 }
 //////////////////////////////////////////////////
+//////////////////////////////////////////////////
+// Moonshine models
+class MoonshinePreTrainedModel extends PreTrainedModel {
+    requires_attention_mask = false;
+    main_input_name = 'input_values';
+    forward_params = [
+        'input_values',
+        'decoder_input_ids',
+        'past_key_values',
+    ];
+};
+/**
+ * MoonshineModel class for training Moonshine models without a language model head.
+ */
+class MoonshineModel extends MoonshinePreTrainedModel { }
+class MoonshineForConditionalGeneration extends MoonshinePreTrainedModel { }
+//////////////////////////////////////////////////
 //////////////////////////////////////////////////
 /**
  * Vision Encoder-Decoder model based on OpenAI's GPT architecture for image captioning and other vision tasks
@@ -11842,6 +11906,30 @@ class Florence2ForConditionalGeneration extends Florence2PreTrainedModel {
     }
 }
+class PaliGemmaPreTrainedModel extends PreTrainedModel {
+    forward_params = [
+        'input_ids',
+        // 'inputs_embeds',
+        'attention_mask',
+        'pixel_values',
+        'position_ids',
+        'past_key_values',
+    ];
+}
+class PaliGemmaForConditionalGeneration extends PaliGemmaPreTrainedModel {
+    _merge_input_ids_with_image_features(kwargs) {
+        const vision_hidden_size = kwargs.image_features.dims.at(-1);
+        const reshaped_image_hidden_states = kwargs.image_features.view(-1, vision_hidden_size);
+        return default_merge_input_ids_with_image_features({
+            // @ts-ignore
+            image_token_id: this.config.image_token_index,
+            ...kwargs,
+            image_features: reshaped_image_hidden_states,
+        })
+    }
+}
 //////////////////////////////////////////////////
 // Idefics3 Models
@@ -11880,6 +11968,77 @@ class Idefics3ForConditionalGeneration extends Idefics3PreTrainedModel {
 }
 //////////////////////////////////////////////////
+class Phi3VPreTrainedModel extends PreTrainedModel {
+    forward_params = [
+        'input_ids',
+        'inputs_embeds',
+        'attention_mask',
+        'position_ids',
+        'pixel_values',
+        'image_sizes',
+        'past_key_values',
+    ];
+}
+class Phi3VForCausalLM extends Phi3VPreTrainedModel {
+    async forward({
+        // Produced by the tokenizer/processor:
+        input_ids = null,
+        attention_mask = null,
+        pixel_values = null,
+        image_sizes = null,
+        // Used during generation:
+        position_ids = null,
+        inputs_embeds = null,
+        past_key_values = null,
+        // Generic generation parameters
+        generation_config = null,
+        logits_processor = null,
+        // TODO: needed?
+        ...kwargs
+    }) {
+        if (!inputs_embeds) {
+            let image_features;
+            if (pixel_values && input_ids.dims[1] !== 1) {
+                if (!image_sizes) {
+                    throw new Error('`image_sizes` must be provided when `pixel_values` is provided.');
+                }
+                // Encode the image
+                ({ image_features } = await sessionRun(this.sessions['vision_encoder'], {
+                    pixel_values,
+                    image_sizes,
+                }));
+            } else {
+                const hidden_size = this.config.normalized_config.hidden_size;
+                image_features = new _utils_tensor_js__WEBPACK_IMPORTED_MODULE_9__.Tensor(
+                    'float32',
+                    [],
+                    [0, hidden_size],
+                );
+            }
+            ({ inputs_embeds } = await sessionRun(this.sessions['prepare_inputs_embeds'], {
+                input_ids,
+                image_features,
+            }));
+        }
+        const outputs = await decoderForward(this, {
+            inputs_embeds,
+            past_key_values,
+            attention_mask,
+            position_ids,
+            generation_config,
+            logits_processor,
+        }, false);
+        return outputs;
+    }
+}
 //////////////////////////////////////////////////
 class CLIPPreTrainedModel extends PreTrainedModel { }
@@ -11934,9 +12093,11 @@ class CLIPModel extends CLIPPreTrainedModel { }
 class CLIPTextModel extends CLIPPreTrainedModel {
     /** @type {typeof PreTrainedModel.from_pretrained} */
     static async from_pretrained(pretrained_model_name_or_path, options = {}) {
-        // Update default model file name if not provided
-        options.model_file_name ??= 'text_model';
-        return super.from_pretrained(pretrained_model_name_or_path, options);
+        return super.from_pretrained(pretrained_model_name_or_path, {
+            // Update default model file name if not provided
+            model_file_name: 'text_model',
+            ...options,
+        });
     }
 }
@@ -11969,9 +12130,11 @@ class CLIPTextModel extends CLIPPreTrainedModel {
 class CLIPTextModelWithProjection extends CLIPPreTrainedModel {
     /** @type {typeof PreTrainedModel.from_pretrained} */
     static async from_pretrained(pretrained_model_name_or_path, options = {}) {
-        // Update default model file name if not provided
-        options.model_file_name ??= 'text_model';
-        return super.from_pretrained(pretrained_model_name_or_path, options);
+        return super.from_pretrained(pretrained_model_name_or_path, {
+            // Update default model file name if not provided
+            model_file_name: 'text_model',
+            ...options,
+        });
     }
 }
@@ -11981,9 +12144,11 @@ class CLIPTextModelWithProjection extends CLIPPreTrainedModel {
 class CLIPVisionModel extends CLIPPreTrainedModel {
     /** @type {typeof PreTrainedModel.from_pretrained} */
     static async from_pretrained(pretrained_model_name_or_path, options = {}) {
-        // Update default model file name if not provided
-        options.model_file_name ??= 'vision_model';
-        return super.from_pretrained(pretrained_model_name_or_path, options);
+        return super.from_pretrained(pretrained_model_name_or_path, {
+            // Update default model file name if not provided
+            model_file_name: 'vision_model',
+            ...options,
+        });
     }
 }
@@ -12016,9 +12181,11 @@ class CLIPVisionModel extends CLIPPreTrainedModel {
 class CLIPVisionModelWithProjection extends CLIPPreTrainedModel {
     /** @type {typeof PreTrainedModel.from_pretrained} */
     static async from_pretrained(pretrained_model_name_or_path, options = {}) {
-        // Update default model file name if not provided
-        options.model_file_name ??= 'vision_model';
-        return super.from_pretrained(pretrained_model_name_or_path, options);
+        return super.from_pretrained(pretrained_model_name_or_path, {
+            // Update default model file name if not provided
+            model_file_name: 'vision_model',
+            ...options,
+        });
     }
 }
 //////////////////////////////////////////////////
@@ -12102,9 +12269,11 @@ class SiglipModel extends SiglipPreTrainedModel { }
 class SiglipTextModel extends SiglipPreTrainedModel {
     /** @type {typeof PreTrainedModel.from_pretrained} */
     static async from_pretrained(pretrained_model_name_or_path, options = {}) {
-        // Update default model file name if not provided
-        options.model_file_name ??= 'text_model';
-        return super.from_pretrained(pretrained_model_name_or_path, options);
+        return super.from_pretrained(pretrained_model_name_or_path, {
+            // Update default model file name if not provided
+            model_file_name: 'text_model',
+            ...options,
+        });
     }
 }
@@ -12137,9 +12306,11 @@ class SiglipTextModel extends SiglipPreTrainedModel {
 class SiglipVisionModel extends CLIPPreTrainedModel {
     /** @type {typeof PreTrainedModel.from_pretrained} */
     static async from_pretrained(pretrained_model_name_or_path, options = {}) {
-        // Update default model file name if not provided
-        options.model_file_name ??= 'vision_model';
-        return super.from_pretrained(pretrained_model_name_or_path, options);
+        return super.from_pretrained(pretrained_model_name_or_path, {
+            // Update default model file name if not provided
+            model_file_name: 'vision_model',
+            ...options,
+        });
     }
 }
 //////////////////////////////////////////////////
@@ -12194,18 +12365,22 @@ class JinaCLIPModel extends JinaCLIPPreTrainedModel {
 class JinaCLIPTextModel extends JinaCLIPPreTrainedModel {
     /** @type {typeof PreTrainedModel.from_pretrained} */
     static async from_pretrained(pretrained_model_name_or_path, options = {}) {
-        // Update default model file name if not provided
-        options.model_file_name ??= 'text_model';
-        return super.from_pretrained(pretrained_model_name_or_path, options);
+        return super.from_pretrained(pretrained_model_name_or_path, {
+            // Update default model file name if not provided
+            model_file_name: 'text_model',
+            ...options,
+        });
     }
 }
 class JinaCLIPVisionModel extends JinaCLIPPreTrainedModel {
     /** @type {typeof PreTrainedModel.from_pretrained} */
     static async from_pretrained(pretrained_model_name_or_path, options = {}) {
-        // Update default model file name if not provided
-        options.model_file_name ??= 'vision_model';
-        return super.from_pretrained(pretrained_model_name_or_path, options);
+        return super.from_pretrained(pretrained_model_name_or_path, {
+            // Update default model file name if not provided
+            model_file_name: 'vision_model',
+            ...options,
+        });
     }
 }
 //////////////////////////////////////////////////
@@ -12365,6 +12540,14 @@ class LlamaForCausalLM extends LlamaPreTrainedModel { }
 //////////////////////////////////////////////////
+//////////////////////////////////////////////////
+// EXAONE models
+class ExaonePreTrainedModel extends PreTrainedModel { }
+class ExaoneModel extends ExaonePreTrainedModel { }
+class ExaoneForCausalLM extends ExaonePreTrainedModel { }
+//////////////////////////////////////////////////
 //////////////////////////////////////////////////
 // MobileLLM models
 class MobileLLMPreTrainedModel extends PreTrainedModel { }
@@ -12380,6 +12563,13 @@ class OlmoModel extends OlmoPreTrainedModel { }
 class OlmoForCausalLM extends OlmoPreTrainedModel { }
 //////////////////////////////////////////////////
+//////////////////////////////////////////////////
+// OLMo2 models
+class Olmo2PreTrainedModel extends PreTrainedModel { }
+class Olmo2Model extends Olmo2PreTrainedModel { }
+class Olmo2ForCausalLM extends Olmo2PreTrainedModel { }
+//////////////////////////////////////////////////
 //////////////////////////////////////////////////
 // Granite models
@@ -12796,6 +12986,20 @@ class ViTForImageClassification extends ViTPreTrainedModel {
 //////////////////////////////////////////////////
+//////////////////////////////////////////////////
+class IJepaPreTrainedModel extends PreTrainedModel { }
+class IJepaModel extends IJepaPreTrainedModel { }
+class IJepaForImageClassification extends IJepaPreTrainedModel {
+    /**
+     * @param {any} model_inputs
+     */
+    async _call(model_inputs) {
+        return new SequenceClassifierOutput(await super._call(model_inputs));
+    }
+}
+//////////////////////////////////////////////////
 //////////////////////////////////////////////////
 class VitPosePreTrainedModel extends PreTrainedModel { }
@@ -14406,9 +14610,11 @@ class ClapModel extends ClapPreTrainedModel { }
 class ClapTextModelWithProjection extends ClapPreTrainedModel {
     /** @type {typeof PreTrainedModel.from_pretrained} */
     static async from_pretrained(pretrained_model_name_or_path, options = {}) {
-        // Update default model file name if not provided
-        options.model_file_name ??= 'text_model';
-        return super.from_pretrained(pretrained_model_name_or_path, options);
+        return super.from_pretrained(pretrained_model_name_or_path, {
+            // Update default model file name if not provided
+            model_file_name: 'text_model',
+            ...options,
+        });
     }
 }
@@ -14441,9 +14647,11 @@ class ClapTextModelWithProjection extends ClapPreTrainedModel {
 class ClapAudioModelWithProjection extends ClapPreTrainedModel {
     /** @type {typeof PreTrainedModel.from_pretrained} */
     static async from_pretrained(pretrained_model_name_or_path, options = {}) {
-        // Update default model file name if not provided
-        options.model_file_name ??= 'audio_model';
-        return super.from_pretrained(pretrained_model_name_or_path, options);
+        return super.from_pretrained(pretrained_model_name_or_path, {
+            // Update default model file name if not provided
+            model_file_name: 'audio_model',
+            ...options,
+        });
     }
 }
 //////////////////////////////////////////////////
@@ -15066,6 +15274,7 @@ const MODEL_MAPPING_NAMES_ENCODER_ONLY = new Map([
     ['rt_detr', ['RTDetrModel', RTDetrModel]],
     ['table-transformer', ['TableTransformerModel', TableTransformerModel]],
     ['vit', ['ViTModel', ViTModel]],
+    ['ijepa', ['IJepaModel', IJepaModel]],
     ['pvt', ['PvtModel', PvtModel]],
     ['vit_msn', ['ViTMSNModel', ViTMSNModel]],
     ['vit_mae', ['ViTMAEModel', ViTMAEModel]],
@@ -15129,7 +15338,9 @@ const MODEL_MAPPING_NAMES_DECODER_ONLY = new Map([
     ['gpt_neox', ['GPTNeoXModel', GPTNeoXModel]],
     ['codegen', ['CodeGenModel', CodeGenModel]],
     ['llama', ['LlamaModel', LlamaModel]],
+    ['exaone', ['ExaoneModel', ExaoneModel]],
     ['olmo', ['OlmoModel', OlmoModel]],
+    ['olmo2', ['Olmo2Model', Olmo2Model]],
     ['mobilellm', ['MobileLLMModel', MobileLLMModel]],
     ['granite', ['GraniteModel', GraniteModel]],
     ['cohere', ['CohereModel', CohereModel]],
@@ -15150,6 +15361,7 @@ const MODEL_MAPPING_NAMES_DECODER_ONLY = new Map([
 const MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING_NAMES = new Map([
     ['speecht5', ['SpeechT5ForSpeechToText', SpeechT5ForSpeechToText]],
     ['whisper', ['WhisperForConditionalGeneration', WhisperForConditionalGeneration]],
+    ['moonshine', ['MoonshineForConditionalGeneration', MoonshineForConditionalGeneration]],
 ]);
 const MODEL_FOR_TEXT_TO_SPECTROGRAM_MAPPING_NAMES = new Map([
@@ -15220,7 +15432,9 @@ const MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = new Map([
     ['gpt_neox', ['GPTNeoXForCausalLM', GPTNeoXForCausalLM]],
     ['codegen', ['CodeGenForCausalLM', CodeGenForCausalLM]],
     ['llama', ['LlamaForCausalLM', LlamaForCausalLM]],
+    ['exaone', ['ExaoneForCausalLM', ExaoneForCausalLM]],
     ['olmo', ['OlmoForCausalLM', OlmoForCausalLM]],
+    ['olmo2', ['Olmo2ForCausalLM', Olmo2ForCausalLM]],
     ['mobilellm', ['MobileLLMForCausalLM', MobileLLMForCausalLM]],
     ['granite', ['GraniteForCausalLM', GraniteForCausalLM]],
     ['cohere', ['CohereForCausalLM', CohereForCausalLM]],
@@ -15238,6 +15452,9 @@ const MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = new Map([
     ['falcon', ['FalconForCausalLM', FalconForCausalLM]],
     ['trocr', ['TrOCRForCausalLM', TrOCRForCausalLM]],
     ['stablelm', ['StableLmForCausalLM', StableLmForCausalLM]],
+    // Also image-text-to-text
+    ['phi3_v', ['Phi3VForCausalLM', Phi3VForCausalLM]],
 ]);
 const MODEL_FOR_MULTIMODALITY_MAPPING_NAMES = new Map([
@@ -15294,6 +15511,7 @@ const MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMES = new Map([
     ['florence2', ['Florence2ForConditionalGeneration', Florence2ForConditionalGeneration]],
     ['qwen2-vl', ['Qwen2VLForConditionalGeneration', Qwen2VLForConditionalGeneration]],
     ['idefics3', ['Idefics3ForConditionalGeneration', Idefics3ForConditionalGeneration]],
+    ['paligemma', ['PaliGemmaForConditionalGeneration', PaliGemmaForConditionalGeneration]],
 ]);
 const MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING_NAMES = new Map([
@@ -15302,6 +15520,7 @@ const MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING_NAMES = new Map([
 const MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES = new Map([
     ['vit', ['ViTForImageClassification', ViTForImageClassification]],
+    ['ijepa', ['IJepaForImageClassification', IJepaForImageClassification]],
     ['pvt', ['PvtForImageClassification', PvtForImageClassification]],
     ['vit_msn', ['ViTMSNForImageClassification', ViTMSNForImageClassification]],
     ['fastvit', ['FastViTForImageClassification', FastViTForImageClassification]],
@@ -15473,6 +15692,7 @@ const CUSTOM_MAPPING = [
     // OVERRIDE:
     // TODO: Refactor to allow class to specify model
     ['MusicgenForConditionalGeneration', MusicgenForConditionalGeneration, MODEL_TYPES.Musicgen],
+    ['Phi3VForCausalLM', Phi3VForCausalLM, MODEL_TYPES.Phi3V],
     ['CLIPTextModelWithProjection', CLIPTextModelWithProjection, MODEL_TYPES.EncoderOnly],
     ['SiglipTextModel', SiglipTextModel, MODEL_TYPES.EncoderOnly],
@@ -16727,23 +16947,26 @@ __webpack_require__.r(__webpack_exports__);
 /* harmony export */ __webpack_require__.d(__webpack_exports__, {
 /* harmony export */   ASTFeatureExtractor: () => (/* reexport safe */ _audio_spectrogram_transformer_feature_extraction_audio_spectrogram_transformer_js__WEBPACK_IMPORTED_MODULE_0__.ASTFeatureExtractor),
 /* harmony export */   ClapFeatureExtractor: () => (/* reexport safe */ _clap_feature_extraction_clap_js__WEBPACK_IMPORTED_MODULE_1__.ClapFeatureExtractor),
-/* harmony export */   ImageFeatureExtractor: () => (/* reexport safe */ _base_image_processors_utils_js__WEBPACK_IMPORTED_MODULE_8__.ImageProcessor),
-/* harmony export */   PyAnnoteFeatureExtractor: () => (/* reexport safe */ _pyannote_feature_extraction_pyannote_js__WEBPACK_IMPORTED_MODULE_2__.PyAnnoteFeatureExtractor),
-/* harmony export */   SeamlessM4TFeatureExtractor: () => (/* reexport safe */ _seamless_m4t_feature_extraction_seamless_m4t_js__WEBPACK_IMPORTED_MODULE_3__.SeamlessM4TFeatureExtractor),
-/* harmony export */   SpeechT5FeatureExtractor: () => (/* reexport safe */ _speecht5_feature_extraction_speecht5_js__WEBPACK_IMPORTED_MODULE_4__.SpeechT5FeatureExtractor),
-/* harmony export */   Wav2Vec2FeatureExtractor: () => (/* reexport safe */ _wav2vec2_feature_extraction_wav2vec2_js__WEBPACK_IMPORTED_MODULE_5__.Wav2Vec2FeatureExtractor),
-/* harmony export */   WeSpeakerFeatureExtractor: () => (/* reexport safe */ _wespeaker_feature_extraction_wespeaker_js__WEBPACK_IMPORTED_MODULE_6__.WeSpeakerFeatureExtractor),
-/* harmony export */   WhisperFeatureExtractor: () => (/* reexport safe */ _whisper_feature_extraction_whisper_js__WEBPACK_IMPORTED_MODULE_7__.WhisperFeatureExtractor)
+/* harmony export */   ImageFeatureExtractor: () => (/* reexport safe */ _base_image_processors_utils_js__WEBPACK_IMPORTED_MODULE_9__.ImageProcessor),
+/* harmony export */   MoonshineFeatureExtractor: () => (/* reexport safe */ _moonshine_feature_extraction_moonshine_js__WEBPACK_IMPORTED_MODULE_2__.MoonshineFeatureExtractor),
+/* harmony export */   PyAnnoteFeatureExtractor: () => (/* reexport safe */ _pyannote_feature_extraction_pyannote_js__WEBPACK_IMPORTED_MODULE_3__.PyAnnoteFeatureExtractor),
+/* harmony export */   SeamlessM4TFeatureExtractor: () => (/* reexport safe */ _seamless_m4t_feature_extraction_seamless_m4t_js__WEBPACK_IMPORTED_MODULE_4__.SeamlessM4TFeatureExtractor),
+/* harmony export */   SpeechT5FeatureExtractor: () => (/* reexport safe */ _speecht5_feature_extraction_speecht5_js__WEBPACK_IMPORTED_MODULE_5__.SpeechT5FeatureExtractor),
+/* harmony export */   Wav2Vec2FeatureExtractor: () => (/* reexport safe */ _wav2vec2_feature_extraction_wav2vec2_js__WEBPACK_IMPORTED_MODULE_6__.Wav2Vec2FeatureExtractor),
+/* harmony export */   WeSpeakerFeatureExtractor: () => (/* reexport safe */ _wespeaker_feature_extraction_wespeaker_js__WEBPACK_IMPORTED_MODULE_7__.WeSpeakerFeatureExtractor),
+/* harmony export */   WhisperFeatureExtractor: () => (/* reexport safe */ _whisper_feature_extraction_whisper_js__WEBPACK_IMPORTED_MODULE_8__.WhisperFeatureExtractor)
 /* harmony export */ });
 /* harmony import */ var _audio_spectrogram_transformer_feature_extraction_audio_spectrogram_transformer_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ./audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.js */ "./src/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.js");
 /* harmony import */ var _clap_feature_extraction_clap_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ./clap/feature_extraction_clap.js */ "./src/models/clap/feature_extraction_clap.js");
-/* harmony import */ var _pyannote_feature_extraction_pyannote_js__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! ./pyannote/feature_extraction_pyannote.js */ "./src/models/pyannote/feature_extraction_pyannote.js");
-/* harmony import */ var _seamless_m4t_feature_extraction_seamless_m4t_js__WEBPACK_IMPORTED_MODULE_3__ = __webpack_require__(/*! ./seamless_m4t/feature_extraction_seamless_m4t.js */ "./src/models/seamless_m4t/feature_extraction_seamless_m4t.js");
-/* harmony import */ var _speecht5_feature_extraction_speecht5_js__WEBPACK_IMPORTED_MODULE_4__ = __webpack_require__(/*! ./speecht5/feature_extraction_speecht5.js */ "./src/models/speecht5/feature_extraction_speecht5.js");
-/* harmony import */ var _wav2vec2_feature_extraction_wav2vec2_js__WEBPACK_IMPORTED_MODULE_5__ = __webpack_require__(/*! ./wav2vec2/feature_extraction_wav2vec2.js */ "./src/models/wav2vec2/feature_extraction_wav2vec2.js");
-/* harmony import */ var _wespeaker_feature_extraction_wespeaker_js__WEBPACK_IMPORTED_MODULE_6__ = __webpack_require__(/*! ./wespeaker/feature_extraction_wespeaker.js */ "./src/models/wespeaker/feature_extraction_wespeaker.js");
-/* harmony import */ var _whisper_feature_extraction_whisper_js__WEBPACK_IMPORTED_MODULE_7__ = __webpack_require__(/*! ./whisper/feature_extraction_whisper.js */ "./src/models/whisper/feature_extraction_whisper.js");
-/* harmony import */ var _base_image_processors_utils_js__WEBPACK_IMPORTED_MODULE_8__ = __webpack_require__(/*! ../base/image_processors_utils.js */ "./src/base/image_processors_utils.js");
+/* harmony import */ var _moonshine_feature_extraction_moonshine_js__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! ./moonshine/feature_extraction_moonshine.js */ "./src/models/moonshine/feature_extraction_moonshine.js");
+/* harmony import */ var _pyannote_feature_extraction_pyannote_js__WEBPACK_IMPORTED_MODULE_3__ = __webpack_require__(/*! ./pyannote/feature_extraction_pyannote.js */ "./src/models/pyannote/feature_extraction_pyannote.js");
+/* harmony import */ var _seamless_m4t_feature_extraction_seamless_m4t_js__WEBPACK_IMPORTED_MODULE_4__ = __webpack_require__(/*! ./seamless_m4t/feature_extraction_seamless_m4t.js */ "./src/models/seamless_m4t/feature_extraction_seamless_m4t.js");
+/* harmony import */ var _speecht5_feature_extraction_speecht5_js__WEBPACK_IMPORTED_MODULE_5__ = __webpack_require__(/*! ./speecht5/feature_extraction_speecht5.js */ "./src/models/speecht5/feature_extraction_speecht5.js");
+/* harmony import */ var _wav2vec2_feature_extraction_wav2vec2_js__WEBPACK_IMPORTED_MODULE_6__ = __webpack_require__(/*! ./wav2vec2/feature_extraction_wav2vec2.js */ "./src/models/wav2vec2/feature_extraction_wav2vec2.js");
+/* harmony import */ var _wespeaker_feature_extraction_wespeaker_js__WEBPACK_IMPORTED_MODULE_7__ = __webpack_require__(/*! ./wespeaker/feature_extraction_wespeaker.js */ "./src/models/wespeaker/feature_extraction_wespeaker.js");
+/* harmony import */ var _whisper_feature_extraction_whisper_js__WEBPACK_IMPORTED_MODULE_8__ = __webpack_require__(/*! ./whisper/feature_extraction_whisper.js */ "./src/models/whisper/feature_extraction_whisper.js");
+/* harmony import */ var _base_image_processors_utils_js__WEBPACK_IMPORTED_MODULE_9__ = __webpack_require__(/*! ../base/image_processors_utils.js */ "./src/base/image_processors_utils.js");
@@ -17124,18 +17347,29 @@ class Idefics3ImageProcessor extends _base_image_processors_utils_js__WEBPACK_IM
             const optimal_width = Math.ceil(width / num_splits_w);
             // Iterate through each row and column
-            for (let r = 0; r < num_splits_h; r++) {
-                for (let c = 0; c < num_splits_w; c++) {
-                    // Calculate the starting point of the crop
-                    const start_x = c * optimal_width;
-                    const start_y = r * optimal_height;
-                    // Calculate the ending point of the crop
-                    const end_x = Math.min(start_x + optimal_width, width);
-                    const end_y = Math.min(start_y + optimal_height, height);
-                    // Crop the image
-                    frames.push(pixel_values.slice(null, null, [start_y, end_y], [start_x, end_x]));
+            for (let r = 0; r < num_splits_h; ++r) {
+                for (let c = 0; c < num_splits_w; ++c) {
+                    let start_x, start_y, end_x, end_y;
+                    if (r === num_splits_h - 1) { // At bottom
+                        start_y = height - optimal_height;
+                        end_y = height;
+                    } else {
+                        start_y = r * optimal_height;
+                        end_y = (r + 1) * optimal_height;
+                    }
+                    if (c === num_splits_w - 1) { // At right
+                        start_x = width - optimal_width;
+                        end_x = width;
+                    } else {
+                        start_x = c * optimal_width;
+                        end_x = (c + 1) * optimal_width;
+                    }
+                    const starts = [start_y, start_x];
+                    const ends = [end_y, end_x];
+                    const patch = await (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.slice)(pixel_values, starts, ends, [2, 3]);
+                    frames.push(patch);
                 }
             }
@@ -17361,21 +17595,22 @@ __webpack_require__.r(__webpack_exports__);
 /* harmony export */   OwlViTFeatureExtractor: () => (/* reexport safe */ _owlvit_image_processing_owlvit_js__WEBPACK_IMPORTED_MODULE_24__.OwlViTFeatureExtractor),
 /* harmony export */   OwlViTImageProcessor: () => (/* reexport safe */ _owlvit_image_processing_owlvit_js__WEBPACK_IMPORTED_MODULE_24__.OwlViTImageProcessor),
 /* harmony export */   Owlv2ImageProcessor: () => (/* reexport safe */ _owlv2_image_processing_owlv2_js__WEBPACK_IMPORTED_MODULE_23__.Owlv2ImageProcessor),
-/* harmony export */   PvtImageProcessor: () => (/* reexport safe */ _pvt_image_processing_pvt_js__WEBPACK_IMPORTED_MODULE_25__.PvtImageProcessor),
-/* harmony export */   Qwen2VLImageProcessor: () => (/* reexport safe */ _qwen2_vl_image_processing_qwen2_vl_js__WEBPACK_IMPORTED_MODULE_26__.Qwen2VLImageProcessor),
-/* harmony export */   RTDetrImageProcessor: () => (/* reexport safe */ _rt_detr_image_processing_rt_detr_js__WEBPACK_IMPORTED_MODULE_27__.RTDetrImageProcessor),
-/* harmony export */   SamImageProcessor: () => (/* reexport safe */ _sam_image_processing_sam_js__WEBPACK_IMPORTED_MODULE_28__.SamImageProcessor),
-/* harmony export */   SegformerFeatureExtractor: () => (/* reexport safe */ _segformer_image_processing_segformer_js__WEBPACK_IMPORTED_MODULE_29__.SegformerFeatureExtractor),
-/* harmony export */   SegformerImageProcessor: () => (/* reexport safe */ _segformer_image_processing_segformer_js__WEBPACK_IMPORTED_MODULE_29__.SegformerImageProcessor),
-/* harmony export */   SiglipImageProcessor: () => (/* reexport safe */ _siglip_image_processing_siglip_js__WEBPACK_IMPORTED_MODULE_30__.SiglipImageProcessor),
-/* harmony export */   Swin2SRImageProcessor: () => (/* reexport safe */ _swin2sr_image_processing_swin2sr_js__WEBPACK_IMPORTED_MODULE_31__.Swin2SRImageProcessor),
+/* harmony export */   Phi3VImageProcessor: () => (/* reexport safe */ _phi3_v_image_processing_phi3_v_js__WEBPACK_IMPORTED_MODULE_25__.Phi3VImageProcessor),
+/* harmony export */   PvtImageProcessor: () => (/* reexport safe */ _pvt_image_processing_pvt_js__WEBPACK_IMPORTED_MODULE_26__.PvtImageProcessor),
+/* harmony export */   Qwen2VLImageProcessor: () => (/* reexport safe */ _qwen2_vl_image_processing_qwen2_vl_js__WEBPACK_IMPORTED_MODULE_27__.Qwen2VLImageProcessor),
+/* harmony export */   RTDetrImageProcessor: () => (/* reexport safe */ _rt_detr_image_processing_rt_detr_js__WEBPACK_IMPORTED_MODULE_28__.RTDetrImageProcessor),
+/* harmony export */   SamImageProcessor: () => (/* reexport safe */ _sam_image_processing_sam_js__WEBPACK_IMPORTED_MODULE_29__.SamImageProcessor),
+/* harmony export */   SegformerFeatureExtractor: () => (/* reexport safe */ _segformer_image_processing_segformer_js__WEBPACK_IMPORTED_MODULE_30__.SegformerFeatureExtractor),
+/* harmony export */   SegformerImageProcessor: () => (/* reexport safe */ _segformer_image_processing_segformer_js__WEBPACK_IMPORTED_MODULE_30__.SegformerImageProcessor),
+/* harmony export */   SiglipImageProcessor: () => (/* reexport safe */ _siglip_image_processing_siglip_js__WEBPACK_IMPORTED_MODULE_31__.SiglipImageProcessor),
+/* harmony export */   Swin2SRImageProcessor: () => (/* reexport safe */ _swin2sr_image_processing_swin2sr_js__WEBPACK_IMPORTED_MODULE_32__.Swin2SRImageProcessor),
 /* harmony export */   VLMImageProcessor: () => (/* reexport safe */ _janus_image_processing_janus_js__WEBPACK_IMPORTED_MODULE_12__.VLMImageProcessor),
-/* harmony export */   ViTFeatureExtractor: () => (/* reexport safe */ _vit_image_processing_vit_js__WEBPACK_IMPORTED_MODULE_32__.ViTFeatureExtractor),
-/* harmony export */   ViTImageProcessor: () => (/* reexport safe */ _vit_image_processing_vit_js__WEBPACK_IMPORTED_MODULE_32__.ViTImageProcessor),
-/* harmony export */   VitMatteImageProcessor: () => (/* reexport safe */ _vitmatte_image_processing_vitmatte_js__WEBPACK_IMPORTED_MODULE_33__.VitMatteImageProcessor),
-/* harmony export */   VitPoseImageProcessor: () => (/* reexport safe */ _vitpose_image_processing_vitpose_js__WEBPACK_IMPORTED_MODULE_34__.VitPoseImageProcessor),
-/* harmony export */   YolosFeatureExtractor: () => (/* reexport safe */ _yolos_image_processing_yolos_js__WEBPACK_IMPORTED_MODULE_35__.YolosFeatureExtractor),
-/* harmony export */   YolosImageProcessor: () => (/* reexport safe */ _yolos_image_processing_yolos_js__WEBPACK_IMPORTED_MODULE_35__.YolosImageProcessor)
+/* harmony export */   ViTFeatureExtractor: () => (/* reexport safe */ _vit_image_processing_vit_js__WEBPACK_IMPORTED_MODULE_33__.ViTFeatureExtractor),
+/* harmony export */   ViTImageProcessor: () => (/* reexport safe */ _vit_image_processing_vit_js__WEBPACK_IMPORTED_MODULE_33__.ViTImageProcessor),
+/* harmony export */   VitMatteImageProcessor: () => (/* reexport safe */ _vitmatte_image_processing_vitmatte_js__WEBPACK_IMPORTED_MODULE_34__.VitMatteImageProcessor),
+/* harmony export */   VitPoseImageProcessor: () => (/* reexport safe */ _vitpose_image_processing_vitpose_js__WEBPACK_IMPORTED_MODULE_35__.VitPoseImageProcessor),
+/* harmony export */   YolosFeatureExtractor: () => (/* reexport safe */ _yolos_image_processing_yolos_js__WEBPACK_IMPORTED_MODULE_36__.YolosFeatureExtractor),
+/* harmony export */   YolosImageProcessor: () => (/* reexport safe */ _yolos_image_processing_yolos_js__WEBPACK_IMPORTED_MODULE_36__.YolosImageProcessor)
 /* harmony export */ });
 /* harmony import */ var _beit_image_processing_beit_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ./beit/image_processing_beit.js */ "./src/models/beit/image_processing_beit.js");
 /* harmony import */ var _bit_image_processing_bit_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ./bit/image_processing_bit.js */ "./src/models/bit/image_processing_bit.js");
@@ -17402,17 +17637,19 @@ __webpack_require__.r(__webpack_exports__);
 /* harmony import */ var _nougat_image_processing_nougat_js__WEBPACK_IMPORTED_MODULE_22__ = __webpack_require__(/*! ./nougat/image_processing_nougat.js */ "./src/models/nougat/image_processing_nougat.js");
 /* harmony import */ var _owlv2_image_processing_owlv2_js__WEBPACK_IMPORTED_MODULE_23__ = __webpack_require__(/*! ./owlv2/image_processing_owlv2.js */ "./src/models/owlv2/image_processing_owlv2.js");
 /* harmony import */ var _owlvit_image_processing_owlvit_js__WEBPACK_IMPORTED_MODULE_24__ = __webpack_require__(/*! ./owlvit/image_processing_owlvit.js */ "./src/models/owlvit/image_processing_owlvit.js");
-/* harmony import */ var _pvt_image_processing_pvt_js__WEBPACK_IMPORTED_MODULE_25__ = __webpack_require__(/*! ./pvt/image_processing_pvt.js */ "./src/models/pvt/image_processing_pvt.js");
-/* harmony import */ var _qwen2_vl_image_processing_qwen2_vl_js__WEBPACK_IMPORTED_MODULE_26__ = __webpack_require__(/*! ./qwen2_vl/image_processing_qwen2_vl.js */ "./src/models/qwen2_vl/image_processing_qwen2_vl.js");
-/* harmony import */ var _rt_detr_image_processing_rt_detr_js__WEBPACK_IMPORTED_MODULE_27__ = __webpack_require__(/*! ./rt_detr/image_processing_rt_detr.js */ "./src/models/rt_detr/image_processing_rt_detr.js");
-/* harmony import */ var _sam_image_processing_sam_js__WEBPACK_IMPORTED_MODULE_28__ = __webpack_require__(/*! ./sam/image_processing_sam.js */ "./src/models/sam/image_processing_sam.js");
-/* harmony import */ var _segformer_image_processing_segformer_js__WEBPACK_IMPORTED_MODULE_29__ = __webpack_require__(/*! ./segformer/image_processing_segformer.js */ "./src/models/segformer/image_processing_segformer.js");
-/* harmony import */ var _siglip_image_processing_siglip_js__WEBPACK_IMPORTED_MODULE_30__ = __webpack_require__(/*! ./siglip/image_processing_siglip.js */ "./src/models/siglip/image_processing_siglip.js");
-/* harmony import */ var _swin2sr_image_processing_swin2sr_js__WEBPACK_IMPORTED_MODULE_31__ = __webpack_require__(/*! ./swin2sr/image_processing_swin2sr.js */ "./src/models/swin2sr/image_processing_swin2sr.js");
-/* harmony import */ var _vit_image_processing_vit_js__WEBPACK_IMPORTED_MODULE_32__ = __webpack_require__(/*! ./vit/image_processing_vit.js */ "./src/models/vit/image_processing_vit.js");
-/* harmony import */ var _vitmatte_image_processing_vitmatte_js__WEBPACK_IMPORTED_MODULE_33__ = __webpack_require__(/*! ./vitmatte/image_processing_vitmatte.js */ "./src/models/vitmatte/image_processing_vitmatte.js");
-/* harmony import */ var _vitpose_image_processing_vitpose_js__WEBPACK_IMPORTED_MODULE_34__ = __webpack_require__(/*! ./vitpose/image_processing_vitpose.js */ "./src/models/vitpose/image_processing_vitpose.js");
-/* harmony import */ var _yolos_image_processing_yolos_js__WEBPACK_IMPORTED_MODULE_35__ = __webpack_require__(/*! ./yolos/image_processing_yolos.js */ "./src/models/yolos/image_processing_yolos.js");
+/* harmony import */ var _phi3_v_image_processing_phi3_v_js__WEBPACK_IMPORTED_MODULE_25__ = __webpack_require__(/*! ./phi3_v/image_processing_phi3_v.js */ "./src/models/phi3_v/image_processing_phi3_v.js");
+/* harmony import */ var _pvt_image_processing_pvt_js__WEBPACK_IMPORTED_MODULE_26__ = __webpack_require__(/*! ./pvt/image_processing_pvt.js */ "./src/models/pvt/image_processing_pvt.js");
+/* harmony import */ var _qwen2_vl_image_processing_qwen2_vl_js__WEBPACK_IMPORTED_MODULE_27__ = __webpack_require__(/*! ./qwen2_vl/image_processing_qwen2_vl.js */ "./src/models/qwen2_vl/image_processing_qwen2_vl.js");
+/* harmony import */ var _rt_detr_image_processing_rt_detr_js__WEBPACK_IMPORTED_MODULE_28__ = __webpack_require__(/*! ./rt_detr/image_processing_rt_detr.js */ "./src/models/rt_detr/image_processing_rt_detr.js");
+/* harmony import */ var _sam_image_processing_sam_js__WEBPACK_IMPORTED_MODULE_29__ = __webpack_require__(/*! ./sam/image_processing_sam.js */ "./src/models/sam/image_processing_sam.js");
+/* harmony import */ var _segformer_image_processing_segformer_js__WEBPACK_IMPORTED_MODULE_30__ = __webpack_require__(/*! ./segformer/image_processing_segformer.js */ "./src/models/segformer/image_processing_segformer.js");
+/* harmony import */ var _siglip_image_processing_siglip_js__WEBPACK_IMPORTED_MODULE_31__ = __webpack_require__(/*! ./siglip/image_processing_siglip.js */ "./src/models/siglip/image_processing_siglip.js");
+/* harmony import */ var _swin2sr_image_processing_swin2sr_js__WEBPACK_IMPORTED_MODULE_32__ = __webpack_require__(/*! ./swin2sr/image_processing_swin2sr.js */ "./src/models/swin2sr/image_processing_swin2sr.js");
+/* harmony import */ var _vit_image_processing_vit_js__WEBPACK_IMPORTED_MODULE_33__ = __webpack_require__(/*! ./vit/image_processing_vit.js */ "./src/models/vit/image_processing_vit.js");
+/* harmony import */ var _vitmatte_image_processing_vitmatte_js__WEBPACK_IMPORTED_MODULE_34__ = __webpack_require__(/*! ./vitmatte/image_processing_vitmatte.js */ "./src/models/vitmatte/image_processing_vitmatte.js");
+/* harmony import */ var _vitpose_image_processing_vitpose_js__WEBPACK_IMPORTED_MODULE_35__ = __webpack_require__(/*! ./vitpose/image_processing_vitpose.js */ "./src/models/vitpose/image_processing_vitpose.js");
+/* harmony import */ var _yolos_image_processing_yolos_js__WEBPACK_IMPORTED_MODULE_36__ = __webpack_require__(/*! ./yolos/image_processing_yolos.js */ "./src/models/yolos/image_processing_yolos.js");
@@ -18087,6 +18324,87 @@ class MobileViTImageProcessor extends _base_image_processors_utils_js__WEBPACK_I
 class MobileViTFeatureExtractor extends MobileViTImageProcessor { }
+/***/ }),
+/***/ "./src/models/moonshine/feature_extraction_moonshine.js":
+/*!**************************************************************!*\
+  !*** ./src/models/moonshine/feature_extraction_moonshine.js ***!
+  \**************************************************************/
+/***/ ((__unused_webpack___webpack_module__, __webpack_exports__, __webpack_require__) => {
+"use strict";
+__webpack_require__.r(__webpack_exports__);
+/* harmony export */ __webpack_require__.d(__webpack_exports__, {
+/* harmony export */   MoonshineFeatureExtractor: () => (/* binding */ MoonshineFeatureExtractor)
+/* harmony export */ });
+/* harmony import */ var _base_feature_extraction_utils_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ../../base/feature_extraction_utils.js */ "./src/base/feature_extraction_utils.js");
+/* harmony import */ var _utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ../../utils/tensor.js */ "./src/utils/tensor.js");
+class MoonshineFeatureExtractor extends _base_feature_extraction_utils_js__WEBPACK_IMPORTED_MODULE_0__.FeatureExtractor {
+    /**
+     * Asynchronously extracts input values from a given audio using the provided configuration.
+     * @param {Float32Array|Float64Array} audio The audio data as a Float32Array/Float64Array.
+     * @returns {Promise<{ input_values: Tensor; }>} The extracted input values.
+     */
+    async _call(audio) {
+        (0,_base_feature_extraction_utils_js__WEBPACK_IMPORTED_MODULE_0__.validate_audio_inputs)(audio, 'MoonshineFeatureExtractor');
+        if (audio instanceof Float64Array) {
+            audio = new Float32Array(audio);
+        }
+        const shape = [
+            1,            /* batch_size */
+            audio.length, /* num_samples */
+        ];
+        return {
+            input_values: new _utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.Tensor('float32', audio, shape),
+        };
+    }
+}
+/***/ }),
+/***/ "./src/models/moonshine/processing_moonshine.js":
+/*!******************************************************!*\
+  !*** ./src/models/moonshine/processing_moonshine.js ***!
+  \******************************************************/
+/***/ ((__unused_webpack___webpack_module__, __webpack_exports__, __webpack_require__) => {
+"use strict";
+__webpack_require__.r(__webpack_exports__);
+/* harmony export */ __webpack_require__.d(__webpack_exports__, {
+/* harmony export */   MoonshineProcessor: () => (/* binding */ MoonshineProcessor)
+/* harmony export */ });
+/* harmony import */ var _auto_feature_extraction_auto_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ../auto/feature_extraction_auto.js */ "./src/models/auto/feature_extraction_auto.js");
+/* harmony import */ var _tokenizers_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ../../tokenizers.js */ "./src/tokenizers.js");
+/* harmony import */ var _base_processing_utils_js__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! ../../base/processing_utils.js */ "./src/base/processing_utils.js");
+/**
+ * Represents a MoonshineProcessor that extracts features from an audio input.
+ */
+class MoonshineProcessor extends _base_processing_utils_js__WEBPACK_IMPORTED_MODULE_2__.Processor {
+    static tokenizer_class = _tokenizers_js__WEBPACK_IMPORTED_MODULE_1__.AutoTokenizer
+    static feature_extractor_class = _auto_feature_extraction_auto_js__WEBPACK_IMPORTED_MODULE_0__.AutoFeatureExtractor
+    /**
+     * Calls the feature_extractor function with the given audio input.
+     * @param {any} audio The audio input to extract features from.
+     * @returns {Promise<any>} A Promise that resolves with the extracted features.
+     */
+    async _call(audio) {
+        return await this.feature_extractor(audio);
+    }
+}
 /***/ }),
 /***/ "./src/models/nougat/image_processing_nougat.js":
@@ -18180,6 +18498,356 @@ class OwlViTProcessor extends _base_processing_utils_js__WEBPACK_IMPORTED_MODULE
 }
+/***/ }),
+/***/ "./src/models/paligemma/processing_paligemma.js":
+/*!******************************************************!*\
+  !*** ./src/models/paligemma/processing_paligemma.js ***!
+  \******************************************************/
+/***/ ((__unused_webpack___webpack_module__, __webpack_exports__, __webpack_require__) => {
+"use strict";
+__webpack_require__.r(__webpack_exports__);
+/* harmony export */ __webpack_require__.d(__webpack_exports__, {
+/* harmony export */   PaliGemmaProcessor: () => (/* binding */ PaliGemmaProcessor)
+/* harmony export */ });
+/* harmony import */ var _base_processing_utils_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ../../base/processing_utils.js */ "./src/base/processing_utils.js");
+/* harmony import */ var _auto_image_processing_auto_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ../auto/image_processing_auto.js */ "./src/models/auto/image_processing_auto.js");
+/* harmony import */ var _tokenizers_js__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! ../../tokenizers.js */ "./src/tokenizers.js");
+const IMAGE_TOKEN = "<image>";
+/**
+ * Builds a PaliGemma prompt string laid out as:
+ * `<image placeholders><bos_token><prompt>\n`.
+ * @param {string} prompt The text prompt.
+ * @param {string} bos_token The beginning-of-sequence token text.
+ * @param {number} image_seq_len Number of placeholder tokens emitted per image.
+ * @param {string} image_token The placeholder token text to repeat.
+ * @param {number} num_images Number of images the prompt refers to.
+ * @returns {string} The assembled prompt, terminated by a newline.
+ */
+function build_string_from_input(
+    prompt,
+    bos_token,
+    image_seq_len,
+    image_token,
+    num_images,
+) {
+    const placeholder_count = image_seq_len * num_images;
+    const image_prefix = image_token.repeat(placeholder_count);
+    // `concat` stringifies each argument, matching template-literal interpolation.
+    return image_prefix.concat(bos_token, prompt, "\n");
+}
+/**
+ * Processor for PaliGemma: builds the text prompt (image placeholders, BOS token,
+ * user text, trailing newline), tokenizes it, and runs the image processor.
+ */
+class PaliGemmaProcessor extends _base_processing_utils_js__WEBPACK_IMPORTED_MODULE_0__.Processor {
+    static tokenizer_class = _tokenizers_js__WEBPACK_IMPORTED_MODULE_2__.AutoTokenizer;
+    static image_processor_class = _auto_image_processing_auto_js__WEBPACK_IMPORTED_MODULE_1__.AutoImageProcessor;
+    static uses_processor_config = false;
+    /**
+     * @typedef {import('../../utils/image.js').RawImage} RawImage
+     */
+    /**
+     * Prepares model inputs from images and an optional text prefix.
+     * `images` is required, `text` is optional. The same `kwargs` object is
+     * forwarded to both the tokenizer and the image processor.
+     * @returns {Promise<any>} The image-processor outputs merged with the tokenizer
+     * outputs; on a key clash the tokenizer output wins.
+     */
+    async _call(/** @type {RawImage|RawImage[]} */ images, text = null, kwargs = {}) {
+        if (!text) {
+            console.warn(
+                "You are using PaliGemma without a text prefix. It will perform as a picture-captioning model."
+            );
+            text = "";
+        }
+        const image_list = Array.isArray(images) ? images : [images];
+        const prompts = Array.isArray(text) ? text : [text];
+        const { bos_token } = this.tokenizer;
+        const { image_seq_length } = this.image_processor.config;
+        const has_image_token = prompts.some((prompt) => prompt.includes(IMAGE_TOKEN));
+        let input_strings;
+        if (has_image_token) {
+            // Expand every `<image>` into `image_seq_length` placeholders, then insert the
+            // BOS token right after the last placeholder (or at the start if there is none).
+            input_strings = prompts.map((prompt) => {
+                const expanded = prompt.replaceAll(IMAGE_TOKEN, IMAGE_TOKEN.repeat(image_seq_length));
+                const last_image_at = expanded.lastIndexOf(IMAGE_TOKEN);
+                let insert_at = 0;
+                if (last_image_at !== -1) {
+                    insert_at = last_image_at + IMAGE_TOKEN.length;
+                }
+                const head = expanded.slice(0, insert_at);
+                const tail = expanded.slice(insert_at);
+                return head + bos_token + tail + "\n";
+            });
+        } else {
+            console.warn(
+                "You are passing both `text` and `images` to `PaliGemmaProcessor`. The processor expects special " +
+                "image tokens in the text, as many tokens as there are images per each text. It is recommended to " +
+                "add `<image>` tokens in the very beginning of your text. For this call, we will infer how many images " +
+                "each text has and add special tokens."
+            );
+            const num_images = image_list.length;
+            input_strings = prompts.map((prompt) => build_string_from_input(
+                prompt,
+                bos_token,
+                image_seq_length,
+                IMAGE_TOKEN,
+                num_images,
+            ));
+        }
+        const text_inputs = this.tokenizer(input_strings, kwargs);
+        const image_inputs = await this.image_processor(image_list, kwargs);
+        return {
+            ...image_inputs,
+            ...text_inputs,
+        };
+    }
+}
+/***/ }),
+/***/ "./src/models/phi3_v/image_processing_phi3_v.js":
+/*!******************************************************!*\
+  !*** ./src/models/phi3_v/image_processing_phi3_v.js ***!
+  \******************************************************/
+/***/ ((__unused_webpack___webpack_module__, __webpack_exports__, __webpack_require__) => {
+"use strict";
+__webpack_require__.r(__webpack_exports__);
+/* harmony export */ __webpack_require__.d(__webpack_exports__, {
+/* harmony export */   Phi3VImageProcessor: () => (/* binding */ Phi3VImageProcessor)
+/* harmony export */ });
+/* harmony import */ var _base_image_processors_utils_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ../../base/image_processors_utils.js */ "./src/base/image_processors_utils.js");
+/* harmony import */ var _utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ../../utils/tensor.js */ "./src/utils/tensor.js");
+const IMAGE_SIZE = 336;
+const SLICE_AXES = [2, 3]; // axes to slice on
+const { ceil, floor, sqrt } = Math;
+/**
+ * Image processor for Phi3-V. For each input image it produces one "global" view
+ * resized to IMAGE_SIZE x IMAGE_SIZE plus a grid of crops, each also resized to
+ * IMAGE_SIZE x IMAGE_SIZE, and reports the padded image sizes and per-image token counts.
+ */
+class Phi3VImageProcessor extends _base_image_processors_utils_js__WEBPACK_IMPORTED_MODULE_0__.ImageProcessor {
+    /**
+     * @param {Object} config Processor configuration. The flags listed below are written
+     * after `config` is spread, so they override any values `config` has for the same keys.
+     */
+    constructor(config) {
+        super({
+            ...config,
+            do_normalize: true,
+            do_pad: true,
+            pad_size: 'custom',
+            do_convert_rgb: true,
+            do_resize: true, // Smart resizing "hd_transform"
+        });
+        // Read by `get_resize_output_image_size`; overwritten on every `_call`.
+        this._num_crops = config.num_crops;
+    }
+    /**
+     * Computes the number of image tokens for a (padded) image size, using
+     * `num_img_tokens` from the processor config.
+     * @param {number} width Image width in pixels.
+     * @param {number} height Image height in pixels.
+     * @returns {number} The image token count for that size.
+     */
+    calc_num_image_tokens_from_image_size(width, height) {
+        // @ts-expect-error
+        const { num_img_tokens } = this.config;
+        return floor(((floor((height / IMAGE_SIZE)) * floor((width / IMAGE_SIZE)) + 1) * num_img_tokens) + 1 + (floor(height / IMAGE_SIZE) + 1) * sqrt(num_img_tokens));
+    }
+    /**
+     * Picks the largest integer `scale` such that `scale * ceil(scale / ratio)` does not
+     * exceed `this._num_crops`, then returns `[scale * 336, floor(new_w / ratio)]`.
+     * The `size` argument is not used by this override.
+     */
+    /** @type {ImageProcessor['get_resize_output_image_size']} */
+    get_resize_output_image_size(image, size) {
+        const hd_num = this._num_crops;
+        const [width, height] = image.size
+        let ratio = width / height;
+        let scale = 1;
+        // Calculate the scaling factor
+        while (scale * Math.ceil(scale / ratio) <= hd_num) {
+            scale += 1;
+        }
+        // The loop exits one step past the last valid scale, so step back.
+        scale -= 1;
+        // Compute the new dimensions
+        const new_w = Math.floor(scale * 336);
+        const new_h = Math.floor(new_w / ratio);
+        return [new_w, new_h]
+    }
+    /**
+     * Pads to the next multiple of IMAGE_SIZE in each dimension, centered, delegating to
+     * the base implementation. The `padSize` argument is ignored; the target size is
+     * computed here. Entries in `options` override the `center`/`constant_values` defaults.
+     */
+    /** @type {ImageProcessor['pad_image']} */
+    pad_image(pixelData, imgDims, padSize, options = {}) {
+        // Phi3V uses a custom padding strategy:
+        // - Pad to a multiple of 336
+        // - Pad with white pixels
+        // `imgDims` is read as [height, width].
+        const [imageHeight, imageWidth] = imgDims;
+        const height = IMAGE_SIZE * ceil(imageHeight / IMAGE_SIZE);
+        const width = IMAGE_SIZE * ceil(imageWidth / IMAGE_SIZE);
+        // NOTE: Since padding is done after normalization, we need to fill with the normalized values
+        const constant_values = [1, 1, 1].map((x, i) => (x - this.image_mean[i]) / this.image_std[i]);
+        return super.pad_image(pixelData, imgDims, { width, height }, {
+            center: true,
+            constant_values,
+            ...options,
+        });
+    }
+    /**
+     * Preprocesses one image or an array of images.
+     * @param {any} images A single image or an array of images.
+     * @param {Object} [options]
+     * @param {number|null} [options.num_crops=null] Number of crops per image; falls back to
+     * `this.config.num_crops` when nullish. Must be a square number >= 4.
+     * @returns {Promise<{pixel_values: any, original_sizes: any[], reshaped_input_sizes: any[], image_sizes: any, num_img_tokens: number[]}>}
+     * @throws {Error} If `num_crops` is below 4 or not a perfect square.
+     */
+    async _call(images, {
+        num_crops = null,
+    } = {}) {
+        // @ts-expect-error
+        this._num_crops = num_crops ??= this.config.num_crops;
+        if (num_crops < 4 || sqrt(num_crops) % 1 !== 0) {
+            throw new Error("num_crops must be a square number >= 4");
+        }
+        if (!Array.isArray(images)) {
+            images = [images];
+        }
+        const num_images = images.length;
+        const imageData = await Promise.all(images.map(x => this.preprocess(x)));
+        const original_sizes = imageData.map(x => x.original_size);
+        const reshaped_input_sizes = imageData.map(x => x.reshaped_input_size);
+        // Process each image in batch
+        const all_pixel_values = [];
+        for (const { pixel_values } of imageData) {
+            pixel_values.unsqueeze_(0); // Easier processing as 4D tensor
+            const [height, width] = pixel_values.dims.slice(-2);
+            // Global image (Tensor of shape [num_channels, height, width])
+            const batch_pixel_values = await (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.interpolate_4d)(pixel_values, {
+                size: [IMAGE_SIZE, IMAGE_SIZE],
+                mode: 'bicubic',
+            });
+            // NOTE(review): the validation above rejects num_crops < 4, so this condition
+            // looks always true here and the else branch appears unreachable — confirm.
+            if (num_crops > 0) {
+                const patches = [];
+                // Crops form a sqrt_patches x sqrt_patches grid over the image.
+                const sqrt_patches = sqrt(num_crops);
+                const patch_width = floor(width / sqrt_patches);
+                const patch_height = floor(height / sqrt_patches);
+                for (let y = 0; y < sqrt_patches; ++y) {
+                    for (let x = 0; x < sqrt_patches; ++x) {
+                        let start_x, start_y, end_x, end_y;
+                        // The last row/column is anchored to the far edge so every patch has
+                        // the same size even when the dimension is not evenly divisible.
+                        if (y === sqrt_patches - 1) { // At bottom
+                            start_y = height - patch_height;
+                            end_y = height;
+                        } else {
+                            start_y = y * patch_height;
+                            end_y = (y + 1) * patch_height;
+                        }
+                        if (x === sqrt_patches - 1) { // At right
+                            start_x = width - patch_width;
+                            end_x = width;
+                        } else {
+                            start_x = x * patch_width;
+                            end_x = (x + 1) * patch_width;
+                        }
+                        const starts = [start_y, start_x];
+                        const ends = [end_y, end_x];
+                        const patch = await (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.slice)(pixel_values, starts, ends, SLICE_AXES);
+                        patches.push(patch);
+                    }
+                }
+                const resized_tensors = await (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.interpolate_4d)((0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.cat)(patches, 0), {
+                    size: [IMAGE_SIZE, IMAGE_SIZE],
+                    mode: 'bicubic',
+                }); // [num_crops, 3, 336, 336]
+                // Concatenate the global image with the patches
+                all_pixel_values.push((0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.cat)([batch_pixel_values, resized_tensors], 0));
+            } else {
+                // Only use the global image
+                // NOTE: Not currently supported in modelling code
+                all_pixel_values.push(batch_pixel_values);
+            }
+        }
+        // [num_images, 1 + num_crops, num_channels=3, height, width]
+        const pixel_values = (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.stack)(all_pixel_values, 0);
+        // Calculate padded image sizes
+        const sizes = reshaped_input_sizes.map(x => x.map(y => IMAGE_SIZE * ceil(y / IMAGE_SIZE)));
+        const image_sizes = new _utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.Tensor(
+            'int64',
+            sizes.flat(),
+            [num_images, 2],
+        );
+        // Each entry of `sizes` is destructured as [height, width].
+        const num_img_tokens = sizes.map(
+            ([height, width]) => this.calc_num_image_tokens_from_image_size(width, height),
+        );
+        return { pixel_values, original_sizes, reshaped_input_sizes, image_sizes, num_img_tokens };
+    }
+}
+/***/ }),
+/***/ "./src/models/phi3_v/processing_phi3_v.js":
+/*!************************************************!*\
+  !*** ./src/models/phi3_v/processing_phi3_v.js ***!
+  \************************************************/
+/***/ ((__unused_webpack___webpack_module__, __webpack_exports__, __webpack_require__) => {
+"use strict";
+__webpack_require__.r(__webpack_exports__);
+/* harmony export */ __webpack_require__.d(__webpack_exports__, {
+/* harmony export */   Phi3VProcessor: () => (/* binding */ Phi3VProcessor)
+/* harmony export */ });
+/* harmony import */ var _base_processing_utils_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ../../base/processing_utils.js */ "./src/base/processing_utils.js");
+/* harmony import */ var _auto_image_processing_auto_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ../auto/image_processing_auto.js */ "./src/models/auto/image_processing_auto.js");
+/* harmony import */ var _tokenizers_js__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! ../../tokenizers.js */ "./src/tokenizers.js");
+/* harmony import */ var _utils_image_js__WEBPACK_IMPORTED_MODULE_3__ = __webpack_require__(/*! ../../utils/image.js */ "./src/utils/image.js");
+const IMAGE_TOKEN = "<|image|>";
+const IMAGE_TOKEN_PATTERN = /<\|image_\d+\|>/g;
+/**
+ * Processor for Phi3-V: expands `<|image_N|>` placeholders in the prompt into the
+ * right number of `<|image|>` tokens, tokenizes the text, and runs the image processor.
+ */
+class Phi3VProcessor extends _base_processing_utils_js__WEBPACK_IMPORTED_MODULE_0__.Processor {
+    static image_processor_class = _auto_image_processing_auto_js__WEBPACK_IMPORTED_MODULE_1__.AutoImageProcessor
+    static tokenizer_class = _tokenizers_js__WEBPACK_IMPORTED_MODULE_2__.AutoTokenizer
+    /**
+     * Prepares model inputs from text and, optionally, images.
+     * @param {string|string[]} text One prompt or a batch of prompts.
+     * @param {RawImage|RawImage[]} [images=null] Image(s) referenced by the prompt(s).
+     * @param {Object} [options]
+     * @param {boolean} [options.padding=true] Forwarded to the tokenizer.
+     * @param {boolean} [options.truncation=true] Forwarded to the tokenizer.
+     * @param {number|null} [options.num_crops=null] Forwarded to the image processor.
+     * @returns {Promise<any>} Tokenizer outputs merged with image-processor outputs
+     * (image-processor keys win on a clash).
+     */
+    async _call(text, images = null, {
+        padding = true,
+        truncation = true,
+        num_crops = null,
+    } = {}) {
+        if (!Array.isArray(text)) {
+            text = [text];
+        }
+        let text_inputs, image_inputs;
+        if (images) {
+            image_inputs = await this.image_processor(images, { num_crops });
+            const { num_img_tokens } = image_inputs;
+            // The original implementation adds a bos_token before the image tokens
+            // TODO: Check if this affects performance, since it looks like a bug in the original implementation
+            // NOTE: `num_img_tokens` is indexed by the prompt's position in the batch, so every
+            // placeholder inside prompt `i` expands to the token count of image `i`.
+            const prompt_chunks = text.map((t, i) => t.split(IMAGE_TOKEN_PATTERN).join(IMAGE_TOKEN.repeat(num_img_tokens[i])));
+            text_inputs = this.tokenizer(prompt_chunks, { padding, truncation });
+            // The model expects image tokens to be negative, so we negate the image token ids
+            const image_token_id = this.tokenizer.model.convert_tokens_to_ids([IMAGE_TOKEN])[0];
+            // NOTE(review): loose `==` looks deliberate — presumably the ids are BigInt while
+            // `image_token_id` is a number; confirm before changing to `===`.
+            text_inputs.input_ids.map_(id => (id == image_token_id) ? -id : id);
+        } else {
+            // Honour the caller's padding/truncation settings for text-only input too,
+            // instead of silently dropping them.
+            text_inputs = this.tokenizer(text, { padding, truncation });
+        }
+        return {
+            ...text_inputs,
+            ...image_inputs,
+        }
+    }
+}
 /***/ }),
 /***/ "./src/models/processors.js":
@@ -18192,30 +18860,39 @@ class OwlViTProcessor extends _base_processing_utils_js__WEBPACK_IMPORTED_MODULE
 __webpack_require__.r(__webpack_exports__);
 /* harmony export */ __webpack_require__.d(__webpack_exports__, {
 /* harmony export */   Florence2Processor: () => (/* reexport safe */ _florence2_processing_florence2_js__WEBPACK_IMPORTED_MODULE_0__.Florence2Processor),
-/* harmony export */   Idefics3Processor: () => (/* reexport safe */ _idefics3_processing_idefics3_js__WEBPACK_IMPORTED_MODULE_2__.Idefics3Processor),
-/* harmony export */   JinaCLIPProcessor: () => (/* reexport safe */ _jina_clip_processing_jina_clip_js__WEBPACK_IMPORTED_MODULE_4__.JinaCLIPProcessor),
+/* harmony export */   Idefics3Processor: () => (/* reexport safe */ _idefics3_processing_idefics3_js__WEBPACK_IMPORTED_MODULE_3__.Idefics3Processor),
+/* harmony export */   JinaCLIPProcessor: () => (/* reexport safe */ _jina_clip_processing_jina_clip_js__WEBPACK_IMPORTED_MODULE_5__.JinaCLIPProcessor),
 /* harmony export */   MgpstrProcessor: () => (/* reexport safe */ _mgp_str_processing_mgp_str_js__WEBPACK_IMPORTED_MODULE_1__.MgpstrProcessor),
-/* harmony export */   OwlViTProcessor: () => (/* reexport safe */ _owlvit_processing_owlvit_js__WEBPACK_IMPORTED_MODULE_5__.OwlViTProcessor),
-/* harmony export */   PyAnnoteProcessor: () => (/* reexport safe */ _pyannote_processing_pyannote_js__WEBPACK_IMPORTED_MODULE_6__.PyAnnoteProcessor),
-/* harmony export */   Qwen2VLProcessor: () => (/* reexport safe */ _qwen2_vl_processing_qwen2_vl_js__WEBPACK_IMPORTED_MODULE_7__.Qwen2VLProcessor),
-/* harmony export */   SamProcessor: () => (/* reexport safe */ _sam_processing_sam_js__WEBPACK_IMPORTED_MODULE_8__.SamProcessor),
-/* harmony export */   SpeechT5Processor: () => (/* reexport safe */ _speecht5_processing_speecht5_js__WEBPACK_IMPORTED_MODULE_9__.SpeechT5Processor),
-/* harmony export */   VLChatProcessor: () => (/* reexport safe */ _janus_processing_janus_js__WEBPACK_IMPORTED_MODULE_3__.VLChatProcessor),
-/* harmony export */   Wav2Vec2ProcessorWithLM: () => (/* reexport safe */ _wav2vec2_processing_wav2vec2_js__WEBPACK_IMPORTED_MODULE_10__.Wav2Vec2ProcessorWithLM),
-/* harmony export */   WhisperProcessor: () => (/* reexport safe */ _whisper_processing_whisper_js__WEBPACK_IMPORTED_MODULE_11__.WhisperProcessor)
+/* harmony export */   MoonshineProcessor: () => (/* reexport safe */ _moonshine_processing_moonshine_js__WEBPACK_IMPORTED_MODULE_2__.MoonshineProcessor),
+/* harmony export */   OwlViTProcessor: () => (/* reexport safe */ _owlvit_processing_owlvit_js__WEBPACK_IMPORTED_MODULE_6__.OwlViTProcessor),
+/* harmony export */   PaliGemmaProcessor: () => (/* reexport safe */ _paligemma_processing_paligemma_js__WEBPACK_IMPORTED_MODULE_8__.PaliGemmaProcessor),
+/* harmony export */   Phi3VProcessor: () => (/* reexport safe */ _phi3_v_processing_phi3_v_js__WEBPACK_IMPORTED_MODULE_7__.Phi3VProcessor),
+/* harmony export */   PyAnnoteProcessor: () => (/* reexport safe */ _pyannote_processing_pyannote_js__WEBPACK_IMPORTED_MODULE_9__.PyAnnoteProcessor),
+/* harmony export */   Qwen2VLProcessor: () => (/* reexport safe */ _qwen2_vl_processing_qwen2_vl_js__WEBPACK_IMPORTED_MODULE_10__.Qwen2VLProcessor),
+/* harmony export */   SamProcessor: () => (/* reexport safe */ _sam_processing_sam_js__WEBPACK_IMPORTED_MODULE_11__.SamProcessor),
+/* harmony export */   SpeechT5Processor: () => (/* reexport safe */ _speecht5_processing_speecht5_js__WEBPACK_IMPORTED_MODULE_12__.SpeechT5Processor),
+/* harmony export */   VLChatProcessor: () => (/* reexport safe */ _janus_processing_janus_js__WEBPACK_IMPORTED_MODULE_4__.VLChatProcessor),
+/* harmony export */   Wav2Vec2ProcessorWithLM: () => (/* reexport safe */ _wav2vec2_processing_wav2vec2_js__WEBPACK_IMPORTED_MODULE_13__.Wav2Vec2ProcessorWithLM),
+/* harmony export */   WhisperProcessor: () => (/* reexport safe */ _whisper_processing_whisper_js__WEBPACK_IMPORTED_MODULE_14__.WhisperProcessor)
 /* harmony export */ });
 /* harmony import */ var _florence2_processing_florence2_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ./florence2/processing_florence2.js */ "./src/models/florence2/processing_florence2.js");
 /* harmony import */ var _mgp_str_processing_mgp_str_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ./mgp_str/processing_mgp_str.js */ "./src/models/mgp_str/processing_mgp_str.js");
-/* harmony import */ var _idefics3_processing_idefics3_js__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! ./idefics3/processing_idefics3.js */ "./src/models/idefics3/processing_idefics3.js");
-/* harmony import */ var _janus_processing_janus_js__WEBPACK_IMPORTED_MODULE_3__ = __webpack_require__(/*! ./janus/processing_janus.js */ "./src/models/janus/processing_janus.js");
-/* harmony import */ var _jina_clip_processing_jina_clip_js__WEBPACK_IMPORTED_MODULE_4__ = __webpack_require__(/*! ./jina_clip/processing_jina_clip.js */ "./src/models/jina_clip/processing_jina_clip.js");
-/* harmony import */ var _owlvit_processing_owlvit_js__WEBPACK_IMPORTED_MODULE_5__ = __webpack_require__(/*! ./owlvit/processing_owlvit.js */ "./src/models/owlvit/processing_owlvit.js");
-/* harmony import */ var _pyannote_processing_pyannote_js__WEBPACK_IMPORTED_MODULE_6__ = __webpack_require__(/*! ./pyannote/processing_pyannote.js */ "./src/models/pyannote/processing_pyannote.js");
-/* harmony import */ var _qwen2_vl_processing_qwen2_vl_js__WEBPACK_IMPORTED_MODULE_7__ = __webpack_require__(/*! ./qwen2_vl/processing_qwen2_vl.js */ "./src/models/qwen2_vl/processing_qwen2_vl.js");
-/* harmony import */ var _sam_processing_sam_js__WEBPACK_IMPORTED_MODULE_8__ = __webpack_require__(/*! ./sam/processing_sam.js */ "./src/models/sam/processing_sam.js");
-/* harmony import */ var _speecht5_processing_speecht5_js__WEBPACK_IMPORTED_MODULE_9__ = __webpack_require__(/*! ./speecht5/processing_speecht5.js */ "./src/models/speecht5/processing_speecht5.js");
-/* harmony import */ var _wav2vec2_processing_wav2vec2_js__WEBPACK_IMPORTED_MODULE_10__ = __webpack_require__(/*! ./wav2vec2/processing_wav2vec2.js */ "./src/models/wav2vec2/processing_wav2vec2.js");
-/* harmony import */ var _whisper_processing_whisper_js__WEBPACK_IMPORTED_MODULE_11__ = __webpack_require__(/*! ./whisper/processing_whisper.js */ "./src/models/whisper/processing_whisper.js");
+/* harmony import */ var _moonshine_processing_moonshine_js__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! ./moonshine/processing_moonshine.js */ "./src/models/moonshine/processing_moonshine.js");
+/* harmony import */ var _idefics3_processing_idefics3_js__WEBPACK_IMPORTED_MODULE_3__ = __webpack_require__(/*! ./idefics3/processing_idefics3.js */ "./src/models/idefics3/processing_idefics3.js");
+/* harmony import */ var _janus_processing_janus_js__WEBPACK_IMPORTED_MODULE_4__ = __webpack_require__(/*! ./janus/processing_janus.js */ "./src/models/janus/processing_janus.js");
+/* harmony import */ var _jina_clip_processing_jina_clip_js__WEBPACK_IMPORTED_MODULE_5__ = __webpack_require__(/*! ./jina_clip/processing_jina_clip.js */ "./src/models/jina_clip/processing_jina_clip.js");
+/* harmony import */ var _owlvit_processing_owlvit_js__WEBPACK_IMPORTED_MODULE_6__ = __webpack_require__(/*! ./owlvit/processing_owlvit.js */ "./src/models/owlvit/processing_owlvit.js");
+/* harmony import */ var _phi3_v_processing_phi3_v_js__WEBPACK_IMPORTED_MODULE_7__ = __webpack_require__(/*! ./phi3_v/processing_phi3_v.js */ "./src/models/phi3_v/processing_phi3_v.js");
+/* harmony import */ var _paligemma_processing_paligemma_js__WEBPACK_IMPORTED_MODULE_8__ = __webpack_require__(/*! ./paligemma/processing_paligemma.js */ "./src/models/paligemma/processing_paligemma.js");
+/* harmony import */ var _pyannote_processing_pyannote_js__WEBPACK_IMPORTED_MODULE_9__ = __webpack_require__(/*! ./pyannote/processing_pyannote.js */ "./src/models/pyannote/processing_pyannote.js");
+/* harmony import */ var _qwen2_vl_processing_qwen2_vl_js__WEBPACK_IMPORTED_MODULE_10__ = __webpack_require__(/*! ./qwen2_vl/processing_qwen2_vl.js */ "./src/models/qwen2_vl/processing_qwen2_vl.js");
+/* harmony import */ var _sam_processing_sam_js__WEBPACK_IMPORTED_MODULE_11__ = __webpack_require__(/*! ./sam/processing_sam.js */ "./src/models/sam/processing_sam.js");
+/* harmony import */ var _speecht5_processing_speecht5_js__WEBPACK_IMPORTED_MODULE_12__ = __webpack_require__(/*! ./speecht5/processing_speecht5.js */ "./src/models/speecht5/processing_speecht5.js");
+/* harmony import */ var _wav2vec2_processing_wav2vec2_js__WEBPACK_IMPORTED_MODULE_13__ = __webpack_require__(/*! ./wav2vec2/processing_wav2vec2.js */ "./src/models/wav2vec2/processing_wav2vec2.js");
+/* harmony import */ var _whisper_processing_whisper_js__WEBPACK_IMPORTED_MODULE_14__ = __webpack_require__(/*! ./whisper/processing_whisper.js */ "./src/models/whisper/processing_whisper.js");
@@ -18264,6 +18941,8 @@ __webpack_require__.r(__webpack_exports__);
 /* harmony export */ });
 /* harmony import */ var _base_feature_extraction_utils_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ../../base/feature_extraction_utils.js */ "./src/base/feature_extraction_utils.js");
 /* harmony import */ var _utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ../../utils/tensor.js */ "./src/utils/tensor.js");
+/* harmony import */ var _utils_maths_js__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! ../../utils/maths.js */ "./src/utils/maths.js");
@@ -18291,41 +18970,6 @@ class PyAnnoteFeatureExtractor extends _base_feature_extraction_utils_js__WEBPAC
         };
     }
-}
-/***/ }),
-/***/ "./src/models/pyannote/processing_pyannote.js":
-/*!****************************************************!*\
-  !*** ./src/models/pyannote/processing_pyannote.js ***!
-  \****************************************************/
-/***/ ((__unused_webpack___webpack_module__, __webpack_exports__, __webpack_require__) => {
-"use strict";
-__webpack_require__.r(__webpack_exports__);
-/* harmony export */ __webpack_require__.d(__webpack_exports__, {
-/* harmony export */   PyAnnoteProcessor: () => (/* binding */ PyAnnoteProcessor)
-/* harmony export */ });
-/* harmony import */ var _base_processing_utils_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ../../base/processing_utils.js */ "./src/base/processing_utils.js");
-/* harmony import */ var _auto_feature_extraction_auto_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ../auto/feature_extraction_auto.js */ "./src/models/auto/feature_extraction_auto.js");
-/* harmony import */ var _utils_maths_js__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! ../../utils/maths.js */ "./src/utils/maths.js");
-class PyAnnoteProcessor extends _base_processing_utils_js__WEBPACK_IMPORTED_MODULE_0__.Processor {
-    static feature_extractor_class = _auto_feature_extraction_auto_js__WEBPACK_IMPORTED_MODULE_1__.AutoFeatureExtractor
-    /**
-     * Calls the feature_extractor function with the given audio input.
-     * @param {any} audio The audio input to extract features from.
-     * @returns {Promise<any>} A Promise that resolves with the extracted features.
-     */
-    async _call(audio) {
-        return await this.feature_extractor(audio)
-    }
     /**
      * NOTE: Can return fractional values. `Math.ceil` will ensure correct value.
      * @param {number} samples The number of frames in the audio.
@@ -18380,6 +19024,48 @@ class PyAnnoteProcessor extends _base_processing_utils_js__WEBPACK_IMPORTED_MODU
         }
         return results;
     }
+}
+/***/ }),
+/***/ "./src/models/pyannote/processing_pyannote.js":
+/*!****************************************************!*\
+  !*** ./src/models/pyannote/processing_pyannote.js ***!
+  \****************************************************/
+/***/ ((__unused_webpack___webpack_module__, __webpack_exports__, __webpack_require__) => {
+"use strict";
+__webpack_require__.r(__webpack_exports__);
+/* harmony export */ __webpack_require__.d(__webpack_exports__, {
+/* harmony export */   PyAnnoteProcessor: () => (/* binding */ PyAnnoteProcessor)
+/* harmony export */ });
+/* harmony import */ var _base_processing_utils_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ../../base/processing_utils.js */ "./src/base/processing_utils.js");
+/* harmony import */ var _feature_extraction_pyannote_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ./feature_extraction_pyannote.js */ "./src/models/pyannote/feature_extraction_pyannote.js");
+/**
+ * Processor for PyAnnote models; a thin wrapper around `PyAnnoteFeatureExtractor`.
+ */
+class PyAnnoteProcessor extends _base_processing_utils_js__WEBPACK_IMPORTED_MODULE_0__.Processor {
+    static feature_extractor_class = _feature_extraction_pyannote_js__WEBPACK_IMPORTED_MODULE_1__.PyAnnoteFeatureExtractor;
+    /**
+     * Runs the feature extractor on the supplied audio.
+     * @param {any} audio The audio input to extract features from.
+     * @returns {Promise<any>} A Promise that resolves with the extracted features.
+     */
+    async _call(audio) {
+        const features = await this.feature_extractor(audio);
+        return features;
+    }
+    /**
+     * Forwards all arguments to the feature extractor's method of the same name.
+     * @type {PyAnnoteFeatureExtractor['post_process_speaker_diarization']}
+     */
+    post_process_speaker_diarization(...args) {
+        const extractor = /** @type {PyAnnoteFeatureExtractor} */(this.feature_extractor);
+        return extractor.post_process_speaker_diarization(...args);
+    }
+    /** Sampling rate taken from the feature extractor's config. */
+    get sampling_rate() {
+        const { config } = this.feature_extractor;
+        return config.sampling_rate;
+    }
 }
@@ -20129,6 +20815,17 @@ class TensorOpRegistry {
         }
         return this._top_k;
     }
+    /**
+     * Lazily builds and caches the wrapped `Slice` operator; later reads reuse `_slice`.
+     * The byte array embeds the ASCII name "Slice" (83, 108, 105, 99, 101) and is
+     * presumably a serialized operator graph whose output is named 'y' — confirm against `wrap`.
+     */
+    static get slice() {
+        if (this._slice) {
+            return this._slice;
+        }
+        const model_bytes = [8, 7, 18, 0, 58, 96, 10, 25, 10, 1, 120, 10, 1, 115, 10, 1, 101, 10, 1, 97, 10, 1, 116, 18, 1, 121, 34, 5, 83, 108, 105, 99, 101, 18, 1, 114, 90, 9, 10, 1, 120, 18, 4, 10, 2, 8, 1, 90, 9, 10, 1, 115, 18, 4, 10, 2, 8, 7, 90, 9, 10, 1, 101, 18, 4, 10, 2, 8, 7, 90, 9, 10, 1, 97, 18, 4, 10, 2, 8, 7, 90, 9, 10, 1, 116, 18, 4, 10, 2, 8, 7, 98, 9, 10, 1, 121, 18, 4, 10, 2, 8, 1, 66, 2, 16, 13];
+        this._slice = wrap(model_bytes, this.session_options, 'y');
+        return this._slice;
+    }
 }
@@ -21862,6 +22559,8 @@ class AutomaticSpeechRecognitionPipeline extends (/** @type {new (options: TextA
             case 'unispeech-sat':
             case 'hubert':
                 return this._call_wav2vec2(audio, kwargs)
+            case 'moonshine':
+                return this._call_moonshine(audio, kwargs)
             default:
                 throw new Error(`AutomaticSpeechRecognitionPipeline does not support model type '${this.model.config.model_type}'.`)
         }
@@ -22015,6 +22714,34 @@ class AutomaticSpeechRecognitionPipeline extends (/** @type {new (options: TextA
         }
         return single ? toReturn[0] : toReturn;
     }
+    /**
+     * Transcribes one audio input or an array of them with a Moonshine model.
+     * Each clip is processed on its own, with a generation budget proportional
+     * to its duration. Entries in `kwargs` may override `max_new_tokens`.
+     * Returns a single `{ text }` object for a single input, otherwise an array.
+     * @type {AutomaticSpeechRecognitionPipelineCallback}
+     * @private
+     */
+    async _call_moonshine(audio, kwargs) {
+        const single = !Array.isArray(audio);
+        if (single) {
+            audio = [/** @type {AudioInput} */ (audio)];
+        }
+        const sampling_rate = this.processor.feature_extractor.config.sampling_rate;
+        const preparedAudios = await prepareAudios(audio, sampling_rate);
+        const toReturn = [];
+        for (const aud of preparedAudios) {
+            const inputs = await this.processor(aud);
+            // According to the [paper](https://arxiv.org/pdf/2410.15608):
+            // "We use greedy decoding, with a heuristic limit of 6 output tokens
+            // per second of audio to avoid repeated output sequences."
+            // Scale by 6 *before* flooring: flooring the duration first would give
+            // sub-second clips a budget of 0 and drop the fractional second otherwise.
+            const max_new_tokens = Math.floor((aud.length / sampling_rate) * 6);
+            const outputs = await this.model.generate({ max_new_tokens, ...kwargs, ...inputs });
+            const text = this.processor.batch_decode(outputs, { skip_special_tokens: true })[0];
+            toReturn.push({ text });
+        }
+        return single ? toReturn[0] : toReturn;
+    }
 }
 /**
@@ -26133,6 +26860,12 @@ class PreTrainedTokenizer extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_0__
         this.unk_token = this.getToken('unk_token');
         this.unk_token_id = this.model.tokens_to_ids.get(this.unk_token);
+        this.bos_token = this.getToken('bos_token');
+        this.bos_token_id = this.model.tokens_to_ids.get(this.bos_token);
+        this.eos_token = this.getToken('eos_token');
+        this.eos_token_id = this.model.tokens_to_ids.get(this.eos_token);
         this.model_max_length = tokenizerConfig.model_max_length;
         /** @type {boolean} Whether or not to strip the text when tokenizing (removing excess spaces before and after the string). */
@@ -27105,6 +27838,11 @@ class WhisperTokenizer extends PreTrainedTokenizer {
         let chunk = new_chunk();
         let time_offset = 0.0;
         const timestamp_begin = this.timestamp_begin;
+        // Whisper timestamp tokens start from 0.00 and go to timestamp 30.00 in 0.02 increments.
+        // We can calculate the last timestamp token as timestamp_begin plus the number of
+        // 0.02 increments between 0.00 and 30.00, which is 1500.
+        const total_timestamp_tokens = 1500; // (30.00 - 0.00) / 0.02
+        const timestamp_end = timestamp_begin + total_timestamp_tokens;
         let previous_tokens = [];
         let previous_token_timestamps = [];
@@ -27192,7 +27930,7 @@ class WhisperTokenizer extends PreTrainedTokenizer {
                     } else {
                         // 2/ This is a regular special token, ignoring it
                     }
-                } else if (token >= timestamp_begin) {
+                } else if (token >= timestamp_begin && token <= timestamp_end) {
                     // 3/ Timestamp token
                     const time = (token - timestamp_begin) * time_precision + time_offset;
                     const rounded_time = (0,_utils_maths_js__WEBPACK_IMPORTED_MODULE_3__.round)(time, 2);
@@ -28684,15 +29422,45 @@ __webpack_require__.r(__webpack_exports__);
  */
 /**
- * @typedef {Object} ProgressInfo
- * @property {'initiate' | 'download' | 'progress' | 'done'} status The status of the progress item.
- * @property {string} name This can be either:
- * - a string, the *model id* of a model repo on huggingface.co.
- * - a path to a *directory* potentially containing the file.
- * @property {string} file The name of the file
- * @property {number} [progress] A number between 0 and 100. Only available for the 'progress' status.
- * @property {number} [loaded] The number of bytes loaded. Only available for the 'progress' status.
- * @property {number} [total] The total number of bytes to be loaded. Only available for the 'progress' status.
+ * @typedef {Object} InitiateProgressInfo
+ * @property {'initiate'} status
+ * @property {string} name The model id or directory path.
+ * @property {string} file The name of the file.
+ */
+/**
+ * @typedef {Object} DownloadProgressInfo
+ * @property {'download'} status
+ * @property {string} name The model id or directory path.
+ * @property {string} file The name of the file.
+ */
+/**
+ * @typedef {Object} ProgressStatusInfo
+ * @property {'progress'} status
+ * @property {string} name The model id or directory path.
+ * @property {string} file The name of the file.
+ * @property {number} progress A number between 0 and 100.
+ * @property {number} loaded The number of bytes loaded.
+ * @property {number} total The total number of bytes to be loaded.
+ */
+/**
+ * @typedef {Object} DoneProgressInfo
+ * @property {'done'} status
+ * @property {string} name The model id or directory path.
+ * @property {string} file The name of the file.
+ */
+/**
+ * @typedef {Object} ReadyProgressInfo
+ * @property {'ready'} status
+ * @property {string} task The loaded task.
+ * @property {string} model The loaded model.
+ */
+/**
+ * @typedef {InitiateProgressInfo | DownloadProgressInfo | ProgressStatusInfo | DoneProgressInfo | ReadyProgressInfo} ProgressInfo
  */
 /**
@@ -30035,13 +30803,6 @@ async function getModelFile(path_or_repo_id, filename, fatal = true, options = {
         file: filename
     })
-    /** @type {import('./core.js').ProgressInfo} */
-    const progressInfo = {
-        status: 'progress',
-        name: path_or_repo_id,
-        file: filename
-    }
     /** @type {Uint8Array} */
     let buffer;
@@ -30061,7 +30822,9 @@ async function getModelFile(path_or_repo_id, filename, fatal = true, options = {
         // For completeness, we still fire the final progress callback
         (0,_core_js__WEBPACK_IMPORTED_MODULE_3__.dispatchCallback)(options.progress_callback, {
-            ...progressInfo,
+            status: 'progress',
+            name: path_or_repo_id,
+            file: filename,
             progress: 100,
             loaded: buffer.length,
             total: buffer.length,
@@ -30069,7 +30832,9 @@ async function getModelFile(path_or_repo_id, filename, fatal = true, options = {
     } else {
         buffer = await readResponse(response, data => {
             (0,_core_js__WEBPACK_IMPORTED_MODULE_3__.dispatchCallback)(options.progress_callback, {
-                ...progressInfo,
+                status: 'progress',
+                name: path_or_repo_id,
+                file: filename,
                 ...data,
             })
         })
@@ -30126,12 +30891,11 @@ async function getModelJSON(modelPath, fileName, fatal = true, options = {}) {
     return JSON.parse(jsonData);
 }
 /**
  * Read and track progress when reading a Response object
  *
- * @param {any} response The Response object to read
- * @param {function} progress_callback The function to call with progress updates
+ * @param {Response|FileResponse} response The Response object to read
+ * @param {(data: {progress: number, loaded: number, total: number}) => void} progress_callback The function to call with progress updates
  * @returns {Promise<Uint8Array>} A Promise that resolves with the Uint8Array buffer
  */
 async function readResponse(response, progress_callback) {
@@ -30528,6 +31292,46 @@ class RawImage {
         return this._update(newData, this.width, this.height, 4);
     }
+    /**
+     * Apply an alpha mask to the image. Operates in place.
+     * For a 4-channel image, the alpha values are overwritten directly in the existing data array.
+     * For a 3-channel image, a new 4-channel buffer is built (the mask becomes the fourth channel)
+     * and handed to `this._update`.
+     * @param {RawImage} mask The mask to apply. It should have a single channel and the same width and height as the image.
+     * @returns {RawImage} The masked image.
+     * @throws {Error} If the mask is not the same size as the image.
+     * @throws {Error} If the image does not have 3 or 4 channels.
+     * @throws {Error} If the mask is not a single channel.
+     */
+    putAlpha(mask) {
+        if (mask.width !== this.width || mask.height !== this.height) {
+            throw new Error(`Expected mask size to be ${this.width}x${this.height}, but got ${mask.width}x${mask.height}`);
+        }
+        if (mask.channels !== 1) {
+            throw new Error(`Expected mask to have 1 channel, but got ${mask.channels}`);
+        }
+        const this_data = this.data;
+        const mask_data = mask.data;
+        const num_pixels = this.width * this.height;
+        if (this.channels === 3) {
+            // Convert to RGBA and simultaneously apply mask to alpha channel
+            const newData = new Uint8ClampedArray(num_pixels * 4);
+            for (let i = 0, in_offset = 0, out_offset = 0; i < num_pixels; ++i) {
+                newData[out_offset++] = this_data[in_offset++];
+                newData[out_offset++] = this_data[in_offset++];
+                newData[out_offset++] = this_data[in_offset++];
+                newData[out_offset++] = mask_data[i];
+            }
+            return this._update(newData, this.width, this.height, 4);
+        } else if (this.channels === 4) {
+            // Apply mask to alpha channel in place
+            for (let i = 0; i < num_pixels; ++i) {
+                this_data[4 * i + 3] = mask_data[i];
+            }
+            return this;
+        }
+        throw new Error(`Expected image to have 3 or 4 channels, but got ${this.channels}`);
+    }
     /**
      * Resize the image to the given dimensions. This method uses the canvas API to perform the resizing.
      * @param {number} width The width of the new image. `null` or `-1` will preserve the aspect ratio.
@@ -32136,7 +32940,9 @@ __webpack_require__.r(__webpack_exports__);
 /* harmony export */   ones_like: () => (/* binding */ ones_like),
 /* harmony export */   permute: () => (/* binding */ permute),
 /* harmony export */   quantize_embeddings: () => (/* binding */ quantize_embeddings),
+/* harmony export */   rand: () => (/* binding */ rand),
 /* harmony export */   rfft: () => (/* binding */ rfft),
+/* harmony export */   slice: () => (/* binding */ slice),
 /* harmony export */   stack: () => (/* binding */ stack),
 /* harmony export */   std_mean: () => (/* binding */ std_mean),
 /* harmony export */   topk: () => (/* binding */ topk),
@@ -32915,8 +33721,21 @@ class Tensor {
         if (!DataTypeMap.hasOwnProperty(type)) {
             throw new Error(`Unsupported type: ${type}`);
         }
+        // Handle special cases where a mapping function is needed (e.g., where one type is a bigint and the other is a number)
+        let map_fn;
+        const is_source_bigint = ['int64', 'uint64'].includes(this.type);
+        const is_dest_bigint = ['int64', 'uint64'].includes(type);
+        if (is_source_bigint && !is_dest_bigint) {
+            // TypeError: Cannot convert a BigInt value to a number
+            map_fn = Number;
+        } else if (!is_source_bigint && is_dest_bigint) {
+            // TypeError: Cannot convert [x] to a BigInt
+            map_fn = BigInt;
+        }
         // @ts-ignore
-        return new Tensor(type, DataTypeMap[type].from(this.data), this.dims);
+        return new Tensor(type, DataTypeMap[type].from(this.data, map_fn), this.dims);
     }
 }
@@ -33114,6 +33933,29 @@ async function topk(x, k) {
     });
 }
+const arrayToIndexTensor = (array) => new Tensor('int64', array, [array.length]);
+/**
+ * Slice a multidimensional float32 tensor.
+ * The index arrays are wrapped as 1-D int64 tensors and passed, together with `data`,
+ * to the `slice` op obtained from `TensorOpRegistry`.
+ * @param {Tensor} data Tensor of data to extract slices from
+ * @param {number[]} starts 1-D array of starting indices of corresponding axis in axes
+ * @param {number[]} ends 1-D array of ending indices (exclusive) of corresponding axis in axes
+ * @param {number[]} axes 1-D array of axes that starts and ends apply to
+ * @param {number[]} [steps] 1-D array of slice step of corresponding axis in axes. Defaults to a step of 1 for every entry in `axes`.
+ * @returns {Promise<Tensor>} Sliced data tensor.
+ */
+async function slice(data, starts, ends, axes, steps) {
+    const op = await _ops_registry_js__WEBPACK_IMPORTED_MODULE_2__.TensorOpRegistry.slice;
+    return await op({
+        x: data,
+        s: arrayToIndexTensor(starts),
+        e: arrayToIndexTensor(ends),
+        a: arrayToIndexTensor(axes),
+        t: arrayToIndexTensor(steps ?? new Array(axes.length).fill(1)),
+    });
+}
 /**
  * Perform mean pooling of the last hidden state followed by a normalization step.
  * @param {Tensor} last_hidden_state Tensor of shape [batchSize, seqLength, embedDim]
@@ -33560,6 +34402,20 @@ function zeros_like(tensor) {
     return zeros(tensor.dims);
 }
+/**
+ * Returns a tensor filled with random numbers from a uniform distribution on the interval [0, 1).
+ * Values are drawn with `Math.random()` and stored as float32. The number of elements is the
+ * product of the entries of `size` (an empty `size` therefore yields a single element).
+ * @param {number[]} size A sequence of integers defining the shape of the output tensor.
+ * @returns {Tensor} The random tensor.
+ */
+function rand(size) {
+    const length = size.reduce((a, b) => a * b, 1);
+    return new Tensor(
+        "float32",
+        Float32Array.from({ length }, () => Math.random()),
+        size,
+    )
+}
 /**
  * Quantizes the embeddings tensor to binary or unsigned binary precision.
  * @param {Tensor} tensor The tensor to quantize.
@@ -33694,7 +34550,7 @@ function quantize_embeddings(tensor, precision) {
 /******/
 /************************************************************************/
 var __webpack_exports__ = {};
-// This entry need to be wrapped in an IIFE because it need to be in strict mode.
+// This entry needs to be wrapped in an IIFE because it needs to be in strict mode.
 (() => {
 "use strict";
 /*!*****************************!*\
@@ -33905,6 +34761,9 @@ __webpack_require__.r(__webpack_exports__);
 /* harmony export */   EsmModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.EsmModel),
 /* harmony export */   EsmPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.EsmPreTrainedModel),
 /* harmony export */   EsmTokenizer: () => (/* reexport safe */ _tokenizers_js__WEBPACK_IMPORTED_MODULE_3__.EsmTokenizer),
+/* harmony export */   ExaoneForCausalLM: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.ExaoneForCausalLM),
+/* harmony export */   ExaoneModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.ExaoneModel),
+/* harmony export */   ExaonePreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.ExaonePreTrainedModel),
 /* harmony export */   FFT: () => (/* reexport safe */ _utils_maths_js__WEBPACK_IMPORTED_MODULE_8__.FFT),
 /* harmony export */   FalconForCausalLM: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.FalconForCausalLM),
 /* harmony export */   FalconModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.FalconModel),
@@ -33963,6 +34822,9 @@ __webpack_require__.r(__webpack_exports__);
 /* harmony export */   HubertForSequenceClassification: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.HubertForSequenceClassification),
 /* harmony export */   HubertModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.HubertModel),
 /* harmony export */   HubertPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.HubertPreTrainedModel),
+/* harmony export */   IJepaForImageClassification: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.IJepaForImageClassification),
+/* harmony export */   IJepaModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.IJepaModel),
+/* harmony export */   IJepaPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.IJepaPreTrainedModel),
 /* harmony export */   Idefics3ForConditionalGeneration: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Idefics3ForConditionalGeneration),
 /* harmony export */   Idefics3ImageProcessor: () => (/* reexport safe */ _models_image_processors_js__WEBPACK_IMPORTED_MODULE_13__.Idefics3ImageProcessor),
 /* harmony export */   Idefics3PreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Idefics3PreTrainedModel),
@@ -34081,6 +34943,11 @@ __webpack_require__.r(__webpack_exports__);
 /* harmony export */   MobileViTV2PreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MobileViTV2PreTrainedModel),
 /* harmony export */   ModelOutput: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.ModelOutput),
 /* harmony export */   Moondream1ForConditionalGeneration: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Moondream1ForConditionalGeneration),
+/* harmony export */   MoonshineFeatureExtractor: () => (/* reexport safe */ _models_feature_extractors_js__WEBPACK_IMPORTED_MODULE_10__.MoonshineFeatureExtractor),
+/* harmony export */   MoonshineForConditionalGeneration: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MoonshineForConditionalGeneration),
+/* harmony export */   MoonshineModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MoonshineModel),
+/* harmony export */   MoonshinePreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MoonshinePreTrainedModel),
+/* harmony export */   MoonshineProcessor: () => (/* reexport safe */ _models_processors_js__WEBPACK_IMPORTED_MODULE_16__.MoonshineProcessor),
 /* harmony export */   MptForCausalLM: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MptForCausalLM),
 /* harmony export */   MptModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MptModel),
 /* harmony export */   MptPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MptPreTrainedModel),
@@ -34101,6 +34968,9 @@ __webpack_require__.r(__webpack_exports__);
 /* harmony export */   OPTModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.OPTModel),
 /* harmony export */   OPTPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.OPTPreTrainedModel),
 /* harmony export */   ObjectDetectionPipeline: () => (/* reexport safe */ _pipelines_js__WEBPACK_IMPORTED_MODULE_1__.ObjectDetectionPipeline),
+/* harmony export */   Olmo2ForCausalLM: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Olmo2ForCausalLM),
+/* harmony export */   Olmo2Model: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Olmo2Model),
+/* harmony export */   Olmo2PreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Olmo2PreTrainedModel),
 /* harmony export */   OlmoForCausalLM: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.OlmoForCausalLM),
 /* harmony export */   OlmoModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.OlmoModel),
 /* harmony export */   OlmoPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.OlmoPreTrainedModel),
@@ -34117,6 +34987,9 @@ __webpack_require__.r(__webpack_exports__);
 /* harmony export */   Owlv2ImageProcessor: () => (/* reexport safe */ _models_image_processors_js__WEBPACK_IMPORTED_MODULE_13__.Owlv2ImageProcessor),
 /* harmony export */   Owlv2Model: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Owlv2Model),
 /* harmony export */   Owlv2PreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Owlv2PreTrainedModel),
+/* harmony export */   PaliGemmaForConditionalGeneration: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.PaliGemmaForConditionalGeneration),
+/* harmony export */   PaliGemmaPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.PaliGemmaPreTrainedModel),
+/* harmony export */   PaliGemmaProcessor: () => (/* reexport safe */ _models_processors_js__WEBPACK_IMPORTED_MODULE_16__.PaliGemmaProcessor),
 /* harmony export */   PatchTSMixerForPrediction: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.PatchTSMixerForPrediction),
 /* harmony export */   PatchTSMixerModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.PatchTSMixerModel),
 /* harmony export */   PatchTSMixerPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.PatchTSMixerPreTrainedModel),
@@ -34126,6 +34999,10 @@ __webpack_require__.r(__webpack_exports__);
 /* harmony export */   Phi3ForCausalLM: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Phi3ForCausalLM),
 /* harmony export */   Phi3Model: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Phi3Model),
 /* harmony export */   Phi3PreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Phi3PreTrainedModel),
+/* harmony export */   Phi3VForCausalLM: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Phi3VForCausalLM),
+/* harmony export */   Phi3VImageProcessor: () => (/* reexport safe */ _models_image_processors_js__WEBPACK_IMPORTED_MODULE_13__.Phi3VImageProcessor),
+/* harmony export */   Phi3VPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Phi3VPreTrainedModel),
+/* harmony export */   Phi3VProcessor: () => (/* reexport safe */ _models_processors_js__WEBPACK_IMPORTED_MODULE_16__.Phi3VProcessor),
 /* harmony export */   PhiForCausalLM: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.PhiForCausalLM),
 /* harmony export */   PhiModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.PhiModel),
 /* harmony export */   PhiPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.PhiPreTrainedModel),
@@ -34374,9 +35251,11 @@ __webpack_require__.r(__webpack_exports__);
 /* harmony export */   permute_data: () => (/* reexport safe */ _utils_maths_js__WEBPACK_IMPORTED_MODULE_8__.permute_data),
 /* harmony export */   pipeline: () => (/* reexport safe */ _pipelines_js__WEBPACK_IMPORTED_MODULE_1__.pipeline),
 /* harmony export */   quantize_embeddings: () => (/* reexport safe */ _utils_tensor_js__WEBPACK_IMPORTED_MODULE_7__.quantize_embeddings),
+/* harmony export */   rand: () => (/* reexport safe */ _utils_tensor_js__WEBPACK_IMPORTED_MODULE_7__.rand),
 /* harmony export */   read_audio: () => (/* reexport safe */ _utils_audio_js__WEBPACK_IMPORTED_MODULE_5__.read_audio),
 /* harmony export */   rfft: () => (/* reexport safe */ _utils_tensor_js__WEBPACK_IMPORTED_MODULE_7__.rfft),
 /* harmony export */   round: () => (/* reexport safe */ _utils_maths_js__WEBPACK_IMPORTED_MODULE_8__.round),
+/* harmony export */   slice: () => (/* reexport safe */ _utils_tensor_js__WEBPACK_IMPORTED_MODULE_7__.slice),
 /* harmony export */   softmax: () => (/* reexport safe */ _utils_maths_js__WEBPACK_IMPORTED_MODULE_8__.softmax),
 /* harmony export */   spectrogram: () => (/* reexport safe */ _utils_audio_js__WEBPACK_IMPORTED_MODULE_5__.spectrogram),
 /* harmony export */   stack: () => (/* reexport safe */ _utils_tensor_js__WEBPACK_IMPORTED_MODULE_7__.stack),
@@ -34452,7 +35331,7 @@ __webpack_require__.r(__webpack_exports__);
 })();
 var __webpack_export_target__ = exports;
-for(var i in __webpack_exports__) __webpack_export_target__[i] = __webpack_exports__[i];
+for(var __webpack_i__ in __webpack_exports__) __webpack_export_target__[__webpack_i__] = __webpack_exports__[__webpack_i__];
 if(__webpack_exports__.__esModule) Object.defineProperty(__webpack_export_target__, "__esModule", { value: true });
 /******/ })()
 ;