npm - @huggingface/transformers - Versions diffs - 3.4.0 → 3.4.1 - Mend

@huggingface/transformers 3.4.0 → 3.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38)

package/README.md +6 -2
package/dist/transformers.js +315 -152
package/dist/transformers.js.map +1 -1
package/dist/transformers.min.js +1 -1
package/dist/transformers.min.js.map +1 -1
package/dist/transformers.node.cjs +303 -151
package/dist/transformers.node.cjs.map +1 -1
package/dist/transformers.node.min.cjs +1 -1
package/dist/transformers.node.min.cjs.map +1 -1
package/dist/transformers.node.min.mjs +1 -1
package/dist/transformers.node.min.mjs.map +1 -1
package/dist/transformers.node.mjs +315 -152
package/dist/transformers.node.mjs.map +1 -1
package/dist/transformers.web.js +315 -152
package/dist/transformers.web.js.map +1 -1
package/dist/transformers.web.min.js +1 -1
package/dist/transformers.web.min.js.map +1 -1
package/package.json +1 -1
package/src/configs.js +2 -0
package/src/env.js +1 -1
package/src/models/feature_extractors.js +1 -0
package/src/models/snac/feature_extraction_snac.js +3 -0
package/src/models.js +90 -2
package/src/pipelines.js +140 -135
package/src/utils/image.js +9 -1
package/src/utils/tensor.js +6 -2
package/types/configs.d.ts.map +1 -1
package/types/models/feature_extractors.d.ts +1 -0
package/types/models/snac/feature_extraction_snac.d.ts +4 -0
package/types/models/snac/feature_extraction_snac.d.ts.map +1 -0
package/types/models.d.ts +48 -0
package/types/models.d.ts.map +1 -1
package/types/pipelines.d.ts +2 -2
package/types/pipelines.d.ts.map +1 -1
package/types/tsconfig.tsbuildinfo +1 -1
package/types/utils/image.d.ts +2 -2
package/types/utils/image.d.ts.map +1 -1
package/types/utils/tensor.d.ts.map +1 -1

package/dist/transformers.node.cjs CHANGED

@@ -3684,6 +3684,7 @@ function getNormalizedConfig(config) {
         // Sub-configs
         case 'llava':
         case 'paligemma':
+        case 'gemma3':
         case 'florence2':
         case 'llava_onevision':
         case 'idefics3':
@@ -3743,6 +3744,7 @@ function getNormalizedConfig(config) {
             break;
         case 'gemma':
         case 'gemma2':
+        case 'gemma3_text':
         case 'glm':
         case 'helium':
             mapping['num_heads'] = 'num_key_value_heads';
@@ -4074,7 +4076,7 @@ __webpack_require__.r(__webpack_exports__);
-const VERSION = '3.4.0';
+const VERSION = '3.4.1';
 // Check if various APIs are available (depends on environment)
 const IS_BROWSER_ENV = typeof window !== "undefined" && typeof window.document !== "undefined";
@@ -6218,6 +6220,9 @@ __webpack_require__.r(__webpack_exports__);
 /* harmony export */   Gemma2ForCausalLM: () => (/* binding */ Gemma2ForCausalLM),
 /* harmony export */   Gemma2Model: () => (/* binding */ Gemma2Model),
 /* harmony export */   Gemma2PreTrainedModel: () => (/* binding */ Gemma2PreTrainedModel),
+/* harmony export */   Gemma3ForCausalLM: () => (/* binding */ Gemma3ForCausalLM),
+/* harmony export */   Gemma3Model: () => (/* binding */ Gemma3Model),
+/* harmony export */   Gemma3PreTrainedModel: () => (/* binding */ Gemma3PreTrainedModel),
 /* harmony export */   GemmaForCausalLM: () => (/* binding */ GemmaForCausalLM),
 /* harmony export */   GemmaModel: () => (/* binding */ GemmaModel),
 /* harmony export */   GemmaPreTrainedModel: () => (/* binding */ GemmaPreTrainedModel),
@@ -6288,6 +6293,10 @@ __webpack_require__.r(__webpack_exports__);
 /* harmony export */   MaskFormerModel: () => (/* binding */ MaskFormerModel),
 /* harmony export */   MaskFormerPreTrainedModel: () => (/* binding */ MaskFormerPreTrainedModel),
 /* harmony export */   MaskedLMOutput: () => (/* binding */ MaskedLMOutput),
+/* harmony export */   Metric3DForDepthEstimation: () => (/* binding */ Metric3DForDepthEstimation),
+/* harmony export */   Metric3DPreTrainedModel: () => (/* binding */ Metric3DPreTrainedModel),
+/* harmony export */   Metric3Dv2ForDepthEstimation: () => (/* binding */ Metric3Dv2ForDepthEstimation),
+/* harmony export */   Metric3Dv2PreTrainedModel: () => (/* binding */ Metric3Dv2PreTrainedModel),
 /* harmony export */   MgpstrForSceneTextRecognition: () => (/* binding */ MgpstrForSceneTextRecognition),
 /* harmony export */   MgpstrModelOutput: () => (/* binding */ MgpstrModelOutput),
 /* harmony export */   MgpstrPreTrainedModel: () => (/* binding */ MgpstrPreTrainedModel),
@@ -6436,6 +6445,10 @@ __webpack_require__.r(__webpack_exports__);
 /* harmony export */   SiglipTextModel: () => (/* binding */ SiglipTextModel),
 /* harmony export */   SiglipVisionModel: () => (/* binding */ SiglipVisionModel),
 /* harmony export */   SmolVLMForConditionalGeneration: () => (/* binding */ SmolVLMForConditionalGeneration),
+/* harmony export */   SnacDecoderModel: () => (/* binding */ SnacDecoderModel),
+/* harmony export */   SnacEncoderModel: () => (/* binding */ SnacEncoderModel),
+/* harmony export */   SnacModel: () => (/* binding */ SnacModel),
+/* harmony export */   SnacPreTrainedModel: () => (/* binding */ SnacPreTrainedModel),
 /* harmony export */   SpeechT5ForSpeechToText: () => (/* binding */ SpeechT5ForSpeechToText),
 /* harmony export */   SpeechT5ForTextToSpeech: () => (/* binding */ SpeechT5ForTextToSpeech),
 /* harmony export */   SpeechT5HifiGan: () => (/* binding */ SpeechT5HifiGan),
@@ -7094,8 +7107,8 @@ async function decoderForward(self, model_inputs, is_encoder_decoder = false) {
         new_model_inputs.use_cache_branch = boolTensor(!!past_key_values);
     }
     if (session.inputNames.includes('position_ids') && new_model_inputs.attention_mask && !new_model_inputs.position_ids) {
-        // NOTE: Handle a special case for paligemma models, where positions are 1-indexed
-        const start_index = self.config.model_type === 'paligemma' ? 1 : 0;
+        // NOTE: Handle a special case for paligemma/gemma3 models, where positions are 1-indexed
+        const start_index = ['paligemma', 'gemma3_text', 'gemma3'].includes(self.config.model_type) ? 1 : 0;
         new_model_inputs.position_ids = createPositionIds(new_model_inputs, past_key_values, start_index);
     }
@@ -11020,6 +11033,23 @@ class Gemma2Model extends Gemma2PreTrainedModel { }
 class Gemma2ForCausalLM extends Gemma2PreTrainedModel { }
 //////////////////////////////////////////////////
+//////////////////////////////////////////////////
+// Gemma3 models
+/**
+ * The bare Gemma3 Model outputting raw hidden-states without any specific head on top.
+ */
+class Gemma3PreTrainedModel extends PreTrainedModel { }
+/**
+ * The bare Gemma3 Model outputting raw hidden-states without any specific head on top.
+ */
+class Gemma3Model extends Gemma3PreTrainedModel { }
+class Gemma3ForCausalLM extends Gemma3PreTrainedModel { }
+//////////////////////////////////////////////////
 //////////////////////////////////////////////////
 class OpenELMPreTrainedModel extends PreTrainedModel { }
 class OpenELMModel extends OpenELMPreTrainedModel { }
@@ -11872,6 +11902,16 @@ class DepthProPreTrainedModel extends PreTrainedModel { }
 class DepthProForDepthEstimation extends DepthProPreTrainedModel { }
 //////////////////////////////////////////////////
+//////////////////////////////////////////////////
+class Metric3DPreTrainedModel extends PreTrainedModel { }
+class Metric3DForDepthEstimation extends Metric3DPreTrainedModel { }
+//////////////////////////////////////////////////
+//////////////////////////////////////////////////
+class Metric3Dv2PreTrainedModel extends PreTrainedModel { }
+class Metric3Dv2ForDepthEstimation extends Metric3Dv2PreTrainedModel { }
+//////////////////////////////////////////////////
 //////////////////////////////////////////////////
 class MaskFormerPreTrainedModel extends PreTrainedModel { }
 class MaskFormerModel extends MaskFormerPreTrainedModel { }
@@ -13787,6 +13827,60 @@ class DacDecoderModel extends DacPreTrainedModel {
 }
 //////////////////////////////////////////////////
+//////////////////////////////////////////////////
+// Snac models
+class SnacPreTrainedModel extends PreTrainedModel {
+    main_input_name = 'input_values';
+    forward_params = ['input_values'];
+}
+/**
+ * The SNAC (Multi-Scale Neural Audio Codec) model.
+ */
+class SnacModel extends SnacPreTrainedModel {
+    /**
+     * Encodes the input audio waveform into discrete codes.
+     * @param {Object} inputs Model inputs
+     * @param {Tensor} [inputs.input_values] Float values of the input audio waveform, of shape `(batch_size, channels, sequence_length)`).
+     * @returns {Promise<Record<string, Tensor>>} The output tensors of shape `(batch_size, num_codebooks, sequence_length)`.
+     */
+    async encode(inputs) {
+        return await sessionRun(this.sessions['encoder_model'], inputs);
+    }
+    /**
+     * Decodes the given frames into an output audio waveform.
+     * @param {Record<string, Tensor>} inputs The encoded audio codes.
+     * @returns {Promise<{audio_values: Tensor}>} The output tensor of shape `(batch_size, num_channels, sequence_length)`.
+     */
+    async decode(inputs) {
+        return await sessionRun(this.sessions['decoder_model'], inputs);
+    }
+}
+class SnacEncoderModel extends SnacPreTrainedModel {
+    /** @type {typeof PreTrainedModel.from_pretrained} */
+    static async from_pretrained(pretrained_model_name_or_path, options = {}) {
+        return super.from_pretrained(pretrained_model_name_or_path, {
+            ...options,
+            // Update default model file name if not provided
+            model_file_name: options.model_file_name ?? 'encoder_model',
+        });
+    }
+}
+class SnacDecoderModel extends SnacPreTrainedModel {
+    /** @type {typeof PreTrainedModel.from_pretrained} */
+    static async from_pretrained(pretrained_model_name_or_path, options = {}) {
+        return super.from_pretrained(pretrained_model_name_or_path, {
+            ...options,
+            // Update default model file name if not provided
+            model_file_name: options.model_file_name ?? 'decoder_model',
+        });
+    }
+}
+//////////////////////////////////////////////////
 //////////////////////////////////////////////////
 // AutoModels, used to simplify construction of PreTrainedModels
 // (uses config to instantiate correct class)
@@ -13968,6 +14062,7 @@ const MODEL_MAPPING_NAMES_ENCODER_DECODER = new Map([
 const MODEL_MAPPING_NAMES_AUTO_ENCODER = new Map([
     ['mimi', ['MimiModel', MimiModel]],
     ['dac', ['DacModel', DacModel]],
+    ['snac', ['SnacModel', SnacModel]],
 ]);
 const MODEL_MAPPING_NAMES_DECODER_ONLY = new Map([
@@ -13988,6 +14083,7 @@ const MODEL_MAPPING_NAMES_DECODER_ONLY = new Map([
     ['cohere', ['CohereModel', CohereModel]],
     ['gemma', ['GemmaModel', GemmaModel]],
     ['gemma2', ['Gemma2Model', Gemma2Model]],
+    ['gemma3_text', ['Gemma3Model', Gemma3Model]],
     ['helium', ['HeliumModel', HeliumModel]],
     ['glm', ['GlmModel', GlmModel]],
     ['openelm', ['OpenELMModel', OpenELMModel]],
@@ -14087,6 +14183,7 @@ const MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = new Map([
     ['cohere', ['CohereForCausalLM', CohereForCausalLM]],
     ['gemma', ['GemmaForCausalLM', GemmaForCausalLM]],
     ['gemma2', ['Gemma2ForCausalLM', Gemma2ForCausalLM]],
+    ['gemma3_text', ['Gemma3ForCausalLM', Gemma3ForCausalLM]],
     ['helium', ['HeliumForCausalLM', HeliumForCausalLM]],
     ['glm', ['GlmForCausalLM', GlmForCausalLM]],
     ['openelm', ['OpenELMForCausalLM', OpenELMForCausalLM]],
@@ -14288,6 +14385,8 @@ const MODEL_FOR_DEPTH_ESTIMATION_MAPPING_NAMES = new Map([
     ['glpn', ['GLPNForDepthEstimation', GLPNForDepthEstimation]],
     ['sapiens', ['SapiensForDepthEstimation', SapiensForDepthEstimation]],
     ['depth_pro', ['DepthProForDepthEstimation', DepthProForDepthEstimation]],
+    ['metric3d', ['Metric3DForDepthEstimation', Metric3DForDepthEstimation]],
+    ['metric3dv2', ['Metric3Dv2ForDepthEstimation', Metric3Dv2ForDepthEstimation]],
 ])
 const MODEL_FOR_NORMAL_ESTIMATION_MAPPING_NAMES = new Map([
@@ -14373,6 +14472,8 @@ const CUSTOM_MAPPING = [
     ['DacDecoderModel', DacDecoderModel, MODEL_TYPES.EncoderOnly],
     ['MimiEncoderModel', MimiEncoderModel, MODEL_TYPES.EncoderOnly],
     ['MimiDecoderModel', MimiDecoderModel, MODEL_TYPES.EncoderOnly],
+    ['SnacEncoderModel', SnacEncoderModel, MODEL_TYPES.EncoderOnly],
+    ['SnacDecoderModel', SnacDecoderModel, MODEL_TYPES.EncoderOnly],
 ]
 for (const [name, model, type] of CUSTOM_MAPPING) {
     MODEL_TYPE_MAPPING.set(name, type);
@@ -15689,14 +15790,15 @@ __webpack_require__.r(__webpack_exports__);
 /* harmony export */   ClapFeatureExtractor: () => (/* reexport safe */ _clap_feature_extraction_clap_js__WEBPACK_IMPORTED_MODULE_2__.ClapFeatureExtractor),
 /* harmony export */   DacFeatureExtractor: () => (/* reexport safe */ _dac_feature_extraction_dac_js__WEBPACK_IMPORTED_MODULE_3__.DacFeatureExtractor),
 /* harmony export */   EncodecFeatureExtractor: () => (/* reexport safe */ _encodec_feature_extraction_encodec_js__WEBPACK_IMPORTED_MODULE_1__.EncodecFeatureExtractor),
-/* harmony export */   ImageFeatureExtractor: () => (/* reexport safe */ _base_image_processors_utils_js__WEBPACK_IMPORTED_MODULE_11__.ImageProcessor),
+/* harmony export */   ImageFeatureExtractor: () => (/* reexport safe */ _base_image_processors_utils_js__WEBPACK_IMPORTED_MODULE_12__.ImageProcessor),
 /* harmony export */   MoonshineFeatureExtractor: () => (/* reexport safe */ _moonshine_feature_extraction_moonshine_js__WEBPACK_IMPORTED_MODULE_4__.MoonshineFeatureExtractor),
 /* harmony export */   PyAnnoteFeatureExtractor: () => (/* reexport safe */ _pyannote_feature_extraction_pyannote_js__WEBPACK_IMPORTED_MODULE_5__.PyAnnoteFeatureExtractor),
 /* harmony export */   SeamlessM4TFeatureExtractor: () => (/* reexport safe */ _seamless_m4t_feature_extraction_seamless_m4t_js__WEBPACK_IMPORTED_MODULE_6__.SeamlessM4TFeatureExtractor),
-/* harmony export */   SpeechT5FeatureExtractor: () => (/* reexport safe */ _speecht5_feature_extraction_speecht5_js__WEBPACK_IMPORTED_MODULE_7__.SpeechT5FeatureExtractor),
-/* harmony export */   Wav2Vec2FeatureExtractor: () => (/* reexport safe */ _wav2vec2_feature_extraction_wav2vec2_js__WEBPACK_IMPORTED_MODULE_8__.Wav2Vec2FeatureExtractor),
-/* harmony export */   WeSpeakerFeatureExtractor: () => (/* reexport safe */ _wespeaker_feature_extraction_wespeaker_js__WEBPACK_IMPORTED_MODULE_9__.WeSpeakerFeatureExtractor),
-/* harmony export */   WhisperFeatureExtractor: () => (/* reexport safe */ _whisper_feature_extraction_whisper_js__WEBPACK_IMPORTED_MODULE_10__.WhisperFeatureExtractor)
+/* harmony export */   SnacFeatureExtractor: () => (/* reexport safe */ _snac_feature_extraction_snac_js__WEBPACK_IMPORTED_MODULE_7__.SnacFeatureExtractor),
+/* harmony export */   SpeechT5FeatureExtractor: () => (/* reexport safe */ _speecht5_feature_extraction_speecht5_js__WEBPACK_IMPORTED_MODULE_8__.SpeechT5FeatureExtractor),
+/* harmony export */   Wav2Vec2FeatureExtractor: () => (/* reexport safe */ _wav2vec2_feature_extraction_wav2vec2_js__WEBPACK_IMPORTED_MODULE_9__.Wav2Vec2FeatureExtractor),
+/* harmony export */   WeSpeakerFeatureExtractor: () => (/* reexport safe */ _wespeaker_feature_extraction_wespeaker_js__WEBPACK_IMPORTED_MODULE_10__.WeSpeakerFeatureExtractor),
+/* harmony export */   WhisperFeatureExtractor: () => (/* reexport safe */ _whisper_feature_extraction_whisper_js__WEBPACK_IMPORTED_MODULE_11__.WhisperFeatureExtractor)
 /* harmony export */ });
 /* harmony import */ var _audio_spectrogram_transformer_feature_extraction_audio_spectrogram_transformer_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ./audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.js */ "./src/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.js");
 /* harmony import */ var _encodec_feature_extraction_encodec_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ./encodec/feature_extraction_encodec.js */ "./src/models/encodec/feature_extraction_encodec.js");
@@ -15705,11 +15807,13 @@ __webpack_require__.r(__webpack_exports__);
 /* harmony import */ var _moonshine_feature_extraction_moonshine_js__WEBPACK_IMPORTED_MODULE_4__ = __webpack_require__(/*! ./moonshine/feature_extraction_moonshine.js */ "./src/models/moonshine/feature_extraction_moonshine.js");
 /* harmony import */ var _pyannote_feature_extraction_pyannote_js__WEBPACK_IMPORTED_MODULE_5__ = __webpack_require__(/*! ./pyannote/feature_extraction_pyannote.js */ "./src/models/pyannote/feature_extraction_pyannote.js");
 /* harmony import */ var _seamless_m4t_feature_extraction_seamless_m4t_js__WEBPACK_IMPORTED_MODULE_6__ = __webpack_require__(/*! ./seamless_m4t/feature_extraction_seamless_m4t.js */ "./src/models/seamless_m4t/feature_extraction_seamless_m4t.js");
-/* harmony import */ var _speecht5_feature_extraction_speecht5_js__WEBPACK_IMPORTED_MODULE_7__ = __webpack_require__(/*! ./speecht5/feature_extraction_speecht5.js */ "./src/models/speecht5/feature_extraction_speecht5.js");
-/* harmony import */ var _wav2vec2_feature_extraction_wav2vec2_js__WEBPACK_IMPORTED_MODULE_8__ = __webpack_require__(/*! ./wav2vec2/feature_extraction_wav2vec2.js */ "./src/models/wav2vec2/feature_extraction_wav2vec2.js");
-/* harmony import */ var _wespeaker_feature_extraction_wespeaker_js__WEBPACK_IMPORTED_MODULE_9__ = __webpack_require__(/*! ./wespeaker/feature_extraction_wespeaker.js */ "./src/models/wespeaker/feature_extraction_wespeaker.js");
-/* harmony import */ var _whisper_feature_extraction_whisper_js__WEBPACK_IMPORTED_MODULE_10__ = __webpack_require__(/*! ./whisper/feature_extraction_whisper.js */ "./src/models/whisper/feature_extraction_whisper.js");
-/* harmony import */ var _base_image_processors_utils_js__WEBPACK_IMPORTED_MODULE_11__ = __webpack_require__(/*! ../base/image_processors_utils.js */ "./src/base/image_processors_utils.js");
+/* harmony import */ var _snac_feature_extraction_snac_js__WEBPACK_IMPORTED_MODULE_7__ = __webpack_require__(/*! ./snac/feature_extraction_snac.js */ "./src/models/snac/feature_extraction_snac.js");
+/* harmony import */ var _speecht5_feature_extraction_speecht5_js__WEBPACK_IMPORTED_MODULE_8__ = __webpack_require__(/*! ./speecht5/feature_extraction_speecht5.js */ "./src/models/speecht5/feature_extraction_speecht5.js");
+/* harmony import */ var _wav2vec2_feature_extraction_wav2vec2_js__WEBPACK_IMPORTED_MODULE_9__ = __webpack_require__(/*! ./wav2vec2/feature_extraction_wav2vec2.js */ "./src/models/wav2vec2/feature_extraction_wav2vec2.js");
+/* harmony import */ var _wespeaker_feature_extraction_wespeaker_js__WEBPACK_IMPORTED_MODULE_10__ = __webpack_require__(/*! ./wespeaker/feature_extraction_wespeaker.js */ "./src/models/wespeaker/feature_extraction_wespeaker.js");
+/* harmony import */ var _whisper_feature_extraction_whisper_js__WEBPACK_IMPORTED_MODULE_11__ = __webpack_require__(/*! ./whisper/feature_extraction_whisper.js */ "./src/models/whisper/feature_extraction_whisper.js");
+/* harmony import */ var _base_image_processors_utils_js__WEBPACK_IMPORTED_MODULE_12__ = __webpack_require__(/*! ../base/image_processors_utils.js */ "./src/base/image_processors_utils.js");
@@ -18742,6 +18846,25 @@ __webpack_require__.r(__webpack_exports__);
+/***/ }),
+/***/ "./src/models/snac/feature_extraction_snac.js":
+/*!****************************************************!*\
+  !*** ./src/models/snac/feature_extraction_snac.js ***!
+  \****************************************************/
+/***/ ((__unused_webpack___webpack_module__, __webpack_exports__, __webpack_require__) => {
+"use strict";
+__webpack_require__.r(__webpack_exports__);
+/* harmony export */ __webpack_require__.d(__webpack_exports__, {
+/* harmony export */   SnacFeatureExtractor: () => (/* binding */ SnacFeatureExtractor)
+/* harmony export */ });
+/* harmony import */ var _dac_feature_extraction_dac_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ../dac/feature_extraction_dac.js */ "./src/models/dac/feature_extraction_dac.js");
+class SnacFeatureExtractor extends _dac_feature_extraction_dac_js__WEBPACK_IMPORTED_MODULE_0__.DacFeatureExtractor { }
 /***/ }),
 /***/ "./src/models/speecht5/feature_extraction_speecht5.js":
@@ -19999,16 +20122,16 @@ __webpack_require__.r(__webpack_exports__);
 /* harmony import */ var _utils_image_js__WEBPACK_IMPORTED_MODULE_9__ = __webpack_require__(/*! ./utils/image.js */ "./src/utils/image.js");
 /**
  * @file Pipelines provide a high-level, easy to use, API for running machine learning models.
- *
+ *
  * **Example:** Instantiate pipeline using the `pipeline` function.
  * ```javascript
  * import { pipeline } from '@huggingface/transformers';
- *
+ *
  * const classifier = await pipeline('sentiment-analysis');
  * const output = await classifier('I love transformers!');
  * // [{'label': 'POSITIVE', 'score': 0.999817686}]
  * ```
- *
+ *
  * @module pipelines
  */
@@ -20027,7 +20150,7 @@ __webpack_require__.r(__webpack_exports__);
 /**
- * @typedef {string | RawImage | URL} ImageInput
+ * @typedef {string | RawImage | URL | Blob | HTMLCanvasElement | OffscreenCanvas} ImageInput
  * @typedef {ImageInput|ImageInput[]} ImagePipelineInputs
  */
@@ -20101,7 +20224,7 @@ function get_bounding_box(box, asInteger) {
 /**
  * @callback DisposeType Disposes the item.
  * @returns {Promise<void>} A promise that resolves when the item has been disposed.
- *
+ *
  * @typedef {Object} Disposable
  * @property {DisposeType} dispose A promise that resolves when the pipeline has been disposed.
  */
@@ -20138,7 +20261,7 @@ class Pipeline extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_4__.Callable {
  * @property {string} task The task of the pipeline. Useful for specifying subtasks.
  * @property {PreTrainedModel} model The model used by the pipeline.
  * @property {PreTrainedTokenizer} tokenizer The tokenizer used by the pipeline.
- *
+ *
  * @typedef {ModelTokenizerConstructorArgs} TextPipelineConstructorArgs An object used to instantiate a text-based pipeline.
  */
@@ -20147,7 +20270,7 @@ class Pipeline extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_4__.Callable {
  * @property {string} task The task of the pipeline. Useful for specifying subtasks.
  * @property {PreTrainedModel} model The model used by the pipeline.
  * @property {Processor} processor The processor used by the pipeline.
- *
+ *
  * @typedef {ModelProcessorConstructorArgs} AudioPipelineConstructorArgs An object used to instantiate an audio-based pipeline.
  * @typedef {ModelProcessorConstructorArgs} ImagePipelineConstructorArgs An object used to instantiate an image-based pipeline.
  */
@@ -20159,7 +20282,7 @@ class Pipeline extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_4__.Callable {
  * @property {PreTrainedModel} model The model used by the pipeline.
  * @property {PreTrainedTokenizer} tokenizer The tokenizer used by the pipeline.
  * @property {Processor} processor The processor used by the pipeline.
- *
+ *
  * @typedef {ModelTokenizerProcessorConstructorArgs} TextAudioPipelineConstructorArgs An object used to instantiate a text- and audio-based pipeline.
  * @typedef {ModelTokenizerProcessorConstructorArgs} TextImagePipelineConstructorArgs An object used to instantiate a text- and image-based pipeline.
  */
@@ -20169,15 +20292,15 @@ class Pipeline extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_4__.Callable {
  * @property {string} label The label predicted.
  * @property {number} score The corresponding probability.
  * @typedef {TextClassificationSingle[]} TextClassificationOutput
- *
+ *
  * @typedef {Object} TextClassificationPipelineOptions Parameters specific to text classification pipelines.
  * @property {number} [top_k=1] The number of top predictions to be returned.
- *
+ *
  * @callback TextClassificationPipelineCallback Classify the text(s) given as inputs.
  * @param {string|string[]} texts The input text(s) to be classified.
  * @param {TextClassificationPipelineOptions} [options] The options to use for text classification.
  * @returns {Promise<TextClassificationOutput|TextClassificationOutput[]>} An array or object containing the predicted labels and scores.
- *
+ *
  * @typedef {TextPipelineConstructorArgs & TextClassificationPipelineCallback & Disposable} TextClassificationPipelineType
  */
@@ -20190,7 +20313,7 @@ class Pipeline extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_4__.Callable {
  * const output = await classifier('I love transformers!');
  * // [{ label: 'POSITIVE', score: 0.999788761138916 }]
  * ```
- *
+ *
  * **Example:** Multilingual sentiment-analysis w/ `Xenova/bert-base-multilingual-uncased-sentiment` (and return top 5 classes).
  * ```javascript
  * const classifier = await pipeline('sentiment-analysis', 'Xenova/bert-base-multilingual-uncased-sentiment');
@@ -20203,7 +20326,7 @@ class Pipeline extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_4__.Callable {
  * //   { label: '2 stars', score: 0.0009423971059732139 }
  * // ]
  * ```
- *
+ *
  * **Example:** Toxic comment classification w/ `Xenova/toxic-bert` (and return all classes).
  * ```javascript
  * const classifier = await pipeline('text-classification', 'Xenova/toxic-bert');
@@ -20288,21 +20411,21 @@ class TextClassificationPipeline extends (/** @type {new (options: TextPipelineC
  * @property {number} [start] The index of the start of the corresponding entity in the sentence.
  * @property {number} [end] The index of the end of the corresponding entity in the sentence.
  * @typedef {TokenClassificationSingle[]} TokenClassificationOutput
- *
+ *
  * @typedef {Object} TokenClassificationPipelineOptions Parameters specific to token classification pipelines.
  * @property {string[]} [ignore_labels] A list of labels to ignore.
- *
+ *
  * @callback TokenClassificationPipelineCallback Classify each token of the text(s) given as inputs.
  * @param {string|string[]} texts One or several texts (or one list of texts) for token classification.
  * @param {TokenClassificationPipelineOptions} [options] The options to use for token classification.
  * @returns {Promise<TokenClassificationOutput|TokenClassificationOutput[]>} The result.
- *
+ *
  * @typedef {TextPipelineConstructorArgs & TokenClassificationPipelineCallback & Disposable} TokenClassificationPipelineType
  */
 /**
  * Named Entity Recognition pipeline using any `ModelForTokenClassification`.
- *
+ *
  * **Example:** Perform named entity recognition with `Xenova/bert-base-NER`.
  * ```javascript
  * const classifier = await pipeline('token-classification', 'Xenova/bert-base-NER');
@@ -20312,7 +20435,7 @@ class TextClassificationPipeline extends (/** @type {new (options: TextPipelineC
  * //   { entity: 'B-LOC', score: 0.9994474053382874, index: 9, word: 'London' }
  * // ]
  * ```
- *
+ *
  * **Example:** Perform named entity recognition with `Xenova/bert-base-NER` (and return all labels).
  * ```javascript
  * const classifier = await pipeline('token-classification', 'Xenova/bert-base-NER');
@@ -20408,22 +20531,22 @@ class TokenClassificationPipeline extends (/** @type {new (options: TextPipeline
  * @property {number} [start] The character start index of the answer (in the tokenized version of the input).
  * @property {number} [end] The character end index of the answer (in the tokenized version of the input).
  * @property {string} answer The answer to the question.
- *
+ *
  * @typedef {Object} QuestionAnsweringPipelineOptions Parameters specific to question answering pipelines.
  * @property {number} [top_k=1] The number of top answer predictions to be returned.
- *
+ *
  * @callback QuestionAnsweringPipelineCallback Answer the question(s) given as inputs by using the context(s).
  * @param {string|string[]} question One or several question(s) (must be used in conjunction with the `context` argument).
  * @param {string|string[]} context One or several context(s) associated with the question(s) (must be used in conjunction with the `question` argument).
  * @param {QuestionAnsweringPipelineOptions} [options] The options to use for question answering.
  * @returns {Promise<QuestionAnsweringOutput|QuestionAnsweringOutput[]>} An array or object containing the predicted answers and scores.
- *
+ *
  * @typedef {TextPipelineConstructorArgs & QuestionAnsweringPipelineCallback & Disposable} QuestionAnsweringPipelineType
  */
 /**
  * Question Answering pipeline using any `ModelForQuestionAnswering`.
- *
+ *
  * **Example:** Run question answering with `Xenova/distilbert-base-uncased-distilled-squad`.
  * ```javascript
  * const answerer = await pipeline('question-answering', 'Xenova/distilbert-base-uncased-distilled-squad');
@@ -20548,10 +20671,10 @@ class QuestionAnsweringPipeline extends (/** @type {new (options: TextPipelineCo
  * @property {number} token The predicted token id (to replace the masked one).
  * @property {string} token_str The predicted token (to replace the masked one).
  * @typedef {FillMaskSingle[]} FillMaskOutput
- *
+ *
  * @typedef {Object} FillMaskPipelineOptions Parameters specific to fill mask pipelines.
  * @property {number} [top_k=5] When passed, overrides the number of predictions to return.
- *
+ *
  * @callback FillMaskPipelineCallback Fill the masked token in the text(s) given as inputs.
  * @param {string|string[]} texts One or several texts (or one list of prompts) with masked tokens.
  * @param {FillMaskPipelineOptions} [options] The options to use for masked language modelling.
@@ -20559,13 +20682,13 @@ class QuestionAnsweringPipeline extends (/** @type {new (options: TextPipelineCo
  * and the sequence with the predicted token filled in, or an array of such arrays (one for each input text).
  * If only one input text is given, the output will be an array of objects.
  * @throws {Error} When the mask token is not found in the input text.
- *
+ *
  * @typedef {TextPipelineConstructorArgs & FillMaskPipelineCallback & Disposable} FillMaskPipelineType
  */
 /**
  * Masked language modeling prediction pipeline using any `ModelWithLMHead`.
- *
+ *
  * **Example:** Perform masked language modelling (a.k.a. "fill-mask") with `Xenova/bert-base-uncased`.
  * ```javascript
  * const unmasker = await pipeline('fill-mask', 'Xenova/bert-base-cased');
@@ -20578,7 +20701,7 @@ class QuestionAnsweringPipeline extends (/** @type {new (options: TextPipelineCo
  * //   { token_str: 'life', score: 0.01859794743359089, token: 1297, sequence: 'The goal of life is life.' }
  * // ]
  * ```
- *
+ *
  * **Example:** Perform masked language modelling (a.k.a. "fill-mask") with `Xenova/bert-base-cased` (and return top result).
  * ```javascript
  * const unmasker = await pipeline('fill-mask', 'Xenova/bert-base-cased');
@@ -20655,18 +20778,18 @@ class FillMaskPipeline extends (/** @type {new (options: TextPipelineConstructor
  * @typedef {Object} Text2TextGenerationSingle
  * @property {string} generated_text The generated text.
  * @typedef {Text2TextGenerationSingle[]} Text2TextGenerationOutput
- *
+ *
  * @callback Text2TextGenerationPipelineCallback Generate the output text(s) using text(s) given as inputs.
  * @param {string|string[]} texts Input text for the encoder.
  * @param {Partial<import('./generation/configuration_utils.js').GenerationConfig>} [options] Additional keyword arguments to pass along to the generate method of the model.
  * @returns {Promise<Text2TextGenerationOutput|Text2TextGenerationOutput[]>}
- *
+ *
  * @typedef {TextPipelineConstructorArgs & Text2TextGenerationPipelineCallback & Disposable} Text2TextGenerationPipelineType
  */
 /**
  * Text2TextGenerationPipeline class for generating text using a model that performs text-to-text generation tasks.
- *
+ *
  * **Example:** Text-to-text generation w/ `Xenova/LaMini-Flan-T5-783M`.
  * ```javascript
  * const generator = await pipeline('text2text-generation', 'Xenova/LaMini-Flan-T5-783M');
@@ -20742,18 +20865,18 @@ class Text2TextGenerationPipeline extends (/** @type {new (options: TextPipeline
  * @typedef {Object} SummarizationSingle
  * @property {string} summary_text The summary text.
  * @typedef {SummarizationSingle[]} SummarizationOutput
- *
+ *
  * @callback SummarizationPipelineCallback Summarize the text(s) given as inputs.
  * @param {string|string[]} texts One or several articles (or one list of articles) to summarize.
  * @param {import('./generation/configuration_utils.js').GenerationConfig} [options] Additional keyword arguments to pass along to the generate method of the model.
  * @returns {Promise<SummarizationOutput|SummarizationOutput[]>}
- *
+ *
  * @typedef {TextPipelineConstructorArgs & SummarizationPipelineCallback & Disposable} SummarizationPipelineType
  */
 /**
  * A pipeline for summarization tasks, inheriting from Text2TextGenerationPipeline.
- *
+ *
  * **Example:** Summarization w/ `Xenova/distilbart-cnn-6-6`.
  * ```javascript
  * const generator = await pipeline('summarization', 'Xenova/distilbart-cnn-6-6');
@@ -20789,23 +20912,23 @@ class SummarizationPipeline extends (/** @type {new (options: TextPipelineConstr
  * @typedef {Object} TranslationSingle
  * @property {string} translation_text The translated text.
  * @typedef {TranslationSingle[]} TranslationOutput
- *
+ *
  * @callback TranslationPipelineCallback Translate the text(s) given as inputs.
  * @param {string|string[]} texts Texts to be translated.
  * @param {import('./generation/configuration_utils.js').GenerationConfig} [options] Additional keyword arguments to pass along to the generate method of the model.
  * @returns {Promise<TranslationOutput|TranslationOutput[]>}
- *
+ *
  * @typedef {TextPipelineConstructorArgs & TranslationPipelineCallback & Disposable} TranslationPipelineType
  */
 /**
  * Translates text from one language to another.
- *
+ *
  * **Example:** Multilingual translation w/ `Xenova/nllb-200-distilled-600M`.
- *
+ *
  * See [here](https://github.com/facebookresearch/flores/blob/main/flores200/README.md#languages-in-flores-200)
  * for the full list of languages and their corresponding codes.
- *
+ *
  * ```javascript
  * const translator = await pipeline('translation', 'Xenova/nllb-200-distilled-600M');
  * const output = await translator('जीवन एक चॉकलेट बॉक्स की तरह है।', {
@@ -20814,12 +20937,12 @@ class SummarizationPipeline extends (/** @type {new (options: TextPipelineConstr
  * });
  * // [{ translation_text: 'La vie est comme une boîte à chocolat.' }]
  * ```
- *
+ *
  * **Example:** Multilingual translation w/ `Xenova/m2m100_418M`.
- *
+ *
  * See [here](https://huggingface.co/facebook/m2m100_418M#languages-covered)
  * for the full list of languages and their corresponding codes.
- *
+ *
  * ```javascript
  * const translator = await pipeline('translation', 'Xenova/m2m100_418M');
  * const output = await translator('生活就像一盒巧克力。', {
@@ -20828,12 +20951,12 @@ class SummarizationPipeline extends (/** @type {new (options: TextPipelineConstr
  * });
  * // [{ translation_text: 'Life is like a box of chocolate.' }]
  * ```
- *
+ *
  * **Example:** Multilingual translation w/ `Xenova/mbart-large-50-many-to-many-mmt`.
- *
+ *
  * See [here](https://huggingface.co/facebook/mbart-large-50-many-to-many-mmt#languages-covered)
  * for the full list of languages and their corresponding codes.
- *
+ *
  * ```javascript
  * const translator = await pipeline('translation', 'Xenova/mbart-large-50-many-to-many-mmt');
  * const output = await translator('संयुक्त राष्ट्र के प्रमुख का कहना है कि सीरिया में कोई सैन्य समाधान नहीं है', {
@@ -20862,21 +20985,21 @@ function isChat(x) {
 /**
  * @typedef {import('./tokenizers.js').Message[]} Chat
- *
+ *
  * @typedef {Object} TextGenerationSingle
  * @property {string|Chat} generated_text The generated text.
  * @typedef {TextGenerationSingle[]} TextGenerationOutput
- *
+ *
  * @typedef {Object} TextGenerationSpecificParams Parameters specific to text-generation pipelines.
  * @property {boolean} [add_special_tokens] Whether or not to add special tokens when tokenizing the sequences.
  * @property {boolean} [return_full_text=true] If set to `false` only added text is returned, otherwise the full text is returned.
  * @typedef {import('./generation/configuration_utils.js').GenerationConfig & TextGenerationSpecificParams} TextGenerationConfig
- *
+ *
  * @callback TextGenerationPipelineCallback Complete the prompt(s) given as inputs.
  * @param {string|string[]|Chat|Chat[]} texts One or several prompts (or one list of prompts) to complete.
  * @param {Partial<TextGenerationConfig>} [options] Additional keyword arguments to pass along to the generate method of the model.
  * @returns {Promise<TextGenerationOutput|TextGenerationOutput[]>} An array or object containing the generated texts.
- *
+ *
  * @typedef {TextPipelineConstructorArgs & TextGenerationPipelineCallback & Disposable} TextGenerationPipelineType
  */
@@ -20884,7 +21007,7 @@ function isChat(x) {
  * Language generation pipeline using any `ModelWithLMHead` or `ModelForCausalLM`.
  * This pipeline predicts the words that will follow a specified text prompt.
  * NOTE: For the full list of generation parameters, see [`GenerationConfig`](./utils/generation#module_utils/generation.GenerationConfig).
- *
+ *
  * **Example:** Text generation with `Xenova/distilgpt2` (default settings).
  * ```javascript
  * const generator = await pipeline('text-generation', 'Xenova/distilgpt2');
@@ -20892,7 +21015,7 @@ function isChat(x) {
  * const output = await generator(text);
  * // [{ generated_text: "I enjoy walking with my cute dog, and I love to play with the other dogs." }]
  * ```
- *
+ *
  * **Example:** Text generation with `Xenova/distilgpt2` (custom settings).
  * ```javascript
  * const generator = await pipeline('text-generation', 'Xenova/distilgpt2');
@@ -20911,7 +21034,7 @@ function isChat(x) {
  * //   "generated_text": "Once upon a time, there was an abundance of information about the most important and influential"
  * // }]
  * ```
- *
+ *
  * **Example:** Run code generation with `Xenova/codegen-350M-mono`.
  * ```javascript
  * const generator = await pipeline('text-generation', 'Xenova/codegen-350M-mono');
@@ -21030,7 +21153,7 @@ class TextGenerationPipeline extends (/** @type {new (options: TextPipelineConst
  * @property {string} sequence The sequence for which this is the output.
  * @property {string[]} labels The labels sorted by order of likelihood.
  * @property {number[]} scores The probabilities for each of the labels.
- *
+ *
  * @typedef {Object} ZeroShotClassificationPipelineOptions Parameters specific to zero-shot classification pipelines.
  * @property {string} [hypothesis_template="This example is {}."] The template used to turn each
  * candidate label into an NLI-style hypothesis. The candidate label will replace the {} placeholder.
@@ -21038,14 +21161,14 @@ class TextGenerationPipeline extends (/** @type {new (options: TextPipelineConst
  * If `false`, the scores are normalized such that the sum of the label likelihoods for each sequence
  * is 1. If `true`, the labels are considered independent and probabilities are normalized for each
  * candidate by doing a softmax of the entailment score vs. the contradiction score.
- *
+ *
  * @callback ZeroShotClassificationPipelineCallback Classify the sequence(s) given as inputs.
  * @param {string|string[]} texts The sequence(s) to classify, will be truncated if the model input is too large.
  * @param {string|string[]} candidate_labels The set of possible class labels to classify each sequence into.
  * Can be a single label, a string of comma-separated labels, or a list of labels.
  * @param {ZeroShotClassificationPipelineOptions} [options] The options to use for zero-shot classification.
  * @returns {Promise<ZeroShotClassificationOutput|ZeroShotClassificationOutput[]>} An array or object containing the predicted labels and scores.
- *
+ *
  * @typedef {TextPipelineConstructorArgs & ZeroShotClassificationPipelineCallback & Disposable} ZeroShotClassificationPipelineType
  */
@@ -21054,7 +21177,7 @@ class TextGenerationPipeline extends (/** @type {new (options: TextPipelineConst
  * trained on NLI (natural language inference) tasks. Equivalent of `text-classification`
  * pipelines, but these models don't require a hardcoded number of potential classes, they
  * can be chosen at runtime. It usually means it's slower but it is **much** more flexible.
- *
+ *
  * **Example:** Zero shot classification with `Xenova/mobilebert-uncased-mnli`.
  * ```javascript
  * const classifier = await pipeline('zero-shot-classification', 'Xenova/mobilebert-uncased-mnli');
@@ -21067,7 +21190,7 @@ class TextGenerationPipeline extends (/** @type {new (options: TextPipelineConst
  * //   scores: [ 0.5562091040482018, 0.1843621307860853, 0.13942646639336376, 0.12000229877234923 ]
  * // }
  * ```
- *
+ *
  * **Example:** Zero shot classification with `Xenova/nli-deberta-v3-xsmall` (multi-label).
  * ```javascript
  * const classifier = await pipeline('zero-shot-classification', 'Xenova/nli-deberta-v3-xsmall');
@@ -21181,20 +21304,20 @@ class ZeroShotClassificationPipeline extends (/** @type {new (options: TextPipel
  * @property {'none'|'mean'|'cls'} [pooling="none"] The pooling method to use.
  * @property {boolean} [normalize=false] Whether or not to normalize the embeddings in the last dimension.
  * @property {boolean} [quantize=false] Whether or not to quantize the embeddings.
- * @property {'binary'|'ubinary'} [precision='binary'] The precision to use for quantization.
- *
+ * @property {'binary'|'ubinary'} [precision='binary'] The precision to use for quantization.
+ *
  * @callback FeatureExtractionPipelineCallback Extract the features of the input(s).
  * @param {string|string[]} texts One or several texts (or one list of texts) to get the features of.
  * @param {FeatureExtractionPipelineOptions} [options] The options to use for feature extraction.
  * @returns {Promise<Tensor>} The features computed by the model.
- *
+ *
  * @typedef {TextPipelineConstructorArgs & FeatureExtractionPipelineCallback & Disposable} FeatureExtractionPipelineType
  */
 /**
  * Feature extraction pipeline using no model head. This pipeline extracts the hidden
  * states from the base transformer, which can be used as features in downstream tasks.
- *
+ *
  * **Example:** Run feature extraction with `bert-base-uncased` (without pooling/normalization).
  * ```javascript
  * const extractor = await pipeline('feature-extraction', 'Xenova/bert-base-uncased', { revision: 'default' });
@@ -21205,7 +21328,7 @@ class ZeroShotClassificationPipeline extends (/** @type {new (options: TextPipel
  * //   dims: [1, 8, 768]
  * // }
  * ```
- *
+ *
  * **Example:** Run feature extraction with `bert-base-uncased` (with pooling/normalization).
  * ```javascript
  * const extractor = await pipeline('feature-extraction', 'Xenova/bert-base-uncased', { revision: 'default' });
@@ -21216,7 +21339,7 @@ class ZeroShotClassificationPipeline extends (/** @type {new (options: TextPipel
  * //   dims: [1, 768]
  * // }
  * ```
- *
+ *
  * **Example:** Calculating embeddings with `sentence-transformers` models.
  * ```javascript
  * const extractor = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2');
@@ -21297,19 +21420,19 @@ class FeatureExtractionPipeline extends (/** @type {new (options: TextPipelineCo
 /**
  * @typedef {Object} ImageFeatureExtractionPipelineOptions Parameters specific to image feature extraction pipelines.
  * @property {boolean} [pool=null] Whether or not to return the pooled output. If set to `false`, the model will return the raw hidden states.
- *
+ *
  * @callback ImageFeatureExtractionPipelineCallback Extract the features of the input(s).
  * @param {ImagePipelineInputs} images One or several images (or one list of images) to get the features of.
  * @param {ImageFeatureExtractionPipelineOptions} [options] The options to use for image feature extraction.
  * @returns {Promise<Tensor>} The image features computed by the model.
- *
+ *
  * @typedef {ImagePipelineConstructorArgs & ImageFeatureExtractionPipelineCallback & Disposable} ImageFeatureExtractionPipelineType
  */
 /**
  * Image feature extraction pipeline using no model head. This pipeline extracts the hidden
  * states from the base transformer, which can be used as features in downstream tasks.
- *
+ *
  * **Example:** Perform image feature extraction with `Xenova/vit-base-patch16-224-in21k`.
  * ```javascript
  * const image_feature_extractor = await pipeline('image-feature-extraction', 'Xenova/vit-base-patch16-224-in21k');
@@ -21322,7 +21445,7 @@ class FeatureExtractionPipeline extends (/** @type {new (options: TextPipelineCo
  * //   size: 151296
  * // }
  * ```
- *
+ *
  * **Example:** Compute image embeddings with `Xenova/clip-vit-base-patch32`.
  * ```javascript
  * const image_feature_extractor = await pipeline('image-feature-extraction', 'Xenova/clip-vit-base-patch32');
@@ -21378,12 +21501,12 @@ class ImageFeatureExtractionPipeline extends (/** @type {new (options: ImagePipe
  * @property {string} label The label predicted.
  * @property {number} score The corresponding probability.
  * @typedef {AudioClassificationSingle[]} AudioClassificationOutput
- *
+ *
  * @typedef {Object} AudioClassificationPipelineOptions Parameters specific to audio classification pipelines.
  * @property {number} [top_k=5] The number of top labels that will be returned by the pipeline.
  * If the provided number is `null` or higher than the number of labels available in the model configuration,
  * it will default to the number of labels.
- *
+ *
  * @callback AudioClassificationPipelineCallback Classify the audio sequence(s) given as inputs.
  * @param {AudioPipelineInputs} audio The input audio file(s) to be classified. The input is either:
  * - `string` or `URL` that is the filename/URL of the audio file, the file will be read at the processor's sampling rate
@@ -21392,14 +21515,14 @@ class ImageFeatureExtractionPipeline extends (/** @type {new (options: ImagePipe
  * - `Float32Array` or `Float64Array` of shape `(n, )`, representing the raw audio at the correct sampling rate (no further check will be done).
  * @param {AudioClassificationPipelineOptions} [options] The options to use for audio classification.
  * @returns {Promise<AudioClassificationOutput|AudioClassificationOutput[]>} An array or object containing the predicted labels and scores.
- *
+ *
  * @typedef {AudioPipelineConstructorArgs & AudioClassificationPipelineCallback & Disposable} AudioClassificationPipelineType
  */
 /**
  * Audio classification pipeline using any `AutoModelForAudioClassification`.
  * This pipeline predicts the class of a raw waveform or an audio file.
- *
+ *
  * **Example:** Perform audio classification with `Xenova/wav2vec2-large-xlsr-53-gender-recognition-librispeech`.
  * ```javascript
  * const classifier = await pipeline('audio-classification', 'Xenova/wav2vec2-large-xlsr-53-gender-recognition-librispeech');
@@ -21410,7 +21533,7 @@ class ImageFeatureExtractionPipeline extends (/** @type {new (options: ImagePipe
  * //   { label: 'female', score: 0.001845747814513743 }
  * // ]
  * ```
- *
+ *
  * **Example:** Perform audio classification with `Xenova/ast-finetuned-audioset-10-10-0.4593` and return top 4 results.
  * ```javascript
  * const classifier = await pipeline('audio-classification', 'Xenova/ast-finetuned-audioset-10-10-0.4593');
@@ -21475,12 +21598,12 @@ class AudioClassificationPipeline extends (/** @type {new (options: AudioPipelin
  * @typedef {Object} ZeroShotAudioClassificationOutput
  * @property {string} label The label identified by the model. It is one of the suggested `candidate_labels`.
  * @property {number} score The score attributed by the model for that label (between 0 and 1).
- *
+ *
  * @typedef {Object} ZeroShotAudioClassificationPipelineOptions Parameters specific to zero-shot audio classification pipelines.
  * @property {string} [hypothesis_template="This is a sound of {}."] The sentence used in conjunction with `candidate_labels`
  * to attempt the audio classification by replacing the placeholder with the candidate_labels.
  * Then likelihood is estimated by using `logits_per_audio`.
- *
+ *
  * @callback ZeroShotAudioClassificationPipelineCallback Classify the audio sequence(s) given as inputs.
  * @param {AudioPipelineInputs} audio The input audio file(s) to be classified. The input is either:
  * - `string` or `URL` that is the filename/URL of the audio file, the file will be read at the processor's sampling rate
@@ -21490,14 +21613,14 @@ class AudioClassificationPipeline extends (/** @type {new (options: AudioPipelin
  * @param {string[]} candidate_labels The candidate labels for this audio.
  * @param {ZeroShotAudioClassificationPipelineOptions} [options] The options to use for zero-shot audio classification.
  * @returns {Promise<ZeroShotAudioClassificationOutput[]|ZeroShotAudioClassificationOutput[][]>} An array of objects containing the predicted labels and scores.
- *
+ *
  * @typedef {TextAudioPipelineConstructorArgs & ZeroShotAudioClassificationPipelineCallback & Disposable} ZeroShotAudioClassificationPipelineType
  */
 /**
  * Zero shot audio classification pipeline using `ClapModel`. This pipeline predicts the class of an audio when you
  * provide an audio and a set of `candidate_labels`.
- *
+ *
  * **Example:** Perform zero-shot audio classification with `Xenova/clap-htsat-unfused`.
  * ```javascript
  * const classifier = await pipeline('zero-shot-audio-classification', 'Xenova/clap-htsat-unfused');
@@ -21530,7 +21653,7 @@ class ZeroShotAudioClassificationPipeline extends (/** @type {new (options: Text
             audio = [/** @type {AudioInput} */ (audio)];
         }
-        // Insert label into hypothesis template
+        // Insert label into hypothesis template
         const texts = candidate_labels.map(
             x => hypothesis_template.replace('{}', x)
         );
@@ -21574,7 +21697,7 @@ class ZeroShotAudioClassificationPipeline extends (/** @type {new (options: Text
  * @property {string} text The recognized text.
  * @property {Chunk[]} [chunks] When using `return_timestamps`, the `chunks` will become a list
  * containing all the various text chunks identified by the model.
- *
+ *
  * @typedef {Object} AutomaticSpeechRecognitionSpecificParams Parameters specific to automatic-speech-recognition pipelines.
  * @property {boolean|'word'} [return_timestamps] Whether to return timestamps or not. Default is `false`.
  * @property {number} [chunk_length_s] The length of audio chunks to process in seconds. Default is 0 (no chunking).
@@ -21584,7 +21707,7 @@ class ZeroShotAudioClassificationPipeline extends (/** @type {new (options: Text
  * @property {string} [task] The task to perform. Default is `null`, meaning it should be auto-detected.
  * @property {number} [num_frames] The number of frames in the input audio.
  * @typedef {import('./generation/configuration_utils.js').GenerationConfig & AutomaticSpeechRecognitionSpecificParams} AutomaticSpeechRecognitionConfig
- *
+ *
  * @callback AutomaticSpeechRecognitionPipelineCallback Transcribe the audio sequence(s) given as inputs to text.
  * @param {AudioPipelineInputs} audio The input audio file(s) to be transcribed. The input is either:
  * - `string` or `URL` that is the filename/URL of the audio file, the file will be read at the processor's sampling rate
@@ -21593,7 +21716,7 @@ class ZeroShotAudioClassificationPipeline extends (/** @type {new (options: Text
  * - `Float32Array` or `Float64Array` of shape `(n, )`, representing the raw audio at the correct sampling rate (no further check will be done).
  * @param {Partial<AutomaticSpeechRecognitionConfig>} [options] Additional keyword arguments to pass along to the generate method of the model.
  * @returns {Promise<AutomaticSpeechRecognitionOutput|AutomaticSpeechRecognitionOutput[]>} An object containing the transcription text and optionally timestamps if `return_timestamps` is `true`.
- *
+ *
  * @typedef {TextAudioPipelineConstructorArgs & AutomaticSpeechRecognitionPipelineCallback & Disposable} AutomaticSpeechRecognitionPipelineType
  */
@@ -21607,7 +21730,7 @@ class ZeroShotAudioClassificationPipeline extends (/** @type {new (options: Text
  * const output = await transcriber(url);
  * // { text: " And so my fellow Americans ask not what your country can do for you, ask what you can do for your country." }
  * ```
- *
+ *
  * **Example:** Transcribe English w/ timestamps.
  * ```javascript
  * const transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en');
@@ -21621,7 +21744,7 @@ class ZeroShotAudioClassificationPipeline extends (/** @type {new (options: Text
  * //   ]
  * // }
  * ```
- *
+ *
  * **Example:** Transcribe English w/ word-level timestamps.
  * ```javascript
  * const transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en');
@@ -21640,7 +21763,7 @@ class ZeroShotAudioClassificationPipeline extends (/** @type {new (options: Text
  * //   ]
  * // }
  * ```
- *
+ *
  * **Example:** Transcribe French.
  * ```javascript
  * const transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-small');
@@ -21648,7 +21771,7 @@ class ZeroShotAudioClassificationPipeline extends (/** @type {new (options: Text
  * const output = await transcriber(url, { language: 'french', task: 'transcribe' });
  * // { text: " J'adore, j'aime, je n'aime pas, je déteste." }
  * ```
- *
+ *
  * **Example:** Translate French to English.
  * ```javascript
  * const transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-small');
@@ -21656,7 +21779,7 @@ class ZeroShotAudioClassificationPipeline extends (/** @type {new (options: Text
  * const output = await transcriber(url, { language: 'french', task: 'translate' });
  * // { text: " I love, I like, I don't like, I hate." }
  * ```
- *
+ *
  * **Example:** Transcribe/translate audio longer than 30 seconds.
  * ```javascript
  * const transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en');
@@ -21879,18 +22002,18 @@ class AutomaticSpeechRecognitionPipeline extends (/** @type {new (options: TextA
  * @typedef {Object} ImageToTextSingle
  * @property {string} generated_text The generated text.
  * @typedef {ImageToTextSingle[]} ImageToTextOutput
- *
+ *
  * @callback ImageToTextPipelineCallback Generate text (e.g., a caption) for the image(s) passed as inputs.
  * @param {ImagePipelineInputs} texts The images to be captioned.
  * @param {Partial<import('./generation/configuration_utils.js').GenerationConfig>} [options] Additional keyword arguments to pass along to the generate method of the model.
  * @returns {Promise<ImageToTextOutput|ImageToTextOutput[]>} An object (or array of objects) containing the generated text(s).
- *
+ *
  * @typedef {TextImagePipelineConstructorArgs & ImageToTextPipelineCallback & Disposable} ImageToTextPipelineType
  */
 /**
  * Image To Text pipeline using a `AutoModelForVision2Seq`. This pipeline predicts a caption for a given image.
- *
+ *
  * **Example:** Generate a caption for an image w/ `Xenova/vit-gpt2-image-captioning`.
  * ```javascript
  * const captioner = await pipeline('image-to-text', 'Xenova/vit-gpt2-image-captioning');
@@ -21898,7 +22021,7 @@ class AutomaticSpeechRecognitionPipeline extends (/** @type {new (options: TextA
  * const output = await captioner(url);
  * // [{ generated_text: 'a cat laying on a couch with another cat' }]
  * ```
- *
+ *
  * **Example:** Optical Character Recognition (OCR) w/ `Xenova/trocr-small-handwritten`.
  * ```javascript
  * const captioner = await pipeline('image-to-text', 'Xenova/trocr-small-handwritten');
@@ -21944,22 +22067,22 @@ class ImageToTextPipeline extends (/** @type {new (options: TextImagePipelineCon
  * @property {string} label The label identified by the model.
  * @property {number} score The score attributed by the model for that label.
  * @typedef {ImageClassificationSingle[]} ImageClassificationOutput
- *
+ *
  * @typedef {Object} ImageClassificationPipelineOptions Parameters specific to image classification pipelines.
- * @property {number} [top_k=1] The number of top labels that will be returned by the pipeline.
- *
+ * @property {number} [top_k=1] The number of top labels that will be returned by the pipeline.
+ *
  * @callback ImageClassificationPipelineCallback Assign labels to the image(s) passed as inputs.
  * @param {ImagePipelineInputs} images The input image(s) to be classified.
  * @param {ImageClassificationPipelineOptions} [options] The options to use for image classification.
  * @returns {Promise<ImageClassificationOutput|ImageClassificationOutput[]>} An array or object containing the predicted labels and scores.
- *
+ *
  * @typedef {ImagePipelineConstructorArgs & ImageClassificationPipelineCallback & Disposable} ImageClassificationPipelineType
  */
 /**
  * Image classification pipeline using any `AutoModelForImageClassification`.
  * This pipeline predicts the class of an image.
- *
+ *
  * **Example:** Classify an image.
  * ```javascript
  * const classifier = await pipeline('image-classification', 'Xenova/vit-base-patch16-224');
@@ -21969,7 +22092,7 @@ class ImageToTextPipeline extends (/** @type {new (options: TextImagePipelineCon
  * //   { label: 'tiger, Panthera tigris', score: 0.632695734500885 },
  * // ]
  * ```
- *
+ *
  * **Example:** Classify an image and return top `n` classes.
  * ```javascript
  * const classifier = await pipeline('image-classification', 'Xenova/vit-base-patch16-224');
@@ -21981,7 +22104,7 @@ class ImageToTextPipeline extends (/** @type {new (options: TextImagePipelineCon
  * //   { label: 'lion, king of beasts, Panthera leo', score: 0.00045060308184474707 },
  * // ]
  * ```
- *
+ *
  * **Example:** Classify an image and return all classes.
  * ```javascript
  * const classifier = await pipeline('image-classification', 'Xenova/vit-base-patch16-224');
@@ -22048,7 +22171,7 @@ class ImageClassificationPipeline extends (/** @type {new (options: ImagePipelin
  * @property {string|null} label The label of the segment.
  * @property {number|null} score The score of the segment.
  * @property {RawImage} mask The mask of the segment.
- *
+ *
  * @typedef {Object} ImageSegmentationPipelineOptions Parameters specific to image segmentation pipelines.
  * @property {number} [threshold=0.5] Probability threshold to filter out predicted masks.
  * @property {number} [mask_threshold=0.5] Threshold to use when turning the predicted masks into binary values.
@@ -22057,19 +22180,19 @@ class ImageClassificationPipeline extends (/** @type {new (options: ImagePipelin
  * depending on model capabilities. If not set, the pipeline will attempt to resolve (in that order).
  * @property {number[]} [label_ids_to_fuse=null] List of label ids to fuse. If not set, do not fuse any labels.
  * @property {number[][]} [target_sizes=null] List of target sizes for the input images. If not set, use the original image sizes.
- *
+ *
  * @callback ImageSegmentationPipelineCallback Segment the input images.
  * @param {ImagePipelineInputs} images The input images.
  * @param {ImageSegmentationPipelineOptions} [options] The options to use for image segmentation.
  * @returns {Promise<ImageSegmentationPipelineOutput[]>} The annotated segments.
- *
+ *
  * @typedef {ImagePipelineConstructorArgs & ImageSegmentationPipelineCallback & Disposable} ImageSegmentationPipelineType
  */
 /**
  * Image segmentation pipeline using any `AutoModelForXXXSegmentation`.
  * This pipeline predicts masks of objects and their classes.
- *
+ *
  * **Example:** Perform image segmentation with `Xenova/detr-resnet-50-panoptic`.
  * ```javascript
  * const segmenter = await pipeline('image-segmentation', 'Xenova/detr-resnet-50-panoptic');
@@ -22153,12 +22276,17 @@ class ImageSegmentationPipeline extends (/** @type {new (options: ImagePipelineC
         /** @type {ImageSegmentationPipelineOutput[]} */
         const annotation = [];
         if (!subtask) {
+            // We define an epsilon to safeguard against numerical/precision issues when detecting
+            // the normalization mode of the output (i.e., sigmoid already applied, or not).
+            // See https://github.com/microsoft/onnxruntime/issues/23943 for more information.
+            const epsilon = 1e-5;
             // Perform standard image segmentation
             const result = output[outputNames[0]];
             for (let i = 0; i < imageSizes.length; ++i) {
                 const size = imageSizes[i];
                 const item = result[i];
-                if (item.data.some(x => x < 0 || x > 1)) {
+                if (item.data.some(x => x < -epsilon || x > 1 + epsilon)) {
                     item.sigmoid_();
                 }
                 const mask = await _utils_image_js__WEBPACK_IMPORTED_MODULE_9__.RawImage.fromTensor(item.mul_(255).to('uint8')).resize(size[1], size[0]);
@@ -22227,19 +22355,19 @@ class ImageSegmentationPipeline extends (/** @type {new (options: ImagePipelineC
 /**
  * @typedef {Object} BackgroundRemovalPipelineOptions Parameters specific to background removal pipelines.
- *
+ *
  * @callback BackgroundRemovalPipelineCallback Remove the background from the input images.
  * @param {ImagePipelineInputs} images The input images.
  * @param {BackgroundRemovalPipelineOptions} [options] The options to use for background removal.
  * @returns {Promise<RawImage[]>} The images with the background removed.
- *
+ *
  * @typedef {ImagePipelineConstructorArgs & BackgroundRemovalPipelineCallback & Disposable} BackgroundRemovalPipelineType
  */
 /**
  * Background removal pipeline using certain `AutoModelForXXXSegmentation`.
  * This pipeline removes the backgrounds of images.
- *
+ *
  * **Example:** Perform background removal with `Xenova/modnet`.
  * ```javascript
  * const segmenter = await pipeline('background-removal', 'Xenova/modnet');
@@ -22250,7 +22378,7 @@ class ImageSegmentationPipeline extends (/** @type {new (options: ImagePipelineC
  * // ]
  * ```
  */
-class BackgroundRemovalPipeline extends (/** @type {new (options: ImagePipelineConstructorArgs) => ImageSegmentationPipelineType} */ (ImageSegmentationPipeline)) {
+class BackgroundRemovalPipeline extends (/** @type {new (options: ImagePipelineConstructorArgs) => BackgroundRemovalPipelineType} */ (/** @type {any} */(ImageSegmentationPipeline))) {
     /**
      * Create a new BackgroundRemovalPipeline.
      * @param {ImagePipelineConstructorArgs} options An object used to instantiate the pipeline.
@@ -22285,25 +22413,25 @@ class BackgroundRemovalPipeline extends (/** @type {new (options: ImagePipelineC
  * @typedef {Object} ZeroShotImageClassificationOutput
  * @property {string} label The label identified by the model. It is one of the suggested `candidate_labels`.
  * @property {number} score The score attributed by the model for that label (between 0 and 1).
- *
+ *
  * @typedef {Object} ZeroShotImageClassificationPipelineOptions Parameters specific to zero-shot image classification pipelines.
  * @property {string} [hypothesis_template="This is a photo of {}"] The sentence used in conjunction with `candidate_labels`
  * to attempt the image classification by replacing the placeholder with the candidate_labels.
  * Then likelihood is estimated by using `logits_per_image`.
- *
+ *
  * @callback ZeroShotImageClassificationPipelineCallback Assign labels to the image(s) passed as inputs.
  * @param {ImagePipelineInputs} images The input images.
  * @param {string[]} candidate_labels The candidate labels for this image.
  * @param {ZeroShotImageClassificationPipelineOptions} [options] The options to use for zero-shot image classification.
  * @returns {Promise<ZeroShotImageClassificationOutput[]|ZeroShotImageClassificationOutput[][]>} An array of objects containing the predicted labels and scores.
- *
+ *
  * @typedef {TextImagePipelineConstructorArgs & ZeroShotImageClassificationPipelineCallback & Disposable} ZeroShotImageClassificationPipelineType
  */
 /**
  * Zero shot image classification pipeline. This pipeline predicts the class of
  * an image when you provide an image and a set of `candidate_labels`.
- *
+ *
  * **Example:** Zero shot image classification w/ `Xenova/clip-vit-base-patch32`.
  * ```javascript
  * const classifier = await pipeline('zero-shot-image-classification', 'Xenova/clip-vit-base-patch32');
@@ -22333,7 +22461,7 @@ class ZeroShotImageClassificationPipeline extends (/** @type {new (options: Text
         const isBatched = Array.isArray(images);
         const preparedImages = await prepareImages(images);
-        // Insert label into hypothesis template
+        // Insert label into hypothesis template
         const texts = candidate_labels.map(
             x => hypothesis_template.replace('{}', x)
         );
@@ -22380,23 +22508,23 @@ class ZeroShotImageClassificationPipeline extends (/** @type {new (options: Text
  * @property {number} score The score attributed by the model for that label.
  * @property {BoundingBox} box The bounding box of detected object in image's original size, or as a percentage if `percentage` is set to true.
  * @typedef {ObjectDetectionPipelineSingle[]} ObjectDetectionPipelineOutput
- *
+ *
  * @typedef {Object} ObjectDetectionPipelineOptions Parameters specific to object detection pipelines.
  * @property {number} [threshold=0.9] The threshold used to filter boxes by score.
  * @property {boolean} [percentage=false] Whether to return the boxes coordinates in percentage (true) or in pixels (false).
- *
+ *
  * @callback ObjectDetectionPipelineCallback Detect objects (bounding boxes & classes) in the image(s) passed as inputs.
  * @param {ImagePipelineInputs} images The input images.
  * @param {ObjectDetectionPipelineOptions} [options] The options to use for object detection.
- * @returns {Promise<ObjectDetectionPipelineOutput|ObjectDetectionPipelineOutput[]>} A list of objects or a list of list of objects.
- *
+ * @returns {Promise<ObjectDetectionPipelineOutput|ObjectDetectionPipelineOutput[]>} A list of objects or a list of list of objects.
+ *
  * @typedef {ImagePipelineConstructorArgs & ObjectDetectionPipelineCallback & Disposable} ObjectDetectionPipelineType
  */
 /**
  * Object detection pipeline using any `AutoModelForObjectDetection`.
  * This pipeline predicts bounding boxes of objects and their classes.
- *
+ *
  * **Example:** Run object-detection with `Xenova/detr-resnet-50`.
  * ```javascript
  * const detector = await pipeline('object-detection', 'Xenova/detr-resnet-50');
@@ -22470,27 +22598,27 @@ class ObjectDetectionPipeline extends (/** @type {new (options: ImagePipelineCon
  * @property {string} label Text query corresponding to the found object.
  * @property {number} score Score corresponding to the object (between 0 and 1).
  * @property {BoundingBox} box Bounding box of the detected object in image's original size, or as a percentage if `percentage` is set to true.
- *
+ *
  * @typedef {Object} ZeroShotObjectDetectionPipelineOptions Parameters specific to zero-shot object detection pipelines.
  * @property {number} [threshold=0.1] The probability necessary to make a prediction.
  * @property {number} [top_k=null] The number of top predictions that will be returned by the pipeline.
  * If the provided number is `null` or higher than the number of predictions available, it will default
  * to the number of predictions.
  * @property {boolean} [percentage=false] Whether to return the boxes coordinates in percentage (true) or in pixels (false).
- *
+ *
  * @callback ZeroShotObjectDetectionPipelineCallback Detect objects (bounding boxes & classes) in the image(s) passed as inputs.
  * @param {ImagePipelineInputs} images The input images.
  * @param {string[]} candidate_labels What the model should recognize in the image.
  * @param {ZeroShotObjectDetectionPipelineOptions} [options] The options to use for zero-shot object detection.
  * @returns {Promise<ZeroShotObjectDetectionOutput[]|ZeroShotObjectDetectionOutput[][]>} An array of objects containing the predicted labels, scores, and bounding boxes.
- *
+ *
  * @typedef {TextImagePipelineConstructorArgs & ZeroShotObjectDetectionPipelineCallback & Disposable} ZeroShotObjectDetectionPipelineType
  */
 /**
  * Zero-shot object detection pipeline. This pipeline predicts bounding boxes of
  * objects when you provide an image and a set of `candidate_labels`.
- *
+ *
  * **Example:** Zero-shot object detection w/ `Xenova/owlvit-base-patch32`.
  * ```javascript
  * const detector = await pipeline('zero-shot-object-detection', 'Xenova/owlvit-base-patch32');
@@ -22520,7 +22648,7 @@ class ObjectDetectionPipeline extends (/** @type {new (options: ImagePipelineCon
  * //   }
  * // ]
  * ```
- *
+ *
  * **Example:** Zero-shot object detection w/ `Xenova/owlvit-base-patch32` (returning top 4 matches and setting a threshold).
  * ```javascript
  * const detector = await pipeline('zero-shot-object-detection', 'Xenova/owlvit-base-patch32');
@@ -22635,13 +22763,13 @@ class ZeroShotObjectDetectionPipeline extends (/** @type {new (options: TextImag
  * @typedef {Object} DocumentQuestionAnsweringSingle
  * @property {string} answer The generated text.
  * @typedef {DocumentQuestionAnsweringSingle[]} DocumentQuestionAnsweringOutput
- *
+ *
  * @callback DocumentQuestionAnsweringPipelineCallback Answer the question given as input by using the document.
  * @param {ImageInput} image The image of the document to use.
  * @param {string} question A question to ask of the document.
  * @param {Partial<import('./generation/configuration_utils.js').GenerationConfig>} [options] Additional keyword arguments to pass along to the generate method of the model.
  * @returns {Promise<DocumentQuestionAnsweringOutput|DocumentQuestionAnsweringOutput[]>} An object (or array of objects) containing the answer(s).
- *
+ *
  * @typedef {TextImagePipelineConstructorArgs & DocumentQuestionAnsweringPipelineCallback & Disposable} DocumentQuestionAnsweringPipelineType
  */
@@ -22649,7 +22777,7 @@ class ZeroShotObjectDetectionPipeline extends (/** @type {new (options: TextImag
  * Document Question Answering pipeline using any `AutoModelForDocumentQuestionAnswering`.
  * The inputs/outputs are similar to the (extractive) question answering pipeline; however,
  * the pipeline takes an image (and optional OCR'd words/boxes) as input instead of text context.
- *
+ *
  * **Example:** Answer questions about a document with `Xenova/donut-base-finetuned-docvqa`.
  * ```javascript
  * const qa_pipeline = await pipeline('document-question-answering', 'Xenova/donut-base-finetuned-docvqa');
@@ -22719,22 +22847,22 @@ class DocumentQuestionAnsweringPipeline extends (/** @type {new (options: TextIm
  * @typedef {Object} TextToAudioOutput
  * @property {Float32Array} audio The generated audio waveform.
  * @property {number} sampling_rate The sampling rate of the generated audio waveform.
- *
+ *
  * @typedef {Object} TextToAudioPipelineOptions Parameters specific to text-to-audio pipelines.
  * @property {Tensor|Float32Array|string|URL} [speaker_embeddings=null] The speaker embeddings (if the model requires it).
- *
+ *
  * @callback TextToAudioPipelineCallback Generates speech/audio from the inputs.
  * @param {string|string[]} texts The text(s) to generate.
  * @param {TextToAudioPipelineOptions} options Parameters passed to the model generation/forward method.
  * @returns {Promise<TextToAudioOutput>} An object containing the generated audio and sampling rate.
- *
+ *
  * @typedef {TextToAudioPipelineConstructorArgs & TextToAudioPipelineCallback & Disposable} TextToAudioPipelineType
  */
 /**
  * Text-to-audio generation pipeline using any `AutoModelForTextToWaveform` or `AutoModelForTextToSpectrogram`.
  * This pipeline generates an audio file from an input text and optional other conditional inputs.
- *
+ *
  * **Example:** Generate audio from text with `Xenova/speecht5_tts`.
  * ```javascript
  * const synthesizer = await pipeline('text-to-speech', 'Xenova/speecht5_tts', { quantized: false });
@@ -22745,17 +22873,17 @@ class DocumentQuestionAnsweringPipeline extends (/** @type {new (options: TextIm
  * //   sampling_rate: 16000
  * // }
  * ```
- *
+ *
  * You can then save the audio to a .wav file with the `wavefile` package:
  * ```javascript
  * import wavefile from 'wavefile';
  * import fs from 'fs';
- *
+ *
  * const wav = new wavefile.WaveFile();
  * wav.fromScratch(1, out.sampling_rate, '32f', out.audio);
  * fs.writeFileSync('out.wav', wav.toBuffer());
  * ```
- *
+ *
  * **Example:** Multilingual speech generation with `Xenova/mms-tts-fra`. See [here](https://huggingface.co/models?pipeline_tag=text-to-speech&other=vits&sort=trending) for the full list of available languages (1107).
  * ```javascript
  * const synthesizer = await pipeline('text-to-speech', 'Xenova/mms-tts-fra');
@@ -22861,13 +22989,13 @@ class TextToAudioPipeline extends (/** @type {new (options: TextToAudioPipelineC
  * @callback ImageToImagePipelineCallback Transform the image(s) passed as inputs.
  * @param {ImagePipelineInputs} images The images to transform.
  * @returns {Promise<RawImage|RawImage[]>} The transformed image or list of images.
- *
+ *
  * @typedef {ImagePipelineConstructorArgs & ImageToImagePipelineCallback & Disposable} ImageToImagePipelineType
  */
 /**
  * Image to Image pipeline using any `AutoModelForImageToImage`. This pipeline generates an image based on a previous image input.
- *
+ *
  * **Example:** Super-resolution w/ `Xenova/swin2SR-classical-sr-x2-64`
  * ```javascript
  * const upscaler = await pipeline('image-to-image', 'Xenova/swin2SR-classical-sr-x2-64');
@@ -22912,17 +23040,17 @@ class ImageToImagePipeline extends (/** @type {new (options: ImagePipelineConstr
  * @typedef {Object} DepthEstimationPipelineOutput
  * @property {Tensor} predicted_depth The raw depth map predicted by the model.
  * @property {RawImage} depth The processed depth map as an image (with the same size as the input image).
- *
+ *
  * @callback DepthEstimationPipelineCallback Predicts the depth for the image(s) passed as inputs.
  * @param {ImagePipelineInputs} images The images to compute depth for.
  * @returns {Promise<DepthEstimationPipelineOutput|DepthEstimationPipelineOutput[]>} An image or a list of images containing result(s).
- *
+ *
  * @typedef {ImagePipelineConstructorArgs & DepthEstimationPipelineCallback & Disposable} DepthEstimationPipelineType
  */
 /**
  * Depth estimation pipeline using any `AutoModelForDepthEstimation`. This pipeline predicts the depth of an image.
- *
+ *
  * **Example:** Depth estimation w/ `Xenova/dpt-hybrid-midas`
  * ```javascript
  * const depth_estimator = await pipeline('depth-estimation', 'Xenova/dpt-hybrid-midas');
@@ -23307,7 +23435,7 @@ const TASK_ALIASES = Object.freeze({
 /**
  * Utility factory method to build a `Pipeline` object.
- *
+ *
  * @template {PipelineType} T The type of pipeline to return.
  * @param {T} task The task defining which pipeline will be returned. Currently accepted tasks are:
  *  - `"audio-classification"`: will return a `AudioClassificationPipeline`.
@@ -30583,7 +30711,7 @@ class RawImage {
     /**
      * Helper method for reading an image from a variety of input types.
-     * @param {RawImage|string|URL} input
+     * @param {RawImage|string|URL|Blob|HTMLCanvasElement|OffscreenCanvas} input
      * @returns The image object.
      *
      * **Example:** Read image from a URL.
@@ -30602,6 +30730,14 @@ class RawImage {
             return input;
         } else if (typeof input === 'string' || input instanceof URL) {
             return await this.fromURL(input);
+        } else if (input instanceof Blob) {
+            return await this.fromBlob(input);
+        } else if (
+            (typeof HTMLCanvasElement !== "undefined" && input instanceof HTMLCanvasElement)
+            ||
+            (typeof OffscreenCanvas !== "undefined" && input instanceof OffscreenCanvas)
+        ) {
+            return this.fromCanvas(input);
         } else {
             throw new Error(`Unsupported input type: ${typeof input}`);
         }
@@ -33613,8 +33749,12 @@ function calc_unsqueeze_dims(dims, dim) {
  * @private
  */
 function safeIndex(index, size, dimension = null, boundsCheck = true) {
-    if (boundsCheck && (index < -size || index >= size)) {
-        throw new Error(`IndexError: index ${index} is out of bounds for dimension${dimension === null ? '' : ' ' + dimension} with size ${size}`);
+    if (index < -size || index >= size) {
+        if (boundsCheck) {
+            throw new Error(`IndexError: index ${index} is out of bounds for dimension${dimension === null ? '' : ' ' + dimension} with size ${size}`);
+        } else {
+            return index < -size ? 0 : size;
+        }
     }
     if (index < 0) {
@@ -34471,6 +34611,9 @@ __webpack_require__.r(__webpack_exports__);
 /* harmony export */   Gemma2ForCausalLM: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Gemma2ForCausalLM),
 /* harmony export */   Gemma2Model: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Gemma2Model),
 /* harmony export */   Gemma2PreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Gemma2PreTrainedModel),
+/* harmony export */   Gemma3ForCausalLM: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Gemma3ForCausalLM),
+/* harmony export */   Gemma3Model: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Gemma3Model),
+/* harmony export */   Gemma3PreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Gemma3PreTrainedModel),
 /* harmony export */   GemmaForCausalLM: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.GemmaForCausalLM),
 /* harmony export */   GemmaModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.GemmaModel),
 /* harmony export */   GemmaPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.GemmaPreTrainedModel),
@@ -34572,6 +34715,10 @@ __webpack_require__.r(__webpack_exports__);
 /* harmony export */   MaskFormerPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MaskFormerPreTrainedModel),
 /* harmony export */   MaskedLMOutput: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MaskedLMOutput),
 /* harmony export */   MaxLengthCriteria: () => (/* reexport safe */ _generation_stopping_criteria_js__WEBPACK_IMPORTED_MODULE_20__.MaxLengthCriteria),
+/* harmony export */   Metric3DForDepthEstimation: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Metric3DForDepthEstimation),
+/* harmony export */   Metric3DPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Metric3DPreTrainedModel),
+/* harmony export */   Metric3Dv2ForDepthEstimation: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Metric3Dv2ForDepthEstimation),
+/* harmony export */   Metric3Dv2PreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Metric3Dv2PreTrainedModel),
 /* harmony export */   MgpstrForSceneTextRecognition: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MgpstrForSceneTextRecognition),
 /* harmony export */   MgpstrModelOutput: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MgpstrModelOutput),
 /* harmony export */   MgpstrPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MgpstrPreTrainedModel),
@@ -34778,6 +34925,11 @@ __webpack_require__.r(__webpack_exports__);
 /* harmony export */   SmolVLMForConditionalGeneration: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.SmolVLMForConditionalGeneration),
 /* harmony export */   SmolVLMImageProcessor: () => (/* reexport safe */ _models_image_processors_js__WEBPACK_IMPORTED_MODULE_14__.SmolVLMImageProcessor),
 /* harmony export */   SmolVLMProcessor: () => (/* reexport safe */ _models_processors_js__WEBPACK_IMPORTED_MODULE_17__.SmolVLMProcessor),
+/* harmony export */   SnacDecoderModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.SnacDecoderModel),
+/* harmony export */   SnacEncoderModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.SnacEncoderModel),
+/* harmony export */   SnacFeatureExtractor: () => (/* reexport safe */ _models_feature_extractors_js__WEBPACK_IMPORTED_MODULE_11__.SnacFeatureExtractor),
+/* harmony export */   SnacModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.SnacModel),
+/* harmony export */   SnacPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.SnacPreTrainedModel),
 /* harmony export */   SpeechT5FeatureExtractor: () => (/* reexport safe */ _models_feature_extractors_js__WEBPACK_IMPORTED_MODULE_11__.SpeechT5FeatureExtractor),
 /* harmony export */   SpeechT5ForSpeechToText: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.SpeechT5ForSpeechToText),
 /* harmony export */   SpeechT5ForTextToSpeech: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.SpeechT5ForTextToSpeech),