@huggingface/transformers 3.2.4 → 3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. package/README.md +5 -3
  2. package/dist/ort-wasm-simd-threaded.jsep.wasm +0 -0
  3. package/dist/ort.bundle.min.mjs +2776 -0
  4. package/dist/transformers.cjs +598 -247
  5. package/dist/transformers.cjs.map +1 -1
  6. package/dist/transformers.js +956 -573
  7. package/dist/transformers.js.map +1 -1
  8. package/dist/transformers.min.cjs +1 -1
  9. package/dist/transformers.min.cjs.map +1 -1
  10. package/dist/transformers.min.js +1 -1
  11. package/dist/transformers.min.js.map +1 -1
  12. package/dist/transformers.min.mjs +1 -1
  13. package/dist/transformers.min.mjs.map +1 -1
  14. package/dist/transformers.mjs +604 -248
  15. package/dist/transformers.mjs.map +1 -1
  16. package/package.json +2 -2
  17. package/src/base/image_processors_utils.js +1 -1
  18. package/src/base/processing_utils.js +11 -0
  19. package/src/env.js +1 -2
  20. package/src/generation/streamers.js +5 -2
  21. package/src/models/grounding_dino/image_processing_grounding_dino.js +29 -0
  22. package/src/models/grounding_dino/processing_grounding_dino.js +101 -0
  23. package/src/models/image_processors.js +1 -0
  24. package/src/models/processors.js +3 -2
  25. package/src/models.js +22 -5
  26. package/src/pipelines.js +39 -16
  27. package/src/utils/audio.js +113 -1
  28. package/src/utils/core.js +26 -0
  29. package/src/utils/image.js +5 -18
  30. package/src/utils/tensor.js +100 -112
  31. package/types/base/image_processors_utils.d.ts +7 -0
  32. package/types/base/image_processors_utils.d.ts.map +1 -1
  33. package/types/base/processing_utils.d.ts +8 -0
  34. package/types/base/processing_utils.d.ts.map +1 -1
  35. package/types/generation/streamers.d.ts +3 -1
  36. package/types/generation/streamers.d.ts.map +1 -1
  37. package/types/models/auto/image_processing_auto.d.ts.map +1 -1
  38. package/types/models/grounding_dino/image_processing_grounding_dino.d.ts +20 -0
  39. package/types/models/grounding_dino/image_processing_grounding_dino.d.ts.map +1 -0
  40. package/types/models/grounding_dino/processing_grounding_dino.d.ts +27 -0
  41. package/types/models/grounding_dino/processing_grounding_dino.d.ts.map +1 -0
  42. package/types/models/image_processors.d.ts +1 -0
  43. package/types/models/processors.d.ts +3 -2
  44. package/types/models.d.ts +8 -0
  45. package/types/models.d.ts.map +1 -1
  46. package/types/pipelines.d.ts +5 -10
  47. package/types/pipelines.d.ts.map +1 -1
  48. package/types/tsconfig.tsbuildinfo +1 -1
  49. package/types/utils/audio.d.ts +25 -0
  50. package/types/utils/audio.d.ts.map +1 -1
  51. package/types/utils/core.d.ts +6 -0
  52. package/types/utils/core.d.ts.map +1 -1
  53. package/types/utils/image.d.ts.map +1 -1
  54. package/types/utils/tensor.d.ts +14 -2
  55. package/types/utils/tensor.d.ts.map +1 -1
@@ -4207,6 +4207,7 @@ function validate_audio_inputs(audio, feature_extractor) {
4207
4207
  __webpack_require__.r(__webpack_exports__);
4208
4208
  /* harmony export */ __webpack_require__.d(__webpack_exports__, {
4209
4209
  /* harmony export */ ImageProcessor: () => (/* binding */ ImageProcessor),
4210
+ /* harmony export */ center_to_corners_format: () => (/* binding */ center_to_corners_format),
4210
4211
  /* harmony export */ post_process_instance_segmentation: () => (/* binding */ post_process_instance_segmentation),
4211
4212
  /* harmony export */ post_process_object_detection: () => (/* binding */ post_process_object_detection),
4212
4213
  /* harmony export */ post_process_panoptic_segmentation: () => (/* binding */ post_process_panoptic_segmentation),
@@ -5440,6 +5441,17 @@ class Processor extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_1__.Callable
5440
5441
  return this.tokenizer.batch_decode(...args);
5441
5442
  }
5442
5443
 
5444
+ /**
5445
+ * @param {Parameters<PreTrainedTokenizer['decode']>} args
5446
+ * @returns {ReturnType<PreTrainedTokenizer['decode']>}
5447
+ */
5448
+ decode(...args) {
5449
+ if (!this.tokenizer) {
5450
+ throw new Error('Unable to decode without a tokenizer.');
5451
+ }
5452
+ return this.tokenizer.decode(...args);
5453
+ }
5454
+
5443
5455
 
5444
5456
  /**
5445
5457
  * Calls the feature_extractor function with the given input.
@@ -5958,7 +5970,7 @@ __webpack_require__.r(__webpack_exports__);
5958
5970
 
5959
5971
 
5960
5972
 
5961
- const VERSION = '3.2.4';
5973
+ const VERSION = '3.3.0';
5962
5974
 
5963
5975
  // Check if various APIs are available (depends on environment)
5964
5976
  const IS_BROWSER_ENV = typeof window !== "undefined" && typeof window.document !== "undefined";
@@ -6094,7 +6106,6 @@ function isEmpty(obj) {
6094
6106
  }
6095
6107
 
6096
6108
 
6097
-
6098
6109
  /***/ }),
6099
6110
 
6100
6111
  /***/ "./src/generation/configuration_utils.js":
@@ -7709,6 +7720,7 @@ class TextStreamer extends BaseStreamer {
7709
7720
  * @param {import('../tokenizers.js').PreTrainedTokenizer} tokenizer
7710
7721
  * @param {Object} options
7711
7722
  * @param {boolean} [options.skip_prompt=false] Whether to skip the prompt tokens
7723
+ * @param {boolean} [options.skip_special_tokens=true] Whether to skip special tokens when decoding
7712
7724
  * @param {function(string): void} [options.callback_function=null] Function to call when a piece of text is ready to display
7713
7725
  * @param {function(bigint[]): void} [options.token_callback_function=null] Function to call when a new token is generated
7714
7726
  * @param {Object} [options.decode_kwargs={}] Additional keyword arguments to pass to the tokenizer's decode method
@@ -7717,6 +7729,7 @@ class TextStreamer extends BaseStreamer {
7717
7729
  skip_prompt = false,
7718
7730
  callback_function = null,
7719
7731
  token_callback_function = null,
7732
+ skip_special_tokens = true,
7720
7733
  decode_kwargs = {},
7721
7734
  ...kwargs
7722
7735
  } = {}) {
@@ -7725,7 +7738,7 @@ class TextStreamer extends BaseStreamer {
7725
7738
  this.skip_prompt = skip_prompt;
7726
7739
  this.callback_function = callback_function ?? stdout_write;
7727
7740
  this.token_callback_function = token_callback_function;
7728
- this.decode_kwargs = { ...decode_kwargs, ...kwargs };
7741
+ this.decode_kwargs = { skip_special_tokens, ...decode_kwargs, ...kwargs };
7729
7742
 
7730
7743
  // variables used in the streaming process
7731
7744
  this.token_cache = [];
@@ -7841,9 +7854,10 @@ class WhisperTextStreamer extends TextStreamer {
7841
7854
  } = {}) {
7842
7855
  super(tokenizer, {
7843
7856
  skip_prompt,
7857
+ skip_special_tokens,
7844
7858
  callback_function,
7845
7859
  token_callback_function,
7846
- decode_kwargs: { skip_special_tokens, ...decode_kwargs },
7860
+ decode_kwargs,
7847
7861
  });
7848
7862
  this.timestamp_begin = tokenizer.timestamp_begin;
7849
7863
 
@@ -8097,6 +8111,8 @@ __webpack_require__.r(__webpack_exports__);
8097
8111
  /* harmony export */ GraniteForCausalLM: () => (/* binding */ GraniteForCausalLM),
8098
8112
  /* harmony export */ GraniteModel: () => (/* binding */ GraniteModel),
8099
8113
  /* harmony export */ GranitePreTrainedModel: () => (/* binding */ GranitePreTrainedModel),
8114
+ /* harmony export */ GroundingDinoForObjectDetection: () => (/* binding */ GroundingDinoForObjectDetection),
8115
+ /* harmony export */ GroundingDinoPreTrainedModel: () => (/* binding */ GroundingDinoPreTrainedModel),
8100
8116
  /* harmony export */ GroupViTModel: () => (/* binding */ GroupViTModel),
8101
8117
  /* harmony export */ GroupViTPreTrainedModel: () => (/* binding */ GroupViTPreTrainedModel),
8102
8118
  /* harmony export */ HieraForImageClassification: () => (/* binding */ HieraForImageClassification),
@@ -8305,6 +8321,8 @@ __webpack_require__.r(__webpack_exports__);
8305
8321
  /* harmony export */ Starcoder2ForCausalLM: () => (/* binding */ Starcoder2ForCausalLM),
8306
8322
  /* harmony export */ Starcoder2Model: () => (/* binding */ Starcoder2Model),
8307
8323
  /* harmony export */ Starcoder2PreTrainedModel: () => (/* binding */ Starcoder2PreTrainedModel),
8324
+ /* harmony export */ StyleTextToSpeech2Model: () => (/* binding */ StyleTextToSpeech2Model),
8325
+ /* harmony export */ StyleTextToSpeech2PreTrainedModel: () => (/* binding */ StyleTextToSpeech2PreTrainedModel),
8308
8326
  /* harmony export */ Swin2SRForImageSuperResolution: () => (/* binding */ Swin2SRForImageSuperResolution),
8309
8327
  /* harmony export */ Swin2SRModel: () => (/* binding */ Swin2SRModel),
8310
8328
  /* harmony export */ Swin2SRPreTrainedModel: () => (/* binding */ Swin2SRPreTrainedModel),
@@ -8882,14 +8900,23 @@ async function encoderForward(self, model_inputs) {
8882
8900
  encoderFeeds.inputs_embeds = await self.encode_text({ input_ids: model_inputs.input_ids });
8883
8901
  }
8884
8902
  if (session.inputNames.includes('token_type_ids') && !encoderFeeds.token_type_ids) {
8903
+ if (!encoderFeeds.input_ids) {
8904
+ throw new Error('Both `input_ids` and `token_type_ids` are missing in the model inputs.');
8905
+ }
8885
8906
  // Assign default `token_type_ids` (all zeroes) to the `encoderFeeds` if the model expects it,
8886
8907
  // but they weren't created by the tokenizer.
8887
- encoderFeeds.token_type_ids = new _utils_tensor_js__WEBPACK_IMPORTED_MODULE_9__.Tensor(
8888
- 'int64',
8889
- new BigInt64Array(encoderFeeds.input_ids.data.length),
8890
- encoderFeeds.input_ids.dims
8891
- )
8908
+ encoderFeeds.token_type_ids = (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_9__.zeros_like)(encoderFeeds.input_ids);
8892
8909
  }
8910
+ if (session.inputNames.includes('pixel_mask') && !encoderFeeds.pixel_mask) {
8911
+ if (!encoderFeeds.pixel_values) {
8912
+ throw new Error('Both `pixel_values` and `pixel_mask` are missing in the model inputs.');
8913
+ }
8914
+ // Assign default `pixel_mask` (all ones) to the `encoderFeeds` if the model expects it,
8915
+ // but they weren't created by the processor.
8916
+ const dims = encoderFeeds.pixel_values.dims;
8917
+ encoderFeeds.pixel_mask = (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_9__.ones)([dims[0], dims[2], dims[3]]);
8918
+ }
8919
+
8893
8920
  return await sessionRun(session, encoderFeeds);
8894
8921
  }
8895
8922
 
@@ -13778,6 +13805,8 @@ class Dinov2WithRegistersForImageClassification extends Dinov2WithRegistersPreTr
13778
13805
  }
13779
13806
  }
13780
13807
  //////////////////////////////////////////////////
13808
+ class GroundingDinoPreTrainedModel extends PreTrainedModel { }
13809
+ class GroundingDinoForObjectDetection extends GroundingDinoPreTrainedModel { }
13781
13810
 
13782
13811
  //////////////////////////////////////////////////
13783
13812
  class YolosPreTrainedModel extends PreTrainedModel { }
@@ -14476,6 +14505,9 @@ class WavLMForAudioFrameClassification extends WavLMPreTrainedModel {
14476
14505
  }
14477
14506
  }
14478
14507
 
14508
+ class StyleTextToSpeech2PreTrainedModel extends PreTrainedModel { }
14509
+ class StyleTextToSpeech2Model extends StyleTextToSpeech2PreTrainedModel { }
14510
+
14479
14511
  //////////////////////////////////////////////////
14480
14512
  // SpeechT5 models
14481
14513
  /**
@@ -15439,6 +15471,8 @@ const MODEL_MAPPING_NAMES_ENCODER_ONLY = new Map([
15439
15471
 
15440
15472
  ['maskformer', ['MaskFormerModel', MaskFormerModel]],
15441
15473
  ['mgp-str', ['MgpstrForSceneTextRecognition', MgpstrForSceneTextRecognition]],
15474
+
15475
+ ['style_text_to_speech_2', ['StyleTextToSpeech2Model', StyleTextToSpeech2Model]],
15442
15476
  ]);
15443
15477
 
15444
15478
  const MODEL_MAPPING_NAMES_ENCODER_DECODER = new Map([
@@ -15683,6 +15717,7 @@ const MODEL_FOR_OBJECT_DETECTION_MAPPING_NAMES = new Map([
15683
15717
  const MODEL_FOR_ZERO_SHOT_OBJECT_DETECTION_MAPPING_NAMES = new Map([
15684
15718
  ['owlvit', ['OwlViTForObjectDetection', OwlViTForObjectDetection]],
15685
15719
  ['owlv2', ['Owlv2ForObjectDetection', Owlv2ForObjectDetection]],
15720
+ ['grounding-dino', ['GroundingDinoForObjectDetection', GroundingDinoForObjectDetection]],
15686
15721
  ]);
15687
15722
 
15688
15723
  const MODEL_FOR_IMAGE_SEGMENTATION_MAPPING_NAMES = new Map([
@@ -17257,6 +17292,170 @@ __webpack_require__.r(__webpack_exports__);
17257
17292
  class GLPNFeatureExtractor extends _base_image_processors_utils_js__WEBPACK_IMPORTED_MODULE_0__.ImageProcessor { }
17258
17293
 
17259
17294
 
17295
+ /***/ }),
17296
+
17297
+ /***/ "./src/models/grounding_dino/image_processing_grounding_dino.js":
17298
+ /*!**********************************************************************!*\
17299
+ !*** ./src/models/grounding_dino/image_processing_grounding_dino.js ***!
17300
+ \**********************************************************************/
17301
+ /***/ ((__unused_webpack___webpack_module__, __webpack_exports__, __webpack_require__) => {
17302
+
17303
+ "use strict";
17304
+ __webpack_require__.r(__webpack_exports__);
17305
+ /* harmony export */ __webpack_require__.d(__webpack_exports__, {
17306
+ /* harmony export */ GroundingDinoImageProcessor: () => (/* binding */ GroundingDinoImageProcessor)
17307
+ /* harmony export */ });
17308
+ /* harmony import */ var _base_image_processors_utils_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ../../base/image_processors_utils.js */ "./src/base/image_processors_utils.js");
17309
+ /* harmony import */ var _utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ../../utils/tensor.js */ "./src/utils/tensor.js");
17310
+
17311
+
17312
+
17313
+
17314
+
17315
+ /**
17316
+ * @typedef {object} GroundingDinoFeatureExtractorResultProps
17317
+ * @property {import('../../utils/tensor.js').Tensor} pixel_mask
17318
+ * @typedef {import('../../base/image_processors_utils.js').ImageProcessorResult & GroundingDinoFeatureExtractorResultProps} GroundingDinoFeatureExtractorResult
17319
+ */
17320
+
17321
+ class GroundingDinoImageProcessor extends _base_image_processors_utils_js__WEBPACK_IMPORTED_MODULE_0__.ImageProcessor {
17322
+ /**
17323
+ * Calls the feature extraction process on an array of images, preprocesses
17324
+ * each image, and concatenates the resulting features into a single Tensor.
17325
+ * @param {import('../../utils/image.js').RawImage[]} images The image(s) to extract features from.
17326
+ * @returns {Promise<GroundingDinoFeatureExtractorResult>} An object containing the concatenated pixel values of the preprocessed images.
17327
+ */
17328
+ async _call(images) {
17329
+ const result = await super._call(images);
17330
+
17331
+ const dims = result.pixel_values.dims;
17332
+ const pixel_mask = (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_1__.ones)([dims[0], dims[2], dims[3]]);
17333
+
17334
+ return { ...result, pixel_mask };
17335
+ }
17336
+ }
17337
+
17338
+
17339
+ /***/ }),
17340
+
17341
+ /***/ "./src/models/grounding_dino/processing_grounding_dino.js":
17342
+ /*!****************************************************************!*\
17343
+ !*** ./src/models/grounding_dino/processing_grounding_dino.js ***!
17344
+ \****************************************************************/
17345
+ /***/ ((__unused_webpack___webpack_module__, __webpack_exports__, __webpack_require__) => {
17346
+
17347
+ "use strict";
17348
+ __webpack_require__.r(__webpack_exports__);
17349
+ /* harmony export */ __webpack_require__.d(__webpack_exports__, {
17350
+ /* harmony export */ GroundingDinoProcessor: () => (/* binding */ GroundingDinoProcessor)
17351
+ /* harmony export */ });
17352
+ /* harmony import */ var _base_processing_utils_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ../../base/processing_utils.js */ "./src/base/processing_utils.js");
17353
+ /* harmony import */ var _auto_image_processing_auto_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ../auto/image_processing_auto.js */ "./src/models/auto/image_processing_auto.js");
17354
+ /* harmony import */ var _tokenizers_js__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! ../../tokenizers.js */ "./src/tokenizers.js");
17355
+ /* harmony import */ var _base_image_processors_utils_js__WEBPACK_IMPORTED_MODULE_3__ = __webpack_require__(/*! ../../base/image_processors_utils.js */ "./src/base/image_processors_utils.js");
17356
+
17357
+
17358
+
17359
+
17360
+
17361
+ /**
17362
+ * Get token ids of phrases from posmaps and input_ids.
17363
+ * @param {import('../../utils/tensor.js').Tensor} posmaps A boolean tensor of unbatched text-thresholded logits related to the detected bounding boxes of shape `(hidden_size, )`.
17364
+ * @param {import('../../utils/tensor.js').Tensor} input_ids A tensor of token ids of shape `(sequence_length, )`.
17365
+ */
17366
+ function get_phrases_from_posmap(posmaps, input_ids) {
17367
+
17368
+ const left_idx = 0;
17369
+ const right_idx = posmaps.dims.at(-1) - 1;
17370
+
17371
+ const posmaps_list = posmaps.tolist();
17372
+ posmaps_list.fill(false, 0, left_idx + 1);
17373
+ posmaps_list.fill(false, right_idx);
17374
+
17375
+ const input_ids_list = input_ids.tolist();
17376
+ return posmaps_list
17377
+ .map((val, idx) => val ? idx : null)
17378
+ .filter(idx => idx !== null)
17379
+ .map(i => input_ids_list[i]);
17380
+ }
17381
+
17382
+ class GroundingDinoProcessor extends _base_processing_utils_js__WEBPACK_IMPORTED_MODULE_0__.Processor {
17383
+ static tokenizer_class = _tokenizers_js__WEBPACK_IMPORTED_MODULE_2__.AutoTokenizer
17384
+ static image_processor_class = _auto_image_processing_auto_js__WEBPACK_IMPORTED_MODULE_1__.AutoImageProcessor
17385
+
17386
+ /**
17387
+ * @typedef {import('../../utils/image.js').RawImage} RawImage
17388
+ */
17389
+ /**
17390
+ *
17391
+ * @param {RawImage|RawImage[]|RawImage[][]} images
17392
+ * @param {string|string[]} text
17393
+ * @returns {Promise<any>}
17394
+ */
17395
+ async _call(images, text, options = {}) {
17396
+
17397
+ const image_inputs = images ? await this.image_processor(images, options) : {};
17398
+ const text_inputs = text ? this.tokenizer(text, options) : {};
17399
+
17400
+ return {
17401
+ ...text_inputs,
17402
+ ...image_inputs,
17403
+ }
17404
+ }
17405
+ post_process_grounded_object_detection(outputs, input_ids, {
17406
+ box_threshold = 0.25,
17407
+ text_threshold = 0.25,
17408
+ target_sizes = null
17409
+ } = {}) {
17410
+ const { logits, pred_boxes } = outputs;
17411
+ const batch_size = logits.dims[0];
17412
+
17413
+ if (target_sizes !== null && target_sizes.length !== batch_size) {
17414
+ throw Error("Make sure that you pass in as many target sizes as the batch dimension of the logits")
17415
+ }
17416
+ const num_queries = logits.dims.at(1);
17417
+
17418
+ const probs = logits.sigmoid(); // (batch_size, num_queries, 256)
17419
+ const scores = probs.max(-1).tolist(); // (batch_size, num_queries)
17420
+
17421
+ // Convert to [x0, y0, x1, y1] format
17422
+ const boxes = pred_boxes.tolist() // (batch_size, num_queries, 4)
17423
+ .map(batch => batch.map(box => (0,_base_image_processors_utils_js__WEBPACK_IMPORTED_MODULE_3__.center_to_corners_format)(box)));
17424
+
17425
+ const results = [];
17426
+ for (let i = 0; i < batch_size; ++i) {
17427
+ const target_size = target_sizes !== null ? target_sizes[i] : null;
17428
+
17429
+ // Convert from relative [0, 1] to absolute [0, height] coordinates
17430
+ if (target_size !== null) {
17431
+ boxes[i] = boxes[i].map(box => box.map((x, j) => x * target_size[(j + 1) % 2]));
17432
+ }
17433
+
17434
+ const batch_scores = scores[i];
17435
+ const final_scores = [];
17436
+ const final_phrases = [];
17437
+ const final_boxes = [];
17438
+ for (let j = 0; j < num_queries; ++j) {
17439
+ const score = batch_scores[j];
17440
+ if (score <= box_threshold) {
17441
+ continue;
17442
+ }
17443
+ const box = boxes[i][j];
17444
+ const prob = probs[i][j];
17445
+
17446
+ final_scores.push(score);
17447
+ final_boxes.push(box);
17448
+
17449
+ const phrases = get_phrases_from_posmap(prob.gt(text_threshold), input_ids[i]);
17450
+ final_phrases.push(phrases);
17451
+ }
17452
+ results.push({ scores: final_scores, boxes: final_boxes, labels: this.batch_decode(final_phrases) });
17453
+ }
17454
+ return results;
17455
+ }
17456
+ }
17457
+
17458
+
17260
17459
  /***/ }),
17261
17460
 
17262
17461
  /***/ "./src/models/idefics3/image_processing_idefics3.js":
@@ -17688,42 +17887,43 @@ __webpack_require__.r(__webpack_exports__);
17688
17887
  /* harmony export */ DonutImageProcessor: () => (/* reexport safe */ _donut_image_processing_donut_js__WEBPACK_IMPORTED_MODULE_7__.DonutImageProcessor),
17689
17888
  /* harmony export */ EfficientNetImageProcessor: () => (/* reexport safe */ _efficientnet_image_processing_efficientnet_js__WEBPACK_IMPORTED_MODULE_9__.EfficientNetImageProcessor),
17690
17889
  /* harmony export */ GLPNFeatureExtractor: () => (/* reexport safe */ _glpn_image_processing_glpn_js__WEBPACK_IMPORTED_MODULE_10__.GLPNFeatureExtractor),
17691
- /* harmony export */ Idefics3ImageProcessor: () => (/* reexport safe */ _idefics3_image_processing_idefics3_js__WEBPACK_IMPORTED_MODULE_11__.Idefics3ImageProcessor),
17692
- /* harmony export */ JinaCLIPImageProcessor: () => (/* reexport safe */ _jina_clip_image_processing_jina_clip_js__WEBPACK_IMPORTED_MODULE_13__.JinaCLIPImageProcessor),
17693
- /* harmony export */ LlavaOnevisionImageProcessor: () => (/* reexport safe */ _llava_onevision_image_processing_llava_onevision_js__WEBPACK_IMPORTED_MODULE_14__.LlavaOnevisionImageProcessor),
17694
- /* harmony export */ Mask2FormerImageProcessor: () => (/* reexport safe */ _mask2former_image_processing_mask2former_js__WEBPACK_IMPORTED_MODULE_15__.Mask2FormerImageProcessor),
17695
- /* harmony export */ MaskFormerFeatureExtractor: () => (/* reexport safe */ _maskformer_image_processing_maskformer_js__WEBPACK_IMPORTED_MODULE_16__.MaskFormerFeatureExtractor),
17696
- /* harmony export */ MaskFormerImageProcessor: () => (/* reexport safe */ _maskformer_image_processing_maskformer_js__WEBPACK_IMPORTED_MODULE_16__.MaskFormerImageProcessor),
17697
- /* harmony export */ MobileNetV1FeatureExtractor: () => (/* reexport safe */ _mobilenet_v1_image_processing_mobilenet_v1_js__WEBPACK_IMPORTED_MODULE_17__.MobileNetV1FeatureExtractor),
17698
- /* harmony export */ MobileNetV1ImageProcessor: () => (/* reexport safe */ _mobilenet_v1_image_processing_mobilenet_v1_js__WEBPACK_IMPORTED_MODULE_17__.MobileNetV1ImageProcessor),
17699
- /* harmony export */ MobileNetV2FeatureExtractor: () => (/* reexport safe */ _mobilenet_v2_image_processing_mobilenet_v2_js__WEBPACK_IMPORTED_MODULE_18__.MobileNetV2FeatureExtractor),
17700
- /* harmony export */ MobileNetV2ImageProcessor: () => (/* reexport safe */ _mobilenet_v2_image_processing_mobilenet_v2_js__WEBPACK_IMPORTED_MODULE_18__.MobileNetV2ImageProcessor),
17701
- /* harmony export */ MobileNetV3FeatureExtractor: () => (/* reexport safe */ _mobilenet_v3_image_processing_mobilenet_v3_js__WEBPACK_IMPORTED_MODULE_19__.MobileNetV3FeatureExtractor),
17702
- /* harmony export */ MobileNetV3ImageProcessor: () => (/* reexport safe */ _mobilenet_v3_image_processing_mobilenet_v3_js__WEBPACK_IMPORTED_MODULE_19__.MobileNetV3ImageProcessor),
17703
- /* harmony export */ MobileNetV4FeatureExtractor: () => (/* reexport safe */ _mobilenet_v4_image_processing_mobilenet_v4_js__WEBPACK_IMPORTED_MODULE_20__.MobileNetV4FeatureExtractor),
17704
- /* harmony export */ MobileNetV4ImageProcessor: () => (/* reexport safe */ _mobilenet_v4_image_processing_mobilenet_v4_js__WEBPACK_IMPORTED_MODULE_20__.MobileNetV4ImageProcessor),
17705
- /* harmony export */ MobileViTFeatureExtractor: () => (/* reexport safe */ _mobilevit_image_processing_mobilevit_js__WEBPACK_IMPORTED_MODULE_21__.MobileViTFeatureExtractor),
17706
- /* harmony export */ MobileViTImageProcessor: () => (/* reexport safe */ _mobilevit_image_processing_mobilevit_js__WEBPACK_IMPORTED_MODULE_21__.MobileViTImageProcessor),
17707
- /* harmony export */ NougatImageProcessor: () => (/* reexport safe */ _nougat_image_processing_nougat_js__WEBPACK_IMPORTED_MODULE_22__.NougatImageProcessor),
17708
- /* harmony export */ OwlViTFeatureExtractor: () => (/* reexport safe */ _owlvit_image_processing_owlvit_js__WEBPACK_IMPORTED_MODULE_24__.OwlViTFeatureExtractor),
17709
- /* harmony export */ OwlViTImageProcessor: () => (/* reexport safe */ _owlvit_image_processing_owlvit_js__WEBPACK_IMPORTED_MODULE_24__.OwlViTImageProcessor),
17710
- /* harmony export */ Owlv2ImageProcessor: () => (/* reexport safe */ _owlv2_image_processing_owlv2_js__WEBPACK_IMPORTED_MODULE_23__.Owlv2ImageProcessor),
17711
- /* harmony export */ Phi3VImageProcessor: () => (/* reexport safe */ _phi3_v_image_processing_phi3_v_js__WEBPACK_IMPORTED_MODULE_25__.Phi3VImageProcessor),
17712
- /* harmony export */ PvtImageProcessor: () => (/* reexport safe */ _pvt_image_processing_pvt_js__WEBPACK_IMPORTED_MODULE_26__.PvtImageProcessor),
17713
- /* harmony export */ Qwen2VLImageProcessor: () => (/* reexport safe */ _qwen2_vl_image_processing_qwen2_vl_js__WEBPACK_IMPORTED_MODULE_27__.Qwen2VLImageProcessor),
17714
- /* harmony export */ RTDetrImageProcessor: () => (/* reexport safe */ _rt_detr_image_processing_rt_detr_js__WEBPACK_IMPORTED_MODULE_28__.RTDetrImageProcessor),
17715
- /* harmony export */ SamImageProcessor: () => (/* reexport safe */ _sam_image_processing_sam_js__WEBPACK_IMPORTED_MODULE_29__.SamImageProcessor),
17716
- /* harmony export */ SegformerFeatureExtractor: () => (/* reexport safe */ _segformer_image_processing_segformer_js__WEBPACK_IMPORTED_MODULE_30__.SegformerFeatureExtractor),
17717
- /* harmony export */ SegformerImageProcessor: () => (/* reexport safe */ _segformer_image_processing_segformer_js__WEBPACK_IMPORTED_MODULE_30__.SegformerImageProcessor),
17718
- /* harmony export */ SiglipImageProcessor: () => (/* reexport safe */ _siglip_image_processing_siglip_js__WEBPACK_IMPORTED_MODULE_31__.SiglipImageProcessor),
17719
- /* harmony export */ Swin2SRImageProcessor: () => (/* reexport safe */ _swin2sr_image_processing_swin2sr_js__WEBPACK_IMPORTED_MODULE_32__.Swin2SRImageProcessor),
17720
- /* harmony export */ VLMImageProcessor: () => (/* reexport safe */ _janus_image_processing_janus_js__WEBPACK_IMPORTED_MODULE_12__.VLMImageProcessor),
17721
- /* harmony export */ ViTFeatureExtractor: () => (/* reexport safe */ _vit_image_processing_vit_js__WEBPACK_IMPORTED_MODULE_33__.ViTFeatureExtractor),
17722
- /* harmony export */ ViTImageProcessor: () => (/* reexport safe */ _vit_image_processing_vit_js__WEBPACK_IMPORTED_MODULE_33__.ViTImageProcessor),
17723
- /* harmony export */ VitMatteImageProcessor: () => (/* reexport safe */ _vitmatte_image_processing_vitmatte_js__WEBPACK_IMPORTED_MODULE_34__.VitMatteImageProcessor),
17724
- /* harmony export */ VitPoseImageProcessor: () => (/* reexport safe */ _vitpose_image_processing_vitpose_js__WEBPACK_IMPORTED_MODULE_35__.VitPoseImageProcessor),
17725
- /* harmony export */ YolosFeatureExtractor: () => (/* reexport safe */ _yolos_image_processing_yolos_js__WEBPACK_IMPORTED_MODULE_36__.YolosFeatureExtractor),
17726
- /* harmony export */ YolosImageProcessor: () => (/* reexport safe */ _yolos_image_processing_yolos_js__WEBPACK_IMPORTED_MODULE_36__.YolosImageProcessor)
17890
+ /* harmony export */ GroundingDinoImageProcessor: () => (/* reexport safe */ _grounding_dino_image_processing_grounding_dino_js__WEBPACK_IMPORTED_MODULE_11__.GroundingDinoImageProcessor),
17891
+ /* harmony export */ Idefics3ImageProcessor: () => (/* reexport safe */ _idefics3_image_processing_idefics3_js__WEBPACK_IMPORTED_MODULE_12__.Idefics3ImageProcessor),
17892
+ /* harmony export */ JinaCLIPImageProcessor: () => (/* reexport safe */ _jina_clip_image_processing_jina_clip_js__WEBPACK_IMPORTED_MODULE_14__.JinaCLIPImageProcessor),
17893
+ /* harmony export */ LlavaOnevisionImageProcessor: () => (/* reexport safe */ _llava_onevision_image_processing_llava_onevision_js__WEBPACK_IMPORTED_MODULE_15__.LlavaOnevisionImageProcessor),
17894
+ /* harmony export */ Mask2FormerImageProcessor: () => (/* reexport safe */ _mask2former_image_processing_mask2former_js__WEBPACK_IMPORTED_MODULE_16__.Mask2FormerImageProcessor),
17895
+ /* harmony export */ MaskFormerFeatureExtractor: () => (/* reexport safe */ _maskformer_image_processing_maskformer_js__WEBPACK_IMPORTED_MODULE_17__.MaskFormerFeatureExtractor),
17896
+ /* harmony export */ MaskFormerImageProcessor: () => (/* reexport safe */ _maskformer_image_processing_maskformer_js__WEBPACK_IMPORTED_MODULE_17__.MaskFormerImageProcessor),
17897
+ /* harmony export */ MobileNetV1FeatureExtractor: () => (/* reexport safe */ _mobilenet_v1_image_processing_mobilenet_v1_js__WEBPACK_IMPORTED_MODULE_18__.MobileNetV1FeatureExtractor),
17898
+ /* harmony export */ MobileNetV1ImageProcessor: () => (/* reexport safe */ _mobilenet_v1_image_processing_mobilenet_v1_js__WEBPACK_IMPORTED_MODULE_18__.MobileNetV1ImageProcessor),
17899
+ /* harmony export */ MobileNetV2FeatureExtractor: () => (/* reexport safe */ _mobilenet_v2_image_processing_mobilenet_v2_js__WEBPACK_IMPORTED_MODULE_19__.MobileNetV2FeatureExtractor),
17900
+ /* harmony export */ MobileNetV2ImageProcessor: () => (/* reexport safe */ _mobilenet_v2_image_processing_mobilenet_v2_js__WEBPACK_IMPORTED_MODULE_19__.MobileNetV2ImageProcessor),
17901
+ /* harmony export */ MobileNetV3FeatureExtractor: () => (/* reexport safe */ _mobilenet_v3_image_processing_mobilenet_v3_js__WEBPACK_IMPORTED_MODULE_20__.MobileNetV3FeatureExtractor),
17902
+ /* harmony export */ MobileNetV3ImageProcessor: () => (/* reexport safe */ _mobilenet_v3_image_processing_mobilenet_v3_js__WEBPACK_IMPORTED_MODULE_20__.MobileNetV3ImageProcessor),
17903
+ /* harmony export */ MobileNetV4FeatureExtractor: () => (/* reexport safe */ _mobilenet_v4_image_processing_mobilenet_v4_js__WEBPACK_IMPORTED_MODULE_21__.MobileNetV4FeatureExtractor),
17904
+ /* harmony export */ MobileNetV4ImageProcessor: () => (/* reexport safe */ _mobilenet_v4_image_processing_mobilenet_v4_js__WEBPACK_IMPORTED_MODULE_21__.MobileNetV4ImageProcessor),
17905
+ /* harmony export */ MobileViTFeatureExtractor: () => (/* reexport safe */ _mobilevit_image_processing_mobilevit_js__WEBPACK_IMPORTED_MODULE_22__.MobileViTFeatureExtractor),
17906
+ /* harmony export */ MobileViTImageProcessor: () => (/* reexport safe */ _mobilevit_image_processing_mobilevit_js__WEBPACK_IMPORTED_MODULE_22__.MobileViTImageProcessor),
17907
+ /* harmony export */ NougatImageProcessor: () => (/* reexport safe */ _nougat_image_processing_nougat_js__WEBPACK_IMPORTED_MODULE_23__.NougatImageProcessor),
17908
+ /* harmony export */ OwlViTFeatureExtractor: () => (/* reexport safe */ _owlvit_image_processing_owlvit_js__WEBPACK_IMPORTED_MODULE_25__.OwlViTFeatureExtractor),
17909
+ /* harmony export */ OwlViTImageProcessor: () => (/* reexport safe */ _owlvit_image_processing_owlvit_js__WEBPACK_IMPORTED_MODULE_25__.OwlViTImageProcessor),
17910
+ /* harmony export */ Owlv2ImageProcessor: () => (/* reexport safe */ _owlv2_image_processing_owlv2_js__WEBPACK_IMPORTED_MODULE_24__.Owlv2ImageProcessor),
17911
+ /* harmony export */ Phi3VImageProcessor: () => (/* reexport safe */ _phi3_v_image_processing_phi3_v_js__WEBPACK_IMPORTED_MODULE_26__.Phi3VImageProcessor),
17912
+ /* harmony export */ PvtImageProcessor: () => (/* reexport safe */ _pvt_image_processing_pvt_js__WEBPACK_IMPORTED_MODULE_27__.PvtImageProcessor),
17913
+ /* harmony export */ Qwen2VLImageProcessor: () => (/* reexport safe */ _qwen2_vl_image_processing_qwen2_vl_js__WEBPACK_IMPORTED_MODULE_28__.Qwen2VLImageProcessor),
17914
+ /* harmony export */ RTDetrImageProcessor: () => (/* reexport safe */ _rt_detr_image_processing_rt_detr_js__WEBPACK_IMPORTED_MODULE_29__.RTDetrImageProcessor),
17915
+ /* harmony export */ SamImageProcessor: () => (/* reexport safe */ _sam_image_processing_sam_js__WEBPACK_IMPORTED_MODULE_30__.SamImageProcessor),
17916
+ /* harmony export */ SegformerFeatureExtractor: () => (/* reexport safe */ _segformer_image_processing_segformer_js__WEBPACK_IMPORTED_MODULE_31__.SegformerFeatureExtractor),
17917
+ /* harmony export */ SegformerImageProcessor: () => (/* reexport safe */ _segformer_image_processing_segformer_js__WEBPACK_IMPORTED_MODULE_31__.SegformerImageProcessor),
17918
+ /* harmony export */ SiglipImageProcessor: () => (/* reexport safe */ _siglip_image_processing_siglip_js__WEBPACK_IMPORTED_MODULE_32__.SiglipImageProcessor),
17919
+ /* harmony export */ Swin2SRImageProcessor: () => (/* reexport safe */ _swin2sr_image_processing_swin2sr_js__WEBPACK_IMPORTED_MODULE_33__.Swin2SRImageProcessor),
17920
+ /* harmony export */ VLMImageProcessor: () => (/* reexport safe */ _janus_image_processing_janus_js__WEBPACK_IMPORTED_MODULE_13__.VLMImageProcessor),
17921
+ /* harmony export */ ViTFeatureExtractor: () => (/* reexport safe */ _vit_image_processing_vit_js__WEBPACK_IMPORTED_MODULE_34__.ViTFeatureExtractor),
17922
+ /* harmony export */ ViTImageProcessor: () => (/* reexport safe */ _vit_image_processing_vit_js__WEBPACK_IMPORTED_MODULE_34__.ViTImageProcessor),
17923
+ /* harmony export */ VitMatteImageProcessor: () => (/* reexport safe */ _vitmatte_image_processing_vitmatte_js__WEBPACK_IMPORTED_MODULE_35__.VitMatteImageProcessor),
17924
+ /* harmony export */ VitPoseImageProcessor: () => (/* reexport safe */ _vitpose_image_processing_vitpose_js__WEBPACK_IMPORTED_MODULE_36__.VitPoseImageProcessor),
17925
+ /* harmony export */ YolosFeatureExtractor: () => (/* reexport safe */ _yolos_image_processing_yolos_js__WEBPACK_IMPORTED_MODULE_37__.YolosFeatureExtractor),
17926
+ /* harmony export */ YolosImageProcessor: () => (/* reexport safe */ _yolos_image_processing_yolos_js__WEBPACK_IMPORTED_MODULE_37__.YolosImageProcessor)
17727
17927
  /* harmony export */ });
17728
17928
  /* harmony import */ var _beit_image_processing_beit_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ./beit/image_processing_beit.js */ "./src/models/beit/image_processing_beit.js");
17729
17929
  /* harmony import */ var _bit_image_processing_bit_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ./bit/image_processing_bit.js */ "./src/models/bit/image_processing_bit.js");
@@ -17736,32 +17936,34 @@ __webpack_require__.r(__webpack_exports__);
17736
17936
  /* harmony import */ var _dpt_image_processing_dpt_js__WEBPACK_IMPORTED_MODULE_8__ = __webpack_require__(/*! ./dpt/image_processing_dpt.js */ "./src/models/dpt/image_processing_dpt.js");
17737
17937
  /* harmony import */ var _efficientnet_image_processing_efficientnet_js__WEBPACK_IMPORTED_MODULE_9__ = __webpack_require__(/*! ./efficientnet/image_processing_efficientnet.js */ "./src/models/efficientnet/image_processing_efficientnet.js");
17738
17938
  /* harmony import */ var _glpn_image_processing_glpn_js__WEBPACK_IMPORTED_MODULE_10__ = __webpack_require__(/*! ./glpn/image_processing_glpn.js */ "./src/models/glpn/image_processing_glpn.js");
17739
- /* harmony import */ var _idefics3_image_processing_idefics3_js__WEBPACK_IMPORTED_MODULE_11__ = __webpack_require__(/*! ./idefics3/image_processing_idefics3.js */ "./src/models/idefics3/image_processing_idefics3.js");
17740
- /* harmony import */ var _janus_image_processing_janus_js__WEBPACK_IMPORTED_MODULE_12__ = __webpack_require__(/*! ./janus/image_processing_janus.js */ "./src/models/janus/image_processing_janus.js");
17741
- /* harmony import */ var _jina_clip_image_processing_jina_clip_js__WEBPACK_IMPORTED_MODULE_13__ = __webpack_require__(/*! ./jina_clip/image_processing_jina_clip.js */ "./src/models/jina_clip/image_processing_jina_clip.js");
17742
- /* harmony import */ var _llava_onevision_image_processing_llava_onevision_js__WEBPACK_IMPORTED_MODULE_14__ = __webpack_require__(/*! ./llava_onevision/image_processing_llava_onevision.js */ "./src/models/llava_onevision/image_processing_llava_onevision.js");
17743
- /* harmony import */ var _mask2former_image_processing_mask2former_js__WEBPACK_IMPORTED_MODULE_15__ = __webpack_require__(/*! ./mask2former/image_processing_mask2former.js */ "./src/models/mask2former/image_processing_mask2former.js");
17744
- /* harmony import */ var _maskformer_image_processing_maskformer_js__WEBPACK_IMPORTED_MODULE_16__ = __webpack_require__(/*! ./maskformer/image_processing_maskformer.js */ "./src/models/maskformer/image_processing_maskformer.js");
17745
- /* harmony import */ var _mobilenet_v1_image_processing_mobilenet_v1_js__WEBPACK_IMPORTED_MODULE_17__ = __webpack_require__(/*! ./mobilenet_v1/image_processing_mobilenet_v1.js */ "./src/models/mobilenet_v1/image_processing_mobilenet_v1.js");
17746
- /* harmony import */ var _mobilenet_v2_image_processing_mobilenet_v2_js__WEBPACK_IMPORTED_MODULE_18__ = __webpack_require__(/*! ./mobilenet_v2/image_processing_mobilenet_v2.js */ "./src/models/mobilenet_v2/image_processing_mobilenet_v2.js");
17747
- /* harmony import */ var _mobilenet_v3_image_processing_mobilenet_v3_js__WEBPACK_IMPORTED_MODULE_19__ = __webpack_require__(/*! ./mobilenet_v3/image_processing_mobilenet_v3.js */ "./src/models/mobilenet_v3/image_processing_mobilenet_v3.js");
17748
- /* harmony import */ var _mobilenet_v4_image_processing_mobilenet_v4_js__WEBPACK_IMPORTED_MODULE_20__ = __webpack_require__(/*! ./mobilenet_v4/image_processing_mobilenet_v4.js */ "./src/models/mobilenet_v4/image_processing_mobilenet_v4.js");
17749
- /* harmony import */ var _mobilevit_image_processing_mobilevit_js__WEBPACK_IMPORTED_MODULE_21__ = __webpack_require__(/*! ./mobilevit/image_processing_mobilevit.js */ "./src/models/mobilevit/image_processing_mobilevit.js");
17750
- /* harmony import */ var _nougat_image_processing_nougat_js__WEBPACK_IMPORTED_MODULE_22__ = __webpack_require__(/*! ./nougat/image_processing_nougat.js */ "./src/models/nougat/image_processing_nougat.js");
17751
- /* harmony import */ var _owlv2_image_processing_owlv2_js__WEBPACK_IMPORTED_MODULE_23__ = __webpack_require__(/*! ./owlv2/image_processing_owlv2.js */ "./src/models/owlv2/image_processing_owlv2.js");
17752
- /* harmony import */ var _owlvit_image_processing_owlvit_js__WEBPACK_IMPORTED_MODULE_24__ = __webpack_require__(/*! ./owlvit/image_processing_owlvit.js */ "./src/models/owlvit/image_processing_owlvit.js");
17753
- /* harmony import */ var _phi3_v_image_processing_phi3_v_js__WEBPACK_IMPORTED_MODULE_25__ = __webpack_require__(/*! ./phi3_v/image_processing_phi3_v.js */ "./src/models/phi3_v/image_processing_phi3_v.js");
17754
- /* harmony import */ var _pvt_image_processing_pvt_js__WEBPACK_IMPORTED_MODULE_26__ = __webpack_require__(/*! ./pvt/image_processing_pvt.js */ "./src/models/pvt/image_processing_pvt.js");
17755
- /* harmony import */ var _qwen2_vl_image_processing_qwen2_vl_js__WEBPACK_IMPORTED_MODULE_27__ = __webpack_require__(/*! ./qwen2_vl/image_processing_qwen2_vl.js */ "./src/models/qwen2_vl/image_processing_qwen2_vl.js");
17756
- /* harmony import */ var _rt_detr_image_processing_rt_detr_js__WEBPACK_IMPORTED_MODULE_28__ = __webpack_require__(/*! ./rt_detr/image_processing_rt_detr.js */ "./src/models/rt_detr/image_processing_rt_detr.js");
17757
- /* harmony import */ var _sam_image_processing_sam_js__WEBPACK_IMPORTED_MODULE_29__ = __webpack_require__(/*! ./sam/image_processing_sam.js */ "./src/models/sam/image_processing_sam.js");
17758
- /* harmony import */ var _segformer_image_processing_segformer_js__WEBPACK_IMPORTED_MODULE_30__ = __webpack_require__(/*! ./segformer/image_processing_segformer.js */ "./src/models/segformer/image_processing_segformer.js");
17759
- /* harmony import */ var _siglip_image_processing_siglip_js__WEBPACK_IMPORTED_MODULE_31__ = __webpack_require__(/*! ./siglip/image_processing_siglip.js */ "./src/models/siglip/image_processing_siglip.js");
17760
- /* harmony import */ var _swin2sr_image_processing_swin2sr_js__WEBPACK_IMPORTED_MODULE_32__ = __webpack_require__(/*! ./swin2sr/image_processing_swin2sr.js */ "./src/models/swin2sr/image_processing_swin2sr.js");
17761
- /* harmony import */ var _vit_image_processing_vit_js__WEBPACK_IMPORTED_MODULE_33__ = __webpack_require__(/*! ./vit/image_processing_vit.js */ "./src/models/vit/image_processing_vit.js");
17762
- /* harmony import */ var _vitmatte_image_processing_vitmatte_js__WEBPACK_IMPORTED_MODULE_34__ = __webpack_require__(/*! ./vitmatte/image_processing_vitmatte.js */ "./src/models/vitmatte/image_processing_vitmatte.js");
17763
- /* harmony import */ var _vitpose_image_processing_vitpose_js__WEBPACK_IMPORTED_MODULE_35__ = __webpack_require__(/*! ./vitpose/image_processing_vitpose.js */ "./src/models/vitpose/image_processing_vitpose.js");
17764
- /* harmony import */ var _yolos_image_processing_yolos_js__WEBPACK_IMPORTED_MODULE_36__ = __webpack_require__(/*! ./yolos/image_processing_yolos.js */ "./src/models/yolos/image_processing_yolos.js");
17939
+ /* harmony import */ var _grounding_dino_image_processing_grounding_dino_js__WEBPACK_IMPORTED_MODULE_11__ = __webpack_require__(/*! ./grounding_dino/image_processing_grounding_dino.js */ "./src/models/grounding_dino/image_processing_grounding_dino.js");
17940
+ /* harmony import */ var _idefics3_image_processing_idefics3_js__WEBPACK_IMPORTED_MODULE_12__ = __webpack_require__(/*! ./idefics3/image_processing_idefics3.js */ "./src/models/idefics3/image_processing_idefics3.js");
17941
+ /* harmony import */ var _janus_image_processing_janus_js__WEBPACK_IMPORTED_MODULE_13__ = __webpack_require__(/*! ./janus/image_processing_janus.js */ "./src/models/janus/image_processing_janus.js");
17942
+ /* harmony import */ var _jina_clip_image_processing_jina_clip_js__WEBPACK_IMPORTED_MODULE_14__ = __webpack_require__(/*! ./jina_clip/image_processing_jina_clip.js */ "./src/models/jina_clip/image_processing_jina_clip.js");
17943
+ /* harmony import */ var _llava_onevision_image_processing_llava_onevision_js__WEBPACK_IMPORTED_MODULE_15__ = __webpack_require__(/*! ./llava_onevision/image_processing_llava_onevision.js */ "./src/models/llava_onevision/image_processing_llava_onevision.js");
17944
+ /* harmony import */ var _mask2former_image_processing_mask2former_js__WEBPACK_IMPORTED_MODULE_16__ = __webpack_require__(/*! ./mask2former/image_processing_mask2former.js */ "./src/models/mask2former/image_processing_mask2former.js");
17945
+ /* harmony import */ var _maskformer_image_processing_maskformer_js__WEBPACK_IMPORTED_MODULE_17__ = __webpack_require__(/*! ./maskformer/image_processing_maskformer.js */ "./src/models/maskformer/image_processing_maskformer.js");
17946
+ /* harmony import */ var _mobilenet_v1_image_processing_mobilenet_v1_js__WEBPACK_IMPORTED_MODULE_18__ = __webpack_require__(/*! ./mobilenet_v1/image_processing_mobilenet_v1.js */ "./src/models/mobilenet_v1/image_processing_mobilenet_v1.js");
17947
+ /* harmony import */ var _mobilenet_v2_image_processing_mobilenet_v2_js__WEBPACK_IMPORTED_MODULE_19__ = __webpack_require__(/*! ./mobilenet_v2/image_processing_mobilenet_v2.js */ "./src/models/mobilenet_v2/image_processing_mobilenet_v2.js");
17948
+ /* harmony import */ var _mobilenet_v3_image_processing_mobilenet_v3_js__WEBPACK_IMPORTED_MODULE_20__ = __webpack_require__(/*! ./mobilenet_v3/image_processing_mobilenet_v3.js */ "./src/models/mobilenet_v3/image_processing_mobilenet_v3.js");
17949
+ /* harmony import */ var _mobilenet_v4_image_processing_mobilenet_v4_js__WEBPACK_IMPORTED_MODULE_21__ = __webpack_require__(/*! ./mobilenet_v4/image_processing_mobilenet_v4.js */ "./src/models/mobilenet_v4/image_processing_mobilenet_v4.js");
17950
+ /* harmony import */ var _mobilevit_image_processing_mobilevit_js__WEBPACK_IMPORTED_MODULE_22__ = __webpack_require__(/*! ./mobilevit/image_processing_mobilevit.js */ "./src/models/mobilevit/image_processing_mobilevit.js");
17951
+ /* harmony import */ var _nougat_image_processing_nougat_js__WEBPACK_IMPORTED_MODULE_23__ = __webpack_require__(/*! ./nougat/image_processing_nougat.js */ "./src/models/nougat/image_processing_nougat.js");
17952
+ /* harmony import */ var _owlv2_image_processing_owlv2_js__WEBPACK_IMPORTED_MODULE_24__ = __webpack_require__(/*! ./owlv2/image_processing_owlv2.js */ "./src/models/owlv2/image_processing_owlv2.js");
17953
+ /* harmony import */ var _owlvit_image_processing_owlvit_js__WEBPACK_IMPORTED_MODULE_25__ = __webpack_require__(/*! ./owlvit/image_processing_owlvit.js */ "./src/models/owlvit/image_processing_owlvit.js");
17954
+ /* harmony import */ var _phi3_v_image_processing_phi3_v_js__WEBPACK_IMPORTED_MODULE_26__ = __webpack_require__(/*! ./phi3_v/image_processing_phi3_v.js */ "./src/models/phi3_v/image_processing_phi3_v.js");
17955
+ /* harmony import */ var _pvt_image_processing_pvt_js__WEBPACK_IMPORTED_MODULE_27__ = __webpack_require__(/*! ./pvt/image_processing_pvt.js */ "./src/models/pvt/image_processing_pvt.js");
17956
+ /* harmony import */ var _qwen2_vl_image_processing_qwen2_vl_js__WEBPACK_IMPORTED_MODULE_28__ = __webpack_require__(/*! ./qwen2_vl/image_processing_qwen2_vl.js */ "./src/models/qwen2_vl/image_processing_qwen2_vl.js");
17957
+ /* harmony import */ var _rt_detr_image_processing_rt_detr_js__WEBPACK_IMPORTED_MODULE_29__ = __webpack_require__(/*! ./rt_detr/image_processing_rt_detr.js */ "./src/models/rt_detr/image_processing_rt_detr.js");
17958
+ /* harmony import */ var _sam_image_processing_sam_js__WEBPACK_IMPORTED_MODULE_30__ = __webpack_require__(/*! ./sam/image_processing_sam.js */ "./src/models/sam/image_processing_sam.js");
17959
+ /* harmony import */ var _segformer_image_processing_segformer_js__WEBPACK_IMPORTED_MODULE_31__ = __webpack_require__(/*! ./segformer/image_processing_segformer.js */ "./src/models/segformer/image_processing_segformer.js");
17960
+ /* harmony import */ var _siglip_image_processing_siglip_js__WEBPACK_IMPORTED_MODULE_32__ = __webpack_require__(/*! ./siglip/image_processing_siglip.js */ "./src/models/siglip/image_processing_siglip.js");
17961
+ /* harmony import */ var _swin2sr_image_processing_swin2sr_js__WEBPACK_IMPORTED_MODULE_33__ = __webpack_require__(/*! ./swin2sr/image_processing_swin2sr.js */ "./src/models/swin2sr/image_processing_swin2sr.js");
17962
+ /* harmony import */ var _vit_image_processing_vit_js__WEBPACK_IMPORTED_MODULE_34__ = __webpack_require__(/*! ./vit/image_processing_vit.js */ "./src/models/vit/image_processing_vit.js");
17963
+ /* harmony import */ var _vitmatte_image_processing_vitmatte_js__WEBPACK_IMPORTED_MODULE_35__ = __webpack_require__(/*! ./vitmatte/image_processing_vitmatte.js */ "./src/models/vitmatte/image_processing_vitmatte.js");
17964
+ /* harmony import */ var _vitpose_image_processing_vitpose_js__WEBPACK_IMPORTED_MODULE_36__ = __webpack_require__(/*! ./vitpose/image_processing_vitpose.js */ "./src/models/vitpose/image_processing_vitpose.js");
17965
+ /* harmony import */ var _yolos_image_processing_yolos_js__WEBPACK_IMPORTED_MODULE_37__ = __webpack_require__(/*! ./yolos/image_processing_yolos.js */ "./src/models/yolos/image_processing_yolos.js");
17966
+
17765
17967
 
17766
17968
 
17767
17969
 
@@ -18977,36 +19179,39 @@ class Phi3VProcessor extends _base_processing_utils_js__WEBPACK_IMPORTED_MODULE_
18977
19179
  __webpack_require__.r(__webpack_exports__);
18978
19180
  /* harmony export */ __webpack_require__.d(__webpack_exports__, {
18979
19181
  /* harmony export */ Florence2Processor: () => (/* reexport safe */ _florence2_processing_florence2_js__WEBPACK_IMPORTED_MODULE_0__.Florence2Processor),
18980
- /* harmony export */ Idefics3Processor: () => (/* reexport safe */ _idefics3_processing_idefics3_js__WEBPACK_IMPORTED_MODULE_3__.Idefics3Processor),
18981
- /* harmony export */ JinaCLIPProcessor: () => (/* reexport safe */ _jina_clip_processing_jina_clip_js__WEBPACK_IMPORTED_MODULE_5__.JinaCLIPProcessor),
18982
- /* harmony export */ MgpstrProcessor: () => (/* reexport safe */ _mgp_str_processing_mgp_str_js__WEBPACK_IMPORTED_MODULE_1__.MgpstrProcessor),
18983
- /* harmony export */ MoonshineProcessor: () => (/* reexport safe */ _moonshine_processing_moonshine_js__WEBPACK_IMPORTED_MODULE_2__.MoonshineProcessor),
18984
- /* harmony export */ OwlViTProcessor: () => (/* reexport safe */ _owlvit_processing_owlvit_js__WEBPACK_IMPORTED_MODULE_6__.OwlViTProcessor),
18985
- /* harmony export */ PaliGemmaProcessor: () => (/* reexport safe */ _paligemma_processing_paligemma_js__WEBPACK_IMPORTED_MODULE_8__.PaliGemmaProcessor),
18986
- /* harmony export */ Phi3VProcessor: () => (/* reexport safe */ _phi3_v_processing_phi3_v_js__WEBPACK_IMPORTED_MODULE_7__.Phi3VProcessor),
18987
- /* harmony export */ PyAnnoteProcessor: () => (/* reexport safe */ _pyannote_processing_pyannote_js__WEBPACK_IMPORTED_MODULE_9__.PyAnnoteProcessor),
18988
- /* harmony export */ Qwen2VLProcessor: () => (/* reexport safe */ _qwen2_vl_processing_qwen2_vl_js__WEBPACK_IMPORTED_MODULE_10__.Qwen2VLProcessor),
18989
- /* harmony export */ SamProcessor: () => (/* reexport safe */ _sam_processing_sam_js__WEBPACK_IMPORTED_MODULE_11__.SamProcessor),
18990
- /* harmony export */ SpeechT5Processor: () => (/* reexport safe */ _speecht5_processing_speecht5_js__WEBPACK_IMPORTED_MODULE_12__.SpeechT5Processor),
18991
- /* harmony export */ VLChatProcessor: () => (/* reexport safe */ _janus_processing_janus_js__WEBPACK_IMPORTED_MODULE_4__.VLChatProcessor),
18992
- /* harmony export */ Wav2Vec2ProcessorWithLM: () => (/* reexport safe */ _wav2vec2_processing_wav2vec2_js__WEBPACK_IMPORTED_MODULE_13__.Wav2Vec2ProcessorWithLM),
18993
- /* harmony export */ WhisperProcessor: () => (/* reexport safe */ _whisper_processing_whisper_js__WEBPACK_IMPORTED_MODULE_14__.WhisperProcessor)
19182
+ /* harmony export */ GroundingDinoProcessor: () => (/* reexport safe */ _grounding_dino_processing_grounding_dino_js__WEBPACK_IMPORTED_MODULE_1__.GroundingDinoProcessor),
19183
+ /* harmony export */ Idefics3Processor: () => (/* reexport safe */ _idefics3_processing_idefics3_js__WEBPACK_IMPORTED_MODULE_2__.Idefics3Processor),
19184
+ /* harmony export */ JinaCLIPProcessor: () => (/* reexport safe */ _jina_clip_processing_jina_clip_js__WEBPACK_IMPORTED_MODULE_4__.JinaCLIPProcessor),
19185
+ /* harmony export */ MgpstrProcessor: () => (/* reexport safe */ _mgp_str_processing_mgp_str_js__WEBPACK_IMPORTED_MODULE_5__.MgpstrProcessor),
19186
+ /* harmony export */ MoonshineProcessor: () => (/* reexport safe */ _moonshine_processing_moonshine_js__WEBPACK_IMPORTED_MODULE_6__.MoonshineProcessor),
19187
+ /* harmony export */ OwlViTProcessor: () => (/* reexport safe */ _owlvit_processing_owlvit_js__WEBPACK_IMPORTED_MODULE_7__.OwlViTProcessor),
19188
+ /* harmony export */ PaliGemmaProcessor: () => (/* reexport safe */ _paligemma_processing_paligemma_js__WEBPACK_IMPORTED_MODULE_9__.PaliGemmaProcessor),
19189
+ /* harmony export */ Phi3VProcessor: () => (/* reexport safe */ _phi3_v_processing_phi3_v_js__WEBPACK_IMPORTED_MODULE_8__.Phi3VProcessor),
19190
+ /* harmony export */ PyAnnoteProcessor: () => (/* reexport safe */ _pyannote_processing_pyannote_js__WEBPACK_IMPORTED_MODULE_10__.PyAnnoteProcessor),
19191
+ /* harmony export */ Qwen2VLProcessor: () => (/* reexport safe */ _qwen2_vl_processing_qwen2_vl_js__WEBPACK_IMPORTED_MODULE_11__.Qwen2VLProcessor),
19192
+ /* harmony export */ SamProcessor: () => (/* reexport safe */ _sam_processing_sam_js__WEBPACK_IMPORTED_MODULE_12__.SamProcessor),
19193
+ /* harmony export */ SpeechT5Processor: () => (/* reexport safe */ _speecht5_processing_speecht5_js__WEBPACK_IMPORTED_MODULE_13__.SpeechT5Processor),
19194
+ /* harmony export */ VLChatProcessor: () => (/* reexport safe */ _janus_processing_janus_js__WEBPACK_IMPORTED_MODULE_3__.VLChatProcessor),
19195
+ /* harmony export */ Wav2Vec2ProcessorWithLM: () => (/* reexport safe */ _wav2vec2_processing_wav2vec2_js__WEBPACK_IMPORTED_MODULE_14__.Wav2Vec2ProcessorWithLM),
19196
+ /* harmony export */ WhisperProcessor: () => (/* reexport safe */ _whisper_processing_whisper_js__WEBPACK_IMPORTED_MODULE_15__.WhisperProcessor)
18994
19197
  /* harmony export */ });
18995
19198
  /* harmony import */ var _florence2_processing_florence2_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ./florence2/processing_florence2.js */ "./src/models/florence2/processing_florence2.js");
18996
- /* harmony import */ var _mgp_str_processing_mgp_str_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ./mgp_str/processing_mgp_str.js */ "./src/models/mgp_str/processing_mgp_str.js");
18997
- /* harmony import */ var _moonshine_processing_moonshine_js__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! ./moonshine/processing_moonshine.js */ "./src/models/moonshine/processing_moonshine.js");
18998
- /* harmony import */ var _idefics3_processing_idefics3_js__WEBPACK_IMPORTED_MODULE_3__ = __webpack_require__(/*! ./idefics3/processing_idefics3.js */ "./src/models/idefics3/processing_idefics3.js");
18999
- /* harmony import */ var _janus_processing_janus_js__WEBPACK_IMPORTED_MODULE_4__ = __webpack_require__(/*! ./janus/processing_janus.js */ "./src/models/janus/processing_janus.js");
19000
- /* harmony import */ var _jina_clip_processing_jina_clip_js__WEBPACK_IMPORTED_MODULE_5__ = __webpack_require__(/*! ./jina_clip/processing_jina_clip.js */ "./src/models/jina_clip/processing_jina_clip.js");
19001
- /* harmony import */ var _owlvit_processing_owlvit_js__WEBPACK_IMPORTED_MODULE_6__ = __webpack_require__(/*! ./owlvit/processing_owlvit.js */ "./src/models/owlvit/processing_owlvit.js");
19002
- /* harmony import */ var _phi3_v_processing_phi3_v_js__WEBPACK_IMPORTED_MODULE_7__ = __webpack_require__(/*! ./phi3_v/processing_phi3_v.js */ "./src/models/phi3_v/processing_phi3_v.js");
19003
- /* harmony import */ var _paligemma_processing_paligemma_js__WEBPACK_IMPORTED_MODULE_8__ = __webpack_require__(/*! ./paligemma/processing_paligemma.js */ "./src/models/paligemma/processing_paligemma.js");
19004
- /* harmony import */ var _pyannote_processing_pyannote_js__WEBPACK_IMPORTED_MODULE_9__ = __webpack_require__(/*! ./pyannote/processing_pyannote.js */ "./src/models/pyannote/processing_pyannote.js");
19005
- /* harmony import */ var _qwen2_vl_processing_qwen2_vl_js__WEBPACK_IMPORTED_MODULE_10__ = __webpack_require__(/*! ./qwen2_vl/processing_qwen2_vl.js */ "./src/models/qwen2_vl/processing_qwen2_vl.js");
19006
- /* harmony import */ var _sam_processing_sam_js__WEBPACK_IMPORTED_MODULE_11__ = __webpack_require__(/*! ./sam/processing_sam.js */ "./src/models/sam/processing_sam.js");
19007
- /* harmony import */ var _speecht5_processing_speecht5_js__WEBPACK_IMPORTED_MODULE_12__ = __webpack_require__(/*! ./speecht5/processing_speecht5.js */ "./src/models/speecht5/processing_speecht5.js");
19008
- /* harmony import */ var _wav2vec2_processing_wav2vec2_js__WEBPACK_IMPORTED_MODULE_13__ = __webpack_require__(/*! ./wav2vec2/processing_wav2vec2.js */ "./src/models/wav2vec2/processing_wav2vec2.js");
19009
- /* harmony import */ var _whisper_processing_whisper_js__WEBPACK_IMPORTED_MODULE_14__ = __webpack_require__(/*! ./whisper/processing_whisper.js */ "./src/models/whisper/processing_whisper.js");
19199
+ /* harmony import */ var _grounding_dino_processing_grounding_dino_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ./grounding_dino/processing_grounding_dino.js */ "./src/models/grounding_dino/processing_grounding_dino.js");
19200
+ /* harmony import */ var _idefics3_processing_idefics3_js__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! ./idefics3/processing_idefics3.js */ "./src/models/idefics3/processing_idefics3.js");
19201
+ /* harmony import */ var _janus_processing_janus_js__WEBPACK_IMPORTED_MODULE_3__ = __webpack_require__(/*! ./janus/processing_janus.js */ "./src/models/janus/processing_janus.js");
19202
+ /* harmony import */ var _jina_clip_processing_jina_clip_js__WEBPACK_IMPORTED_MODULE_4__ = __webpack_require__(/*! ./jina_clip/processing_jina_clip.js */ "./src/models/jina_clip/processing_jina_clip.js");
19203
+ /* harmony import */ var _mgp_str_processing_mgp_str_js__WEBPACK_IMPORTED_MODULE_5__ = __webpack_require__(/*! ./mgp_str/processing_mgp_str.js */ "./src/models/mgp_str/processing_mgp_str.js");
19204
+ /* harmony import */ var _moonshine_processing_moonshine_js__WEBPACK_IMPORTED_MODULE_6__ = __webpack_require__(/*! ./moonshine/processing_moonshine.js */ "./src/models/moonshine/processing_moonshine.js");
19205
+ /* harmony import */ var _owlvit_processing_owlvit_js__WEBPACK_IMPORTED_MODULE_7__ = __webpack_require__(/*! ./owlvit/processing_owlvit.js */ "./src/models/owlvit/processing_owlvit.js");
19206
+ /* harmony import */ var _phi3_v_processing_phi3_v_js__WEBPACK_IMPORTED_MODULE_8__ = __webpack_require__(/*! ./phi3_v/processing_phi3_v.js */ "./src/models/phi3_v/processing_phi3_v.js");
19207
+ /* harmony import */ var _paligemma_processing_paligemma_js__WEBPACK_IMPORTED_MODULE_9__ = __webpack_require__(/*! ./paligemma/processing_paligemma.js */ "./src/models/paligemma/processing_paligemma.js");
19208
+ /* harmony import */ var _pyannote_processing_pyannote_js__WEBPACK_IMPORTED_MODULE_10__ = __webpack_require__(/*! ./pyannote/processing_pyannote.js */ "./src/models/pyannote/processing_pyannote.js");
19209
+ /* harmony import */ var _qwen2_vl_processing_qwen2_vl_js__WEBPACK_IMPORTED_MODULE_11__ = __webpack_require__(/*! ./qwen2_vl/processing_qwen2_vl.js */ "./src/models/qwen2_vl/processing_qwen2_vl.js");
19210
+ /* harmony import */ var _sam_processing_sam_js__WEBPACK_IMPORTED_MODULE_12__ = __webpack_require__(/*! ./sam/processing_sam.js */ "./src/models/sam/processing_sam.js");
19211
+ /* harmony import */ var _speecht5_processing_speecht5_js__WEBPACK_IMPORTED_MODULE_13__ = __webpack_require__(/*! ./speecht5/processing_speecht5.js */ "./src/models/speecht5/processing_speecht5.js");
19212
+ /* harmony import */ var _wav2vec2_processing_wav2vec2_js__WEBPACK_IMPORTED_MODULE_14__ = __webpack_require__(/*! ./wav2vec2/processing_wav2vec2.js */ "./src/models/wav2vec2/processing_wav2vec2.js");
19213
+ /* harmony import */ var _whisper_processing_whisper_js__WEBPACK_IMPORTED_MODULE_15__ = __webpack_require__(/*! ./whisper/processing_whisper.js */ "./src/models/whisper/processing_whisper.js");
19214
+
19010
19215
 
19011
19216
 
19012
19217
 
@@ -23512,13 +23717,35 @@ class ZeroShotObjectDetectionPipeline extends (/** @type {new (options: TextImag
23512
23717
  // Run model with both text and pixel inputs
23513
23718
  const output = await this.model({ ...text_inputs, pixel_values });
23514
23719
 
23515
- // @ts-ignore
23516
- const processed = this.processor.image_processor.post_process_object_detection(output, threshold, imageSize, true)[0];
23517
- let result = processed.boxes.map((box, i) => ({
23518
- score: processed.scores[i],
23519
- label: candidate_labels[processed.classes[i]],
23520
- box: get_bounding_box(box, !percentage),
23521
- })).sort((a, b) => b.score - a.score);
23720
+ let result;
23721
+ if('post_process_grounded_object_detection' in this.processor) {
23722
+ // @ts-ignore
23723
+ const processed = this.processor.post_process_grounded_object_detection(
23724
+ output,
23725
+ text_inputs.input_ids,
23726
+ {
23727
+ // TODO: support separate threshold values
23728
+ box_threshold: threshold,
23729
+ text_threshold: threshold,
23730
+ target_sizes: imageSize,
23731
+ },
23732
+ )[0];
23733
+ result = processed.boxes.map((box, i) => ({
23734
+ score: processed.scores[i],
23735
+ label: processed.labels[i],
23736
+ box: get_bounding_box(box, !percentage),
23737
+ }))
23738
+ } else {
23739
+ // @ts-ignore
23740
+ const processed = this.processor.image_processor.post_process_object_detection(output, threshold, imageSize, true)[0];
23741
+ result = processed.boxes.map((box, i) => ({
23742
+ score: processed.scores[i],
23743
+ label: candidate_labels[processed.classes[i]],
23744
+ box: get_bounding_box(box, !percentage),
23745
+ }))
23746
+ }
23747
+ result.sort((a, b) => b.score - a.score);
23748
+
23522
23749
  if (top_k !== null) {
23523
23750
  result = result.slice(0, top_k);
23524
23751
  }
@@ -23638,7 +23865,7 @@ class DocumentQuestionAnsweringPipeline extends (/** @type {new (options: TextIm
23638
23865
  * const synthesizer = await pipeline('text-to-speech', 'Xenova/speecht5_tts', { quantized: false });
23639
23866
  * const speaker_embeddings = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/speaker_embeddings.bin';
23640
23867
  * const out = await synthesizer('Hello, my dog is cute', { speaker_embeddings });
23641
- * // {
23868
+ * // RawAudio {
23642
23869
  * // audio: Float32Array(26112) [-0.00005657337896991521, 0.00020583874720614403, ...],
23643
23870
  * // sampling_rate: 16000
23644
23871
  * // }
@@ -23658,7 +23885,7 @@ class DocumentQuestionAnsweringPipeline extends (/** @type {new (options: TextIm
23658
23885
  * ```javascript
23659
23886
  * const synthesizer = await pipeline('text-to-speech', 'Xenova/mms-tts-fra');
23660
23887
  * const out = await synthesizer('Bonjour');
23661
- * // {
23888
+ * // RawAudio {
23662
23889
  * // audio: Float32Array(23808) [-0.00037693005288019776, 0.0003325853613205254, ...],
23663
23890
  * // sampling_rate: 16000
23664
23891
  * // }
@@ -23705,10 +23932,10 @@ class TextToAudioPipeline extends (/** @type {new (options: TextToAudioPipelineC
23705
23932
 
23706
23933
  // @ts-expect-error TS2339
23707
23934
  const sampling_rate = this.model.config.sampling_rate;
23708
- return {
23709
- audio: waveform.data,
23935
+ return new _utils_audio_js__WEBPACK_IMPORTED_MODULE_7__.RawAudio(
23936
+ waveform.data,
23710
23937
  sampling_rate,
23711
- }
23938
+ )
23712
23939
  }
23713
23940
 
23714
23941
  async _call_text_to_spectrogram(text_inputs, { speaker_embeddings }) {
@@ -23748,10 +23975,10 @@ class TextToAudioPipeline extends (/** @type {new (options: TextToAudioPipelineC
23748
23975
  const { waveform } = await this.model.generate_speech(input_ids, speaker_embeddings, { vocoder: this.vocoder });
23749
23976
 
23750
23977
  const sampling_rate = this.processor.feature_extractor.config.sampling_rate;
23751
- return {
23752
- audio: waveform.data,
23978
+ return new _utils_audio_js__WEBPACK_IMPORTED_MODULE_7__.RawAudio(
23979
+ waveform.data,
23753
23980
  sampling_rate,
23754
- }
23981
+ )
23755
23982
  }
23756
23983
  }
23757
23984
 
@@ -28797,6 +29024,7 @@ class AutoTokenizer {
28797
29024
  "use strict";
28798
29025
  __webpack_require__.r(__webpack_exports__);
28799
29026
  /* harmony export */ __webpack_require__.d(__webpack_exports__, {
29027
+ /* harmony export */ RawAudio: () => (/* binding */ RawAudio),
28800
29028
  /* harmony export */ hamming: () => (/* binding */ hamming),
28801
29029
  /* harmony export */ hanning: () => (/* binding */ hanning),
28802
29030
  /* harmony export */ mel_filter_bank: () => (/* binding */ mel_filter_bank),
@@ -28807,7 +29035,9 @@ __webpack_require__.r(__webpack_exports__);
28807
29035
  /* harmony import */ var _hub_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ./hub.js */ "./src/utils/hub.js");
28808
29036
  /* harmony import */ var _maths_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ./maths.js */ "./src/utils/maths.js");
28809
29037
  /* harmony import */ var _core_js__WEBPACK_IMPORTED_MODULE_2__ = __webpack_require__(/*! ./core.js */ "./src/utils/core.js");
28810
- /* harmony import */ var _tensor_js__WEBPACK_IMPORTED_MODULE_3__ = __webpack_require__(/*! ./tensor.js */ "./src/utils/tensor.js");
29038
+ /* harmony import */ var _env_js__WEBPACK_IMPORTED_MODULE_3__ = __webpack_require__(/*! ../env.js */ "./src/env.js");
29039
+ /* harmony import */ var fs__WEBPACK_IMPORTED_MODULE_4__ = __webpack_require__(/*! fs */ "fs");
29040
+ /* harmony import */ var _tensor_js__WEBPACK_IMPORTED_MODULE_5__ = __webpack_require__(/*! ./tensor.js */ "./src/utils/tensor.js");
28811
29041
  /**
28812
29042
  * @file Helper module for audio processing.
28813
29043
  *
@@ -28823,6 +29053,8 @@ __webpack_require__.r(__webpack_exports__);
28823
29053
 
28824
29054
 
28825
29055
 
29056
+
29057
+
28826
29058
  /**
28827
29059
  * Helper function to read audio from a path/URL.
28828
29060
  * @param {string|URL} url The path/URL to load the audio from.
@@ -29416,10 +29648,10 @@ async function spectrogram(
29416
29648
  // - mel_filters.shape=(80, 201)
29417
29649
  // - magnitudes.shape=(3000, 201) => magnitudes.T.shape=(201, 3000)
29418
29650
  // - mel_spec.shape=(80, 3000)
29419
- let mel_spec = await (0,_tensor_js__WEBPACK_IMPORTED_MODULE_3__.matmul)(
29651
+ let mel_spec = await (0,_tensor_js__WEBPACK_IMPORTED_MODULE_5__.matmul)(
29420
29652
  // TODO: Make `mel_filters` a Tensor during initialization
29421
- new _tensor_js__WEBPACK_IMPORTED_MODULE_3__.Tensor('float32', mel_filters.flat(), [num_mel_filters, num_frequency_bins]),
29422
- new _tensor_js__WEBPACK_IMPORTED_MODULE_3__.Tensor('float32', transposedMagnitudeData, [num_frequency_bins, d1Max]),
29653
+ new _tensor_js__WEBPACK_IMPORTED_MODULE_5__.Tensor('float32', mel_filters.flat(), [num_mel_filters, num_frequency_bins]),
29654
+ new _tensor_js__WEBPACK_IMPORTED_MODULE_5__.Tensor('float32', transposedMagnitudeData, [num_frequency_bins, d1Max]),
29423
29655
  );
29424
29656
  if (transpose) {
29425
29657
  mel_spec = mel_spec.transpose(1, 0);
@@ -29509,6 +29741,116 @@ function window_function(window_length, name, {
29509
29741
  return window;
29510
29742
  }
29511
29743
 
29744
/**
 * Encode audio data to a WAV file.
 * WAV file specs : https://en.wikipedia.org/wiki/WAV#WAV_File_header
 *
 * Adapted from https://www.npmjs.com/package/audiobuffer-to-wav
 * @param {Float32Array} samples The audio samples.
 * @param {number} rate The sample rate.
 * @returns {ArrayBuffer} The WAV audio buffer.
 */
function encodeWAV(samples, rate) {
    const HEADER_SIZE = 44;
    const BYTES_PER_SAMPLE = 4; // 32-bit IEEE float samples
    const dataLength = samples.length * BYTES_PER_SAMPLE;

    const buffer = new ArrayBuffer(HEADER_SIZE + dataLength);
    const view = new DataView(buffer);

    // RIFF identifier
    writeString(view, 0, "RIFF");
    // RIFF chunk length (36 header bytes after this field + payload)
    view.setUint32(4, 36 + dataLength, true);
    // RIFF type
    writeString(view, 8, "WAVE");
    // Format chunk identifier
    writeString(view, 12, "fmt ");
    // Format chunk length
    view.setUint32(16, 16, true);
    // Sample format: 3 = IEEE float
    view.setUint16(20, 3, true);
    // Channel count: mono
    view.setUint16(22, 1, true);
    // Sample rate
    view.setUint32(24, rate, true);
    // Byte rate (sample rate * block align)
    view.setUint32(28, rate * BYTES_PER_SAMPLE, true);
    // Block align (channel count * bytes per sample)
    view.setUint16(32, BYTES_PER_SAMPLE, true);
    // Bits per sample
    view.setUint16(34, 8 * BYTES_PER_SAMPLE, true);
    // Data chunk identifier
    writeString(view, 36, "data");
    // Data chunk length
    view.setUint32(40, dataLength, true);

    // Payload: little-endian 32-bit floats immediately after the header.
    let offset = HEADER_SIZE;
    for (let i = 0; i < samples.length; ++i) {
        view.setFloat32(offset, samples[i], true);
        offset += BYTES_PER_SAMPLE;
    }

    return buffer;
}

/**
 * Write an ASCII string into a DataView, one byte per character.
 * @param {DataView} view Destination view.
 * @param {number} offset Byte offset at which to start writing.
 * @param {string} string The string to write.
 */
function writeString(view, offset, string) {
    const length = string.length;
    for (let i = 0; i < length; ++i) {
        view.setUint8(offset + i, string.charCodeAt(i));
    }
}
29797
+
29798
+
29799
class RawAudio {

    /**
     * Create a new `RawAudio` object.
     * @param {Float32Array} audio Audio data
     * @param {number} sampling_rate Sampling rate of the audio data
     */
    constructor(audio, sampling_rate) {
        this.audio = audio;
        this.sampling_rate = sampling_rate;
    }

    /**
     * Convert the audio to a wav file buffer.
     * @returns {ArrayBuffer} The WAV file.
     */
    toWav() {
        return encodeWAV(this.audio, this.sampling_rate);
    }

    /**
     * Convert the audio to a blob.
     * @returns {Blob} A blob with MIME type `audio/wav`.
     */
    toBlob() {
        return new Blob([this.toWav()], { type: 'audio/wav' });
    }

    /**
     * Save the audio to a wav file.
     * @param {string} path Download filename (browser) or filesystem path (Node).
     * @throws {Error} When called from a Web Worker, or when no filesystem is available.
     */
    async save(path) {
        // Select a writer appropriate for the current environment.
        let writer;

        if (_env_js__WEBPACK_IMPORTED_MODULE_3__.apis.IS_BROWSER_ENV) {
            if (_env_js__WEBPACK_IMPORTED_MODULE_3__.apis.IS_WEBWORKER_ENV) {
                throw new Error('Unable to save a file from a Web Worker.');
            }
            // Main-thread browser: trigger a download via an anchor element.
            writer = _core_js__WEBPACK_IMPORTED_MODULE_2__.saveBlob;
        } else if (_env_js__WEBPACK_IMPORTED_MODULE_3__.apis.IS_FS_AVAILABLE) {
            // Node-like environment: write the blob's bytes to disk.
            writer = async (/** @type {string} */ dest, /** @type {Blob} */ blob) => {
                const buffer = await blob.arrayBuffer();
                fs__WEBPACK_IMPORTED_MODULE_4__.writeFileSync(dest, Buffer.from(buffer));
            };
        } else {
            throw new Error('Unable to save because filesystem is disabled in this environment.');
        }

        await writer(path, this.toBlob());
    }
}
29853
+
29512
29854
 
29513
29855
  /***/ }),
29514
29856
 
@@ -29564,7 +29906,8 @@ __webpack_require__.r(__webpack_exports__);
29564
29906
  /* harmony export */ pick: () => (/* binding */ pick),
29565
29907
  /* harmony export */ pop: () => (/* binding */ pop),
29566
29908
  /* harmony export */ product: () => (/* binding */ product),
29567
- /* harmony export */ reverseDictionary: () => (/* binding */ reverseDictionary)
29909
+ /* harmony export */ reverseDictionary: () => (/* binding */ reverseDictionary),
29910
+ /* harmony export */ saveBlob: () => (/* binding */ saveBlob)
29568
29911
  /* harmony export */ });
29569
29912
 
29570
29913
  /**
@@ -29757,6 +30100,32 @@ function calculateReflectOffset(i, w) {
29757
30100
  return Math.abs((i + w) % (2 * w) - w);
29758
30101
  }
29759
30102
 
30103
/**
 * Save blob file on the web.
 * @param {string} path The path to save the blob to
 * @param {Blob} blob The blob to save
 */
function saveBlob(path, blob) {
    // Create a temporary object URL pointing at the blob's contents.
    const objectUrl = URL.createObjectURL(blob);

    // Use a transient anchor element to trigger the browser's download flow,
    // with `download` providing the suggested filename.
    const anchor = document.createElement('a');
    anchor.href = objectUrl;
    anchor.download = path;
    anchor.click();

    // Clean up: detach the anchor and release the object URL's memory.
    anchor.remove();
    URL.revokeObjectURL(objectUrl);
}
30128
+
29760
30129
  /**
29761
30130
  *
29762
30131
  * @param {Object} o
@@ -31939,23 +32308,9 @@ class RawImage {
31939
32308
  // Convert image to Blob
31940
32309
  const blob = await this.toBlob(mime);
31941
32310
 
31942
- // Convert the canvas content to a data URL
31943
- const dataURL = URL.createObjectURL(blob);
31944
-
31945
- // Create an anchor element with the data URL as the href attribute
31946
- const downloadLink = document.createElement('a');
31947
- downloadLink.href = dataURL;
31948
-
31949
- // Set the download attribute to specify the desired filename for the downloaded image
31950
- downloadLink.download = path;
32311
+ (0,_core_js__WEBPACK_IMPORTED_MODULE_0__.saveBlob)(path, blob)
31951
32312
 
31952
- // Trigger the download
31953
- downloadLink.click();
31954
-
31955
- // Clean up: remove the anchor element from the DOM
31956
- downloadLink.remove();
31957
-
31958
- } else if (!_env_js__WEBPACK_IMPORTED_MODULE_2__.env.useFS) {
32313
+ } else if (!_env_js__WEBPACK_IMPORTED_MODULE_2__.apis.IS_FS_AVAILABLE) {
31959
32314
  throw new Error('Unable to save the image because filesystem is disabled in this environment.')
31960
32315
 
31961
32316
  } else {
@@ -31985,6 +32340,7 @@ class RawImage {
31985
32340
  const load_image = RawImage.read.bind(RawImage);
31986
32341
 
31987
32342
 
32343
+
31988
32344
  /***/ }),
31989
32345
 
31990
32346
  /***/ "./src/utils/maths.js":
@@ -33600,55 +33956,22 @@ class Tensor {
33600
33956
  }
33601
33957
 
33602
33958
  const this_data = this.data;
33959
+ const fn = (a, b) => a + (b ** p);
33603
33960
 
33604
33961
  if (dim === null) {
33605
33962
  // @ts-ignore
33606
- let val = this_data.reduce((a, b) => a + (b ** p), 0) ** (1 / p);
33963
+ const val = this_data.reduce(fn, 0) ** (1 / p);
33607
33964
  return new Tensor(this.type, [val], []);
33608
33965
  }
33609
33966
 
33610
- // Negative indexing
33611
- dim = safeIndex(dim, this.dims.length);
33612
-
33613
- // Calculate the shape of the resulting array after summation
33614
- const resultDims = this.dims.slice(); // Copy the original dimensions
33615
- resultDims[dim] = 1; // Remove the specified axis
33616
-
33617
- // Create a new array to store the accumulated values
33618
- // @ts-ignore
33619
- const result = new this_data.constructor(this_data.length / this.dims[dim]);
33620
-
33621
- // Iterate over the data array
33622
- for (let i = 0; i < this_data.length; ++i) {
33623
-
33624
- // Calculate the index in the resulting array
33625
- let resultIndex = 0;
33626
-
33627
- for (let j = this.dims.length - 1, num = i, resultMultiplier = 1; j >= 0; --j) {
33628
- const size = this.dims[j];
33629
- if (j !== dim) {
33630
- const index = num % size;
33631
- resultIndex += index * resultMultiplier;
33632
- resultMultiplier *= resultDims[j];
33633
- }
33634
- num = Math.floor(num / size);
33635
- }
33636
-
33637
- // Accumulate the value at the current index
33638
- result[resultIndex] += (this_data[i]) ** p;
33639
- }
33967
+ const [type, result, resultDims] = reduce_helper(fn, this, dim, keepdim);
33640
33968
 
33641
33969
  if (p !== 1) {
33642
33970
  for (let i = 0; i < result.length; ++i) {
33643
33971
  result[i] = result[i] ** (1 / p);
33644
33972
  }
33645
33973
  }
33646
-
33647
- if (!keepdim) {
33648
- resultDims.splice(dim, 1);
33649
- }
33650
-
33651
- return new Tensor(this.type, result, resultDims);
33974
+ return new Tensor(type, result, resultDims);
33652
33975
  }
33653
33976
 
33654
33977
  /**
@@ -33711,7 +34034,7 @@ class Tensor {
33711
34034
  * NOTE: The returned tensor shares the storage with the input tensor, so changing the contents of one will change the contents of the other.
33712
34035
  * If you would like a copy, use `tensor.clone()` before squeezing.
33713
34036
  *
33714
- * @param {number} [dim=null] If given, the input will be squeezed only in the specified dimensions.
34037
+ * @param {number|number[]} [dim=null] If given, the input will be squeezed only in the specified dimensions.
33715
34038
  * @returns {Tensor} The squeezed tensor
33716
34039
  */
33717
34040
  squeeze(dim = null) {
@@ -33821,6 +34144,34 @@ class Tensor {
33821
34144
  return this.clone().neg_();
33822
34145
  }
33823
34146
 
34147
+ /**
34148
+ * Computes input > val element-wise.
34149
+ * @param {number} val The value to compare with.
34150
+ * @returns {Tensor} A boolean tensor that is `true` where input is greater than other and `false` elsewhere.
34151
+ */
34152
+ gt(val) {
34153
+ const mask = new Uint8Array(this.data.length);
34154
+ const this_data = this.data;
34155
+ for (let i = 0; i < this_data.length; ++i) {
34156
+ mask[i] = this_data[i] > val ? 1 : 0;
34157
+ }
34158
+ return new Tensor('bool', mask, this.dims);
34159
+ }
34160
+
34161
+ /**
34162
+ * Computes input < val element-wise.
34163
+ * @param {number} val The value to compare with.
34164
+ * @returns {Tensor} A boolean tensor that is `true` where input is less than other and `false` elsewhere.
34165
+ */
34166
+ lt(val) {
34167
+ const mask = new Uint8Array(this.data.length);
34168
+ const this_data = this.data;
34169
+ for (let i = 0; i < this_data.length; ++i) {
34170
+ mask[i] = this_data[i] < val ? 1 : 0;
34171
+ }
34172
+ return new Tensor('bool', mask, this.dims);
34173
+ }
34174
+
33824
34175
  /**
33825
34176
  * In-place version of @see {@link Tensor.clamp}
33826
34177
  */
@@ -33866,18 +34217,23 @@ class Tensor {
33866
34217
  }
33867
34218
 
33868
34219
  min(dim = null, keepdim = false) {
33869
- if (dim !== null) {
33870
- throw new Error("`dim !== null` not yet implemented.");
34220
+ if (dim === null) {
34221
+ // None to reduce over all dimensions.
34222
+ const val = (0,_maths_js__WEBPACK_IMPORTED_MODULE_0__.min)(this.data)[0];
34223
+ return new Tensor(this.type, [val], [/* scalar */]);
33871
34224
  }
33872
- const value = (0,_maths_js__WEBPACK_IMPORTED_MODULE_0__.min)(this.data)[0];
33873
- return new Tensor(this.type, [value], []);
34225
+ const [type, result, resultDims] = reduce_helper((a, b) => Math.min(a, b), this, dim, keepdim, Infinity);
34226
+ return new Tensor(type, result, resultDims);
33874
34227
  }
34228
+
33875
34229
  max(dim = null, keepdim = false) {
33876
- if (dim !== null) {
33877
- throw new Error("`dim !== null` not yet implemented.");
34230
+ if (dim === null) {
34231
+ // None to reduce over all dimensions.
34232
+ const val = (0,_maths_js__WEBPACK_IMPORTED_MODULE_0__.max)(this.data)[0];
34233
+ return new Tensor(this.type, [val], [/* scalar */]);
33878
34234
  }
33879
- const value = (0,_maths_js__WEBPACK_IMPORTED_MODULE_0__.max)(this.data)[0];
33880
- return new Tensor(this.type, [value], []);
34235
+ const [type, result, resultDims] = reduce_helper((a, b) => Math.max(a, b), this, dim, keepdim, -Infinity);
34236
+ return new Tensor(type, result, resultDims);
33881
34237
  }
33882
34238
 
33883
34239
  argmin(dim = null, keepdim = false) {
@@ -34374,35 +34730,19 @@ function stack(tensors, dim = 0) {
34374
34730
 
34375
34731
 
34376
34732
  /**
34377
- * Calculates the standard deviation and mean over the dimensions specified by dim. dim can be a single dimension or `null` to reduce over all dimensions.
34378
- * @param {Tensor} input the input tenso
34379
- * @param {number|null} dim the dimension to reduce. If None, all dimensions are reduced.
34380
- * @param {number} correction difference between the sample size and sample degrees of freedom. Defaults to Bessel's correction, correction=1.
34733
+ * @param {(previousValue: any, currentValue: any, currentIndex?: number, resultIndex?: number) => any} callbackfn
34734
+ * @param {Tensor} input the input tensor.
34735
+ * @param {number|null} dim the dimension to reduce.
34381
34736
  * @param {boolean} keepdim whether the output tensor has dim retained or not.
34382
- * @returns {Tensor[]} A tuple of (std, mean) tensors.
34737
+ * @returns {[DataType, any, number[]]} The reduced tensor data.
34383
34738
  */
34384
- function std_mean(input, dim = null, correction = 1, keepdim = false) {
34385
- const inputData = /** @type {Float32Array} */(input.data);
34739
+ function reduce_helper(callbackfn, input, dim = null, keepdim = false, initialValue = null) {
34740
+ const inputData = input.data;
34386
34741
  const inputDims = input.dims;
34387
34742
 
34388
- if (dim === null) {
34389
- // None to reduce over all dimensions.
34390
- const sum = inputData.reduce((a, b) => a + b, 0);
34391
- const mean = sum / inputData.length;
34392
- const std = Math.sqrt(inputData.reduce((a, b) => a + (b - mean) ** 2, 0) / (inputData.length - correction));
34393
-
34394
- const meanTensor = new Tensor(input.type, [mean], [/* scalar */]);
34395
- const stdTensor = new Tensor(input.type, [std], [/* scalar */]);
34396
-
34397
- return [stdTensor, meanTensor];
34398
- }
34399
-
34400
34743
  // Negative indexing
34401
34744
  dim = safeIndex(dim, inputDims.length);
34402
34745
 
34403
- const meanTensor = mean(input, dim, keepdim);
34404
- const meanTensorData = meanTensor.data;
34405
-
34406
34746
  // Calculate the shape of the resulting array after summation
34407
34747
  const resultDims = inputDims.slice(); // Copy the original dimensions
34408
34748
  resultDims[dim] = 1; // Remove the specified axis
@@ -34410,6 +34750,9 @@ function std_mean(input, dim = null, correction = 1, keepdim = false) {
34410
34750
  // Create a new array to store the accumulated values
34411
34751
  // @ts-ignore
34412
34752
  const result = new inputData.constructor(inputData.length / inputDims[dim]);
34753
+ if (initialValue !== null) {
34754
+ result.fill(initialValue);
34755
+ }
34413
34756
 
34414
34757
  // Iterate over the data array
34415
34758
  for (let i = 0; i < inputData.length; ++i) {
@@ -34428,23 +34771,55 @@ function std_mean(input, dim = null, correction = 1, keepdim = false) {
34428
34771
  }
34429
34772
 
34430
34773
  // Accumulate the value at the current index
34431
- result[resultIndex] += (inputData[i] - meanTensorData[resultIndex]) ** 2;
34774
+ result[resultIndex] = callbackfn(result[resultIndex], inputData[i], i, resultIndex);
34432
34775
  }
34433
34776
 
34434
- for (let i = 0; i < result.length; ++i) {
34435
- result[i] = Math.sqrt(result[i] / (inputDims[dim] - correction));
34777
+ if (!keepdim) resultDims.splice(dim, 1);
34778
+
34779
+ return [input.type, result, resultDims];
34780
+ }
34781
+
34782
+
34783
/**
 * Calculates the standard deviation and mean over the dimensions specified by dim. dim can be a single dimension or `null` to reduce over all dimensions.
 * @param {Tensor} input the input tensor.
 * @param {number|null} dim the dimension to reduce. If None, all dimensions are reduced.
 * @param {number} correction difference between the sample size and sample degrees of freedom. Defaults to Bessel's correction, correction=1.
 * @param {boolean} keepdim whether the output tensor has dim retained or not.
 * @returns {Tensor[]} A tuple of (std, mean) tensors.
 */
function std_mean(input, dim = null, correction = 1, keepdim = false) {
    const data = /** @type {Float32Array} */(input.data);
    const dims = input.dims;

    if (dim === null) {
        // Full reduction: scalar mean first, then scalar std about that mean.
        let total = 0;
        for (let i = 0; i < data.length; ++i) total += data[i];
        const meanValue = total / data.length;

        let squaredSum = 0;
        for (let i = 0; i < data.length; ++i) squaredSum += (data[i] - meanValue) ** 2;
        // `correction` adjusts the divisor (Bessel's correction by default).
        const stdValue = Math.sqrt(squaredSum / (data.length - correction));

        return [
            new Tensor(input.type, [stdValue], [/* scalar */]),
            new Tensor(input.type, [meanValue], [/* scalar */]),
        ];
    }

    // Negative indexing
    dim = safeIndex(dim, dims.length);

    const meanTensor = mean(input, dim, keepdim);
    const meanTensorData = meanTensor.data;

    // Accumulate the squared deviation of each element from its slice's mean.
    const [type, result, resultDims] = reduce_helper((acc, x, i, j) => acc + (x - meanTensorData[j]) ** 2, input, dim, keepdim);

    // Turn each squared sum into a standard deviation.
    for (let i = 0; i < result.length; ++i) {
        result[i] = Math.sqrt(result[i] / (dims[dim] - correction));
    }

    return [new Tensor(type, result, resultDims), meanTensor];
}
34446
34822
 
34447
-
34448
34823
  /**
34449
34824
  * Returns the mean value of each row of the input tensor in the given dimension dim.
34450
34825
  * @param {Tensor} input the input tensor.
@@ -34453,58 +34828,27 @@ function std_mean(input, dim = null, correction = 1, keepdim = false) {
34453
34828
  * @returns {Tensor} A new tensor with means taken along the specified dimension.
34454
34829
  */
34455
34830
function mean(input, dim = null, keepdim = false) {
    const dims = input.dims;
    const data = /** @type {Float32Array} */(input.data);

    if (dim === null) {
        // Full reduction: single scalar mean over every element.
        let total = 0;
        for (let i = 0; i < data.length; ++i) total += data[i];
        return new Tensor(input.type, [total / data.length], [/* scalar */]);
    }
    // Negative indexing
    dim = safeIndex(dim, dims.length);

    // Sum along the chosen dimension...
    const [type, result, resultDims] = reduce_helper((acc, x) => acc + x, input, dim, keepdim);

    // ...then divide each sum by that dimension's size (skip when size is 1).
    const size = dims[dim];
    if (size !== 1) {
        for (let i = 0; i < result.length; ++i) {
            result[i] /= size;
        }
    }

    return new Tensor(type, result, resultDims);
}
34509
34853
 
34510
34854
 
@@ -35004,6 +35348,10 @@ __webpack_require__.r(__webpack_exports__);
35004
35348
  /* harmony export */ GraniteModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.GraniteModel),
35005
35349
  /* harmony export */ GranitePreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.GranitePreTrainedModel),
35006
35350
  /* harmony export */ Grok1Tokenizer: () => (/* reexport safe */ _tokenizers_js__WEBPACK_IMPORTED_MODULE_3__.Grok1Tokenizer),
35351
+ /* harmony export */ GroundingDinoForObjectDetection: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.GroundingDinoForObjectDetection),
35352
+ /* harmony export */ GroundingDinoImageProcessor: () => (/* reexport safe */ _models_image_processors_js__WEBPACK_IMPORTED_MODULE_13__.GroundingDinoImageProcessor),
35353
+ /* harmony export */ GroundingDinoPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.GroundingDinoPreTrainedModel),
35354
+ /* harmony export */ GroundingDinoProcessor: () => (/* reexport safe */ _models_processors_js__WEBPACK_IMPORTED_MODULE_16__.GroundingDinoProcessor),
35007
35355
  /* harmony export */ GroupViTModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.GroupViTModel),
35008
35356
  /* harmony export */ GroupViTPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.GroupViTPreTrainedModel),
35009
35357
  /* harmony export */ HerbertTokenizer: () => (/* reexport safe */ _tokenizers_js__WEBPACK_IMPORTED_MODULE_3__.HerbertTokenizer),
@@ -35233,6 +35581,7 @@ __webpack_require__.r(__webpack_exports__);
35233
35581
  /* harmony export */ RTDetrModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.RTDetrModel),
35234
35582
  /* harmony export */ RTDetrObjectDetectionOutput: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.RTDetrObjectDetectionOutput),
35235
35583
  /* harmony export */ RTDetrPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.RTDetrPreTrainedModel),
35584
+ /* harmony export */ RawAudio: () => (/* reexport safe */ _utils_audio_js__WEBPACK_IMPORTED_MODULE_5__.RawAudio),
35236
35585
  /* harmony export */ RawImage: () => (/* reexport safe */ _utils_image_js__WEBPACK_IMPORTED_MODULE_6__.RawImage),
35237
35586
  /* harmony export */ RepetitionPenaltyLogitsProcessor: () => (/* reexport safe */ _generation_logits_process_js__WEBPACK_IMPORTED_MODULE_20__.RepetitionPenaltyLogitsProcessor),
35238
35587
  /* harmony export */ ResNetForImageClassification: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.ResNetForImageClassification),
@@ -35298,6 +35647,8 @@ __webpack_require__.r(__webpack_exports__);
35298
35647
  /* harmony export */ Starcoder2PreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Starcoder2PreTrainedModel),
35299
35648
  /* harmony export */ StoppingCriteria: () => (/* reexport safe */ _generation_stopping_criteria_js__WEBPACK_IMPORTED_MODULE_19__.StoppingCriteria),
35300
35649
  /* harmony export */ StoppingCriteriaList: () => (/* reexport safe */ _generation_stopping_criteria_js__WEBPACK_IMPORTED_MODULE_19__.StoppingCriteriaList),
35650
+ /* harmony export */ StyleTextToSpeech2Model: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.StyleTextToSpeech2Model),
35651
+ /* harmony export */ StyleTextToSpeech2PreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.StyleTextToSpeech2PreTrainedModel),
35301
35652
  /* harmony export */ SummarizationPipeline: () => (/* reexport safe */ _pipelines_js__WEBPACK_IMPORTED_MODULE_1__.SummarizationPipeline),
35302
35653
  /* harmony export */ SuppressTokensAtBeginLogitsProcessor: () => (/* reexport safe */ _generation_logits_process_js__WEBPACK_IMPORTED_MODULE_20__.SuppressTokensAtBeginLogitsProcessor),
35303
35654
  /* harmony export */ Swin2SRForImageSuperResolution: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Swin2SRForImageSuperResolution),