@huggingface/transformers 3.2.2 → 3.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. package/README.md +3 -2
  2. package/dist/transformers.cjs +252 -113
  3. package/dist/transformers.cjs.map +1 -1
  4. package/dist/transformers.js +256 -114
  5. package/dist/transformers.js.map +1 -1
  6. package/dist/transformers.min.cjs +1 -1
  7. package/dist/transformers.min.cjs.map +1 -1
  8. package/dist/transformers.min.js +1 -1
  9. package/dist/transformers.min.js.map +1 -1
  10. package/dist/transformers.min.mjs +1 -1
  11. package/dist/transformers.min.mjs.map +1 -1
  12. package/dist/transformers.mjs +256 -114
  13. package/dist/transformers.mjs.map +1 -1
  14. package/package.json +2 -2
  15. package/src/base/feature_extraction_utils.js +9 -9
  16. package/src/base/image_processors_utils.js +11 -0
  17. package/src/base/processing_utils.js +13 -3
  18. package/src/configs.js +5 -0
  19. package/src/env.js +1 -1
  20. package/src/models/auto/feature_extraction_auto.js +0 -16
  21. package/src/models/auto/processing_auto.js +0 -16
  22. package/src/models/convnext/image_processing_convnext.js +1 -0
  23. package/src/models/efficientnet/image_processing_efficientnet.js +1 -0
  24. package/src/models/florence2/processing_florence2.js +3 -0
  25. package/src/models/idefics3/image_processing_idefics3.js +2 -0
  26. package/src/models/janus/image_processing_janus.js +1 -0
  27. package/src/models/mgp_str/processing_mgp_str.js +2 -0
  28. package/src/models/paligemma/processing_paligemma.js +1 -0
  29. package/src/models/phi3_v/processing_phi3_v.js +1 -1
  30. package/src/models/pyannote/feature_extraction_pyannote.js +1 -0
  31. package/src/models/qwen2_vl/processing_qwen2_vl.js +1 -0
  32. package/src/models/seamless_m4t/feature_extraction_seamless_m4t.js +2 -2
  33. package/src/models/whisper/feature_extraction_whisper.js +1 -1
  34. package/src/models.js +93 -36
  35. package/src/ops/registry.js +10 -0
  36. package/src/pipelines.js +34 -7
  37. package/src/tokenizers.js +4 -7
  38. package/src/utils/dtypes.js +2 -0
  39. package/src/utils/hub.js +1 -1
  40. package/src/utils/maths.js +8 -6
  41. package/src/utils/tensor.js +42 -10
  42. package/types/base/feature_extraction_utils.d.ts +7 -7
  43. package/types/base/image_processors_utils.d.ts.map +1 -1
  44. package/types/base/processing_utils.d.ts +17 -19
  45. package/types/base/processing_utils.d.ts.map +1 -1
  46. package/types/configs.d.ts.map +1 -1
  47. package/types/generation/parameters.d.ts +1 -1
  48. package/types/models/auto/feature_extraction_auto.d.ts.map +1 -1
  49. package/types/models/auto/image_processing_auto.d.ts.map +1 -1
  50. package/types/models/auto/processing_auto.d.ts.map +1 -1
  51. package/types/models/convnext/image_processing_convnext.d.ts.map +1 -1
  52. package/types/models/efficientnet/image_processing_efficientnet.d.ts.map +1 -1
  53. package/types/models/florence2/processing_florence2.d.ts.map +1 -1
  54. package/types/models/idefics3/image_processing_idefics3.d.ts.map +1 -1
  55. package/types/models/janus/image_processing_janus.d.ts.map +1 -1
  56. package/types/models/mgp_str/processing_mgp_str.d.ts.map +1 -1
  57. package/types/models/paligemma/processing_paligemma.d.ts.map +1 -1
  58. package/types/models/phi3_v/processing_phi3_v.d.ts +6 -2
  59. package/types/models/phi3_v/processing_phi3_v.d.ts.map +1 -1
  60. package/types/models/pyannote/feature_extraction_pyannote.d.ts.map +1 -1
  61. package/types/models/qwen2_vl/processing_qwen2_vl.d.ts.map +1 -1
  62. package/types/models/sapiens/image_processing_sapiens.d.ts +10 -0
  63. package/types/models/sapiens/image_processing_sapiens.d.ts.map +1 -0
  64. package/types/models/whisper/generation_whisper.d.ts +1 -1
  65. package/types/models/whisper/generation_whisper.d.ts.map +1 -1
  66. package/types/models.d.ts +48 -17
  67. package/types/models.d.ts.map +1 -1
  68. package/types/ops/registry.d.ts +1 -0
  69. package/types/ops/registry.d.ts.map +1 -1
  70. package/types/pipelines.d.ts +2 -2
  71. package/types/pipelines.d.ts.map +1 -1
  72. package/types/tokenizers.d.ts.map +1 -1
  73. package/types/tsconfig.tsbuildinfo +1 -0
  74. package/types/utils/dtypes.d.ts.map +1 -1
  75. package/types/utils/hub.d.ts +1 -1
  76. package/types/utils/hub.d.ts.map +1 -1
  77. package/types/utils/image.d.ts +3 -2
  78. package/types/utils/image.d.ts.map +1 -1
  79. package/types/utils/maths.d.ts +8 -6
  80. package/types/utils/maths.d.ts.map +1 -1
  81. package/types/utils/tensor.d.ts +8 -4
  82. package/types/utils/tensor.d.ts.map +1 -1
@@ -4158,23 +4158,23 @@ class FeatureExtractor extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_1__.Ca
4158
4158
  }
4159
4159
 
4160
4160
  /**
4161
- * Instantiate one of the processor classes of the library from a pretrained model.
4161
+ * Instantiate one of the feature extractor classes of the library from a pretrained model.
4162
4162
  *
4163
- * The processor class to instantiate is selected based on the `image_processor_type` (or `feature_extractor_type`; legacy)
4164
- * property of the config object (either passed as an argument or loaded from `pretrained_model_name_or_path` if possible)
4163
+ * The feature extractor class to instantiate is selected based on the `feature_extractor_type` property of
4164
+ * the config object (either passed as an argument or loaded from `pretrained_model_name_or_path` if possible)
4165
4165
  *
4166
4166
  * @param {string} pretrained_model_name_or_path The name or path of the pretrained model. Can be either:
4167
- * - A string, the *model id* of a pretrained processor hosted inside a model repo on huggingface.co.
4167
+ * - A string, the *model id* of a pretrained feature_extractor hosted inside a model repo on huggingface.co.
4168
4168
  * Valid model ids can be located at the root-level, like `bert-base-uncased`, or namespaced under a
4169
4169
  * user or organization name, like `dbmdz/bert-base-german-cased`.
4170
- * - A path to a *directory* containing processor files, e.g., `./my_model_directory/`.
4171
- * @param {import('../utils/hub.js').PretrainedOptions} options Additional options for loading the processor.
4170
+ * - A path to a *directory* containing feature_extractor files, e.g., `./my_model_directory/`.
4171
+ * @param {import('../utils/hub.js').PretrainedOptions} options Additional options for loading the feature_extractor.
4172
4172
  *
4173
- * @returns {Promise<FeatureExtractor>} A new instance of the Processor class.
4173
+ * @returns {Promise<FeatureExtractor>} A new instance of the Feature Extractor class.
4174
4174
  */
4175
4175
  static async from_pretrained(pretrained_model_name_or_path, options) {
4176
- const preprocessorConfig = await (0,_utils_hub_js__WEBPACK_IMPORTED_MODULE_2__.getModelJSON)(pretrained_model_name_or_path, _utils_constants_js__WEBPACK_IMPORTED_MODULE_0__.FEATURE_EXTRACTOR_NAME, true, options);
4177
- return new this(preprocessorConfig);
4176
+ const config = await (0,_utils_hub_js__WEBPACK_IMPORTED_MODULE_2__.getModelJSON)(pretrained_model_name_or_path, _utils_constants_js__WEBPACK_IMPORTED_MODULE_0__.FEATURE_EXTRACTOR_NAME, true, options);
4177
+ return new this(config);
4178
4178
  }
4179
4179
  }
4180
4180
 
@@ -4825,14 +4825,20 @@ class ImageProcessor extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_0__.Call
4825
4825
  this.do_thumbnail = config.do_thumbnail;
4826
4826
  this.size = config.size ?? config.image_size;
4827
4827
  this.do_resize = config.do_resize ?? (this.size !== undefined);
4828
+ // @ts-expect-error TS2339
4828
4829
  this.size_divisibility = config.size_divisibility ?? config.size_divisor;
4829
4830
 
4830
4831
  this.do_center_crop = config.do_center_crop;
4832
+ // @ts-expect-error TS2339
4831
4833
  this.crop_size = config.crop_size;
4834
+ // @ts-expect-error TS2339
4832
4835
  this.do_convert_rgb = config.do_convert_rgb ?? true;
4836
+ // @ts-expect-error TS2339
4833
4837
  this.do_crop_margin = config.do_crop_margin;
4834
4838
 
4839
+ // @ts-expect-error TS2339
4835
4840
  this.pad_size = config.pad_size;
4841
+ // @ts-expect-error TS2339
4836
4842
  this.do_pad = config.do_pad;
4837
4843
 
4838
4844
  if (this.do_pad && !this.pad_size && this.size && this.size.width !== undefined && this.size.height !== undefined) {
@@ -5041,6 +5047,7 @@ class ImageProcessor extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_0__.Call
5041
5047
  // Support both formats for backwards compatibility
5042
5048
  else if (Number.isInteger(size)) {
5043
5049
  shortest_edge = size;
5050
+ // @ts-expect-error TS2339
5044
5051
  longest_edge = this.config.max_size ?? shortest_edge;
5045
5052
 
5046
5053
  } else if (size !== undefined) {
@@ -5109,6 +5116,7 @@ class ImageProcessor extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_0__.Call
5109
5116
  } else if (size.min_pixels !== undefined && size.max_pixels !== undefined) {
5110
5117
  // Custom resize logic for Qwen2-VL models
5111
5118
  const { min_pixels, max_pixels } = size;
5119
+ // @ts-expect-error TS2339
5112
5120
  const factor = this.config.patch_size * this.config.merge_size;
5113
5121
  return smart_resize(srcHeight, srcWidth, factor, min_pixels, max_pixels);
5114
5122
  } else {
@@ -5124,6 +5132,7 @@ class ImageProcessor extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_0__.Call
5124
5132
  async resize(image) {
5125
5133
  const [newWidth, newHeight] = this.get_resize_output_image_size(image, this.size);
5126
5134
  return await image.resize(newWidth, newHeight, {
5135
+ // @ts-expect-error TS2322
5127
5136
  resample: this.resample,
5128
5137
  });
5129
5138
  }
@@ -5174,6 +5183,7 @@ class ImageProcessor extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_0__.Call
5174
5183
 
5175
5184
  // Resize the image using thumbnail method.
5176
5185
  if (this.do_thumbnail) {
5186
+ // @ts-expect-error TS2345
5177
5187
  image = await this.thumbnail(image, this.size, this.resample);
5178
5188
  }
5179
5189
 
@@ -5198,6 +5208,7 @@ class ImageProcessor extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_0__.Call
5198
5208
  // NOTE: All pixel-level manipulation (i.e., modifying `pixelData`)
5199
5209
  // occurs with data in the hwc format (height, width, channels),
5200
5210
  // to emulate the behavior of the original Python code (w/ numpy).
5211
+ /** @type {Float32Array} */
5201
5212
  let pixelData = Float32Array.from(image.data);
5202
5213
  let imgDims = [image.height, image.width, image.channels];
5203
5214
 
@@ -5356,6 +5367,7 @@ __webpack_require__.r(__webpack_exports__);
5356
5367
  /**
5357
5368
  * @typedef {Object} ProcessorProperties Additional processor-specific properties.
5358
5369
  * @typedef {import('../utils/hub.js').PretrainedOptions & ProcessorProperties} PretrainedProcessorOptions
5370
+ * @typedef {import('../tokenizers.js').PreTrainedTokenizer} PreTrainedTokenizer
5359
5371
  */
5360
5372
 
5361
5373
 
@@ -5389,7 +5401,7 @@ class Processor extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_1__.Callable
5389
5401
  }
5390
5402
 
5391
5403
  /**
5392
- * @returns {import('../tokenizers.js').PreTrainedTokenizer|undefined} The tokenizer of the processor, if it exists.
5404
+ * @returns {PreTrainedTokenizer|undefined} The tokenizer of the processor, if it exists.
5393
5405
  */
5394
5406
  get tokenizer() {
5395
5407
  return this.components.tokenizer;
@@ -5402,6 +5414,11 @@ class Processor extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_1__.Callable
5402
5414
  return this.components.feature_extractor;
5403
5415
  }
5404
5416
 
5417
+ /**
5418
+ * @param {Parameters<PreTrainedTokenizer['apply_chat_template']>[0]} messages
5419
+ * @param {Parameters<PreTrainedTokenizer['apply_chat_template']>[1]} options
5420
+ * @returns {ReturnType<PreTrainedTokenizer['apply_chat_template']>}
5421
+ */
5405
5422
  apply_chat_template(messages, options = {}) {
5406
5423
  if (!this.tokenizer) {
5407
5424
  throw new Error('Unable to apply chat template without a tokenizer.');
@@ -5412,6 +5429,10 @@ class Processor extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_1__.Callable
5412
5429
  });
5413
5430
  }
5414
5431
 
5432
+ /**
5433
+ * @param {Parameters<PreTrainedTokenizer['batch_decode']>} args
5434
+ * @returns {ReturnType<PreTrainedTokenizer['batch_decode']>}
5435
+ */
5415
5436
  batch_decode(...args) {
5416
5437
  if (!this.tokenizer) {
5417
5438
  throw new Error('Unable to decode without a tokenizer.');
@@ -5439,8 +5460,8 @@ class Processor extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_1__.Callable
5439
5460
  /**
5440
5461
  * Instantiate one of the processor classes of the library from a pretrained model.
5441
5462
  *
5442
- * The processor class to instantiate is selected based on the `feature_extractor_type` property of the config object
5443
- * (either passed as an argument or loaded from `pretrained_model_name_or_path` if possible)
5463
+ * The processor class to instantiate is selected based on the `image_processor_type` (or `feature_extractor_type`; legacy)
5464
+ * property of the config object (either passed as an argument or loaded from `pretrained_model_name_or_path` if possible)
5444
5465
  *
5445
5466
  * @param {string} pretrained_model_name_or_path The name or path of the pretrained model. Can be either:
5446
5467
  * - A string, the *model id* of a pretrained processor hosted inside a model repo on huggingface.co.
@@ -5560,15 +5581,19 @@ function getNormalizedConfig(config) {
5560
5581
  case 'florence2':
5561
5582
  case 'llava_onevision':
5562
5583
  case 'idefics3':
5584
+ // @ts-expect-error TS2339
5563
5585
  init_normalized_config = getNormalizedConfig(config.text_config);
5564
5586
  break;
5565
5587
  case 'moondream1':
5588
+ // @ts-expect-error TS2339
5566
5589
  init_normalized_config = getNormalizedConfig(config.phi_config);
5567
5590
  break;
5568
5591
  case 'musicgen':
5592
+ // @ts-expect-error TS2339
5569
5593
  init_normalized_config = getNormalizedConfig(config.decoder);
5570
5594
  break;
5571
5595
  case 'multi_modality':
5596
+ // @ts-expect-error TS2339
5572
5597
  init_normalized_config = getNormalizedConfig(config.language_config);
5573
5598
  break;
5574
5599
 
@@ -5689,6 +5714,7 @@ function getNormalizedConfig(config) {
5689
5714
  break;
5690
5715
 
5691
5716
  case 'vision-encoder-decoder':
5717
+ // @ts-expect-error TS2339
5692
5718
  const decoderConfig = getNormalizedConfig(config.decoder);
5693
5719
 
5694
5720
  const add_encoder_pkv = 'num_decoder_layers' in decoderConfig;
@@ -5932,7 +5958,7 @@ __webpack_require__.r(__webpack_exports__);
5932
5958
 
5933
5959
 
5934
5960
 
5935
- const VERSION = '3.2.2';
5961
+ const VERSION = '3.2.4';
5936
5962
 
5937
5963
  // Check if various APIs are available (depends on environment)
5938
5964
  const IS_BROWSER_ENV = typeof window !== "undefined" && typeof window.document !== "undefined";
@@ -8008,6 +8034,9 @@ __webpack_require__.r(__webpack_exports__);
8008
8034
  /* harmony export */ Dinov2ForImageClassification: () => (/* binding */ Dinov2ForImageClassification),
8009
8035
  /* harmony export */ Dinov2Model: () => (/* binding */ Dinov2Model),
8010
8036
  /* harmony export */ Dinov2PreTrainedModel: () => (/* binding */ Dinov2PreTrainedModel),
8037
+ /* harmony export */ Dinov2WithRegistersForImageClassification: () => (/* binding */ Dinov2WithRegistersForImageClassification),
8038
+ /* harmony export */ Dinov2WithRegistersModel: () => (/* binding */ Dinov2WithRegistersModel),
8039
+ /* harmony export */ Dinov2WithRegistersPreTrainedModel: () => (/* binding */ Dinov2WithRegistersPreTrainedModel),
8011
8040
  /* harmony export */ DistilBertForMaskedLM: () => (/* binding */ DistilBertForMaskedLM),
8012
8041
  /* harmony export */ DistilBertForQuestionAnswering: () => (/* binding */ DistilBertForQuestionAnswering),
8013
8042
  /* harmony export */ DistilBertForSequenceClassification: () => (/* binding */ DistilBertForSequenceClassification),
@@ -8591,8 +8620,11 @@ async function getSession(pretrained_model_name_or_path, fileName, options) {
8591
8620
  } else if (session_options.externalData !== undefined) {
8592
8621
  externalDataPromises = session_options.externalData.map(async (ext) => {
8593
8622
  // if the external data is a string, fetch the file and replace the string with its content
8623
+ // @ts-expect-error TS2339
8594
8624
  if (typeof ext.data === "string") {
8625
+ // @ts-expect-error TS2339
8595
8626
  const ext_buffer = await (0,_utils_hub_js__WEBPACK_IMPORTED_MODULE_5__.getModelFile)(pretrained_model_name_or_path, ext.data, true, options);
8627
+ // @ts-expect-error TS2698
8596
8628
  return { ...ext, data: ext_buffer };
8597
8629
  }
8598
8630
  return ext;
@@ -9840,6 +9872,7 @@ class PreTrainedModel extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_3__.Cal
9840
9872
  if (this.config.model_type === 'musicgen') {
9841
9873
  // Custom logic (TODO: move to Musicgen class)
9842
9874
  decoder_input_ids = Array.from({
9875
+ // @ts-expect-error TS2339
9843
9876
  length: batch_size * this.config.decoder.num_codebooks
9844
9877
  }, () => [decoder_start_token_id]);
9845
9878
 
@@ -10169,11 +10202,13 @@ class PreTrainedModel extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_3__.Cal
10169
10202
  async encode_image({ pixel_values }) {
10170
10203
  // image_inputs === { pixel_values }
10171
10204
  const features = (await sessionRun(this.sessions['vision_encoder'], { pixel_values })).image_features;
10205
+ // @ts-expect-error TS2339
10172
10206
  if (!this.config.num_image_tokens) {
10173
10207
  console.warn(
10174
10208
  'The number of image tokens was not set in the model configuration. ' +
10175
10209
  `Setting it to the number of features detected by the vision encoder (${features.dims[1]}).`
10176
10210
  )
10211
+ // @ts-expect-error TS2339
10177
10212
  this.config.num_image_tokens = features.dims[1];
10178
10213
  }
10179
10214
  return features;
@@ -11601,6 +11636,7 @@ class WhisperForConditionalGeneration extends WhisperPreTrainedModel {
11601
11636
 
11602
11637
  if (generation_config.return_token_timestamps) {
11603
11638
  outputs["token_timestamps"] = this._extract_token_timestamps(
11639
+ // @ts-expect-error TS2345
11604
11640
  outputs,
11605
11641
  generation_config.alignment_heads,
11606
11642
  generation_config.num_frames,
@@ -11636,6 +11672,7 @@ class WhisperForConditionalGeneration extends WhisperPreTrainedModel {
11636
11672
  );
11637
11673
  }
11638
11674
 
11675
+ // @ts-expect-error TS2339
11639
11676
  let median_filter_width = this.config.median_filter_width;
11640
11677
  if (median_filter_width === undefined) {
11641
11678
  console.warn("Model config has no `median_filter_width`, using default value of 7.")
@@ -11646,6 +11683,7 @@ class WhisperForConditionalGeneration extends WhisperPreTrainedModel {
11646
11683
  const batch = generate_outputs.cross_attentions;
11647
11684
  // Create a list with `decoder_layers` elements, each a tensor of shape
11648
11685
  // (batch size, attention_heads, output length, input length).
11686
+ // @ts-expect-error TS2339
11649
11687
  const cross_attentions = Array.from({ length: this.config.decoder_layers },
11650
11688
  // Concatenate the cross attentions for each layer across sequence length dimension.
11651
11689
  (_, i) => (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_9__.cat)(batch.map(x => x[i]), 2)
@@ -11742,7 +11780,7 @@ class MoonshinePreTrainedModel extends PreTrainedModel {
11742
11780
  */
11743
11781
  class MoonshineModel extends MoonshinePreTrainedModel { }
11744
11782
 
11745
- class MoonshineForConditionalGeneration extends MoonshinePreTrainedModel { }
11783
+ class MoonshineForConditionalGeneration extends MoonshinePreTrainedModel { }
11746
11784
  //////////////////////////////////////////////////
11747
11785
 
11748
11786
 
@@ -11789,6 +11827,7 @@ class LlavaForConditionalGeneration extends LlavaPreTrainedModel {
11789
11827
  attention_mask,
11790
11828
  }) {
11791
11829
 
11830
+ // @ts-expect-error TS2339
11792
11831
  const image_token_index = this.config.image_token_index;
11793
11832
 
11794
11833
  const idsList = input_ids.tolist();
@@ -12142,9 +12181,9 @@ class CLIPTextModel extends CLIPPreTrainedModel {
12142
12181
  /** @type {typeof PreTrainedModel.from_pretrained} */
12143
12182
  static async from_pretrained(pretrained_model_name_or_path, options = {}) {
12144
12183
  return super.from_pretrained(pretrained_model_name_or_path, {
12145
- // Update default model file name if not provided
12146
- model_file_name: 'text_model',
12147
12184
  ...options,
12185
+ // Update default model file name if not provided
12186
+ model_file_name: options.model_file_name ?? 'text_model',
12148
12187
  });
12149
12188
  }
12150
12189
  }
@@ -12179,9 +12218,9 @@ class CLIPTextModelWithProjection extends CLIPPreTrainedModel {
12179
12218
  /** @type {typeof PreTrainedModel.from_pretrained} */
12180
12219
  static async from_pretrained(pretrained_model_name_or_path, options = {}) {
12181
12220
  return super.from_pretrained(pretrained_model_name_or_path, {
12182
- // Update default model file name if not provided
12183
- model_file_name: 'text_model',
12184
12221
  ...options,
12222
+ // Update default model file name if not provided
12223
+ model_file_name: options.model_file_name ?? 'text_model',
12185
12224
  });
12186
12225
  }
12187
12226
  }
@@ -12193,9 +12232,9 @@ class CLIPVisionModel extends CLIPPreTrainedModel {
12193
12232
  /** @type {typeof PreTrainedModel.from_pretrained} */
12194
12233
  static async from_pretrained(pretrained_model_name_or_path, options = {}) {
12195
12234
  return super.from_pretrained(pretrained_model_name_or_path, {
12196
- // Update default model file name if not provided
12197
- model_file_name: 'vision_model',
12198
12235
  ...options,
12236
+ // Update default model file name if not provided
12237
+ model_file_name: options.model_file_name ?? 'vision_model',
12199
12238
  });
12200
12239
  }
12201
12240
  }
@@ -12230,9 +12269,9 @@ class CLIPVisionModelWithProjection extends CLIPPreTrainedModel {
12230
12269
  /** @type {typeof PreTrainedModel.from_pretrained} */
12231
12270
  static async from_pretrained(pretrained_model_name_or_path, options = {}) {
12232
12271
  return super.from_pretrained(pretrained_model_name_or_path, {
12233
- // Update default model file name if not provided
12234
- model_file_name: 'vision_model',
12235
12272
  ...options,
12273
+ // Update default model file name if not provided
12274
+ model_file_name: options.model_file_name ?? 'vision_model',
12236
12275
  });
12237
12276
  }
12238
12277
  }
@@ -12318,9 +12357,9 @@ class SiglipTextModel extends SiglipPreTrainedModel {
12318
12357
  /** @type {typeof PreTrainedModel.from_pretrained} */
12319
12358
  static async from_pretrained(pretrained_model_name_or_path, options = {}) {
12320
12359
  return super.from_pretrained(pretrained_model_name_or_path, {
12321
- // Update default model file name if not provided
12322
- model_file_name: 'text_model',
12323
12360
  ...options,
12361
+ // Update default model file name if not provided
12362
+ model_file_name: options.model_file_name ?? 'text_model',
12324
12363
  });
12325
12364
  }
12326
12365
  }
@@ -12355,9 +12394,9 @@ class SiglipVisionModel extends CLIPPreTrainedModel {
12355
12394
  /** @type {typeof PreTrainedModel.from_pretrained} */
12356
12395
  static async from_pretrained(pretrained_model_name_or_path, options = {}) {
12357
12396
  return super.from_pretrained(pretrained_model_name_or_path, {
12358
- // Update default model file name if not provided
12359
- model_file_name: 'vision_model',
12360
12397
  ...options,
12398
+ // Update default model file name if not provided
12399
+ model_file_name: options.model_file_name ?? 'vision_model',
12361
12400
  });
12362
12401
  }
12363
12402
  }
@@ -12414,9 +12453,9 @@ class JinaCLIPTextModel extends JinaCLIPPreTrainedModel {
12414
12453
  /** @type {typeof PreTrainedModel.from_pretrained} */
12415
12454
  static async from_pretrained(pretrained_model_name_or_path, options = {}) {
12416
12455
  return super.from_pretrained(pretrained_model_name_or_path, {
12417
- // Update default model file name if not provided
12418
- model_file_name: 'text_model',
12419
12456
  ...options,
12457
+ // Update default model file name if not provided
12458
+ model_file_name: options.model_file_name ?? 'text_model',
12420
12459
  });
12421
12460
  }
12422
12461
  }
@@ -12425,9 +12464,9 @@ class JinaCLIPVisionModel extends JinaCLIPPreTrainedModel {
12425
12464
  /** @type {typeof PreTrainedModel.from_pretrained} */
12426
12465
  static async from_pretrained(pretrained_model_name_or_path, options = {}) {
12427
12466
  return super.from_pretrained(pretrained_model_name_or_path, {
12428
- // Update default model file name if not provided
12429
- model_file_name: 'vision_model',
12430
12467
  ...options,
12468
+ // Update default model file name if not provided
12469
+ model_file_name: options.model_file_name ?? 'vision_model',
12431
12470
  });
12432
12471
  }
12433
12472
  }
@@ -12774,6 +12813,7 @@ class Qwen2VLForConditionalGeneration extends Qwen2VLPreTrainedModel {
12774
12813
  const image_nums = vision_tokens.filter(x => x == image_token_id).length;
12775
12814
  const video_nums = vision_tokens.filter(x => x == video_token_id).length;
12776
12815
 
12816
+ /** @type {number[][]} */
12777
12817
  let llm_pos_ids_list = [];
12778
12818
  let st = 0;
12779
12819
  let remain_images = image_nums;
@@ -12843,6 +12883,7 @@ class Qwen2VLForConditionalGeneration extends Qwen2VLPreTrainedModel {
12843
12883
  // NOTE: Each item in llm_pos_ids_list is an array of shape (3, text_len),
12844
12884
  // meaning to perform concatenation along dim=1, we can do the following:
12845
12885
  const num_items = llm_pos_ids_list.reduce((acc, x) => acc + x.length, 0);
12886
+ /** @type {number[]} */
12846
12887
  const llm_positions = new Array(num_items);
12847
12888
  let index = 0;
12848
12889
  for (let x = 0; x < 3; ++x) {
@@ -12883,9 +12924,10 @@ class Qwen2VLForConditionalGeneration extends Qwen2VLPreTrainedModel {
12883
12924
  { length: 3 * data.length },
12884
12925
  (_, i) => data[i % data.length]
12885
12926
  );
12927
+ /** @type {bigint[]} */
12886
12928
  const mrope_position_deltas = Array.from(
12887
12929
  { length: dims[0] },
12888
- (_, i) => (0,_utils_maths_js__WEBPACK_IMPORTED_MODULE_11__.max)(data.subarray(dims[1] * i, dims[1] * (i + 1)))[0] + 1 + dims[1]
12930
+ (_, i) => (0,_utils_maths_js__WEBPACK_IMPORTED_MODULE_11__.max)(data.subarray(dims[1] * i, dims[1] * (i + 1)))[0] + 1n + BigInt(dims[1])
12889
12931
  );
12890
12932
 
12891
12933
  return [
@@ -13456,7 +13498,7 @@ class DPTModel extends DPTPreTrainedModel { }
13456
13498
  *
13457
13499
  * **Example:** Depth estimation w/ `Xenova/dpt-hybrid-midas`.
13458
13500
  * ```javascript
13459
- * import { DPTForDepthEstimation, AutoProcessor, RawImage, interpolate, max } from '@huggingface/transformers';
13501
+ * import { DPTForDepthEstimation, AutoProcessor, RawImage, interpolate_4d } from '@huggingface/transformers';
13460
13502
  *
13461
13503
  * // Load model and processor
13462
13504
  * const model_id = 'Xenova/dpt-hybrid-midas';
@@ -13465,7 +13507,7 @@ class DPTModel extends DPTPreTrainedModel { }
13465
13507
  *
13466
13508
  * // Load image from URL
13467
13509
  * const url = 'http://images.cocodataset.org/val2017/000000039769.jpg';
13468
- * const image = await RawImage.fromURL(url);
13510
+ * const image = await RawImage.read(url);
13469
13511
  *
13470
13512
  * // Prepare image for the model
13471
13513
  * const inputs = await processor(image);
@@ -13474,10 +13516,15 @@ class DPTModel extends DPTPreTrainedModel { }
13474
13516
  * const { predicted_depth } = await model(inputs);
13475
13517
  *
13476
13518
  * // Interpolate to original size
13477
- * const prediction = interpolate(predicted_depth, image.size.reverse(), 'bilinear', false);
13519
+ * const prediction = (await interpolate_4d(predicted_depth.unsqueeze(1), {
13520
+ * size: image.size.reverse(),
13521
+ * mode: 'bilinear',
13522
+ * })).squeeze(1);
13478
13523
  *
13479
13524
  * // Visualize the prediction
13480
- * const formatted = prediction.mul_(255 / max(prediction.data)[0]).to('uint8');
13525
+ * const min = prediction.min().item();
13526
+ * const max = prediction.max().item();
13527
+ * const formatted = prediction.sub_(min).div_(max - min).mul_(255).to('uint8');
13481
13528
  * const depth = RawImage.fromTensor(formatted);
13482
13529
  * // RawImage {
13483
13530
  * // data: Uint8Array(307200) [ 85, 85, 84, ... ],
@@ -13527,11 +13574,7 @@ class GLPNPreTrainedModel extends PreTrainedModel { }
13527
13574
  class GLPNModel extends GLPNPreTrainedModel { }
13528
13575
 
13529
13576
  /**
13530
- * GLPN Model transformer with a lightweight depth estimation head on top e.g. for KITTI, NYUv2.
13531
- *
13532
- * **Example:** Depth estimation w/ `Xenova/glpn-kitti`.
13533
- * ```javascript
13534
- * import { GLPNForDepthEstimation, AutoProcessor, RawImage, interpolate, max } from '@huggingface/transformers';
13577
+ * import { GLPNForDepthEstimation, AutoProcessor, RawImage, interpolate_4d } from '@huggingface/transformers';
13535
13578
  *
13536
13579
  * // Load model and processor
13537
13580
  * const model_id = 'Xenova/glpn-kitti';
@@ -13540,7 +13583,7 @@ class GLPNModel extends GLPNPreTrainedModel { }
13540
13583
  *
13541
13584
  * // Load image from URL
13542
13585
  * const url = 'http://images.cocodataset.org/val2017/000000039769.jpg';
13543
- * const image = await RawImage.fromURL(url);
13586
+ * const image = await RawImage.read(url);
13544
13587
  *
13545
13588
  * // Prepare image for the model
13546
13589
  * const inputs = await processor(image);
@@ -13549,13 +13592,18 @@ class GLPNModel extends GLPNPreTrainedModel { }
13549
13592
  * const { predicted_depth } = await model(inputs);
13550
13593
  *
13551
13594
  * // Interpolate to original size
13552
- * const prediction = interpolate(predicted_depth, image.size.reverse(), 'bilinear', false);
13595
+ * const prediction = (await interpolate_4d(predicted_depth.unsqueeze(1), {
13596
+ * size: image.size.reverse(),
13597
+ * mode: 'bilinear',
13598
+ * })).squeeze(1);
13553
13599
  *
13554
13600
  * // Visualize the prediction
13555
- * const formatted = prediction.mul_(255 / max(prediction.data)[0]).to('uint8');
13601
+ * const min = prediction.min().item();
13602
+ * const max = prediction.max().item();
13603
+ * const formatted = prediction.sub_(min).div_(max - min).mul_(255).to('uint8');
13556
13604
  * const depth = RawImage.fromTensor(formatted);
13557
13605
  * // RawImage {
13558
- * // data: Uint8Array(307200) [ 207, 169, 154, ... ],
13606
+ * // data: Uint8Array(307200) [ 85, 85, 84, ... ],
13559
13607
  * // width: 640,
13560
13608
  * // height: 480,
13561
13609
  * // channels: 1
@@ -13710,6 +13758,26 @@ class Dinov2ForImageClassification extends Dinov2PreTrainedModel {
13710
13758
  }
13711
13759
  //////////////////////////////////////////////////
13712
13760
 
13761
+ //////////////////////////////////////////////////
13762
+ class Dinov2WithRegistersPreTrainedModel extends PreTrainedModel { }
13763
+
13764
+ /**
13765
+ * The bare Dinov2WithRegisters Model transformer outputting raw hidden-states without any specific head on top.
13766
+ */
13767
+ class Dinov2WithRegistersModel extends Dinov2WithRegistersPreTrainedModel { }
13768
+
13769
+ /**
13770
+ * Dinov2WithRegisters Model transformer with an image classification head on top (a linear layer on top of the final hidden state of the [CLS] token) e.g. for ImageNet.
13771
+ */
13772
+ class Dinov2WithRegistersForImageClassification extends Dinov2WithRegistersPreTrainedModel {
13773
+ /**
13774
+ * @param {any} model_inputs
13775
+ */
13776
+ async _call(model_inputs) {
13777
+ return new SequenceClassifierOutput(await super._call(model_inputs));
13778
+ }
13779
+ }
13780
+ //////////////////////////////////////////////////
13713
13781
 
13714
13782
  //////////////////////////////////////////////////
13715
13783
  class YolosPreTrainedModel extends PreTrainedModel { }
@@ -14502,10 +14570,12 @@ class SpeechT5ForTextToSpeech extends SpeechT5PreTrainedModel {
14502
14570
 
14503
14571
  const { encoder_outputs, encoder_attention_mask } = await encoderForward(this, model_inputs);
14504
14572
 
14573
+ // @ts-expect-error TS2339
14505
14574
  const r = encoder_outputs.dims[1] / this.config.reduction_factor;
14506
14575
  const maxlen = Math.floor(r * maxlenratio);
14507
14576
  const minlen = Math.floor(r * minlenratio);
14508
14577
 
14578
+ // @ts-expect-error TS2339
14509
14579
  const num_mel_bins = this.config.num_mel_bins;
14510
14580
 
14511
14581
  let spectrogramParts = [];
@@ -14659,9 +14729,9 @@ class ClapTextModelWithProjection extends ClapPreTrainedModel {
14659
14729
  /** @type {typeof PreTrainedModel.from_pretrained} */
14660
14730
  static async from_pretrained(pretrained_model_name_or_path, options = {}) {
14661
14731
  return super.from_pretrained(pretrained_model_name_or_path, {
14662
- // Update default model file name if not provided
14663
- model_file_name: 'text_model',
14664
14732
  ...options,
14733
+ // Update default model file name if not provided
14734
+ model_file_name: options.model_file_name ?? 'text_model',
14665
14735
  });
14666
14736
  }
14667
14737
  }
@@ -14696,9 +14766,9 @@ class ClapAudioModelWithProjection extends ClapPreTrainedModel {
14696
14766
  /** @type {typeof PreTrainedModel.from_pretrained} */
14697
14767
  static async from_pretrained(pretrained_model_name_or_path, options = {}) {
14698
14768
  return super.from_pretrained(pretrained_model_name_or_path, {
14699
- // Update default model file name if not provided
14700
- model_file_name: 'audio_model',
14701
14769
  ...options,
14770
+ // Update default model file name if not provided
14771
+ model_file_name: options.model_file_name ?? 'audio_model',
14702
14772
  });
14703
14773
  }
14704
14774
  }
@@ -14870,11 +14940,13 @@ class MusicgenForConditionalGeneration extends PreTrainedModel { // NOTE: not Mu
14870
14940
  */
14871
14941
  _apply_and_filter_by_delay_pattern_mask(outputs) {
14872
14942
  const [bs_x_codebooks, seqLength] = outputs.dims;
14943
+ // @ts-expect-error TS2339
14873
14944
  const num_codebooks = this.config.decoder.num_codebooks;
14874
14945
  const upperBound = (seqLength - num_codebooks);
14875
14946
 
14876
14947
  let newDataSize = 0;
14877
14948
  for (let i = 0; i < outputs.size; ++i) {
14949
+ // @ts-expect-error TS2339
14878
14950
  if (outputs.data[i] === this.config.decoder.pad_token_id) {
14879
14951
  continue;
14880
14952
  }
@@ -14904,7 +14976,9 @@ class MusicgenForConditionalGeneration extends PreTrainedModel { // NOTE: not Mu
14904
14976
  let clonedInputIds = structuredClone(input_ids);
14905
14977
  for (let i = 0; i < clonedInputIds.length; ++i) {
14906
14978
  for (let j = 0; j < clonedInputIds[i].length; ++j) {
14979
+ // @ts-expect-error TS2339
14907
14980
  if ((i % this.config.decoder.num_codebooks) >= j) {
14981
+ // @ts-expect-error TS2339
14908
14982
  clonedInputIds[i][j] = BigInt(this.config.decoder.pad_token_id);
14909
14983
  }
14910
14984
  }
@@ -15061,6 +15135,9 @@ class MultiModalityCausalLM extends MultiModalityPreTrainedModel {
15061
15135
  'past_key_values',
15062
15136
  ];
15063
15137
 
15138
+ /**
15139
+ * @param {ConstructorParameters<typeof MultiModalityPreTrainedModel>} args
15140
+ */
15064
15141
  constructor(...args) {
15065
15142
  super(...args);
15066
15143
 
@@ -15339,6 +15416,7 @@ const MODEL_MAPPING_NAMES_ENCODER_ONLY = new Map([
15339
15416
  ['convnext', ['ConvNextModel', ConvNextModel]],
15340
15417
  ['convnextv2', ['ConvNextV2Model', ConvNextV2Model]],
15341
15418
  ['dinov2', ['Dinov2Model', Dinov2Model]],
15419
+ ['dinov2_with_registers', ['Dinov2WithRegistersModel', Dinov2WithRegistersModel]],
15342
15420
  ['resnet', ['ResNetModel', ResNetModel]],
15343
15421
  ['swin', ['SwinModel', SwinModel]],
15344
15422
  ['swin2sr', ['Swin2SRModel', Swin2SRModel]],
@@ -15584,6 +15662,7 @@ const MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES = new Map([
15584
15662
  ['convnext', ['ConvNextForImageClassification', ConvNextForImageClassification]],
15585
15663
  ['convnextv2', ['ConvNextV2ForImageClassification', ConvNextV2ForImageClassification]],
15586
15664
  ['dinov2', ['Dinov2ForImageClassification', Dinov2ForImageClassification]],
15665
+ ['dinov2_with_registers', ['Dinov2WithRegistersForImageClassification', Dinov2WithRegistersForImageClassification]],
15587
15666
  ['resnet', ['ResNetForImageClassification', ResNetForImageClassification]],
15588
15667
  ['swin', ['SwinForImageClassification', SwinForImageClassification]],
15589
15668
  ['segformer', ['SegformerForImageClassification', SegformerForImageClassification]],
@@ -16027,10 +16106,17 @@ class SequenceClassifierOutput extends ModelOutput {
16027
16106
  /**
16028
16107
  * @param {Object} output The output of the model.
16029
16108
  * @param {Tensor} output.logits classification (or regression if config.num_labels==1) scores (before SoftMax).
16109
+ * @param {Record<string, Tensor>} [output.attentions] Object of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length, sequence_length)`.
16110
+ * Attentions weights after the attention softmax, used to compute the weighted average in the self-attention heads.
16030
16111
  */
16031
- constructor({ logits }) {
16112
+ constructor({ logits, ...attentions }) {
16032
16113
  super();
16033
16114
  this.logits = logits;
16115
+ const attentions_list = Object.values(attentions);
16116
+ if (attentions_list.length > 0) {
16117
+ // Only set attentions if they are not empty
16118
+ this.attentions = attentions_list;
16119
+ }
16034
16120
  }
16035
16121
  }
16036
16122
 
@@ -16288,22 +16374,6 @@ __webpack_require__.r(__webpack_exports__);
16288
16374
 
16289
16375
  class AutoFeatureExtractor {
16290
16376
 
16291
- /**
16292
- * Instantiate one of the feature extractor classes of the library from a pretrained model.
16293
- *
16294
- * The processor class to instantiate is selected based on the `feature_extractor_type` property of
16295
- * the config object (either passed as an argument or loaded from `pretrained_model_name_or_path` if possible)
16296
- *
16297
- * @param {string} pretrained_model_name_or_path The name or path of the pretrained model. Can be either:
16298
- * - A string, the *model id* of a pretrained processor hosted inside a model repo on huggingface.co.
16299
- * Valid model ids can be located at the root-level, like `bert-base-uncased`, or namespaced under a
16300
- * user or organization name, like `dbmdz/bert-base-german-cased`.
16301
- * - A path to a *directory* containing processor files, e.g., `./my_model_directory/`.
16302
- * @param {import('../../utils/hub.js').PretrainedOptions} options Additional options for loading the processor.
16303
- *
16304
- * @returns {Promise<AllFeatureExtractors.ImageProcessor>} A new instance of the Processor class.
16305
- */
16306
-
16307
16377
  /** @type {typeof FeatureExtractor.from_pretrained} */
16308
16378
  static async from_pretrained(pretrained_model_name_or_path, options={}) {
16309
16379
 
@@ -16432,22 +16502,6 @@ __webpack_require__.r(__webpack_exports__);
16432
16502
  */
16433
16503
  class AutoProcessor {
16434
16504
 
16435
- /**
16436
- * Instantiate one of the processor classes of the library from a pretrained model.
16437
- *
16438
- * The processor class to instantiate is selected based on the `image_processor_type` (or `feature_extractor_type`; legacy)
16439
- * property of the config object (either passed as an argument or loaded from `pretrained_model_name_or_path` if possible)
16440
- *
16441
- * @param {string} pretrained_model_name_or_path The name or path of the pretrained model. Can be either:
16442
- * - A string, the *model id* of a pretrained processor hosted inside a model repo on huggingface.co.
16443
- * Valid model ids can be located at the root-level, like `bert-base-uncased`, or namespaced under a
16444
- * user or organization name, like `dbmdz/bert-base-german-cased`.
16445
- * - A path to a *directory* containing processor files, e.g., `./my_model_directory/`.
16446
- * @param {import('../../utils/hub.js').PretrainedOptions} options Additional options for loading the processor.
16447
- *
16448
- * @returns {Promise<Processor>} A new instance of the Processor class.
16449
- */
16450
-
16451
16505
  /** @type {typeof Processor.from_pretrained} */
16452
16506
  static async from_pretrained(pretrained_model_name_or_path, options={}) {
16453
16507
 
@@ -16771,6 +16825,7 @@ class ConvNextImageProcessor extends _base_image_processors_utils_js__WEBPACK_IM
16771
16825
  /**
16772
16826
  * Percentage of the image to crop. Only has an effect if this.size < 384.
16773
16827
  */
16828
+ // @ts-expect-error TS2339
16774
16829
  this.crop_pct = this.config.crop_pct ?? (224 / 256);
16775
16830
  }
16776
16831
 
@@ -16978,6 +17033,7 @@ __webpack_require__.r(__webpack_exports__);
16978
17033
  class EfficientNetImageProcessor extends _base_image_processors_utils_js__WEBPACK_IMPORTED_MODULE_0__.ImageProcessor {
16979
17034
  constructor(config) {
16980
17035
  super(config);
17036
+ // @ts-expect-error TS2339
16981
17037
  this.include_top = this.config.include_top ?? true;
16982
17038
  if (this.include_top) {
16983
17039
  this.image_std = this.image_std.map(x => x * x);
@@ -17061,8 +17117,11 @@ class Florence2Processor extends _base_processing_utils_js__WEBPACK_IMPORTED_MOD
17061
17117
  super(config, components);
17062
17118
 
17063
17119
  const {
17120
+ // @ts-expect-error TS2339
17064
17121
  tasks_answer_post_processing_type,
17122
+ // @ts-expect-error TS2339
17065
17123
  task_prompts_without_inputs,
17124
+ // @ts-expect-error TS2339
17066
17125
  task_prompts_with_input,
17067
17126
  } = this.image_processor.config;
17068
17127
 
@@ -17359,6 +17418,8 @@ class Idefics3ImageProcessor extends _base_image_processors_utils_js__WEBPACK_IM
17359
17418
 
17360
17419
  const start_offset = i * pixel_attention_mask_stride + num_patches * h * w;
17361
17420
  const end_offset = (i + 1) * pixel_attention_mask_stride;
17421
+
17422
+ // @ts-expect-error
17362
17423
  pixel_attention_mask_data.fill(false, start_offset, end_offset);
17363
17424
  }
17364
17425
  }
@@ -17768,6 +17829,7 @@ class VLMImageProcessor extends _base_image_processors_utils_js__WEBPACK_IMPORTE
17768
17829
  },
17769
17830
  ...config,
17770
17831
  });
17832
+ // @ts-expect-error TS2339
17771
17833
  this.constant_values = this.config.background_color.map(x => x * this.rescale_factor)
17772
17834
  }
17773
17835
 
@@ -18216,6 +18278,8 @@ class MgpstrProcessor extends _base_processing_utils_js__WEBPACK_IMPORTED_MODULE
18216
18278
  * - bpe_preds: The list of BPE decoded sentences.
18217
18279
  * - wp_preds: The list of wp decoded sentences.
18218
18280
  */
18281
+ // @ts-expect-error The type of this method is not compatible with the one
18282
+ // in the base class. It might be a good idea to fix this.
18219
18283
  batch_decode([char_logits, bpe_logits, wp_logits]) {
18220
18284
  const [char_preds, char_scores] = this._decode_helper(char_logits, 'char');
18221
18285
  const [bpe_preds, bpe_scores] = this._decode_helper(bpe_logits, 'bpe');
@@ -18609,6 +18673,7 @@ class PaliGemmaProcessor extends _base_processing_utils_js__WEBPACK_IMPORTED_MOD
18609
18673
  }
18610
18674
 
18611
18675
  const bos_token = this.tokenizer.bos_token;
18676
+ // @ts-expect-error TS2339
18612
18677
  const image_seq_length = this.image_processor.config.image_seq_length;
18613
18678
  let input_strings;
18614
18679
  if (text.some((t) => t.includes(IMAGE_TOKEN))) {
@@ -18861,7 +18926,7 @@ class Phi3VProcessor extends _base_processing_utils_js__WEBPACK_IMPORTED_MODULE_
18861
18926
  *
18862
18927
  * @param {string|string[]} text
18863
18928
  * @param {RawImage|RawImage[]} images
18864
- * @param {...any} args
18929
+ * @param { { padding?: boolean, truncation?: boolean, num_crops?: number } | undefined } options
18865
18930
  * @returns {Promise<any>}
18866
18931
  */
18867
18932
  async _call(text, images = null, {
@@ -19048,6 +19113,7 @@ class PyAnnoteFeatureExtractor extends _base_feature_extraction_utils_js__WEBPAC
19048
19113
 
19049
19114
  let current_speaker = -1;
19050
19115
  for (let i = 0; i < scores.length; ++i) {
19116
+ /** @type {number[]} */
19051
19117
  const probabilities = (0,_utils_maths_js__WEBPACK_IMPORTED_MODULE_2__.softmax)(scores[i]);
19052
19118
  const [score, id] = (0,_utils_maths_js__WEBPACK_IMPORTED_MODULE_2__.max)(probabilities);
19053
19119
  const [start, end] = [i, i + 1];
@@ -19235,6 +19301,7 @@ class Qwen2VLProcessor extends _base_processing_utils_js__WEBPACK_IMPORTED_MODUL
19235
19301
  }
19236
19302
 
19237
19303
  if (image_grid_thw) {
19304
+ // @ts-expect-error TS2551
19238
19305
  let merge_length = this.image_processor.config.merge_size ** 2;
19239
19306
  let index = 0;
19240
19307
 
@@ -19726,8 +19793,8 @@ class SeamlessM4TFeatureExtractor extends _base_feature_extraction_utils_js__WEB
19726
19793
  'int64',
19727
19794
  new BigInt64Array(numPaddedFrames),
19728
19795
  [1, numPaddedFrames],
19729
- )
19730
- padded_attention_mask.data.fill(1n, 0, num_frames);
19796
+ );
19797
+ /** @type {BigInt64Array} */ (padded_attention_mask.data).fill(1n, 0, num_frames);
19731
19798
  }
19732
19799
  }
19733
19800
  }
@@ -20540,7 +20607,7 @@ class WhisperFeatureExtractor extends _base_feature_extraction_utils_js__WEBPACK
20540
20607
  )
20541
20608
 
20542
20609
  const data = features.data;
20543
- const maxValue = (0,_utils_maths_js__WEBPACK_IMPORTED_MODULE_3__.max)(data)[0];
20610
+ const maxValue = (0,_utils_maths_js__WEBPACK_IMPORTED_MODULE_3__.max)(/** @type {Float32Array} */(data))[0];
20544
20611
 
20545
20612
  for (let i = 0; i < data.length; ++i) {
20546
20613
  data[i] = (Math.max(data[i], maxValue - 8.0) + 4.0) / 4.0;
@@ -20803,6 +20870,16 @@ class TensorOpRegistry {
20803
20870
  // executionProviders: ['webgpu'],
20804
20871
  };
20805
20872
 
20873
+ static get nearest_interpolate_4d() {
20874
+ if (!this._nearest_interpolate_4d) {
20875
+ this._nearest_interpolate_4d = wrap(
20876
+ [8, 10, 18, 0, 58, 129, 1, 10, 41, 10, 1, 120, 10, 0, 10, 0, 10, 1, 115, 18, 1, 121, 34, 6, 82, 101, 115, 105, 122, 101, 42, 18, 10, 4, 109, 111, 100, 101, 34, 7, 110, 101, 97, 114, 101, 115, 116, 160, 1, 3, 18, 1, 114, 90, 31, 10, 1, 120, 18, 26, 10, 24, 8, 1, 18, 20, 10, 3, 18, 1, 98, 10, 3, 18, 1, 99, 10, 3, 18, 1, 104, 10, 3, 18, 1, 119, 90, 15, 10, 1, 115, 18, 10, 10, 8, 8, 7, 18, 4, 10, 2, 8, 4, 98, 31, 10, 1, 121, 18, 26, 10, 24, 8, 1, 18, 20, 10, 3, 18, 1, 98, 10, 3, 18, 1, 99, 10, 3, 18, 1, 104, 10, 3, 18, 1, 119, 66, 2, 16, 21],
20877
+ this.session_options,
20878
+ 'y',
20879
+ );
20880
+ }
20881
+ return this._nearest_interpolate_4d;
20882
+ }
20806
20883
  static get bilinear_interpolate_4d() {
20807
20884
  if (!this._bilinear_interpolate_4d) {
20808
20885
  this._bilinear_interpolate_4d = wrap(
@@ -21177,6 +21254,7 @@ class TextClassificationPipeline extends (/** @type {new (options: TextPipelineC
21177
21254
 
21178
21255
  // TODO: Use softmax tensor function
21179
21256
  const function_to_apply =
21257
+ // @ts-expect-error TS2339
21180
21258
  this.model.config.problem_type === 'multi_label_classification'
21181
21259
  ? batch => batch.sigmoid()
21182
21260
  : batch => new _utils_tensor_js__WEBPACK_IMPORTED_MODULE_8__.Tensor(
@@ -21185,6 +21263,7 @@ class TextClassificationPipeline extends (/** @type {new (options: TextPipelineC
21185
21263
  batch.dims,
21186
21264
  ); // single_label_classification (default)
21187
21265
 
21266
+ // @ts-expect-error TS2339
21188
21267
  const id2label = this.model.config.id2label;
21189
21268
 
21190
21269
  const toReturn = [];
@@ -21287,6 +21366,7 @@ class TokenClassificationPipeline extends (/** @type {new (options: TextPipeline
21287
21366
  const outputs = await this.model(model_inputs)
21288
21367
 
21289
21368
  const logits = outputs.logits;
21369
+ // @ts-expect-error TS2339
21290
21370
  const id2label = this.model.config.id2label;
21291
21371
 
21292
21372
  const toReturn = [];
@@ -21626,11 +21706,14 @@ class Text2TextGenerationPipeline extends (/** @type {new (options: TextPipeline
21626
21706
 
21627
21707
 
21628
21708
  // Add global prefix, if present
21709
+ // @ts-expect-error TS2339
21629
21710
  if (this.model.config.prefix) {
21711
+ // @ts-expect-error TS2339
21630
21712
  texts = texts.map(x => this.model.config.prefix + x)
21631
21713
  }
21632
21714
 
21633
21715
  // Handle task specific params:
21716
+ // @ts-expect-error TS2339
21634
21717
  const task_specific_params = this.model.config.task_specific_params
21635
21718
  if (task_specific_params && task_specific_params[this.task]) {
21636
21719
  // Add prefixes, if present
@@ -22369,6 +22452,7 @@ class AudioClassificationPipeline extends (/** @type {new (options: AudioPipelin
22369
22452
  const sampling_rate = this.processor.feature_extractor.config.sampling_rate;
22370
22453
  const preparedAudios = await prepareAudios(audio, sampling_rate);
22371
22454
 
22455
+ // @ts-expect-error TS2339
22372
22456
  const id2label = this.model.config.id2label;
22373
22457
 
22374
22458
  const toReturn = [];
@@ -22679,6 +22763,7 @@ class AutomaticSpeechRecognitionPipeline extends (/** @type {new (options: TextA
22679
22763
  audio = [/** @type {AudioInput} */ (audio)];
22680
22764
  }
22681
22765
 
22766
+ // @ts-expect-error TS2339
22682
22767
  const time_precision = this.processor.feature_extractor.config.chunk_length / this.model.config.max_source_positions;
22683
22768
  const hop_length = this.processor.feature_extractor.config.hop_length;
22684
22769
 
@@ -22744,7 +22829,9 @@ class AutomaticSpeechRecognitionPipeline extends (/** @type {new (options: TextA
22744
22829
 
22745
22830
  // TODO: Right now we only get top beam
22746
22831
  if (return_timestamps === 'word') {
22832
+ // @ts-expect-error TS2339
22747
22833
  chunk.tokens = data.sequences.tolist()[0];
22834
+ // @ts-expect-error TS2339
22748
22835
  chunk.token_timestamps = data.token_timestamps.tolist()[0].map(
22749
22836
  (/** @type {number} */ x) => (0,_utils_maths_js__WEBPACK_IMPORTED_MODULE_6__.round)(x, 2)
22750
22837
  );
@@ -22789,7 +22876,7 @@ class AutomaticSpeechRecognitionPipeline extends (/** @type {new (options: TextA
22789
22876
  const max_new_tokens = Math.floor(aud.length / sampling_rate) * 6;
22790
22877
  const outputs = await this.model.generate({ max_new_tokens, ...kwargs, ...inputs });
22791
22878
 
22792
- const text = this.processor.batch_decode(outputs, { skip_special_tokens: true })[0];
22879
+ const text = this.processor.batch_decode(/** @type {Tensor} */(outputs), { skip_special_tokens: true })[0];
22793
22880
  toReturn.push({ text });
22794
22881
  }
22795
22882
  return single ? toReturn[0] : toReturn;
@@ -22938,6 +23025,7 @@ class ImageClassificationPipeline extends (/** @type {new (options: ImagePipelin
22938
23025
  const { pixel_values } = await this.processor(preparedImages);
22939
23026
  const output = await this.model({ pixel_values });
22940
23027
 
23028
+ // @ts-expect-error TS2339
22941
23029
  const id2label = this.model.config.id2label;
22942
23030
 
22943
23031
  /** @type {ImageClassificationOutput[]} */
@@ -23052,6 +23140,7 @@ class ImageSegmentationPipeline extends (/** @type {new (options: ImagePipelineC
23052
23140
  }
23053
23141
  }
23054
23142
 
23143
+ // @ts-expect-error TS2339
23055
23144
  const id2label = this.model.config.id2label;
23056
23145
 
23057
23146
  /** @type {ImageSegmentationPipelineOutput[]} */
@@ -23278,6 +23367,7 @@ class ObjectDetectionPipeline extends (/** @type {new (options: ImagePipelineCon
23278
23367
  const processed = this.processor.image_processor.post_process_object_detection(output, threshold, imageSizes);
23279
23368
 
23280
23369
  // Add labels
23370
+ // @ts-expect-error TS2339
23281
23371
  const id2label = this.model.config.id2label;
23282
23372
 
23283
23373
  // Format output
@@ -23497,6 +23587,7 @@ class DocumentQuestionAnsweringPipeline extends (/** @type {new (options: TextIm
23497
23587
  // Run model
23498
23588
  const output = await this.model.generate({
23499
23589
  inputs: pixel_values,
23590
+ // @ts-expect-error TS2339
23500
23591
  max_length: this.model.config.decoder.max_position_embeddings,
23501
23592
  decoder_input_ids,
23502
23593
  ...generate_kwargs,
@@ -23612,6 +23703,7 @@ class TextToAudioPipeline extends (/** @type {new (options: TextToAudioPipelineC
23612
23703
  // Generate waveform
23613
23704
  const { waveform } = await this.model(inputs);
23614
23705
 
23706
+ // @ts-expect-error TS2339
23615
23707
  const sampling_rate = this.model.config.sampling_rate;
23616
23708
  return {
23617
23709
  audio: waveform.data,
@@ -23769,11 +23861,23 @@ class DepthEstimationPipeline extends (/** @type {new (options: ImagePipelineCon
23769
23861
 
23770
23862
  const toReturn = [];
23771
23863
  for (let i = 0; i < preparedImages.length; ++i) {
23772
- const prediction = (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_8__.interpolate)(predicted_depth[i], preparedImages[i].size.reverse(), 'bilinear', false);
23773
- const formatted = prediction.mul_(255 / (0,_utils_maths_js__WEBPACK_IMPORTED_MODULE_6__.max)(prediction.data)[0]).to('uint8');
23864
+ const batch = predicted_depth[i];
23865
+ const [height, width] = batch.dims.slice(-2);
23866
+ const [new_width, new_height] = preparedImages[i].size;
23867
+
23868
+ // Interpolate to original size
23869
+ const prediction = (await (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_8__.interpolate_4d)(batch.view(1, 1, height, width), {
23870
+ size: [new_height, new_width],
23871
+ mode: 'bilinear',
23872
+ })).view(new_height, new_width);
23873
+
23874
+ const minval = /** @type {number} */(prediction.min().item());
23875
+ const maxval = /** @type {number} */(prediction.max().item());
23876
+ const formatted = prediction.sub(minval).div_(maxval - minval).mul_(255).to('uint8').unsqueeze(0);
23877
+ const depth = _utils_image_js__WEBPACK_IMPORTED_MODULE_9__.RawImage.fromTensor(formatted);
23774
23878
  toReturn.push({
23775
- predicted_depth: predicted_depth[i],
23776
- depth: _utils_image_js__WEBPACK_IMPORTED_MODULE_9__.RawImage.fromTensor(formatted),
23879
+ predicted_depth: prediction,
23880
+ depth,
23777
23881
  });
23778
23882
  }
23779
23883
 
@@ -24253,6 +24357,7 @@ async function loadItems(mapping, model, pretrainedOptions) {
24253
24357
  return result;
24254
24358
  }
24255
24359
 
24360
+
24256
24361
  /***/ }),
24257
24362
 
24258
24363
  /***/ "./src/tokenizers.js":
@@ -24322,7 +24427,6 @@ __webpack_require__.r(__webpack_exports__);
24322
24427
  /* harmony import */ var _utils_data_structures_js__WEBPACK_IMPORTED_MODULE_5__ = __webpack_require__(/*! ./utils/data-structures.js */ "./src/utils/data-structures.js");
24323
24428
  /* harmony import */ var _huggingface_jinja__WEBPACK_IMPORTED_MODULE_6__ = __webpack_require__(/*! @huggingface/jinja */ "./node_modules/@huggingface/jinja/dist/index.js");
24324
24429
  /* harmony import */ var _models_whisper_common_whisper_js__WEBPACK_IMPORTED_MODULE_7__ = __webpack_require__(/*! ./models/whisper/common_whisper.js */ "./src/models/whisper/common_whisper.js");
24325
- /* harmony import */ var _utils_constants_js__WEBPACK_IMPORTED_MODULE_8__ = __webpack_require__(/*! ./utils/constants.js */ "./src/utils/constants.js");
24326
24430
 
24327
24431
  /**
24328
24432
  * @file Tokenizers are used to prepare textual inputs for a model.
@@ -24359,7 +24463,6 @@ __webpack_require__.r(__webpack_exports__);
24359
24463
 
24360
24464
 
24361
24465
 
24362
-
24363
24466
  /**
24364
24467
  * @typedef {Object} TokenizerProperties Additional tokenizer-specific properties.
24365
24468
  * @property {boolean} [legacy=false] Whether or not the `legacy` behavior of the tokenizer should be used.
@@ -24843,7 +24946,7 @@ class Unigram extends TokenizerModel {
24843
24946
  * Create a new Unigram tokenizer model.
24844
24947
  * @param {Object} config The configuration object for the Unigram model.
24845
24948
  * @param {number} config.unk_id The ID of the unknown token
24846
- * @param {any[][]} config.vocab A 2D array representing a mapping of tokens to scores.
24949
+ * @param {[string, number][]} config.vocab A 2D array representing a mapping of tokens to scores.
24847
24950
  * @param {Object} moreConfig Additional configuration object for the Unigram model.
24848
24951
  */
24849
24952
  constructor(config, moreConfig) {
@@ -24851,11 +24954,10 @@ class Unigram extends TokenizerModel {
24851
24954
 
24852
24955
  const vocabSize = config.vocab.length;
24853
24956
  this.vocab = new Array(vocabSize);
24957
+ /** @type {number[]} */
24854
24958
  this.scores = new Array(vocabSize);
24855
24959
  for (let i = 0; i < vocabSize; ++i) {
24856
- const piece = config.vocab[i];
24857
- this.vocab[i] = piece[0];
24858
- this.scores[i] = piece[1];
24960
+ [this.vocab[i], this.scores[i]] = config.vocab[i];
24859
24961
  }
24860
24962
 
24861
24963
  this.unk_token_id = config.unk_id;
@@ -30218,6 +30320,8 @@ __webpack_require__.r(__webpack_exports__);
30218
30320
  /* harmony export */ });
30219
30321
  /* harmony import */ var _env_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ../env.js */ "./src/env.js");
30220
30322
  /* harmony import */ var _devices_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ./devices.js */ "./src/utils/devices.js");
30323
+ /// <reference types="@webgpu/types" />
30324
+
30221
30325
 
30222
30326
 
30223
30327
 
@@ -30473,7 +30577,7 @@ class FileResponse {
30473
30577
  */
30474
30578
  async arrayBuffer() {
30475
30579
  const data = await fs__WEBPACK_IMPORTED_MODULE_0__.promises.readFile(this.filePath);
30476
- return data.buffer;
30580
+ return /** @type {ArrayBuffer} */ (data.buffer);
30477
30581
  }
30478
30582
 
30479
30583
  /**
@@ -32134,8 +32238,9 @@ function magnitude(arr) {
32134
32238
 
32135
32239
  /**
32136
32240
  * Returns the value and index of the minimum element in an array.
32137
- * @param {number[]|TypedArray} arr array of numbers.
32138
- * @returns {[number, number]} the value and index of the minimum element, of the form: [valueOfMin, indexOfMin]
32241
+ * @template {number[]|bigint[]|AnyTypedArray} T
32242
+ * @param {T} arr array of numbers.
32243
+ * @returns {T extends bigint[]|BigTypedArray ? [bigint, number] : [number, number]} the value and index of the minimum element, of the form: [valueOfMin, indexOfMin]
32139
32244
  * @throws {Error} If array is empty.
32140
32245
  */
32141
32246
  function min(arr) {
@@ -32148,14 +32253,15 @@ function min(arr) {
32148
32253
  indexOfMin = i;
32149
32254
  }
32150
32255
  }
32151
- return [min, indexOfMin];
32256
+ return /** @type {T extends bigint[]|BigTypedArray ? [bigint, number] : [number, number]} */([min, indexOfMin]);
32152
32257
  }
32153
32258
 
32154
32259
 
32155
32260
  /**
32156
32261
  * Returns the value and index of the maximum element in an array.
32157
- * @param {number[]|AnyTypedArray} arr array of numbers.
32158
- * @returns {[number, number]} the value and index of the maximum element, of the form: [valueOfMax, indexOfMax]
32262
+ * @template {number[]|bigint[]|AnyTypedArray} T
32263
+ * @param {T} arr array of numbers.
32264
+ * @returns {T extends bigint[]|BigTypedArray ? [bigint, number] : [number, number]} the value and index of the maximum element, of the form: [valueOfMax, indexOfMax]
32159
32265
  * @throws {Error} If array is empty.
32160
32266
  */
32161
32267
  function max(arr) {
@@ -32168,7 +32274,7 @@ function max(arr) {
32168
32274
  indexOfMax = i;
32169
32275
  }
32170
32276
  }
32171
- return [Number(max), indexOfMax];
32277
+ return /** @type {T extends bigint[]|BigTypedArray ? [bigint, number] : [number, number]} */([max, indexOfMax]);
32172
32278
  }
32173
32279
 
32174
32280
  function isPowerOfTwo(number) {
@@ -33466,8 +33572,6 @@ class Tensor {
33466
33572
  return this.permute(...dims);
33467
33573
  }
33468
33574
 
33469
- // TODO add .max() and .min() methods
33470
-
33471
33575
  /**
33472
33576
  * Returns the sum of each row of the input tensor in the given dimension dim.
33473
33577
  *
@@ -33761,6 +33865,36 @@ class Tensor {
33761
33865
  return mean(this, dim, keepdim);
33762
33866
  }
33763
33867
 
33868
+ min(dim = null, keepdim = false) {
33869
+ if (dim !== null) {
33870
+ throw new Error("`dim !== null` not yet implemented.");
33871
+ }
33872
+ const value = (0,_maths_js__WEBPACK_IMPORTED_MODULE_0__.min)(this.data)[0];
33873
+ return new Tensor(this.type, [value], []);
33874
+ }
33875
+ max(dim = null, keepdim = false) {
33876
+ if (dim !== null) {
33877
+ throw new Error("`dim !== null` not yet implemented.");
33878
+ }
33879
+ const value = (0,_maths_js__WEBPACK_IMPORTED_MODULE_0__.max)(this.data)[0];
33880
+ return new Tensor(this.type, [value], []);
33881
+ }
33882
+
33883
+ argmin(dim = null, keepdim = false) {
33884
+ if (dim !== null) {
33885
+ throw new Error("`dim !== null` not yet implemented.");
33886
+ }
33887
+ const index = (0,_maths_js__WEBPACK_IMPORTED_MODULE_0__.min)(this.data)[1];
33888
+ return new Tensor('int64', [BigInt(index)], []);
33889
+ }
33890
+ argmax(dim = null, keepdim = false) {
33891
+ if (dim !== null) {
33892
+ throw new Error("`dim !== null` not yet implemented.");
33893
+ }
33894
+ const index = (0,_maths_js__WEBPACK_IMPORTED_MODULE_0__.max)(this.data)[1];
33895
+ return new Tensor('int64', [BigInt(index)], []);
33896
+ }
33897
+
33764
33898
  /**
33765
33899
  * Performs Tensor dtype conversion.
33766
33900
  * @param {DataType} type The desired data type.
@@ -33894,7 +34028,7 @@ function interpolate(input, [out_height, out_width], mode = 'bilinear', align_co
33894
34028
  * @param {Tensor} input the input tensor
33895
34029
  * @param {Object} options the options for the interpolation
33896
34030
  * @param {[number, number]|[number, number, number]|[number, number, number, number]} [options.size=null] output spatial size.
33897
- * @param {"bilinear"|"bicubic"} [options.mode='bilinear'] algorithm used for upsampling
34031
+ * @param {"nearest"|"bilinear"|"bicubic"} [options.mode='bilinear'] algorithm used for upsampling
33898
34032
  * @returns {Promise<Tensor>} The interpolated tensor.
33899
34033
  */
33900
34034
  async function interpolate_4d(input, {
@@ -33924,7 +34058,9 @@ async function interpolate_4d(input, {
33924
34058
  }
33925
34059
 
33926
34060
  let op;
33927
- if (mode === 'bilinear') {
34061
+ if (mode === 'nearest') {
34062
+ op = await _ops_registry_js__WEBPACK_IMPORTED_MODULE_2__.TensorOpRegistry.nearest_interpolate_4d;
34063
+ } else if (mode === 'bilinear') {
33928
34064
  op = await _ops_registry_js__WEBPACK_IMPORTED_MODULE_2__.TensorOpRegistry.bilinear_interpolate_4d;
33929
34065
  } else if (mode === 'bicubic') {
33930
34066
  op = await _ops_registry_js__WEBPACK_IMPORTED_MODULE_2__.TensorOpRegistry.bicubic_interpolate_4d;
@@ -33965,13 +34101,13 @@ async function rfft(x, a) {
33965
34101
  * Returns the k largest elements of the given input tensor.
33966
34102
  * Inspired by https://pytorch.org/docs/stable/generated/torch.topk.html
33967
34103
  * @param {Tensor} x the input tensor
33968
- * @param {number} k the k in "top-k"
34104
+ * @param {number} [k] the k in "top-k"
33969
34105
  * @returns {Promise<[Tensor, Tensor]>} the output tuple of (Tensor, LongTensor) of top-k elements and their indices.
33970
34106
  */
33971
34107
  async function topk(x, k) {
33972
34108
  const op = await _ops_registry_js__WEBPACK_IMPORTED_MODULE_2__.TensorOpRegistry.top_k;
33973
34109
 
33974
- if (k === null) {
34110
+ if (k == null) {
33975
34111
  k = x.dims.at(-1);
33976
34112
  } else {
33977
34113
  k = Math.min(k, x.dims.at(-1));
@@ -34000,10 +34136,10 @@ const arrayToIndexTensor = (array) => new Tensor('int64', array, [array.length])
34000
34136
/**
 * Slice a tensor along the given axes using the registered ONNX `Slice` op.
 * @param {Tensor} data The tensor to slice.
 * @param {number[]} starts Start indices, one per entry in `axes`.
 * @param {number[]} ends End indices (exclusive), one per entry in `axes`.
 * @param {number[]} axes Axes along which to slice.
 * @param {number[]} [steps] Step sizes per axis; defaults to 1 for every axis.
 * @returns {Promise<Tensor>} The sliced tensor.
 */
async function slice(data, starts, ends, axes, steps) {
    const op = await _ops_registry_js__WEBPACK_IMPORTED_MODULE_2__.TensorOpRegistry.slice;
    // Missing `steps` means a unit stride on every sliced axis.
    const strides = steps ?? new Array(axes.length).fill(1);
    const inputs = {
        x: data,
        s: arrayToIndexTensor(starts),
        e: arrayToIndexTensor(ends),
        a: arrayToIndexTensor(axes),
        t: arrayToIndexTensor(strides),
    };
    return await op(inputs);
}
@@ -34784,6 +34920,9 @@ __webpack_require__.r(__webpack_exports__);
34784
34920
  /* harmony export */ Dinov2ForImageClassification: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Dinov2ForImageClassification),
34785
34921
  /* harmony export */ Dinov2Model: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Dinov2Model),
34786
34922
  /* harmony export */ Dinov2PreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Dinov2PreTrainedModel),
34923
+ /* harmony export */ Dinov2WithRegistersForImageClassification: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Dinov2WithRegistersForImageClassification),
34924
+ /* harmony export */ Dinov2WithRegistersModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Dinov2WithRegistersModel),
34925
+ /* harmony export */ Dinov2WithRegistersPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Dinov2WithRegistersPreTrainedModel),
34787
34926
  /* harmony export */ DistilBertForMaskedLM: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.DistilBertForMaskedLM),
34788
34927
  /* harmony export */ DistilBertForQuestionAnswering: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.DistilBertForQuestionAnswering),
34789
34928
  /* harmony export */ DistilBertForSequenceClassification: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.DistilBertForSequenceClassification),