@huggingface/transformers 4.0.0-next.8 → 4.0.0-next.9

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. package/README.md +2 -2
  2. package/dist/ort-wasm-simd-threaded.jsep.mjs +26 -26
  3. package/dist/transformers.js +338 -230
  4. package/dist/transformers.min.js +22 -18
  5. package/dist/transformers.node.cjs +339 -228
  6. package/dist/transformers.node.min.cjs +23 -19
  7. package/dist/transformers.node.min.mjs +23 -19
  8. package/dist/transformers.node.mjs +336 -228
  9. package/dist/transformers.web.js +220 -112
  10. package/dist/transformers.web.min.js +18 -14
  11. package/package.json +2 -2
  12. package/src/env.js +1 -1
  13. package/src/models/gemma3/image_processing_gemma3.js +3 -0
  14. package/src/models/gemma3/modeling_gemma3.js +4 -1
  15. package/src/models/gemma3/processing_gemma3.js +45 -0
  16. package/src/models/image_processors.js +1 -0
  17. package/src/models/modeling_utils.js +46 -25
  18. package/src/models/processors.js +1 -0
  19. package/src/models/qwen2_vl/modeling_qwen2_vl.js +39 -32
  20. package/src/models/registry.js +2 -8
  21. package/src/utils/model_registry/ModelRegistry.js +36 -0
  22. package/src/utils/model_registry/get_available_dtypes.js +68 -0
  23. package/src/utils/model_registry/get_model_files.js +7 -60
  24. package/src/utils/model_registry/resolve_model_type.js +66 -0
  25. package/types/models/gemma3/image_processing_gemma3.d.ts +4 -0
  26. package/types/models/gemma3/image_processing_gemma3.d.ts.map +1 -0
  27. package/types/models/gemma3/modeling_gemma3.d.ts +4 -1
  28. package/types/models/gemma3/modeling_gemma3.d.ts.map +1 -1
  29. package/types/models/gemma3/processing_gemma3.d.ts +20 -0
  30. package/types/models/gemma3/processing_gemma3.d.ts.map +1 -0
  31. package/types/models/image_processors.d.ts +1 -0
  32. package/types/models/modeling_utils.d.ts +2 -3
  33. package/types/models/modeling_utils.d.ts.map +1 -1
  34. package/types/models/processors.d.ts +1 -0
  35. package/types/models/qwen2_vl/modeling_qwen2_vl.d.ts.map +1 -1
  36. package/types/models/registry.d.ts.map +1 -1
  37. package/types/utils/model_registry/ModelRegistry.d.ts +27 -0
  38. package/types/utils/model_registry/ModelRegistry.d.ts.map +1 -1
  39. package/types/utils/model_registry/get_available_dtypes.d.ts +26 -0
  40. package/types/utils/model_registry/get_available_dtypes.d.ts.map +1 -0
  41. package/types/utils/model_registry/get_model_files.d.ts +25 -0
  42. package/types/utils/model_registry/get_model_files.d.ts.map +1 -1
  43. package/types/utils/model_registry/resolve_model_type.d.ts +24 -0
  44. package/types/utils/model_registry/resolve_model_type.d.ts.map +1 -0
@@ -14,7 +14,7 @@ var node_path_default = {};
14
14
  var node_url_default = {};
15
15
 
16
16
  // src/env.js
17
- var VERSION = "4.0.0-next.8";
17
+ var VERSION = "4.0.0-next.9";
18
18
  var HAS_SELF = typeof self !== "undefined";
19
19
  var IS_FS_AVAILABLE = !isEmpty(node_fs_default);
20
20
  var IS_PATH_AVAILABLE = !isEmpty(node_path_default);
@@ -11830,6 +11830,7 @@ var processors_exports = {};
11830
11830
  __export(processors_exports, {
11831
11831
  ChatterboxProcessor: () => ChatterboxProcessor,
11832
11832
  Florence2Processor: () => Florence2Processor,
11833
+ Gemma3Processor: () => Gemma3Processor,
11833
11834
  Gemma3nProcessor: () => Gemma3nProcessor,
11834
11835
  Glm46VProcessor: () => Glm46VProcessor,
11835
11836
  GraniteSpeechProcessor: () => GraniteSpeechProcessor,
@@ -14829,6 +14830,7 @@ __export(image_processors_exports, {
14829
14830
  DonutImageProcessor: () => DonutImageProcessor,
14830
14831
  EfficientNetImageProcessor: () => EfficientNetImageProcessor,
14831
14832
  GLPNFeatureExtractor: () => GLPNFeatureExtractor,
14833
+ Gemma3ImageProcessor: () => Gemma3ImageProcessor,
14832
14834
  Glm46VImageProcessor: () => Glm46VImageProcessor,
14833
14835
  GroundingDinoImageProcessor: () => GroundingDinoImageProcessor,
14834
14836
  Idefics3ImageProcessor: () => Idefics3ImageProcessor,
@@ -15013,6 +15015,10 @@ var EfficientNetImageProcessor = class extends ImageProcessor {
15013
15015
  }
15014
15016
  };
15015
15017
 
15018
+ // src/models/gemma3/image_processing_gemma3.js
15019
+ var Gemma3ImageProcessor = class extends ImageProcessor {
15020
+ };
15021
+
15016
15022
  // src/models/qwen2_vl/image_processing_qwen2_vl.js
15017
15023
  var Qwen2VLImageProcessor = class extends ImageProcessor {
15018
15024
  constructor(config) {
@@ -16236,6 +16242,48 @@ var Florence2Processor = class extends Processor {
16236
16242
  }
16237
16243
  };
16238
16244
 
16245
+ // src/models/gemma3/processing_gemma3.js
16246
+ var Gemma3Processor = class extends Processor {
16247
+ static tokenizer_class = AutoTokenizer;
16248
+ static image_processor_class = AutoImageProcessor;
16249
+ static uses_processor_config = true;
16250
+ static uses_chat_template_file = true;
16251
+ constructor(config, components, chat_template) {
16252
+ super(config, components, chat_template);
16253
+ this.image_seq_length = this.config.image_seq_length;
16254
+ const { boi_token, image_token, eoi_token } = this.tokenizer.config;
16255
+ this.boi_token = boi_token;
16256
+ this.image_token = image_token;
16257
+ this.eoi_token = eoi_token;
16258
+ const image_tokens_expanded = image_token.repeat(this.image_seq_length);
16259
+ this.full_image_sequence = `
16260
+
16261
+ ${boi_token}${image_tokens_expanded}${eoi_token}
16262
+
16263
+ `;
16264
+ }
16265
+ /**
16266
+ * @param {string|string[]} text
16267
+ * @param {import('../../utils/image.js').RawImage|import('../../utils/image.js').RawImage[]} [images]
16268
+ * @param {Object} [options]
16269
+ */
16270
+ async _call(text, images = null, options = {}) {
16271
+ if (typeof text === "string") {
16272
+ text = [text];
16273
+ }
16274
+ let image_inputs;
16275
+ if (images) {
16276
+ image_inputs = await this.image_processor(images, options);
16277
+ text = text.map((prompt) => prompt.replaceAll(this.boi_token, this.full_image_sequence));
16278
+ }
16279
+ const text_inputs = this.tokenizer(text, options);
16280
+ return {
16281
+ ...text_inputs,
16282
+ ...image_inputs
16283
+ };
16284
+ }
16285
+ };
16286
+
16239
16287
  // src/models/gemma3n/processing_gemma3n.js
16240
16288
  var Gemma3nProcessor = class extends Processor {
16241
16289
  static image_processor_class = AutoImageProcessor;
@@ -19416,8 +19464,7 @@ var MODEL_TYPES = {
19416
19464
  ImageAudioTextToText: 13,
19417
19465
  Supertonic: 14,
19418
19466
  Chatterbox: 15,
19419
- MultimodalLanguageModelOnly: 16,
19420
- VoxtralRealtime: 17
19467
+ VoxtralRealtime: 16
19421
19468
  };
19422
19469
  var MODEL_TYPE_CONFIG = {
19423
19470
  [MODEL_TYPES.DecoderOnly]: {
@@ -19474,12 +19521,12 @@ var MODEL_TYPE_CONFIG = {
19474
19521
  can_generate: true,
19475
19522
  forward: image_text_to_text_forward,
19476
19523
  prepare_inputs: multimodal_text_to_text_prepare_inputs_for_generation,
19477
- sessions: (config) => {
19524
+ sessions: (config, options, textOnly) => {
19478
19525
  const s = {
19479
19526
  embed_tokens: "embed_tokens",
19480
- vision_encoder: "vision_encoder",
19481
19527
  decoder_model_merged: "decoder_model_merged"
19482
19528
  };
19529
+ if (!textOnly) s["vision_encoder"] = "vision_encoder";
19483
19530
  if (config.is_encoder_decoder) s["model"] = "encoder_model";
19484
19531
  return s;
19485
19532
  },
@@ -19501,12 +19548,17 @@ var MODEL_TYPE_CONFIG = {
19501
19548
  [MODEL_TYPES.ImageAudioTextToText]: {
19502
19549
  can_generate: true,
19503
19550
  prepare_inputs: multimodal_text_to_text_prepare_inputs_for_generation,
19504
- sessions: () => ({
19505
- embed_tokens: "embed_tokens",
19506
- audio_encoder: "audio_encoder",
19507
- vision_encoder: "vision_encoder",
19508
- decoder_model_merged: "decoder_model_merged"
19509
- }),
19551
+ sessions: (config, options, textOnly) => {
19552
+ const s = {
19553
+ embed_tokens: "embed_tokens",
19554
+ decoder_model_merged: "decoder_model_merged"
19555
+ };
19556
+ if (!textOnly) {
19557
+ s["audio_encoder"] = "audio_encoder";
19558
+ s["vision_encoder"] = "vision_encoder";
19559
+ }
19560
+ return s;
19561
+ },
19510
19562
  optional_configs: { generation_config: "generation_config.json" }
19511
19563
  },
19512
19564
  [MODEL_TYPES.Phi3V]: {
@@ -19557,14 +19609,6 @@ var MODEL_TYPE_CONFIG = {
19557
19609
  cache_sessions: { model: true },
19558
19610
  optional_configs: { generation_config: "generation_config.json" }
19559
19611
  },
19560
- [MODEL_TYPES.MultimodalLanguageModelOnly]: {
19561
- can_generate: true,
19562
- forward: image_text_to_text_forward,
19563
- prepare_inputs: multimodal_text_to_text_prepare_inputs_for_generation,
19564
- sessions: () => ({ embed_tokens: "embed_tokens", decoder_model_merged: "decoder_model_merged" }),
19565
- cache_sessions: { decoder_model_merged: true },
19566
- optional_configs: { generation_config: "generation_config.json" }
19567
- },
19568
19612
  [MODEL_TYPES.VoxtralRealtime]: {
19569
19613
  can_generate: true,
19570
19614
  prepare_inputs: decoder_prepare_inputs_for_generation,
@@ -19590,6 +19634,19 @@ function getSessionsConfig(modelType, config, options = {}) {
19590
19634
  optional_configs: typeConfig.optional_configs
19591
19635
  };
19592
19636
  }
19637
+ function resolveTypeConfig(modelName, config) {
19638
+ let modelType = MODEL_TYPE_MAPPING.get(modelName);
19639
+ let textOnly = false;
19640
+ const nativeArch = config?.architectures?.[0];
19641
+ if (nativeArch && nativeArch !== modelName && modelName?.endsWith("ForCausalLM") && nativeArch.endsWith("ForConditionalGeneration")) {
19642
+ const nativeType = MODEL_TYPE_MAPPING.get(nativeArch);
19643
+ if (nativeType !== void 0) {
19644
+ modelType = nativeType;
19645
+ textOnly = true;
19646
+ }
19647
+ }
19648
+ return { typeConfig: MODEL_TYPE_CONFIG[modelType] ?? MODEL_TYPE_CONFIG.default, textOnly, modelType };
19649
+ }
19593
19650
  var MODEL_TYPE_MAPPING = /* @__PURE__ */ new Map();
19594
19651
  var MODEL_NAME_TO_CLASS_MAPPING = /* @__PURE__ */ new Map();
19595
19652
  var MODEL_CLASS_TO_NAME_MAPPING = /* @__PURE__ */ new Map();
@@ -19609,8 +19666,7 @@ var PreTrainedModel = class extends Callable2 {
19609
19666
  this.sessions = sessions;
19610
19667
  this.configs = configs;
19611
19668
  const modelName = MODEL_CLASS_TO_NAME_MAPPING.get(this.constructor);
19612
- const modelType = MODEL_TYPE_MAPPING.get(modelName);
19613
- const typeConfig = MODEL_TYPE_CONFIG[modelType] ?? MODEL_TYPE_CONFIG.default;
19669
+ const { typeConfig } = resolveTypeConfig(modelName, config);
19614
19670
  this.can_generate = typeConfig.can_generate;
19615
19671
  this._forward = typeConfig.forward;
19616
19672
  this._prepare_inputs_for_generation = typeConfig.prepare_inputs;
@@ -19673,9 +19729,8 @@ var PreTrainedModel = class extends Callable2 {
19673
19729
  session_options
19674
19730
  };
19675
19731
  const modelName = MODEL_CLASS_TO_NAME_MAPPING.get(this);
19676
- const modelType = MODEL_TYPE_MAPPING.get(modelName);
19677
19732
  config = options.config = await AutoConfig.from_pretrained(pretrained_model_name_or_path, options);
19678
- const typeConfig = MODEL_TYPE_CONFIG[modelType] ?? MODEL_TYPE_CONFIG.default;
19733
+ const { typeConfig, textOnly, modelType } = resolveTypeConfig(modelName, config);
19679
19734
  if (modelType === void 0) {
19680
19735
  const type = modelName ?? config?.model_type;
19681
19736
  if (type !== "custom") {
@@ -19684,7 +19739,7 @@ var PreTrainedModel = class extends Callable2 {
19684
19739
  );
19685
19740
  }
19686
19741
  }
19687
- const sessions = typeConfig.sessions(config, options);
19742
+ const sessions = typeConfig.sessions(config, options, textOnly);
19688
19743
  const promises = [
19689
19744
  constructSessions(pretrained_model_name_or_path, sessions, options, typeConfig.cache_sessions)
19690
19745
  ];
@@ -20739,6 +20794,7 @@ __export(models_exports, {
20739
20794
  Gemma2Model: () => Gemma2Model,
20740
20795
  Gemma2PreTrainedModel: () => Gemma2PreTrainedModel,
20741
20796
  Gemma3ForCausalLM: () => Gemma3ForCausalLM,
20797
+ Gemma3ForConditionalGeneration: () => Gemma3ForConditionalGeneration,
20742
20798
  Gemma3Model: () => Gemma3Model,
20743
20799
  Gemma3PreTrainedModel: () => Gemma3PreTrainedModel,
20744
20800
  Gemma3nForCausalLM: () => Gemma3nForCausalLM,
@@ -22457,12 +22513,35 @@ var Gemma2Model = class extends Gemma2PreTrainedModel {
22457
22513
  var Gemma2ForCausalLM = class extends Gemma2PreTrainedModel {
22458
22514
  };
22459
22515
 
22516
+ // src/models/llava/modeling_llava.js
22517
+ var LlavaPreTrainedModel = class extends PreTrainedModel {
22518
+ forward_params = ["input_ids", "attention_mask", "pixel_values", "position_ids", "past_key_values"];
22519
+ };
22520
+ var LlavaForConditionalGeneration = class extends LlavaPreTrainedModel {
22521
+ _merge_input_ids_with_image_features(kwargs) {
22522
+ const vision_hidden_size = kwargs.image_features.dims.at(-1);
22523
+ const reshaped_image_hidden_states = kwargs.image_features.view(-1, vision_hidden_size);
22524
+ return default_merge_input_ids_with_image_features({
22525
+ // @ts-ignore
22526
+ image_token_id: this.config.image_token_index ?? this.config.image_token_id,
22527
+ ...kwargs,
22528
+ image_features: reshaped_image_hidden_states
22529
+ });
22530
+ }
22531
+ };
22532
+ var Moondream1ForConditionalGeneration = class extends LlavaForConditionalGeneration {
22533
+ };
22534
+ var LlavaQwen2ForCausalLM = class extends LlavaForConditionalGeneration {
22535
+ };
22536
+
22460
22537
  // src/models/gemma3/modeling_gemma3.js
22461
22538
  var Gemma3PreTrainedModel = class extends PreTrainedModel {
22462
22539
  };
22463
22540
  var Gemma3Model = class extends Gemma3PreTrainedModel {
22464
22541
  };
22465
- var Gemma3ForCausalLM = class extends Gemma3PreTrainedModel {
22542
+ var Gemma3ForConditionalGeneration = class extends LlavaForConditionalGeneration {
22543
+ };
22544
+ var Gemma3ForCausalLM = class extends Gemma3ForConditionalGeneration {
22466
22545
  };
22467
22546
 
22468
22547
  // src/models/gemma3n/modeling_gemma3n.js
@@ -22828,40 +22907,45 @@ var Qwen2VLForConditionalGeneration = class extends Qwen2VLPreTrainedModel {
22828
22907
  });
22829
22908
  }
22830
22909
  prepare_inputs_for_generation(input_ids, model_inputs, generation_config) {
22831
- if (model_inputs.attention_mask && !model_inputs.position_ids) {
22832
- if (!model_inputs.past_key_values) {
22833
- [model_inputs.position_ids, model_inputs.rope_deltas] = this.get_rope_index(
22910
+ if (!model_inputs.attention_mask || model_inputs.position_ids) {
22911
+ return model_inputs;
22912
+ }
22913
+ const session = this.sessions["decoder_model_merged"] ?? this.sessions["model"];
22914
+ if (!session.inputNames.includes("position_ids")) {
22915
+ return model_inputs;
22916
+ }
22917
+ if (!model_inputs.past_key_values) {
22918
+ [model_inputs.position_ids, model_inputs.rope_deltas] = this.get_rope_index(
22919
+ model_inputs.input_ids,
22920
+ model_inputs.image_grid_thw,
22921
+ model_inputs.video_grid_thw,
22922
+ model_inputs.attention_mask
22923
+ );
22924
+ } else {
22925
+ model_inputs.pixel_values = null;
22926
+ const past_length = model_inputs.past_key_values.get_seq_length();
22927
+ if (past_length < model_inputs.input_ids.dims[1]) {
22928
+ const [full_position_ids, rope_deltas] = this.get_rope_index(
22834
22929
  model_inputs.input_ids,
22835
22930
  model_inputs.image_grid_thw,
22836
22931
  model_inputs.video_grid_thw,
22837
22932
  model_inputs.attention_mask
22838
22933
  );
22934
+ model_inputs.rope_deltas = rope_deltas;
22935
+ model_inputs.position_ids = full_position_ids.slice(null, null, [past_length, null]);
22936
+ model_inputs.input_ids = model_inputs.input_ids.slice(null, [past_length, null]);
22839
22937
  } else {
22840
- model_inputs.pixel_values = null;
22841
- const past_length = model_inputs.past_key_values.get_seq_length();
22842
- if (past_length < model_inputs.input_ids.dims[1]) {
22843
- const [full_position_ids, rope_deltas] = this.get_rope_index(
22938
+ if (!model_inputs.rope_deltas) {
22939
+ [, model_inputs.rope_deltas] = this.get_rope_index(
22844
22940
  model_inputs.input_ids,
22845
22941
  model_inputs.image_grid_thw,
22846
22942
  model_inputs.video_grid_thw,
22847
22943
  model_inputs.attention_mask
22848
22944
  );
22849
- model_inputs.rope_deltas = rope_deltas;
22850
- model_inputs.position_ids = full_position_ids.slice(null, null, [past_length, null]);
22851
- model_inputs.input_ids = model_inputs.input_ids.slice(null, [past_length, null]);
22852
- } else {
22853
- if (!model_inputs.rope_deltas) {
22854
- [, model_inputs.rope_deltas] = this.get_rope_index(
22855
- model_inputs.input_ids,
22856
- model_inputs.image_grid_thw,
22857
- model_inputs.video_grid_thw,
22858
- model_inputs.attention_mask
22859
- );
22860
- }
22861
- const delta = BigInt(past_length);
22862
- const rope_deltas_list = model_inputs.rope_deltas.map((x) => delta + x);
22863
- model_inputs.position_ids = stack([rope_deltas_list, rope_deltas_list, rope_deltas_list], 0);
22864
22945
  }
22946
+ const delta = BigInt(past_length);
22947
+ const rope_deltas_list = model_inputs.rope_deltas.map((x) => delta + x);
22948
+ model_inputs.position_ids = stack([rope_deltas_list, rope_deltas_list, rope_deltas_list], 0);
22865
22949
  }
22866
22950
  }
22867
22951
  return model_inputs;
@@ -23144,27 +23228,6 @@ var HunYuanDenseV1Model = class extends HunYuanDenseV1PreTrainedModel {
23144
23228
  var HunYuanDenseV1ForCausalLM = class extends HunYuanDenseV1PreTrainedModel {
23145
23229
  };
23146
23230
 
23147
- // src/models/llava/modeling_llava.js
23148
- var LlavaPreTrainedModel = class extends PreTrainedModel {
23149
- forward_params = ["input_ids", "attention_mask", "pixel_values", "position_ids", "past_key_values"];
23150
- };
23151
- var LlavaForConditionalGeneration = class extends LlavaPreTrainedModel {
23152
- _merge_input_ids_with_image_features(kwargs) {
23153
- const vision_hidden_size = kwargs.image_features.dims.at(-1);
23154
- const reshaped_image_hidden_states = kwargs.image_features.view(-1, vision_hidden_size);
23155
- return default_merge_input_ids_with_image_features({
23156
- // @ts-ignore
23157
- image_token_id: this.config.image_token_index ?? this.config.image_token_id,
23158
- ...kwargs,
23159
- image_features: reshaped_image_hidden_states
23160
- });
23161
- }
23162
- };
23163
- var Moondream1ForConditionalGeneration = class extends LlavaForConditionalGeneration {
23164
- };
23165
- var LlavaQwen2ForCausalLM = class extends LlavaForConditionalGeneration {
23166
- };
23167
-
23168
23231
  // src/models/idefics3/modeling_idefics3.js
23169
23232
  var Idefics3ForConditionalGeneration = class extends LlavaForConditionalGeneration {
23170
23233
  forward_params = [
@@ -26062,6 +26125,7 @@ var MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = /* @__PURE__ */ new Map([
26062
26125
  ["qwen3_vl", "Qwen3VLForCausalLM"],
26063
26126
  ["qwen3_vl_moe", "Qwen3VLMoeForCausalLM"],
26064
26127
  ["qwen3_5", "Qwen3_5ForCausalLM"],
26128
+ ["qwen3_5_text", "Qwen3_5ForCausalLM"],
26065
26129
  ["qwen3_5_moe", "Qwen3_5MoeForCausalLM"],
26066
26130
  ["gemma3n", "Gemma3nForCausalLM"],
26067
26131
  ["phi", "PhiForCausalLM"],
@@ -26149,6 +26213,7 @@ var MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMES = /* @__PURE__ */ new Map([
26149
26213
  ["smolvlm", "SmolVLMForConditionalGeneration"],
26150
26214
  ["paligemma", "PaliGemmaForConditionalGeneration"],
26151
26215
  ["llava_qwen2", "LlavaQwen2ForCausalLM"],
26216
+ ["gemma3", "Gemma3ForConditionalGeneration"],
26152
26217
  ["gemma3n", "Gemma3nForConditionalGeneration"],
26153
26218
  ["mistral3", "Mistral3ForConditionalGeneration"],
26154
26219
  ["lighton_ocr", "LightOnOcrForConditionalGeneration"],
@@ -26343,13 +26408,6 @@ var CUSTOM_MAPPING = [
26343
26408
  ],
26344
26409
  ["SupertonicForConditionalGeneration", SupertonicForConditionalGeneration, MODEL_TYPES.Supertonic],
26345
26410
  ["ChatterboxModel", ChatterboxModel, MODEL_TYPES.Chatterbox],
26346
- ["Qwen2VLForCausalLM", Qwen2VLForCausalLM, MODEL_TYPES.MultimodalLanguageModelOnly],
26347
- ["Qwen2_5_VLForCausalLM", Qwen2_5_VLForCausalLM, MODEL_TYPES.MultimodalLanguageModelOnly],
26348
- ["Qwen3VLForCausalLM", Qwen3VLForCausalLM, MODEL_TYPES.MultimodalLanguageModelOnly],
26349
- ["Qwen3VLMoeForCausalLM", Qwen3VLMoeForCausalLM, MODEL_TYPES.MultimodalLanguageModelOnly],
26350
- ["Qwen3_5ForCausalLM", Qwen3_5ForCausalLM, MODEL_TYPES.MultimodalLanguageModelOnly],
26351
- ["Qwen3_5MoeForCausalLM", Qwen3_5MoeForCausalLM, MODEL_TYPES.MultimodalLanguageModelOnly],
26352
- ["Gemma3nForCausalLM", Gemma3nForCausalLM, MODEL_TYPES.MultimodalLanguageModelOnly],
26353
26411
  [
26354
26412
  "VoxtralRealtimeForConditionalGeneration",
26355
26413
  VoxtralRealtimeForConditionalGeneration,
@@ -28031,6 +28089,41 @@ var TASK_ALIASES = Object.freeze({
28031
28089
  embeddings: "feature-extraction"
28032
28090
  });
28033
28091
 
28092
+ // src/utils/model_registry/resolve_model_type.js
28093
+ function resolve_model_type(config, { warn = true } = {}) {
28094
+ const architectures = (
28095
+ /** @type {string[]} */
28096
+ config.architectures || []
28097
+ );
28098
+ for (const arch of architectures) {
28099
+ const mappedType = MODEL_TYPE_MAPPING.get(arch);
28100
+ if (mappedType !== void 0) {
28101
+ return mappedType;
28102
+ }
28103
+ }
28104
+ if (config.model_type) {
28105
+ const mappedType = MODEL_TYPE_MAPPING.get(config.model_type);
28106
+ if (mappedType !== void 0) {
28107
+ return mappedType;
28108
+ }
28109
+ for (const mapping of Object.values(MODEL_MAPPING_NAMES)) {
28110
+ if (mapping.has(config.model_type)) {
28111
+ const resolved = MODEL_TYPE_MAPPING.get(mapping.get(config.model_type));
28112
+ if (resolved !== void 0) {
28113
+ return resolved;
28114
+ }
28115
+ }
28116
+ }
28117
+ }
28118
+ if (warn) {
28119
+ const archList = architectures.length > 0 ? architectures.join(", ") : "(none)";
28120
+ logger.warn(
28121
+ `[resolve_model_type] Architecture(s) not found in MODEL_TYPE_MAPPING: [${archList}] for model type '${config.model_type}'. Falling back to EncoderOnly (single model.onnx file). If you encounter issues, please report at: ${GITHUB_ISSUE_URL}`
28122
+ );
28123
+ }
28124
+ return MODEL_TYPES.EncoderOnly;
28125
+ }
28126
+
28034
28127
  // src/utils/model_registry/get_model_files.js
28035
28128
  function get_config(modelId, { config = null, cache_dir = null, local_files_only = false, revision = "main" } = {}) {
28036
28129
  if (config !== null) {
@@ -28053,43 +28146,7 @@ async function get_model_files(modelId, { config = null, dtype: overrideDtype =
28053
28146
  const subfolder = "onnx";
28054
28147
  const rawDevice = overrideDevice ?? custom_config.device;
28055
28148
  let dtype = overrideDtype ?? custom_config.dtype;
28056
- let modelType;
28057
- const architectures = (
28058
- /** @type {string[]} */
28059
- config.architectures || []
28060
- );
28061
- let foundInMapping = false;
28062
- for (const arch of architectures) {
28063
- const mappedType = MODEL_TYPE_MAPPING.get(arch);
28064
- if (mappedType !== void 0) {
28065
- modelType = mappedType;
28066
- foundInMapping = true;
28067
- break;
28068
- }
28069
- }
28070
- if (!foundInMapping && config.model_type) {
28071
- const mappedType = MODEL_TYPE_MAPPING.get(config.model_type);
28072
- if (mappedType !== void 0) {
28073
- modelType = mappedType;
28074
- foundInMapping = true;
28075
- }
28076
- if (!foundInMapping) {
28077
- for (const mapping of Object.values(MODEL_MAPPING_NAMES)) {
28078
- if (mapping.has(config.model_type)) {
28079
- modelType = MODEL_TYPE_MAPPING.get(mapping.get(config.model_type));
28080
- foundInMapping = true;
28081
- break;
28082
- }
28083
- }
28084
- }
28085
- }
28086
- if (!foundInMapping) {
28087
- const archList = architectures.length > 0 ? architectures.join(", ") : "(none)";
28088
- logger.warn(
28089
- `[get_model_files] Architecture(s) not found in MODEL_TYPE_MAPPING: [${archList}] for model type '${config.model_type}'. Falling back to EncoderOnly (single model.onnx file). If you encounter issues, please report at: ${GITHUB_ISSUE_URL}`
28090
- );
28091
- modelType = MODEL_TYPES.EncoderOnly;
28092
- }
28149
+ const modelType = resolve_model_type(config);
28093
28150
  const add_model_file = (fileName, baseName = null) => {
28094
28151
  baseName = baseName ?? fileName;
28095
28152
  const selectedDevice = selectDevice(rawDevice, fileName);
@@ -28676,6 +28733,31 @@ async function clear_pipeline_cache(task, modelId, options = {}) {
28676
28733
  return await clear_files_from_cache(modelId, files, options);
28677
28734
  }
28678
28735
 
28736
+ // src/utils/model_registry/get_available_dtypes.js
28737
+ var CONCRETE_DTYPES = Object.keys(DEFAULT_DTYPE_SUFFIX_MAPPING);
28738
+ async function get_available_dtypes(modelId, { config = null, model_file_name = null, revision = "main", cache_dir = null, local_files_only = false } = {}) {
28739
+ config = await get_config(modelId, { config, cache_dir, local_files_only, revision });
28740
+ const subfolder = "onnx";
28741
+ const modelType = resolve_model_type(config);
28742
+ const { sessions } = getSessionsConfig(modelType, config, { model_file_name });
28743
+ const baseNames = Object.values(sessions);
28744
+ const metadataOptions = { revision, cache_dir, local_files_only };
28745
+ const probeResults = await Promise.all(
28746
+ CONCRETE_DTYPES.map(async (dtype) => {
28747
+ const suffix = DEFAULT_DTYPE_SUFFIX_MAPPING[dtype] ?? "";
28748
+ const allExist = await Promise.all(
28749
+ baseNames.map(async (baseName) => {
28750
+ const filename = `${subfolder}/${baseName}${suffix}.onnx`;
28751
+ const metadata = await get_file_metadata(modelId, filename, metadataOptions);
28752
+ return metadata.exists;
28753
+ })
28754
+ );
28755
+ return { dtype, available: allExist.every(Boolean) };
28756
+ })
28757
+ );
28758
+ return probeResults.filter((r) => r.available).map((r) => r.dtype);
28759
+ }
28760
+
28679
28761
  // src/utils/model_registry/ModelRegistry.js
28680
28762
  var ModelRegistry = class {
28681
28763
  /**
@@ -28762,6 +28844,29 @@ var ModelRegistry = class {
28762
28844
  static async get_processor_files(modelId) {
28763
28845
  return get_processor_files(modelId);
28764
28846
  }
28847
+ /**
28848
+ * Detects which quantization levels (dtypes) are available for a model
28849
+ * by checking which ONNX files exist on the hub or locally.
28850
+ *
28851
+ * A dtype is considered available if all required model session files
28852
+ * exist for that dtype.
28853
+ *
28854
+ * @param {string} modelId - The model id (e.g., "onnx-community/all-MiniLM-L6-v2-ONNX")
28855
+ * @param {Object} [options] - Optional parameters
28856
+ * @param {import('../../configs.js').PretrainedConfig} [options.config=null] - Pre-loaded config
28857
+ * @param {string} [options.model_file_name=null] - Override the model file name (excluding .onnx suffix)
28858
+ * @param {string} [options.revision='main'] - Model revision
28859
+ * @param {string} [options.cache_dir=null] - Custom cache directory
28860
+ * @param {boolean} [options.local_files_only=false] - Only check local files
28861
+ * @returns {Promise<string[]>} Array of available dtype strings (e.g., ['fp32', 'fp16', 'q4', 'q8'])
28862
+ *
28863
+ * @example
28864
+ * const dtypes = await ModelRegistry.get_available_dtypes('onnx-community/all-MiniLM-L6-v2-ONNX');
28865
+ * console.log(dtypes); // ['fp32', 'fp16', 'int8', 'uint8', 'q8', 'q4']
28866
+ */
28867
+ static async get_available_dtypes(modelId, options = {}) {
28868
+ return get_available_dtypes(modelId, options);
28869
+ }
28765
28870
  /**
28766
28871
  * Quickly checks if a model is fully cached by verifying `config.json` is present,
28767
28872
  * then confirming all required files are cached.
@@ -29208,8 +29313,11 @@ export {
29208
29313
  Gemma2Model,
29209
29314
  Gemma2PreTrainedModel,
29210
29315
  Gemma3ForCausalLM,
29316
+ Gemma3ForConditionalGeneration,
29317
+ Gemma3ImageProcessor,
29211
29318
  Gemma3Model,
29212
29319
  Gemma3PreTrainedModel,
29320
+ Gemma3Processor,
29213
29321
  Gemma3nAudioFeatureExtractor,
29214
29322
  Gemma3nForCausalLM,
29215
29323
  Gemma3nForConditionalGeneration,