@huggingface/transformers 4.0.0-next.8 → 4.0.0-next.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/dist/ort-wasm-simd-threaded.jsep.mjs +26 -26
- package/dist/transformers.js +338 -230
- package/dist/transformers.min.js +22 -18
- package/dist/transformers.node.cjs +339 -228
- package/dist/transformers.node.min.cjs +23 -19
- package/dist/transformers.node.min.mjs +23 -19
- package/dist/transformers.node.mjs +336 -228
- package/dist/transformers.web.js +220 -112
- package/dist/transformers.web.min.js +18 -14
- package/package.json +2 -2
- package/src/env.js +1 -1
- package/src/models/gemma3/image_processing_gemma3.js +3 -0
- package/src/models/gemma3/modeling_gemma3.js +4 -1
- package/src/models/gemma3/processing_gemma3.js +45 -0
- package/src/models/image_processors.js +1 -0
- package/src/models/modeling_utils.js +46 -25
- package/src/models/processors.js +1 -0
- package/src/models/qwen2_vl/modeling_qwen2_vl.js +39 -32
- package/src/models/registry.js +2 -8
- package/src/utils/model_registry/ModelRegistry.js +36 -0
- package/src/utils/model_registry/get_available_dtypes.js +68 -0
- package/src/utils/model_registry/get_model_files.js +7 -60
- package/src/utils/model_registry/resolve_model_type.js +66 -0
- package/types/models/gemma3/image_processing_gemma3.d.ts +4 -0
- package/types/models/gemma3/image_processing_gemma3.d.ts.map +1 -0
- package/types/models/gemma3/modeling_gemma3.d.ts +4 -1
- package/types/models/gemma3/modeling_gemma3.d.ts.map +1 -1
- package/types/models/gemma3/processing_gemma3.d.ts +20 -0
- package/types/models/gemma3/processing_gemma3.d.ts.map +1 -0
- package/types/models/image_processors.d.ts +1 -0
- package/types/models/modeling_utils.d.ts +2 -3
- package/types/models/modeling_utils.d.ts.map +1 -1
- package/types/models/processors.d.ts +1 -0
- package/types/models/qwen2_vl/modeling_qwen2_vl.d.ts.map +1 -1
- package/types/models/registry.d.ts.map +1 -1
- package/types/utils/model_registry/ModelRegistry.d.ts +27 -0
- package/types/utils/model_registry/ModelRegistry.d.ts.map +1 -1
- package/types/utils/model_registry/get_available_dtypes.d.ts +26 -0
- package/types/utils/model_registry/get_available_dtypes.d.ts.map +1 -0
- package/types/utils/model_registry/get_model_files.d.ts +25 -0
- package/types/utils/model_registry/get_model_files.d.ts.map +1 -1
- package/types/utils/model_registry/resolve_model_type.d.ts +24 -0
- package/types/utils/model_registry/resolve_model_type.d.ts.map +1 -0
package/dist/transformers.web.js
CHANGED
|
@@ -14,7 +14,7 @@ var node_path_default = {};
|
|
|
14
14
|
var node_url_default = {};
|
|
15
15
|
|
|
16
16
|
// src/env.js
|
|
17
|
-
var VERSION = "4.0.0-next.8";
|
|
17
|
+
var VERSION = "4.0.0-next.9";
|
|
18
18
|
var HAS_SELF = typeof self !== "undefined";
|
|
19
19
|
var IS_FS_AVAILABLE = !isEmpty(node_fs_default);
|
|
20
20
|
var IS_PATH_AVAILABLE = !isEmpty(node_path_default);
|
|
@@ -11830,6 +11830,7 @@ var processors_exports = {};
|
|
|
11830
11830
|
__export(processors_exports, {
|
|
11831
11831
|
ChatterboxProcessor: () => ChatterboxProcessor,
|
|
11832
11832
|
Florence2Processor: () => Florence2Processor,
|
|
11833
|
+
Gemma3Processor: () => Gemma3Processor,
|
|
11833
11834
|
Gemma3nProcessor: () => Gemma3nProcessor,
|
|
11834
11835
|
Glm46VProcessor: () => Glm46VProcessor,
|
|
11835
11836
|
GraniteSpeechProcessor: () => GraniteSpeechProcessor,
|
|
@@ -14829,6 +14830,7 @@ __export(image_processors_exports, {
|
|
|
14829
14830
|
DonutImageProcessor: () => DonutImageProcessor,
|
|
14830
14831
|
EfficientNetImageProcessor: () => EfficientNetImageProcessor,
|
|
14831
14832
|
GLPNFeatureExtractor: () => GLPNFeatureExtractor,
|
|
14833
|
+
Gemma3ImageProcessor: () => Gemma3ImageProcessor,
|
|
14832
14834
|
Glm46VImageProcessor: () => Glm46VImageProcessor,
|
|
14833
14835
|
GroundingDinoImageProcessor: () => GroundingDinoImageProcessor,
|
|
14834
14836
|
Idefics3ImageProcessor: () => Idefics3ImageProcessor,
|
|
@@ -15013,6 +15015,10 @@ var EfficientNetImageProcessor = class extends ImageProcessor {
|
|
|
15013
15015
|
}
|
|
15014
15016
|
};
|
|
15015
15017
|
|
|
15018
|
+
// src/models/gemma3/image_processing_gemma3.js
|
|
15019
|
+
var Gemma3ImageProcessor = class extends ImageProcessor {
|
|
15020
|
+
};
|
|
15021
|
+
|
|
15016
15022
|
// src/models/qwen2_vl/image_processing_qwen2_vl.js
|
|
15017
15023
|
var Qwen2VLImageProcessor = class extends ImageProcessor {
|
|
15018
15024
|
constructor(config) {
|
|
@@ -16236,6 +16242,48 @@ var Florence2Processor = class extends Processor {
|
|
|
16236
16242
|
}
|
|
16237
16243
|
};
|
|
16238
16244
|
|
|
16245
|
+
// src/models/gemma3/processing_gemma3.js
|
|
16246
|
+
var Gemma3Processor = class extends Processor {
|
|
16247
|
+
static tokenizer_class = AutoTokenizer;
|
|
16248
|
+
static image_processor_class = AutoImageProcessor;
|
|
16249
|
+
static uses_processor_config = true;
|
|
16250
|
+
static uses_chat_template_file = true;
|
|
16251
|
+
constructor(config, components, chat_template) {
|
|
16252
|
+
super(config, components, chat_template);
|
|
16253
|
+
this.image_seq_length = this.config.image_seq_length;
|
|
16254
|
+
const { boi_token, image_token, eoi_token } = this.tokenizer.config;
|
|
16255
|
+
this.boi_token = boi_token;
|
|
16256
|
+
this.image_token = image_token;
|
|
16257
|
+
this.eoi_token = eoi_token;
|
|
16258
|
+
const image_tokens_expanded = image_token.repeat(this.image_seq_length);
|
|
16259
|
+
this.full_image_sequence = `
|
|
16260
|
+
|
|
16261
|
+
${boi_token}${image_tokens_expanded}${eoi_token}
|
|
16262
|
+
|
|
16263
|
+
`;
|
|
16264
|
+
}
|
|
16265
|
+
/**
|
|
16266
|
+
* @param {string|string[]} text
|
|
16267
|
+
* @param {import('../../utils/image.js').RawImage|import('../../utils/image.js').RawImage[]} [images]
|
|
16268
|
+
* @param {Object} [options]
|
|
16269
|
+
*/
|
|
16270
|
+
async _call(text, images = null, options = {}) {
|
|
16271
|
+
if (typeof text === "string") {
|
|
16272
|
+
text = [text];
|
|
16273
|
+
}
|
|
16274
|
+
let image_inputs;
|
|
16275
|
+
if (images) {
|
|
16276
|
+
image_inputs = await this.image_processor(images, options);
|
|
16277
|
+
text = text.map((prompt) => prompt.replaceAll(this.boi_token, this.full_image_sequence));
|
|
16278
|
+
}
|
|
16279
|
+
const text_inputs = this.tokenizer(text, options);
|
|
16280
|
+
return {
|
|
16281
|
+
...text_inputs,
|
|
16282
|
+
...image_inputs
|
|
16283
|
+
};
|
|
16284
|
+
}
|
|
16285
|
+
};
|
|
16286
|
+
|
|
16239
16287
|
// src/models/gemma3n/processing_gemma3n.js
|
|
16240
16288
|
var Gemma3nProcessor = class extends Processor {
|
|
16241
16289
|
static image_processor_class = AutoImageProcessor;
|
|
@@ -19416,8 +19464,7 @@ var MODEL_TYPES = {
|
|
|
19416
19464
|
ImageAudioTextToText: 13,
|
|
19417
19465
|
Supertonic: 14,
|
|
19418
19466
|
Chatterbox: 15,
|
|
19419
|
-
MultimodalLanguageModelOnly: 16,
|
|
19420
|
-
VoxtralRealtime: 17
|
|
19467
|
+
VoxtralRealtime: 16
|
|
19421
19468
|
};
|
|
19422
19469
|
var MODEL_TYPE_CONFIG = {
|
|
19423
19470
|
[MODEL_TYPES.DecoderOnly]: {
|
|
@@ -19474,12 +19521,12 @@ var MODEL_TYPE_CONFIG = {
|
|
|
19474
19521
|
can_generate: true,
|
|
19475
19522
|
forward: image_text_to_text_forward,
|
|
19476
19523
|
prepare_inputs: multimodal_text_to_text_prepare_inputs_for_generation,
|
|
19477
|
-
sessions: (config) => {
|
|
19524
|
+
sessions: (config, options, textOnly) => {
|
|
19478
19525
|
const s = {
|
|
19479
19526
|
embed_tokens: "embed_tokens",
|
|
19480
|
-
vision_encoder: "vision_encoder",
|
|
19481
19527
|
decoder_model_merged: "decoder_model_merged"
|
|
19482
19528
|
};
|
|
19529
|
+
if (!textOnly) s["vision_encoder"] = "vision_encoder";
|
|
19483
19530
|
if (config.is_encoder_decoder) s["model"] = "encoder_model";
|
|
19484
19531
|
return s;
|
|
19485
19532
|
},
|
|
@@ -19501,12 +19548,17 @@ var MODEL_TYPE_CONFIG = {
|
|
|
19501
19548
|
[MODEL_TYPES.ImageAudioTextToText]: {
|
|
19502
19549
|
can_generate: true,
|
|
19503
19550
|
prepare_inputs: multimodal_text_to_text_prepare_inputs_for_generation,
|
|
19504
|
-
sessions: () =>
|
|
19505
|
-
|
|
19506
|
-
|
|
19507
|
-
|
|
19508
|
-
|
|
19509
|
-
|
|
19551
|
+
sessions: (config, options, textOnly) => {
|
|
19552
|
+
const s = {
|
|
19553
|
+
embed_tokens: "embed_tokens",
|
|
19554
|
+
decoder_model_merged: "decoder_model_merged"
|
|
19555
|
+
};
|
|
19556
|
+
if (!textOnly) {
|
|
19557
|
+
s["audio_encoder"] = "audio_encoder";
|
|
19558
|
+
s["vision_encoder"] = "vision_encoder";
|
|
19559
|
+
}
|
|
19560
|
+
return s;
|
|
19561
|
+
},
|
|
19510
19562
|
optional_configs: { generation_config: "generation_config.json" }
|
|
19511
19563
|
},
|
|
19512
19564
|
[MODEL_TYPES.Phi3V]: {
|
|
@@ -19557,14 +19609,6 @@ var MODEL_TYPE_CONFIG = {
|
|
|
19557
19609
|
cache_sessions: { model: true },
|
|
19558
19610
|
optional_configs: { generation_config: "generation_config.json" }
|
|
19559
19611
|
},
|
|
19560
|
-
[MODEL_TYPES.MultimodalLanguageModelOnly]: {
|
|
19561
|
-
can_generate: true,
|
|
19562
|
-
forward: image_text_to_text_forward,
|
|
19563
|
-
prepare_inputs: multimodal_text_to_text_prepare_inputs_for_generation,
|
|
19564
|
-
sessions: () => ({ embed_tokens: "embed_tokens", decoder_model_merged: "decoder_model_merged" }),
|
|
19565
|
-
cache_sessions: { decoder_model_merged: true },
|
|
19566
|
-
optional_configs: { generation_config: "generation_config.json" }
|
|
19567
|
-
},
|
|
19568
19612
|
[MODEL_TYPES.VoxtralRealtime]: {
|
|
19569
19613
|
can_generate: true,
|
|
19570
19614
|
prepare_inputs: decoder_prepare_inputs_for_generation,
|
|
@@ -19590,6 +19634,19 @@ function getSessionsConfig(modelType, config, options = {}) {
|
|
|
19590
19634
|
optional_configs: typeConfig.optional_configs
|
|
19591
19635
|
};
|
|
19592
19636
|
}
|
|
19637
|
+
function resolveTypeConfig(modelName, config) {
|
|
19638
|
+
let modelType = MODEL_TYPE_MAPPING.get(modelName);
|
|
19639
|
+
let textOnly = false;
|
|
19640
|
+
const nativeArch = config?.architectures?.[0];
|
|
19641
|
+
if (nativeArch && nativeArch !== modelName && modelName?.endsWith("ForCausalLM") && nativeArch.endsWith("ForConditionalGeneration")) {
|
|
19642
|
+
const nativeType = MODEL_TYPE_MAPPING.get(nativeArch);
|
|
19643
|
+
if (nativeType !== void 0) {
|
|
19644
|
+
modelType = nativeType;
|
|
19645
|
+
textOnly = true;
|
|
19646
|
+
}
|
|
19647
|
+
}
|
|
19648
|
+
return { typeConfig: MODEL_TYPE_CONFIG[modelType] ?? MODEL_TYPE_CONFIG.default, textOnly, modelType };
|
|
19649
|
+
}
|
|
19593
19650
|
var MODEL_TYPE_MAPPING = /* @__PURE__ */ new Map();
|
|
19594
19651
|
var MODEL_NAME_TO_CLASS_MAPPING = /* @__PURE__ */ new Map();
|
|
19595
19652
|
var MODEL_CLASS_TO_NAME_MAPPING = /* @__PURE__ */ new Map();
|
|
@@ -19609,8 +19666,7 @@ var PreTrainedModel = class extends Callable2 {
|
|
|
19609
19666
|
this.sessions = sessions;
|
|
19610
19667
|
this.configs = configs;
|
|
19611
19668
|
const modelName = MODEL_CLASS_TO_NAME_MAPPING.get(this.constructor);
|
|
19612
|
-
const modelType = MODEL_TYPE_MAPPING.get(modelName);
|
|
19613
|
-
const typeConfig = MODEL_TYPE_CONFIG[modelType] ?? MODEL_TYPE_CONFIG.default;
|
|
19669
|
+
const { typeConfig } = resolveTypeConfig(modelName, config);
|
|
19614
19670
|
this.can_generate = typeConfig.can_generate;
|
|
19615
19671
|
this._forward = typeConfig.forward;
|
|
19616
19672
|
this._prepare_inputs_for_generation = typeConfig.prepare_inputs;
|
|
@@ -19673,9 +19729,8 @@ var PreTrainedModel = class extends Callable2 {
|
|
|
19673
19729
|
session_options
|
|
19674
19730
|
};
|
|
19675
19731
|
const modelName = MODEL_CLASS_TO_NAME_MAPPING.get(this);
|
|
19676
|
-
const modelType = MODEL_TYPE_MAPPING.get(modelName);
|
|
19677
19732
|
config = options.config = await AutoConfig.from_pretrained(pretrained_model_name_or_path, options);
|
|
19678
|
-
const typeConfig = MODEL_TYPE_CONFIG[modelType] ?? MODEL_TYPE_CONFIG.default;
|
|
19733
|
+
const { typeConfig, textOnly, modelType } = resolveTypeConfig(modelName, config);
|
|
19679
19734
|
if (modelType === void 0) {
|
|
19680
19735
|
const type = modelName ?? config?.model_type;
|
|
19681
19736
|
if (type !== "custom") {
|
|
@@ -19684,7 +19739,7 @@ var PreTrainedModel = class extends Callable2 {
|
|
|
19684
19739
|
);
|
|
19685
19740
|
}
|
|
19686
19741
|
}
|
|
19687
|
-
const sessions = typeConfig.sessions(config, options);
|
|
19742
|
+
const sessions = typeConfig.sessions(config, options, textOnly);
|
|
19688
19743
|
const promises = [
|
|
19689
19744
|
constructSessions(pretrained_model_name_or_path, sessions, options, typeConfig.cache_sessions)
|
|
19690
19745
|
];
|
|
@@ -20739,6 +20794,7 @@ __export(models_exports, {
|
|
|
20739
20794
|
Gemma2Model: () => Gemma2Model,
|
|
20740
20795
|
Gemma2PreTrainedModel: () => Gemma2PreTrainedModel,
|
|
20741
20796
|
Gemma3ForCausalLM: () => Gemma3ForCausalLM,
|
|
20797
|
+
Gemma3ForConditionalGeneration: () => Gemma3ForConditionalGeneration,
|
|
20742
20798
|
Gemma3Model: () => Gemma3Model,
|
|
20743
20799
|
Gemma3PreTrainedModel: () => Gemma3PreTrainedModel,
|
|
20744
20800
|
Gemma3nForCausalLM: () => Gemma3nForCausalLM,
|
|
@@ -22457,12 +22513,35 @@ var Gemma2Model = class extends Gemma2PreTrainedModel {
|
|
|
22457
22513
|
var Gemma2ForCausalLM = class extends Gemma2PreTrainedModel {
|
|
22458
22514
|
};
|
|
22459
22515
|
|
|
22516
|
+
// src/models/llava/modeling_llava.js
|
|
22517
|
+
var LlavaPreTrainedModel = class extends PreTrainedModel {
|
|
22518
|
+
forward_params = ["input_ids", "attention_mask", "pixel_values", "position_ids", "past_key_values"];
|
|
22519
|
+
};
|
|
22520
|
+
var LlavaForConditionalGeneration = class extends LlavaPreTrainedModel {
|
|
22521
|
+
_merge_input_ids_with_image_features(kwargs) {
|
|
22522
|
+
const vision_hidden_size = kwargs.image_features.dims.at(-1);
|
|
22523
|
+
const reshaped_image_hidden_states = kwargs.image_features.view(-1, vision_hidden_size);
|
|
22524
|
+
return default_merge_input_ids_with_image_features({
|
|
22525
|
+
// @ts-ignore
|
|
22526
|
+
image_token_id: this.config.image_token_index ?? this.config.image_token_id,
|
|
22527
|
+
...kwargs,
|
|
22528
|
+
image_features: reshaped_image_hidden_states
|
|
22529
|
+
});
|
|
22530
|
+
}
|
|
22531
|
+
};
|
|
22532
|
+
var Moondream1ForConditionalGeneration = class extends LlavaForConditionalGeneration {
|
|
22533
|
+
};
|
|
22534
|
+
var LlavaQwen2ForCausalLM = class extends LlavaForConditionalGeneration {
|
|
22535
|
+
};
|
|
22536
|
+
|
|
22460
22537
|
// src/models/gemma3/modeling_gemma3.js
|
|
22461
22538
|
var Gemma3PreTrainedModel = class extends PreTrainedModel {
|
|
22462
22539
|
};
|
|
22463
22540
|
var Gemma3Model = class extends Gemma3PreTrainedModel {
|
|
22464
22541
|
};
|
|
22465
|
-
var Gemma3ForCausalLM = class extends Gemma3PreTrainedModel {
|
|
22542
|
+
var Gemma3ForConditionalGeneration = class extends LlavaForConditionalGeneration {
|
|
22543
|
+
};
|
|
22544
|
+
var Gemma3ForCausalLM = class extends Gemma3ForConditionalGeneration {
|
|
22466
22545
|
};
|
|
22467
22546
|
|
|
22468
22547
|
// src/models/gemma3n/modeling_gemma3n.js
|
|
@@ -22828,40 +22907,45 @@ var Qwen2VLForConditionalGeneration = class extends Qwen2VLPreTrainedModel {
|
|
|
22828
22907
|
});
|
|
22829
22908
|
}
|
|
22830
22909
|
prepare_inputs_for_generation(input_ids, model_inputs, generation_config) {
|
|
22831
|
-
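if (model_inputs.attention_mask && !model_inputs.position_ids) {
|
|
22832
|
-
if (!model_inputs.past_key_values) {
|
|
22833
|
-
[model_inputs.position_ids, model_inputs.rope_deltas] = this.get_rope_index(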
|
|
22910
|
+
if (!model_inputs.attention_mask || model_inputs.position_ids) {
|
|
22911
|
+
return model_inputs;
|
|
22912
|
+
}
|
|
22913
|
+
const session = this.sessions["decoder_model_merged"] ?? this.sessions["model"];
|
|
22914
|
+
if (!session.inputNames.includes("position_ids")) {
|
|
22915
|
+
return model_inputs;
|
|
22916
|
+
}
|
|
22917
|
+
if (!model_inputs.past_key_values) {
|
|
22918
|
+
[model_inputs.position_ids, model_inputs.rope_deltas] = this.get_rope_index(
|
|
22919
|
+
model_inputs.input_ids,
|
|
22920
|
+
model_inputs.image_grid_thw,
|
|
22921
|
+
model_inputs.video_grid_thw,
|
|
22922
|
+
model_inputs.attention_mask
|
|
22923
|
+
);
|
|
22924
|
+
} else {
|
|
22925
|
+
model_inputs.pixel_values = null;
|
|
22926
|
+
const past_length = model_inputs.past_key_values.get_seq_length();
|
|
22927
|
+
if (past_length < model_inputs.input_ids.dims[1]) {
|
|
22928
|
+
const [full_position_ids, rope_deltas] = this.get_rope_index(
|
|
22834
22929
|
model_inputs.input_ids,
|
|
22835
22930
|
model_inputs.image_grid_thw,
|
|
22836
22931
|
model_inputs.video_grid_thw,
|
|
22837
22932
|
model_inputs.attention_mask
|
|
22838
22933
|
);
|
|
22934
|
+
model_inputs.rope_deltas = rope_deltas;
|
|
22935
|
+
model_inputs.position_ids = full_position_ids.slice(null, null, [past_length, null]);
|
|
22936
|
+
model_inputs.input_ids = model_inputs.input_ids.slice(null, [past_length, null]);
|
|
22839
22937
|
} else {
|
|
22840
|
-
model_inputs.pixel_values = null;
|
|
22841
|
-
const past_length = model_inputs.past_key_values.get_seq_length();
|
|
22842
|
-
if (past_length < model_inputs.input_ids.dims[1]) {
|
|
22843
|
-
const [full_position_ids, rope_deltas] = this.get_rope_index(
|
|
22938
|
+
if (!model_inputs.rope_deltas) {
|
|
22939
|
+
[, model_inputs.rope_deltas] = this.get_rope_index(
|
|
22844
22940
|
model_inputs.input_ids,
|
|
22845
22941
|
model_inputs.image_grid_thw,
|
|
22846
22942
|
model_inputs.video_grid_thw,
|
|
22847
22943
|
model_inputs.attention_mask
|
|
22848
22944
|
);
|
|
22849
|
-
model_inputs.rope_deltas = rope_deltas;
|
|
22850
|
-
model_inputs.position_ids = full_position_ids.slice(null, null, [past_length, null]);
|
|
22851
|
-
model_inputs.input_ids = model_inputs.input_ids.slice(null, [past_length, null]);
|
|
22852
|
-
} else {
|
|
22853
|
-
if (!model_inputs.rope_deltas) {
|
|
22854
|
-
[, model_inputs.rope_deltas] = this.get_rope_index(
|
|
22855
|
-
model_inputs.input_ids,
|
|
22856
|
-
model_inputs.image_grid_thw,
|
|
22857
|
-
model_inputs.video_grid_thw,
|
|
22858
|
-
model_inputs.attention_mask
|
|
22859
|
-
);
|
|
22860
|
-
}
|
|
22861
|
-
const delta = BigInt(past_length);
|
|
22862
|
-
const rope_deltas_list = model_inputs.rope_deltas.map((x) => delta + x);
|
|
22863
|
-
model_inputs.position_ids = stack([rope_deltas_list, rope_deltas_list, rope_deltas_list], 0);
|
|
22864
22945
|
}
|
|
22946
|
+
const delta = BigInt(past_length);
|
|
22947
|
+
const rope_deltas_list = model_inputs.rope_deltas.map((x) => delta + x);
|
|
22948
|
+
model_inputs.position_ids = stack([rope_deltas_list, rope_deltas_list, rope_deltas_list], 0);
|
|
22865
22949
|
}
|
|
22866
22950
|
}
|
|
22867
22951
|
return model_inputs;
|
|
@@ -23144,27 +23228,6 @@ var HunYuanDenseV1Model = class extends HunYuanDenseV1PreTrainedModel {
|
|
|
23144
23228
|
var HunYuanDenseV1ForCausalLM = class extends HunYuanDenseV1PreTrainedModel {
|
|
23145
23229
|
};
|
|
23146
23230
|
|
|
23147
|
-
// src/models/llava/modeling_llava.js
|
|
23148
|
-
var LlavaPreTrainedModel = class extends PreTrainedModel {
|
|
23149
|
-
forward_params = ["input_ids", "attention_mask", "pixel_values", "position_ids", "past_key_values"];
|
|
23150
|
-
};
|
|
23151
|
-
var LlavaForConditionalGeneration = class extends LlavaPreTrainedModel {
|
|
23152
|
-
_merge_input_ids_with_image_features(kwargs) {
|
|
23153
|
-
const vision_hidden_size = kwargs.image_features.dims.at(-1);
|
|
23154
|
-
const reshaped_image_hidden_states = kwargs.image_features.view(-1, vision_hidden_size);
|
|
23155
|
-
return default_merge_input_ids_with_image_features({
|
|
23156
|
-
// @ts-ignore
|
|
23157
|
-
image_token_id: this.config.image_token_index ?? this.config.image_token_id,
|
|
23158
|
-
...kwargs,
|
|
23159
|
-
image_features: reshaped_image_hidden_states
|
|
23160
|
-
});
|
|
23161
|
-
}
|
|
23162
|
-
};
|
|
23163
|
-
var Moondream1ForConditionalGeneration = class extends LlavaForConditionalGeneration {
|
|
23164
|
-
};
|
|
23165
|
-
var LlavaQwen2ForCausalLM = class extends LlavaForConditionalGeneration {
|
|
23166
|
-
};
|
|
23167
|
-
|
|
23168
23231
|
// src/models/idefics3/modeling_idefics3.js
|
|
23169
23232
|
var Idefics3ForConditionalGeneration = class extends LlavaForConditionalGeneration {
|
|
23170
23233
|
forward_params = [
|
|
@@ -26062,6 +26125,7 @@ var MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = /* @__PURE__ */ new Map([
|
|
|
26062
26125
|
["qwen3_vl", "Qwen3VLForCausalLM"],
|
|
26063
26126
|
["qwen3_vl_moe", "Qwen3VLMoeForCausalLM"],
|
|
26064
26127
|
["qwen3_5", "Qwen3_5ForCausalLM"],
|
|
26128
|
+
["qwen3_5_text", "Qwen3_5ForCausalLM"],
|
|
26065
26129
|
["qwen3_5_moe", "Qwen3_5MoeForCausalLM"],
|
|
26066
26130
|
["gemma3n", "Gemma3nForCausalLM"],
|
|
26067
26131
|
["phi", "PhiForCausalLM"],
|
|
@@ -26149,6 +26213,7 @@ var MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMES = /* @__PURE__ */ new Map([
|
|
|
26149
26213
|
["smolvlm", "SmolVLMForConditionalGeneration"],
|
|
26150
26214
|
["paligemma", "PaliGemmaForConditionalGeneration"],
|
|
26151
26215
|
["llava_qwen2", "LlavaQwen2ForCausalLM"],
|
|
26216
|
+
["gemma3", "Gemma3ForConditionalGeneration"],
|
|
26152
26217
|
["gemma3n", "Gemma3nForConditionalGeneration"],
|
|
26153
26218
|
["mistral3", "Mistral3ForConditionalGeneration"],
|
|
26154
26219
|
["lighton_ocr", "LightOnOcrForConditionalGeneration"],
|
|
@@ -26343,13 +26408,6 @@ var CUSTOM_MAPPING = [
|
|
|
26343
26408
|
],
|
|
26344
26409
|
["SupertonicForConditionalGeneration", SupertonicForConditionalGeneration, MODEL_TYPES.Supertonic],
|
|
26345
26410
|
["ChatterboxModel", ChatterboxModel, MODEL_TYPES.Chatterbox],
|
|
26346
|
-
["Qwen2VLForCausalLM", Qwen2VLForCausalLM, MODEL_TYPES.MultimodalLanguageModelOnly],
|
|
26347
|
-
["Qwen2_5_VLForCausalLM", Qwen2_5_VLForCausalLM, MODEL_TYPES.MultimodalLanguageModelOnly],
|
|
26348
|
-
["Qwen3VLForCausalLM", Qwen3VLForCausalLM, MODEL_TYPES.MultimodalLanguageModelOnly],
|
|
26349
|
-
["Qwen3VLMoeForCausalLM", Qwen3VLMoeForCausalLM, MODEL_TYPES.MultimodalLanguageModelOnly],
|
|
26350
|
-
["Qwen3_5ForCausalLM", Qwen3_5ForCausalLM, MODEL_TYPES.MultimodalLanguageModelOnly],
|
|
26351
|
-
["Qwen3_5MoeForCausalLM", Qwen3_5MoeForCausalLM, MODEL_TYPES.MultimodalLanguageModelOnly],
|
|
26352
|
-
["Gemma3nForCausalLM", Gemma3nForCausalLM, MODEL_TYPES.MultimodalLanguageModelOnly],
|
|
26353
26411
|
[
|
|
26354
26412
|
"VoxtralRealtimeForConditionalGeneration",
|
|
26355
26413
|
VoxtralRealtimeForConditionalGeneration,
|
|
@@ -28031,6 +28089,41 @@ var TASK_ALIASES = Object.freeze({
|
|
|
28031
28089
|
embeddings: "feature-extraction"
|
|
28032
28090
|
});
|
|
28033
28091
|
|
|
28092
|
+
// src/utils/model_registry/resolve_model_type.js
|
|
28093
|
+
function resolve_model_type(config, { warn = true } = {}) {
|
|
28094
|
+
const architectures = (
|
|
28095
|
+
/** @type {string[]} */
|
|
28096
|
+
config.architectures || []
|
|
28097
|
+
);
|
|
28098
|
+
for (const arch of architectures) {
|
|
28099
|
+
const mappedType = MODEL_TYPE_MAPPING.get(arch);
|
|
28100
|
+
if (mappedType !== void 0) {
|
|
28101
|
+
return mappedType;
|
|
28102
|
+
}
|
|
28103
|
+
}
|
|
28104
|
+
if (config.model_type) {
|
|
28105
|
+
const mappedType = MODEL_TYPE_MAPPING.get(config.model_type);
|
|
28106
|
+
if (mappedType !== void 0) {
|
|
28107
|
+
return mappedType;
|
|
28108
|
+
}
|
|
28109
|
+
for (const mapping of Object.values(MODEL_MAPPING_NAMES)) {
|
|
28110
|
+
if (mapping.has(config.model_type)) {
|
|
28111
|
+
const resolved = MODEL_TYPE_MAPPING.get(mapping.get(config.model_type));
|
|
28112
|
+
if (resolved !== void 0) {
|
|
28113
|
+
return resolved;
|
|
28114
|
+
}
|
|
28115
|
+
}
|
|
28116
|
+
}
|
|
28117
|
+
}
|
|
28118
|
+
if (warn) {
|
|
28119
|
+
const archList = architectures.length > 0 ? architectures.join(", ") : "(none)";
|
|
28120
|
+
logger.warn(
|
|
28121
|
+
`[resolve_model_type] Architecture(s) not found in MODEL_TYPE_MAPPING: [${archList}] for model type '${config.model_type}'. Falling back to EncoderOnly (single model.onnx file). If you encounter issues, please report at: ${GITHUB_ISSUE_URL}`
|
|
28122
|
+
);
|
|
28123
|
+
}
|
|
28124
|
+
return MODEL_TYPES.EncoderOnly;
|
|
28125
|
+
}
|
|
28126
|
+
|
|
28034
28127
|
// src/utils/model_registry/get_model_files.js
|
|
28035
28128
|
function get_config(modelId, { config = null, cache_dir = null, local_files_only = false, revision = "main" } = {}) {
|
|
28036
28129
|
if (config !== null) {
|
|
@@ -28053,43 +28146,7 @@ async function get_model_files(modelId, { config = null, dtype: overrideDtype =
|
|
|
28053
28146
|
const subfolder = "onnx";
|
|
28054
28147
|
const rawDevice = overrideDevice ?? custom_config.device;
|
|
28055
28148
|
let dtype = overrideDtype ?? custom_config.dtype;
|
|
28056
|
-
|
|
28057
|
-
const architectures = (
|
|
28058
|
-
/** @type {string[]} */
|
|
28059
|
-
config.architectures || []
|
|
28060
|
-
);
|
|
28061
|
-
let foundInMapping = false;
|
|
28062
|
-
for (const arch of architectures) {
|
|
28063
|
-
const mappedType = MODEL_TYPE_MAPPING.get(arch);
|
|
28064
|
-
if (mappedType !== void 0) {
|
|
28065
|
-
modelType = mappedType;
|
|
28066
|
-
foundInMapping = true;
|
|
28067
|
-
break;
|
|
28068
|
-
}
|
|
28069
|
-
}
|
|
28070
|
-
if (!foundInMapping && config.model_type) {
|
|
28071
|
-
const mappedType = MODEL_TYPE_MAPPING.get(config.model_type);
|
|
28072
|
-
if (mappedType !== void 0) {
|
|
28073
|
-
modelType = mappedType;
|
|
28074
|
-
foundInMapping = true;
|
|
28075
|
-
}
|
|
28076
|
-
if (!foundInMapping) {
|
|
28077
|
-
for (const mapping of Object.values(MODEL_MAPPING_NAMES)) {
|
|
28078
|
-
if (mapping.has(config.model_type)) {
|
|
28079
|
-
modelType = MODEL_TYPE_MAPPING.get(mapping.get(config.model_type));
|
|
28080
|
-
foundInMapping = true;
|
|
28081
|
-
break;
|
|
28082
|
-
}
|
|
28083
|
-
}
|
|
28084
|
-
}
|
|
28085
|
-
}
|
|
28086
|
-
if (!foundInMapping) {
|
|
28087
|
-
const archList = architectures.length > 0 ? architectures.join(", ") : "(none)";
|
|
28088
|
-
logger.warn(
|
|
28089
|
-
`[get_model_files] Architecture(s) not found in MODEL_TYPE_MAPPING: [${archList}] for model type '${config.model_type}'. Falling back to EncoderOnly (single model.onnx file). If you encounter issues, please report at: ${GITHUB_ISSUE_URL}`
|
|
28090
|
-
);
|
|
28091
|
-
modelType = MODEL_TYPES.EncoderOnly;
|
|
28092
|
-
}
|
|
28149
|
+
const modelType = resolve_model_type(config);
|
|
28093
28150
|
const add_model_file = (fileName, baseName = null) => {
|
|
28094
28151
|
baseName = baseName ?? fileName;
|
|
28095
28152
|
const selectedDevice = selectDevice(rawDevice, fileName);
|
|
@@ -28676,6 +28733,31 @@ async function clear_pipeline_cache(task, modelId, options = {}) {
|
|
|
28676
28733
|
return await clear_files_from_cache(modelId, files, options);
|
|
28677
28734
|
}
|
|
28678
28735
|
|
|
28736
|
+
// src/utils/model_registry/get_available_dtypes.js
|
|
28737
|
+
var CONCRETE_DTYPES = Object.keys(DEFAULT_DTYPE_SUFFIX_MAPPING);
|
|
28738
|
+
async function get_available_dtypes(modelId, { config = null, model_file_name = null, revision = "main", cache_dir = null, local_files_only = false } = {}) {
|
|
28739
|
+
config = await get_config(modelId, { config, cache_dir, local_files_only, revision });
|
|
28740
|
+
const subfolder = "onnx";
|
|
28741
|
+
const modelType = resolve_model_type(config);
|
|
28742
|
+
const { sessions } = getSessionsConfig(modelType, config, { model_file_name });
|
|
28743
|
+
const baseNames = Object.values(sessions);
|
|
28744
|
+
const metadataOptions = { revision, cache_dir, local_files_only };
|
|
28745
|
+
const probeResults = await Promise.all(
|
|
28746
|
+
CONCRETE_DTYPES.map(async (dtype) => {
|
|
28747
|
+
const suffix = DEFAULT_DTYPE_SUFFIX_MAPPING[dtype] ?? "";
|
|
28748
|
+
const allExist = await Promise.all(
|
|
28749
|
+
baseNames.map(async (baseName) => {
|
|
28750
|
+
const filename = `${subfolder}/${baseName}${suffix}.onnx`;
|
|
28751
|
+
const metadata = await get_file_metadata(modelId, filename, metadataOptions);
|
|
28752
|
+
return metadata.exists;
|
|
28753
|
+
})
|
|
28754
|
+
);
|
|
28755
|
+
return { dtype, available: allExist.every(Boolean) };
|
|
28756
|
+
})
|
|
28757
|
+
);
|
|
28758
|
+
return probeResults.filter((r) => r.available).map((r) => r.dtype);
|
|
28759
|
+
}
|
|
28760
|
+
|
|
28679
28761
|
// src/utils/model_registry/ModelRegistry.js
|
|
28680
28762
|
var ModelRegistry = class {
|
|
28681
28763
|
/**
|
|
@@ -28762,6 +28844,29 @@ var ModelRegistry = class {
|
|
|
28762
28844
|
static async get_processor_files(modelId) {
|
|
28763
28845
|
return get_processor_files(modelId);
|
|
28764
28846
|
}
|
|
28847
|
+
/**
|
|
28848
|
+
* Detects which quantization levels (dtypes) are available for a model
|
|
28849
|
+
* by checking which ONNX files exist on the hub or locally.
|
|
28850
|
+
*
|
|
28851
|
+
* A dtype is considered available if all required model session files
|
|
28852
|
+
* exist for that dtype.
|
|
28853
|
+
*
|
|
28854
|
+
* @param {string} modelId - The model id (e.g., "onnx-community/all-MiniLM-L6-v2-ONNX")
|
|
28855
|
+
* @param {Object} [options] - Optional parameters
|
|
28856
|
+
* @param {import('../../configs.js').PretrainedConfig} [options.config=null] - Pre-loaded config
|
|
28857
|
+
* @param {string} [options.model_file_name=null] - Override the model file name (excluding .onnx suffix)
|
|
28858
|
+
* @param {string} [options.revision='main'] - Model revision
|
|
28859
|
+
* @param {string} [options.cache_dir=null] - Custom cache directory
|
|
28860
|
+
* @param {boolean} [options.local_files_only=false] - Only check local files
|
|
28861
|
+
* @returns {Promise<string[]>} Array of available dtype strings (e.g., ['fp32', 'fp16', 'q4', 'q8'])
|
|
28862
|
+
*
|
|
28863
|
+
* @example
|
|
28864
|
+
* const dtypes = await ModelRegistry.get_available_dtypes('onnx-community/all-MiniLM-L6-v2-ONNX');
|
|
28865
|
+
* console.log(dtypes); // ['fp32', 'fp16', 'int8', 'uint8', 'q8', 'q4']
|
|
28866
|
+
*/
|
|
28867
|
+
static async get_available_dtypes(modelId, options = {}) {
|
|
28868
|
+
return get_available_dtypes(modelId, options);
|
|
28869
|
+
}
|
|
28765
28870
|
/**
|
|
28766
28871
|
* Quickly checks if a model is fully cached by verifying `config.json` is present,
|
|
28767
28872
|
* then confirming all required files are cached.
|
|
@@ -29208,8 +29313,11 @@ export {
|
|
|
29208
29313
|
Gemma2Model,
|
|
29209
29314
|
Gemma2PreTrainedModel,
|
|
29210
29315
|
Gemma3ForCausalLM,
|
|
29316
|
+
Gemma3ForConditionalGeneration,
|
|
29317
|
+
Gemma3ImageProcessor,
|
|
29211
29318
|
Gemma3Model,
|
|
29212
29319
|
Gemma3PreTrainedModel,
|
|
29320
|
+
Gemma3Processor,
|
|
29213
29321
|
Gemma3nAudioFeatureExtractor,
|
|
29214
29322
|
Gemma3nForCausalLM,
|
|
29215
29323
|
Gemma3nForConditionalGeneration,
|