@huggingface/transformers 3.1.2 → 3.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -3
- package/dist/transformers.cjs +835 -144
- package/dist/transformers.cjs.map +1 -1
- package/dist/transformers.js +850 -144
- package/dist/transformers.js.map +1 -1
- package/dist/transformers.min.cjs +1 -1
- package/dist/transformers.min.cjs.map +1 -1
- package/dist/transformers.min.js +1 -1
- package/dist/transformers.min.js.map +1 -1
- package/dist/transformers.min.mjs +1 -1
- package/dist/transformers.min.mjs.map +1 -1
- package/dist/transformers.mjs +850 -144
- package/dist/transformers.mjs.map +1 -1
- package/package.json +1 -1
- package/src/base/image_processors_utils.js +3 -1
- package/src/configs.js +10 -2
- package/src/env.js +1 -1
- package/src/models/feature_extractors.js +1 -0
- package/src/models/idefics3/image_processing_idefics3.js +24 -13
- package/src/models/image_processors.js +1 -0
- package/src/models/moonshine/feature_extraction_moonshine.js +26 -0
- package/src/models/moonshine/processing_moonshine.js +20 -0
- package/src/models/phi3_v/image_processing_phi3_v.js +163 -0
- package/src/models/phi3_v/processing_phi3_v.js +53 -0
- package/src/models/processors.js +2 -0
- package/src/models/pyannote/feature_extraction_pyannote.js +56 -0
- package/src/models/pyannote/processing_pyannote.js +7 -54
- package/src/models.js +223 -30
- package/src/ops/registry.js +11 -0
- package/src/pipelines.js +31 -1
- package/src/utils/tensor.js +51 -1
- package/types/base/image_processors_utils.d.ts +2 -2
- package/types/base/image_processors_utils.d.ts.map +1 -1
- package/types/configs.d.ts.map +1 -1
- package/types/models/auto/image_processing_auto.d.ts.map +1 -1
- package/types/models/feature_extractors.d.ts +1 -0
- package/types/models/idefics3/image_processing_idefics3.d.ts.map +1 -1
- package/types/models/image_processors.d.ts +1 -0
- package/types/models/moonshine/feature_extraction_moonshine.d.ts +13 -0
- package/types/models/moonshine/feature_extraction_moonshine.d.ts.map +1 -0
- package/types/models/moonshine/processing_moonshine.d.ts +17 -0
- package/types/models/moonshine/processing_moonshine.d.ts.map +1 -0
- package/types/models/phi3_v/image_processing_phi3_v.d.ts +17 -0
- package/types/models/phi3_v/image_processing_phi3_v.d.ts.map +1 -0
- package/types/models/phi3_v/processing_phi3_v.d.ts +17 -0
- package/types/models/phi3_v/processing_phi3_v.d.ts.map +1 -0
- package/types/models/processors.d.ts +2 -0
- package/types/models/pyannote/feature_extraction_pyannote.d.ts +18 -0
- package/types/models/pyannote/feature_extraction_pyannote.d.ts.map +1 -1
- package/types/models/pyannote/processing_pyannote.d.ts +4 -15
- package/types/models/pyannote/processing_pyannote.d.ts.map +1 -1
- package/types/models.d.ts +64 -1
- package/types/models.d.ts.map +1 -1
- package/types/ops/registry.d.ts +1 -0
- package/types/ops/registry.d.ts.map +1 -1
- package/types/pipelines.d.ts +5 -0
- package/types/pipelines.d.ts.map +1 -1
- package/types/utils/tensor.d.ts +16 -0
- package/types/utils/tensor.d.ts.map +1 -1
package/src/models.js
CHANGED
|
@@ -131,6 +131,7 @@ const MODEL_TYPES = {
|
|
|
131
131
|
ImageTextToText: 6,
|
|
132
132
|
Musicgen: 7,
|
|
133
133
|
MultiModality: 8,
|
|
134
|
+
Phi3V: 9,
|
|
134
135
|
}
|
|
135
136
|
//////////////////////////////////////////////////
|
|
136
137
|
|
|
@@ -906,6 +907,10 @@ export class PreTrainedModel extends Callable {
|
|
|
906
907
|
this._forward = imageTextToTextForward;
|
|
907
908
|
this._prepare_inputs_for_generation = image_text_to_text_prepare_inputs_for_generation;
|
|
908
909
|
break;
|
|
910
|
+
case MODEL_TYPES.Phi3V:
|
|
911
|
+
this.can_generate = true;
|
|
912
|
+
this._prepare_inputs_for_generation = image_text_to_text_prepare_inputs_for_generation;
|
|
913
|
+
break;
|
|
909
914
|
|
|
910
915
|
case MODEL_TYPES.MultiModality:
|
|
911
916
|
this.can_generate = true;
|
|
@@ -1070,6 +1075,18 @@ export class PreTrainedModel extends Callable {
|
|
|
1070
1075
|
}, options),
|
|
1071
1076
|
]);
|
|
1072
1077
|
|
|
1078
|
+
} else if (modelType === MODEL_TYPES.Phi3V) {
|
|
1079
|
+
info = await Promise.all([
|
|
1080
|
+
constructSessions(pretrained_model_name_or_path, {
|
|
1081
|
+
prepare_inputs_embeds: 'prepare_inputs_embeds',
|
|
1082
|
+
model: 'model',
|
|
1083
|
+
vision_encoder: 'vision_encoder',
|
|
1084
|
+
}, options),
|
|
1085
|
+
getOptionalConfigs(pretrained_model_name_or_path, {
|
|
1086
|
+
generation_config: 'generation_config.json',
|
|
1087
|
+
}, options),
|
|
1088
|
+
]);
|
|
1089
|
+
|
|
1073
1090
|
} else { // should be MODEL_TYPES.EncoderOnly
|
|
1074
1091
|
if (modelType !== MODEL_TYPES.EncoderOnly) {
|
|
1075
1092
|
const type = modelName ?? config?.model_type;
|
|
@@ -1934,6 +1951,49 @@ export class BertForQuestionAnswering extends BertPreTrainedModel {
|
|
|
1934
1951
|
}
|
|
1935
1952
|
//////////////////////////////////////////////////
|
|
1936
1953
|
|
|
1954
|
+
//////////////////////////////////////////////////
|
|
1955
|
+
// ModernBert models
|
|
1956
|
+
export class ModernBertPreTrainedModel extends PreTrainedModel { }
|
|
1957
|
+
export class ModernBertModel extends ModernBertPreTrainedModel { }
|
|
1958
|
+
|
|
1959
|
+
export class ModernBertForMaskedLM extends ModernBertPreTrainedModel {
|
|
1960
|
+
/**
|
|
1961
|
+
* Calls the model on new inputs.
|
|
1962
|
+
*
|
|
1963
|
+
* @param {Object} model_inputs The inputs to the model.
|
|
1964
|
+
* @returns {Promise<MaskedLMOutput>} An object containing the model's output logits for masked language modeling.
|
|
1965
|
+
*/
|
|
1966
|
+
async _call(model_inputs) {
|
|
1967
|
+
return new MaskedLMOutput(await super._call(model_inputs));
|
|
1968
|
+
}
|
|
1969
|
+
}
|
|
1970
|
+
|
|
1971
|
+
export class ModernBertForSequenceClassification extends ModernBertPreTrainedModel {
|
|
1972
|
+
/**
|
|
1973
|
+
* Calls the model on new inputs.
|
|
1974
|
+
*
|
|
1975
|
+
* @param {Object} model_inputs The inputs to the model.
|
|
1976
|
+
* @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
|
|
1977
|
+
*/
|
|
1978
|
+
async _call(model_inputs) {
|
|
1979
|
+
return new SequenceClassifierOutput(await super._call(model_inputs));
|
|
1980
|
+
}
|
|
1981
|
+
}
|
|
1982
|
+
|
|
1983
|
+
export class ModernBertForTokenClassification extends ModernBertPreTrainedModel {
|
|
1984
|
+
/**
|
|
1985
|
+
* Calls the model on new inputs.
|
|
1986
|
+
*
|
|
1987
|
+
* @param {Object} model_inputs The inputs to the model.
|
|
1988
|
+
* @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for token classification.
|
|
1989
|
+
*/
|
|
1990
|
+
async _call(model_inputs) {
|
|
1991
|
+
return new TokenClassifierOutput(await super._call(model_inputs));
|
|
1992
|
+
}
|
|
1993
|
+
}
|
|
1994
|
+
//////////////////////////////////////////////////
|
|
1995
|
+
|
|
1996
|
+
|
|
1937
1997
|
//////////////////////////////////////////////////
|
|
1938
1998
|
// NomicBert models
|
|
1939
1999
|
export class NomicBertPreTrainedModel extends PreTrainedModel { }
|
|
@@ -3342,6 +3402,29 @@ export class WhisperForConditionalGeneration extends WhisperPreTrainedModel {
|
|
|
3342
3402
|
}
|
|
3343
3403
|
//////////////////////////////////////////////////
|
|
3344
3404
|
|
|
3405
|
+
|
|
3406
|
+
//////////////////////////////////////////////////
|
|
3407
|
+
// Moonshine models
|
|
3408
|
+
export class MoonshinePreTrainedModel extends PreTrainedModel {
|
|
3409
|
+
|
|
3410
|
+
requires_attention_mask = false;
|
|
3411
|
+
main_input_name = 'input_values';
|
|
3412
|
+
forward_params = [
|
|
3413
|
+
'input_values',
|
|
3414
|
+
'decoder_input_ids',
|
|
3415
|
+
'past_key_values',
|
|
3416
|
+
];
|
|
3417
|
+
};
|
|
3418
|
+
|
|
3419
|
+
/**
|
|
3420
|
+
* MoonshineModel class for training Moonshine models without a language model head.
|
|
3421
|
+
*/
|
|
3422
|
+
export class MoonshineModel extends MoonshinePreTrainedModel { }
|
|
3423
|
+
|
|
3424
|
+
export class MoonshineForConditionalGeneration extends MoonshinePreTrainedModel { }
|
|
3425
|
+
//////////////////////////////////////////////////
|
|
3426
|
+
|
|
3427
|
+
|
|
3345
3428
|
//////////////////////////////////////////////////
|
|
3346
3429
|
/**
|
|
3347
3430
|
* Vision Encoder-Decoder model based on OpenAI's GPT architecture for image captioning and other vision tasks
|
|
@@ -3612,6 +3695,77 @@ export class Idefics3ForConditionalGeneration extends Idefics3PreTrainedModel {
|
|
|
3612
3695
|
}
|
|
3613
3696
|
//////////////////////////////////////////////////
|
|
3614
3697
|
|
|
3698
|
+
export class Phi3VPreTrainedModel extends PreTrainedModel {
|
|
3699
|
+
forward_params = [
|
|
3700
|
+
'input_ids',
|
|
3701
|
+
'inputs_embeds',
|
|
3702
|
+
'attention_mask',
|
|
3703
|
+
'position_ids',
|
|
3704
|
+
'pixel_values',
|
|
3705
|
+
'image_sizes',
|
|
3706
|
+
'past_key_values',
|
|
3707
|
+
];
|
|
3708
|
+
}
|
|
3709
|
+
export class Phi3VForCausalLM extends Phi3VPreTrainedModel {
|
|
3710
|
+
|
|
3711
|
+
async forward({
|
|
3712
|
+
// Produced by the tokenizer/processor:
|
|
3713
|
+
input_ids = null,
|
|
3714
|
+
attention_mask = null,
|
|
3715
|
+
pixel_values = null,
|
|
3716
|
+
image_sizes = null,
|
|
3717
|
+
|
|
3718
|
+
// Used during generation:
|
|
3719
|
+
position_ids = null,
|
|
3720
|
+
inputs_embeds = null,
|
|
3721
|
+
past_key_values = null,
|
|
3722
|
+
|
|
3723
|
+
// Generic generation parameters
|
|
3724
|
+
generation_config = null,
|
|
3725
|
+
logits_processor = null,
|
|
3726
|
+
|
|
3727
|
+
// TODO: needed?
|
|
3728
|
+
...kwargs
|
|
3729
|
+
}) {
|
|
3730
|
+
if (!inputs_embeds) {
|
|
3731
|
+
let image_features;
|
|
3732
|
+
if (pixel_values && input_ids.dims[1] !== 1) {
|
|
3733
|
+
if (!image_sizes) {
|
|
3734
|
+
throw new Error('`image_sizes` must be provided when `pixel_values` is provided.');
|
|
3735
|
+
}
|
|
3736
|
+
|
|
3737
|
+
// Encode the image
|
|
3738
|
+
({ image_features } = await sessionRun(this.sessions['vision_encoder'], {
|
|
3739
|
+
pixel_values,
|
|
3740
|
+
image_sizes,
|
|
3741
|
+
}));
|
|
3742
|
+
} else {
|
|
3743
|
+
const hidden_size = this.config.normalized_config.hidden_size;
|
|
3744
|
+
image_features = new Tensor(
|
|
3745
|
+
'float32',
|
|
3746
|
+
[],
|
|
3747
|
+
[0, hidden_size],
|
|
3748
|
+
);
|
|
3749
|
+
}
|
|
3750
|
+
|
|
3751
|
+
({ inputs_embeds } = await sessionRun(this.sessions['prepare_inputs_embeds'], {
|
|
3752
|
+
input_ids,
|
|
3753
|
+
image_features,
|
|
3754
|
+
}));
|
|
3755
|
+
}
|
|
3756
|
+
|
|
3757
|
+
const outputs = await decoderForward(this, {
|
|
3758
|
+
inputs_embeds,
|
|
3759
|
+
past_key_values,
|
|
3760
|
+
attention_mask,
|
|
3761
|
+
position_ids,
|
|
3762
|
+
generation_config,
|
|
3763
|
+
logits_processor,
|
|
3764
|
+
}, false);
|
|
3765
|
+
return outputs;
|
|
3766
|
+
}
|
|
3767
|
+
}
|
|
3768
|
+
|
|
3615
3769
|
//////////////////////////////////////////////////
|
|
3616
3770
|
export class CLIPPreTrainedModel extends PreTrainedModel { }
|
|
3617
3771
|
|
|
@@ -3666,9 +3820,11 @@ export class CLIPModel extends CLIPPreTrainedModel { }
|
|
|
3666
3820
|
export class CLIPTextModel extends CLIPPreTrainedModel {
|
|
3667
3821
|
/** @type {typeof PreTrainedModel.from_pretrained} */
|
|
3668
3822
|
static async from_pretrained(pretrained_model_name_or_path, options = {}) {
|
|
3669
|
-
|
|
3670
|
-
|
|
3671
|
-
|
|
3823
|
+
return super.from_pretrained(pretrained_model_name_or_path, {
|
|
3824
|
+
// Update default model file name if not provided
|
|
3825
|
+
model_file_name: 'text_model',
|
|
3826
|
+
...options,
|
|
3827
|
+
});
|
|
3672
3828
|
}
|
|
3673
3829
|
}
|
|
3674
3830
|
|
|
@@ -3701,9 +3857,11 @@ export class CLIPTextModel extends CLIPPreTrainedModel {
|
|
|
3701
3857
|
export class CLIPTextModelWithProjection extends CLIPPreTrainedModel {
|
|
3702
3858
|
/** @type {typeof PreTrainedModel.from_pretrained} */
|
|
3703
3859
|
static async from_pretrained(pretrained_model_name_or_path, options = {}) {
|
|
3704
|
-
|
|
3705
|
-
|
|
3706
|
-
|
|
3860
|
+
return super.from_pretrained(pretrained_model_name_or_path, {
|
|
3861
|
+
// Update default model file name if not provided
|
|
3862
|
+
model_file_name: 'text_model',
|
|
3863
|
+
...options,
|
|
3864
|
+
});
|
|
3707
3865
|
}
|
|
3708
3866
|
}
|
|
3709
3867
|
|
|
@@ -3713,9 +3871,11 @@ export class CLIPTextModelWithProjection extends CLIPPreTrainedModel {
|
|
|
3713
3871
|
export class CLIPVisionModel extends CLIPPreTrainedModel {
|
|
3714
3872
|
/** @type {typeof PreTrainedModel.from_pretrained} */
|
|
3715
3873
|
static async from_pretrained(pretrained_model_name_or_path, options = {}) {
|
|
3716
|
-
|
|
3717
|
-
|
|
3718
|
-
|
|
3874
|
+
return super.from_pretrained(pretrained_model_name_or_path, {
|
|
3875
|
+
// Update default model file name if not provided
|
|
3876
|
+
model_file_name: 'vision_model',
|
|
3877
|
+
...options,
|
|
3878
|
+
});
|
|
3719
3879
|
}
|
|
3720
3880
|
}
|
|
3721
3881
|
|
|
@@ -3748,9 +3908,11 @@ export class CLIPVisionModel extends CLIPPreTrainedModel {
|
|
|
3748
3908
|
export class CLIPVisionModelWithProjection extends CLIPPreTrainedModel {
|
|
3749
3909
|
/** @type {typeof PreTrainedModel.from_pretrained} */
|
|
3750
3910
|
static async from_pretrained(pretrained_model_name_or_path, options = {}) {
|
|
3751
|
-
|
|
3752
|
-
|
|
3753
|
-
|
|
3911
|
+
return super.from_pretrained(pretrained_model_name_or_path, {
|
|
3912
|
+
// Update default model file name if not provided
|
|
3913
|
+
model_file_name: 'vision_model',
|
|
3914
|
+
...options,
|
|
3915
|
+
});
|
|
3754
3916
|
}
|
|
3755
3917
|
}
|
|
3756
3918
|
//////////////////////////////////////////////////
|
|
@@ -3834,9 +3996,11 @@ export class SiglipModel extends SiglipPreTrainedModel { }
|
|
|
3834
3996
|
export class SiglipTextModel extends SiglipPreTrainedModel {
|
|
3835
3997
|
/** @type {typeof PreTrainedModel.from_pretrained} */
|
|
3836
3998
|
static async from_pretrained(pretrained_model_name_or_path, options = {}) {
|
|
3837
|
-
|
|
3838
|
-
|
|
3839
|
-
|
|
3999
|
+
return super.from_pretrained(pretrained_model_name_or_path, {
|
|
4000
|
+
// Update default model file name if not provided
|
|
4001
|
+
model_file_name: 'text_model',
|
|
4002
|
+
...options,
|
|
4003
|
+
});
|
|
3840
4004
|
}
|
|
3841
4005
|
}
|
|
3842
4006
|
|
|
@@ -3869,9 +4033,11 @@ export class SiglipTextModel extends SiglipPreTrainedModel {
|
|
|
3869
4033
|
export class SiglipVisionModel extends CLIPPreTrainedModel {
|
|
3870
4034
|
/** @type {typeof PreTrainedModel.from_pretrained} */
|
|
3871
4035
|
static async from_pretrained(pretrained_model_name_or_path, options = {}) {
|
|
3872
|
-
|
|
3873
|
-
|
|
3874
|
-
|
|
4036
|
+
return super.from_pretrained(pretrained_model_name_or_path, {
|
|
4037
|
+
// Update default model file name if not provided
|
|
4038
|
+
model_file_name: 'vision_model',
|
|
4039
|
+
...options,
|
|
4040
|
+
});
|
|
3875
4041
|
}
|
|
3876
4042
|
}
|
|
3877
4043
|
//////////////////////////////////////////////////
|
|
@@ -3926,18 +4092,22 @@ export class JinaCLIPModel extends JinaCLIPPreTrainedModel {
|
|
|
3926
4092
|
export class JinaCLIPTextModel extends JinaCLIPPreTrainedModel {
|
|
3927
4093
|
/** @type {typeof PreTrainedModel.from_pretrained} */
|
|
3928
4094
|
static async from_pretrained(pretrained_model_name_or_path, options = {}) {
|
|
3929
|
-
|
|
3930
|
-
|
|
3931
|
-
|
|
4095
|
+
return super.from_pretrained(pretrained_model_name_or_path, {
|
|
4096
|
+
// Update default model file name if not provided
|
|
4097
|
+
model_file_name: 'text_model',
|
|
4098
|
+
...options,
|
|
4099
|
+
});
|
|
3932
4100
|
}
|
|
3933
4101
|
}
|
|
3934
4102
|
|
|
3935
4103
|
export class JinaCLIPVisionModel extends JinaCLIPPreTrainedModel {
|
|
3936
4104
|
/** @type {typeof PreTrainedModel.from_pretrained} */
|
|
3937
4105
|
static async from_pretrained(pretrained_model_name_or_path, options = {}) {
|
|
3938
|
-
|
|
3939
|
-
|
|
3940
|
-
|
|
4106
|
+
return super.from_pretrained(pretrained_model_name_or_path, {
|
|
4107
|
+
// Update default model file name if not provided
|
|
4108
|
+
model_file_name: 'vision_model',
|
|
4109
|
+
...options,
|
|
4110
|
+
});
|
|
3941
4111
|
}
|
|
3942
4112
|
}
|
|
3943
4113
|
//////////////////////////////////////////////////
|
|
@@ -4097,6 +4267,14 @@ export class LlamaForCausalLM extends LlamaPreTrainedModel { }
|
|
|
4097
4267
|
//////////////////////////////////////////////////
|
|
4098
4268
|
|
|
4099
4269
|
|
|
4270
|
+
//////////////////////////////////////////////////
|
|
4271
|
+
// EXAONE models
|
|
4272
|
+
export class ExaonePreTrainedModel extends PreTrainedModel { }
|
|
4273
|
+
export class ExaoneModel extends ExaonePreTrainedModel { }
|
|
4274
|
+
export class ExaoneForCausalLM extends ExaonePreTrainedModel { }
|
|
4275
|
+
//////////////////////////////////////////////////
|
|
4276
|
+
|
|
4277
|
+
|
|
4100
4278
|
//////////////////////////////////////////////////
|
|
4101
4279
|
// MobileLLM models
|
|
4102
4280
|
export class MobileLLMPreTrainedModel extends PreTrainedModel { }
|
|
@@ -6159,9 +6337,11 @@ export class ClapModel extends ClapPreTrainedModel { }
|
|
|
6159
6337
|
export class ClapTextModelWithProjection extends ClapPreTrainedModel {
|
|
6160
6338
|
/** @type {typeof PreTrainedModel.from_pretrained} */
|
|
6161
6339
|
static async from_pretrained(pretrained_model_name_or_path, options = {}) {
|
|
6162
|
-
|
|
6163
|
-
|
|
6164
|
-
|
|
6340
|
+
return super.from_pretrained(pretrained_model_name_or_path, {
|
|
6341
|
+
// Update default model file name if not provided
|
|
6342
|
+
model_file_name: 'text_model',
|
|
6343
|
+
...options,
|
|
6344
|
+
});
|
|
6165
6345
|
}
|
|
6166
6346
|
}
|
|
6167
6347
|
|
|
@@ -6194,9 +6374,11 @@ export class ClapTextModelWithProjection extends ClapPreTrainedModel {
|
|
|
6194
6374
|
export class ClapAudioModelWithProjection extends ClapPreTrainedModel {
|
|
6195
6375
|
/** @type {typeof PreTrainedModel.from_pretrained} */
|
|
6196
6376
|
static async from_pretrained(pretrained_model_name_or_path, options = {}) {
|
|
6197
|
-
|
|
6198
|
-
|
|
6199
|
-
|
|
6377
|
+
return super.from_pretrained(pretrained_model_name_or_path, {
|
|
6378
|
+
// Update default model file name if not provided
|
|
6379
|
+
model_file_name: 'audio_model',
|
|
6380
|
+
...options,
|
|
6381
|
+
});
|
|
6200
6382
|
}
|
|
6201
6383
|
}
|
|
6202
6384
|
//////////////////////////////////////////////////
|
|
@@ -6782,6 +6964,7 @@ export class PretrainedMixin {
|
|
|
6782
6964
|
|
|
6783
6965
|
const MODEL_MAPPING_NAMES_ENCODER_ONLY = new Map([
|
|
6784
6966
|
['bert', ['BertModel', BertModel]],
|
|
6967
|
+
['modernbert', ['ModernBertModel', ModernBertModel]],
|
|
6785
6968
|
['nomic_bert', ['NomicBertModel', NomicBertModel]],
|
|
6786
6969
|
['roformer', ['RoFormerModel', RoFormerModel]],
|
|
6787
6970
|
['electra', ['ElectraModel', ElectraModel]],
|
|
@@ -6883,6 +7066,7 @@ const MODEL_MAPPING_NAMES_DECODER_ONLY = new Map([
|
|
|
6883
7066
|
['gpt_neox', ['GPTNeoXModel', GPTNeoXModel]],
|
|
6884
7067
|
['codegen', ['CodeGenModel', CodeGenModel]],
|
|
6885
7068
|
['llama', ['LlamaModel', LlamaModel]],
|
|
7069
|
+
['exaone', ['ExaoneModel', ExaoneModel]],
|
|
6886
7070
|
['olmo', ['OlmoModel', OlmoModel]],
|
|
6887
7071
|
['olmo2', ['Olmo2Model', Olmo2Model]],
|
|
6888
7072
|
['mobilellm', ['MobileLLMModel', MobileLLMModel]],
|
|
@@ -6905,6 +7089,7 @@ const MODEL_MAPPING_NAMES_DECODER_ONLY = new Map([
|
|
|
6905
7089
|
const MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING_NAMES = new Map([
|
|
6906
7090
|
['speecht5', ['SpeechT5ForSpeechToText', SpeechT5ForSpeechToText]],
|
|
6907
7091
|
['whisper', ['WhisperForConditionalGeneration', WhisperForConditionalGeneration]],
|
|
7092
|
+
['moonshine', ['MoonshineForConditionalGeneration', MoonshineForConditionalGeneration]],
|
|
6908
7093
|
]);
|
|
6909
7094
|
|
|
6910
7095
|
const MODEL_FOR_TEXT_TO_SPECTROGRAM_MAPPING_NAMES = new Map([
|
|
@@ -6918,6 +7103,7 @@ const MODEL_FOR_TEXT_TO_WAVEFORM_MAPPING_NAMES = new Map([
|
|
|
6918
7103
|
|
|
6919
7104
|
const MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES = new Map([
|
|
6920
7105
|
['bert', ['BertForSequenceClassification', BertForSequenceClassification]],
|
|
7106
|
+
['modernbert', ['ModernBertForSequenceClassification', ModernBertForSequenceClassification]],
|
|
6921
7107
|
['roformer', ['RoFormerForSequenceClassification', RoFormerForSequenceClassification]],
|
|
6922
7108
|
['electra', ['ElectraForSequenceClassification', ElectraForSequenceClassification]],
|
|
6923
7109
|
['esm', ['EsmForSequenceClassification', EsmForSequenceClassification]],
|
|
@@ -6939,6 +7125,7 @@ const MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES = new Map([
|
|
|
6939
7125
|
|
|
6940
7126
|
const MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES = new Map([
|
|
6941
7127
|
['bert', ['BertForTokenClassification', BertForTokenClassification]],
|
|
7128
|
+
['modernbert', ['ModernBertForTokenClassification', ModernBertForTokenClassification]],
|
|
6942
7129
|
['roformer', ['RoFormerForTokenClassification', RoFormerForTokenClassification]],
|
|
6943
7130
|
['electra', ['ElectraForTokenClassification', ElectraForTokenClassification]],
|
|
6944
7131
|
['esm', ['EsmForTokenClassification', EsmForTokenClassification]],
|
|
@@ -6975,6 +7162,7 @@ const MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = new Map([
|
|
|
6975
7162
|
['gpt_neox', ['GPTNeoXForCausalLM', GPTNeoXForCausalLM]],
|
|
6976
7163
|
['codegen', ['CodeGenForCausalLM', CodeGenForCausalLM]],
|
|
6977
7164
|
['llama', ['LlamaForCausalLM', LlamaForCausalLM]],
|
|
7165
|
+
['exaone', ['ExaoneForCausalLM', ExaoneForCausalLM]],
|
|
6978
7166
|
['olmo', ['OlmoForCausalLM', OlmoForCausalLM]],
|
|
6979
7167
|
['olmo2', ['Olmo2ForCausalLM', Olmo2ForCausalLM]],
|
|
6980
7168
|
['mobilellm', ['MobileLLMForCausalLM', MobileLLMForCausalLM]],
|
|
@@ -6994,6 +7182,9 @@ const MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = new Map([
|
|
|
6994
7182
|
['falcon', ['FalconForCausalLM', FalconForCausalLM]],
|
|
6995
7183
|
['trocr', ['TrOCRForCausalLM', TrOCRForCausalLM]],
|
|
6996
7184
|
['stablelm', ['StableLmForCausalLM', StableLmForCausalLM]],
|
|
7185
|
+
|
|
7186
|
+
// Also image-text-to-text
|
|
7187
|
+
['phi3_v', ['Phi3VForCausalLM', Phi3VForCausalLM]],
|
|
6997
7188
|
]);
|
|
6998
7189
|
|
|
6999
7190
|
const MODEL_FOR_MULTIMODALITY_MAPPING_NAMES = new Map([
|
|
@@ -7003,6 +7194,7 @@ const MODEL_FOR_MULTIMODALITY_MAPPING_NAMES = new Map([
|
|
|
7003
7194
|
|
|
7004
7195
|
const MODEL_FOR_MASKED_LM_MAPPING_NAMES = new Map([
|
|
7005
7196
|
['bert', ['BertForMaskedLM', BertForMaskedLM]],
|
|
7197
|
+
['modernbert', ['ModernBertForMaskedLM', ModernBertForMaskedLM]],
|
|
7006
7198
|
['roformer', ['RoFormerForMaskedLM', RoFormerForMaskedLM]],
|
|
7007
7199
|
['electra', ['ElectraForMaskedLM', ElectraForMaskedLM]],
|
|
7008
7200
|
['esm', ['EsmForMaskedLM', EsmForMaskedLM]],
|
|
@@ -7231,6 +7423,7 @@ const CUSTOM_MAPPING = [
|
|
|
7231
7423
|
// OVERRIDE:
|
|
7232
7424
|
// TODO: Refactor to allow class to specify model
|
|
7233
7425
|
['MusicgenForConditionalGeneration', MusicgenForConditionalGeneration, MODEL_TYPES.Musicgen],
|
|
7426
|
+
['Phi3VForCausalLM', Phi3VForCausalLM, MODEL_TYPES.Phi3V],
|
|
7234
7427
|
|
|
7235
7428
|
['CLIPTextModelWithProjection', CLIPTextModelWithProjection, MODEL_TYPES.EncoderOnly],
|
|
7236
7429
|
['SiglipTextModel', SiglipTextModel, MODEL_TYPES.EncoderOnly],
|
package/src/ops/registry.js
CHANGED
|
@@ -100,4 +100,15 @@ export class TensorOpRegistry {
|
|
|
100
100
|
}
|
|
101
101
|
return this._top_k;
|
|
102
102
|
}
|
|
103
|
+
|
|
104
|
+
static get slice() {
|
|
105
|
+
if (!this._slice) {
|
|
106
|
+
this._slice = wrap(
|
|
107
|
+
[8, 7, 18, 0, 58, 96, 10, 25, 10, 1, 120, 10, 1, 115, 10, 1, 101, 10, 1, 97, 10, 1, 116, 18, 1, 121, 34, 5, 83, 108, 105, 99, 101, 18, 1, 114, 90, 9, 10, 1, 120, 18, 4, 10, 2, 8, 1, 90, 9, 10, 1, 115, 18, 4, 10, 2, 8, 7, 90, 9, 10, 1, 101, 18, 4, 10, 2, 8, 7, 90, 9, 10, 1, 97, 18, 4, 10, 2, 8, 7, 90, 9, 10, 1, 116, 18, 4, 10, 2, 8, 7, 98, 9, 10, 1, 121, 18, 4, 10, 2, 8, 1, 66, 2, 16, 13],
|
|
108
|
+
this.session_options,
|
|
109
|
+
'y',
|
|
110
|
+
)
|
|
111
|
+
}
|
|
112
|
+
return this._slice;
|
|
113
|
+
}
|
|
103
114
|
}
|
package/src/pipelines.js
CHANGED
|
@@ -688,7 +688,7 @@ export class FillMaskPipeline extends (/** @type {new (options: TextPipelineCons
|
|
|
688
688
|
return {
|
|
689
689
|
score: values[i],
|
|
690
690
|
token: Number(x),
|
|
691
|
-
token_str: this.tokenizer.
|
|
691
|
+
token_str: this.tokenizer.decode([x]),
|
|
692
692
|
sequence: this.tokenizer.decode(sequence, { skip_special_tokens: true }),
|
|
693
693
|
}
|
|
694
694
|
}));
|
|
@@ -1729,6 +1729,8 @@ export class AutomaticSpeechRecognitionPipeline extends (/** @type {new (options
|
|
|
1729
1729
|
case 'unispeech-sat':
|
|
1730
1730
|
case 'hubert':
|
|
1731
1731
|
return this._call_wav2vec2(audio, kwargs)
|
|
1732
|
+
case 'moonshine':
|
|
1733
|
+
return this._call_moonshine(audio, kwargs)
|
|
1732
1734
|
default:
|
|
1733
1735
|
throw new Error(`AutomaticSpeechRecognitionPipeline does not support model type '${this.model.config.model_type}'.`)
|
|
1734
1736
|
}
|
|
@@ -1882,6 +1884,34 @@ export class AutomaticSpeechRecognitionPipeline extends (/** @type {new (options
|
|
|
1882
1884
|
}
|
|
1883
1885
|
return single ? toReturn[0] : toReturn;
|
|
1884
1886
|
}
|
|
1887
|
+
|
|
1888
|
+
/**
|
|
1889
|
+
* @type {AutomaticSpeechRecognitionPipelineCallback}
|
|
1890
|
+
* @private
|
|
1891
|
+
*/
|
|
1892
|
+
async _call_moonshine(audio, kwargs) {
|
|
1893
|
+
const single = !Array.isArray(audio);
|
|
1894
|
+
if (single) {
|
|
1895
|
+
audio = [/** @type {AudioInput} */ (audio)];
|
|
1896
|
+
}
|
|
1897
|
+
const sampling_rate = this.processor.feature_extractor.config.sampling_rate;
|
|
1898
|
+
const preparedAudios = await prepareAudios(audio, sampling_rate);
|
|
1899
|
+
const toReturn = [];
|
|
1900
|
+
for (const aud of preparedAudios) {
|
|
1901
|
+
const inputs = await this.processor(aud);
|
|
1902
|
+
|
|
1903
|
+
// According to the [paper](https://arxiv.org/pdf/2410.15608):
|
|
1904
|
+
// "We use greedy decoding, with a heuristic limit of 6 output tokens
|
|
1905
|
+
// per second of audio to avoid repeated output sequences."
|
|
1906
|
+
const max_new_tokens = Math.floor(aud.length / sampling_rate) * 6;
|
|
1907
|
+
const outputs = await this.model.generate({ max_new_tokens, ...kwargs, ...inputs });
|
|
1908
|
+
|
|
1909
|
+
const text = this.processor.batch_decode(outputs, { skip_special_tokens: true })[0];
|
|
1910
|
+
toReturn.push({ text });
|
|
1911
|
+
}
|
|
1912
|
+
return single ? toReturn[0] : toReturn;
|
|
1913
|
+
}
|
|
1914
|
+
|
|
1885
1915
|
}
|
|
1886
1916
|
|
|
1887
1917
|
/**
|
package/src/utils/tensor.js
CHANGED
|
@@ -772,8 +772,21 @@ export class Tensor {
|
|
|
772
772
|
if (!DataTypeMap.hasOwnProperty(type)) {
|
|
773
773
|
throw new Error(`Unsupported type: ${type}`);
|
|
774
774
|
}
|
|
775
|
+
|
|
776
|
+
// Handle special cases where a mapping function is needed (e.g., where one type is a bigint and the other is a number)
|
|
777
|
+
let map_fn;
|
|
778
|
+
const is_source_bigint = ['int64', 'uint64'].includes(this.type);
|
|
779
|
+
const is_dest_bigint = ['int64', 'uint64'].includes(type);
|
|
780
|
+
if (is_source_bigint && !is_dest_bigint) {
|
|
781
|
+
// TypeError: Cannot convert a BigInt value to a number
|
|
782
|
+
map_fn = Number;
|
|
783
|
+
} else if (!is_source_bigint && is_dest_bigint) {
|
|
784
|
+
// TypeError: Cannot convert [x] to a BigInt
|
|
785
|
+
map_fn = BigInt;
|
|
786
|
+
}
|
|
787
|
+
|
|
775
788
|
// @ts-ignore
|
|
776
|
-
return new Tensor(type, DataTypeMap[type].from(this.data), this.dims);
|
|
789
|
+
return new Tensor(type, DataTypeMap[type].from(this.data, map_fn), this.dims);
|
|
777
790
|
}
|
|
778
791
|
}
|
|
779
792
|
|
|
@@ -971,6 +984,29 @@ export async function topk(x, k) {
|
|
|
971
984
|
});
|
|
972
985
|
}
|
|
973
986
|
|
|
987
|
+
|
|
988
|
+
const arrayToIndexTensor = (array) => new Tensor('int64', array, [array.length]);
|
|
989
|
+
/**
|
|
990
|
+
* Slice a multidimensional float32 tensor.
|
|
991
|
+
* @param {Tensor} data: Tensor of data to extract slices from
|
|
992
|
+
* @param {number[]} starts: 1-D array of starting indices of corresponding axis in axes
|
|
993
|
+
* @param {number[]} ends: 1-D array of ending indices (exclusive) of corresponding axis in axes
|
|
994
|
+
* @param {number[]} axes: 1-D array of axes that starts and ends apply to
|
|
995
|
+
* @param {number[]} [steps]: 1-D array of slice step of corresponding axis in axes.
|
|
996
|
+
* @returns {Promise<Tensor>} Sliced data tensor.
|
|
997
|
+
*/
|
|
998
|
+
export async function slice(data, starts, ends, axes, steps) {
|
|
999
|
+
const op = await TensorOpRegistry.slice;
|
|
1000
|
+
return await op({
|
|
1001
|
+
x: data,
|
|
1002
|
+
s: arrayToIndexTensor(starts),
|
|
1003
|
+
e: arrayToIndexTensor(ends),
|
|
1004
|
+
a: arrayToIndexTensor(axes),
|
|
1005
|
+
t: arrayToIndexTensor(steps ?? new Array(axes.length).fill(1)),
|
|
1006
|
+
});
|
|
1007
|
+
}
|
|
1008
|
+
|
|
1009
|
+
|
|
974
1010
|
/**
|
|
975
1011
|
* Perform mean pooling of the last hidden state followed by a normalization step.
|
|
976
1012
|
* @param {Tensor} last_hidden_state Tensor of shape [batchSize, seqLength, embedDim]
|
|
@@ -1417,6 +1453,20 @@ export function zeros_like(tensor) {
|
|
|
1417
1453
|
return zeros(tensor.dims);
|
|
1418
1454
|
}
|
|
1419
1455
|
|
|
1456
|
+
/**
|
|
1457
|
+
* Returns a tensor filled with random numbers from a uniform distribution on the interval [0, 1)
|
|
1458
|
+
* @param {number[]} size A sequence of integers defining the shape of the output tensor.
|
|
1459
|
+
* @returns {Tensor} The random tensor.
|
|
1460
|
+
*/
|
|
1461
|
+
export function rand(size) {
|
|
1462
|
+
const length = size.reduce((a, b) => a * b, 1);
|
|
1463
|
+
return new Tensor(
|
|
1464
|
+
"float32",
|
|
1465
|
+
Float32Array.from({ length }, () => Math.random()),
|
|
1466
|
+
size,
|
|
1467
|
+
)
|
|
1468
|
+
}
|
|
1469
|
+
|
|
1420
1470
|
/**
|
|
1421
1471
|
* Quantizes the embeddings tensor to binary or unsigned binary precision.
|
|
1422
1472
|
* @param {Tensor} tensor The tensor to quantize.
|
|
@@ -149,7 +149,7 @@ export class ImageProcessor extends ImageProcessor_base {
|
|
|
149
149
|
* Pad the image by a certain amount.
|
|
150
150
|
* @param {Float32Array} pixelData The pixel data to pad.
|
|
151
151
|
* @param {number[]} imgDims The dimensions of the image (height, width, channels).
|
|
152
|
-
* @param {{width:number; height:number}|number} padSize The dimensions of the padded image.
|
|
152
|
+
* @param {{width:number; height:number}|number|'square'} padSize The dimensions of the padded image.
|
|
153
153
|
* @param {Object} options The options for padding.
|
|
154
154
|
* @param {'constant'|'symmetric'} [options.mode='constant'] The type of padding to add.
|
|
155
155
|
* @param {boolean} [options.center=false] Whether to center the image.
|
|
@@ -159,7 +159,7 @@ export class ImageProcessor extends ImageProcessor_base {
|
|
|
159
159
|
pad_image(pixelData: Float32Array, imgDims: number[], padSize: {
|
|
160
160
|
width: number;
|
|
161
161
|
height: number;
|
|
162
|
-
} | number, { mode, center, constant_values, }?: {
|
|
162
|
+
} | number | "square", { mode, center, constant_values, }?: {
|
|
163
163
|
mode?: "constant" | "symmetric";
|
|
164
164
|
center?: boolean;
|
|
165
165
|
constant_values?: number | number[];
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"image_processors_utils.d.ts","sourceRoot":"","sources":["../../src/base/image_processors_utils.js"],"names":[],"mappings":"AA+EA;;;;;;;;;GASG;AACH,uDAPG;IAAwB,MAAM,EAAtB,MAAM;IACU,UAAU,EAA1B,MAAM;CACd,cAAQ,MAAM,iBACN,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,iBAClB,OAAO,GACN,KAAQ,CAwEnB;AAGD;;;;;;GAMG;AACH,4DALW,GAAC,iBACD,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,GAEhB;IAAC,YAAY,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,EAAE,CAAA;CAAC,EAAE,CAwDtD;AAkPD;;;;;;;;;GASG;AACH,4DARW,GAAC,cACD,MAAM,mBACN,MAAM,gCACN,MAAM,sBACN,GAAG,CAAC,MAAM,CAAC,iBACX,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,GAChB,KAAK,CAAC;IAAE,YAAY,EAAE,MAAM,CAAC;IAAC,aAAa,EAAE,KAAK,CAAC;QAAC,EAAE,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAC,CAAC,CAAA;CAAC,CAAC,CAuE/G;AAGD;;;;;;;GAOG;AACH,4DANW,GAAC,cACD,MAAM,iBACN,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,GAEhB,KAAK,CAAC;IAAE,YAAY,EAAE,MAAM,CAAC;IAAC,aAAa,EAAE,KAAK,CAAC;QAAC,EAAE,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAC,CAAC,CAAA;CAAC,CAAC,CAI/G;;KA3iBsC,GAAG;UAAyB,GACnE;;AA6iBA;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AAEH;
|
|
1
|
+
{"version":3,"file":"image_processors_utils.d.ts","sourceRoot":"","sources":["../../src/base/image_processors_utils.js"],"names":[],"mappings":"AA+EA;;;;;;;;;GASG;AACH,uDAPG;IAAwB,MAAM,EAAtB,MAAM;IACU,UAAU,EAA1B,MAAM;CACd,cAAQ,MAAM,iBACN,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,iBAClB,OAAO,GACN,KAAQ,CAwEnB;AAGD;;;;;;GAMG;AACH,4DALW,GAAC,iBACD,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,GAEhB;IAAC,YAAY,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,EAAE,CAAA;CAAC,EAAE,CAwDtD;AAkPD;;;;;;;;;GASG;AACH,4DARW,GAAC,cACD,MAAM,mBACN,MAAM,gCACN,MAAM,sBACN,GAAG,CAAC,MAAM,CAAC,iBACX,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,GAChB,KAAK,CAAC;IAAE,YAAY,EAAE,MAAM,CAAC;IAAC,aAAa,EAAE,KAAK,CAAC;QAAC,EAAE,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAC,CAAC,CAAA;CAAC,CAAC,CAuE/G;AAGD;;;;;;;GAOG;AACH,4DANW,GAAC,cACD,MAAM,iBACN,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,GAEhB,KAAK,CAAC;IAAE,YAAY,EAAE,MAAM,CAAC;IAAC,aAAa,EAAE,KAAK,CAAC;QAAC,EAAE,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAC,CAAC,CAAA;CAAC,CAAC,CAI/G;;KA3iBsC,GAAG;UAAyB,GACnE;;AA6iBA;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AAEH;IAqeI;;;;;;;;;;;;;;OAcG;IACH,sDATW,MAAM,WAKN,OAAO,iBAAiB,EAAE,iBAAiB,GAEzC,OAAO,CAAC,cAAc,CAAC,CAKnC;IArfD;;;OAGG;IACH,oBAFW,oBAAoB,EAmC9B;IA9BG,qBAAkD;IAClD,oBAA+C;IAE/C,iBAAoC;IACpC,oBAA2C;IAC3C,uBAAwD;IACxD,sBAAuC;IAEvC,sBAAuC;IACvC,UAA4C;IAC5C,mBAA8D;IAC9D,uBAAwE;IAExE,wBAA2C;IAC3C,eAAiC;IACjC,oBAAmD;IACnD,oBAA2C;IAE3C,cAA+B;IAC/B,YAA2B;IAQ3B,+BAAkE;IAElE,6BAAoB;IAGxB;;;;;;;OAOG;IACH,iBALW,QAAQ,QACR;QAAC,MAAM,EAAC,MAAM,CAAC;QAAC,KAAK,EAAC,MAAM,CAAA;KAAC,aAC7B,MAAM,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,GAC5B,OAAO,CAAC,QAAQ,CAAC,CAsB7B;IAGD;;;;;OAKG;IACH,mBAJW,QAAQ,mBACR,MAAM,GACJ,OAAO,CAAC,QAAQ,CAAC,CAiC7B;IAED;;;;;;;;;;OAUG;IACH,qBATW,YAAY,WACZ,MAAM,EAAE,WACR;QAAC,KAAK,EAAC,MAAM,CAAC;QAAC,MAAM,EAAC,MAAM,CAAA;KAAC,GAAC,MAAM,GAAC,QAAQ,uCAErD;QAAyC,IAAI,GAArC,UAAU,GAAC,WAAW;QACJ,MAAM,GAAxB,OAAO;QACmB,eAAe,GAAzC,MAAM,GAAC,MAAM,EAAE;KACvB,GAAU,CAAC,YAAY,EAAE,MAAM,EAAE,CAAC,CA+EpC;IAED;;;;OAIG;IACH,mBAHW,YAAY,GACV,IAAI,CAMhB;IAED;;;;;;OAMG;IACH,oCAJW,QAAQ,QACR,GAAG,GACD,CAAC,MAAM,EAAE,MAAM,CAAC,CA4F5B;IAED;;;;OAIG;IACH,cAHW,QAAQ,GACN,OAAO,CAAC,QAAQ,CAAC,CAO7B;IAED;;;;;OAKG;IAEH;;;;;;OAMG;IACH,kBAJW,QAAQ,iGAEN,OAAO;;;;uBAVN,WAAW;;;;6BACX,WAAW;;;;sBACX,MAAM;MAQmB,CAsHtC;IAED;;;;;;;OAOG;IACH,cAJW,QAAQ,EAAE,WACP,GAAG,EAAA,GACJ,OAAO,CAAC,oBAAoB,CAAC,CAqBzC;CAsBJ;;;;;0BAvjCY,CAAC,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,CAAC;;;;;kBAM9B,MAAM;;;;oBACN,WAAW,EAAE;;;;0BACb,WAAW,EAAE;;;;;;;;;;;;;iBAgiBb,MAAM,EAAE;;;;gBACR,MAAM,EAAE;;;;iBACR,OAAO;;;;qBACP,MAAM;;;;mBACN,OAAO;;;;gBACP,OAAO;;;;eACP,MAAM;;;;WACN,MAAM,MAAO;;;;iBACb,MAAM,MAAO;;;;;4BACb,OAAO;;;;;qBAEP,OAAO;;;;mBAEP,OAAO;;;;;wBACP,OAAO;;;;;yBAEP,MAAM;;;;WAGN,MAAM,EAAE;;;;UACR,MAAM,EAAE;;uBAtkBqB,oBAAoB;yBAEtC,mBAAmB"}
|
package/types/configs.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"configs.d.ts","sourceRoot":"","sources":["../src/configs.js"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"configs.d.ts","sourceRoot":"","sources":["../src/configs.js"],"names":[],"mappings":"AA2OA;;;;GAIG;AACH,0CAHW,gBAAgB;;;IACd,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,CAAC,CA2EpC;AACD;;;GAGG;AACH;IAwBI;;;;;;;;OAQG;IACH,sDANW,MAAM,0EACN,iBAAiB,GAGf,OAAO,CAAC,gBAAgB,CAAC,CAqBrC;IArCD;;;OAGG;IACH,6BAGC;IAnBD,0BAA0B;IAC1B,YADW,MAAM,GAAC,IAAI,CACJ;IAElB,sBAAsB;IACtB,oBADW,OAAO,CACS;IAE3B,qBAAqB;IACrB,yBADW,MAAM,CACO;IAExB,mCAAmC;IACnC,0BADW,oBAAoB,CACN;IAQrB,uBAAkD;CAgCzD;AAED;;;;;GAKG;AACH;IArCI;;;;;;;;OAQG;IACH,sDANW,MAAM,0EACN,iBAAiB,GAGf,OAAO,CAAC,gBAAgB,CAAC,CAqBrC;CAcJ;gCA7VY,OAAO,gBAAgB,EAAE,iBAAiB;+BAI1C,OAAO,iBAAiB,EAAE,gBAAgB;2BAI1C,OAAO,iBAAiB,EAAE,YAAY;;;;;;;;qBA0VrC,OAAO,mBAAmB,EAAE,QAAQ,GAAC,MAAM,CAAC,OAAO,mBAAmB,EAAE,QAAQ,EAAE,OAAO,mBAAmB,EAAE,QAAQ,CAAC;;;;;;+BACvH,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC;;;;aAGtB,OAAO,oBAAoB,EAAE,UAAU;;;;YACvC,OAAO,mBAAmB,EAAE,QAAQ,GAAC,MAAM,CAAC,MAAM,EAAE,OAAO,mBAAmB,EAAE,QAAQ,CAAC;;;;+BACzF,OAAO,GAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"image_processing_auto.d.ts","sourceRoot":"","sources":["../../../src/models/auto/image_processing_auto.js"],"names":[],"mappings":"AAMA;
|
|
1
|
+
{"version":3,"file":"image_processing_auto.d.ts","sourceRoot":"","sources":["../../../src/models/auto/image_processing_auto.js"],"names":[],"mappings":"AAMA;kFAuB4jyC,oBAAiB;CAD5kyC;+BAzB8B,sCAAsC"}
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
export * from "./audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.js";
|
|
2
2
|
export * from "./clap/feature_extraction_clap.js";
|
|
3
|
+
export * from "./moonshine/feature_extraction_moonshine.js";
|
|
3
4
|
export * from "./pyannote/feature_extraction_pyannote.js";
|
|
4
5
|
export * from "./seamless_m4t/feature_extraction_seamless_m4t.js";
|
|
5
6
|
export * from "./speecht5/feature_extraction_speecht5.js";
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"image_processing_idefics3.d.ts","sourceRoot":"","sources":["../../../src/models/idefics3/image_processing_idefics3.js"],"names":[],"mappings":"AAOA;IACI,yBAKC;IAFG,wBAA2D;IAC3D,oBAA2C;IAG/C;;;OAGG;IAEH;;;;;OAKG;IACH,6GAHW,MAAM;;;MAiBhB;IAED,uDAAuD;IACvD,cADY,0CAAS,yCAAU,GAAC,yCAAU,EAAE;;;;;;;;;;OA4H3C;IAED;;;;;;
|
|
1
|
+
{"version":3,"file":"image_processing_idefics3.d.ts","sourceRoot":"","sources":["../../../src/models/idefics3/image_processing_idefics3.js"],"names":[],"mappings":"AAOA;IACI,yBAKC;IAFG,wBAA2D;IAC3D,oBAA2C;IAG/C;;;OAGG;IAEH;;;;;OAKG;IACH,6GAHW,MAAM;;;MAiBhB;IAED,uDAAuD;IACvD,cADY,0CAAS,yCAAU,GAAC,yCAAU,EAAE;;;;;;;;;;OA4H3C;IAED;;;;;;OA4DC;CACJ;+BAjOM,sCAAsC"}
|
|
@@ -23,6 +23,7 @@ export * from "./mobilevit/image_processing_mobilevit.js";
|
|
|
23
23
|
export * from "./nougat/image_processing_nougat.js";
|
|
24
24
|
export * from "./owlv2/image_processing_owlv2.js";
|
|
25
25
|
export * from "./owlvit/image_processing_owlvit.js";
|
|
26
|
+
export * from "./phi3_v/image_processing_phi3_v.js";
|
|
26
27
|
export * from "./pvt/image_processing_pvt.js";
|
|
27
28
|
export * from "./qwen2_vl/image_processing_qwen2_vl.js";
|
|
28
29
|
export * from "./rt_detr/image_processing_rt_detr.js";
|