@huggingface/transformers 3.4.0 → 3.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -2
- package/dist/transformers.js +528 -201
- package/dist/transformers.js.map +1 -1
- package/dist/transformers.min.js +1 -1
- package/dist/transformers.min.js.map +1 -1
- package/dist/transformers.node.cjs +508 -200
- package/dist/transformers.node.cjs.map +1 -1
- package/dist/transformers.node.min.cjs +1 -1
- package/dist/transformers.node.min.cjs.map +1 -1
- package/dist/transformers.node.min.mjs +1 -1
- package/dist/transformers.node.min.mjs.map +1 -1
- package/dist/transformers.node.mjs +528 -201
- package/dist/transformers.node.mjs.map +1 -1
- package/dist/transformers.web.js +528 -201
- package/dist/transformers.web.js.map +1 -1
- package/dist/transformers.web.min.js +1 -1
- package/dist/transformers.web.min.js.map +1 -1
- package/package.json +1 -1
- package/src/configs.js +2 -0
- package/src/env.js +1 -1
- package/src/models/feature_extractors.js +1 -0
- package/src/models/snac/feature_extraction_snac.js +3 -0
- package/src/models.js +125 -2
- package/src/pipelines.js +140 -135
- package/src/tokenizers.js +44 -34
- package/src/utils/data-structures.js +74 -0
- package/src/utils/hub.js +36 -15
- package/src/utils/image.js +9 -1
- package/src/utils/tensor.js +6 -2
- package/types/configs.d.ts.map +1 -1
- package/types/models/feature_extractors.d.ts +1 -0
- package/types/models/snac/feature_extraction_snac.d.ts +4 -0
- package/types/models/snac/feature_extraction_snac.d.ts.map +1 -0
- package/types/models.d.ts +72 -0
- package/types/models.d.ts.map +1 -1
- package/types/pipelines.d.ts +2 -2
- package/types/pipelines.d.ts.map +1 -1
- package/types/tokenizers.d.ts +4 -1
- package/types/tokenizers.d.ts.map +1 -1
- package/types/tsconfig.tsbuildinfo +1 -1
- package/types/utils/data-structures.d.ts +26 -0
- package/types/utils/data-structures.d.ts.map +1 -1
- package/types/utils/hub.d.ts.map +1 -1
- package/types/utils/image.d.ts +2 -2
- package/types/utils/image.d.ts.map +1 -1
- package/types/utils/tensor.d.ts.map +1 -1
|
@@ -3684,6 +3684,7 @@ function getNormalizedConfig(config) {
|
|
|
3684
3684
|
// Sub-configs
|
|
3685
3685
|
case 'llava':
|
|
3686
3686
|
case 'paligemma':
|
|
3687
|
+
case 'gemma3':
|
|
3687
3688
|
case 'florence2':
|
|
3688
3689
|
case 'llava_onevision':
|
|
3689
3690
|
case 'idefics3':
|
|
@@ -3743,6 +3744,7 @@ function getNormalizedConfig(config) {
|
|
|
3743
3744
|
break;
|
|
3744
3745
|
case 'gemma':
|
|
3745
3746
|
case 'gemma2':
|
|
3747
|
+
case 'gemma3_text':
|
|
3746
3748
|
case 'glm':
|
|
3747
3749
|
case 'helium':
|
|
3748
3750
|
mapping['num_heads'] = 'num_key_value_heads';
|
|
@@ -4074,7 +4076,7 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
4074
4076
|
|
|
4075
4077
|
|
|
4076
4078
|
|
|
4077
|
-
const VERSION = '3.4.
|
|
4079
|
+
const VERSION = '3.4.2';
|
|
4078
4080
|
|
|
4079
4081
|
// Check if various APIs are available (depends on environment)
|
|
4080
4082
|
const IS_BROWSER_ENV = typeof window !== "undefined" && typeof window.document !== "undefined";
|
|
@@ -6218,6 +6220,9 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
6218
6220
|
/* harmony export */ Gemma2ForCausalLM: () => (/* binding */ Gemma2ForCausalLM),
|
|
6219
6221
|
/* harmony export */ Gemma2Model: () => (/* binding */ Gemma2Model),
|
|
6220
6222
|
/* harmony export */ Gemma2PreTrainedModel: () => (/* binding */ Gemma2PreTrainedModel),
|
|
6223
|
+
/* harmony export */ Gemma3ForCausalLM: () => (/* binding */ Gemma3ForCausalLM),
|
|
6224
|
+
/* harmony export */ Gemma3Model: () => (/* binding */ Gemma3Model),
|
|
6225
|
+
/* harmony export */ Gemma3PreTrainedModel: () => (/* binding */ Gemma3PreTrainedModel),
|
|
6221
6226
|
/* harmony export */ GemmaForCausalLM: () => (/* binding */ GemmaForCausalLM),
|
|
6222
6227
|
/* harmony export */ GemmaModel: () => (/* binding */ GemmaModel),
|
|
6223
6228
|
/* harmony export */ GemmaPreTrainedModel: () => (/* binding */ GemmaPreTrainedModel),
|
|
@@ -6288,6 +6293,10 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
6288
6293
|
/* harmony export */ MaskFormerModel: () => (/* binding */ MaskFormerModel),
|
|
6289
6294
|
/* harmony export */ MaskFormerPreTrainedModel: () => (/* binding */ MaskFormerPreTrainedModel),
|
|
6290
6295
|
/* harmony export */ MaskedLMOutput: () => (/* binding */ MaskedLMOutput),
|
|
6296
|
+
/* harmony export */ Metric3DForDepthEstimation: () => (/* binding */ Metric3DForDepthEstimation),
|
|
6297
|
+
/* harmony export */ Metric3DPreTrainedModel: () => (/* binding */ Metric3DPreTrainedModel),
|
|
6298
|
+
/* harmony export */ Metric3Dv2ForDepthEstimation: () => (/* binding */ Metric3Dv2ForDepthEstimation),
|
|
6299
|
+
/* harmony export */ Metric3Dv2PreTrainedModel: () => (/* binding */ Metric3Dv2PreTrainedModel),
|
|
6291
6300
|
/* harmony export */ MgpstrForSceneTextRecognition: () => (/* binding */ MgpstrForSceneTextRecognition),
|
|
6292
6301
|
/* harmony export */ MgpstrModelOutput: () => (/* binding */ MgpstrModelOutput),
|
|
6293
6302
|
/* harmony export */ MgpstrPreTrainedModel: () => (/* binding */ MgpstrPreTrainedModel),
|
|
@@ -6399,10 +6408,18 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
6399
6408
|
/* harmony export */ Qwen2PreTrainedModel: () => (/* binding */ Qwen2PreTrainedModel),
|
|
6400
6409
|
/* harmony export */ Qwen2VLForConditionalGeneration: () => (/* binding */ Qwen2VLForConditionalGeneration),
|
|
6401
6410
|
/* harmony export */ Qwen2VLPreTrainedModel: () => (/* binding */ Qwen2VLPreTrainedModel),
|
|
6411
|
+
/* harmony export */ RFDetrForObjectDetection: () => (/* binding */ RFDetrForObjectDetection),
|
|
6412
|
+
/* harmony export */ RFDetrModel: () => (/* binding */ RFDetrModel),
|
|
6413
|
+
/* harmony export */ RFDetrObjectDetectionOutput: () => (/* binding */ RFDetrObjectDetectionOutput),
|
|
6414
|
+
/* harmony export */ RFDetrPreTrainedModel: () => (/* binding */ RFDetrPreTrainedModel),
|
|
6402
6415
|
/* harmony export */ RTDetrForObjectDetection: () => (/* binding */ RTDetrForObjectDetection),
|
|
6403
6416
|
/* harmony export */ RTDetrModel: () => (/* binding */ RTDetrModel),
|
|
6404
6417
|
/* harmony export */ RTDetrObjectDetectionOutput: () => (/* binding */ RTDetrObjectDetectionOutput),
|
|
6405
6418
|
/* harmony export */ RTDetrPreTrainedModel: () => (/* binding */ RTDetrPreTrainedModel),
|
|
6419
|
+
/* harmony export */ RTDetrV2ForObjectDetection: () => (/* binding */ RTDetrV2ForObjectDetection),
|
|
6420
|
+
/* harmony export */ RTDetrV2Model: () => (/* binding */ RTDetrV2Model),
|
|
6421
|
+
/* harmony export */ RTDetrV2ObjectDetectionOutput: () => (/* binding */ RTDetrV2ObjectDetectionOutput),
|
|
6422
|
+
/* harmony export */ RTDetrV2PreTrainedModel: () => (/* binding */ RTDetrV2PreTrainedModel),
|
|
6406
6423
|
/* harmony export */ ResNetForImageClassification: () => (/* binding */ ResNetForImageClassification),
|
|
6407
6424
|
/* harmony export */ ResNetModel: () => (/* binding */ ResNetModel),
|
|
6408
6425
|
/* harmony export */ ResNetPreTrainedModel: () => (/* binding */ ResNetPreTrainedModel),
|
|
@@ -6436,6 +6453,10 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
6436
6453
|
/* harmony export */ SiglipTextModel: () => (/* binding */ SiglipTextModel),
|
|
6437
6454
|
/* harmony export */ SiglipVisionModel: () => (/* binding */ SiglipVisionModel),
|
|
6438
6455
|
/* harmony export */ SmolVLMForConditionalGeneration: () => (/* binding */ SmolVLMForConditionalGeneration),
|
|
6456
|
+
/* harmony export */ SnacDecoderModel: () => (/* binding */ SnacDecoderModel),
|
|
6457
|
+
/* harmony export */ SnacEncoderModel: () => (/* binding */ SnacEncoderModel),
|
|
6458
|
+
/* harmony export */ SnacModel: () => (/* binding */ SnacModel),
|
|
6459
|
+
/* harmony export */ SnacPreTrainedModel: () => (/* binding */ SnacPreTrainedModel),
|
|
6439
6460
|
/* harmony export */ SpeechT5ForSpeechToText: () => (/* binding */ SpeechT5ForSpeechToText),
|
|
6440
6461
|
/* harmony export */ SpeechT5ForTextToSpeech: () => (/* binding */ SpeechT5ForTextToSpeech),
|
|
6441
6462
|
/* harmony export */ SpeechT5HifiGan: () => (/* binding */ SpeechT5HifiGan),
|
|
@@ -7094,8 +7115,8 @@ async function decoderForward(self, model_inputs, is_encoder_decoder = false) {
|
|
|
7094
7115
|
new_model_inputs.use_cache_branch = boolTensor(!!past_key_values);
|
|
7095
7116
|
}
|
|
7096
7117
|
if (session.inputNames.includes('position_ids') && new_model_inputs.attention_mask && !new_model_inputs.position_ids) {
|
|
7097
|
-
// NOTE: Handle a special case for paligemma models, where positions are 1-indexed
|
|
7098
|
-
const start_index = self.config.model_type
|
|
7118
|
+
// NOTE: Handle a special case for paligemma/gemma3 models, where positions are 1-indexed
|
|
7119
|
+
const start_index = ['paligemma', 'gemma3_text', 'gemma3'].includes(self.config.model_type) ? 1 : 0;
|
|
7099
7120
|
new_model_inputs.position_ids = createPositionIds(new_model_inputs, past_key_values, start_index);
|
|
7100
7121
|
}
|
|
7101
7122
|
|
|
@@ -11020,6 +11041,23 @@ class Gemma2Model extends Gemma2PreTrainedModel { }
|
|
|
11020
11041
|
class Gemma2ForCausalLM extends Gemma2PreTrainedModel { }
|
|
11021
11042
|
//////////////////////////////////////////////////
|
|
11022
11043
|
|
|
11044
|
+
|
|
11045
|
+
//////////////////////////////////////////////////
|
|
11046
|
+
// Gemma3 models
|
|
11047
|
+
|
|
11048
|
+
/**
|
|
11049
|
+
* The bare Gemma3 Model outputting raw hidden-states without any specific head on top.
|
|
11050
|
+
*/
|
|
11051
|
+
class Gemma3PreTrainedModel extends PreTrainedModel { }
|
|
11052
|
+
/**
|
|
11053
|
+
* The bare Gemma3 Model outputting raw hidden-states without any specific head on top.
|
|
11054
|
+
*/
|
|
11055
|
+
class Gemma3Model extends Gemma3PreTrainedModel { }
|
|
11056
|
+
|
|
11057
|
+
class Gemma3ForCausalLM extends Gemma3PreTrainedModel { }
|
|
11058
|
+
//////////////////////////////////////////////////
|
|
11059
|
+
|
|
11060
|
+
|
|
11023
11061
|
//////////////////////////////////////////////////
|
|
11024
11062
|
class OpenELMPreTrainedModel extends PreTrainedModel { }
|
|
11025
11063
|
class OpenELMModel extends OpenELMPreTrainedModel { }
|
|
@@ -11664,6 +11702,37 @@ class RTDetrObjectDetectionOutput extends ModelOutput {
|
|
|
11664
11702
|
}
|
|
11665
11703
|
//////////////////////////////////////////////////
|
|
11666
11704
|
|
|
11705
|
+
|
|
11706
|
+
//////////////////////////////////////////////////
|
|
11707
|
+
class RTDetrV2PreTrainedModel extends PreTrainedModel { }
|
|
11708
|
+
class RTDetrV2Model extends RTDetrV2PreTrainedModel { }
|
|
11709
|
+
class RTDetrV2ForObjectDetection extends RTDetrV2PreTrainedModel {
|
|
11710
|
+
/**
|
|
11711
|
+
* @param {any} model_inputs
|
|
11712
|
+
*/
|
|
11713
|
+
async _call(model_inputs) {
|
|
11714
|
+
return new RTDetrV2ObjectDetectionOutput(await super._call(model_inputs));
|
|
11715
|
+
}
|
|
11716
|
+
}
|
|
11717
|
+
|
|
11718
|
+
class RTDetrV2ObjectDetectionOutput extends RTDetrObjectDetectionOutput {}
|
|
11719
|
+
//////////////////////////////////////////////////
|
|
11720
|
+
|
|
11721
|
+
//////////////////////////////////////////////////
|
|
11722
|
+
class RFDetrPreTrainedModel extends PreTrainedModel { }
|
|
11723
|
+
class RFDetrModel extends RFDetrPreTrainedModel { }
|
|
11724
|
+
class RFDetrForObjectDetection extends RFDetrPreTrainedModel {
|
|
11725
|
+
/**
|
|
11726
|
+
* @param {any} model_inputs
|
|
11727
|
+
*/
|
|
11728
|
+
async _call(model_inputs) {
|
|
11729
|
+
return new RFDetrObjectDetectionOutput(await super._call(model_inputs));
|
|
11730
|
+
}
|
|
11731
|
+
}
|
|
11732
|
+
|
|
11733
|
+
class RFDetrObjectDetectionOutput extends RTDetrObjectDetectionOutput {}
|
|
11734
|
+
//////////////////////////////////////////////////
|
|
11735
|
+
|
|
11667
11736
|
//////////////////////////////////////////////////
|
|
11668
11737
|
class TableTransformerPreTrainedModel extends PreTrainedModel { }
|
|
11669
11738
|
|
|
@@ -11872,6 +11941,16 @@ class DepthProPreTrainedModel extends PreTrainedModel { }
|
|
|
11872
11941
|
class DepthProForDepthEstimation extends DepthProPreTrainedModel { }
|
|
11873
11942
|
//////////////////////////////////////////////////
|
|
11874
11943
|
|
|
11944
|
+
//////////////////////////////////////////////////
|
|
11945
|
+
class Metric3DPreTrainedModel extends PreTrainedModel { }
|
|
11946
|
+
class Metric3DForDepthEstimation extends Metric3DPreTrainedModel { }
|
|
11947
|
+
//////////////////////////////////////////////////
|
|
11948
|
+
|
|
11949
|
+
//////////////////////////////////////////////////
|
|
11950
|
+
class Metric3Dv2PreTrainedModel extends PreTrainedModel { }
|
|
11951
|
+
class Metric3Dv2ForDepthEstimation extends Metric3Dv2PreTrainedModel { }
|
|
11952
|
+
//////////////////////////////////////////////////
|
|
11953
|
+
|
|
11875
11954
|
//////////////////////////////////////////////////
|
|
11876
11955
|
class MaskFormerPreTrainedModel extends PreTrainedModel { }
|
|
11877
11956
|
class MaskFormerModel extends MaskFormerPreTrainedModel { }
|
|
@@ -13787,6 +13866,60 @@ class DacDecoderModel extends DacPreTrainedModel {
|
|
|
13787
13866
|
}
|
|
13788
13867
|
//////////////////////////////////////////////////
|
|
13789
13868
|
|
|
13869
|
+
|
|
13870
|
+
//////////////////////////////////////////////////
|
|
13871
|
+
// Snac models
|
|
13872
|
+
class SnacPreTrainedModel extends PreTrainedModel {
|
|
13873
|
+
main_input_name = 'input_values';
|
|
13874
|
+
forward_params = ['input_values'];
|
|
13875
|
+
}
|
|
13876
|
+
|
|
13877
|
+
/**
|
|
13878
|
+
* The SNAC (Multi-Scale Neural Audio Codec) model.
|
|
13879
|
+
*/
|
|
13880
|
+
class SnacModel extends SnacPreTrainedModel {
|
|
13881
|
+
/**
|
|
13882
|
+
* Encodes the input audio waveform into discrete codes.
|
|
13883
|
+
* @param {Object} inputs Model inputs
|
|
13884
|
+
* @param {Tensor} [inputs.input_values] Float values of the input audio waveform, of shape `(batch_size, channels, sequence_length)`).
|
|
13885
|
+
* @returns {Promise<Record<string, Tensor>>} The output tensors of shape `(batch_size, num_codebooks, sequence_length)`.
|
|
13886
|
+
*/
|
|
13887
|
+
async encode(inputs) {
|
|
13888
|
+
return await sessionRun(this.sessions['encoder_model'], inputs);
|
|
13889
|
+
}
|
|
13890
|
+
|
|
13891
|
+
/**
|
|
13892
|
+
* Decodes the given frames into an output audio waveform.
|
|
13893
|
+
* @param {Record<string, Tensor>} inputs The encoded audio codes.
|
|
13894
|
+
* @returns {Promise<{audio_values: Tensor}>} The output tensor of shape `(batch_size, num_channels, sequence_length)`.
|
|
13895
|
+
*/
|
|
13896
|
+
async decode(inputs) {
|
|
13897
|
+
return await sessionRun(this.sessions['decoder_model'], inputs);
|
|
13898
|
+
}
|
|
13899
|
+
}
|
|
13900
|
+
|
|
13901
|
+
class SnacEncoderModel extends SnacPreTrainedModel {
|
|
13902
|
+
/** @type {typeof PreTrainedModel.from_pretrained} */
|
|
13903
|
+
static async from_pretrained(pretrained_model_name_or_path, options = {}) {
|
|
13904
|
+
return super.from_pretrained(pretrained_model_name_or_path, {
|
|
13905
|
+
...options,
|
|
13906
|
+
// Update default model file name if not provided
|
|
13907
|
+
model_file_name: options.model_file_name ?? 'encoder_model',
|
|
13908
|
+
});
|
|
13909
|
+
}
|
|
13910
|
+
}
|
|
13911
|
+
class SnacDecoderModel extends SnacPreTrainedModel {
|
|
13912
|
+
/** @type {typeof PreTrainedModel.from_pretrained} */
|
|
13913
|
+
static async from_pretrained(pretrained_model_name_or_path, options = {}) {
|
|
13914
|
+
return super.from_pretrained(pretrained_model_name_or_path, {
|
|
13915
|
+
...options,
|
|
13916
|
+
// Update default model file name if not provided
|
|
13917
|
+
model_file_name: options.model_file_name ?? 'decoder_model',
|
|
13918
|
+
});
|
|
13919
|
+
}
|
|
13920
|
+
}
|
|
13921
|
+
//////////////////////////////////////////////////
|
|
13922
|
+
|
|
13790
13923
|
//////////////////////////////////////////////////
|
|
13791
13924
|
// AutoModels, used to simplify construction of PreTrainedModels
|
|
13792
13925
|
// (uses config to instantiate correct class)
|
|
@@ -13907,6 +14040,8 @@ const MODEL_MAPPING_NAMES_ENCODER_ONLY = new Map([
|
|
|
13907
14040
|
|
|
13908
14041
|
['detr', ['DetrModel', DetrModel]],
|
|
13909
14042
|
['rt_detr', ['RTDetrModel', RTDetrModel]],
|
|
14043
|
+
['rt_detr_v2', ['RTDetrV2Model', RTDetrV2Model]],
|
|
14044
|
+
['rf_detr', ['RFDetrModel', RFDetrModel]],
|
|
13910
14045
|
['table-transformer', ['TableTransformerModel', TableTransformerModel]],
|
|
13911
14046
|
['vit', ['ViTModel', ViTModel]],
|
|
13912
14047
|
['ijepa', ['IJepaModel', IJepaModel]],
|
|
@@ -13968,6 +14103,7 @@ const MODEL_MAPPING_NAMES_ENCODER_DECODER = new Map([
|
|
|
13968
14103
|
const MODEL_MAPPING_NAMES_AUTO_ENCODER = new Map([
|
|
13969
14104
|
['mimi', ['MimiModel', MimiModel]],
|
|
13970
14105
|
['dac', ['DacModel', DacModel]],
|
|
14106
|
+
['snac', ['SnacModel', SnacModel]],
|
|
13971
14107
|
]);
|
|
13972
14108
|
|
|
13973
14109
|
const MODEL_MAPPING_NAMES_DECODER_ONLY = new Map([
|
|
@@ -13988,6 +14124,7 @@ const MODEL_MAPPING_NAMES_DECODER_ONLY = new Map([
|
|
|
13988
14124
|
['cohere', ['CohereModel', CohereModel]],
|
|
13989
14125
|
['gemma', ['GemmaModel', GemmaModel]],
|
|
13990
14126
|
['gemma2', ['Gemma2Model', Gemma2Model]],
|
|
14127
|
+
['gemma3_text', ['Gemma3Model', Gemma3Model]],
|
|
13991
14128
|
['helium', ['HeliumModel', HeliumModel]],
|
|
13992
14129
|
['glm', ['GlmModel', GlmModel]],
|
|
13993
14130
|
['openelm', ['OpenELMModel', OpenELMModel]],
|
|
@@ -14087,6 +14224,7 @@ const MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = new Map([
|
|
|
14087
14224
|
['cohere', ['CohereForCausalLM', CohereForCausalLM]],
|
|
14088
14225
|
['gemma', ['GemmaForCausalLM', GemmaForCausalLM]],
|
|
14089
14226
|
['gemma2', ['Gemma2ForCausalLM', Gemma2ForCausalLM]],
|
|
14227
|
+
['gemma3_text', ['Gemma3ForCausalLM', Gemma3ForCausalLM]],
|
|
14090
14228
|
['helium', ['HeliumForCausalLM', HeliumForCausalLM]],
|
|
14091
14229
|
['glm', ['GlmForCausalLM', GlmForCausalLM]],
|
|
14092
14230
|
['openelm', ['OpenELMForCausalLM', OpenELMForCausalLM]],
|
|
@@ -14203,6 +14341,8 @@ const MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES = new Map([
|
|
|
14203
14341
|
const MODEL_FOR_OBJECT_DETECTION_MAPPING_NAMES = new Map([
|
|
14204
14342
|
['detr', ['DetrForObjectDetection', DetrForObjectDetection]],
|
|
14205
14343
|
['rt_detr', ['RTDetrForObjectDetection', RTDetrForObjectDetection]],
|
|
14344
|
+
['rt_detr_v2', ['RTDetrV2ForObjectDetection', RTDetrV2ForObjectDetection]],
|
|
14345
|
+
['rf_detr', ['RFDetrForObjectDetection', RFDetrForObjectDetection]],
|
|
14206
14346
|
['table-transformer', ['TableTransformerForObjectDetection', TableTransformerForObjectDetection]],
|
|
14207
14347
|
['yolos', ['YolosForObjectDetection', YolosForObjectDetection]],
|
|
14208
14348
|
]);
|
|
@@ -14288,6 +14428,8 @@ const MODEL_FOR_DEPTH_ESTIMATION_MAPPING_NAMES = new Map([
|
|
|
14288
14428
|
['glpn', ['GLPNForDepthEstimation', GLPNForDepthEstimation]],
|
|
14289
14429
|
['sapiens', ['SapiensForDepthEstimation', SapiensForDepthEstimation]],
|
|
14290
14430
|
['depth_pro', ['DepthProForDepthEstimation', DepthProForDepthEstimation]],
|
|
14431
|
+
['metric3d', ['Metric3DForDepthEstimation', Metric3DForDepthEstimation]],
|
|
14432
|
+
['metric3dv2', ['Metric3Dv2ForDepthEstimation', Metric3Dv2ForDepthEstimation]],
|
|
14291
14433
|
])
|
|
14292
14434
|
|
|
14293
14435
|
const MODEL_FOR_NORMAL_ESTIMATION_MAPPING_NAMES = new Map([
|
|
@@ -14373,6 +14515,8 @@ const CUSTOM_MAPPING = [
|
|
|
14373
14515
|
['DacDecoderModel', DacDecoderModel, MODEL_TYPES.EncoderOnly],
|
|
14374
14516
|
['MimiEncoderModel', MimiEncoderModel, MODEL_TYPES.EncoderOnly],
|
|
14375
14517
|
['MimiDecoderModel', MimiDecoderModel, MODEL_TYPES.EncoderOnly],
|
|
14518
|
+
['SnacEncoderModel', SnacEncoderModel, MODEL_TYPES.EncoderOnly],
|
|
14519
|
+
['SnacDecoderModel', SnacDecoderModel, MODEL_TYPES.EncoderOnly],
|
|
14376
14520
|
]
|
|
14377
14521
|
for (const [name, model, type] of CUSTOM_MAPPING) {
|
|
14378
14522
|
MODEL_TYPE_MAPPING.set(name, type);
|
|
@@ -15689,14 +15833,15 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
15689
15833
|
/* harmony export */ ClapFeatureExtractor: () => (/* reexport safe */ _clap_feature_extraction_clap_js__WEBPACK_IMPORTED_MODULE_2__.ClapFeatureExtractor),
|
|
15690
15834
|
/* harmony export */ DacFeatureExtractor: () => (/* reexport safe */ _dac_feature_extraction_dac_js__WEBPACK_IMPORTED_MODULE_3__.DacFeatureExtractor),
|
|
15691
15835
|
/* harmony export */ EncodecFeatureExtractor: () => (/* reexport safe */ _encodec_feature_extraction_encodec_js__WEBPACK_IMPORTED_MODULE_1__.EncodecFeatureExtractor),
|
|
15692
|
-
/* harmony export */ ImageFeatureExtractor: () => (/* reexport safe */
|
|
15836
|
+
/* harmony export */ ImageFeatureExtractor: () => (/* reexport safe */ _base_image_processors_utils_js__WEBPACK_IMPORTED_MODULE_12__.ImageProcessor),
|
|
15693
15837
|
/* harmony export */ MoonshineFeatureExtractor: () => (/* reexport safe */ _moonshine_feature_extraction_moonshine_js__WEBPACK_IMPORTED_MODULE_4__.MoonshineFeatureExtractor),
|
|
15694
15838
|
/* harmony export */ PyAnnoteFeatureExtractor: () => (/* reexport safe */ _pyannote_feature_extraction_pyannote_js__WEBPACK_IMPORTED_MODULE_5__.PyAnnoteFeatureExtractor),
|
|
15695
15839
|
/* harmony export */ SeamlessM4TFeatureExtractor: () => (/* reexport safe */ _seamless_m4t_feature_extraction_seamless_m4t_js__WEBPACK_IMPORTED_MODULE_6__.SeamlessM4TFeatureExtractor),
|
|
15696
|
-
/* harmony export */
|
|
15697
|
-
/* harmony export */
|
|
15698
|
-
/* harmony export */
|
|
15699
|
-
/* harmony export */
|
|
15840
|
+
/* harmony export */ SnacFeatureExtractor: () => (/* reexport safe */ _snac_feature_extraction_snac_js__WEBPACK_IMPORTED_MODULE_7__.SnacFeatureExtractor),
|
|
15841
|
+
/* harmony export */ SpeechT5FeatureExtractor: () => (/* reexport safe */ _speecht5_feature_extraction_speecht5_js__WEBPACK_IMPORTED_MODULE_8__.SpeechT5FeatureExtractor),
|
|
15842
|
+
/* harmony export */ Wav2Vec2FeatureExtractor: () => (/* reexport safe */ _wav2vec2_feature_extraction_wav2vec2_js__WEBPACK_IMPORTED_MODULE_9__.Wav2Vec2FeatureExtractor),
|
|
15843
|
+
/* harmony export */ WeSpeakerFeatureExtractor: () => (/* reexport safe */ _wespeaker_feature_extraction_wespeaker_js__WEBPACK_IMPORTED_MODULE_10__.WeSpeakerFeatureExtractor),
|
|
15844
|
+
/* harmony export */ WhisperFeatureExtractor: () => (/* reexport safe */ _whisper_feature_extraction_whisper_js__WEBPACK_IMPORTED_MODULE_11__.WhisperFeatureExtractor)
|
|
15700
15845
|
/* harmony export */ });
|
|
15701
15846
|
/* harmony import */ var _audio_spectrogram_transformer_feature_extraction_audio_spectrogram_transformer_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ./audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.js */ "./src/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.js");
|
|
15702
15847
|
/* harmony import */ var _encodec_feature_extraction_encodec_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ./encodec/feature_extraction_encodec.js */ "./src/models/encodec/feature_extraction_encodec.js");
|
|
@@ -15705,11 +15850,13 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
15705
15850
|
/* harmony import */ var _moonshine_feature_extraction_moonshine_js__WEBPACK_IMPORTED_MODULE_4__ = __webpack_require__(/*! ./moonshine/feature_extraction_moonshine.js */ "./src/models/moonshine/feature_extraction_moonshine.js");
|
|
15706
15851
|
/* harmony import */ var _pyannote_feature_extraction_pyannote_js__WEBPACK_IMPORTED_MODULE_5__ = __webpack_require__(/*! ./pyannote/feature_extraction_pyannote.js */ "./src/models/pyannote/feature_extraction_pyannote.js");
|
|
15707
15852
|
/* harmony import */ var _seamless_m4t_feature_extraction_seamless_m4t_js__WEBPACK_IMPORTED_MODULE_6__ = __webpack_require__(/*! ./seamless_m4t/feature_extraction_seamless_m4t.js */ "./src/models/seamless_m4t/feature_extraction_seamless_m4t.js");
|
|
15708
|
-
/* harmony import */ var
|
|
15709
|
-
/* harmony import */ var
|
|
15710
|
-
/* harmony import */ var
|
|
15711
|
-
/* harmony import */ var
|
|
15712
|
-
/* harmony import */ var
|
|
15853
|
+
/* harmony import */ var _snac_feature_extraction_snac_js__WEBPACK_IMPORTED_MODULE_7__ = __webpack_require__(/*! ./snac/feature_extraction_snac.js */ "./src/models/snac/feature_extraction_snac.js");
|
|
15854
|
+
/* harmony import */ var _speecht5_feature_extraction_speecht5_js__WEBPACK_IMPORTED_MODULE_8__ = __webpack_require__(/*! ./speecht5/feature_extraction_speecht5.js */ "./src/models/speecht5/feature_extraction_speecht5.js");
|
|
15855
|
+
/* harmony import */ var _wav2vec2_feature_extraction_wav2vec2_js__WEBPACK_IMPORTED_MODULE_9__ = __webpack_require__(/*! ./wav2vec2/feature_extraction_wav2vec2.js */ "./src/models/wav2vec2/feature_extraction_wav2vec2.js");
|
|
15856
|
+
/* harmony import */ var _wespeaker_feature_extraction_wespeaker_js__WEBPACK_IMPORTED_MODULE_10__ = __webpack_require__(/*! ./wespeaker/feature_extraction_wespeaker.js */ "./src/models/wespeaker/feature_extraction_wespeaker.js");
|
|
15857
|
+
/* harmony import */ var _whisper_feature_extraction_whisper_js__WEBPACK_IMPORTED_MODULE_11__ = __webpack_require__(/*! ./whisper/feature_extraction_whisper.js */ "./src/models/whisper/feature_extraction_whisper.js");
|
|
15858
|
+
/* harmony import */ var _base_image_processors_utils_js__WEBPACK_IMPORTED_MODULE_12__ = __webpack_require__(/*! ../base/image_processors_utils.js */ "./src/base/image_processors_utils.js");
|
|
15859
|
+
|
|
15713
15860
|
|
|
15714
15861
|
|
|
15715
15862
|
|
|
@@ -18742,6 +18889,25 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
18742
18889
|
|
|
18743
18890
|
|
|
18744
18891
|
|
|
18892
|
+
/***/ }),
|
|
18893
|
+
|
|
18894
|
+
/***/ "./src/models/snac/feature_extraction_snac.js":
|
|
18895
|
+
/*!****************************************************!*\
|
|
18896
|
+
!*** ./src/models/snac/feature_extraction_snac.js ***!
|
|
18897
|
+
\****************************************************/
|
|
18898
|
+
/***/ ((__unused_webpack___webpack_module__, __webpack_exports__, __webpack_require__) => {
|
|
18899
|
+
|
|
18900
|
+
"use strict";
|
|
18901
|
+
__webpack_require__.r(__webpack_exports__);
|
|
18902
|
+
/* harmony export */ __webpack_require__.d(__webpack_exports__, {
|
|
18903
|
+
/* harmony export */ SnacFeatureExtractor: () => (/* binding */ SnacFeatureExtractor)
|
|
18904
|
+
/* harmony export */ });
|
|
18905
|
+
/* harmony import */ var _dac_feature_extraction_dac_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ../dac/feature_extraction_dac.js */ "./src/models/dac/feature_extraction_dac.js");
|
|
18906
|
+
|
|
18907
|
+
|
|
18908
|
+
class SnacFeatureExtractor extends _dac_feature_extraction_dac_js__WEBPACK_IMPORTED_MODULE_0__.DacFeatureExtractor { }
|
|
18909
|
+
|
|
18910
|
+
|
|
18745
18911
|
/***/ }),
|
|
18746
18912
|
|
|
18747
18913
|
/***/ "./src/models/speecht5/feature_extraction_speecht5.js":
|
|
@@ -19999,16 +20165,16 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
19999
20165
|
/* harmony import */ var _utils_image_js__WEBPACK_IMPORTED_MODULE_9__ = __webpack_require__(/*! ./utils/image.js */ "./src/utils/image.js");
|
|
20000
20166
|
/**
|
|
20001
20167
|
* @file Pipelines provide a high-level, easy to use, API for running machine learning models.
|
|
20002
|
-
*
|
|
20168
|
+
*
|
|
20003
20169
|
* **Example:** Instantiate pipeline using the `pipeline` function.
|
|
20004
20170
|
* ```javascript
|
|
20005
20171
|
* import { pipeline } from '@huggingface/transformers';
|
|
20006
|
-
*
|
|
20172
|
+
*
|
|
20007
20173
|
* const classifier = await pipeline('sentiment-analysis');
|
|
20008
20174
|
* const output = await classifier('I love transformers!');
|
|
20009
20175
|
* // [{'label': 'POSITIVE', 'score': 0.999817686}]
|
|
20010
20176
|
* ```
|
|
20011
|
-
*
|
|
20177
|
+
*
|
|
20012
20178
|
* @module pipelines
|
|
20013
20179
|
*/
|
|
20014
20180
|
|
|
@@ -20027,7 +20193,7 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
20027
20193
|
|
|
20028
20194
|
|
|
20029
20195
|
/**
|
|
20030
|
-
* @typedef {string | RawImage | URL} ImageInput
|
|
20196
|
+
* @typedef {string | RawImage | URL | Blob | HTMLCanvasElement | OffscreenCanvas} ImageInput
|
|
20031
20197
|
* @typedef {ImageInput|ImageInput[]} ImagePipelineInputs
|
|
20032
20198
|
*/
|
|
20033
20199
|
|
|
@@ -20101,7 +20267,7 @@ function get_bounding_box(box, asInteger) {
|
|
|
20101
20267
|
/**
|
|
20102
20268
|
* @callback DisposeType Disposes the item.
|
|
20103
20269
|
* @returns {Promise<void>} A promise that resolves when the item has been disposed.
|
|
20104
|
-
*
|
|
20270
|
+
*
|
|
20105
20271
|
* @typedef {Object} Disposable
|
|
20106
20272
|
* @property {DisposeType} dispose A promise that resolves when the pipeline has been disposed.
|
|
20107
20273
|
*/
|
|
@@ -20138,7 +20304,7 @@ class Pipeline extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_4__.Callable {
|
|
|
20138
20304
|
* @property {string} task The task of the pipeline. Useful for specifying subtasks.
|
|
20139
20305
|
* @property {PreTrainedModel} model The model used by the pipeline.
|
|
20140
20306
|
* @property {PreTrainedTokenizer} tokenizer The tokenizer used by the pipeline.
|
|
20141
|
-
*
|
|
20307
|
+
*
|
|
20142
20308
|
* @typedef {ModelTokenizerConstructorArgs} TextPipelineConstructorArgs An object used to instantiate a text-based pipeline.
|
|
20143
20309
|
*/
|
|
20144
20310
|
|
|
@@ -20147,7 +20313,7 @@ class Pipeline extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_4__.Callable {
|
|
|
20147
20313
|
* @property {string} task The task of the pipeline. Useful for specifying subtasks.
|
|
20148
20314
|
* @property {PreTrainedModel} model The model used by the pipeline.
|
|
20149
20315
|
* @property {Processor} processor The processor used by the pipeline.
|
|
20150
|
-
*
|
|
20316
|
+
*
|
|
20151
20317
|
* @typedef {ModelProcessorConstructorArgs} AudioPipelineConstructorArgs An object used to instantiate an audio-based pipeline.
|
|
20152
20318
|
* @typedef {ModelProcessorConstructorArgs} ImagePipelineConstructorArgs An object used to instantiate an image-based pipeline.
|
|
20153
20319
|
*/
|
|
@@ -20159,7 +20325,7 @@ class Pipeline extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_4__.Callable {
|
|
|
20159
20325
|
* @property {PreTrainedModel} model The model used by the pipeline.
|
|
20160
20326
|
* @property {PreTrainedTokenizer} tokenizer The tokenizer used by the pipeline.
|
|
20161
20327
|
* @property {Processor} processor The processor used by the pipeline.
|
|
20162
|
-
*
|
|
20328
|
+
*
|
|
20163
20329
|
* @typedef {ModelTokenizerProcessorConstructorArgs} TextAudioPipelineConstructorArgs An object used to instantiate a text- and audio-based pipeline.
|
|
20164
20330
|
* @typedef {ModelTokenizerProcessorConstructorArgs} TextImagePipelineConstructorArgs An object used to instantiate a text- and image-based pipeline.
|
|
20165
20331
|
*/
|
|
@@ -20169,15 +20335,15 @@ class Pipeline extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_4__.Callable {
|
|
|
20169
20335
|
* @property {string} label The label predicted.
|
|
20170
20336
|
* @property {number} score The corresponding probability.
|
|
20171
20337
|
* @typedef {TextClassificationSingle[]} TextClassificationOutput
|
|
20172
|
-
*
|
|
20338
|
+
*
|
|
20173
20339
|
* @typedef {Object} TextClassificationPipelineOptions Parameters specific to text classification pipelines.
|
|
20174
20340
|
* @property {number} [top_k=1] The number of top predictions to be returned.
|
|
20175
|
-
*
|
|
20341
|
+
*
|
|
20176
20342
|
* @callback TextClassificationPipelineCallback Classify the text(s) given as inputs.
|
|
20177
20343
|
* @param {string|string[]} texts The input text(s) to be classified.
|
|
20178
20344
|
* @param {TextClassificationPipelineOptions} [options] The options to use for text classification.
|
|
20179
20345
|
* @returns {Promise<TextClassificationOutput|TextClassificationOutput[]>} An array or object containing the predicted labels and scores.
|
|
20180
|
-
*
|
|
20346
|
+
*
|
|
20181
20347
|
* @typedef {TextPipelineConstructorArgs & TextClassificationPipelineCallback & Disposable} TextClassificationPipelineType
|
|
20182
20348
|
*/
|
|
20183
20349
|
|
|
@@ -20190,7 +20356,7 @@ class Pipeline extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_4__.Callable {
|
|
|
20190
20356
|
* const output = await classifier('I love transformers!');
|
|
20191
20357
|
* // [{ label: 'POSITIVE', score: 0.999788761138916 }]
|
|
20192
20358
|
* ```
|
|
20193
|
-
*
|
|
20359
|
+
*
|
|
20194
20360
|
* **Example:** Multilingual sentiment-analysis w/ `Xenova/bert-base-multilingual-uncased-sentiment` (and return top 5 classes).
|
|
20195
20361
|
* ```javascript
|
|
20196
20362
|
* const classifier = await pipeline('sentiment-analysis', 'Xenova/bert-base-multilingual-uncased-sentiment');
|
|
@@ -20203,7 +20369,7 @@ class Pipeline extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_4__.Callable {
|
|
|
20203
20369
|
* // { label: '2 stars', score: 0.0009423971059732139 }
|
|
20204
20370
|
* // ]
|
|
20205
20371
|
* ```
|
|
20206
|
-
*
|
|
20372
|
+
*
|
|
20207
20373
|
* **Example:** Toxic comment classification w/ `Xenova/toxic-bert` (and return all classes).
|
|
20208
20374
|
* ```javascript
|
|
20209
20375
|
* const classifier = await pipeline('text-classification', 'Xenova/toxic-bert');
|
|
@@ -20288,21 +20454,21 @@ class TextClassificationPipeline extends (/** @type {new (options: TextPipelineC
|
|
|
20288
20454
|
* @property {number} [start] The index of the start of the corresponding entity in the sentence.
|
|
20289
20455
|
* @property {number} [end] The index of the end of the corresponding entity in the sentence.
|
|
20290
20456
|
* @typedef {TokenClassificationSingle[]} TokenClassificationOutput
|
|
20291
|
-
*
|
|
20457
|
+
*
|
|
20292
20458
|
* @typedef {Object} TokenClassificationPipelineOptions Parameters specific to token classification pipelines.
|
|
20293
20459
|
* @property {string[]} [ignore_labels] A list of labels to ignore.
|
|
20294
|
-
*
|
|
20460
|
+
*
|
|
20295
20461
|
* @callback TokenClassificationPipelineCallback Classify each token of the text(s) given as inputs.
|
|
20296
20462
|
* @param {string|string[]} texts One or several texts (or one list of texts) for token classification.
|
|
20297
20463
|
* @param {TokenClassificationPipelineOptions} [options] The options to use for token classification.
|
|
20298
20464
|
* @returns {Promise<TokenClassificationOutput|TokenClassificationOutput[]>} The result.
|
|
20299
|
-
*
|
|
20465
|
+
*
|
|
20300
20466
|
* @typedef {TextPipelineConstructorArgs & TokenClassificationPipelineCallback & Disposable} TokenClassificationPipelineType
|
|
20301
20467
|
*/
|
|
20302
20468
|
|
|
20303
20469
|
/**
|
|
20304
20470
|
* Named Entity Recognition pipeline using any `ModelForTokenClassification`.
|
|
20305
|
-
*
|
|
20471
|
+
*
|
|
20306
20472
|
* **Example:** Perform named entity recognition with `Xenova/bert-base-NER`.
|
|
20307
20473
|
* ```javascript
|
|
20308
20474
|
* const classifier = await pipeline('token-classification', 'Xenova/bert-base-NER');
|
|
@@ -20312,7 +20478,7 @@ class TextClassificationPipeline extends (/** @type {new (options: TextPipelineC
|
|
|
20312
20478
|
* // { entity: 'B-LOC', score: 0.9994474053382874, index: 9, word: 'London' }
|
|
20313
20479
|
* // ]
|
|
20314
20480
|
* ```
|
|
20315
|
-
*
|
|
20481
|
+
*
|
|
20316
20482
|
* **Example:** Perform named entity recognition with `Xenova/bert-base-NER` (and return all labels).
|
|
20317
20483
|
* ```javascript
|
|
20318
20484
|
* const classifier = await pipeline('token-classification', 'Xenova/bert-base-NER');
|
|
@@ -20408,22 +20574,22 @@ class TokenClassificationPipeline extends (/** @type {new (options: TextPipeline
|
|
|
20408
20574
|
* @property {number} [start] The character start index of the answer (in the tokenized version of the input).
|
|
20409
20575
|
* @property {number} [end] The character end index of the answer (in the tokenized version of the input).
|
|
20410
20576
|
* @property {string} answer The answer to the question.
|
|
20411
|
-
*
|
|
20577
|
+
*
|
|
20412
20578
|
* @typedef {Object} QuestionAnsweringPipelineOptions Parameters specific to question answering pipelines.
|
|
20413
20579
|
* @property {number} [top_k=1] The number of top answer predictions to be returned.
|
|
20414
|
-
*
|
|
20580
|
+
*
|
|
20415
20581
|
* @callback QuestionAnsweringPipelineCallback Answer the question(s) given as inputs by using the context(s).
|
|
20416
20582
|
* @param {string|string[]} question One or several question(s) (must be used in conjunction with the `context` argument).
|
|
20417
20583
|
* @param {string|string[]} context One or several context(s) associated with the question(s) (must be used in conjunction with the `question` argument).
|
|
20418
20584
|
* @param {QuestionAnsweringPipelineOptions} [options] The options to use for question answering.
|
|
20419
20585
|
* @returns {Promise<QuestionAnsweringOutput|QuestionAnsweringOutput[]>} An array or object containing the predicted answers and scores.
|
|
20420
|
-
*
|
|
20586
|
+
*
|
|
20421
20587
|
* @typedef {TextPipelineConstructorArgs & QuestionAnsweringPipelineCallback & Disposable} QuestionAnsweringPipelineType
|
|
20422
20588
|
*/
|
|
20423
20589
|
|
|
20424
20590
|
/**
|
|
20425
20591
|
* Question Answering pipeline using any `ModelForQuestionAnswering`.
|
|
20426
|
-
*
|
|
20592
|
+
*
|
|
20427
20593
|
* **Example:** Run question answering with `Xenova/distilbert-base-uncased-distilled-squad`.
|
|
20428
20594
|
* ```javascript
|
|
20429
20595
|
* const answerer = await pipeline('question-answering', 'Xenova/distilbert-base-uncased-distilled-squad');
|
|
@@ -20548,10 +20714,10 @@ class QuestionAnsweringPipeline extends (/** @type {new (options: TextPipelineCo
|
|
|
20548
20714
|
* @property {number} token The predicted token id (to replace the masked one).
|
|
20549
20715
|
* @property {string} token_str The predicted token (to replace the masked one).
|
|
20550
20716
|
* @typedef {FillMaskSingle[]} FillMaskOutput
|
|
20551
|
-
*
|
|
20717
|
+
*
|
|
20552
20718
|
* @typedef {Object} FillMaskPipelineOptions Parameters specific to fill mask pipelines.
|
|
20553
20719
|
* @property {number} [top_k=5] When passed, overrides the number of predictions to return.
|
|
20554
|
-
*
|
|
20720
|
+
*
|
|
20555
20721
|
* @callback FillMaskPipelineCallback Fill the masked token in the text(s) given as inputs.
|
|
20556
20722
|
* @param {string|string[]} texts One or several texts (or one list of prompts) with masked tokens.
|
|
20557
20723
|
* @param {FillMaskPipelineOptions} [options] The options to use for masked language modelling.
|
|
@@ -20559,13 +20725,13 @@ class QuestionAnsweringPipeline extends (/** @type {new (options: TextPipelineCo
|
|
|
20559
20725
|
* and the sequence with the predicted token filled in, or an array of such arrays (one for each input text).
|
|
20560
20726
|
* If only one input text is given, the output will be an array of objects.
|
|
20561
20727
|
* @throws {Error} When the mask token is not found in the input text.
|
|
20562
|
-
*
|
|
20728
|
+
*
|
|
20563
20729
|
* @typedef {TextPipelineConstructorArgs & FillMaskPipelineCallback & Disposable} FillMaskPipelineType
|
|
20564
20730
|
*/
|
|
20565
20731
|
|
|
20566
20732
|
/**
|
|
20567
20733
|
* Masked language modeling prediction pipeline using any `ModelWithLMHead`.
|
|
20568
|
-
*
|
|
20734
|
+
*
|
|
20569
20735
|
* **Example:** Perform masked language modelling (a.k.a. "fill-mask") with `Xenova/bert-base-uncased`.
|
|
20570
20736
|
* ```javascript
|
|
20571
20737
|
* const unmasker = await pipeline('fill-mask', 'Xenova/bert-base-cased');
|
|
@@ -20578,7 +20744,7 @@ class QuestionAnsweringPipeline extends (/** @type {new (options: TextPipelineCo
|
|
|
20578
20744
|
* // { token_str: 'life', score: 0.01859794743359089, token: 1297, sequence: 'The goal of life is life.' }
|
|
20579
20745
|
* // ]
|
|
20580
20746
|
* ```
|
|
20581
|
-
*
|
|
20747
|
+
*
|
|
20582
20748
|
* **Example:** Perform masked language modelling (a.k.a. "fill-mask") with `Xenova/bert-base-cased` (and return top result).
|
|
20583
20749
|
* ```javascript
|
|
20584
20750
|
* const unmasker = await pipeline('fill-mask', 'Xenova/bert-base-cased');
|
|
@@ -20655,18 +20821,18 @@ class FillMaskPipeline extends (/** @type {new (options: TextPipelineConstructor
|
|
|
20655
20821
|
* @typedef {Object} Text2TextGenerationSingle
|
|
20656
20822
|
* @property {string} generated_text The generated text.
|
|
20657
20823
|
* @typedef {Text2TextGenerationSingle[]} Text2TextGenerationOutput
|
|
20658
|
-
*
|
|
20824
|
+
*
|
|
20659
20825
|
* @callback Text2TextGenerationPipelineCallback Generate the output text(s) using text(s) given as inputs.
|
|
20660
20826
|
* @param {string|string[]} texts Input text for the encoder.
|
|
20661
20827
|
* @param {Partial<import('./generation/configuration_utils.js').GenerationConfig>} [options] Additional keyword arguments to pass along to the generate method of the model.
|
|
20662
20828
|
* @returns {Promise<Text2TextGenerationOutput|Text2TextGenerationOutput[]>}
|
|
20663
|
-
*
|
|
20829
|
+
*
|
|
20664
20830
|
* @typedef {TextPipelineConstructorArgs & Text2TextGenerationPipelineCallback & Disposable} Text2TextGenerationPipelineType
|
|
20665
20831
|
*/
|
|
20666
20832
|
|
|
20667
20833
|
/**
|
|
20668
20834
|
* Text2TextGenerationPipeline class for generating text using a model that performs text-to-text generation tasks.
|
|
20669
|
-
*
|
|
20835
|
+
*
|
|
20670
20836
|
* **Example:** Text-to-text generation w/ `Xenova/LaMini-Flan-T5-783M`.
|
|
20671
20837
|
* ```javascript
|
|
20672
20838
|
* const generator = await pipeline('text2text-generation', 'Xenova/LaMini-Flan-T5-783M');
|
|
@@ -20742,18 +20908,18 @@ class Text2TextGenerationPipeline extends (/** @type {new (options: TextPipeline
|
|
|
20742
20908
|
* @typedef {Object} SummarizationSingle
|
|
20743
20909
|
* @property {string} summary_text The summary text.
|
|
20744
20910
|
* @typedef {SummarizationSingle[]} SummarizationOutput
|
|
20745
|
-
*
|
|
20911
|
+
*
|
|
20746
20912
|
* @callback SummarizationPipelineCallback Summarize the text(s) given as inputs.
|
|
20747
20913
|
* @param {string|string[]} texts One or several articles (or one list of articles) to summarize.
|
|
20748
20914
|
* @param {import('./generation/configuration_utils.js').GenerationConfig} [options] Additional keyword arguments to pass along to the generate method of the model.
|
|
20749
20915
|
* @returns {Promise<SummarizationOutput|SummarizationOutput[]>}
|
|
20750
|
-
*
|
|
20916
|
+
*
|
|
20751
20917
|
* @typedef {TextPipelineConstructorArgs & SummarizationPipelineCallback & Disposable} SummarizationPipelineType
|
|
20752
20918
|
*/
|
|
20753
20919
|
|
|
20754
20920
|
/**
|
|
20755
20921
|
* A pipeline for summarization tasks, inheriting from Text2TextGenerationPipeline.
|
|
20756
|
-
*
|
|
20922
|
+
*
|
|
20757
20923
|
* **Example:** Summarization w/ `Xenova/distilbart-cnn-6-6`.
|
|
20758
20924
|
* ```javascript
|
|
20759
20925
|
* const generator = await pipeline('summarization', 'Xenova/distilbart-cnn-6-6');
|
|
@@ -20789,23 +20955,23 @@ class SummarizationPipeline extends (/** @type {new (options: TextPipelineConstr
|
|
|
20789
20955
|
* @typedef {Object} TranslationSingle
|
|
20790
20956
|
* @property {string} translation_text The translated text.
|
|
20791
20957
|
* @typedef {TranslationSingle[]} TranslationOutput
|
|
20792
|
-
*
|
|
20958
|
+
*
|
|
20793
20959
|
* @callback TranslationPipelineCallback Translate the text(s) given as inputs.
|
|
20794
20960
|
* @param {string|string[]} texts Texts to be translated.
|
|
20795
20961
|
* @param {import('./generation/configuration_utils.js').GenerationConfig} [options] Additional keyword arguments to pass along to the generate method of the model.
|
|
20796
20962
|
* @returns {Promise<TranslationOutput|TranslationOutput[]>}
|
|
20797
|
-
*
|
|
20963
|
+
*
|
|
20798
20964
|
* @typedef {TextPipelineConstructorArgs & TranslationPipelineCallback & Disposable} TranslationPipelineType
|
|
20799
20965
|
*/
|
|
20800
20966
|
|
|
20801
20967
|
/**
|
|
20802
20968
|
* Translates text from one language to another.
|
|
20803
|
-
*
|
|
20969
|
+
*
|
|
20804
20970
|
* **Example:** Multilingual translation w/ `Xenova/nllb-200-distilled-600M`.
|
|
20805
|
-
*
|
|
20971
|
+
*
|
|
20806
20972
|
* See [here](https://github.com/facebookresearch/flores/blob/main/flores200/README.md#languages-in-flores-200)
|
|
20807
20973
|
* for the full list of languages and their corresponding codes.
|
|
20808
|
-
*
|
|
20974
|
+
*
|
|
20809
20975
|
* ```javascript
|
|
20810
20976
|
* const translator = await pipeline('translation', 'Xenova/nllb-200-distilled-600M');
|
|
20811
20977
|
* const output = await translator('जीवन एक चॉकलेट बॉक्स की तरह है।', {
|
|
@@ -20814,12 +20980,12 @@ class SummarizationPipeline extends (/** @type {new (options: TextPipelineConstr
|
|
|
20814
20980
|
* });
|
|
20815
20981
|
* // [{ translation_text: 'La vie est comme une boîte à chocolat.' }]
|
|
20816
20982
|
* ```
|
|
20817
|
-
*
|
|
20983
|
+
*
|
|
20818
20984
|
* **Example:** Multilingual translation w/ `Xenova/m2m100_418M`.
|
|
20819
|
-
*
|
|
20985
|
+
*
|
|
20820
20986
|
* See [here](https://huggingface.co/facebook/m2m100_418M#languages-covered)
|
|
20821
20987
|
* for the full list of languages and their corresponding codes.
|
|
20822
|
-
*
|
|
20988
|
+
*
|
|
20823
20989
|
* ```javascript
|
|
20824
20990
|
* const translator = await pipeline('translation', 'Xenova/m2m100_418M');
|
|
20825
20991
|
* const output = await translator('生活就像一盒巧克力。', {
|
|
@@ -20828,12 +20994,12 @@ class SummarizationPipeline extends (/** @type {new (options: TextPipelineConstr
|
|
|
20828
20994
|
* });
|
|
20829
20995
|
* // [{ translation_text: 'Life is like a box of chocolate.' }]
|
|
20830
20996
|
* ```
|
|
20831
|
-
*
|
|
20997
|
+
*
|
|
20832
20998
|
* **Example:** Multilingual translation w/ `Xenova/mbart-large-50-many-to-many-mmt`.
|
|
20833
|
-
*
|
|
20999
|
+
*
|
|
20834
21000
|
* See [here](https://huggingface.co/facebook/mbart-large-50-many-to-many-mmt#languages-covered)
|
|
20835
21001
|
* for the full list of languages and their corresponding codes.
|
|
20836
|
-
*
|
|
21002
|
+
*
|
|
20837
21003
|
* ```javascript
|
|
20838
21004
|
* const translator = await pipeline('translation', 'Xenova/mbart-large-50-many-to-many-mmt');
|
|
20839
21005
|
* const output = await translator('संयुक्त राष्ट्र के प्रमुख का कहना है कि सीरिया में कोई सैन्य समाधान नहीं है', {
|
|
@@ -20862,21 +21028,21 @@ function isChat(x) {
|
|
|
20862
21028
|
|
|
20863
21029
|
/**
|
|
20864
21030
|
* @typedef {import('./tokenizers.js').Message[]} Chat
|
|
20865
|
-
*
|
|
21031
|
+
*
|
|
20866
21032
|
* @typedef {Object} TextGenerationSingle
|
|
20867
21033
|
* @property {string|Chat} generated_text The generated text.
|
|
20868
21034
|
* @typedef {TextGenerationSingle[]} TextGenerationOutput
|
|
20869
|
-
*
|
|
21035
|
+
*
|
|
20870
21036
|
* @typedef {Object} TextGenerationSpecificParams Parameters specific to text-generation pipelines.
|
|
20871
21037
|
* @property {boolean} [add_special_tokens] Whether or not to add special tokens when tokenizing the sequences.
|
|
20872
21038
|
* @property {boolean} [return_full_text=true] If set to `false` only added text is returned, otherwise the full text is returned.
|
|
20873
21039
|
* @typedef {import('./generation/configuration_utils.js').GenerationConfig & TextGenerationSpecificParams} TextGenerationConfig
|
|
20874
|
-
*
|
|
21040
|
+
*
|
|
20875
21041
|
* @callback TextGenerationPipelineCallback Complete the prompt(s) given as inputs.
|
|
20876
21042
|
* @param {string|string[]|Chat|Chat[]} texts One or several prompts (or one list of prompts) to complete.
|
|
20877
21043
|
* @param {Partial<TextGenerationConfig>} [options] Additional keyword arguments to pass along to the generate method of the model.
|
|
20878
21044
|
* @returns {Promise<TextGenerationOutput|TextGenerationOutput[]>} An array or object containing the generated texts.
|
|
20879
|
-
*
|
|
21045
|
+
*
|
|
20880
21046
|
* @typedef {TextPipelineConstructorArgs & TextGenerationPipelineCallback & Disposable} TextGenerationPipelineType
|
|
20881
21047
|
*/
|
|
20882
21048
|
|
|
@@ -20884,7 +21050,7 @@ function isChat(x) {
|
|
|
20884
21050
|
* Language generation pipeline using any `ModelWithLMHead` or `ModelForCausalLM`.
|
|
20885
21051
|
* This pipeline predicts the words that will follow a specified text prompt.
|
|
20886
21052
|
* NOTE: For the full list of generation parameters, see [`GenerationConfig`](./utils/generation#module_utils/generation.GenerationConfig).
|
|
20887
|
-
*
|
|
21053
|
+
*
|
|
20888
21054
|
* **Example:** Text generation with `Xenova/distilgpt2` (default settings).
|
|
20889
21055
|
* ```javascript
|
|
20890
21056
|
* const generator = await pipeline('text-generation', 'Xenova/distilgpt2');
|
|
@@ -20892,7 +21058,7 @@ function isChat(x) {
|
|
|
20892
21058
|
* const output = await generator(text);
|
|
20893
21059
|
* // [{ generated_text: "I enjoy walking with my cute dog, and I love to play with the other dogs." }]
|
|
20894
21060
|
* ```
|
|
20895
|
-
*
|
|
21061
|
+
*
|
|
20896
21062
|
* **Example:** Text generation with `Xenova/distilgpt2` (custom settings).
|
|
20897
21063
|
* ```javascript
|
|
20898
21064
|
* const generator = await pipeline('text-generation', 'Xenova/distilgpt2');
|
|
@@ -20911,7 +21077,7 @@ function isChat(x) {
|
|
|
20911
21077
|
* // "generated_text": "Once upon a time, there was an abundance of information about the most important and influential"
|
|
20912
21078
|
* // }]
|
|
20913
21079
|
* ```
|
|
20914
|
-
*
|
|
21080
|
+
*
|
|
20915
21081
|
* **Example:** Run code generation with `Xenova/codegen-350M-mono`.
|
|
20916
21082
|
* ```javascript
|
|
20917
21083
|
* const generator = await pipeline('text-generation', 'Xenova/codegen-350M-mono');
|
|
@@ -21030,7 +21196,7 @@ class TextGenerationPipeline extends (/** @type {new (options: TextPipelineConst
|
|
|
21030
21196
|
* @property {string} sequence The sequence for which this is the output.
|
|
21031
21197
|
* @property {string[]} labels The labels sorted by order of likelihood.
|
|
21032
21198
|
* @property {number[]} scores The probabilities for each of the labels.
|
|
21033
|
-
*
|
|
21199
|
+
*
|
|
21034
21200
|
* @typedef {Object} ZeroShotClassificationPipelineOptions Parameters specific to zero-shot classification pipelines.
|
|
21035
21201
|
* @property {string} [hypothesis_template="This example is {}."] The template used to turn each
|
|
21036
21202
|
* candidate label into an NLI-style hypothesis. The candidate label will replace the {} placeholder.
|
|
@@ -21038,14 +21204,14 @@ class TextGenerationPipeline extends (/** @type {new (options: TextPipelineConst
|
|
|
21038
21204
|
* If `false`, the scores are normalized such that the sum of the label likelihoods for each sequence
|
|
21039
21205
|
* is 1. If `true`, the labels are considered independent and probabilities are normalized for each
|
|
21040
21206
|
* candidate by doing a softmax of the entailment score vs. the contradiction score.
|
|
21041
|
-
*
|
|
21207
|
+
*
|
|
21042
21208
|
* @callback ZeroShotClassificationPipelineCallback Classify the sequence(s) given as inputs.
|
|
21043
21209
|
* @param {string|string[]} texts The sequence(s) to classify, will be truncated if the model input is too large.
|
|
21044
21210
|
* @param {string|string[]} candidate_labels The set of possible class labels to classify each sequence into.
|
|
21045
21211
|
* Can be a single label, a string of comma-separated labels, or a list of labels.
|
|
21046
21212
|
* @param {ZeroShotClassificationPipelineOptions} [options] The options to use for zero-shot classification.
|
|
21047
21213
|
* @returns {Promise<ZeroShotClassificationOutput|ZeroShotClassificationOutput[]>} An array or object containing the predicted labels and scores.
|
|
21048
|
-
*
|
|
21214
|
+
*
|
|
21049
21215
|
* @typedef {TextPipelineConstructorArgs & ZeroShotClassificationPipelineCallback & Disposable} ZeroShotClassificationPipelineType
|
|
21050
21216
|
*/
|
|
21051
21217
|
|
|
@@ -21054,7 +21220,7 @@ class TextGenerationPipeline extends (/** @type {new (options: TextPipelineConst
|
|
|
21054
21220
|
* trained on NLI (natural language inference) tasks. Equivalent of `text-classification`
|
|
21055
21221
|
* pipelines, but these models don't require a hardcoded number of potential classes, they
|
|
21056
21222
|
* can be chosen at runtime. It usually means it's slower but it is **much** more flexible.
|
|
21057
|
-
*
|
|
21223
|
+
*
|
|
21058
21224
|
* **Example:** Zero shot classification with `Xenova/mobilebert-uncased-mnli`.
|
|
21059
21225
|
* ```javascript
|
|
21060
21226
|
* const classifier = await pipeline('zero-shot-classification', 'Xenova/mobilebert-uncased-mnli');
|
|
@@ -21067,7 +21233,7 @@ class TextGenerationPipeline extends (/** @type {new (options: TextPipelineConst
|
|
|
21067
21233
|
* // scores: [ 0.5562091040482018, 0.1843621307860853, 0.13942646639336376, 0.12000229877234923 ]
|
|
21068
21234
|
* // }
|
|
21069
21235
|
* ```
|
|
21070
|
-
*
|
|
21236
|
+
*
|
|
21071
21237
|
* **Example:** Zero shot classification with `Xenova/nli-deberta-v3-xsmall` (multi-label).
|
|
21072
21238
|
* ```javascript
|
|
21073
21239
|
* const classifier = await pipeline('zero-shot-classification', 'Xenova/nli-deberta-v3-xsmall');
|
|
@@ -21181,20 +21347,20 @@ class ZeroShotClassificationPipeline extends (/** @type {new (options: TextPipel
|
|
|
21181
21347
|
* @property {'none'|'mean'|'cls'} [pooling="none"] The pooling method to use.
|
|
21182
21348
|
* @property {boolean} [normalize=false] Whether or not to normalize the embeddings in the last dimension.
|
|
21183
21349
|
* @property {boolean} [quantize=false] Whether or not to quantize the embeddings.
|
|
21184
|
-
* @property {'binary'|'ubinary'} [precision='binary'] The precision to use for quantization.
|
|
21185
|
-
*
|
|
21350
|
+
* @property {'binary'|'ubinary'} [precision='binary'] The precision to use for quantization.
|
|
21351
|
+
*
|
|
21186
21352
|
* @callback FeatureExtractionPipelineCallback Extract the features of the input(s).
|
|
21187
21353
|
* @param {string|string[]} texts One or several texts (or one list of texts) to get the features of.
|
|
21188
21354
|
* @param {FeatureExtractionPipelineOptions} [options] The options to use for feature extraction.
|
|
21189
21355
|
* @returns {Promise<Tensor>} The features computed by the model.
|
|
21190
|
-
*
|
|
21356
|
+
*
|
|
21191
21357
|
* @typedef {TextPipelineConstructorArgs & FeatureExtractionPipelineCallback & Disposable} FeatureExtractionPipelineType
|
|
21192
21358
|
*/
|
|
21193
21359
|
|
|
21194
21360
|
/**
|
|
21195
21361
|
* Feature extraction pipeline using no model head. This pipeline extracts the hidden
|
|
21196
21362
|
* states from the base transformer, which can be used as features in downstream tasks.
|
|
21197
|
-
*
|
|
21363
|
+
*
|
|
21198
21364
|
* **Example:** Run feature extraction with `bert-base-uncased` (without pooling/normalization).
|
|
21199
21365
|
* ```javascript
|
|
21200
21366
|
* const extractor = await pipeline('feature-extraction', 'Xenova/bert-base-uncased', { revision: 'default' });
|
|
@@ -21205,7 +21371,7 @@ class ZeroShotClassificationPipeline extends (/** @type {new (options: TextPipel
|
|
|
21205
21371
|
* // dims: [1, 8, 768]
|
|
21206
21372
|
* // }
|
|
21207
21373
|
* ```
|
|
21208
|
-
*
|
|
21374
|
+
*
|
|
21209
21375
|
* **Example:** Run feature extraction with `bert-base-uncased` (with pooling/normalization).
|
|
21210
21376
|
* ```javascript
|
|
21211
21377
|
* const extractor = await pipeline('feature-extraction', 'Xenova/bert-base-uncased', { revision: 'default' });
|
|
@@ -21216,7 +21382,7 @@ class ZeroShotClassificationPipeline extends (/** @type {new (options: TextPipel
|
|
|
21216
21382
|
* // dims: [1, 768]
|
|
21217
21383
|
* // }
|
|
21218
21384
|
* ```
|
|
21219
|
-
*
|
|
21385
|
+
*
|
|
21220
21386
|
* **Example:** Calculating embeddings with `sentence-transformers` models.
|
|
21221
21387
|
* ```javascript
|
|
21222
21388
|
* const extractor = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2');
|
|
@@ -21297,19 +21463,19 @@ class FeatureExtractionPipeline extends (/** @type {new (options: TextPipelineCo
|
|
|
21297
21463
|
/**
|
|
21298
21464
|
* @typedef {Object} ImageFeatureExtractionPipelineOptions Parameters specific to image feature extraction pipelines.
|
|
21299
21465
|
* @property {boolean} [pool=null] Whether or not to return the pooled output. If set to `false`, the model will return the raw hidden states.
|
|
21300
|
-
*
|
|
21466
|
+
*
|
|
21301
21467
|
* @callback ImageFeatureExtractionPipelineCallback Extract the features of the input(s).
|
|
21302
21468
|
* @param {ImagePipelineInputs} images One or several images (or one list of images) to get the features of.
|
|
21303
21469
|
* @param {ImageFeatureExtractionPipelineOptions} [options] The options to use for image feature extraction.
|
|
21304
21470
|
* @returns {Promise<Tensor>} The image features computed by the model.
|
|
21305
|
-
*
|
|
21471
|
+
*
|
|
21306
21472
|
* @typedef {ImagePipelineConstructorArgs & ImageFeatureExtractionPipelineCallback & Disposable} ImageFeatureExtractionPipelineType
|
|
21307
21473
|
*/
|
|
21308
21474
|
|
|
21309
21475
|
/**
|
|
21310
21476
|
* Image feature extraction pipeline using no model head. This pipeline extracts the hidden
|
|
21311
21477
|
* states from the base transformer, which can be used as features in downstream tasks.
|
|
21312
|
-
*
|
|
21478
|
+
*
|
|
21313
21479
|
* **Example:** Perform image feature extraction with `Xenova/vit-base-patch16-224-in21k`.
|
|
21314
21480
|
* ```javascript
|
|
21315
21481
|
* const image_feature_extractor = await pipeline('image-feature-extraction', 'Xenova/vit-base-patch16-224-in21k');
|
|
@@ -21322,7 +21488,7 @@ class FeatureExtractionPipeline extends (/** @type {new (options: TextPipelineCo
|
|
|
21322
21488
|
* // size: 151296
|
|
21323
21489
|
* // }
|
|
21324
21490
|
* ```
|
|
21325
|
-
*
|
|
21491
|
+
*
|
|
21326
21492
|
* **Example:** Compute image embeddings with `Xenova/clip-vit-base-patch32`.
|
|
21327
21493
|
* ```javascript
|
|
21328
21494
|
* const image_feature_extractor = await pipeline('image-feature-extraction', 'Xenova/clip-vit-base-patch32');
|
|
@@ -21378,12 +21544,12 @@ class ImageFeatureExtractionPipeline extends (/** @type {new (options: ImagePipe
|
|
|
21378
21544
|
* @property {string} label The label predicted.
|
|
21379
21545
|
* @property {number} score The corresponding probability.
|
|
21380
21546
|
* @typedef {AudioClassificationSingle[]} AudioClassificationOutput
|
|
21381
|
-
*
|
|
21547
|
+
*
|
|
21382
21548
|
* @typedef {Object} AudioClassificationPipelineOptions Parameters specific to audio classification pipelines.
|
|
21383
21549
|
* @property {number} [top_k=5] The number of top labels that will be returned by the pipeline.
|
|
21384
21550
|
* If the provided number is `null` or higher than the number of labels available in the model configuration,
|
|
21385
21551
|
* it will default to the number of labels.
|
|
21386
|
-
*
|
|
21552
|
+
*
|
|
21387
21553
|
* @callback AudioClassificationPipelineCallback Classify the sequence(s) given as inputs.
|
|
21388
21554
|
* @param {AudioPipelineInputs} audio The input audio file(s) to be classified. The input is either:
|
|
21389
21555
|
* - `string` or `URL` that is the filename/URL of the audio file, the file will be read at the processor's sampling rate
|
|
@@ -21392,14 +21558,14 @@ class ImageFeatureExtractionPipeline extends (/** @type {new (options: ImagePipe
|
|
|
21392
21558
|
* - `Float32Array` or `Float64Array` of shape `(n, )`, representing the raw audio at the correct sampling rate (no further check will be done).
|
|
21393
21559
|
* @param {AudioClassificationPipelineOptions} [options] The options to use for audio classification.
|
|
21394
21560
|
* @returns {Promise<AudioClassificationOutput|AudioClassificationOutput[]>} An array or object containing the predicted labels and scores.
|
|
21395
|
-
*
|
|
21561
|
+
*
|
|
21396
21562
|
* @typedef {AudioPipelineConstructorArgs & AudioClassificationPipelineCallback & Disposable} AudioClassificationPipelineType
|
|
21397
21563
|
*/
|
|
21398
21564
|
|
|
21399
21565
|
/**
|
|
21400
21566
|
* Audio classification pipeline using any `AutoModelForAudioClassification`.
|
|
21401
21567
|
* This pipeline predicts the class of a raw waveform or an audio file.
|
|
21402
|
-
*
|
|
21568
|
+
*
|
|
21403
21569
|
* **Example:** Perform audio classification with `Xenova/wav2vec2-large-xlsr-53-gender-recognition-librispeech`.
|
|
21404
21570
|
* ```javascript
|
|
21405
21571
|
* const classifier = await pipeline('audio-classification', 'Xenova/wav2vec2-large-xlsr-53-gender-recognition-librispeech');
|
|
@@ -21410,7 +21576,7 @@ class ImageFeatureExtractionPipeline extends (/** @type {new (options: ImagePipe
|
|
|
21410
21576
|
* // { label: 'female', score: 0.001845747814513743 }
|
|
21411
21577
|
* // ]
|
|
21412
21578
|
* ```
|
|
21413
|
-
*
|
|
21579
|
+
*
|
|
21414
21580
|
* **Example:** Perform audio classification with `Xenova/ast-finetuned-audioset-10-10-0.4593` and return top 4 results.
|
|
21415
21581
|
* ```javascript
|
|
21416
21582
|
* const classifier = await pipeline('audio-classification', 'Xenova/ast-finetuned-audioset-10-10-0.4593');
|
|
@@ -21475,12 +21641,12 @@ class AudioClassificationPipeline extends (/** @type {new (options: AudioPipelin
|
|
|
21475
21641
|
* @typedef {Object} ZeroShotAudioClassificationOutput
|
|
21476
21642
|
* @property {string} label The label identified by the model. It is one of the suggested `candidate_label`.
|
|
21477
21643
|
* @property {number} score The score attributed by the model for that label (between 0 and 1).
|
|
21478
|
-
*
|
|
21644
|
+
*
|
|
21479
21645
|
* @typedef {Object} ZeroShotAudioClassificationPipelineOptions Parameters specific to zero-shot audio classification pipelines.
|
|
21480
21646
|
* @property {string} [hypothesis_template="This is a sound of {}."] The sentence used in conjunction with `candidate_labels`
|
|
21481
21647
|
* to attempt the audio classification by replacing the placeholder with the candidate_labels.
|
|
21482
21648
|
* Then likelihood is estimated by using `logits_per_audio`.
|
|
21483
|
-
*
|
|
21649
|
+
*
|
|
21484
21650
|
* @callback ZeroShotAudioClassificationPipelineCallback Classify the sequence(s) given as inputs.
|
|
21485
21651
|
* @param {AudioPipelineInputs} audio The input audio file(s) to be classified. The input is either:
|
|
21486
21652
|
* - `string` or `URL` that is the filename/URL of the audio file, the file will be read at the processor's sampling rate
|
|
@@ -21490,14 +21656,14 @@ class AudioClassificationPipeline extends (/** @type {new (options: AudioPipelin
|
|
|
21490
21656
|
* @param {string[]} candidate_labels The candidate labels for this audio.
|
|
21491
21657
|
* @param {ZeroShotAudioClassificationPipelineOptions} [options] The options to use for zero-shot audio classification.
|
|
21492
21658
|
* @returns {Promise<ZeroShotAudioClassificationOutput[]|ZeroShotAudioClassificationOutput[][]>} An array of objects containing the predicted labels and scores.
|
|
21493
|
-
*
|
|
21659
|
+
*
|
|
21494
21660
|
* @typedef {TextAudioPipelineConstructorArgs & ZeroShotAudioClassificationPipelineCallback & Disposable} ZeroShotAudioClassificationPipelineType
|
|
21495
21661
|
*/
|
|
21496
21662
|
|
|
21497
21663
|
/**
|
|
21498
21664
|
* Zero shot audio classification pipeline using `ClapModel`. This pipeline predicts the class of an audio when you
|
|
21499
21665
|
* provide an audio and a set of `candidate_labels`.
|
|
21500
|
-
*
|
|
21666
|
+
*
|
|
21501
21667
|
* **Example**: Perform zero-shot audio classification with `Xenova/clap-htsat-unfused`.
|
|
21502
21668
|
* ```javascript
|
|
21503
21669
|
* const classifier = await pipeline('zero-shot-audio-classification', 'Xenova/clap-htsat-unfused');
|
|
@@ -21530,7 +21696,7 @@ class ZeroShotAudioClassificationPipeline extends (/** @type {new (options: Text
|
|
|
21530
21696
|
audio = [/** @type {AudioInput} */ (audio)];
|
|
21531
21697
|
}
|
|
21532
21698
|
|
|
21533
|
-
// Insert label into hypothesis template
|
|
21699
|
+
// Insert label into hypothesis template
|
|
21534
21700
|
const texts = candidate_labels.map(
|
|
21535
21701
|
x => hypothesis_template.replace('{}', x)
|
|
21536
21702
|
);
|
|
@@ -21574,7 +21740,7 @@ class ZeroShotAudioClassificationPipeline extends (/** @type {new (options: Text
|
|
|
21574
21740
|
* @property {string} text The recognized text.
|
|
21575
21741
|
* @property {Chunk[]} [chunks] When using `return_timestamps`, the `chunks` will become a list
|
|
21576
21742
|
* containing all the various text chunks identified by the model.
|
|
21577
|
-
*
|
|
21743
|
+
*
|
|
21578
21744
|
* @typedef {Object} AutomaticSpeechRecognitionSpecificParams Parameters specific to automatic-speech-recognition pipelines.
|
|
21579
21745
|
* @property {boolean|'word'} [return_timestamps] Whether to return timestamps or not. Default is `false`.
|
|
21580
21746
|
* @property {number} [chunk_length_s] The length of audio chunks to process in seconds. Default is 0 (no chunking).
|
|
@@ -21584,7 +21750,7 @@ class ZeroShotAudioClassificationPipeline extends (/** @type {new (options: Text
|
|
|
21584
21750
|
* @property {string} [task] The task to perform. Default is `null`, meaning it should be auto-detected.
|
|
21585
21751
|
* @property {number} [num_frames] The number of frames in the input audio.
|
|
21586
21752
|
* @typedef {import('./generation/configuration_utils.js').GenerationConfig & AutomaticSpeechRecognitionSpecificParams} AutomaticSpeechRecognitionConfig
|
|
21587
|
-
*
|
|
21753
|
+
*
|
|
21588
21754
|
* @callback AutomaticSpeechRecognitionPipelineCallback Transcribe the audio sequence(s) given as inputs to text.
|
|
21589
21755
|
* @param {AudioPipelineInputs} audio The input audio file(s) to be transcribed. The input is either:
|
|
21590
21756
|
* - `string` or `URL` that is the filename/URL of the audio file, the file will be read at the processor's sampling rate
|
|
@@ -21593,7 +21759,7 @@ class ZeroShotAudioClassificationPipeline extends (/** @type {new (options: Text
|
|
|
21593
21759
|
* - `Float32Array` or `Float64Array` of shape `(n, )`, representing the raw audio at the correct sampling rate (no further check will be done).
|
|
21594
21760
|
* @param {Partial<AutomaticSpeechRecognitionConfig>} [options] Additional keyword arguments to pass along to the generate method of the model.
|
|
21595
21761
|
* @returns {Promise<AutomaticSpeechRecognitionOutput|AutomaticSpeechRecognitionOutput[]>} An object containing the transcription text and optionally timestamps if `return_timestamps` is `true`.
|
|
21596
|
-
*
|
|
21762
|
+
*
|
|
21597
21763
|
* @typedef {TextAudioPipelineConstructorArgs & AutomaticSpeechRecognitionPipelineCallback & Disposable} AutomaticSpeechRecognitionPipelineType
|
|
21598
21764
|
*/
|
|
21599
21765
|
|
|
@@ -21607,7 +21773,7 @@ class ZeroShotAudioClassificationPipeline extends (/** @type {new (options: Text
|
|
|
21607
21773
|
* const output = await transcriber(url);
|
|
21608
21774
|
* // { text: " And so my fellow Americans ask not what your country can do for you, ask what you can do for your country." }
|
|
21609
21775
|
* ```
|
|
21610
|
-
*
|
|
21776
|
+
*
|
|
21611
21777
|
* **Example:** Transcribe English w/ timestamps.
|
|
21612
21778
|
* ```javascript
|
|
21613
21779
|
* const transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en');
|
|
@@ -21621,7 +21787,7 @@ class ZeroShotAudioClassificationPipeline extends (/** @type {new (options: Text
|
|
|
21621
21787
|
* // ]
|
|
21622
21788
|
* // }
|
|
21623
21789
|
* ```
|
|
21624
|
-
*
|
|
21790
|
+
*
|
|
21625
21791
|
* **Example:** Transcribe English w/ word-level timestamps.
|
|
21626
21792
|
* ```javascript
|
|
21627
21793
|
* const transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en');
|
|
@@ -21640,7 +21806,7 @@ class ZeroShotAudioClassificationPipeline extends (/** @type {new (options: Text
|
|
|
21640
21806
|
* // ]
|
|
21641
21807
|
* // }
|
|
21642
21808
|
* ```
|
|
21643
|
-
*
|
|
21809
|
+
*
|
|
21644
21810
|
* **Example:** Transcribe French.
|
|
21645
21811
|
* ```javascript
|
|
21646
21812
|
* const transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-small');
|
|
@@ -21648,7 +21814,7 @@ class ZeroShotAudioClassificationPipeline extends (/** @type {new (options: Text
|
|
|
21648
21814
|
* const output = await transcriber(url, { language: 'french', task: 'transcribe' });
|
|
21649
21815
|
* // { text: " J'adore, j'aime, je n'aime pas, je déteste." }
|
|
21650
21816
|
* ```
|
|
21651
|
-
*
|
|
21817
|
+
*
|
|
21652
21818
|
* **Example:** Translate French to English.
|
|
21653
21819
|
* ```javascript
|
|
21654
21820
|
* const transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-small');
|
|
@@ -21656,7 +21822,7 @@ class ZeroShotAudioClassificationPipeline extends (/** @type {new (options: Text
|
|
|
21656
21822
|
* const output = await transcriber(url, { language: 'french', task: 'translate' });
|
|
21657
21823
|
* // { text: " I love, I like, I don't like, I hate." }
|
|
21658
21824
|
* ```
|
|
21659
|
-
*
|
|
21825
|
+
*
|
|
21660
21826
|
* **Example:** Transcribe/translate audio longer than 30 seconds.
|
|
21661
21827
|
* ```javascript
|
|
21662
21828
|
* const transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en');
|
|
@@ -21879,18 +22045,18 @@ class AutomaticSpeechRecognitionPipeline extends (/** @type {new (options: TextA
|
|
|
21879
22045
|
* @typedef {Object} ImageToTextSingle
|
|
21880
22046
|
* @property {string} generated_text The generated text.
|
|
21881
22047
|
* @typedef {ImageToTextSingle[]} ImageToTextOutput
|
|
21882
|
-
*
|
|
22048
|
+
*
|
|
21883
22049
|
* @callback ImageToTextPipelineCallback Assign labels to the image(s) passed as inputs.
|
|
21884
22050
|
* @param {ImagePipelineInputs} texts The images to be captioned.
|
|
21885
22051
|
* @param {Partial<import('./generation/configuration_utils.js').GenerationConfig>} [options] Additional keyword arguments to pass along to the generate method of the model.
|
|
21886
22052
|
* @returns {Promise<ImageToTextOutput|ImageToTextOutput[]>} An object (or array of objects) containing the generated text(s).
|
|
21887
|
-
*
|
|
22053
|
+
*
|
|
21888
22054
|
* @typedef {TextImagePipelineConstructorArgs & ImageToTextPipelineCallback & Disposable} ImageToTextPipelineType
|
|
21889
22055
|
*/
|
|
21890
22056
|
|
|
21891
22057
|
/**
|
|
21892
22058
|
* Image To Text pipeline using a `AutoModelForVision2Seq`. This pipeline predicts a caption for a given image.
|
|
21893
|
-
*
|
|
22059
|
+
*
|
|
21894
22060
|
* **Example:** Generate a caption for an image w/ `Xenova/vit-gpt2-image-captioning`.
|
|
21895
22061
|
* ```javascript
|
|
21896
22062
|
* const captioner = await pipeline('image-to-text', 'Xenova/vit-gpt2-image-captioning');
|
|
@@ -21898,7 +22064,7 @@ class AutomaticSpeechRecognitionPipeline extends (/** @type {new (options: TextA
|
|
|
21898
22064
|
* const output = await captioner(url);
|
|
21899
22065
|
* // [{ generated_text: 'a cat laying on a couch with another cat' }]
|
|
21900
22066
|
* ```
|
|
21901
|
-
*
|
|
22067
|
+
*
|
|
21902
22068
|
* **Example:** Optical Character Recognition (OCR) w/ `Xenova/trocr-small-handwritten`.
|
|
21903
22069
|
* ```javascript
|
|
21904
22070
|
* const captioner = await pipeline('image-to-text', 'Xenova/trocr-small-handwritten');
|
|
@@ -21944,22 +22110,22 @@ class ImageToTextPipeline extends (/** @type {new (options: TextImagePipelineCon
|
|
|
21944
22110
|
* @property {string} label The label identified by the model.
|
|
21945
22111
|
* @property {number} score The score attributed by the model for that label.
|
|
21946
22112
|
* @typedef {ImageClassificationSingle[]} ImageClassificationOutput
|
|
21947
|
-
*
|
|
22113
|
+
*
|
|
21948
22114
|
* @typedef {Object} ImageClassificationPipelineOptions Parameters specific to image classification pipelines.
|
|
21949
|
-
* @property {number} [top_k=1] The number of top labels that will be returned by the pipeline.
|
|
21950
|
-
*
|
|
22115
|
+
* @property {number} [top_k=1] The number of top labels that will be returned by the pipeline.
|
|
22116
|
+
*
|
|
21951
22117
|
* @callback ImageClassificationPipelineCallback Assign labels to the image(s) passed as inputs.
|
|
21952
22118
|
* @param {ImagePipelineInputs} images The input images(s) to be classified.
|
|
21953
22119
|
* @param {ImageClassificationPipelineOptions} [options] The options to use for image classification.
|
|
21954
22120
|
* @returns {Promise<ImageClassificationOutput|ImageClassificationOutput[]>} An array or object containing the predicted labels and scores.
|
|
21955
|
-
*
|
|
22121
|
+
*
|
|
21956
22122
|
* @typedef {ImagePipelineConstructorArgs & ImageClassificationPipelineCallback & Disposable} ImageClassificationPipelineType
|
|
21957
22123
|
*/
|
|
21958
22124
|
|
|
21959
22125
|
/**
|
|
21960
22126
|
* Image classification pipeline using any `AutoModelForImageClassification`.
|
|
21961
22127
|
* This pipeline predicts the class of an image.
|
|
21962
|
-
*
|
|
22128
|
+
*
|
|
21963
22129
|
* **Example:** Classify an image.
|
|
21964
22130
|
* ```javascript
|
|
21965
22131
|
* const classifier = await pipeline('image-classification', 'Xenova/vit-base-patch16-224');
|
|
@@ -21969,7 +22135,7 @@ class ImageToTextPipeline extends (/** @type {new (options: TextImagePipelineCon
|
|
|
21969
22135
|
* // { label: 'tiger, Panthera tigris', score: 0.632695734500885 },
|
|
21970
22136
|
* // ]
|
|
21971
22137
|
* ```
|
|
21972
|
-
*
|
|
22138
|
+
*
|
|
21973
22139
|
* **Example:** Classify an image and return top `n` classes.
|
|
21974
22140
|
* ```javascript
|
|
21975
22141
|
* const classifier = await pipeline('image-classification', 'Xenova/vit-base-patch16-224');
|
|
@@ -21981,7 +22147,7 @@ class ImageToTextPipeline extends (/** @type {new (options: TextImagePipelineCon
|
|
|
21981
22147
|
* // { label: 'lion, king of beasts, Panthera leo', score: 0.00045060308184474707 },
|
|
21982
22148
|
* // ]
|
|
21983
22149
|
* ```
|
|
21984
|
-
*
|
|
22150
|
+
*
|
|
21985
22151
|
* **Example:** Classify an image and return all classes.
|
|
21986
22152
|
* ```javascript
|
|
21987
22153
|
* const classifier = await pipeline('image-classification', 'Xenova/vit-base-patch16-224');
|
|
@@ -22048,7 +22214,7 @@ class ImageClassificationPipeline extends (/** @type {new (options: ImagePipelin
|
|
|
22048
22214
|
* @property {string|null} label The label of the segment.
|
|
22049
22215
|
* @property {number|null} score The score of the segment.
|
|
22050
22216
|
* @property {RawImage} mask The mask of the segment.
|
|
22051
|
-
*
|
|
22217
|
+
*
|
|
22052
22218
|
* @typedef {Object} ImageSegmentationPipelineOptions Parameters specific to image segmentation pipelines.
|
|
22053
22219
|
* @property {number} [threshold=0.5] Probability threshold to filter out predicted masks.
|
|
22054
22220
|
* @property {number} [mask_threshold=0.5] Threshold to use when turning the predicted masks into binary values.
|
|
@@ -22057,19 +22223,19 @@ class ImageClassificationPipeline extends (/** @type {new (options: ImagePipelin
|
|
|
22057
22223
|
* depending on model capabilities. If not set, the pipeline will attempt to resolve (in that order).
|
|
22058
22224
|
* @property {number[]} [label_ids_to_fuse=null] List of label ids to fuse. If not set, do not fuse any labels.
|
|
22059
22225
|
* @property {number[][]} [target_sizes=null] List of target sizes for the input images. If not set, use the original image sizes.
|
|
22060
|
-
*
|
|
22226
|
+
*
|
|
22061
22227
|
* @callback ImageSegmentationPipelineCallback Segment the input images.
|
|
22062
22228
|
* @param {ImagePipelineInputs} images The input images.
|
|
22063
22229
|
* @param {ImageSegmentationPipelineOptions} [options] The options to use for image segmentation.
|
|
22064
22230
|
* @returns {Promise<ImageSegmentationPipelineOutput[]>} The annotated segments.
|
|
22065
|
-
*
|
|
22231
|
+
*
|
|
22066
22232
|
* @typedef {ImagePipelineConstructorArgs & ImageSegmentationPipelineCallback & Disposable} ImageSegmentationPipelineType
|
|
22067
22233
|
*/
|
|
22068
22234
|
|
|
22069
22235
|
/**
|
|
22070
22236
|
* Image segmentation pipeline using any `AutoModelForXXXSegmentation`.
|
|
22071
22237
|
* This pipeline predicts masks of objects and their classes.
|
|
22072
|
-
*
|
|
22238
|
+
*
|
|
22073
22239
|
* **Example:** Perform image segmentation with `Xenova/detr-resnet-50-panoptic`.
|
|
22074
22240
|
* ```javascript
|
|
22075
22241
|
* const segmenter = await pipeline('image-segmentation', 'Xenova/detr-resnet-50-panoptic');
|
|
@@ -22153,12 +22319,17 @@ class ImageSegmentationPipeline extends (/** @type {new (options: ImagePipelineC
|
|
|
22153
22319
|
/** @type {ImageSegmentationPipelineOutput[]} */
|
|
22154
22320
|
const annotation = [];
|
|
22155
22321
|
if (!subtask) {
|
|
22322
|
+
// We define an epsilon to safeguard against numerical/precision issues when detecting
|
|
22323
|
+
// the normalization mode of the output (i.e., sigmoid already applied, or not).
|
|
22324
|
+
// See https://github.com/microsoft/onnxruntime/issues/23943 for more information.
|
|
22325
|
+
const epsilon = 1e-5;
|
|
22326
|
+
|
|
22156
22327
|
// Perform standard image segmentation
|
|
22157
22328
|
const result = output[outputNames[0]];
|
|
22158
22329
|
for (let i = 0; i < imageSizes.length; ++i) {
|
|
22159
22330
|
const size = imageSizes[i];
|
|
22160
22331
|
const item = result[i];
|
|
22161
|
-
if (item.data.some(x => x <
|
|
22332
|
+
if (item.data.some(x => x < -epsilon || x > 1 + epsilon)) {
|
|
22162
22333
|
item.sigmoid_();
|
|
22163
22334
|
}
|
|
22164
22335
|
const mask = await _utils_image_js__WEBPACK_IMPORTED_MODULE_9__.RawImage.fromTensor(item.mul_(255).to('uint8')).resize(size[1], size[0]);
|
|
@@ -22227,19 +22398,19 @@ class ImageSegmentationPipeline extends (/** @type {new (options: ImagePipelineC
|
|
|
22227
22398
|
|
|
22228
22399
|
/**
|
|
22229
22400
|
* @typedef {Object} BackgroundRemovalPipelineOptions Parameters specific to image segmentation pipelines.
|
|
22230
|
-
*
|
|
22401
|
+
*
|
|
22231
22402
|
* @callback BackgroundRemovalPipelineCallback Segment the input images.
|
|
22232
22403
|
* @param {ImagePipelineInputs} images The input images.
|
|
22233
22404
|
* @param {BackgroundRemovalPipelineOptions} [options] The options to use for image segmentation.
|
|
22234
22405
|
* @returns {Promise<RawImage[]>} The images with the background removed.
|
|
22235
|
-
*
|
|
22406
|
+
*
|
|
22236
22407
|
* @typedef {ImagePipelineConstructorArgs & BackgroundRemovalPipelineCallback & Disposable} BackgroundRemovalPipelineType
|
|
22237
22408
|
*/
|
|
22238
22409
|
|
|
22239
22410
|
/**
|
|
22240
22411
|
* Background removal pipeline using certain `AutoModelForXXXSegmentation`.
|
|
22241
22412
|
* This pipeline removes the backgrounds of images.
|
|
22242
|
-
*
|
|
22413
|
+
*
|
|
22243
22414
|
* **Example:** Perform background removal with `Xenova/modnet`.
|
|
22244
22415
|
* ```javascript
|
|
22245
22416
|
* const segmenter = await pipeline('background-removal', 'Xenova/modnet');
|
|
@@ -22250,7 +22421,7 @@ class ImageSegmentationPipeline extends (/** @type {new (options: ImagePipelineC
|
|
|
22250
22421
|
* // ]
|
|
22251
22422
|
* ```
|
|
22252
22423
|
*/
|
|
22253
|
-
class BackgroundRemovalPipeline extends (/** @type {new (options: ImagePipelineConstructorArgs) =>
|
|
22424
|
+
class BackgroundRemovalPipeline extends (/** @type {new (options: ImagePipelineConstructorArgs) => BackgroundRemovalPipelineType} */ (/** @type {any} */(ImageSegmentationPipeline))) {
|
|
22254
22425
|
/**
|
|
22255
22426
|
* Create a new BackgroundRemovalPipeline.
|
|
22256
22427
|
* @param {ImagePipelineConstructorArgs} options An object used to instantiate the pipeline.
|
|
@@ -22285,25 +22456,25 @@ class BackgroundRemovalPipeline extends (/** @type {new (options: ImagePipelineC
|
|
|
22285
22456
|
* @typedef {Object} ZeroShotImageClassificationOutput
|
|
22286
22457
|
* @property {string} label The label identified by the model. It is one of the suggested `candidate_label`.
|
|
22287
22458
|
* @property {number} score The score attributed by the model for that label (between 0 and 1).
|
|
22288
|
-
*
|
|
22459
|
+
*
|
|
22289
22460
|
* @typedef {Object} ZeroShotImageClassificationPipelineOptions Parameters specific to zero-shot image classification pipelines.
|
|
22290
22461
|
* @property {string} [hypothesis_template="This is a photo of {}"] The sentence used in conjunction with `candidate_labels`
|
|
22291
22462
|
* to attempt the image classification by replacing the placeholder with the candidate_labels.
|
|
22292
22463
|
* Then likelihood is estimated by using `logits_per_image`.
|
|
22293
|
-
*
|
|
22464
|
+
*
|
|
22294
22465
|
* @callback ZeroShotImageClassificationPipelineCallback Assign labels to the image(s) passed as inputs.
|
|
22295
22466
|
* @param {ImagePipelineInputs} images The input images.
|
|
22296
22467
|
* @param {string[]} candidate_labels The candidate labels for this image.
|
|
22297
22468
|
* @param {ZeroShotImageClassificationPipelineOptions} [options] The options to use for zero-shot image classification.
|
|
22298
22469
|
* @returns {Promise<ZeroShotImageClassificationOutput[]|ZeroShotImageClassificationOutput[][]>} An array of objects containing the predicted labels and scores.
|
|
22299
|
-
*
|
|
22470
|
+
*
|
|
22300
22471
|
* @typedef {TextImagePipelineConstructorArgs & ZeroShotImageClassificationPipelineCallback & Disposable} ZeroShotImageClassificationPipelineType
|
|
22301
22472
|
*/
|
|
22302
22473
|
|
|
22303
22474
|
/**
|
|
22304
22475
|
* Zero shot image classification pipeline. This pipeline predicts the class of
|
|
22305
22476
|
* an image when you provide an image and a set of `candidate_labels`.
|
|
22306
|
-
*
|
|
22477
|
+
*
|
|
22307
22478
|
* **Example:** Zero shot image classification w/ `Xenova/clip-vit-base-patch32`.
|
|
22308
22479
|
* ```javascript
|
|
22309
22480
|
* const classifier = await pipeline('zero-shot-image-classification', 'Xenova/clip-vit-base-patch32');
|
|
@@ -22333,7 +22504,7 @@ class ZeroShotImageClassificationPipeline extends (/** @type {new (options: Text
|
|
|
22333
22504
|
const isBatched = Array.isArray(images);
|
|
22334
22505
|
const preparedImages = await prepareImages(images);
|
|
22335
22506
|
|
|
22336
|
-
// Insert label into hypothesis template
|
|
22507
|
+
// Insert label into hypothesis template
|
|
22337
22508
|
const texts = candidate_labels.map(
|
|
22338
22509
|
x => hypothesis_template.replace('{}', x)
|
|
22339
22510
|
);
|
|
@@ -22380,23 +22551,23 @@ class ZeroShotImageClassificationPipeline extends (/** @type {new (options: Text
|
|
|
22380
22551
|
* @property {number} score The score attributed by the model for that label.
|
|
22381
22552
|
* @property {BoundingBox} box The bounding box of detected object in image's original size, or as a percentage if `percentage` is set to true.
|
|
22382
22553
|
* @typedef {ObjectDetectionPipelineSingle[]} ObjectDetectionPipelineOutput
|
|
22383
|
-
*
|
|
22554
|
+
*
|
|
22384
22555
|
* @typedef {Object} ObjectDetectionPipelineOptions Parameters specific to object detection pipelines.
|
|
22385
22556
|
* @property {number} [threshold=0.9] The threshold used to filter boxes by score.
|
|
22386
22557
|
* @property {boolean} [percentage=false] Whether to return the boxes coordinates in percentage (true) or in pixels (false).
|
|
22387
|
-
*
|
|
22558
|
+
*
|
|
22388
22559
|
* @callback ObjectDetectionPipelineCallback Detect objects (bounding boxes & classes) in the image(s) passed as inputs.
|
|
22389
22560
|
* @param {ImagePipelineInputs} images The input images.
|
|
22390
22561
|
* @param {ObjectDetectionPipelineOptions} [options] The options to use for object detection.
|
|
22391
|
-
* @returns {Promise<ObjectDetectionPipelineOutput|ObjectDetectionPipelineOutput[]>} A list of objects or a list of list of objects.
|
|
22392
|
-
*
|
|
22562
|
+
* @returns {Promise<ObjectDetectionPipelineOutput|ObjectDetectionPipelineOutput[]>} A list of objects or a list of list of objects.
|
|
22563
|
+
*
|
|
22393
22564
|
* @typedef {ImagePipelineConstructorArgs & ObjectDetectionPipelineCallback & Disposable} ObjectDetectionPipelineType
|
|
22394
22565
|
*/
|
|
22395
22566
|
|
|
22396
22567
|
/**
|
|
22397
22568
|
* Object detection pipeline using any `AutoModelForObjectDetection`.
|
|
22398
22569
|
* This pipeline predicts bounding boxes of objects and their classes.
|
|
22399
|
-
*
|
|
22570
|
+
*
|
|
22400
22571
|
* **Example:** Run object-detection with `Xenova/detr-resnet-50`.
|
|
22401
22572
|
* ```javascript
|
|
22402
22573
|
* const detector = await pipeline('object-detection', 'Xenova/detr-resnet-50');
|
|
@@ -22470,27 +22641,27 @@ class ObjectDetectionPipeline extends (/** @type {new (options: ImagePipelineCon
|
|
|
22470
22641
|
* @property {string} label Text query corresponding to the found object.
|
|
22471
22642
|
* @property {number} score Score corresponding to the object (between 0 and 1).
|
|
22472
22643
|
* @property {BoundingBox} box Bounding box of the detected object in image's original size, or as a percentage if `percentage` is set to true.
|
|
22473
|
-
*
|
|
22644
|
+
*
|
|
22474
22645
|
* @typedef {Object} ZeroShotObjectDetectionPipelineOptions Parameters specific to zero-shot object detection pipelines.
|
|
22475
22646
|
* @property {number} [threshold=0.1] The probability necessary to make a prediction.
|
|
22476
22647
|
* @property {number} [top_k=null] The number of top predictions that will be returned by the pipeline.
|
|
22477
22648
|
* If the provided number is `null` or higher than the number of predictions available, it will default
|
|
22478
22649
|
* to the number of predictions.
|
|
22479
22650
|
* @property {boolean} [percentage=false] Whether to return the boxes coordinates in percentage (true) or in pixels (false).
|
|
22480
|
-
*
|
|
22651
|
+
*
|
|
22481
22652
|
* @callback ZeroShotObjectDetectionPipelineCallback Detect objects (bounding boxes & classes) in the image(s) passed as inputs.
|
|
22482
22653
|
* @param {ImagePipelineInputs} images The input images.
|
|
22483
22654
|
* @param {string[]} candidate_labels What the model should recognize in the image.
|
|
22484
22655
|
* @param {ZeroShotObjectDetectionPipelineOptions} [options] The options to use for zero-shot object detection.
|
|
22485
22656
|
* @returns {Promise<ZeroShotObjectDetectionOutput[]|ZeroShotObjectDetectionOutput[][]>} An array of objects containing the predicted labels, scores, and bounding boxes.
|
|
22486
|
-
*
|
|
22657
|
+
*
|
|
22487
22658
|
* @typedef {TextImagePipelineConstructorArgs & ZeroShotObjectDetectionPipelineCallback & Disposable} ZeroShotObjectDetectionPipelineType
|
|
22488
22659
|
*/
|
|
22489
22660
|
|
|
22490
22661
|
/**
|
|
22491
22662
|
* Zero-shot object detection pipeline. This pipeline predicts bounding boxes of
|
|
22492
22663
|
* objects when you provide an image and a set of `candidate_labels`.
|
|
22493
|
-
*
|
|
22664
|
+
*
|
|
22494
22665
|
* **Example:** Zero-shot object detection w/ `Xenova/owlvit-base-patch32`.
|
|
22495
22666
|
* ```javascript
|
|
22496
22667
|
* const detector = await pipeline('zero-shot-object-detection', 'Xenova/owlvit-base-patch32');
|
|
@@ -22520,7 +22691,7 @@ class ObjectDetectionPipeline extends (/** @type {new (options: ImagePipelineCon
|
|
|
22520
22691
|
* // }
|
|
22521
22692
|
* // ]
|
|
22522
22693
|
* ```
|
|
22523
|
-
*
|
|
22694
|
+
*
|
|
22524
22695
|
* **Example:** Zero-shot object detection w/ `Xenova/owlvit-base-patch32` (returning top 4 matches and setting a threshold).
|
|
22525
22696
|
* ```javascript
|
|
22526
22697
|
* const detector = await pipeline('zero-shot-object-detection', 'Xenova/owlvit-base-patch32');
|
|
@@ -22635,13 +22806,13 @@ class ZeroShotObjectDetectionPipeline extends (/** @type {new (options: TextImag
|
|
|
22635
22806
|
* @typedef {Object} DocumentQuestionAnsweringSingle
|
|
22636
22807
|
* @property {string} answer The generated text.
|
|
22637
22808
|
* @typedef {DocumentQuestionAnsweringSingle[]} DocumentQuestionAnsweringOutput
|
|
22638
|
-
*
|
|
22809
|
+
*
|
|
22639
22810
|
* @callback DocumentQuestionAnsweringPipelineCallback Answer the question given as input by using the document.
|
|
22640
22811
|
* @param {ImageInput} image The image of the document to use.
|
|
22641
22812
|
* @param {string} question A question to ask of the document.
|
|
22642
22813
|
* @param {Partial<import('./generation/configuration_utils.js').GenerationConfig>} [options] Additional keyword arguments to pass along to the generate method of the model.
|
|
22643
22814
|
* @returns {Promise<DocumentQuestionAnsweringOutput|DocumentQuestionAnsweringOutput[]>} An object (or array of objects) containing the answer(s).
|
|
22644
|
-
*
|
|
22815
|
+
*
|
|
22645
22816
|
* @typedef {TextImagePipelineConstructorArgs & DocumentQuestionAnsweringPipelineCallback & Disposable} DocumentQuestionAnsweringPipelineType
|
|
22646
22817
|
*/
|
|
22647
22818
|
|
|
@@ -22649,7 +22820,7 @@ class ZeroShotObjectDetectionPipeline extends (/** @type {new (options: TextImag
|
|
|
22649
22820
|
* Document Question Answering pipeline using any `AutoModelForDocumentQuestionAnswering`.
|
|
22650
22821
|
* The inputs/outputs are similar to the (extractive) question answering pipeline; however,
|
|
22651
22822
|
* the pipeline takes an image (and optional OCR'd words/boxes) as input instead of text context.
|
|
22652
|
-
*
|
|
22823
|
+
*
|
|
22653
22824
|
* **Example:** Answer questions about a document with `Xenova/donut-base-finetuned-docvqa`.
|
|
22654
22825
|
* ```javascript
|
|
22655
22826
|
* const qa_pipeline = await pipeline('document-question-answering', 'Xenova/donut-base-finetuned-docvqa');
|
|
@@ -22719,22 +22890,22 @@ class DocumentQuestionAnsweringPipeline extends (/** @type {new (options: TextIm
|
|
|
22719
22890
|
* @typedef {Object} TextToAudioOutput
|
|
22720
22891
|
* @property {Float32Array} audio The generated audio waveform.
|
|
22721
22892
|
* @property {number} sampling_rate The sampling rate of the generated audio waveform.
|
|
22722
|
-
*
|
|
22893
|
+
*
|
|
22723
22894
|
* @typedef {Object} TextToAudioPipelineOptions Parameters specific to text-to-audio pipelines.
|
|
22724
22895
|
* @property {Tensor|Float32Array|string|URL} [speaker_embeddings=null] The speaker embeddings (if the model requires it).
|
|
22725
|
-
*
|
|
22896
|
+
*
|
|
22726
22897
|
* @callback TextToAudioPipelineCallback Generates speech/audio from the inputs.
|
|
22727
22898
|
* @param {string|string[]} texts The text(s) to generate.
|
|
22728
22899
|
* @param {TextToAudioPipelineOptions} options Parameters passed to the model generation/forward method.
|
|
22729
22900
|
* @returns {Promise<TextToAudioOutput>} An object containing the generated audio and sampling rate.
|
|
22730
|
-
*
|
|
22901
|
+
*
|
|
22731
22902
|
* @typedef {TextToAudioPipelineConstructorArgs & TextToAudioPipelineCallback & Disposable} TextToAudioPipelineType
|
|
22732
22903
|
*/
|
|
22733
22904
|
|
|
22734
22905
|
/**
|
|
22735
22906
|
* Text-to-audio generation pipeline using any `AutoModelForTextToWaveform` or `AutoModelForTextToSpectrogram`.
|
|
22736
22907
|
* This pipeline generates an audio file from an input text and optional other conditional inputs.
|
|
22737
|
-
*
|
|
22908
|
+
*
|
|
22738
22909
|
* **Example:** Generate audio from text with `Xenova/speecht5_tts`.
|
|
22739
22910
|
* ```javascript
|
|
22740
22911
|
* const synthesizer = await pipeline('text-to-speech', 'Xenova/speecht5_tts', { quantized: false });
|
|
@@ -22745,17 +22916,17 @@ class DocumentQuestionAnsweringPipeline extends (/** @type {new (options: TextIm
|
|
|
22745
22916
|
* // sampling_rate: 16000
|
|
22746
22917
|
* // }
|
|
22747
22918
|
* ```
|
|
22748
|
-
*
|
|
22919
|
+
*
|
|
22749
22920
|
* You can then save the audio to a .wav file with the `wavefile` package:
|
|
22750
22921
|
* ```javascript
|
|
22751
22922
|
* import wavefile from 'wavefile';
|
|
22752
22923
|
* import fs from 'fs';
|
|
22753
|
-
*
|
|
22924
|
+
*
|
|
22754
22925
|
* const wav = new wavefile.WaveFile();
|
|
22755
22926
|
* wav.fromScratch(1, out.sampling_rate, '32f', out.audio);
|
|
22756
22927
|
* fs.writeFileSync('out.wav', wav.toBuffer());
|
|
22757
22928
|
* ```
|
|
22758
|
-
*
|
|
22929
|
+
*
|
|
22759
22930
|
* **Example:** Multilingual speech generation with `Xenova/mms-tts-fra`. See [here](https://huggingface.co/models?pipeline_tag=text-to-speech&other=vits&sort=trending) for the full list of available languages (1107).
|
|
22760
22931
|
* ```javascript
|
|
22761
22932
|
* const synthesizer = await pipeline('text-to-speech', 'Xenova/mms-tts-fra');
|
|
@@ -22861,13 +23032,13 @@ class TextToAudioPipeline extends (/** @type {new (options: TextToAudioPipelineC
|
|
|
22861
23032
|
* @callback ImageToImagePipelineCallback Transform the image(s) passed as inputs.
|
|
22862
23033
|
* @param {ImagePipelineInputs} images The images to transform.
|
|
22863
23034
|
* @returns {Promise<RawImage|RawImage[]>} The transformed image or list of images.
|
|
22864
|
-
*
|
|
23035
|
+
*
|
|
22865
23036
|
* @typedef {ImagePipelineConstructorArgs & ImageToImagePipelineCallback & Disposable} ImageToImagePipelineType
|
|
22866
23037
|
*/
|
|
22867
23038
|
|
|
22868
23039
|
/**
|
|
22869
23040
|
* Image to Image pipeline using any `AutoModelForImageToImage`. This pipeline generates an image based on a previous image input.
|
|
22870
|
-
*
|
|
23041
|
+
*
|
|
22871
23042
|
* **Example:** Super-resolution w/ `Xenova/swin2SR-classical-sr-x2-64`
|
|
22872
23043
|
* ```javascript
|
|
22873
23044
|
* const upscaler = await pipeline('image-to-image', 'Xenova/swin2SR-classical-sr-x2-64');
|
|
@@ -22912,17 +23083,17 @@ class ImageToImagePipeline extends (/** @type {new (options: ImagePipelineConstr
|
|
|
22912
23083
|
* @typedef {Object} DepthEstimationPipelineOutput
|
|
22913
23084
|
* @property {Tensor} predicted_depth The raw depth map predicted by the model.
|
|
22914
23085
|
* @property {RawImage} depth The processed depth map as an image (with the same size as the input image).
|
|
22915
|
-
*
|
|
23086
|
+
*
|
|
22916
23087
|
* @callback DepthEstimationPipelineCallback Predicts the depth for the image(s) passed as inputs.
|
|
22917
23088
|
* @param {ImagePipelineInputs} images The images to compute depth for.
|
|
22918
23089
|
* @returns {Promise<DepthEstimationPipelineOutput|DepthEstimationPipelineOutput[]>} An image or a list of images containing result(s).
|
|
22919
|
-
*
|
|
23090
|
+
*
|
|
22920
23091
|
* @typedef {ImagePipelineConstructorArgs & DepthEstimationPipelineCallback & Disposable} DepthEstimationPipelineType
|
|
22921
23092
|
*/
|
|
22922
23093
|
|
|
22923
23094
|
/**
|
|
22924
23095
|
* Depth estimation pipeline using any `AutoModelForDepthEstimation`. This pipeline predicts the depth of an image.
|
|
22925
|
-
*
|
|
23096
|
+
*
|
|
22926
23097
|
* **Example:** Depth estimation w/ `Xenova/dpt-hybrid-midas`
|
|
22927
23098
|
* ```javascript
|
|
22928
23099
|
* const depth_estimator = await pipeline('depth-estimation', 'Xenova/dpt-hybrid-midas');
|
|
@@ -23307,7 +23478,7 @@ const TASK_ALIASES = Object.freeze({
|
|
|
23307
23478
|
|
|
23308
23479
|
/**
|
|
23309
23480
|
* Utility factory method to build a `Pipeline` object.
|
|
23310
|
-
*
|
|
23481
|
+
*
|
|
23311
23482
|
* @template {PipelineType} T The type of pipeline to return.
|
|
23312
23483
|
* @param {T} task The task defining which pipeline will be returned. Currently accepted tasks are:
|
|
23313
23484
|
* - `"audio-classification"`: will return a `AudioClassificationPipeline`.
|
|
@@ -26126,13 +26297,12 @@ class PreTrainedTokenizer extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_0__
|
|
|
26126
26297
|
this.decoder.end_of_word_suffix = this.model.end_of_word_suffix;
|
|
26127
26298
|
}
|
|
26128
26299
|
|
|
26129
|
-
this.
|
|
26130
|
-
this.added_tokens.
|
|
26131
|
-
|
|
26132
|
-
|
|
26133
|
-
|
|
26134
|
-
|
|
26135
|
-
) : null;
|
|
26300
|
+
this.added_tokens_splitter = new _utils_data_structures_js__WEBPACK_IMPORTED_MODULE_5__.DictionarySplitter(
|
|
26301
|
+
this.added_tokens.map(x => x.content),
|
|
26302
|
+
);
|
|
26303
|
+
|
|
26304
|
+
/** @type {Map<string, AddedToken>} */
|
|
26305
|
+
this.added_tokens_map = new Map(this.added_tokens.map(x => [x.content, x]))
|
|
26136
26306
|
|
|
26137
26307
|
// Set mask token if present (otherwise will be undefined, which is fine)
|
|
26138
26308
|
this.mask_token = this.getToken('mask_token');
|
|
@@ -26427,40 +26597,50 @@ class PreTrainedTokenizer extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_0__
|
|
|
26427
26597
|
// Actual function which does encoding, for a single text
|
|
26428
26598
|
// First, we take care of special tokens. Needed to avoid issues arising from
|
|
26429
26599
|
// normalization and/or pretokenization (which may not preserve special tokens)
|
|
26430
|
-
const sections = this.
|
|
26431
|
-
|
|
26432
|
-
|
|
26433
|
-
|
|
26434
|
-
|
|
26435
|
-
|
|
26436
|
-
|
|
26437
|
-
|
|
26438
|
-
if (this.remove_space === true) {
|
|
26439
|
-
x = x.trim().split(/\s+/).join(' ');
|
|
26440
|
-
}
|
|
26441
|
-
if (this.do_lowercase_and_remove_accent) {
|
|
26442
|
-
x = lowercase_and_remove_accent(x);
|
|
26600
|
+
const sections = this.added_tokens_splitter.split(text);
|
|
26601
|
+
|
|
26602
|
+
// Process left/right stripping of added tokens
|
|
26603
|
+
for (let i = 0; i < sections.length; ++i) {
|
|
26604
|
+
const addedToken = this.added_tokens_map.get(sections[i]);
|
|
26605
|
+
if (addedToken) {
|
|
26606
|
+
if (addedToken.lstrip && i > 0) {
|
|
26607
|
+
sections[i - 1] = sections[i - 1].trimEnd();
|
|
26443
26608
|
}
|
|
26444
|
-
|
|
26445
|
-
|
|
26446
|
-
x = this.normalizer(x);
|
|
26609
|
+
if (addedToken.rstrip && i < sections.length - 1) {
|
|
26610
|
+
sections[i + 1] = sections[i + 1].trimStart();
|
|
26447
26611
|
}
|
|
26612
|
+
}
|
|
26613
|
+
}
|
|
26448
26614
|
|
|
26449
|
-
|
|
26450
|
-
|
|
26451
|
-
|
|
26452
|
-
return [];
|
|
26453
|
-
}
|
|
26615
|
+
const tokens = sections.flatMap((x, section_index) => {
|
|
26616
|
+
if (x.length === 0) return [];
|
|
26617
|
+
if (this.added_tokens_map.has(x)) return [x]; // Return added tokens unchanged
|
|
26454
26618
|
|
|
26455
|
-
|
|
26456
|
-
|
|
26457
|
-
|
|
26619
|
+
if (this.remove_space === true) {
|
|
26620
|
+
x = x.trim().split(/\s+/).join(' ');
|
|
26621
|
+
}
|
|
26622
|
+
if (this.do_lowercase_and_remove_accent) {
|
|
26623
|
+
x = lowercase_and_remove_accent(x);
|
|
26624
|
+
}
|
|
26458
26625
|
|
|
26459
|
-
|
|
26626
|
+
if (this.normalizer !== null) {
|
|
26627
|
+
x = this.normalizer(x);
|
|
26628
|
+
}
|
|
26460
26629
|
|
|
26461
|
-
|
|
26630
|
+
// If, after normalization, this section is empty (e.g., trimming whitespace),
|
|
26631
|
+
// we return an empty array
|
|
26632
|
+
if (x.length === 0) {
|
|
26633
|
+
return [];
|
|
26462
26634
|
}
|
|
26463
|
-
|
|
26635
|
+
|
|
26636
|
+
const sectionTokens = (this.pre_tokenizer !== null) ? this.pre_tokenizer(x, {
|
|
26637
|
+
section_index,
|
|
26638
|
+
}) : [x];
|
|
26639
|
+
|
|
26640
|
+
const tokens = this.model(sectionTokens);
|
|
26641
|
+
|
|
26642
|
+
return tokens;
|
|
26643
|
+
});
|
|
26464
26644
|
|
|
26465
26645
|
return tokens;
|
|
26466
26646
|
}
|
|
@@ -29089,6 +29269,7 @@ function count(arr, value) {
|
|
|
29089
29269
|
__webpack_require__.r(__webpack_exports__);
|
|
29090
29270
|
/* harmony export */ __webpack_require__.d(__webpack_exports__, {
|
|
29091
29271
|
/* harmony export */ CharTrie: () => (/* binding */ CharTrie),
|
|
29272
|
+
/* harmony export */ DictionarySplitter: () => (/* binding */ DictionarySplitter),
|
|
29092
29273
|
/* harmony export */ PriorityQueue: () => (/* binding */ PriorityQueue),
|
|
29093
29274
|
/* harmony export */ TokenLattice: () => (/* binding */ TokenLattice)
|
|
29094
29275
|
/* harmony export */ });
|
|
@@ -29540,6 +29721,80 @@ class TokenLatticeNode {
|
|
|
29540
29721
|
}
|
|
29541
29722
|
}
|
|
29542
29723
|
|
|
29724
|
+
/**
 * Splits a string into pieces using greedy longest-match lookup over a fixed
 * vocabulary, backed by a character trie.
 *
 * Matching walks the input one UTF-16 code unit at a time (plain string
 * indexing), so trie keys are code units as well; this keeps lookup and
 * insertion symmetric for multi-code-unit characters.
 */
class DictionarySplitter {
    /**
     * @param {string[]} dictionary The dictionary of words to use for splitting.
     */
    constructor(dictionary) {
        this.trie = this._buildTrie(dictionary);
    }

    /**
     * Constructs the lookup trie for the given vocabulary.
     * Each node is a prototype-less object whose single-code-unit keys point to
     * child nodes; a node's `end` property holds the full word terminating there.
     * @param {string[]} dictionary The words to insert.
     * @returns {Object} The root node of the trie.
     * @private
     */
    _buildTrie(dictionary) {
        // Object.create(null) avoids prototype-pollution/shadowing issues
        // (e.g. a key named "constructor").
        const root = Object.create(null);
        for (const word of dictionary) {
            let current = root;
            // Iterate by index (code units), NOT by code points, so that
            // lookup via text[j] in split() uses identical keys.
            for (let k = 0; k < word.length; ++k) {
                current = (current[word.charAt(k)] ??= Object.create(null));
            }
            current.end = word;
        }
        return root;
    }

    /**
     * Splits the input text into tokens based on the dictionary.
     * Unmatched stretches between dictionary hits are emitted verbatim.
     * @param {string} text The input text to split.
     * @returns {string[]} An array of tokens.
     */
    split(text) {
        const pieces = [];
        const length = text.length;
        let segmentStart = 0;

        for (let pos = 0; pos < length; ) {
            // Greedily find the longest dictionary word starting at `pos`.
            let longest = null;
            let node = this.trie;
            for (let cursor = pos; cursor < length; ++cursor) {
                node = node[text[cursor]];
                if (!node) break;
                if (node.end) {
                    longest = node.end;
                }
            }

            if (longest === null) {
                // No dictionary word starts here; advance one code unit.
                ++pos;
                continue;
            }

            // Flush any unmatched prefix, then the matched word itself.
            if (pos > segmentStart) {
                pieces.push(text.slice(segmentStart, pos));
            }
            pieces.push(longest);
            pos += longest.length;
            segmentStart = pos;
        }

        // Trailing unmatched suffix, if any.
        if (segmentStart < length) {
            pieces.push(text.slice(segmentStart));
        }
        return pieces;
    }
}
|
|
29797
|
+
|
|
29543
29798
|
|
|
29544
29799
|
/***/ }),
|
|
29545
29800
|
|
|
@@ -29923,6 +30178,22 @@ function isValidUrl(string, protocols = null, validHosts = null) {
|
|
|
29923
30178
|
return true;
|
|
29924
30179
|
}
|
|
29925
30180
|
|
|
30181
|
+
const REPO_ID_REGEX = /^(\b[\w\-.]+\b\/)?\b[\w\-.]{1,96}\b$/;

/**
 * Tests whether a string is a valid Hugging Face model ID or not.
 * Adapted from https://github.com/huggingface/huggingface_hub/blob/6378820ebb03f071988a96c7f3268f5bdf8f9449/src/huggingface_hub/utils/_validators.py#L119-L170
 *
 * A valid ID is an optional `owner/` segment plus a repo name of at most 96
 * word/dash/dot characters, with no `..` or `--` runs and no `.git`/`.ipynb`
 * suffix.
 *
 * @param {string} string The string to test
 * @returns {boolean} True if the string is a valid model ID, false otherwise.
 */
function isValidHfModelId(string) {
    return REPO_ID_REGEX.test(string)
        && !string.includes("..")
        && !string.includes("--")
        && !string.endsWith(".git")
        && !string.endsWith(".ipynb");
}
|
|
30196
|
+
|
|
29926
30197
|
/**
|
|
29927
30198
|
* Helper function to get a file, using either the Fetch API or FileSystem API.
|
|
29928
30199
|
*
|
|
@@ -30175,12 +30446,13 @@ async function getModelFile(path_or_repo_id, filename, fatal = true, options = {
|
|
|
30175
30446
|
}
|
|
30176
30447
|
|
|
30177
30448
|
const revision = options.revision ?? 'main';
|
|
30449
|
+
const requestURL = pathJoin(path_or_repo_id, filename);
|
|
30178
30450
|
|
|
30179
|
-
|
|
30180
|
-
|
|
30181
|
-
|
|
30182
|
-
|
|
30183
|
-
|
|
30451
|
+
const validModelId = isValidHfModelId(path_or_repo_id);
|
|
30452
|
+
const localPath = validModelId
|
|
30453
|
+
? pathJoin(_env_js__WEBPACK_IMPORTED_MODULE_2__.env.localModelPath, requestURL)
|
|
30454
|
+
: requestURL;
|
|
30455
|
+
const remoteURL = pathJoin(
|
|
30184
30456
|
_env_js__WEBPACK_IMPORTED_MODULE_2__.env.remoteHost,
|
|
30185
30457
|
_env_js__WEBPACK_IMPORTED_MODULE_2__.env.remotePathTemplate
|
|
30186
30458
|
.replaceAll('{model}', path_or_repo_id)
|
|
@@ -30188,14 +30460,14 @@ async function getModelFile(path_or_repo_id, filename, fatal = true, options = {
|
|
|
30188
30460
|
filename
|
|
30189
30461
|
);
|
|
30190
30462
|
|
|
30191
|
-
// Choose cache key for filesystem cache
|
|
30192
|
-
// When using the main revision (default), we use the request URL as the cache key.
|
|
30193
|
-
// If a specific revision is requested, we account for this in the cache key.
|
|
30194
|
-
let fsCacheKey = revision === 'main' ? requestURL : pathJoin(path_or_repo_id, revision, filename);
|
|
30195
|
-
|
|
30196
30463
|
/** @type {string} */
|
|
30197
30464
|
let cacheKey;
|
|
30198
|
-
|
|
30465
|
+
const proposedCacheKey = cache instanceof FileCache
|
|
30466
|
+
// Choose cache key for filesystem cache
|
|
30467
|
+
// When using the main revision (default), we use the request URL as the cache key.
|
|
30468
|
+
// If a specific revision is requested, we account for this in the cache key.
|
|
30469
|
+
? revision === 'main' ? requestURL : pathJoin(path_or_repo_id, revision, filename)
|
|
30470
|
+
: remoteURL;
|
|
30199
30471
|
|
|
30200
30472
|
// Whether to cache the final response in the end.
|
|
30201
30473
|
let toCacheResponse = false;
|
|
@@ -30208,11 +30480,10 @@ async function getModelFile(path_or_repo_id, filename, fatal = true, options = {
|
|
|
30208
30480
|
// 1. We first try to get from cache using the local path. In some environments (like deno),
|
|
30209
30481
|
// non-URL cache keys are not allowed. In these cases, `response` will be undefined.
|
|
30210
30482
|
// 2. If no response is found, we try to get from cache using the remote URL or file system cache.
|
|
30211
|
-
response = await tryCache(cache,
|
|
30483
|
+
response = await tryCache(cache, localPath, proposedCacheKey);
|
|
30212
30484
|
}
|
|
30213
30485
|
|
|
30214
30486
|
const cacheHit = response !== undefined;
|
|
30215
|
-
|
|
30216
30487
|
if (response === undefined) {
|
|
30217
30488
|
// Caching not available, or file is not cached, so we perform the request
|
|
30218
30489
|
|
|
@@ -30230,9 +30501,9 @@ async function getModelFile(path_or_repo_id, filename, fatal = true, options = {
|
|
|
30230
30501
|
console.warn(`Unable to load from local path "${localPath}": "${e}"`);
|
|
30231
30502
|
}
|
|
30232
30503
|
} else if (options.local_files_only) {
|
|
30233
|
-
throw new Error(`\`local_files_only=true\`, but attempted to load a remote file from: ${
|
|
30504
|
+
throw new Error(`\`local_files_only=true\`, but attempted to load a remote file from: ${requestURL}.`);
|
|
30234
30505
|
} else if (!_env_js__WEBPACK_IMPORTED_MODULE_2__.env.allowRemoteModels) {
|
|
30235
|
-
throw new Error(`\`env.allowRemoteModels=false\`, but attempted to load a remote file from: ${
|
|
30506
|
+
throw new Error(`\`env.allowRemoteModels=false\`, but attempted to load a remote file from: ${requestURL}.`);
|
|
30236
30507
|
}
|
|
30237
30508
|
}
|
|
30238
30509
|
|
|
@@ -30252,6 +30523,11 @@ async function getModelFile(path_or_repo_id, filename, fatal = true, options = {
|
|
|
30252
30523
|
return null;
|
|
30253
30524
|
}
|
|
30254
30525
|
}
|
|
30526
|
+
if (!validModelId) {
|
|
30527
|
+
// Before making any requests to the remote server, we check if the model ID is valid.
|
|
30528
|
+
// This prevents unnecessary network requests for invalid model IDs.
|
|
30529
|
+
throw Error(`Local file missing at "${localPath}" and download aborted due to invalid model ID "${path_or_repo_id}".`);
|
|
30530
|
+
}
|
|
30255
30531
|
|
|
30256
30532
|
// File not found locally, so we try to download it from the remote server
|
|
30257
30533
|
response = await getFile(remoteURL);
|
|
@@ -30583,7 +30859,7 @@ class RawImage {
|
|
|
30583
30859
|
|
|
30584
30860
|
/**
|
|
30585
30861
|
* Helper method for reading an image from a variety of input types.
|
|
30586
|
-
* @param {RawImage|string|URL} input
|
|
30862
|
+
* @param {RawImage|string|URL|Blob|HTMLCanvasElement|OffscreenCanvas} input
|
|
30587
30863
|
* @returns The image object.
|
|
30588
30864
|
*
|
|
30589
30865
|
* **Example:** Read image from a URL.
|
|
@@ -30602,6 +30878,14 @@ class RawImage {
|
|
|
30602
30878
|
return input;
|
|
30603
30879
|
} else if (typeof input === 'string' || input instanceof URL) {
|
|
30604
30880
|
return await this.fromURL(input);
|
|
30881
|
+
} else if (input instanceof Blob) {
|
|
30882
|
+
return await this.fromBlob(input);
|
|
30883
|
+
} else if (
|
|
30884
|
+
(typeof HTMLCanvasElement !== "undefined" && input instanceof HTMLCanvasElement)
|
|
30885
|
+
||
|
|
30886
|
+
(typeof OffscreenCanvas !== "undefined" && input instanceof OffscreenCanvas)
|
|
30887
|
+
) {
|
|
30888
|
+
return this.fromCanvas(input);
|
|
30605
30889
|
} else {
|
|
30606
30890
|
throw new Error(`Unsupported input type: ${typeof input}`);
|
|
30607
30891
|
}
|
|
@@ -33613,8 +33897,12 @@ function calc_unsqueeze_dims(dims, dim) {
|
|
|
33613
33897
|
* @private
|
|
33614
33898
|
*/
|
|
33615
33899
|
function safeIndex(index, size, dimension = null, boundsCheck = true) {
|
|
33616
|
-
if (
|
|
33617
|
-
|
|
33900
|
+
if (index < -size || index >= size) {
|
|
33901
|
+
if (boundsCheck) {
|
|
33902
|
+
throw new Error(`IndexError: index ${index} is out of bounds for dimension${dimension === null ? '' : ' ' + dimension} with size ${size}`);
|
|
33903
|
+
} else {
|
|
33904
|
+
return index < -size ? 0 : size;
|
|
33905
|
+
}
|
|
33618
33906
|
}
|
|
33619
33907
|
|
|
33620
33908
|
if (index < 0) {
|
|
@@ -34471,6 +34759,9 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
34471
34759
|
/* harmony export */ Gemma2ForCausalLM: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Gemma2ForCausalLM),
|
|
34472
34760
|
/* harmony export */ Gemma2Model: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Gemma2Model),
|
|
34473
34761
|
/* harmony export */ Gemma2PreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Gemma2PreTrainedModel),
|
|
34762
|
+
/* harmony export */ Gemma3ForCausalLM: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Gemma3ForCausalLM),
|
|
34763
|
+
/* harmony export */ Gemma3Model: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Gemma3Model),
|
|
34764
|
+
/* harmony export */ Gemma3PreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Gemma3PreTrainedModel),
|
|
34474
34765
|
/* harmony export */ GemmaForCausalLM: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.GemmaForCausalLM),
|
|
34475
34766
|
/* harmony export */ GemmaModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.GemmaModel),
|
|
34476
34767
|
/* harmony export */ GemmaPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.GemmaPreTrainedModel),
|
|
@@ -34572,6 +34863,10 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
34572
34863
|
/* harmony export */ MaskFormerPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MaskFormerPreTrainedModel),
|
|
34573
34864
|
/* harmony export */ MaskedLMOutput: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MaskedLMOutput),
|
|
34574
34865
|
/* harmony export */ MaxLengthCriteria: () => (/* reexport safe */ _generation_stopping_criteria_js__WEBPACK_IMPORTED_MODULE_20__.MaxLengthCriteria),
|
|
34866
|
+
/* harmony export */ Metric3DForDepthEstimation: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Metric3DForDepthEstimation),
|
|
34867
|
+
/* harmony export */ Metric3DPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Metric3DPreTrainedModel),
|
|
34868
|
+
/* harmony export */ Metric3Dv2ForDepthEstimation: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Metric3Dv2ForDepthEstimation),
|
|
34869
|
+
/* harmony export */ Metric3Dv2PreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Metric3Dv2PreTrainedModel),
|
|
34575
34870
|
/* harmony export */ MgpstrForSceneTextRecognition: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MgpstrForSceneTextRecognition),
|
|
34576
34871
|
/* harmony export */ MgpstrModelOutput: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MgpstrModelOutput),
|
|
34577
34872
|
/* harmony export */ MgpstrPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MgpstrPreTrainedModel),
|
|
@@ -34724,11 +35019,19 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
34724
35019
|
/* harmony export */ Qwen2VLImageProcessor: () => (/* reexport safe */ _models_image_processors_js__WEBPACK_IMPORTED_MODULE_14__.Qwen2VLImageProcessor),
|
|
34725
35020
|
/* harmony export */ Qwen2VLPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Qwen2VLPreTrainedModel),
|
|
34726
35021
|
/* harmony export */ Qwen2VLProcessor: () => (/* reexport safe */ _models_processors_js__WEBPACK_IMPORTED_MODULE_17__.Qwen2VLProcessor),
|
|
35022
|
+
/* harmony export */ RFDetrForObjectDetection: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.RFDetrForObjectDetection),
|
|
35023
|
+
/* harmony export */ RFDetrModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.RFDetrModel),
|
|
35024
|
+
/* harmony export */ RFDetrObjectDetectionOutput: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.RFDetrObjectDetectionOutput),
|
|
35025
|
+
/* harmony export */ RFDetrPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.RFDetrPreTrainedModel),
|
|
34727
35026
|
/* harmony export */ RTDetrForObjectDetection: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.RTDetrForObjectDetection),
|
|
34728
35027
|
/* harmony export */ RTDetrImageProcessor: () => (/* reexport safe */ _models_image_processors_js__WEBPACK_IMPORTED_MODULE_14__.RTDetrImageProcessor),
|
|
34729
35028
|
/* harmony export */ RTDetrModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.RTDetrModel),
|
|
34730
35029
|
/* harmony export */ RTDetrObjectDetectionOutput: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.RTDetrObjectDetectionOutput),
|
|
34731
35030
|
/* harmony export */ RTDetrPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.RTDetrPreTrainedModel),
|
|
35031
|
+
/* harmony export */ RTDetrV2ForObjectDetection: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.RTDetrV2ForObjectDetection),
|
|
35032
|
+
/* harmony export */ RTDetrV2Model: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.RTDetrV2Model),
|
|
35033
|
+
/* harmony export */ RTDetrV2ObjectDetectionOutput: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.RTDetrV2ObjectDetectionOutput),
|
|
35034
|
+
/* harmony export */ RTDetrV2PreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.RTDetrV2PreTrainedModel),
|
|
34732
35035
|
/* harmony export */ RawAudio: () => (/* reexport safe */ _utils_audio_js__WEBPACK_IMPORTED_MODULE_5__.RawAudio),
|
|
34733
35036
|
/* harmony export */ RawImage: () => (/* reexport safe */ _utils_image_js__WEBPACK_IMPORTED_MODULE_6__.RawImage),
|
|
34734
35037
|
/* harmony export */ RawVideo: () => (/* reexport safe */ _utils_video_js__WEBPACK_IMPORTED_MODULE_7__.RawVideo),
|
|
@@ -34778,6 +35081,11 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
34778
35081
|
/* harmony export */ SmolVLMForConditionalGeneration: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.SmolVLMForConditionalGeneration),
|
|
34779
35082
|
/* harmony export */ SmolVLMImageProcessor: () => (/* reexport safe */ _models_image_processors_js__WEBPACK_IMPORTED_MODULE_14__.SmolVLMImageProcessor),
|
|
34780
35083
|
/* harmony export */ SmolVLMProcessor: () => (/* reexport safe */ _models_processors_js__WEBPACK_IMPORTED_MODULE_17__.SmolVLMProcessor),
|
|
35084
|
+
/* harmony export */ SnacDecoderModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.SnacDecoderModel),
|
|
35085
|
+
/* harmony export */ SnacEncoderModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.SnacEncoderModel),
|
|
35086
|
+
/* harmony export */ SnacFeatureExtractor: () => (/* reexport safe */ _models_feature_extractors_js__WEBPACK_IMPORTED_MODULE_11__.SnacFeatureExtractor),
|
|
35087
|
+
/* harmony export */ SnacModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.SnacModel),
|
|
35088
|
+
/* harmony export */ SnacPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.SnacPreTrainedModel),
|
|
34781
35089
|
/* harmony export */ SpeechT5FeatureExtractor: () => (/* reexport safe */ _models_feature_extractors_js__WEBPACK_IMPORTED_MODULE_11__.SpeechT5FeatureExtractor),
|
|
34782
35090
|
/* harmony export */ SpeechT5ForSpeechToText: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.SpeechT5ForSpeechToText),
|
|
34783
35091
|
/* harmony export */ SpeechT5ForTextToSpeech: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.SpeechT5ForTextToSpeech),
|