@huggingface/transformers 3.4.0 → 3.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -2
- package/dist/transformers.js +315 -152
- package/dist/transformers.js.map +1 -1
- package/dist/transformers.min.js +1 -1
- package/dist/transformers.min.js.map +1 -1
- package/dist/transformers.node.cjs +303 -151
- package/dist/transformers.node.cjs.map +1 -1
- package/dist/transformers.node.min.cjs +1 -1
- package/dist/transformers.node.min.cjs.map +1 -1
- package/dist/transformers.node.min.mjs +1 -1
- package/dist/transformers.node.min.mjs.map +1 -1
- package/dist/transformers.node.mjs +315 -152
- package/dist/transformers.node.mjs.map +1 -1
- package/dist/transformers.web.js +315 -152
- package/dist/transformers.web.js.map +1 -1
- package/dist/transformers.web.min.js +1 -1
- package/dist/transformers.web.min.js.map +1 -1
- package/package.json +1 -1
- package/src/configs.js +2 -0
- package/src/env.js +1 -1
- package/src/models/feature_extractors.js +1 -0
- package/src/models/snac/feature_extraction_snac.js +3 -0
- package/src/models.js +90 -2
- package/src/pipelines.js +140 -135
- package/src/utils/image.js +9 -1
- package/src/utils/tensor.js +6 -2
- package/types/configs.d.ts.map +1 -1
- package/types/models/feature_extractors.d.ts +1 -0
- package/types/models/snac/feature_extraction_snac.d.ts +4 -0
- package/types/models/snac/feature_extraction_snac.d.ts.map +1 -0
- package/types/models.d.ts +48 -0
- package/types/models.d.ts.map +1 -1
- package/types/pipelines.d.ts +2 -2
- package/types/pipelines.d.ts.map +1 -1
- package/types/tsconfig.tsbuildinfo +1 -1
- package/types/utils/image.d.ts +2 -2
- package/types/utils/image.d.ts.map +1 -1
- package/types/utils/tensor.d.ts.map +1 -1
|
@@ -3684,6 +3684,7 @@ function getNormalizedConfig(config) {
|
|
|
3684
3684
|
// Sub-configs
|
|
3685
3685
|
case 'llava':
|
|
3686
3686
|
case 'paligemma':
|
|
3687
|
+
case 'gemma3':
|
|
3687
3688
|
case 'florence2':
|
|
3688
3689
|
case 'llava_onevision':
|
|
3689
3690
|
case 'idefics3':
|
|
@@ -3743,6 +3744,7 @@ function getNormalizedConfig(config) {
|
|
|
3743
3744
|
break;
|
|
3744
3745
|
case 'gemma':
|
|
3745
3746
|
case 'gemma2':
|
|
3747
|
+
case 'gemma3_text':
|
|
3746
3748
|
case 'glm':
|
|
3747
3749
|
case 'helium':
|
|
3748
3750
|
mapping['num_heads'] = 'num_key_value_heads';
|
|
@@ -4074,7 +4076,7 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
4074
4076
|
|
|
4075
4077
|
|
|
4076
4078
|
|
|
4077
|
-
const VERSION = '3.4.0';
|
|
4079
|
+
const VERSION = '3.4.1';
|
|
4078
4080
|
|
|
4079
4081
|
// Check if various APIs are available (depends on environment)
|
|
4080
4082
|
const IS_BROWSER_ENV = typeof window !== "undefined" && typeof window.document !== "undefined";
|
|
@@ -6218,6 +6220,9 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
6218
6220
|
/* harmony export */ Gemma2ForCausalLM: () => (/* binding */ Gemma2ForCausalLM),
|
|
6219
6221
|
/* harmony export */ Gemma2Model: () => (/* binding */ Gemma2Model),
|
|
6220
6222
|
/* harmony export */ Gemma2PreTrainedModel: () => (/* binding */ Gemma2PreTrainedModel),
|
|
6223
|
+
/* harmony export */ Gemma3ForCausalLM: () => (/* binding */ Gemma3ForCausalLM),
|
|
6224
|
+
/* harmony export */ Gemma3Model: () => (/* binding */ Gemma3Model),
|
|
6225
|
+
/* harmony export */ Gemma3PreTrainedModel: () => (/* binding */ Gemma3PreTrainedModel),
|
|
6221
6226
|
/* harmony export */ GemmaForCausalLM: () => (/* binding */ GemmaForCausalLM),
|
|
6222
6227
|
/* harmony export */ GemmaModel: () => (/* binding */ GemmaModel),
|
|
6223
6228
|
/* harmony export */ GemmaPreTrainedModel: () => (/* binding */ GemmaPreTrainedModel),
|
|
@@ -6288,6 +6293,10 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
6288
6293
|
/* harmony export */ MaskFormerModel: () => (/* binding */ MaskFormerModel),
|
|
6289
6294
|
/* harmony export */ MaskFormerPreTrainedModel: () => (/* binding */ MaskFormerPreTrainedModel),
|
|
6290
6295
|
/* harmony export */ MaskedLMOutput: () => (/* binding */ MaskedLMOutput),
|
|
6296
|
+
/* harmony export */ Metric3DForDepthEstimation: () => (/* binding */ Metric3DForDepthEstimation),
|
|
6297
|
+
/* harmony export */ Metric3DPreTrainedModel: () => (/* binding */ Metric3DPreTrainedModel),
|
|
6298
|
+
/* harmony export */ Metric3Dv2ForDepthEstimation: () => (/* binding */ Metric3Dv2ForDepthEstimation),
|
|
6299
|
+
/* harmony export */ Metric3Dv2PreTrainedModel: () => (/* binding */ Metric3Dv2PreTrainedModel),
|
|
6291
6300
|
/* harmony export */ MgpstrForSceneTextRecognition: () => (/* binding */ MgpstrForSceneTextRecognition),
|
|
6292
6301
|
/* harmony export */ MgpstrModelOutput: () => (/* binding */ MgpstrModelOutput),
|
|
6293
6302
|
/* harmony export */ MgpstrPreTrainedModel: () => (/* binding */ MgpstrPreTrainedModel),
|
|
@@ -6436,6 +6445,10 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
6436
6445
|
/* harmony export */ SiglipTextModel: () => (/* binding */ SiglipTextModel),
|
|
6437
6446
|
/* harmony export */ SiglipVisionModel: () => (/* binding */ SiglipVisionModel),
|
|
6438
6447
|
/* harmony export */ SmolVLMForConditionalGeneration: () => (/* binding */ SmolVLMForConditionalGeneration),
|
|
6448
|
+
/* harmony export */ SnacDecoderModel: () => (/* binding */ SnacDecoderModel),
|
|
6449
|
+
/* harmony export */ SnacEncoderModel: () => (/* binding */ SnacEncoderModel),
|
|
6450
|
+
/* harmony export */ SnacModel: () => (/* binding */ SnacModel),
|
|
6451
|
+
/* harmony export */ SnacPreTrainedModel: () => (/* binding */ SnacPreTrainedModel),
|
|
6439
6452
|
/* harmony export */ SpeechT5ForSpeechToText: () => (/* binding */ SpeechT5ForSpeechToText),
|
|
6440
6453
|
/* harmony export */ SpeechT5ForTextToSpeech: () => (/* binding */ SpeechT5ForTextToSpeech),
|
|
6441
6454
|
/* harmony export */ SpeechT5HifiGan: () => (/* binding */ SpeechT5HifiGan),
|
|
@@ -7094,8 +7107,8 @@ async function decoderForward(self, model_inputs, is_encoder_decoder = false) {
|
|
|
7094
7107
|
new_model_inputs.use_cache_branch = boolTensor(!!past_key_values);
|
|
7095
7108
|
}
|
|
7096
7109
|
if (session.inputNames.includes('position_ids') && new_model_inputs.attention_mask && !new_model_inputs.position_ids) {
|
|
7097
|
-
// NOTE: Handle a special case for paligemma models, where positions are 1-indexed
|
|
7098
|
-
const start_index = self.config.model_type === 'paligemma' ? 1 : 0;
|
|
7110
|
+
// NOTE: Handle a special case for paligemma/gemma3 models, where positions are 1-indexed
|
|
7111
|
+
const start_index = ['paligemma', 'gemma3_text', 'gemma3'].includes(self.config.model_type) ? 1 : 0;
|
|
7099
7112
|
new_model_inputs.position_ids = createPositionIds(new_model_inputs, past_key_values, start_index);
|
|
7100
7113
|
}
|
|
7101
7114
|
|
|
@@ -11020,6 +11033,23 @@ class Gemma2Model extends Gemma2PreTrainedModel { }
|
|
|
11020
11033
|
class Gemma2ForCausalLM extends Gemma2PreTrainedModel { }
|
|
11021
11034
|
//////////////////////////////////////////////////
|
|
11022
11035
|
|
|
11036
|
+
|
|
11037
|
+
//////////////////////////////////////////////////
|
|
11038
|
+
// Gemma3 models
|
|
11039
|
+
|
|
11040
|
+
/**
|
|
11041
|
+
* The bare Gemma3 Model outputting raw hidden-states without any specific head on top.
|
|
11042
|
+
*/
|
|
11043
|
+
class Gemma3PreTrainedModel extends PreTrainedModel { }
|
|
11044
|
+
/**
|
|
11045
|
+
* The bare Gemma3 Model outputting raw hidden-states without any specific head on top.
|
|
11046
|
+
*/
|
|
11047
|
+
class Gemma3Model extends Gemma3PreTrainedModel { }
|
|
11048
|
+
|
|
11049
|
+
class Gemma3ForCausalLM extends Gemma3PreTrainedModel { }
|
|
11050
|
+
//////////////////////////////////////////////////
|
|
11051
|
+
|
|
11052
|
+
|
|
11023
11053
|
//////////////////////////////////////////////////
|
|
11024
11054
|
class OpenELMPreTrainedModel extends PreTrainedModel { }
|
|
11025
11055
|
class OpenELMModel extends OpenELMPreTrainedModel { }
|
|
@@ -11872,6 +11902,16 @@ class DepthProPreTrainedModel extends PreTrainedModel { }
|
|
|
11872
11902
|
class DepthProForDepthEstimation extends DepthProPreTrainedModel { }
|
|
11873
11903
|
//////////////////////////////////////////////////
|
|
11874
11904
|
|
|
11905
|
+
//////////////////////////////////////////////////
|
|
11906
|
+
class Metric3DPreTrainedModel extends PreTrainedModel { }
|
|
11907
|
+
class Metric3DForDepthEstimation extends Metric3DPreTrainedModel { }
|
|
11908
|
+
//////////////////////////////////////////////////
|
|
11909
|
+
|
|
11910
|
+
//////////////////////////////////////////////////
|
|
11911
|
+
class Metric3Dv2PreTrainedModel extends PreTrainedModel { }
|
|
11912
|
+
class Metric3Dv2ForDepthEstimation extends Metric3Dv2PreTrainedModel { }
|
|
11913
|
+
//////////////////////////////////////////////////
|
|
11914
|
+
|
|
11875
11915
|
//////////////////////////////////////////////////
|
|
11876
11916
|
class MaskFormerPreTrainedModel extends PreTrainedModel { }
|
|
11877
11917
|
class MaskFormerModel extends MaskFormerPreTrainedModel { }
|
|
@@ -13787,6 +13827,60 @@ class DacDecoderModel extends DacPreTrainedModel {
|
|
|
13787
13827
|
}
|
|
13788
13828
|
//////////////////////////////////////////////////
|
|
13789
13829
|
|
|
13830
|
+
|
|
13831
|
+
//////////////////////////////////////////////////
|
|
13832
|
+
// Snac models
|
|
13833
|
+
class SnacPreTrainedModel extends PreTrainedModel {
|
|
13834
|
+
main_input_name = 'input_values';
|
|
13835
|
+
forward_params = ['input_values'];
|
|
13836
|
+
}
|
|
13837
|
+
|
|
13838
|
+
/**
|
|
13839
|
+
* The SNAC (Multi-Scale Neural Audio Codec) model.
|
|
13840
|
+
*/
|
|
13841
|
+
class SnacModel extends SnacPreTrainedModel {
|
|
13842
|
+
/**
|
|
13843
|
+
* Encodes the input audio waveform into discrete codes.
|
|
13844
|
+
* @param {Object} inputs Model inputs
|
|
13845
|
+
* @param {Tensor} [inputs.input_values] Float values of the input audio waveform, of shape `(batch_size, channels, sequence_length)`).
|
|
13846
|
+
* @returns {Promise<Record<string, Tensor>>} The output tensors of shape `(batch_size, num_codebooks, sequence_length)`.
|
|
13847
|
+
*/
|
|
13848
|
+
async encode(inputs) {
|
|
13849
|
+
return await sessionRun(this.sessions['encoder_model'], inputs);
|
|
13850
|
+
}
|
|
13851
|
+
|
|
13852
|
+
/**
|
|
13853
|
+
* Decodes the given frames into an output audio waveform.
|
|
13854
|
+
* @param {Record<string, Tensor>} inputs The encoded audio codes.
|
|
13855
|
+
* @returns {Promise<{audio_values: Tensor}>} The output tensor of shape `(batch_size, num_channels, sequence_length)`.
|
|
13856
|
+
*/
|
|
13857
|
+
async decode(inputs) {
|
|
13858
|
+
return await sessionRun(this.sessions['decoder_model'], inputs);
|
|
13859
|
+
}
|
|
13860
|
+
}
|
|
13861
|
+
|
|
13862
|
+
class SnacEncoderModel extends SnacPreTrainedModel {
|
|
13863
|
+
/** @type {typeof PreTrainedModel.from_pretrained} */
|
|
13864
|
+
static async from_pretrained(pretrained_model_name_or_path, options = {}) {
|
|
13865
|
+
return super.from_pretrained(pretrained_model_name_or_path, {
|
|
13866
|
+
...options,
|
|
13867
|
+
// Update default model file name if not provided
|
|
13868
|
+
model_file_name: options.model_file_name ?? 'encoder_model',
|
|
13869
|
+
});
|
|
13870
|
+
}
|
|
13871
|
+
}
|
|
13872
|
+
class SnacDecoderModel extends SnacPreTrainedModel {
|
|
13873
|
+
/** @type {typeof PreTrainedModel.from_pretrained} */
|
|
13874
|
+
static async from_pretrained(pretrained_model_name_or_path, options = {}) {
|
|
13875
|
+
return super.from_pretrained(pretrained_model_name_or_path, {
|
|
13876
|
+
...options,
|
|
13877
|
+
// Update default model file name if not provided
|
|
13878
|
+
model_file_name: options.model_file_name ?? 'decoder_model',
|
|
13879
|
+
});
|
|
13880
|
+
}
|
|
13881
|
+
}
|
|
13882
|
+
//////////////////////////////////////////////////
|
|
13883
|
+
|
|
13790
13884
|
//////////////////////////////////////////////////
|
|
13791
13885
|
// AutoModels, used to simplify construction of PreTrainedModels
|
|
13792
13886
|
// (uses config to instantiate correct class)
|
|
@@ -13968,6 +14062,7 @@ const MODEL_MAPPING_NAMES_ENCODER_DECODER = new Map([
|
|
|
13968
14062
|
const MODEL_MAPPING_NAMES_AUTO_ENCODER = new Map([
|
|
13969
14063
|
['mimi', ['MimiModel', MimiModel]],
|
|
13970
14064
|
['dac', ['DacModel', DacModel]],
|
|
14065
|
+
['snac', ['SnacModel', SnacModel]],
|
|
13971
14066
|
]);
|
|
13972
14067
|
|
|
13973
14068
|
const MODEL_MAPPING_NAMES_DECODER_ONLY = new Map([
|
|
@@ -13988,6 +14083,7 @@ const MODEL_MAPPING_NAMES_DECODER_ONLY = new Map([
|
|
|
13988
14083
|
['cohere', ['CohereModel', CohereModel]],
|
|
13989
14084
|
['gemma', ['GemmaModel', GemmaModel]],
|
|
13990
14085
|
['gemma2', ['Gemma2Model', Gemma2Model]],
|
|
14086
|
+
['gemma3_text', ['Gemma3Model', Gemma3Model]],
|
|
13991
14087
|
['helium', ['HeliumModel', HeliumModel]],
|
|
13992
14088
|
['glm', ['GlmModel', GlmModel]],
|
|
13993
14089
|
['openelm', ['OpenELMModel', OpenELMModel]],
|
|
@@ -14087,6 +14183,7 @@ const MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = new Map([
|
|
|
14087
14183
|
['cohere', ['CohereForCausalLM', CohereForCausalLM]],
|
|
14088
14184
|
['gemma', ['GemmaForCausalLM', GemmaForCausalLM]],
|
|
14089
14185
|
['gemma2', ['Gemma2ForCausalLM', Gemma2ForCausalLM]],
|
|
14186
|
+
['gemma3_text', ['Gemma3ForCausalLM', Gemma3ForCausalLM]],
|
|
14090
14187
|
['helium', ['HeliumForCausalLM', HeliumForCausalLM]],
|
|
14091
14188
|
['glm', ['GlmForCausalLM', GlmForCausalLM]],
|
|
14092
14189
|
['openelm', ['OpenELMForCausalLM', OpenELMForCausalLM]],
|
|
@@ -14288,6 +14385,8 @@ const MODEL_FOR_DEPTH_ESTIMATION_MAPPING_NAMES = new Map([
|
|
|
14288
14385
|
['glpn', ['GLPNForDepthEstimation', GLPNForDepthEstimation]],
|
|
14289
14386
|
['sapiens', ['SapiensForDepthEstimation', SapiensForDepthEstimation]],
|
|
14290
14387
|
['depth_pro', ['DepthProForDepthEstimation', DepthProForDepthEstimation]],
|
|
14388
|
+
['metric3d', ['Metric3DForDepthEstimation', Metric3DForDepthEstimation]],
|
|
14389
|
+
['metric3dv2', ['Metric3Dv2ForDepthEstimation', Metric3Dv2ForDepthEstimation]],
|
|
14291
14390
|
])
|
|
14292
14391
|
|
|
14293
14392
|
const MODEL_FOR_NORMAL_ESTIMATION_MAPPING_NAMES = new Map([
|
|
@@ -14373,6 +14472,8 @@ const CUSTOM_MAPPING = [
|
|
|
14373
14472
|
['DacDecoderModel', DacDecoderModel, MODEL_TYPES.EncoderOnly],
|
|
14374
14473
|
['MimiEncoderModel', MimiEncoderModel, MODEL_TYPES.EncoderOnly],
|
|
14375
14474
|
['MimiDecoderModel', MimiDecoderModel, MODEL_TYPES.EncoderOnly],
|
|
14475
|
+
['SnacEncoderModel', SnacEncoderModel, MODEL_TYPES.EncoderOnly],
|
|
14476
|
+
['SnacDecoderModel', SnacDecoderModel, MODEL_TYPES.EncoderOnly],
|
|
14376
14477
|
]
|
|
14377
14478
|
for (const [name, model, type] of CUSTOM_MAPPING) {
|
|
14378
14479
|
MODEL_TYPE_MAPPING.set(name, type);
|
|
@@ -15689,14 +15790,15 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
15689
15790
|
/* harmony export */ ClapFeatureExtractor: () => (/* reexport safe */ _clap_feature_extraction_clap_js__WEBPACK_IMPORTED_MODULE_2__.ClapFeatureExtractor),
|
|
15690
15791
|
/* harmony export */ DacFeatureExtractor: () => (/* reexport safe */ _dac_feature_extraction_dac_js__WEBPACK_IMPORTED_MODULE_3__.DacFeatureExtractor),
|
|
15691
15792
|
/* harmony export */ EncodecFeatureExtractor: () => (/* reexport safe */ _encodec_feature_extraction_encodec_js__WEBPACK_IMPORTED_MODULE_1__.EncodecFeatureExtractor),
|
|
15692
|
-
/* harmony export */ ImageFeatureExtractor: () => (/* reexport safe */ _base_image_processors_utils_js__WEBPACK_IMPORTED_MODULE_11__.ImageProcessor),
|
|
15793
|
+
/* harmony export */ ImageFeatureExtractor: () => (/* reexport safe */ _base_image_processors_utils_js__WEBPACK_IMPORTED_MODULE_12__.ImageProcessor),
|
|
15693
15794
|
/* harmony export */ MoonshineFeatureExtractor: () => (/* reexport safe */ _moonshine_feature_extraction_moonshine_js__WEBPACK_IMPORTED_MODULE_4__.MoonshineFeatureExtractor),
|
|
15694
15795
|
/* harmony export */ PyAnnoteFeatureExtractor: () => (/* reexport safe */ _pyannote_feature_extraction_pyannote_js__WEBPACK_IMPORTED_MODULE_5__.PyAnnoteFeatureExtractor),
|
|
15695
15796
|
/* harmony export */ SeamlessM4TFeatureExtractor: () => (/* reexport safe */ _seamless_m4t_feature_extraction_seamless_m4t_js__WEBPACK_IMPORTED_MODULE_6__.SeamlessM4TFeatureExtractor),
|
|
15696
|
-
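/* harmony export */ SpeechT5FeatureExtractor: () => (/* reexport safe */ _speecht5_feature_extraction_speecht5_js__WEBPACK_IMPORTED_MODULE_7__.SpeechT5FeatureExtractor),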
|
|
15697
|
-
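/* harmony export */ Wav2Vec2FeatureExtractor: () => (/* reexport safe */ _wav2vec2_feature_extraction_wav2vec2_js__WEBPACK_IMPORTED_MODULE_8__.Wav2Vec2FeatureExtractor),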
|
|
15698
|
-
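/* harmony export */ WeSpeakerFeatureExtractor: () => (/* reexport safe */ _wespeaker_feature_extraction_wespeaker_js__WEBPACK_IMPORTED_MODULE_9__.WeSpeakerFeatureExtractor),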
|
|
15699
|
-
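/* harmony export */ WhisperFeatureExtractor: () => (/* reexport safe */ _whisper_feature_extraction_whisper_js__WEBPACK_IMPORTED_MODULE_10__.WhisperFeatureExtractor)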
|
|
15797
|
+
/* harmony export */ SnacFeatureExtractor: () => (/* reexport safe */ _snac_feature_extraction_snac_js__WEBPACK_IMPORTED_MODULE_7__.SnacFeatureExtractor),
|
|
15798
|
+
/* harmony export */ SpeechT5FeatureExtractor: () => (/* reexport safe */ _speecht5_feature_extraction_speecht5_js__WEBPACK_IMPORTED_MODULE_8__.SpeechT5FeatureExtractor),
|
|
15799
|
+
/* harmony export */ Wav2Vec2FeatureExtractor: () => (/* reexport safe */ _wav2vec2_feature_extraction_wav2vec2_js__WEBPACK_IMPORTED_MODULE_9__.Wav2Vec2FeatureExtractor),
|
|
15800
|
+
/* harmony export */ WeSpeakerFeatureExtractor: () => (/* reexport safe */ _wespeaker_feature_extraction_wespeaker_js__WEBPACK_IMPORTED_MODULE_10__.WeSpeakerFeatureExtractor),
|
|
15801
|
+
/* harmony export */ WhisperFeatureExtractor: () => (/* reexport safe */ _whisper_feature_extraction_whisper_js__WEBPACK_IMPORTED_MODULE_11__.WhisperFeatureExtractor)
|
|
15700
15802
|
/* harmony export */ });
|
|
15701
15803
|
/* harmony import */ var _audio_spectrogram_transformer_feature_extraction_audio_spectrogram_transformer_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ./audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.js */ "./src/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.js");
|
|
15702
15804
|
/* harmony import */ var _encodec_feature_extraction_encodec_js__WEBPACK_IMPORTED_MODULE_1__ = __webpack_require__(/*! ./encodec/feature_extraction_encodec.js */ "./src/models/encodec/feature_extraction_encodec.js");
|
|
@@ -15705,11 +15807,13 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
15705
15807
|
/* harmony import */ var _moonshine_feature_extraction_moonshine_js__WEBPACK_IMPORTED_MODULE_4__ = __webpack_require__(/*! ./moonshine/feature_extraction_moonshine.js */ "./src/models/moonshine/feature_extraction_moonshine.js");
|
|
15706
15808
|
/* harmony import */ var _pyannote_feature_extraction_pyannote_js__WEBPACK_IMPORTED_MODULE_5__ = __webpack_require__(/*! ./pyannote/feature_extraction_pyannote.js */ "./src/models/pyannote/feature_extraction_pyannote.js");
|
|
15707
15809
|
/* harmony import */ var _seamless_m4t_feature_extraction_seamless_m4t_js__WEBPACK_IMPORTED_MODULE_6__ = __webpack_require__(/*! ./seamless_m4t/feature_extraction_seamless_m4t.js */ "./src/models/seamless_m4t/feature_extraction_seamless_m4t.js");
|
|
15708
|
-
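/* harmony import */ var _speecht5_feature_extraction_speecht5_js__WEBPACK_IMPORTED_MODULE_7__ = __webpack_require__(/*! ./speecht5/feature_extraction_speecht5.js */ "./src/models/speecht5/feature_extraction_speecht5.js");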
|
|
15709
|
-
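/* harmony import */ var _wav2vec2_feature_extraction_wav2vec2_js__WEBPACK_IMPORTED_MODULE_8__ = __webpack_require__(/*! ./wav2vec2/feature_extraction_wav2vec2.js */ "./src/models/wav2vec2/feature_extraction_wav2vec2.js");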
|
|
15710
|
-
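/* harmony import */ var _wespeaker_feature_extraction_wespeaker_js__WEBPACK_IMPORTED_MODULE_9__ = __webpack_require__(/*! ./wespeaker/feature_extraction_wespeaker.js */ "./src/models/wespeaker/feature_extraction_wespeaker.js");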
|
|
15711
|
-
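/* harmony import */ var _whisper_feature_extraction_whisper_js__WEBPACK_IMPORTED_MODULE_10__ = __webpack_require__(/*! ./whisper/feature_extraction_whisper.js */ "./src/models/whisper/feature_extraction_whisper.js");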
|
|
15712
|
-
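/* harmony import */ var _base_image_processors_utils_js__WEBPACK_IMPORTED_MODULE_11__ = __webpack_require__(/*! ../base/image_processors_utils.js */ "./src/base/image_processors_utils.js");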
|
|
15810
|
+
/* harmony import */ var _snac_feature_extraction_snac_js__WEBPACK_IMPORTED_MODULE_7__ = __webpack_require__(/*! ./snac/feature_extraction_snac.js */ "./src/models/snac/feature_extraction_snac.js");
|
|
15811
|
+
/* harmony import */ var _speecht5_feature_extraction_speecht5_js__WEBPACK_IMPORTED_MODULE_8__ = __webpack_require__(/*! ./speecht5/feature_extraction_speecht5.js */ "./src/models/speecht5/feature_extraction_speecht5.js");
|
|
15812
|
+
/* harmony import */ var _wav2vec2_feature_extraction_wav2vec2_js__WEBPACK_IMPORTED_MODULE_9__ = __webpack_require__(/*! ./wav2vec2/feature_extraction_wav2vec2.js */ "./src/models/wav2vec2/feature_extraction_wav2vec2.js");
|
|
15813
|
+
/* harmony import */ var _wespeaker_feature_extraction_wespeaker_js__WEBPACK_IMPORTED_MODULE_10__ = __webpack_require__(/*! ./wespeaker/feature_extraction_wespeaker.js */ "./src/models/wespeaker/feature_extraction_wespeaker.js");
|
|
15814
|
+
/* harmony import */ var _whisper_feature_extraction_whisper_js__WEBPACK_IMPORTED_MODULE_11__ = __webpack_require__(/*! ./whisper/feature_extraction_whisper.js */ "./src/models/whisper/feature_extraction_whisper.js");
|
|
15815
|
+
/* harmony import */ var _base_image_processors_utils_js__WEBPACK_IMPORTED_MODULE_12__ = __webpack_require__(/*! ../base/image_processors_utils.js */ "./src/base/image_processors_utils.js");
|
|
15816
|
+
|
|
15713
15817
|
|
|
15714
15818
|
|
|
15715
15819
|
|
|
@@ -18742,6 +18846,25 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
18742
18846
|
|
|
18743
18847
|
|
|
18744
18848
|
|
|
18849
|
+
/***/ }),
|
|
18850
|
+
|
|
18851
|
+
/***/ "./src/models/snac/feature_extraction_snac.js":
|
|
18852
|
+
/*!****************************************************!*\
|
|
18853
|
+
!*** ./src/models/snac/feature_extraction_snac.js ***!
|
|
18854
|
+
\****************************************************/
|
|
18855
|
+
/***/ ((__unused_webpack___webpack_module__, __webpack_exports__, __webpack_require__) => {
|
|
18856
|
+
|
|
18857
|
+
"use strict";
|
|
18858
|
+
__webpack_require__.r(__webpack_exports__);
|
|
18859
|
+
/* harmony export */ __webpack_require__.d(__webpack_exports__, {
|
|
18860
|
+
/* harmony export */ SnacFeatureExtractor: () => (/* binding */ SnacFeatureExtractor)
|
|
18861
|
+
/* harmony export */ });
|
|
18862
|
+
/* harmony import */ var _dac_feature_extraction_dac_js__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! ../dac/feature_extraction_dac.js */ "./src/models/dac/feature_extraction_dac.js");
|
|
18863
|
+
|
|
18864
|
+
|
|
18865
|
+
class SnacFeatureExtractor extends _dac_feature_extraction_dac_js__WEBPACK_IMPORTED_MODULE_0__.DacFeatureExtractor { }
|
|
18866
|
+
|
|
18867
|
+
|
|
18745
18868
|
/***/ }),
|
|
18746
18869
|
|
|
18747
18870
|
/***/ "./src/models/speecht5/feature_extraction_speecht5.js":
|
|
@@ -19999,16 +20122,16 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
19999
20122
|
/* harmony import */ var _utils_image_js__WEBPACK_IMPORTED_MODULE_9__ = __webpack_require__(/*! ./utils/image.js */ "./src/utils/image.js");
|
|
20000
20123
|
/**
|
|
20001
20124
|
* @file Pipelines provide a high-level, easy to use, API for running machine learning models.
|
|
20002
|
-
*
|
|
20125
|
+
*
|
|
20003
20126
|
* **Example:** Instantiate pipeline using the `pipeline` function.
|
|
20004
20127
|
* ```javascript
|
|
20005
20128
|
* import { pipeline } from '@huggingface/transformers';
|
|
20006
|
-
*
|
|
20129
|
+
*
|
|
20007
20130
|
* const classifier = await pipeline('sentiment-analysis');
|
|
20008
20131
|
* const output = await classifier('I love transformers!');
|
|
20009
20132
|
* // [{'label': 'POSITIVE', 'score': 0.999817686}]
|
|
20010
20133
|
* ```
|
|
20011
|
-
*
|
|
20134
|
+
*
|
|
20012
20135
|
* @module pipelines
|
|
20013
20136
|
*/
|
|
20014
20137
|
|
|
@@ -20027,7 +20150,7 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
20027
20150
|
|
|
20028
20151
|
|
|
20029
20152
|
/**
|
|
20030
|
-
* @typedef {string | RawImage | URL} ImageInput
|
|
20153
|
+
* @typedef {string | RawImage | URL | Blob | HTMLCanvasElement | OffscreenCanvas} ImageInput
|
|
20031
20154
|
* @typedef {ImageInput|ImageInput[]} ImagePipelineInputs
|
|
20032
20155
|
*/
|
|
20033
20156
|
|
|
@@ -20101,7 +20224,7 @@ function get_bounding_box(box, asInteger) {
|
|
|
20101
20224
|
/**
|
|
20102
20225
|
* @callback DisposeType Disposes the item.
|
|
20103
20226
|
* @returns {Promise<void>} A promise that resolves when the item has been disposed.
|
|
20104
|
-
*
|
|
20227
|
+
*
|
|
20105
20228
|
* @typedef {Object} Disposable
|
|
20106
20229
|
* @property {DisposeType} dispose A promise that resolves when the pipeline has been disposed.
|
|
20107
20230
|
*/
|
|
@@ -20138,7 +20261,7 @@ class Pipeline extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_4__.Callable {
|
|
|
20138
20261
|
* @property {string} task The task of the pipeline. Useful for specifying subtasks.
|
|
20139
20262
|
* @property {PreTrainedModel} model The model used by the pipeline.
|
|
20140
20263
|
* @property {PreTrainedTokenizer} tokenizer The tokenizer used by the pipeline.
|
|
20141
|
-
*
|
|
20264
|
+
*
|
|
20142
20265
|
* @typedef {ModelTokenizerConstructorArgs} TextPipelineConstructorArgs An object used to instantiate a text-based pipeline.
|
|
20143
20266
|
*/
|
|
20144
20267
|
|
|
@@ -20147,7 +20270,7 @@ class Pipeline extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_4__.Callable {
|
|
|
20147
20270
|
* @property {string} task The task of the pipeline. Useful for specifying subtasks.
|
|
20148
20271
|
* @property {PreTrainedModel} model The model used by the pipeline.
|
|
20149
20272
|
* @property {Processor} processor The processor used by the pipeline.
|
|
20150
|
-
*
|
|
20273
|
+
*
|
|
20151
20274
|
* @typedef {ModelProcessorConstructorArgs} AudioPipelineConstructorArgs An object used to instantiate an audio-based pipeline.
|
|
20152
20275
|
* @typedef {ModelProcessorConstructorArgs} ImagePipelineConstructorArgs An object used to instantiate an image-based pipeline.
|
|
20153
20276
|
*/
|
|
@@ -20159,7 +20282,7 @@ class Pipeline extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_4__.Callable {
|
|
|
20159
20282
|
* @property {PreTrainedModel} model The model used by the pipeline.
|
|
20160
20283
|
* @property {PreTrainedTokenizer} tokenizer The tokenizer used by the pipeline.
|
|
20161
20284
|
* @property {Processor} processor The processor used by the pipeline.
|
|
20162
|
-
*
|
|
20285
|
+
*
|
|
20163
20286
|
* @typedef {ModelTokenizerProcessorConstructorArgs} TextAudioPipelineConstructorArgs An object used to instantiate a text- and audio-based pipeline.
|
|
20164
20287
|
* @typedef {ModelTokenizerProcessorConstructorArgs} TextImagePipelineConstructorArgs An object used to instantiate a text- and image-based pipeline.
|
|
20165
20288
|
*/
|
|
@@ -20169,15 +20292,15 @@ class Pipeline extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_4__.Callable {
|
|
|
20169
20292
|
* @property {string} label The label predicted.
|
|
20170
20293
|
* @property {number} score The corresponding probability.
|
|
20171
20294
|
* @typedef {TextClassificationSingle[]} TextClassificationOutput
|
|
20172
|
-
*
|
|
20295
|
+
*
|
|
20173
20296
|
* @typedef {Object} TextClassificationPipelineOptions Parameters specific to text classification pipelines.
|
|
20174
20297
|
* @property {number} [top_k=1] The number of top predictions to be returned.
|
|
20175
|
-
*
|
|
20298
|
+
*
|
|
20176
20299
|
* @callback TextClassificationPipelineCallback Classify the text(s) given as inputs.
|
|
20177
20300
|
* @param {string|string[]} texts The input text(s) to be classified.
|
|
20178
20301
|
* @param {TextClassificationPipelineOptions} [options] The options to use for text classification.
|
|
20179
20302
|
* @returns {Promise<TextClassificationOutput|TextClassificationOutput[]>} An array or object containing the predicted labels and scores.
|
|
20180
|
-
*
|
|
20303
|
+
*
|
|
20181
20304
|
* @typedef {TextPipelineConstructorArgs & TextClassificationPipelineCallback & Disposable} TextClassificationPipelineType
|
|
20182
20305
|
*/
|
|
20183
20306
|
|
|
@@ -20190,7 +20313,7 @@ class Pipeline extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_4__.Callable {
|
|
|
20190
20313
|
* const output = await classifier('I love transformers!');
|
|
20191
20314
|
* // [{ label: 'POSITIVE', score: 0.999788761138916 }]
|
|
20192
20315
|
* ```
|
|
20193
|
-
*
|
|
20316
|
+
*
|
|
20194
20317
|
* **Example:** Multilingual sentiment-analysis w/ `Xenova/bert-base-multilingual-uncased-sentiment` (and return top 5 classes).
|
|
20195
20318
|
* ```javascript
|
|
20196
20319
|
* const classifier = await pipeline('sentiment-analysis', 'Xenova/bert-base-multilingual-uncased-sentiment');
|
|
@@ -20203,7 +20326,7 @@ class Pipeline extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_4__.Callable {
|
|
|
20203
20326
|
* // { label: '2 stars', score: 0.0009423971059732139 }
|
|
20204
20327
|
* // ]
|
|
20205
20328
|
* ```
|
|
20206
|
-
*
|
|
20329
|
+
*
|
|
20207
20330
|
* **Example:** Toxic comment classification w/ `Xenova/toxic-bert` (and return all classes).
|
|
20208
20331
|
* ```javascript
|
|
20209
20332
|
* const classifier = await pipeline('text-classification', 'Xenova/toxic-bert');
|
|
@@ -20288,21 +20411,21 @@ class TextClassificationPipeline extends (/** @type {new (options: TextPipelineC
|
|
|
20288
20411
|
* @property {number} [start] The index of the start of the corresponding entity in the sentence.
|
|
20289
20412
|
* @property {number} [end] The index of the end of the corresponding entity in the sentence.
|
|
20290
20413
|
* @typedef {TokenClassificationSingle[]} TokenClassificationOutput
|
|
20291
|
-
*
|
|
20414
|
+
*
|
|
20292
20415
|
* @typedef {Object} TokenClassificationPipelineOptions Parameters specific to token classification pipelines.
|
|
20293
20416
|
* @property {string[]} [ignore_labels] A list of labels to ignore.
|
|
20294
|
-
*
|
|
20417
|
+
*
|
|
20295
20418
|
* @callback TokenClassificationPipelineCallback Classify each token of the text(s) given as inputs.
|
|
20296
20419
|
* @param {string|string[]} texts One or several texts (or one list of texts) for token classification.
|
|
20297
20420
|
* @param {TokenClassificationPipelineOptions} [options] The options to use for token classification.
|
|
20298
20421
|
* @returns {Promise<TokenClassificationOutput|TokenClassificationOutput[]>} The result.
|
|
20299
|
-
*
|
|
20422
|
+
*
|
|
20300
20423
|
* @typedef {TextPipelineConstructorArgs & TokenClassificationPipelineCallback & Disposable} TokenClassificationPipelineType
|
|
20301
20424
|
*/
|
|
20302
20425
|
|
|
20303
20426
|
/**
|
|
20304
20427
|
* Named Entity Recognition pipeline using any `ModelForTokenClassification`.
|
|
20305
|
-
*
|
|
20428
|
+
*
|
|
20306
20429
|
* **Example:** Perform named entity recognition with `Xenova/bert-base-NER`.
|
|
20307
20430
|
* ```javascript
|
|
20308
20431
|
* const classifier = await pipeline('token-classification', 'Xenova/bert-base-NER');
|
|
@@ -20312,7 +20435,7 @@ class TextClassificationPipeline extends (/** @type {new (options: TextPipelineC
|
|
|
20312
20435
|
* // { entity: 'B-LOC', score: 0.9994474053382874, index: 9, word: 'London' }
|
|
20313
20436
|
* // ]
|
|
20314
20437
|
* ```
|
|
20315
|
-
*
|
|
20438
|
+
*
|
|
20316
20439
|
* **Example:** Perform named entity recognition with `Xenova/bert-base-NER` (and return all labels).
|
|
20317
20440
|
* ```javascript
|
|
20318
20441
|
* const classifier = await pipeline('token-classification', 'Xenova/bert-base-NER');
|
|
@@ -20408,22 +20531,22 @@ class TokenClassificationPipeline extends (/** @type {new (options: TextPipeline
|
|
|
20408
20531
|
* @property {number} [start] The character start index of the answer (in the tokenized version of the input).
|
|
20409
20532
|
* @property {number} [end] The character end index of the answer (in the tokenized version of the input).
|
|
20410
20533
|
* @property {string} answer The answer to the question.
|
|
20411
|
-
*
|
|
20534
|
+
*
|
|
20412
20535
|
* @typedef {Object} QuestionAnsweringPipelineOptions Parameters specific to question answering pipelines.
|
|
20413
20536
|
* @property {number} [top_k=1] The number of top answer predictions to be returned.
|
|
20414
|
-
*
|
|
20537
|
+
*
|
|
20415
20538
|
* @callback QuestionAnsweringPipelineCallback Answer the question(s) given as inputs by using the context(s).
|
|
20416
20539
|
* @param {string|string[]} question One or several question(s) (must be used in conjunction with the `context` argument).
|
|
20417
20540
|
* @param {string|string[]} context One or several context(s) associated with the question(s) (must be used in conjunction with the `question` argument).
|
|
20418
20541
|
* @param {QuestionAnsweringPipelineOptions} [options] The options to use for question answering.
|
|
20419
20542
|
* @returns {Promise<QuestionAnsweringOutput|QuestionAnsweringOutput[]>} An array or object containing the predicted answers and scores.
|
|
20420
|
-
*
|
|
20543
|
+
*
|
|
20421
20544
|
* @typedef {TextPipelineConstructorArgs & QuestionAnsweringPipelineCallback & Disposable} QuestionAnsweringPipelineType
|
|
20422
20545
|
*/
|
|
20423
20546
|
|
|
20424
20547
|
/**
|
|
20425
20548
|
* Question Answering pipeline using any `ModelForQuestionAnswering`.
|
|
20426
|
-
*
|
|
20549
|
+
*
|
|
20427
20550
|
* **Example:** Run question answering with `Xenova/distilbert-base-uncased-distilled-squad`.
|
|
20428
20551
|
* ```javascript
|
|
20429
20552
|
* const answerer = await pipeline('question-answering', 'Xenova/distilbert-base-uncased-distilled-squad');
|
|
@@ -20548,10 +20671,10 @@ class QuestionAnsweringPipeline extends (/** @type {new (options: TextPipelineCo
|
|
|
20548
20671
|
* @property {number} token The predicted token id (to replace the masked one).
|
|
20549
20672
|
* @property {string} token_str The predicted token (to replace the masked one).
|
|
20550
20673
|
* @typedef {FillMaskSingle[]} FillMaskOutput
|
|
20551
|
-
*
|
|
20674
|
+
*
|
|
20552
20675
|
* @typedef {Object} FillMaskPipelineOptions Parameters specific to fill mask pipelines.
|
|
20553
20676
|
* @property {number} [top_k=5] When passed, overrides the number of predictions to return.
|
|
20554
|
-
*
|
|
20677
|
+
*
|
|
20555
20678
|
* @callback FillMaskPipelineCallback Fill the masked token in the text(s) given as inputs.
|
|
20556
20679
|
* @param {string|string[]} texts One or several texts (or one list of prompts) with masked tokens.
|
|
20557
20680
|
* @param {FillMaskPipelineOptions} [options] The options to use for masked language modelling.
|
|
@@ -20559,13 +20682,13 @@ class QuestionAnsweringPipeline extends (/** @type {new (options: TextPipelineCo
|
|
|
20559
20682
|
* and the sequence with the predicted token filled in, or an array of such arrays (one for each input text).
|
|
20560
20683
|
* If only one input text is given, the output will be an array of objects.
|
|
20561
20684
|
* @throws {Error} When the mask token is not found in the input text.
|
|
20562
|
-
*
|
|
20685
|
+
*
|
|
20563
20686
|
* @typedef {TextPipelineConstructorArgs & FillMaskPipelineCallback & Disposable} FillMaskPipelineType
|
|
20564
20687
|
*/
|
|
20565
20688
|
|
|
20566
20689
|
/**
|
|
20567
20690
|
* Masked language modeling prediction pipeline using any `ModelWithLMHead`.
|
|
20568
|
-
*
|
|
20691
|
+
*
|
|
20569
20692
|
* **Example:** Perform masked language modelling (a.k.a. "fill-mask") with `Xenova/bert-base-cased`.
|
|
20570
20693
|
* ```javascript
|
|
20571
20694
|
* const unmasker = await pipeline('fill-mask', 'Xenova/bert-base-cased');
|
|
@@ -20578,7 +20701,7 @@ class QuestionAnsweringPipeline extends (/** @type {new (options: TextPipelineCo
|
|
|
20578
20701
|
* // { token_str: 'life', score: 0.01859794743359089, token: 1297, sequence: 'The goal of life is life.' }
|
|
20579
20702
|
* // ]
|
|
20580
20703
|
* ```
|
|
20581
|
-
*
|
|
20704
|
+
*
|
|
20582
20705
|
* **Example:** Perform masked language modelling (a.k.a. "fill-mask") with `Xenova/bert-base-cased` (and return top result).
|
|
20583
20706
|
* ```javascript
|
|
20584
20707
|
* const unmasker = await pipeline('fill-mask', 'Xenova/bert-base-cased');
|
|
@@ -20655,18 +20778,18 @@ class FillMaskPipeline extends (/** @type {new (options: TextPipelineConstructor
|
|
|
20655
20778
|
* @typedef {Object} Text2TextGenerationSingle
|
|
20656
20779
|
* @property {string} generated_text The generated text.
|
|
20657
20780
|
* @typedef {Text2TextGenerationSingle[]} Text2TextGenerationOutput
|
|
20658
|
-
*
|
|
20781
|
+
*
|
|
20659
20782
|
* @callback Text2TextGenerationPipelineCallback Generate the output text(s) using text(s) given as inputs.
|
|
20660
20783
|
* @param {string|string[]} texts Input text for the encoder.
|
|
20661
20784
|
* @param {Partial<import('./generation/configuration_utils.js').GenerationConfig>} [options] Additional keyword arguments to pass along to the generate method of the model.
|
|
20662
20785
|
* @returns {Promise<Text2TextGenerationOutput|Text2TextGenerationOutput[]>}
|
|
20663
|
-
*
|
|
20786
|
+
*
|
|
20664
20787
|
* @typedef {TextPipelineConstructorArgs & Text2TextGenerationPipelineCallback & Disposable} Text2TextGenerationPipelineType
|
|
20665
20788
|
*/
|
|
20666
20789
|
|
|
20667
20790
|
/**
|
|
20668
20791
|
* Text2TextGenerationPipeline class for generating text using a model that performs text-to-text generation tasks.
|
|
20669
|
-
*
|
|
20792
|
+
*
|
|
20670
20793
|
* **Example:** Text-to-text generation w/ `Xenova/LaMini-Flan-T5-783M`.
|
|
20671
20794
|
* ```javascript
|
|
20672
20795
|
* const generator = await pipeline('text2text-generation', 'Xenova/LaMini-Flan-T5-783M');
|
|
@@ -20742,18 +20865,18 @@ class Text2TextGenerationPipeline extends (/** @type {new (options: TextPipeline
|
|
|
20742
20865
|
* @typedef {Object} SummarizationSingle
|
|
20743
20866
|
* @property {string} summary_text The summary text.
|
|
20744
20867
|
* @typedef {SummarizationSingle[]} SummarizationOutput
|
|
20745
|
-
*
|
|
20868
|
+
*
|
|
20746
20869
|
* @callback SummarizationPipelineCallback Summarize the text(s) given as inputs.
|
|
20747
20870
|
* @param {string|string[]} texts One or several articles (or one list of articles) to summarize.
|
|
20748
20871
|
* @param {import('./generation/configuration_utils.js').GenerationConfig} [options] Additional keyword arguments to pass along to the generate method of the model.
|
|
20749
20872
|
* @returns {Promise<SummarizationOutput|SummarizationOutput[]>}
|
|
20750
|
-
*
|
|
20873
|
+
*
|
|
20751
20874
|
* @typedef {TextPipelineConstructorArgs & SummarizationPipelineCallback & Disposable} SummarizationPipelineType
|
|
20752
20875
|
*/
|
|
20753
20876
|
|
|
20754
20877
|
/**
|
|
20755
20878
|
* A pipeline for summarization tasks, inheriting from Text2TextGenerationPipeline.
|
|
20756
|
-
*
|
|
20879
|
+
*
|
|
20757
20880
|
* **Example:** Summarization w/ `Xenova/distilbart-cnn-6-6`.
|
|
20758
20881
|
* ```javascript
|
|
20759
20882
|
* const generator = await pipeline('summarization', 'Xenova/distilbart-cnn-6-6');
|
|
@@ -20789,23 +20912,23 @@ class SummarizationPipeline extends (/** @type {new (options: TextPipelineConstr
|
|
|
20789
20912
|
* @typedef {Object} TranslationSingle
|
|
20790
20913
|
* @property {string} translation_text The translated text.
|
|
20791
20914
|
* @typedef {TranslationSingle[]} TranslationOutput
|
|
20792
|
-
*
|
|
20915
|
+
*
|
|
20793
20916
|
* @callback TranslationPipelineCallback Translate the text(s) given as inputs.
|
|
20794
20917
|
* @param {string|string[]} texts Texts to be translated.
|
|
20795
20918
|
* @param {import('./generation/configuration_utils.js').GenerationConfig} [options] Additional keyword arguments to pass along to the generate method of the model.
|
|
20796
20919
|
* @returns {Promise<TranslationOutput|TranslationOutput[]>}
|
|
20797
|
-
*
|
|
20920
|
+
*
|
|
20798
20921
|
* @typedef {TextPipelineConstructorArgs & TranslationPipelineCallback & Disposable} TranslationPipelineType
|
|
20799
20922
|
*/
|
|
20800
20923
|
|
|
20801
20924
|
/**
|
|
20802
20925
|
* Translates text from one language to another.
|
|
20803
|
-
*
|
|
20926
|
+
*
|
|
20804
20927
|
* **Example:** Multilingual translation w/ `Xenova/nllb-200-distilled-600M`.
|
|
20805
|
-
*
|
|
20928
|
+
*
|
|
20806
20929
|
* See [here](https://github.com/facebookresearch/flores/blob/main/flores200/README.md#languages-in-flores-200)
|
|
20807
20930
|
* for the full list of languages and their corresponding codes.
|
|
20808
|
-
*
|
|
20931
|
+
*
|
|
20809
20932
|
* ```javascript
|
|
20810
20933
|
* const translator = await pipeline('translation', 'Xenova/nllb-200-distilled-600M');
|
|
20811
20934
|
* const output = await translator('जीवन एक चॉकलेट बॉक्स की तरह है।', {
|
|
@@ -20814,12 +20937,12 @@ class SummarizationPipeline extends (/** @type {new (options: TextPipelineConstr
|
|
|
20814
20937
|
* });
|
|
20815
20938
|
* // [{ translation_text: 'La vie est comme une boîte à chocolat.' }]
|
|
20816
20939
|
* ```
|
|
20817
|
-
*
|
|
20940
|
+
*
|
|
20818
20941
|
* **Example:** Multilingual translation w/ `Xenova/m2m100_418M`.
|
|
20819
|
-
*
|
|
20942
|
+
*
|
|
20820
20943
|
* See [here](https://huggingface.co/facebook/m2m100_418M#languages-covered)
|
|
20821
20944
|
* for the full list of languages and their corresponding codes.
|
|
20822
|
-
*
|
|
20945
|
+
*
|
|
20823
20946
|
* ```javascript
|
|
20824
20947
|
* const translator = await pipeline('translation', 'Xenova/m2m100_418M');
|
|
20825
20948
|
* const output = await translator('生活就像一盒巧克力。', {
|
|
@@ -20828,12 +20951,12 @@ class SummarizationPipeline extends (/** @type {new (options: TextPipelineConstr
|
|
|
20828
20951
|
* });
|
|
20829
20952
|
* // [{ translation_text: 'Life is like a box of chocolate.' }]
|
|
20830
20953
|
* ```
|
|
20831
|
-
*
|
|
20954
|
+
*
|
|
20832
20955
|
* **Example:** Multilingual translation w/ `Xenova/mbart-large-50-many-to-many-mmt`.
|
|
20833
|
-
*
|
|
20956
|
+
*
|
|
20834
20957
|
* See [here](https://huggingface.co/facebook/mbart-large-50-many-to-many-mmt#languages-covered)
|
|
20835
20958
|
* for the full list of languages and their corresponding codes.
|
|
20836
|
-
*
|
|
20959
|
+
*
|
|
20837
20960
|
* ```javascript
|
|
20838
20961
|
* const translator = await pipeline('translation', 'Xenova/mbart-large-50-many-to-many-mmt');
|
|
20839
20962
|
* const output = await translator('संयुक्त राष्ट्र के प्रमुख का कहना है कि सीरिया में कोई सैन्य समाधान नहीं है', {
|
|
@@ -20862,21 +20985,21 @@ function isChat(x) {
|
|
|
20862
20985
|
|
|
20863
20986
|
/**
|
|
20864
20987
|
* @typedef {import('./tokenizers.js').Message[]} Chat
|
|
20865
|
-
*
|
|
20988
|
+
*
|
|
20866
20989
|
* @typedef {Object} TextGenerationSingle
|
|
20867
20990
|
* @property {string|Chat} generated_text The generated text.
|
|
20868
20991
|
* @typedef {TextGenerationSingle[]} TextGenerationOutput
|
|
20869
|
-
*
|
|
20992
|
+
*
|
|
20870
20993
|
* @typedef {Object} TextGenerationSpecificParams Parameters specific to text-generation pipelines.
|
|
20871
20994
|
* @property {boolean} [add_special_tokens] Whether or not to add special tokens when tokenizing the sequences.
|
|
20872
20995
|
* @property {boolean} [return_full_text=true] If set to `false` only added text is returned, otherwise the full text is returned.
|
|
20873
20996
|
* @typedef {import('./generation/configuration_utils.js').GenerationConfig & TextGenerationSpecificParams} TextGenerationConfig
|
|
20874
|
-
*
|
|
20997
|
+
*
|
|
20875
20998
|
* @callback TextGenerationPipelineCallback Complete the prompt(s) given as inputs.
|
|
20876
20999
|
* @param {string|string[]|Chat|Chat[]} texts One or several prompts (or one list of prompts) to complete.
|
|
20877
21000
|
* @param {Partial<TextGenerationConfig>} [options] Additional keyword arguments to pass along to the generate method of the model.
|
|
20878
21001
|
* @returns {Promise<TextGenerationOutput|TextGenerationOutput[]>} An array or object containing the generated texts.
|
|
20879
|
-
*
|
|
21002
|
+
*
|
|
20880
21003
|
* @typedef {TextPipelineConstructorArgs & TextGenerationPipelineCallback & Disposable} TextGenerationPipelineType
|
|
20881
21004
|
*/
|
|
20882
21005
|
|
|
@@ -20884,7 +21007,7 @@ function isChat(x) {
|
|
|
20884
21007
|
* Language generation pipeline using any `ModelWithLMHead` or `ModelForCausalLM`.
|
|
20885
21008
|
* This pipeline predicts the words that will follow a specified text prompt.
|
|
20886
21009
|
* NOTE: For the full list of generation parameters, see [`GenerationConfig`](./utils/generation#module_utils/generation.GenerationConfig).
|
|
20887
|
-
*
|
|
21010
|
+
*
|
|
20888
21011
|
* **Example:** Text generation with `Xenova/distilgpt2` (default settings).
|
|
20889
21012
|
* ```javascript
|
|
20890
21013
|
* const generator = await pipeline('text-generation', 'Xenova/distilgpt2');
|
|
@@ -20892,7 +21015,7 @@ function isChat(x) {
|
|
|
20892
21015
|
* const output = await generator(text);
|
|
20893
21016
|
* // [{ generated_text: "I enjoy walking with my cute dog, and I love to play with the other dogs." }]
|
|
20894
21017
|
* ```
|
|
20895
|
-
*
|
|
21018
|
+
*
|
|
20896
21019
|
* **Example:** Text generation with `Xenova/distilgpt2` (custom settings).
|
|
20897
21020
|
* ```javascript
|
|
20898
21021
|
* const generator = await pipeline('text-generation', 'Xenova/distilgpt2');
|
|
@@ -20911,7 +21034,7 @@ function isChat(x) {
|
|
|
20911
21034
|
* // "generated_text": "Once upon a time, there was an abundance of information about the most important and influential"
|
|
20912
21035
|
* // }]
|
|
20913
21036
|
* ```
|
|
20914
|
-
*
|
|
21037
|
+
*
|
|
20915
21038
|
* **Example:** Run code generation with `Xenova/codegen-350M-mono`.
|
|
20916
21039
|
* ```javascript
|
|
20917
21040
|
* const generator = await pipeline('text-generation', 'Xenova/codegen-350M-mono');
|
|
@@ -21030,7 +21153,7 @@ class TextGenerationPipeline extends (/** @type {new (options: TextPipelineConst
|
|
|
21030
21153
|
* @property {string} sequence The sequence for which this is the output.
|
|
21031
21154
|
* @property {string[]} labels The labels sorted by order of likelihood.
|
|
21032
21155
|
* @property {number[]} scores The probabilities for each of the labels.
|
|
21033
|
-
*
|
|
21156
|
+
*
|
|
21034
21157
|
* @typedef {Object} ZeroShotClassificationPipelineOptions Parameters specific to zero-shot classification pipelines.
|
|
21035
21158
|
* @property {string} [hypothesis_template="This example is {}."] The template used to turn each
|
|
21036
21159
|
* candidate label into an NLI-style hypothesis. The candidate label will replace the {} placeholder.
|
|
@@ -21038,14 +21161,14 @@ class TextGenerationPipeline extends (/** @type {new (options: TextPipelineConst
|
|
|
21038
21161
|
* If `false`, the scores are normalized such that the sum of the label likelihoods for each sequence
|
|
21039
21162
|
* is 1. If `true`, the labels are considered independent and probabilities are normalized for each
|
|
21040
21163
|
* candidate by doing a softmax of the entailment score vs. the contradiction score.
|
|
21041
|
-
*
|
|
21164
|
+
*
|
|
21042
21165
|
* @callback ZeroShotClassificationPipelineCallback Classify the sequence(s) given as inputs.
|
|
21043
21166
|
* @param {string|string[]} texts The sequence(s) to classify, will be truncated if the model input is too large.
|
|
21044
21167
|
* @param {string|string[]} candidate_labels The set of possible class labels to classify each sequence into.
|
|
21045
21168
|
* Can be a single label, a string of comma-separated labels, or a list of labels.
|
|
21046
21169
|
* @param {ZeroShotClassificationPipelineOptions} [options] The options to use for zero-shot classification.
|
|
21047
21170
|
* @returns {Promise<ZeroShotClassificationOutput|ZeroShotClassificationOutput[]>} An array or object containing the predicted labels and scores.
|
|
21048
|
-
*
|
|
21171
|
+
*
|
|
21049
21172
|
* @typedef {TextPipelineConstructorArgs & ZeroShotClassificationPipelineCallback & Disposable} ZeroShotClassificationPipelineType
|
|
21050
21173
|
*/
|
|
21051
21174
|
|
|
@@ -21054,7 +21177,7 @@ class TextGenerationPipeline extends (/** @type {new (options: TextPipelineConst
|
|
|
21054
21177
|
* trained on NLI (natural language inference) tasks. Equivalent of `text-classification`
|
|
21055
21178
|
* pipelines, but these models don't require a hardcoded number of potential classes, they
|
|
21056
21179
|
* can be chosen at runtime. It usually means it's slower but it is **much** more flexible.
|
|
21057
|
-
*
|
|
21180
|
+
*
|
|
21058
21181
|
* **Example:** Zero shot classification with `Xenova/mobilebert-uncased-mnli`.
|
|
21059
21182
|
* ```javascript
|
|
21060
21183
|
* const classifier = await pipeline('zero-shot-classification', 'Xenova/mobilebert-uncased-mnli');
|
|
@@ -21067,7 +21190,7 @@ class TextGenerationPipeline extends (/** @type {new (options: TextPipelineConst
|
|
|
21067
21190
|
* // scores: [ 0.5562091040482018, 0.1843621307860853, 0.13942646639336376, 0.12000229877234923 ]
|
|
21068
21191
|
* // }
|
|
21069
21192
|
* ```
|
|
21070
|
-
*
|
|
21193
|
+
*
|
|
21071
21194
|
* **Example:** Zero shot classification with `Xenova/nli-deberta-v3-xsmall` (multi-label).
|
|
21072
21195
|
* ```javascript
|
|
21073
21196
|
* const classifier = await pipeline('zero-shot-classification', 'Xenova/nli-deberta-v3-xsmall');
|
|
@@ -21181,20 +21304,20 @@ class ZeroShotClassificationPipeline extends (/** @type {new (options: TextPipel
|
|
|
21181
21304
|
* @property {'none'|'mean'|'cls'} [pooling="none"] The pooling method to use.
|
|
21182
21305
|
* @property {boolean} [normalize=false] Whether or not to normalize the embeddings in the last dimension.
|
|
21183
21306
|
* @property {boolean} [quantize=false] Whether or not to quantize the embeddings.
|
|
21184
|
-
* @property {'binary'|'ubinary'} [precision='binary'] The precision to use for quantization.
|
|
21185
|
-
*
|
|
21307
|
+
* @property {'binary'|'ubinary'} [precision='binary'] The precision to use for quantization.
|
|
21308
|
+
*
|
|
21186
21309
|
* @callback FeatureExtractionPipelineCallback Extract the features of the input(s).
|
|
21187
21310
|
* @param {string|string[]} texts One or several texts (or one list of texts) to get the features of.
|
|
21188
21311
|
* @param {FeatureExtractionPipelineOptions} [options] The options to use for feature extraction.
|
|
21189
21312
|
* @returns {Promise<Tensor>} The features computed by the model.
|
|
21190
|
-
*
|
|
21313
|
+
*
|
|
21191
21314
|
* @typedef {TextPipelineConstructorArgs & FeatureExtractionPipelineCallback & Disposable} FeatureExtractionPipelineType
|
|
21192
21315
|
*/
|
|
21193
21316
|
|
|
21194
21317
|
/**
|
|
21195
21318
|
* Feature extraction pipeline using no model head. This pipeline extracts the hidden
|
|
21196
21319
|
* states from the base transformer, which can be used as features in downstream tasks.
|
|
21197
|
-
*
|
|
21320
|
+
*
|
|
21198
21321
|
* **Example:** Run feature extraction with `bert-base-uncased` (without pooling/normalization).
|
|
21199
21322
|
* ```javascript
|
|
21200
21323
|
* const extractor = await pipeline('feature-extraction', 'Xenova/bert-base-uncased', { revision: 'default' });
|
|
@@ -21205,7 +21328,7 @@ class ZeroShotClassificationPipeline extends (/** @type {new (options: TextPipel
|
|
|
21205
21328
|
* // dims: [1, 8, 768]
|
|
21206
21329
|
* // }
|
|
21207
21330
|
* ```
|
|
21208
|
-
*
|
|
21331
|
+
*
|
|
21209
21332
|
* **Example:** Run feature extraction with `bert-base-uncased` (with pooling/normalization).
|
|
21210
21333
|
* ```javascript
|
|
21211
21334
|
* const extractor = await pipeline('feature-extraction', 'Xenova/bert-base-uncased', { revision: 'default' });
|
|
@@ -21216,7 +21339,7 @@ class ZeroShotClassificationPipeline extends (/** @type {new (options: TextPipel
|
|
|
21216
21339
|
* // dims: [1, 768]
|
|
21217
21340
|
* // }
|
|
21218
21341
|
* ```
|
|
21219
|
-
*
|
|
21342
|
+
*
|
|
21220
21343
|
* **Example:** Calculating embeddings with `sentence-transformers` models.
|
|
21221
21344
|
* ```javascript
|
|
21222
21345
|
* const extractor = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2');
|
|
@@ -21297,19 +21420,19 @@ class FeatureExtractionPipeline extends (/** @type {new (options: TextPipelineCo
|
|
|
21297
21420
|
/**
|
|
21298
21421
|
* @typedef {Object} ImageFeatureExtractionPipelineOptions Parameters specific to image feature extraction pipelines.
|
|
21299
21422
|
* @property {boolean} [pool=null] Whether or not to return the pooled output. If set to `false`, the model will return the raw hidden states.
|
|
21300
|
-
*
|
|
21423
|
+
*
|
|
21301
21424
|
* @callback ImageFeatureExtractionPipelineCallback Extract the features of the input(s).
|
|
21302
21425
|
* @param {ImagePipelineInputs} images One or several images (or one list of images) to get the features of.
|
|
21303
21426
|
* @param {ImageFeatureExtractionPipelineOptions} [options] The options to use for image feature extraction.
|
|
21304
21427
|
* @returns {Promise<Tensor>} The image features computed by the model.
|
|
21305
|
-
*
|
|
21428
|
+
*
|
|
21306
21429
|
* @typedef {ImagePipelineConstructorArgs & ImageFeatureExtractionPipelineCallback & Disposable} ImageFeatureExtractionPipelineType
|
|
21307
21430
|
*/
|
|
21308
21431
|
|
|
21309
21432
|
/**
|
|
21310
21433
|
* Image feature extraction pipeline using no model head. This pipeline extracts the hidden
|
|
21311
21434
|
* states from the base transformer, which can be used as features in downstream tasks.
|
|
21312
|
-
*
|
|
21435
|
+
*
|
|
21313
21436
|
* **Example:** Perform image feature extraction with `Xenova/vit-base-patch16-224-in21k`.
|
|
21314
21437
|
* ```javascript
|
|
21315
21438
|
* const image_feature_extractor = await pipeline('image-feature-extraction', 'Xenova/vit-base-patch16-224-in21k');
|
|
@@ -21322,7 +21445,7 @@ class FeatureExtractionPipeline extends (/** @type {new (options: TextPipelineCo
|
|
|
21322
21445
|
* // size: 151296
|
|
21323
21446
|
* // }
|
|
21324
21447
|
* ```
|
|
21325
|
-
*
|
|
21448
|
+
*
|
|
21326
21449
|
* **Example:** Compute image embeddings with `Xenova/clip-vit-base-patch32`.
|
|
21327
21450
|
* ```javascript
|
|
21328
21451
|
* const image_feature_extractor = await pipeline('image-feature-extraction', 'Xenova/clip-vit-base-patch32');
|
|
@@ -21378,12 +21501,12 @@ class ImageFeatureExtractionPipeline extends (/** @type {new (options: ImagePipe
|
|
|
21378
21501
|
* @property {string} label The label predicted.
|
|
21379
21502
|
* @property {number} score The corresponding probability.
|
|
21380
21503
|
* @typedef {AudioClassificationSingle[]} AudioClassificationOutput
|
|
21381
|
-
*
|
|
21504
|
+
*
|
|
21382
21505
|
* @typedef {Object} AudioClassificationPipelineOptions Parameters specific to audio classification pipelines.
|
|
21383
21506
|
* @property {number} [top_k=5] The number of top labels that will be returned by the pipeline.
|
|
21384
21507
|
* If the provided number is `null` or higher than the number of labels available in the model configuration,
|
|
21385
21508
|
* it will default to the number of labels.
|
|
21386
|
-
*
|
|
21509
|
+
*
|
|
21387
21510
|
* @callback AudioClassificationPipelineCallback Classify the audio sequence(s) given as inputs.
|
|
21388
21511
|
* @param {AudioPipelineInputs} audio The input audio file(s) to be classified. The input is either:
|
|
21389
21512
|
* - `string` or `URL` that is the filename/URL of the audio file, the file will be read at the processor's sampling rate
|
|
@@ -21392,14 +21515,14 @@ class ImageFeatureExtractionPipeline extends (/** @type {new (options: ImagePipe
|
|
|
21392
21515
|
* - `Float32Array` or `Float64Array` of shape `(n, )`, representing the raw audio at the correct sampling rate (no further check will be done).
|
|
21393
21516
|
* @param {AudioClassificationPipelineOptions} [options] The options to use for audio classification.
|
|
21394
21517
|
* @returns {Promise<AudioClassificationOutput|AudioClassificationOutput[]>} An array or object containing the predicted labels and scores.
|
|
21395
|
-
*
|
|
21518
|
+
*
|
|
21396
21519
|
* @typedef {AudioPipelineConstructorArgs & AudioClassificationPipelineCallback & Disposable} AudioClassificationPipelineType
|
|
21397
21520
|
*/
|
|
21398
21521
|
|
|
21399
21522
|
/**
|
|
21400
21523
|
* Audio classification pipeline using any `AutoModelForAudioClassification`.
|
|
21401
21524
|
* This pipeline predicts the class of a raw waveform or an audio file.
|
|
21402
|
-
*
|
|
21525
|
+
*
|
|
21403
21526
|
* **Example:** Perform audio classification with `Xenova/wav2vec2-large-xlsr-53-gender-recognition-librispeech`.
|
|
21404
21527
|
* ```javascript
|
|
21405
21528
|
* const classifier = await pipeline('audio-classification', 'Xenova/wav2vec2-large-xlsr-53-gender-recognition-librispeech');
|
|
@@ -21410,7 +21533,7 @@ class ImageFeatureExtractionPipeline extends (/** @type {new (options: ImagePipe
|
|
|
21410
21533
|
* // { label: 'female', score: 0.001845747814513743 }
|
|
21411
21534
|
* // ]
|
|
21412
21535
|
* ```
|
|
21413
|
-
*
|
|
21536
|
+
*
|
|
21414
21537
|
* **Example:** Perform audio classification with `Xenova/ast-finetuned-audioset-10-10-0.4593` and return top 4 results.
|
|
21415
21538
|
* ```javascript
|
|
21416
21539
|
* const classifier = await pipeline('audio-classification', 'Xenova/ast-finetuned-audioset-10-10-0.4593');
|
|
@@ -21475,12 +21598,12 @@ class AudioClassificationPipeline extends (/** @type {new (options: AudioPipelin
|
|
|
21475
21598
|
* @typedef {Object} ZeroShotAudioClassificationOutput
|
|
21476
21599
|
* @property {string} label The label identified by the model. It is one of the suggested `candidate_label`.
|
|
21477
21600
|
* @property {number} score The score attributed by the model for that label (between 0 and 1).
|
|
21478
|
-
*
|
|
21601
|
+
*
|
|
21479
21602
|
* @typedef {Object} ZeroShotAudioClassificationPipelineOptions Parameters specific to zero-shot audio classification pipelines.
|
|
21480
21603
|
* @property {string} [hypothesis_template="This is a sound of {}."] The sentence used in conjunction with `candidate_labels`
|
|
21481
21604
|
* to attempt the audio classification by replacing the placeholder with the candidate_labels.
|
|
21482
21605
|
* Then likelihood is estimated by using `logits_per_audio`.
|
|
21483
|
-
*
|
|
21606
|
+
*
|
|
21484
21607
|
* @callback ZeroShotAudioClassificationPipelineCallback Classify the audio sequence(s) given as inputs.
|
|
21485
21608
|
* @param {AudioPipelineInputs} audio The input audio file(s) to be classified. The input is either:
|
|
21486
21609
|
* - `string` or `URL` that is the filename/URL of the audio file, the file will be read at the processor's sampling rate
|
|
@@ -21490,14 +21613,14 @@ class AudioClassificationPipeline extends (/** @type {new (options: AudioPipelin
|
|
|
21490
21613
|
* @param {string[]} candidate_labels The candidate labels for this audio.
|
|
21491
21614
|
* @param {ZeroShotAudioClassificationPipelineOptions} [options] The options to use for zero-shot audio classification.
|
|
21492
21615
|
* @returns {Promise<ZeroShotAudioClassificationOutput[]|ZeroShotAudioClassificationOutput[][]>} An array of objects containing the predicted labels and scores.
|
|
21493
|
-
*
|
|
21616
|
+
*
|
|
21494
21617
|
* @typedef {TextAudioPipelineConstructorArgs & ZeroShotAudioClassificationPipelineCallback & Disposable} ZeroShotAudioClassificationPipelineType
|
|
21495
21618
|
*/
|
|
21496
21619
|
|
|
21497
21620
|
/**
|
|
21498
21621
|
* Zero shot audio classification pipeline using `ClapModel`. This pipeline predicts the class of an audio when you
|
|
21499
21622
|
* provide an audio and a set of `candidate_labels`.
|
|
21500
|
-
*
|
|
21623
|
+
*
|
|
21501
21624
|
* **Example:** Perform zero-shot audio classification with `Xenova/clap-htsat-unfused`.
|
|
21502
21625
|
* ```javascript
|
|
21503
21626
|
* const classifier = await pipeline('zero-shot-audio-classification', 'Xenova/clap-htsat-unfused');
|
|
@@ -21530,7 +21653,7 @@ class ZeroShotAudioClassificationPipeline extends (/** @type {new (options: Text
|
|
|
21530
21653
|
audio = [/** @type {AudioInput} */ (audio)];
|
|
21531
21654
|
}
|
|
21532
21655
|
|
|
21533
|
-
// Insert label into hypothesis template
|
|
21656
|
+
// Insert label into hypothesis template
|
|
21534
21657
|
const texts = candidate_labels.map(
|
|
21535
21658
|
x => hypothesis_template.replace('{}', x)
|
|
21536
21659
|
);
|
|
@@ -21574,7 +21697,7 @@ class ZeroShotAudioClassificationPipeline extends (/** @type {new (options: Text
|
|
|
21574
21697
|
* @property {string} text The recognized text.
|
|
21575
21698
|
* @property {Chunk[]} [chunks] When using `return_timestamps`, the `chunks` will become a list
|
|
21576
21699
|
* containing all the various text chunks identified by the model.
|
|
21577
|
-
*
|
|
21700
|
+
*
|
|
21578
21701
|
* @typedef {Object} AutomaticSpeechRecognitionSpecificParams Parameters specific to automatic-speech-recognition pipelines.
|
|
21579
21702
|
* @property {boolean|'word'} [return_timestamps] Whether to return timestamps or not. Default is `false`.
|
|
21580
21703
|
* @property {number} [chunk_length_s] The length of audio chunks to process in seconds. Default is 0 (no chunking).
|
|
@@ -21584,7 +21707,7 @@ class ZeroShotAudioClassificationPipeline extends (/** @type {new (options: Text
|
|
|
21584
21707
|
* @property {string} [task] The task to perform. Default is `null`, meaning it should be auto-detected.
|
|
21585
21708
|
* @property {number} [num_frames] The number of frames in the input audio.
|
|
21586
21709
|
* @typedef {import('./generation/configuration_utils.js').GenerationConfig & AutomaticSpeechRecognitionSpecificParams} AutomaticSpeechRecognitionConfig
|
|
21587
|
-
*
|
|
21710
|
+
*
|
|
21588
21711
|
* @callback AutomaticSpeechRecognitionPipelineCallback Transcribe the audio sequence(s) given as inputs to text.
|
|
21589
21712
|
* @param {AudioPipelineInputs} audio The input audio file(s) to be transcribed. The input is either:
|
|
21590
21713
|
* - `string` or `URL` that is the filename/URL of the audio file, the file will be read at the processor's sampling rate
|
|
@@ -21593,7 +21716,7 @@ class ZeroShotAudioClassificationPipeline extends (/** @type {new (options: Text
|
|
|
21593
21716
|
* - `Float32Array` or `Float64Array` of shape `(n, )`, representing the raw audio at the correct sampling rate (no further check will be done).
|
|
21594
21717
|
* @param {Partial<AutomaticSpeechRecognitionConfig>} [options] Additional keyword arguments to pass along to the generate method of the model.
|
|
21595
21718
|
* @returns {Promise<AutomaticSpeechRecognitionOutput|AutomaticSpeechRecognitionOutput[]>} An object containing the transcription text and optionally timestamps if `return_timestamps` is `true`.
|
|
21596
|
-
*
|
|
21719
|
+
*
|
|
21597
21720
|
* @typedef {TextAudioPipelineConstructorArgs & AutomaticSpeechRecognitionPipelineCallback & Disposable} AutomaticSpeechRecognitionPipelineType
|
|
21598
21721
|
*/
|
|
21599
21722
|
|
|
@@ -21607,7 +21730,7 @@ class ZeroShotAudioClassificationPipeline extends (/** @type {new (options: Text
|
|
|
21607
21730
|
* const output = await transcriber(url);
|
|
21608
21731
|
* // { text: " And so my fellow Americans ask not what your country can do for you, ask what you can do for your country." }
|
|
21609
21732
|
* ```
|
|
21610
|
-
*
|
|
21733
|
+
*
|
|
21611
21734
|
* **Example:** Transcribe English w/ timestamps.
|
|
21612
21735
|
* ```javascript
|
|
21613
21736
|
* const transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en');
|
|
@@ -21621,7 +21744,7 @@ class ZeroShotAudioClassificationPipeline extends (/** @type {new (options: Text
|
|
|
21621
21744
|
* // ]
|
|
21622
21745
|
* // }
|
|
21623
21746
|
* ```
|
|
21624
|
-
*
|
|
21747
|
+
*
|
|
21625
21748
|
* **Example:** Transcribe English w/ word-level timestamps.
|
|
21626
21749
|
* ```javascript
|
|
21627
21750
|
* const transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en');
|
|
@@ -21640,7 +21763,7 @@ class ZeroShotAudioClassificationPipeline extends (/** @type {new (options: Text
|
|
|
21640
21763
|
* // ]
|
|
21641
21764
|
* // }
|
|
21642
21765
|
* ```
|
|
21643
|
-
*
|
|
21766
|
+
*
|
|
21644
21767
|
* **Example:** Transcribe French.
|
|
21645
21768
|
* ```javascript
|
|
21646
21769
|
* const transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-small');
|
|
@@ -21648,7 +21771,7 @@ class ZeroShotAudioClassificationPipeline extends (/** @type {new (options: Text
|
|
|
21648
21771
|
* const output = await transcriber(url, { language: 'french', task: 'transcribe' });
|
|
21649
21772
|
* // { text: " J'adore, j'aime, je n'aime pas, je déteste." }
|
|
21650
21773
|
* ```
|
|
21651
|
-
*
|
|
21774
|
+
*
|
|
21652
21775
|
* **Example:** Translate French to English.
|
|
21653
21776
|
* ```javascript
|
|
21654
21777
|
* const transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-small');
|
|
@@ -21656,7 +21779,7 @@ class ZeroShotAudioClassificationPipeline extends (/** @type {new (options: Text
|
|
|
21656
21779
|
* const output = await transcriber(url, { language: 'french', task: 'translate' });
|
|
21657
21780
|
* // { text: " I love, I like, I don't like, I hate." }
|
|
21658
21781
|
* ```
|
|
21659
|
-
*
|
|
21782
|
+
*
|
|
21660
21783
|
* **Example:** Transcribe/translate audio longer than 30 seconds.
|
|
21661
21784
|
* ```javascript
|
|
21662
21785
|
* const transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en');
|
|
@@ -21879,18 +22002,18 @@ class AutomaticSpeechRecognitionPipeline extends (/** @type {new (options: TextA
|
|
|
21879
22002
|
* @typedef {Object} ImageToTextSingle
|
|
21880
22003
|
* @property {string} generated_text The generated text.
|
|
21881
22004
|
* @typedef {ImageToTextSingle[]} ImageToTextOutput
|
|
21882
|
-
*
|
|
22005
|
+
*
|
|
21883
22006
|
* @callback ImageToTextPipelineCallback Generate text (e.g., a caption) for the image(s) passed as inputs.
|
|
21884
22007
|
* @param {ImagePipelineInputs} texts The images to be captioned.
|
|
21885
22008
|
* @param {Partial<import('./generation/configuration_utils.js').GenerationConfig>} [options] Additional keyword arguments to pass along to the generate method of the model.
|
|
21886
22009
|
* @returns {Promise<ImageToTextOutput|ImageToTextOutput[]>} An object (or array of objects) containing the generated text(s).
|
|
21887
|
-
*
|
|
22010
|
+
*
|
|
21888
22011
|
* @typedef {TextImagePipelineConstructorArgs & ImageToTextPipelineCallback & Disposable} ImageToTextPipelineType
|
|
21889
22012
|
*/
|
|
21890
22013
|
|
|
21891
22014
|
/**
|
|
21892
22015
|
* Image To Text pipeline using an `AutoModelForVision2Seq`. This pipeline predicts a caption for a given image.
|
|
21893
|
-
*
|
|
22016
|
+
*
|
|
21894
22017
|
* **Example:** Generate a caption for an image w/ `Xenova/vit-gpt2-image-captioning`.
|
|
21895
22018
|
* ```javascript
|
|
21896
22019
|
* const captioner = await pipeline('image-to-text', 'Xenova/vit-gpt2-image-captioning');
|
|
@@ -21898,7 +22021,7 @@ class AutomaticSpeechRecognitionPipeline extends (/** @type {new (options: TextA
|
|
|
21898
22021
|
* const output = await captioner(url);
|
|
21899
22022
|
* // [{ generated_text: 'a cat laying on a couch with another cat' }]
|
|
21900
22023
|
* ```
|
|
21901
|
-
*
|
|
22024
|
+
*
|
|
21902
22025
|
* **Example:** Optical Character Recognition (OCR) w/ `Xenova/trocr-small-handwritten`.
|
|
21903
22026
|
* ```javascript
|
|
21904
22027
|
* const captioner = await pipeline('image-to-text', 'Xenova/trocr-small-handwritten');
|
|
@@ -21944,22 +22067,22 @@ class ImageToTextPipeline extends (/** @type {new (options: TextImagePipelineCon
|
|
|
21944
22067
|
* @property {string} label The label identified by the model.
|
|
21945
22068
|
* @property {number} score The score attributed by the model for that label.
|
|
21946
22069
|
* @typedef {ImageClassificationSingle[]} ImageClassificationOutput
|
|
21947
|
-
*
|
|
22070
|
+
*
|
|
21948
22071
|
* @typedef {Object} ImageClassificationPipelineOptions Parameters specific to image classification pipelines.
|
|
21949
|
-
* @property {number} [top_k=1] The number of top labels that will be returned by the pipeline.
|
|
21950
|
-
*
|
|
22072
|
+
* @property {number} [top_k=1] The number of top labels that will be returned by the pipeline.
|
|
22073
|
+
*
|
|
21951
22074
|
* @callback ImageClassificationPipelineCallback Assign labels to the image(s) passed as inputs.
|
|
21952
22075
|
* @param {ImagePipelineInputs} images The input image(s) to be classified.
|
|
21953
22076
|
* @param {ImageClassificationPipelineOptions} [options] The options to use for image classification.
|
|
21954
22077
|
* @returns {Promise<ImageClassificationOutput|ImageClassificationOutput[]>} An array or object containing the predicted labels and scores.
|
|
21955
|
-
*
|
|
22078
|
+
*
|
|
21956
22079
|
* @typedef {ImagePipelineConstructorArgs & ImageClassificationPipelineCallback & Disposable} ImageClassificationPipelineType
|
|
21957
22080
|
*/
|
|
21958
22081
|
|
|
21959
22082
|
/**
|
|
21960
22083
|
* Image classification pipeline using any `AutoModelForImageClassification`.
|
|
21961
22084
|
* This pipeline predicts the class of an image.
|
|
21962
|
-
*
|
|
22085
|
+
*
|
|
21963
22086
|
* **Example:** Classify an image.
|
|
21964
22087
|
* ```javascript
|
|
21965
22088
|
* const classifier = await pipeline('image-classification', 'Xenova/vit-base-patch16-224');
|
|
@@ -21969,7 +22092,7 @@ class ImageToTextPipeline extends (/** @type {new (options: TextImagePipelineCon
|
|
|
21969
22092
|
* // { label: 'tiger, Panthera tigris', score: 0.632695734500885 },
|
|
21970
22093
|
* // ]
|
|
21971
22094
|
* ```
|
|
21972
|
-
*
|
|
22095
|
+
*
|
|
21973
22096
|
* **Example:** Classify an image and return top `n` classes.
|
|
21974
22097
|
* ```javascript
|
|
21975
22098
|
* const classifier = await pipeline('image-classification', 'Xenova/vit-base-patch16-224');
|
|
@@ -21981,7 +22104,7 @@ class ImageToTextPipeline extends (/** @type {new (options: TextImagePipelineCon
|
|
|
21981
22104
|
* // { label: 'lion, king of beasts, Panthera leo', score: 0.00045060308184474707 },
|
|
21982
22105
|
* // ]
|
|
21983
22106
|
* ```
|
|
21984
|
-
*
|
|
22107
|
+
*
|
|
21985
22108
|
* **Example:** Classify an image and return all classes.
|
|
21986
22109
|
* ```javascript
|
|
21987
22110
|
* const classifier = await pipeline('image-classification', 'Xenova/vit-base-patch16-224');
|
|
@@ -22048,7 +22171,7 @@ class ImageClassificationPipeline extends (/** @type {new (options: ImagePipelin
|
|
|
22048
22171
|
* @property {string|null} label The label of the segment.
|
|
22049
22172
|
* @property {number|null} score The score of the segment.
|
|
22050
22173
|
* @property {RawImage} mask The mask of the segment.
|
|
22051
|
-
*
|
|
22174
|
+
*
|
|
22052
22175
|
* @typedef {Object} ImageSegmentationPipelineOptions Parameters specific to image segmentation pipelines.
|
|
22053
22176
|
* @property {number} [threshold=0.5] Probability threshold to filter out predicted masks.
|
|
22054
22177
|
* @property {number} [mask_threshold=0.5] Threshold to use when turning the predicted masks into binary values.
|
|
@@ -22057,19 +22180,19 @@ class ImageClassificationPipeline extends (/** @type {new (options: ImagePipelin
|
|
|
22057
22180
|
* depending on model capabilities. If not set, the pipeline will attempt to resolve (in that order).
|
|
22058
22181
|
* @property {number[]} [label_ids_to_fuse=null] List of label ids to fuse. If not set, do not fuse any labels.
|
|
22059
22182
|
* @property {number[][]} [target_sizes=null] List of target sizes for the input images. If not set, use the original image sizes.
|
|
22060
|
-
*
|
|
22183
|
+
*
|
|
22061
22184
|
* @callback ImageSegmentationPipelineCallback Segment the input images.
|
|
22062
22185
|
* @param {ImagePipelineInputs} images The input images.
|
|
22063
22186
|
* @param {ImageSegmentationPipelineOptions} [options] The options to use for image segmentation.
|
|
22064
22187
|
* @returns {Promise<ImageSegmentationPipelineOutput[]>} The annotated segments.
|
|
22065
|
-
*
|
|
22188
|
+
*
|
|
22066
22189
|
* @typedef {ImagePipelineConstructorArgs & ImageSegmentationPipelineCallback & Disposable} ImageSegmentationPipelineType
|
|
22067
22190
|
*/
|
|
22068
22191
|
|
|
22069
22192
|
/**
|
|
22070
22193
|
* Image segmentation pipeline using any `AutoModelForXXXSegmentation`.
|
|
22071
22194
|
* This pipeline predicts masks of objects and their classes.
|
|
22072
|
-
*
|
|
22195
|
+
*
|
|
22073
22196
|
* **Example:** Perform image segmentation with `Xenova/detr-resnet-50-panoptic`.
|
|
22074
22197
|
* ```javascript
|
|
22075
22198
|
* const segmenter = await pipeline('image-segmentation', 'Xenova/detr-resnet-50-panoptic');
|
|
@@ -22153,12 +22276,17 @@ class ImageSegmentationPipeline extends (/** @type {new (options: ImagePipelineC
|
|
|
22153
22276
|
/** @type {ImageSegmentationPipelineOutput[]} */
|
|
22154
22277
|
const annotation = [];
|
|
22155
22278
|
if (!subtask) {
|
|
22279
|
+
// We define an epsilon to safeguard against numerical/precision issues when detecting
|
|
22280
|
+
// the normalization mode of the output (i.e., sigmoid already applied, or not).
|
|
22281
|
+
// See https://github.com/microsoft/onnxruntime/issues/23943 for more information.
|
|
22282
|
+
const epsilon = 1e-5;
|
|
22283
|
+
|
|
22156
22284
|
// Perform standard image segmentation
|
|
22157
22285
|
const result = output[outputNames[0]];
|
|
22158
22286
|
for (let i = 0; i < imageSizes.length; ++i) {
|
|
22159
22287
|
const size = imageSizes[i];
|
|
22160
22288
|
const item = result[i];
|
|
22161
|
-
if (item.data.some(x => x <
|
|
22289
|
+
if (item.data.some(x => x < -epsilon || x > 1 + epsilon)) {
|
|
22162
22290
|
item.sigmoid_();
|
|
22163
22291
|
}
|
|
22164
22292
|
const mask = await _utils_image_js__WEBPACK_IMPORTED_MODULE_9__.RawImage.fromTensor(item.mul_(255).to('uint8')).resize(size[1], size[0]);
|
|
@@ -22227,19 +22355,19 @@ class ImageSegmentationPipeline extends (/** @type {new (options: ImagePipelineC
|
|
|
22227
22355
|
|
|
22228
22356
|
/**
|
|
22229
22357
|
* @typedef {Object} BackgroundRemovalPipelineOptions Parameters specific to background removal pipelines.
|
|
22230
|
-
*
|
|
22358
|
+
*
|
|
22231
22359
|
* @callback BackgroundRemovalPipelineCallback Remove the background from the input images.
|
|
22232
22360
|
* @param {ImagePipelineInputs} images The input images.
|
|
22233
22361
|
* @param {BackgroundRemovalPipelineOptions} [options] The options to use for background removal.
|
|
22234
22362
|
* @returns {Promise<RawImage[]>} The images with the background removed.
|
|
22235
|
-
*
|
|
22363
|
+
*
|
|
22236
22364
|
* @typedef {ImagePipelineConstructorArgs & BackgroundRemovalPipelineCallback & Disposable} BackgroundRemovalPipelineType
|
|
22237
22365
|
*/
|
|
22238
22366
|
|
|
22239
22367
|
/**
|
|
22240
22368
|
* Background removal pipeline using certain `AutoModelForXXXSegmentation`.
|
|
22241
22369
|
* This pipeline removes the backgrounds of images.
|
|
22242
|
-
*
|
|
22370
|
+
*
|
|
22243
22371
|
* **Example:** Perform background removal with `Xenova/modnet`.
|
|
22244
22372
|
* ```javascript
|
|
22245
22373
|
* const segmenter = await pipeline('background-removal', 'Xenova/modnet');
|
|
@@ -22250,7 +22378,7 @@ class ImageSegmentationPipeline extends (/** @type {new (options: ImagePipelineC
|
|
|
22250
22378
|
* // ]
|
|
22251
22379
|
* ```
|
|
22252
22380
|
*/
|
|
22253
|
-
class BackgroundRemovalPipeline extends (/** @type {new (options: ImagePipelineConstructorArgs) =>
|
|
22381
|
+
class BackgroundRemovalPipeline extends (/** @type {new (options: ImagePipelineConstructorArgs) => BackgroundRemovalPipelineType} */ (/** @type {any} */(ImageSegmentationPipeline))) {
|
|
22254
22382
|
/**
|
|
22255
22383
|
* Create a new BackgroundRemovalPipeline.
|
|
22256
22384
|
* @param {ImagePipelineConstructorArgs} options An object used to instantiate the pipeline.
|
|
@@ -22285,25 +22413,25 @@ class BackgroundRemovalPipeline extends (/** @type {new (options: ImagePipelineC
|
|
|
22285
22413
|
* @typedef {Object} ZeroShotImageClassificationOutput
|
|
22286
22414
|
* @property {string} label The label identified by the model. It is one of the suggested `candidate_labels`.
|
|
22287
22415
|
* @property {number} score The score attributed by the model for that label (between 0 and 1).
|
|
22288
|
-
*
|
|
22416
|
+
*
|
|
22289
22417
|
* @typedef {Object} ZeroShotImageClassificationPipelineOptions Parameters specific to zero-shot image classification pipelines.
|
|
22290
22418
|
* @property {string} [hypothesis_template="This is a photo of {}"] The sentence used in conjunction with `candidate_labels`
|
|
22291
22419
|
* to attempt the image classification by replacing the placeholder with the candidate_labels.
|
|
22292
22420
|
* Then likelihood is estimated by using `logits_per_image`.
|
|
22293
|
-
*
|
|
22421
|
+
*
|
|
22294
22422
|
* @callback ZeroShotImageClassificationPipelineCallback Assign labels to the image(s) passed as inputs.
|
|
22295
22423
|
* @param {ImagePipelineInputs} images The input images.
|
|
22296
22424
|
* @param {string[]} candidate_labels The candidate labels for this image.
|
|
22297
22425
|
* @param {ZeroShotImageClassificationPipelineOptions} [options] The options to use for zero-shot image classification.
|
|
22298
22426
|
* @returns {Promise<ZeroShotImageClassificationOutput[]|ZeroShotImageClassificationOutput[][]>} An array of objects containing the predicted labels and scores.
|
|
22299
|
-
*
|
|
22427
|
+
*
|
|
22300
22428
|
* @typedef {TextImagePipelineConstructorArgs & ZeroShotImageClassificationPipelineCallback & Disposable} ZeroShotImageClassificationPipelineType
|
|
22301
22429
|
*/
|
|
22302
22430
|
|
|
22303
22431
|
/**
|
|
22304
22432
|
* Zero shot image classification pipeline. This pipeline predicts the class of
|
|
22305
22433
|
* an image when you provide an image and a set of `candidate_labels`.
|
|
22306
|
-
*
|
|
22434
|
+
*
|
|
22307
22435
|
* **Example:** Zero shot image classification w/ `Xenova/clip-vit-base-patch32`.
|
|
22308
22436
|
* ```javascript
|
|
22309
22437
|
* const classifier = await pipeline('zero-shot-image-classification', 'Xenova/clip-vit-base-patch32');
|
|
@@ -22333,7 +22461,7 @@ class ZeroShotImageClassificationPipeline extends (/** @type {new (options: Text
|
|
|
22333
22461
|
const isBatched = Array.isArray(images);
|
|
22334
22462
|
const preparedImages = await prepareImages(images);
|
|
22335
22463
|
|
|
22336
|
-
// Insert label into hypothesis template
|
|
22464
|
+
// Insert label into hypothesis template
|
|
22337
22465
|
const texts = candidate_labels.map(
|
|
22338
22466
|
x => hypothesis_template.replace('{}', x)
|
|
22339
22467
|
);
|
|
@@ -22380,23 +22508,23 @@ class ZeroShotImageClassificationPipeline extends (/** @type {new (options: Text
|
|
|
22380
22508
|
* @property {number} score The score attributed by the model for that label.
|
|
22381
22509
|
* @property {BoundingBox} box The bounding box of the detected object in the image's original size, or as a percentage if `percentage` is set to true.
|
|
22382
22510
|
* @typedef {ObjectDetectionPipelineSingle[]} ObjectDetectionPipelineOutput
|
|
22383
|
-
*
|
|
22511
|
+
*
|
|
22384
22512
|
* @typedef {Object} ObjectDetectionPipelineOptions Parameters specific to object detection pipelines.
|
|
22385
22513
|
* @property {number} [threshold=0.9] The threshold used to filter boxes by score.
|
|
22386
22514
|
* @property {boolean} [percentage=false] Whether to return the boxes coordinates in percentage (true) or in pixels (false).
|
|
22387
|
-
*
|
|
22515
|
+
*
|
|
22388
22516
|
* @callback ObjectDetectionPipelineCallback Detect objects (bounding boxes & classes) in the image(s) passed as inputs.
|
|
22389
22517
|
* @param {ImagePipelineInputs} images The input images.
|
|
22390
22518
|
* @param {ObjectDetectionPipelineOptions} [options] The options to use for object detection.
|
|
22391
|
-
* @returns {Promise<ObjectDetectionPipelineOutput|ObjectDetectionPipelineOutput[]>} A list of objects or a list of list of objects.
|
|
22392
|
-
*
|
|
22519
|
+
* @returns {Promise<ObjectDetectionPipelineOutput|ObjectDetectionPipelineOutput[]>} A list of objects or a list of list of objects.
|
|
22520
|
+
*
|
|
22393
22521
|
* @typedef {ImagePipelineConstructorArgs & ObjectDetectionPipelineCallback & Disposable} ObjectDetectionPipelineType
|
|
22394
22522
|
*/
|
|
22395
22523
|
|
|
22396
22524
|
/**
|
|
22397
22525
|
* Object detection pipeline using any `AutoModelForObjectDetection`.
|
|
22398
22526
|
* This pipeline predicts bounding boxes of objects and their classes.
|
|
22399
|
-
*
|
|
22527
|
+
*
|
|
22400
22528
|
* **Example:** Run object-detection with `Xenova/detr-resnet-50`.
|
|
22401
22529
|
* ```javascript
|
|
22402
22530
|
* const detector = await pipeline('object-detection', 'Xenova/detr-resnet-50');
|
|
@@ -22470,27 +22598,27 @@ class ObjectDetectionPipeline extends (/** @type {new (options: ImagePipelineCon
|
|
|
22470
22598
|
* @property {string} label Text query corresponding to the found object.
|
|
22471
22599
|
* @property {number} score Score corresponding to the object (between 0 and 1).
|
|
22472
22600
|
* @property {BoundingBox} box Bounding box of the detected object in image's original size, or as a percentage if `percentage` is set to true.
|
|
22473
|
-
*
|
|
22601
|
+
*
|
|
22474
22602
|
* @typedef {Object} ZeroShotObjectDetectionPipelineOptions Parameters specific to zero-shot object detection pipelines.
|
|
22475
22603
|
* @property {number} [threshold=0.1] The probability necessary to make a prediction.
|
|
22476
22604
|
* @property {number} [top_k=null] The number of top predictions that will be returned by the pipeline.
|
|
22477
22605
|
* If the provided number is `null` or higher than the number of predictions available, it will default
|
|
22478
22606
|
* to the number of predictions.
|
|
22479
22607
|
* @property {boolean} [percentage=false] Whether to return the boxes coordinates in percentage (true) or in pixels (false).
|
|
22480
|
-
*
|
|
22608
|
+
*
|
|
22481
22609
|
* @callback ZeroShotObjectDetectionPipelineCallback Detect objects (bounding boxes & classes) in the image(s) passed as inputs.
|
|
22482
22610
|
* @param {ImagePipelineInputs} images The input images.
|
|
22483
22611
|
* @param {string[]} candidate_labels What the model should recognize in the image.
|
|
22484
22612
|
* @param {ZeroShotObjectDetectionPipelineOptions} [options] The options to use for zero-shot object detection.
|
|
22485
22613
|
* @returns {Promise<ZeroShotObjectDetectionOutput[]|ZeroShotObjectDetectionOutput[][]>} An array of objects containing the predicted labels, scores, and bounding boxes.
|
|
22486
|
-
*
|
|
22614
|
+
*
|
|
22487
22615
|
* @typedef {TextImagePipelineConstructorArgs & ZeroShotObjectDetectionPipelineCallback & Disposable} ZeroShotObjectDetectionPipelineType
|
|
22488
22616
|
*/
|
|
22489
22617
|
|
|
22490
22618
|
/**
|
|
22491
22619
|
* Zero-shot object detection pipeline. This pipeline predicts bounding boxes of
|
|
22492
22620
|
* objects when you provide an image and a set of `candidate_labels`.
|
|
22493
|
-
*
|
|
22621
|
+
*
|
|
22494
22622
|
* **Example:** Zero-shot object detection w/ `Xenova/owlvit-base-patch32`.
|
|
22495
22623
|
* ```javascript
|
|
22496
22624
|
* const detector = await pipeline('zero-shot-object-detection', 'Xenova/owlvit-base-patch32');
|
|
@@ -22520,7 +22648,7 @@ class ObjectDetectionPipeline extends (/** @type {new (options: ImagePipelineCon
|
|
|
22520
22648
|
* // }
|
|
22521
22649
|
* // ]
|
|
22522
22650
|
* ```
|
|
22523
|
-
*
|
|
22651
|
+
*
|
|
22524
22652
|
* **Example:** Zero-shot object detection w/ `Xenova/owlvit-base-patch32` (returning top 4 matches and setting a threshold).
|
|
22525
22653
|
* ```javascript
|
|
22526
22654
|
* const detector = await pipeline('zero-shot-object-detection', 'Xenova/owlvit-base-patch32');
|
|
@@ -22635,13 +22763,13 @@ class ZeroShotObjectDetectionPipeline extends (/** @type {new (options: TextImag
|
|
|
22635
22763
|
* @typedef {Object} DocumentQuestionAnsweringSingle
|
|
22636
22764
|
* @property {string} answer The generated text.
|
|
22637
22765
|
* @typedef {DocumentQuestionAnsweringSingle[]} DocumentQuestionAnsweringOutput
|
|
22638
|
-
*
|
|
22766
|
+
*
|
|
22639
22767
|
* @callback DocumentQuestionAnsweringPipelineCallback Answer the question given as input by using the document.
|
|
22640
22768
|
* @param {ImageInput} image The image of the document to use.
|
|
22641
22769
|
* @param {string} question A question to ask of the document.
|
|
22642
22770
|
* @param {Partial<import('./generation/configuration_utils.js').GenerationConfig>} [options] Additional keyword arguments to pass along to the generate method of the model.
|
|
22643
22771
|
* @returns {Promise<DocumentQuestionAnsweringOutput|DocumentQuestionAnsweringOutput[]>} An object (or array of objects) containing the answer(s).
|
|
22644
|
-
*
|
|
22772
|
+
*
|
|
22645
22773
|
* @typedef {TextImagePipelineConstructorArgs & DocumentQuestionAnsweringPipelineCallback & Disposable} DocumentQuestionAnsweringPipelineType
|
|
22646
22774
|
*/
|
|
22647
22775
|
|
|
@@ -22649,7 +22777,7 @@ class ZeroShotObjectDetectionPipeline extends (/** @type {new (options: TextImag
|
|
|
22649
22777
|
* Document Question Answering pipeline using any `AutoModelForDocumentQuestionAnswering`.
|
|
22650
22778
|
* The inputs/outputs are similar to the (extractive) question answering pipeline; however,
|
|
22651
22779
|
* the pipeline takes an image (and optional OCR'd words/boxes) as input instead of text context.
|
|
22652
|
-
*
|
|
22780
|
+
*
|
|
22653
22781
|
* **Example:** Answer questions about a document with `Xenova/donut-base-finetuned-docvqa`.
|
|
22654
22782
|
* ```javascript
|
|
22655
22783
|
* const qa_pipeline = await pipeline('document-question-answering', 'Xenova/donut-base-finetuned-docvqa');
|
|
@@ -22719,22 +22847,22 @@ class DocumentQuestionAnsweringPipeline extends (/** @type {new (options: TextIm
|
|
|
22719
22847
|
* @typedef {Object} TextToAudioOutput
|
|
22720
22848
|
* @property {Float32Array} audio The generated audio waveform.
|
|
22721
22849
|
* @property {number} sampling_rate The sampling rate of the generated audio waveform.
|
|
22722
|
-
*
|
|
22850
|
+
*
|
|
22723
22851
|
* @typedef {Object} TextToAudioPipelineOptions Parameters specific to text-to-audio pipelines.
|
|
22724
22852
|
* @property {Tensor|Float32Array|string|URL} [speaker_embeddings=null] The speaker embeddings (if the model requires it).
|
|
22725
|
-
*
|
|
22853
|
+
*
|
|
22726
22854
|
* @callback TextToAudioPipelineCallback Generates speech/audio from the inputs.
|
|
22727
22855
|
* @param {string|string[]} texts The text(s) to generate.
|
|
22728
22856
|
* @param {TextToAudioPipelineOptions} options Parameters passed to the model generation/forward method.
|
|
22729
22857
|
* @returns {Promise<TextToAudioOutput>} An object containing the generated audio and sampling rate.
|
|
22730
|
-
*
|
|
22858
|
+
*
|
|
22731
22859
|
* @typedef {TextToAudioPipelineConstructorArgs & TextToAudioPipelineCallback & Disposable} TextToAudioPipelineType
|
|
22732
22860
|
*/
|
|
22733
22861
|
|
|
22734
22862
|
/**
|
|
22735
22863
|
* Text-to-audio generation pipeline using any `AutoModelForTextToWaveform` or `AutoModelForTextToSpectrogram`.
|
|
22736
22864
|
* This pipeline generates an audio file from an input text and optional other conditional inputs.
|
|
22737
|
-
*
|
|
22865
|
+
*
|
|
22738
22866
|
* **Example:** Generate audio from text with `Xenova/speecht5_tts`.
|
|
22739
22867
|
* ```javascript
|
|
22740
22868
|
* const synthesizer = await pipeline('text-to-speech', 'Xenova/speecht5_tts', { quantized: false });
|
|
@@ -22745,17 +22873,17 @@ class DocumentQuestionAnsweringPipeline extends (/** @type {new (options: TextIm
|
|
|
22745
22873
|
* // sampling_rate: 16000
|
|
22746
22874
|
* // }
|
|
22747
22875
|
* ```
|
|
22748
|
-
*
|
|
22876
|
+
*
|
|
22749
22877
|
* You can then save the audio to a .wav file with the `wavefile` package:
|
|
22750
22878
|
* ```javascript
|
|
22751
22879
|
* import wavefile from 'wavefile';
|
|
22752
22880
|
* import fs from 'fs';
|
|
22753
|
-
*
|
|
22881
|
+
*
|
|
22754
22882
|
* const wav = new wavefile.WaveFile();
|
|
22755
22883
|
* wav.fromScratch(1, out.sampling_rate, '32f', out.audio);
|
|
22756
22884
|
* fs.writeFileSync('out.wav', wav.toBuffer());
|
|
22757
22885
|
* ```
|
|
22758
|
-
*
|
|
22886
|
+
*
|
|
22759
22887
|
* **Example:** Multilingual speech generation with `Xenova/mms-tts-fra`. See [here](https://huggingface.co/models?pipeline_tag=text-to-speech&other=vits&sort=trending) for the full list of available languages (1107).
|
|
22760
22888
|
* ```javascript
|
|
22761
22889
|
* const synthesizer = await pipeline('text-to-speech', 'Xenova/mms-tts-fra');
|
|
@@ -22861,13 +22989,13 @@ class TextToAudioPipeline extends (/** @type {new (options: TextToAudioPipelineC
|
|
|
22861
22989
|
* @callback ImageToImagePipelineCallback Transform the image(s) passed as inputs.
|
|
22862
22990
|
* @param {ImagePipelineInputs} images The images to transform.
|
|
22863
22991
|
* @returns {Promise<RawImage|RawImage[]>} The transformed image or list of images.
|
|
22864
|
-
*
|
|
22992
|
+
*
|
|
22865
22993
|
* @typedef {ImagePipelineConstructorArgs & ImageToImagePipelineCallback & Disposable} ImageToImagePipelineType
|
|
22866
22994
|
*/
|
|
22867
22995
|
|
|
22868
22996
|
/**
|
|
22869
22997
|
* Image to Image pipeline using any `AutoModelForImageToImage`. This pipeline generates an image based on a previous image input.
|
|
22870
|
-
*
|
|
22998
|
+
*
|
|
22871
22999
|
* **Example:** Super-resolution w/ `Xenova/swin2SR-classical-sr-x2-64`
|
|
22872
23000
|
* ```javascript
|
|
22873
23001
|
* const upscaler = await pipeline('image-to-image', 'Xenova/swin2SR-classical-sr-x2-64');
|
|
@@ -22912,17 +23040,17 @@ class ImageToImagePipeline extends (/** @type {new (options: ImagePipelineConstr
|
|
|
22912
23040
|
* @typedef {Object} DepthEstimationPipelineOutput
|
|
22913
23041
|
* @property {Tensor} predicted_depth The raw depth map predicted by the model.
|
|
22914
23042
|
* @property {RawImage} depth The processed depth map as an image (with the same size as the input image).
|
|
22915
|
-
*
|
|
23043
|
+
*
|
|
22916
23044
|
* @callback DepthEstimationPipelineCallback Predicts the depth for the image(s) passed as inputs.
|
|
22917
23045
|
* @param {ImagePipelineInputs} images The images to compute depth for.
|
|
22918
23046
|
* @returns {Promise<DepthEstimationPipelineOutput|DepthEstimationPipelineOutput[]>} An image or a list of images containing result(s).
|
|
22919
|
-
*
|
|
23047
|
+
*
|
|
22920
23048
|
* @typedef {ImagePipelineConstructorArgs & DepthEstimationPipelineCallback & Disposable} DepthEstimationPipelineType
|
|
22921
23049
|
*/
|
|
22922
23050
|
|
|
22923
23051
|
/**
|
|
22924
23052
|
* Depth estimation pipeline using any `AutoModelForDepthEstimation`. This pipeline predicts the depth of an image.
|
|
22925
|
-
*
|
|
23053
|
+
*
|
|
22926
23054
|
* **Example:** Depth estimation w/ `Xenova/dpt-hybrid-midas`
|
|
22927
23055
|
* ```javascript
|
|
22928
23056
|
* const depth_estimator = await pipeline('depth-estimation', 'Xenova/dpt-hybrid-midas');
|
|
@@ -23307,7 +23435,7 @@ const TASK_ALIASES = Object.freeze({
|
|
|
23307
23435
|
|
|
23308
23436
|
/**
|
|
23309
23437
|
* Utility factory method to build a `Pipeline` object.
|
|
23310
|
-
*
|
|
23438
|
+
*
|
|
23311
23439
|
* @template {PipelineType} T The type of pipeline to return.
|
|
23312
23440
|
* @param {T} task The task defining which pipeline will be returned. Currently accepted tasks are:
|
|
23313
23441
|
* - `"audio-classification"`: will return an `AudioClassificationPipeline`.
|
|
@@ -30583,7 +30711,7 @@ class RawImage {
|
|
|
30583
30711
|
|
|
30584
30712
|
/**
|
|
30585
30713
|
* Helper method for reading an image from a variety of input types.
|
|
30586
|
-
* @param {RawImage|string|URL} input
|
|
30714
|
+
* @param {RawImage|string|URL|Blob|HTMLCanvasElement|OffscreenCanvas} input
|
|
30587
30715
|
* @returns The image object.
|
|
30588
30716
|
*
|
|
30589
30717
|
* **Example:** Read image from a URL.
|
|
@@ -30602,6 +30730,14 @@ class RawImage {
|
|
|
30602
30730
|
return input;
|
|
30603
30731
|
} else if (typeof input === 'string' || input instanceof URL) {
|
|
30604
30732
|
return await this.fromURL(input);
|
|
30733
|
+
} else if (input instanceof Blob) {
|
|
30734
|
+
return await this.fromBlob(input);
|
|
30735
|
+
} else if (
|
|
30736
|
+
(typeof HTMLCanvasElement !== "undefined" && input instanceof HTMLCanvasElement)
|
|
30737
|
+
||
|
|
30738
|
+
(typeof OffscreenCanvas !== "undefined" && input instanceof OffscreenCanvas)
|
|
30739
|
+
) {
|
|
30740
|
+
return this.fromCanvas(input);
|
|
30605
30741
|
} else {
|
|
30606
30742
|
throw new Error(`Unsupported input type: ${typeof input}`);
|
|
30607
30743
|
}
|
|
@@ -33613,8 +33749,12 @@ function calc_unsqueeze_dims(dims, dim) {
|
|
|
33613
33749
|
* @private
|
|
33614
33750
|
*/
|
|
33615
33751
|
function safeIndex(index, size, dimension = null, boundsCheck = true) {
|
|
33616
|
-
if (
|
|
33617
|
-
|
|
33752
|
+
if (index < -size || index >= size) {
|
|
33753
|
+
if (boundsCheck) {
|
|
33754
|
+
throw new Error(`IndexError: index ${index} is out of bounds for dimension${dimension === null ? '' : ' ' + dimension} with size ${size}`);
|
|
33755
|
+
} else {
|
|
33756
|
+
return index < -size ? 0 : size;
|
|
33757
|
+
}
|
|
33618
33758
|
}
|
|
33619
33759
|
|
|
33620
33760
|
if (index < 0) {
|
|
@@ -34471,6 +34611,9 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
34471
34611
|
/* harmony export */ Gemma2ForCausalLM: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Gemma2ForCausalLM),
|
|
34472
34612
|
/* harmony export */ Gemma2Model: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Gemma2Model),
|
|
34473
34613
|
/* harmony export */ Gemma2PreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Gemma2PreTrainedModel),
|
|
34614
|
+
/* harmony export */ Gemma3ForCausalLM: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Gemma3ForCausalLM),
|
|
34615
|
+
/* harmony export */ Gemma3Model: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Gemma3Model),
|
|
34616
|
+
/* harmony export */ Gemma3PreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Gemma3PreTrainedModel),
|
|
34474
34617
|
/* harmony export */ GemmaForCausalLM: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.GemmaForCausalLM),
|
|
34475
34618
|
/* harmony export */ GemmaModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.GemmaModel),
|
|
34476
34619
|
/* harmony export */ GemmaPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.GemmaPreTrainedModel),
|
|
@@ -34572,6 +34715,10 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
34572
34715
|
/* harmony export */ MaskFormerPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MaskFormerPreTrainedModel),
|
|
34573
34716
|
/* harmony export */ MaskedLMOutput: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MaskedLMOutput),
|
|
34574
34717
|
/* harmony export */ MaxLengthCriteria: () => (/* reexport safe */ _generation_stopping_criteria_js__WEBPACK_IMPORTED_MODULE_20__.MaxLengthCriteria),
|
|
34718
|
+
/* harmony export */ Metric3DForDepthEstimation: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Metric3DForDepthEstimation),
|
|
34719
|
+
/* harmony export */ Metric3DPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Metric3DPreTrainedModel),
|
|
34720
|
+
/* harmony export */ Metric3Dv2ForDepthEstimation: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Metric3Dv2ForDepthEstimation),
|
|
34721
|
+
/* harmony export */ Metric3Dv2PreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.Metric3Dv2PreTrainedModel),
|
|
34575
34722
|
/* harmony export */ MgpstrForSceneTextRecognition: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MgpstrForSceneTextRecognition),
|
|
34576
34723
|
/* harmony export */ MgpstrModelOutput: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MgpstrModelOutput),
|
|
34577
34724
|
/* harmony export */ MgpstrPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.MgpstrPreTrainedModel),
|
|
@@ -34778,6 +34925,11 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
34778
34925
|
/* harmony export */ SmolVLMForConditionalGeneration: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.SmolVLMForConditionalGeneration),
|
|
34779
34926
|
/* harmony export */ SmolVLMImageProcessor: () => (/* reexport safe */ _models_image_processors_js__WEBPACK_IMPORTED_MODULE_14__.SmolVLMImageProcessor),
|
|
34780
34927
|
/* harmony export */ SmolVLMProcessor: () => (/* reexport safe */ _models_processors_js__WEBPACK_IMPORTED_MODULE_17__.SmolVLMProcessor),
|
|
34928
|
+
/* harmony export */ SnacDecoderModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.SnacDecoderModel),
|
|
34929
|
+
/* harmony export */ SnacEncoderModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.SnacEncoderModel),
|
|
34930
|
+
/* harmony export */ SnacFeatureExtractor: () => (/* reexport safe */ _models_feature_extractors_js__WEBPACK_IMPORTED_MODULE_11__.SnacFeatureExtractor),
|
|
34931
|
+
/* harmony export */ SnacModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.SnacModel),
|
|
34932
|
+
/* harmony export */ SnacPreTrainedModel: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.SnacPreTrainedModel),
|
|
34781
34933
|
/* harmony export */ SpeechT5FeatureExtractor: () => (/* reexport safe */ _models_feature_extractors_js__WEBPACK_IMPORTED_MODULE_11__.SpeechT5FeatureExtractor),
|
|
34782
34934
|
/* harmony export */ SpeechT5ForSpeechToText: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.SpeechT5ForSpeechToText),
|
|
34783
34935
|
/* harmony export */ SpeechT5ForTextToSpeech: () => (/* reexport safe */ _models_js__WEBPACK_IMPORTED_MODULE_2__.SpeechT5ForTextToSpeech),
|