@huggingface/transformers 3.3.3 → 3.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +13 -3
- package/dist/ort-wasm-simd-threaded.jsep.mjs +124 -115
- package/dist/ort-wasm-simd-threaded.jsep.wasm +0 -0
- package/dist/transformers.js +2778 -1592
- package/dist/transformers.js.map +1 -1
- package/dist/transformers.min.js +1 -1
- package/dist/transformers.min.js.map +1 -1
- package/dist/{transformers.cjs → transformers.node.cjs} +1699 -2530
- package/dist/transformers.node.cjs.map +1 -0
- package/dist/transformers.node.min.cjs +2 -0
- package/dist/transformers.node.min.cjs.map +1 -0
- package/dist/transformers.node.min.mjs +2 -0
- package/dist/transformers.node.min.mjs.map +1 -0
- package/dist/{transformers.mjs → transformers.node.mjs} +1738 -2510
- package/dist/transformers.node.mjs.map +1 -0
- package/dist/transformers.web.js +35876 -0
- package/dist/transformers.web.js.map +1 -0
- package/dist/transformers.web.min.js +2 -0
- package/dist/transformers.web.min.js.map +1 -0
- package/package.json +6 -6
- package/src/backends/onnx.js +14 -15
- package/src/configs.js +6 -1
- package/src/env.js +1 -1
- package/src/generation/streamers.js +4 -3
- package/src/models/dac/feature_extraction_dac.js +3 -0
- package/src/models/encodec/feature_extraction_encodec.js +32 -0
- package/src/models/feature_extractors.js +3 -0
- package/src/models/idefics3/image_processing_idefics3.js +1 -1
- package/src/models/image_processors.js +1 -0
- package/src/models/processors.js +2 -0
- package/src/models/smolvlm/image_processing_smolvlm.js +2 -0
- package/src/models/smolvlm/processing_smolvlm.js +2 -0
- package/src/models/snac/feature_extraction_snac.js +3 -0
- package/src/models/ultravox/processing_ultravox.js +54 -0
- package/src/models/whisper/common_whisper.js +7 -1
- package/src/models/whisper/feature_extraction_whisper.js +18 -10
- package/src/models.js +546 -78
- package/src/pipelines.js +246 -137
- package/src/tokenizers.js +42 -28
- package/src/transformers.js +1 -0
- package/src/utils/audio.js +2 -0
- package/src/utils/hub.js +140 -80
- package/src/utils/image.js +9 -1
- package/src/utils/maths.js +1 -1
- package/src/utils/tensor.js +12 -5
- package/src/utils/video.js +128 -0
- package/types/backends/onnx.d.ts +2 -2
- package/types/backends/onnx.d.ts.map +1 -1
- package/types/configs.d.ts +1 -1
- package/types/configs.d.ts.map +1 -1
- package/types/generation/streamers.d.ts.map +1 -1
- package/types/models/dac/feature_extraction_dac.d.ts +4 -0
- package/types/models/dac/feature_extraction_dac.d.ts.map +1 -0
- package/types/models/encodec/feature_extraction_encodec.d.ts +13 -0
- package/types/models/encodec/feature_extraction_encodec.d.ts.map +1 -0
- package/types/models/feature_extractors.d.ts +3 -0
- package/types/models/florence2/processing_florence2.d.ts +1 -1
- package/types/models/florence2/processing_florence2.d.ts.map +1 -1
- package/types/models/image_processors.d.ts +1 -0
- package/types/models/processors.d.ts +2 -0
- package/types/models/smolvlm/image_processing_smolvlm.d.ts +2 -0
- package/types/models/smolvlm/image_processing_smolvlm.d.ts.map +1 -0
- package/types/models/smolvlm/processing_smolvlm.d.ts +2 -0
- package/types/models/smolvlm/processing_smolvlm.d.ts.map +1 -0
- package/types/models/snac/feature_extraction_snac.d.ts +4 -0
- package/types/models/snac/feature_extraction_snac.d.ts.map +1 -0
- package/types/models/ultravox/processing_ultravox.d.ts +16 -0
- package/types/models/ultravox/processing_ultravox.d.ts.map +1 -0
- package/types/models/whisper/common_whisper.d.ts.map +1 -1
- package/types/models/whisper/feature_extraction_whisper.d.ts +3 -1
- package/types/models/whisper/feature_extraction_whisper.d.ts.map +1 -1
- package/types/models.d.ts +180 -4
- package/types/models.d.ts.map +1 -1
- package/types/pipelines.d.ts +51 -5
- package/types/pipelines.d.ts.map +1 -1
- package/types/tokenizers.d.ts.map +1 -1
- package/types/transformers.d.ts +1 -0
- package/types/tsconfig.tsbuildinfo +1 -1
- package/types/utils/audio.d.ts.map +1 -1
- package/types/utils/hub.d.ts +19 -7
- package/types/utils/hub.d.ts.map +1 -1
- package/types/utils/image.d.ts +2 -2
- package/types/utils/image.d.ts.map +1 -1
- package/types/utils/maths.d.ts +2 -2
- package/types/utils/maths.d.ts.map +1 -1
- package/types/utils/tensor.d.ts +17 -18
- package/types/utils/tensor.d.ts.map +1 -1
- package/types/utils/video.d.ts +37 -0
- package/types/utils/video.d.ts.map +1 -0
- package/dist/transformers.cjs.map +0 -1
- package/dist/transformers.min.cjs +0 -2
- package/dist/transformers.min.cjs.map +0 -1
- package/dist/transformers.min.mjs +0 -2
- package/dist/transformers.min.mjs.map +0 -1
- package/dist/transformers.mjs.map +0 -1
package/src/pipelines.js
CHANGED
|
@@ -1,15 +1,15 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* @file Pipelines provide a high-level, easy-to-use API for running machine learning models.
|
|
3
|
-
*
|
|
3
|
+
*
|
|
4
4
|
* **Example:** Instantiate pipeline using the `pipeline` function.
|
|
5
5
|
* ```javascript
|
|
6
6
|
* import { pipeline } from '@huggingface/transformers';
|
|
7
|
-
*
|
|
7
|
+
*
|
|
8
8
|
* const classifier = await pipeline('sentiment-analysis');
|
|
9
9
|
* const output = await classifier('I love transformers!');
|
|
10
10
|
* // [{'label': 'POSITIVE', 'score': 0.999817686}]
|
|
11
11
|
* ```
|
|
12
|
-
*
|
|
12
|
+
*
|
|
13
13
|
* @module pipelines
|
|
14
14
|
*/
|
|
15
15
|
|
|
@@ -78,7 +78,7 @@ import { RawImage } from './utils/image.js';
|
|
|
78
78
|
|
|
79
79
|
|
|
80
80
|
/**
|
|
81
|
-
* @typedef {string | RawImage | URL} ImageInput
|
|
81
|
+
* @typedef {string | RawImage | URL | Blob | HTMLCanvasElement | OffscreenCanvas} ImageInput
|
|
82
82
|
* @typedef {ImageInput|ImageInput[]} ImagePipelineInputs
|
|
83
83
|
*/
|
|
84
84
|
|
|
@@ -152,7 +152,7 @@ function get_bounding_box(box, asInteger) {
|
|
|
152
152
|
/**
|
|
153
153
|
* @callback DisposeType Disposes the item.
|
|
154
154
|
* @returns {Promise<void>} A promise that resolves when the item has been disposed.
|
|
155
|
-
*
|
|
155
|
+
*
|
|
156
156
|
* @typedef {Object} Disposable
|
|
157
157
|
* @property {DisposeType} dispose Disposes the pipeline. Returns a promise that resolves when the pipeline has been disposed.
|
|
158
158
|
*/
|
|
@@ -189,7 +189,7 @@ export class Pipeline extends Callable {
|
|
|
189
189
|
* @property {string} task The task of the pipeline. Useful for specifying subtasks.
|
|
190
190
|
* @property {PreTrainedModel} model The model used by the pipeline.
|
|
191
191
|
* @property {PreTrainedTokenizer} tokenizer The tokenizer used by the pipeline.
|
|
192
|
-
*
|
|
192
|
+
*
|
|
193
193
|
* @typedef {ModelTokenizerConstructorArgs} TextPipelineConstructorArgs An object used to instantiate a text-based pipeline.
|
|
194
194
|
*/
|
|
195
195
|
|
|
@@ -198,7 +198,7 @@ export class Pipeline extends Callable {
|
|
|
198
198
|
* @property {string} task The task of the pipeline. Useful for specifying subtasks.
|
|
199
199
|
* @property {PreTrainedModel} model The model used by the pipeline.
|
|
200
200
|
* @property {Processor} processor The processor used by the pipeline.
|
|
201
|
-
*
|
|
201
|
+
*
|
|
202
202
|
* @typedef {ModelProcessorConstructorArgs} AudioPipelineConstructorArgs An object used to instantiate an audio-based pipeline.
|
|
203
203
|
* @typedef {ModelProcessorConstructorArgs} ImagePipelineConstructorArgs An object used to instantiate an image-based pipeline.
|
|
204
204
|
*/
|
|
@@ -210,7 +210,7 @@ export class Pipeline extends Callable {
|
|
|
210
210
|
* @property {PreTrainedModel} model The model used by the pipeline.
|
|
211
211
|
* @property {PreTrainedTokenizer} tokenizer The tokenizer used by the pipeline.
|
|
212
212
|
* @property {Processor} processor The processor used by the pipeline.
|
|
213
|
-
*
|
|
213
|
+
*
|
|
214
214
|
* @typedef {ModelTokenizerProcessorConstructorArgs} TextAudioPipelineConstructorArgs An object used to instantiate a text- and audio-based pipeline.
|
|
215
215
|
* @typedef {ModelTokenizerProcessorConstructorArgs} TextImagePipelineConstructorArgs An object used to instantiate a text- and image-based pipeline.
|
|
216
216
|
*/
|
|
@@ -220,15 +220,15 @@ export class Pipeline extends Callable {
|
|
|
220
220
|
* @property {string} label The label predicted.
|
|
221
221
|
* @property {number} score The corresponding probability.
|
|
222
222
|
* @typedef {TextClassificationSingle[]} TextClassificationOutput
|
|
223
|
-
*
|
|
223
|
+
*
|
|
224
224
|
* @typedef {Object} TextClassificationPipelineOptions Parameters specific to text classification pipelines.
|
|
225
225
|
* @property {number} [top_k=1] The number of top predictions to be returned.
|
|
226
|
-
*
|
|
226
|
+
*
|
|
227
227
|
* @callback TextClassificationPipelineCallback Classify the text(s) given as inputs.
|
|
228
228
|
* @param {string|string[]} texts The input text(s) to be classified.
|
|
229
229
|
* @param {TextClassificationPipelineOptions} [options] The options to use for text classification.
|
|
230
230
|
* @returns {Promise<TextClassificationOutput|TextClassificationOutput[]>} An array or object containing the predicted labels and scores.
|
|
231
|
-
*
|
|
231
|
+
*
|
|
232
232
|
* @typedef {TextPipelineConstructorArgs & TextClassificationPipelineCallback & Disposable} TextClassificationPipelineType
|
|
233
233
|
*/
|
|
234
234
|
|
|
@@ -241,7 +241,7 @@ export class Pipeline extends Callable {
|
|
|
241
241
|
* const output = await classifier('I love transformers!');
|
|
242
242
|
* // [{ label: 'POSITIVE', score: 0.999788761138916 }]
|
|
243
243
|
* ```
|
|
244
|
-
*
|
|
244
|
+
*
|
|
245
245
|
* **Example:** Multilingual sentiment-analysis w/ `Xenova/bert-base-multilingual-uncased-sentiment` (and return top 5 classes).
|
|
246
246
|
* ```javascript
|
|
247
247
|
* const classifier = await pipeline('sentiment-analysis', 'Xenova/bert-base-multilingual-uncased-sentiment');
|
|
@@ -254,7 +254,7 @@ export class Pipeline extends Callable {
|
|
|
254
254
|
* // { label: '2 stars', score: 0.0009423971059732139 }
|
|
255
255
|
* // ]
|
|
256
256
|
* ```
|
|
257
|
-
*
|
|
257
|
+
*
|
|
258
258
|
* **Example:** Toxic comment classification w/ `Xenova/toxic-bert` (and return all classes).
|
|
259
259
|
* ```javascript
|
|
260
260
|
* const classifier = await pipeline('text-classification', 'Xenova/toxic-bert');
|
|
@@ -339,21 +339,21 @@ export class TextClassificationPipeline extends (/** @type {new (options: TextPi
|
|
|
339
339
|
* @property {number} [start] The index of the start of the corresponding entity in the sentence.
|
|
340
340
|
* @property {number} [end] The index of the end of the corresponding entity in the sentence.
|
|
341
341
|
* @typedef {TokenClassificationSingle[]} TokenClassificationOutput
|
|
342
|
-
*
|
|
342
|
+
*
|
|
343
343
|
* @typedef {Object} TokenClassificationPipelineOptions Parameters specific to token classification pipelines.
|
|
344
344
|
* @property {string[]} [ignore_labels] A list of labels to ignore.
|
|
345
|
-
*
|
|
345
|
+
*
|
|
346
346
|
* @callback TokenClassificationPipelineCallback Classify each token of the text(s) given as inputs.
|
|
347
347
|
* @param {string|string[]} texts One or several texts (or one list of texts) for token classification.
|
|
348
348
|
* @param {TokenClassificationPipelineOptions} [options] The options to use for token classification.
|
|
349
349
|
* @returns {Promise<TokenClassificationOutput|TokenClassificationOutput[]>} The result.
|
|
350
|
-
*
|
|
350
|
+
*
|
|
351
351
|
* @typedef {TextPipelineConstructorArgs & TokenClassificationPipelineCallback & Disposable} TokenClassificationPipelineType
|
|
352
352
|
*/
|
|
353
353
|
|
|
354
354
|
/**
|
|
355
355
|
* Named Entity Recognition pipeline using any `ModelForTokenClassification`.
|
|
356
|
-
*
|
|
356
|
+
*
|
|
357
357
|
* **Example:** Perform named entity recognition with `Xenova/bert-base-NER`.
|
|
358
358
|
* ```javascript
|
|
359
359
|
* const classifier = await pipeline('token-classification', 'Xenova/bert-base-NER');
|
|
@@ -363,7 +363,7 @@ export class TextClassificationPipeline extends (/** @type {new (options: TextPi
|
|
|
363
363
|
* // { entity: 'B-LOC', score: 0.9994474053382874, index: 9, word: 'London' }
|
|
364
364
|
* // ]
|
|
365
365
|
* ```
|
|
366
|
-
*
|
|
366
|
+
*
|
|
367
367
|
* **Example:** Perform named entity recognition with `Xenova/bert-base-NER` (and return all labels).
|
|
368
368
|
* ```javascript
|
|
369
369
|
* const classifier = await pipeline('token-classification', 'Xenova/bert-base-NER');
|
|
@@ -459,22 +459,22 @@ export class TokenClassificationPipeline extends (/** @type {new (options: TextP
|
|
|
459
459
|
* @property {number} [start] The character start index of the answer (in the tokenized version of the input).
|
|
460
460
|
* @property {number} [end] The character end index of the answer (in the tokenized version of the input).
|
|
461
461
|
* @property {string} answer The answer to the question.
|
|
462
|
-
*
|
|
462
|
+
*
|
|
463
463
|
* @typedef {Object} QuestionAnsweringPipelineOptions Parameters specific to question answering pipelines.
|
|
464
464
|
* @property {number} [top_k=1] The number of top answer predictions to be returned.
|
|
465
|
-
*
|
|
465
|
+
*
|
|
466
466
|
* @callback QuestionAnsweringPipelineCallback Answer the question(s) given as inputs by using the context(s).
|
|
467
467
|
* @param {string|string[]} question One or several question(s) (must be used in conjunction with the `context` argument).
|
|
468
468
|
* @param {string|string[]} context One or several context(s) associated with the question(s) (must be used in conjunction with the `question` argument).
|
|
469
469
|
* @param {QuestionAnsweringPipelineOptions} [options] The options to use for question answering.
|
|
470
470
|
* @returns {Promise<QuestionAnsweringOutput|QuestionAnsweringOutput[]>} An array or object containing the predicted answers and scores.
|
|
471
|
-
*
|
|
471
|
+
*
|
|
472
472
|
* @typedef {TextPipelineConstructorArgs & QuestionAnsweringPipelineCallback & Disposable} QuestionAnsweringPipelineType
|
|
473
473
|
*/
|
|
474
474
|
|
|
475
475
|
/**
|
|
476
476
|
* Question Answering pipeline using any `ModelForQuestionAnswering`.
|
|
477
|
-
*
|
|
477
|
+
*
|
|
478
478
|
* **Example:** Run question answering with `Xenova/distilbert-base-uncased-distilled-squad`.
|
|
479
479
|
* ```javascript
|
|
480
480
|
* const answerer = await pipeline('question-answering', 'Xenova/distilbert-base-uncased-distilled-squad');
|
|
@@ -599,10 +599,10 @@ export class QuestionAnsweringPipeline extends (/** @type {new (options: TextPip
|
|
|
599
599
|
* @property {number} token The predicted token id (to replace the masked one).
|
|
600
600
|
* @property {string} token_str The predicted token (to replace the masked one).
|
|
601
601
|
* @typedef {FillMaskSingle[]} FillMaskOutput
|
|
602
|
-
*
|
|
602
|
+
*
|
|
603
603
|
* @typedef {Object} FillMaskPipelineOptions Parameters specific to fill mask pipelines.
|
|
604
604
|
* @property {number} [top_k=5] When passed, overrides the number of predictions to return.
|
|
605
|
-
*
|
|
605
|
+
*
|
|
606
606
|
* @callback FillMaskPipelineCallback Fill the masked token in the text(s) given as inputs.
|
|
607
607
|
* @param {string|string[]} texts One or several texts (or one list of prompts) with masked tokens.
|
|
608
608
|
* @param {FillMaskPipelineOptions} [options] The options to use for masked language modelling.
|
|
@@ -610,13 +610,13 @@ export class QuestionAnsweringPipeline extends (/** @type {new (options: TextPip
|
|
|
610
610
|
* and the sequence with the predicted token filled in, or an array of such arrays (one for each input text).
|
|
611
611
|
* If only one input text is given, the output will be an array of objects.
|
|
612
612
|
* @throws {Error} When the mask token is not found in the input text.
|
|
613
|
-
*
|
|
613
|
+
*
|
|
614
614
|
* @typedef {TextPipelineConstructorArgs & FillMaskPipelineCallback & Disposable} FillMaskPipelineType
|
|
615
615
|
*/
|
|
616
616
|
|
|
617
617
|
/**
|
|
618
618
|
* Masked language modeling prediction pipeline using any `ModelWithLMHead`.
|
|
619
|
-
*
|
|
619
|
+
*
|
|
620
620
|
* **Example:** Perform masked language modelling (a.k.a. "fill-mask") with `Xenova/bert-base-cased`.
|
|
621
621
|
* ```javascript
|
|
622
622
|
* const unmasker = await pipeline('fill-mask', 'Xenova/bert-base-cased');
|
|
@@ -629,7 +629,7 @@ export class QuestionAnsweringPipeline extends (/** @type {new (options: TextPip
|
|
|
629
629
|
* // { token_str: 'life', score: 0.01859794743359089, token: 1297, sequence: 'The goal of life is life.' }
|
|
630
630
|
* // ]
|
|
631
631
|
* ```
|
|
632
|
-
*
|
|
632
|
+
*
|
|
633
633
|
* **Example:** Perform masked language modelling (a.k.a. "fill-mask") with `Xenova/bert-base-cased` (and return top result).
|
|
634
634
|
* ```javascript
|
|
635
635
|
* const unmasker = await pipeline('fill-mask', 'Xenova/bert-base-cased');
|
|
@@ -706,18 +706,18 @@ export class FillMaskPipeline extends (/** @type {new (options: TextPipelineCons
|
|
|
706
706
|
* @typedef {Object} Text2TextGenerationSingle
|
|
707
707
|
* @property {string} generated_text The generated text.
|
|
708
708
|
* @typedef {Text2TextGenerationSingle[]} Text2TextGenerationOutput
|
|
709
|
-
*
|
|
709
|
+
*
|
|
710
710
|
* @callback Text2TextGenerationPipelineCallback Generate the output text(s) using text(s) given as inputs.
|
|
711
711
|
* @param {string|string[]} texts Input text for the encoder.
|
|
712
712
|
* @param {Partial<import('./generation/configuration_utils.js').GenerationConfig>} [options] Additional keyword arguments to pass along to the generate method of the model.
|
|
713
713
|
* @returns {Promise<Text2TextGenerationOutput|Text2TextGenerationOutput[]>}
|
|
714
|
-
*
|
|
714
|
+
*
|
|
715
715
|
* @typedef {TextPipelineConstructorArgs & Text2TextGenerationPipelineCallback & Disposable} Text2TextGenerationPipelineType
|
|
716
716
|
*/
|
|
717
717
|
|
|
718
718
|
/**
|
|
719
719
|
* Text2TextGenerationPipeline class for generating text using a model that performs text-to-text generation tasks.
|
|
720
|
-
*
|
|
720
|
+
*
|
|
721
721
|
* **Example:** Text-to-text generation w/ `Xenova/LaMini-Flan-T5-783M`.
|
|
722
722
|
* ```javascript
|
|
723
723
|
* const generator = await pipeline('text2text-generation', 'Xenova/LaMini-Flan-T5-783M');
|
|
@@ -793,18 +793,18 @@ export class Text2TextGenerationPipeline extends (/** @type {new (options: TextP
|
|
|
793
793
|
* @typedef {Object} SummarizationSingle
|
|
794
794
|
* @property {string} summary_text The summary text.
|
|
795
795
|
* @typedef {SummarizationSingle[]} SummarizationOutput
|
|
796
|
-
*
|
|
796
|
+
*
|
|
797
797
|
* @callback SummarizationPipelineCallback Summarize the text(s) given as inputs.
|
|
798
798
|
* @param {string|string[]} texts One or several articles (or one list of articles) to summarize.
|
|
799
799
|
* @param {import('./generation/configuration_utils.js').GenerationConfig} [options] Additional keyword arguments to pass along to the generate method of the model.
|
|
800
800
|
* @returns {Promise<SummarizationOutput|SummarizationOutput[]>}
|
|
801
|
-
*
|
|
801
|
+
*
|
|
802
802
|
* @typedef {TextPipelineConstructorArgs & SummarizationPipelineCallback & Disposable} SummarizationPipelineType
|
|
803
803
|
*/
|
|
804
804
|
|
|
805
805
|
/**
|
|
806
806
|
* A pipeline for summarization tasks, inheriting from Text2TextGenerationPipeline.
|
|
807
|
-
*
|
|
807
|
+
*
|
|
808
808
|
* **Example:** Summarization w/ `Xenova/distilbart-cnn-6-6`.
|
|
809
809
|
* ```javascript
|
|
810
810
|
* const generator = await pipeline('summarization', 'Xenova/distilbart-cnn-6-6');
|
|
@@ -840,23 +840,23 @@ export class SummarizationPipeline extends (/** @type {new (options: TextPipelin
|
|
|
840
840
|
* @typedef {Object} TranslationSingle
|
|
841
841
|
* @property {string} translation_text The translated text.
|
|
842
842
|
* @typedef {TranslationSingle[]} TranslationOutput
|
|
843
|
-
*
|
|
843
|
+
*
|
|
844
844
|
* @callback TranslationPipelineCallback Translate the text(s) given as inputs.
|
|
845
845
|
* @param {string|string[]} texts Texts to be translated.
|
|
846
846
|
* @param {import('./generation/configuration_utils.js').GenerationConfig} [options] Additional keyword arguments to pass along to the generate method of the model.
|
|
847
847
|
* @returns {Promise<TranslationOutput|TranslationOutput[]>}
|
|
848
|
-
*
|
|
848
|
+
*
|
|
849
849
|
* @typedef {TextPipelineConstructorArgs & TranslationPipelineCallback & Disposable} TranslationPipelineType
|
|
850
850
|
*/
|
|
851
851
|
|
|
852
852
|
/**
|
|
853
853
|
* Translates text from one language to another.
|
|
854
|
-
*
|
|
854
|
+
*
|
|
855
855
|
* **Example:** Multilingual translation w/ `Xenova/nllb-200-distilled-600M`.
|
|
856
|
-
*
|
|
856
|
+
*
|
|
857
857
|
* See [here](https://github.com/facebookresearch/flores/blob/main/flores200/README.md#languages-in-flores-200)
|
|
858
858
|
* for the full list of languages and their corresponding codes.
|
|
859
|
-
*
|
|
859
|
+
*
|
|
860
860
|
* ```javascript
|
|
861
861
|
* const translator = await pipeline('translation', 'Xenova/nllb-200-distilled-600M');
|
|
862
862
|
* const output = await translator('जीवन एक चॉकलेट बॉक्स की तरह है।', {
|
|
@@ -865,12 +865,12 @@ export class SummarizationPipeline extends (/** @type {new (options: TextPipelin
|
|
|
865
865
|
* });
|
|
866
866
|
* // [{ translation_text: 'La vie est comme une boîte à chocolat.' }]
|
|
867
867
|
* ```
|
|
868
|
-
*
|
|
868
|
+
*
|
|
869
869
|
* **Example:** Multilingual translation w/ `Xenova/m2m100_418M`.
|
|
870
|
-
*
|
|
870
|
+
*
|
|
871
871
|
* See [here](https://huggingface.co/facebook/m2m100_418M#languages-covered)
|
|
872
872
|
* for the full list of languages and their corresponding codes.
|
|
873
|
-
*
|
|
873
|
+
*
|
|
874
874
|
* ```javascript
|
|
875
875
|
* const translator = await pipeline('translation', 'Xenova/m2m100_418M');
|
|
876
876
|
* const output = await translator('生活就像一盒巧克力。', {
|
|
@@ -879,12 +879,12 @@ export class SummarizationPipeline extends (/** @type {new (options: TextPipelin
|
|
|
879
879
|
* });
|
|
880
880
|
* // [{ translation_text: 'Life is like a box of chocolate.' }]
|
|
881
881
|
* ```
|
|
882
|
-
*
|
|
882
|
+
*
|
|
883
883
|
* **Example:** Multilingual translation w/ `Xenova/mbart-large-50-many-to-many-mmt`.
|
|
884
|
-
*
|
|
884
|
+
*
|
|
885
885
|
* See [here](https://huggingface.co/facebook/mbart-large-50-many-to-many-mmt#languages-covered)
|
|
886
886
|
* for the full list of languages and their corresponding codes.
|
|
887
|
-
*
|
|
887
|
+
*
|
|
888
888
|
* ```javascript
|
|
889
889
|
* const translator = await pipeline('translation', 'Xenova/mbart-large-50-many-to-many-mmt');
|
|
890
890
|
* const output = await translator('संयुक्त राष्ट्र के प्रमुख का कहना है कि सीरिया में कोई सैन्य समाधान नहीं है', {
|
|
@@ -913,21 +913,21 @@ function isChat(x) {
|
|
|
913
913
|
|
|
914
914
|
/**
|
|
915
915
|
* @typedef {import('./tokenizers.js').Message[]} Chat
|
|
916
|
-
*
|
|
916
|
+
*
|
|
917
917
|
* @typedef {Object} TextGenerationSingle
|
|
918
918
|
* @property {string|Chat} generated_text The generated text.
|
|
919
919
|
* @typedef {TextGenerationSingle[]} TextGenerationOutput
|
|
920
|
-
*
|
|
920
|
+
*
|
|
921
921
|
* @typedef {Object} TextGenerationSpecificParams Parameters specific to text-generation pipelines.
|
|
922
922
|
* @property {boolean} [add_special_tokens] Whether or not to add special tokens when tokenizing the sequences.
|
|
923
923
|
* @property {boolean} [return_full_text=true] If set to `false` only added text is returned, otherwise the full text is returned.
|
|
924
924
|
* @typedef {import('./generation/configuration_utils.js').GenerationConfig & TextGenerationSpecificParams} TextGenerationConfig
|
|
925
|
-
*
|
|
925
|
+
*
|
|
926
926
|
* @callback TextGenerationPipelineCallback Complete the prompt(s) given as inputs.
|
|
927
927
|
* @param {string|string[]|Chat|Chat[]} texts One or several prompts (or one list of prompts) to complete.
|
|
928
928
|
* @param {Partial<TextGenerationConfig>} [options] Additional keyword arguments to pass along to the generate method of the model.
|
|
929
929
|
* @returns {Promise<TextGenerationOutput|TextGenerationOutput[]>} An array or object containing the generated texts.
|
|
930
|
-
*
|
|
930
|
+
*
|
|
931
931
|
* @typedef {TextPipelineConstructorArgs & TextGenerationPipelineCallback & Disposable} TextGenerationPipelineType
|
|
932
932
|
*/
|
|
933
933
|
|
|
@@ -935,7 +935,7 @@ function isChat(x) {
|
|
|
935
935
|
* Language generation pipeline using any `ModelWithLMHead` or `ModelForCausalLM`.
|
|
936
936
|
* This pipeline predicts the words that will follow a specified text prompt.
|
|
937
937
|
* NOTE: For the full list of generation parameters, see [`GenerationConfig`](./utils/generation#module_utils/generation.GenerationConfig).
|
|
938
|
-
*
|
|
938
|
+
*
|
|
939
939
|
* **Example:** Text generation with `Xenova/distilgpt2` (default settings).
|
|
940
940
|
* ```javascript
|
|
941
941
|
* const generator = await pipeline('text-generation', 'Xenova/distilgpt2');
|
|
@@ -943,7 +943,7 @@ function isChat(x) {
|
|
|
943
943
|
* const output = await generator(text);
|
|
944
944
|
* // [{ generated_text: "I enjoy walking with my cute dog, and I love to play with the other dogs." }]
|
|
945
945
|
* ```
|
|
946
|
-
*
|
|
946
|
+
*
|
|
947
947
|
* **Example:** Text generation with `Xenova/distilgpt2` (custom settings).
|
|
948
948
|
* ```javascript
|
|
949
949
|
* const generator = await pipeline('text-generation', 'Xenova/distilgpt2');
|
|
@@ -962,7 +962,7 @@ function isChat(x) {
|
|
|
962
962
|
* // "generated_text": "Once upon a time, there was an abundance of information about the most important and influential"
|
|
963
963
|
* // }]
|
|
964
964
|
* ```
|
|
965
|
-
*
|
|
965
|
+
*
|
|
966
966
|
* **Example:** Run code generation with `Xenova/codegen-350M-mono`.
|
|
967
967
|
* ```javascript
|
|
968
968
|
* const generator = await pipeline('text-generation', 'Xenova/codegen-350M-mono');
|
|
@@ -1081,7 +1081,7 @@ export class TextGenerationPipeline extends (/** @type {new (options: TextPipeli
|
|
|
1081
1081
|
* @property {string} sequence The sequence for which this is the output.
|
|
1082
1082
|
* @property {string[]} labels The labels sorted by order of likelihood.
|
|
1083
1083
|
* @property {number[]} scores The probabilities for each of the labels.
|
|
1084
|
-
*
|
|
1084
|
+
*
|
|
1085
1085
|
* @typedef {Object} ZeroShotClassificationPipelineOptions Parameters specific to zero-shot classification pipelines.
|
|
1086
1086
|
* @property {string} [hypothesis_template="This example is {}."] The template used to turn each
|
|
1087
1087
|
* candidate label into an NLI-style hypothesis. The candidate label will replace the {} placeholder.
|
|
@@ -1089,14 +1089,14 @@ export class TextGenerationPipeline extends (/** @type {new (options: TextPipeli
|
|
|
1089
1089
|
* If `false`, the scores are normalized such that the sum of the label likelihoods for each sequence
|
|
1090
1090
|
* is 1. If `true`, the labels are considered independent and probabilities are normalized for each
|
|
1091
1091
|
* candidate by doing a softmax of the entailment score vs. the contradiction score.
|
|
1092
|
-
*
|
|
1092
|
+
*
|
|
1093
1093
|
* @callback ZeroShotClassificationPipelineCallback Classify the sequence(s) given as inputs.
|
|
1094
1094
|
* @param {string|string[]} texts The sequence(s) to classify, will be truncated if the model input is too large.
|
|
1095
1095
|
* @param {string|string[]} candidate_labels The set of possible class labels to classify each sequence into.
|
|
1096
1096
|
* Can be a single label, a string of comma-separated labels, or a list of labels.
|
|
1097
1097
|
* @param {ZeroShotClassificationPipelineOptions} [options] The options to use for zero-shot classification.
|
|
1098
1098
|
* @returns {Promise<ZeroShotClassificationOutput|ZeroShotClassificationOutput[]>} An array or object containing the predicted labels and scores.
|
|
1099
|
-
*
|
|
1099
|
+
*
|
|
1100
1100
|
* @typedef {TextPipelineConstructorArgs & ZeroShotClassificationPipelineCallback & Disposable} ZeroShotClassificationPipelineType
|
|
1101
1101
|
*/
|
|
1102
1102
|
|
|
@@ -1105,7 +1105,7 @@ export class TextGenerationPipeline extends (/** @type {new (options: TextPipeli
|
|
|
1105
1105
|
* trained on NLI (natural language inference) tasks. Equivalent of `text-classification`
|
|
1106
1106
|
* pipelines, but these models don't require a hardcoded number of potential classes, they
|
|
1107
1107
|
* can be chosen at runtime. It usually means it's slower but it is **much** more flexible.
|
|
1108
|
-
*
|
|
1108
|
+
*
|
|
1109
1109
|
* **Example:** Zero shot classification with `Xenova/mobilebert-uncased-mnli`.
|
|
1110
1110
|
* ```javascript
|
|
1111
1111
|
* const classifier = await pipeline('zero-shot-classification', 'Xenova/mobilebert-uncased-mnli');
|
|
@@ -1118,7 +1118,7 @@ export class TextGenerationPipeline extends (/** @type {new (options: TextPipeli
|
|
|
1118
1118
|
* // scores: [ 0.5562091040482018, 0.1843621307860853, 0.13942646639336376, 0.12000229877234923 ]
|
|
1119
1119
|
* // }
|
|
1120
1120
|
* ```
|
|
1121
|
-
*
|
|
1121
|
+
*
|
|
1122
1122
|
* **Example:** Zero shot classification with `Xenova/nli-deberta-v3-xsmall` (multi-label).
|
|
1123
1123
|
* ```javascript
|
|
1124
1124
|
* const classifier = await pipeline('zero-shot-classification', 'Xenova/nli-deberta-v3-xsmall');
|
|
@@ -1232,20 +1232,20 @@ export class ZeroShotClassificationPipeline extends (/** @type {new (options: Te
|
|
|
1232
1232
|
* @property {'none'|'mean'|'cls'} [pooling="none"] The pooling method to use.
|
|
1233
1233
|
* @property {boolean} [normalize=false] Whether or not to normalize the embeddings in the last dimension.
|
|
1234
1234
|
* @property {boolean} [quantize=false] Whether or not to quantize the embeddings.
|
|
1235
|
-
* @property {'binary'|'ubinary'} [precision='binary'] The precision to use for quantization.
|
|
1236
|
-
*
|
|
1235
|
+
* @property {'binary'|'ubinary'} [precision='binary'] The precision to use for quantization.
|
|
1236
|
+
*
|
|
1237
1237
|
* @callback FeatureExtractionPipelineCallback Extract the features of the input(s).
|
|
1238
1238
|
* @param {string|string[]} texts One or several texts (or one list of texts) to get the features of.
|
|
1239
1239
|
* @param {FeatureExtractionPipelineOptions} [options] The options to use for feature extraction.
|
|
1240
1240
|
* @returns {Promise<Tensor>} The features computed by the model.
|
|
1241
|
-
*
|
|
1241
|
+
*
|
|
1242
1242
|
* @typedef {TextPipelineConstructorArgs & FeatureExtractionPipelineCallback & Disposable} FeatureExtractionPipelineType
|
|
1243
1243
|
*/
|
|
1244
1244
|
|
|
1245
1245
|
/**
|
|
1246
1246
|
* Feature extraction pipeline using no model head. This pipeline extracts the hidden
|
|
1247
1247
|
* states from the base transformer, which can be used as features in downstream tasks.
|
|
1248
|
-
*
|
|
1248
|
+
*
|
|
1249
1249
|
* **Example:** Run feature extraction with `bert-base-uncased` (without pooling/normalization).
|
|
1250
1250
|
* ```javascript
|
|
1251
1251
|
* const extractor = await pipeline('feature-extraction', 'Xenova/bert-base-uncased', { revision: 'default' });
|
|
@@ -1256,7 +1256,7 @@ export class ZeroShotClassificationPipeline extends (/** @type {new (options: Te
|
|
|
1256
1256
|
* // dims: [1, 8, 768]
|
|
1257
1257
|
* // }
|
|
1258
1258
|
* ```
|
|
1259
|
-
*
|
|
1259
|
+
*
|
|
1260
1260
|
* **Example:** Run feature extraction with `bert-base-uncased` (with pooling/normalization).
|
|
1261
1261
|
* ```javascript
|
|
1262
1262
|
* const extractor = await pipeline('feature-extraction', 'Xenova/bert-base-uncased', { revision: 'default' });
|
|
@@ -1267,7 +1267,7 @@ export class ZeroShotClassificationPipeline extends (/** @type {new (options: Te
|
|
|
1267
1267
|
* // dims: [1, 768]
|
|
1268
1268
|
* // }
|
|
1269
1269
|
* ```
|
|
1270
|
-
*
|
|
1270
|
+
*
|
|
1271
1271
|
* **Example:** Calculating embeddings with `sentence-transformers` models.
|
|
1272
1272
|
* ```javascript
|
|
1273
1273
|
* const extractor = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2');
|
|
@@ -1348,19 +1348,19 @@ export class FeatureExtractionPipeline extends (/** @type {new (options: TextPip
|
|
|
1348
1348
|
/**
|
|
1349
1349
|
* @typedef {Object} ImageFeatureExtractionPipelineOptions Parameters specific to image feature extraction pipelines.
|
|
1350
1350
|
* @property {boolean} [pool=null] Whether or not to return the pooled output. If set to `false`, the model will return the raw hidden states.
|
|
1351
|
-
*
|
|
1351
|
+
*
|
|
1352
1352
|
* @callback ImageFeatureExtractionPipelineCallback Extract the features of the input(s).
|
|
1353
1353
|
* @param {ImagePipelineInputs} images One or several images (or one list of images) to get the features of.
|
|
1354
1354
|
* @param {ImageFeatureExtractionPipelineOptions} [options] The options to use for image feature extraction.
|
|
1355
1355
|
* @returns {Promise<Tensor>} The image features computed by the model.
|
|
1356
|
-
*
|
|
1356
|
+
*
|
|
1357
1357
|
* @typedef {ImagePipelineConstructorArgs & ImageFeatureExtractionPipelineCallback & Disposable} ImageFeatureExtractionPipelineType
|
|
1358
1358
|
*/
|
|
1359
1359
|
|
|
1360
1360
|
/**
|
|
1361
1361
|
* Image feature extraction pipeline using no model head. This pipeline extracts the hidden
|
|
1362
1362
|
* states from the base transformer, which can be used as features in downstream tasks.
|
|
1363
|
-
*
|
|
1363
|
+
*
|
|
1364
1364
|
* **Example:** Perform image feature extraction with `Xenova/vit-base-patch16-224-in21k`.
|
|
1365
1365
|
* ```javascript
|
|
1366
1366
|
* const image_feature_extractor = await pipeline('image-feature-extraction', 'Xenova/vit-base-patch16-224-in21k');
|
|
@@ -1373,7 +1373,7 @@ export class FeatureExtractionPipeline extends (/** @type {new (options: TextPip
|
|
|
1373
1373
|
* // size: 151296
|
|
1374
1374
|
* // }
|
|
1375
1375
|
* ```
|
|
1376
|
-
*
|
|
1376
|
+
*
|
|
1377
1377
|
* **Example:** Compute image embeddings with `Xenova/clip-vit-base-patch32`.
|
|
1378
1378
|
* ```javascript
|
|
1379
1379
|
* const image_feature_extractor = await pipeline('image-feature-extraction', 'Xenova/clip-vit-base-patch32');
|
|
@@ -1429,12 +1429,12 @@ export class ImageFeatureExtractionPipeline extends (/** @type {new (options: Im
|
|
|
1429
1429
|
* @property {string} label The label predicted.
|
|
1430
1430
|
* @property {number} score The corresponding probability.
|
|
1431
1431
|
* @typedef {AudioClassificationSingle[]} AudioClassificationOutput
|
|
1432
|
-
*
|
|
1432
|
+
*
|
|
1433
1433
|
* @typedef {Object} AudioClassificationPipelineOptions Parameters specific to audio classification pipelines.
|
|
1434
1434
|
* @property {number} [top_k=5] The number of top labels that will be returned by the pipeline.
|
|
1435
1435
|
* If the provided number is `null` or higher than the number of labels available in the model configuration,
|
|
1436
1436
|
* it will default to the number of labels.
|
|
1437
|
-
*
|
|
1437
|
+
*
|
|
1438
1438
|
* @callback AudioClassificationPipelineCallback Classify the sequence(s) given as inputs.
|
|
1439
1439
|
* @param {AudioPipelineInputs} audio The input audio file(s) to be classified. The input is either:
|
|
1440
1440
|
* - `string` or `URL` that is the filename/URL of the audio file, the file will be read at the processor's sampling rate
|
|
@@ -1443,14 +1443,14 @@ export class ImageFeatureExtractionPipeline extends (/** @type {new (options: Im
|
|
|
1443
1443
|
* - `Float32Array` or `Float64Array` of shape `(n, )`, representing the raw audio at the correct sampling rate (no further check will be done).
|
|
1444
1444
|
* @param {AudioClassificationPipelineOptions} [options] The options to use for audio classification.
|
|
1445
1445
|
* @returns {Promise<AudioClassificationOutput|AudioClassificationOutput[]>} An array or object containing the predicted labels and scores.
|
|
1446
|
-
*
|
|
1446
|
+
*
|
|
1447
1447
|
* @typedef {AudioPipelineConstructorArgs & AudioClassificationPipelineCallback & Disposable} AudioClassificationPipelineType
|
|
1448
1448
|
*/
|
|
1449
1449
|
|
|
1450
1450
|
/**
|
|
1451
1451
|
* Audio classification pipeline using any `AutoModelForAudioClassification`.
|
|
1452
1452
|
* This pipeline predicts the class of a raw waveform or an audio file.
|
|
1453
|
-
*
|
|
1453
|
+
*
|
|
1454
1454
|
* **Example:** Perform audio classification with `Xenova/wav2vec2-large-xlsr-53-gender-recognition-librispeech`.
|
|
1455
1455
|
* ```javascript
|
|
1456
1456
|
* const classifier = await pipeline('audio-classification', 'Xenova/wav2vec2-large-xlsr-53-gender-recognition-librispeech');
|
|
@@ -1461,7 +1461,7 @@ export class ImageFeatureExtractionPipeline extends (/** @type {new (options: Im
|
|
|
1461
1461
|
* // { label: 'female', score: 0.001845747814513743 }
|
|
1462
1462
|
* // ]
|
|
1463
1463
|
* ```
|
|
1464
|
-
*
|
|
1464
|
+
*
|
|
1465
1465
|
* **Example:** Perform audio classification with `Xenova/ast-finetuned-audioset-10-10-0.4593` and return top 4 results.
|
|
1466
1466
|
* ```javascript
|
|
1467
1467
|
* const classifier = await pipeline('audio-classification', 'Xenova/ast-finetuned-audioset-10-10-0.4593');
|
|
@@ -1526,12 +1526,12 @@ export class AudioClassificationPipeline extends (/** @type {new (options: Audio
|
|
|
1526
1526
|
* @typedef {Object} ZeroShotAudioClassificationOutput
|
|
1527
1527
|
* @property {string} label The label identified by the model. It is one of the suggested `candidate_labels`.
|
|
1528
1528
|
* @property {number} score The score attributed by the model for that label (between 0 and 1).
|
|
1529
|
-
*
|
|
1529
|
+
*
|
|
1530
1530
|
* @typedef {Object} ZeroShotAudioClassificationPipelineOptions Parameters specific to zero-shot audio classification pipelines.
|
|
1531
1531
|
* @property {string} [hypothesis_template="This is a sound of {}."] The sentence used in conjunction with `candidate_labels`
|
|
1532
1532
|
* to attempt the audio classification by replacing the placeholder with the candidate_labels.
|
|
1533
1533
|
* Then likelihood is estimated by using `logits_per_audio`.
|
|
1534
|
-
*
|
|
1534
|
+
*
|
|
1535
1535
|
* @callback ZeroShotAudioClassificationPipelineCallback Classify the sequence(s) given as inputs.
|
|
1536
1536
|
* @param {AudioPipelineInputs} audio The input audio file(s) to be classified. The input is either:
|
|
1537
1537
|
* - `string` or `URL` that is the filename/URL of the audio file, the file will be read at the processor's sampling rate
|
|
@@ -1541,14 +1541,14 @@ export class AudioClassificationPipeline extends (/** @type {new (options: Audio
|
|
|
1541
1541
|
* @param {string[]} candidate_labels The candidate labels for this audio.
|
|
1542
1542
|
* @param {ZeroShotAudioClassificationPipelineOptions} [options] The options to use for zero-shot audio classification.
|
|
1543
1543
|
* @returns {Promise<ZeroShotAudioClassificationOutput[]|ZeroShotAudioClassificationOutput[][]>} An array of objects containing the predicted labels and scores.
|
|
1544
|
-
*
|
|
1544
|
+
*
|
|
1545
1545
|
* @typedef {TextAudioPipelineConstructorArgs & ZeroShotAudioClassificationPipelineCallback & Disposable} ZeroShotAudioClassificationPipelineType
|
|
1546
1546
|
*/
|
|
1547
1547
|
|
|
1548
1548
|
/**
|
|
1549
1549
|
* Zero shot audio classification pipeline using `ClapModel`. This pipeline predicts the class of an audio when you
|
|
1550
1550
|
* provide an audio and a set of `candidate_labels`.
|
|
1551
|
-
*
|
|
1551
|
+
*
|
|
1552
1552
|
* **Example**: Perform zero-shot audio classification with `Xenova/clap-htsat-unfused`.
|
|
1553
1553
|
* ```javascript
|
|
1554
1554
|
* const classifier = await pipeline('zero-shot-audio-classification', 'Xenova/clap-htsat-unfused');
|
|
@@ -1581,7 +1581,7 @@ export class ZeroShotAudioClassificationPipeline extends (/** @type {new (option
|
|
|
1581
1581
|
audio = [/** @type {AudioInput} */ (audio)];
|
|
1582
1582
|
}
|
|
1583
1583
|
|
|
1584
|
-
// Insert label into hypothesis template
|
|
1584
|
+
// Insert label into hypothesis template
|
|
1585
1585
|
const texts = candidate_labels.map(
|
|
1586
1586
|
x => hypothesis_template.replace('{}', x)
|
|
1587
1587
|
);
|
|
@@ -1625,7 +1625,7 @@ export class ZeroShotAudioClassificationPipeline extends (/** @type {new (option
|
|
|
1625
1625
|
* @property {string} text The recognized text.
|
|
1626
1626
|
* @property {Chunk[]} [chunks] When using `return_timestamps`, the `chunks` will become a list
|
|
1627
1627
|
* containing all the various text chunks identified by the model.
|
|
1628
|
-
*
|
|
1628
|
+
*
|
|
1629
1629
|
* @typedef {Object} AutomaticSpeechRecognitionSpecificParams Parameters specific to automatic-speech-recognition pipelines.
|
|
1630
1630
|
* @property {boolean|'word'} [return_timestamps] Whether to return timestamps or not. Default is `false`.
|
|
1631
1631
|
* @property {number} [chunk_length_s] The length of audio chunks to process in seconds. Default is 0 (no chunking).
|
|
@@ -1635,7 +1635,7 @@ export class ZeroShotAudioClassificationPipeline extends (/** @type {new (option
|
|
|
1635
1635
|
* @property {string} [task] The task to perform. Default is `null`, meaning it should be auto-detected.
|
|
1636
1636
|
* @property {number} [num_frames] The number of frames in the input audio.
|
|
1637
1637
|
* @typedef {import('./generation/configuration_utils.js').GenerationConfig & AutomaticSpeechRecognitionSpecificParams} AutomaticSpeechRecognitionConfig
|
|
1638
|
-
*
|
|
1638
|
+
*
|
|
1639
1639
|
* @callback AutomaticSpeechRecognitionPipelineCallback Transcribe the audio sequence(s) given as inputs to text.
|
|
1640
1640
|
* @param {AudioPipelineInputs} audio The input audio file(s) to be transcribed. The input is either:
|
|
1641
1641
|
* - `string` or `URL` that is the filename/URL of the audio file, the file will be read at the processor's sampling rate
|
|
@@ -1644,7 +1644,7 @@ export class ZeroShotAudioClassificationPipeline extends (/** @type {new (option
|
|
|
1644
1644
|
* - `Float32Array` or `Float64Array` of shape `(n, )`, representing the raw audio at the correct sampling rate (no further check will be done).
|
|
1645
1645
|
* @param {Partial<AutomaticSpeechRecognitionConfig>} [options] Additional keyword arguments to pass along to the generate method of the model.
|
|
1646
1646
|
* @returns {Promise<AutomaticSpeechRecognitionOutput|AutomaticSpeechRecognitionOutput[]>} An object containing the transcription text and optionally timestamps if `return_timestamps` is `true`.
|
|
1647
|
-
*
|
|
1647
|
+
*
|
|
1648
1648
|
* @typedef {TextAudioPipelineConstructorArgs & AutomaticSpeechRecognitionPipelineCallback & Disposable} AutomaticSpeechRecognitionPipelineType
|
|
1649
1649
|
*/
|
|
1650
1650
|
|
|
@@ -1658,7 +1658,7 @@ export class ZeroShotAudioClassificationPipeline extends (/** @type {new (option
|
|
|
1658
1658
|
* const output = await transcriber(url);
|
|
1659
1659
|
* // { text: " And so my fellow Americans ask not what your country can do for you, ask what you can do for your country." }
|
|
1660
1660
|
* ```
|
|
1661
|
-
*
|
|
1661
|
+
*
|
|
1662
1662
|
* **Example:** Transcribe English w/ timestamps.
|
|
1663
1663
|
* ```javascript
|
|
1664
1664
|
* const transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en');
|
|
@@ -1672,7 +1672,7 @@ export class ZeroShotAudioClassificationPipeline extends (/** @type {new (option
|
|
|
1672
1672
|
* // ]
|
|
1673
1673
|
* // }
|
|
1674
1674
|
* ```
|
|
1675
|
-
*
|
|
1675
|
+
*
|
|
1676
1676
|
* **Example:** Transcribe English w/ word-level timestamps.
|
|
1677
1677
|
* ```javascript
|
|
1678
1678
|
* const transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en');
|
|
@@ -1691,7 +1691,7 @@ export class ZeroShotAudioClassificationPipeline extends (/** @type {new (option
|
|
|
1691
1691
|
* // ]
|
|
1692
1692
|
* // }
|
|
1693
1693
|
* ```
|
|
1694
|
-
*
|
|
1694
|
+
*
|
|
1695
1695
|
* **Example:** Transcribe French.
|
|
1696
1696
|
* ```javascript
|
|
1697
1697
|
* const transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-small');
|
|
@@ -1699,7 +1699,7 @@ export class ZeroShotAudioClassificationPipeline extends (/** @type {new (option
|
|
|
1699
1699
|
* const output = await transcriber(url, { language: 'french', task: 'transcribe' });
|
|
1700
1700
|
* // { text: " J'adore, j'aime, je n'aime pas, je déteste." }
|
|
1701
1701
|
* ```
|
|
1702
|
-
*
|
|
1702
|
+
*
|
|
1703
1703
|
* **Example:** Translate French to English.
|
|
1704
1704
|
* ```javascript
|
|
1705
1705
|
* const transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-small');
|
|
@@ -1707,7 +1707,7 @@ export class ZeroShotAudioClassificationPipeline extends (/** @type {new (option
|
|
|
1707
1707
|
* const output = await transcriber(url, { language: 'french', task: 'translate' });
|
|
1708
1708
|
* // { text: " I love, I like, I don't like, I hate." }
|
|
1709
1709
|
* ```
|
|
1710
|
-
*
|
|
1710
|
+
*
|
|
1711
1711
|
* **Example:** Transcribe/translate audio longer than 30 seconds.
|
|
1712
1712
|
* ```javascript
|
|
1713
1713
|
* const transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en');
|
|
@@ -1730,6 +1730,7 @@ export class AutomaticSpeechRecognitionPipeline extends (/** @type {new (options
|
|
|
1730
1730
|
async _call(audio, kwargs = {}) {
|
|
1731
1731
|
switch (this.model.config.model_type) {
|
|
1732
1732
|
case 'whisper':
|
|
1733
|
+
case 'lite-whisper':
|
|
1733
1734
|
return this._call_whisper(audio, kwargs)
|
|
1734
1735
|
case 'wav2vec2':
|
|
1735
1736
|
case 'wav2vec2-bert':
|
|
@@ -1929,18 +1930,18 @@ export class AutomaticSpeechRecognitionPipeline extends (/** @type {new (options
|
|
|
1929
1930
|
* @typedef {Object} ImageToTextSingle
|
|
1930
1931
|
* @property {string} generated_text The generated text.
|
|
1931
1932
|
* @typedef {ImageToTextSingle[]} ImageToTextOutput
|
|
1932
|
-
*
|
|
1933
|
+
*
|
|
1933
1934
|
* @callback ImageToTextPipelineCallback Assign labels to the image(s) passed as inputs.
|
|
1934
1935
|
* @param {ImagePipelineInputs} images The images to be captioned.
|
|
1935
1936
|
* @param {Partial<import('./generation/configuration_utils.js').GenerationConfig>} [options] Additional keyword arguments to pass along to the generate method of the model.
|
|
1936
1937
|
* @returns {Promise<ImageToTextOutput|ImageToTextOutput[]>} An object (or array of objects) containing the generated text(s).
|
|
1937
|
-
*
|
|
1938
|
+
*
|
|
1938
1939
|
* @typedef {TextImagePipelineConstructorArgs & ImageToTextPipelineCallback & Disposable} ImageToTextPipelineType
|
|
1939
1940
|
*/
|
|
1940
1941
|
|
|
1941
1942
|
/**
|
|
1942
1943
|
* Image To Text pipeline using a `AutoModelForVision2Seq`. This pipeline predicts a caption for a given image.
|
|
1943
|
-
*
|
|
1944
|
+
*
|
|
1944
1945
|
* **Example:** Generate a caption for an image w/ `Xenova/vit-gpt2-image-captioning`.
|
|
1945
1946
|
* ```javascript
|
|
1946
1947
|
* const captioner = await pipeline('image-to-text', 'Xenova/vit-gpt2-image-captioning');
|
|
@@ -1948,7 +1949,7 @@ export class AutomaticSpeechRecognitionPipeline extends (/** @type {new (options
|
|
|
1948
1949
|
* const output = await captioner(url);
|
|
1949
1950
|
* // [{ generated_text: 'a cat laying on a couch with another cat' }]
|
|
1950
1951
|
* ```
|
|
1951
|
-
*
|
|
1952
|
+
*
|
|
1952
1953
|
* **Example:** Optical Character Recognition (OCR) w/ `Xenova/trocr-small-handwritten`.
|
|
1953
1954
|
* ```javascript
|
|
1954
1955
|
* const captioner = await pipeline('image-to-text', 'Xenova/trocr-small-handwritten');
|
|
@@ -1994,22 +1995,22 @@ export class ImageToTextPipeline extends (/** @type {new (options: TextImagePipe
|
|
|
1994
1995
|
* @property {string} label The label identified by the model.
|
|
1995
1996
|
* @property {number} score The score attributed by the model for that label.
|
|
1996
1997
|
* @typedef {ImageClassificationSingle[]} ImageClassificationOutput
|
|
1997
|
-
*
|
|
1998
|
+
*
|
|
1998
1999
|
* @typedef {Object} ImageClassificationPipelineOptions Parameters specific to image classification pipelines.
|
|
1999
|
-
* @property {number} [top_k=1] The number of top labels that will be returned by the pipeline.
|
|
2000
|
-
*
|
|
2000
|
+
* @property {number} [top_k=1] The number of top labels that will be returned by the pipeline.
|
|
2001
|
+
*
|
|
2001
2002
|
* @callback ImageClassificationPipelineCallback Assign labels to the image(s) passed as inputs.
|
|
2002
2003
|
* @param {ImagePipelineInputs} images The input image(s) to be classified.
|
|
2003
2004
|
* @param {ImageClassificationPipelineOptions} [options] The options to use for image classification.
|
|
2004
2005
|
* @returns {Promise<ImageClassificationOutput|ImageClassificationOutput[]>} An array or object containing the predicted labels and scores.
|
|
2005
|
-
*
|
|
2006
|
+
*
|
|
2006
2007
|
* @typedef {ImagePipelineConstructorArgs & ImageClassificationPipelineCallback & Disposable} ImageClassificationPipelineType
|
|
2007
2008
|
*/
|
|
2008
2009
|
|
|
2009
2010
|
/**
|
|
2010
2011
|
* Image classification pipeline using any `AutoModelForImageClassification`.
|
|
2011
2012
|
* This pipeline predicts the class of an image.
|
|
2012
|
-
*
|
|
2013
|
+
*
|
|
2013
2014
|
* **Example:** Classify an image.
|
|
2014
2015
|
* ```javascript
|
|
2015
2016
|
* const classifier = await pipeline('image-classification', 'Xenova/vit-base-patch16-224');
|
|
@@ -2019,7 +2020,7 @@ export class ImageToTextPipeline extends (/** @type {new (options: TextImagePipe
|
|
|
2019
2020
|
* // { label: 'tiger, Panthera tigris', score: 0.632695734500885 },
|
|
2020
2021
|
* // ]
|
|
2021
2022
|
* ```
|
|
2022
|
-
*
|
|
2023
|
+
*
|
|
2023
2024
|
* **Example:** Classify an image and return top `n` classes.
|
|
2024
2025
|
* ```javascript
|
|
2025
2026
|
* const classifier = await pipeline('image-classification', 'Xenova/vit-base-patch16-224');
|
|
@@ -2031,7 +2032,7 @@ export class ImageToTextPipeline extends (/** @type {new (options: TextImagePipe
|
|
|
2031
2032
|
* // { label: 'lion, king of beasts, Panthera leo', score: 0.00045060308184474707 },
|
|
2032
2033
|
* // ]
|
|
2033
2034
|
* ```
|
|
2034
|
-
*
|
|
2035
|
+
*
|
|
2035
2036
|
* **Example:** Classify an image and return all classes.
|
|
2036
2037
|
* ```javascript
|
|
2037
2038
|
* const classifier = await pipeline('image-classification', 'Xenova/vit-base-patch16-224');
|
|
@@ -2095,10 +2096,10 @@ export class ImageClassificationPipeline extends (/** @type {new (options: Image
|
|
|
2095
2096
|
|
|
2096
2097
|
/**
|
|
2097
2098
|
* @typedef {Object} ImageSegmentationPipelineOutput
|
|
2098
|
-
* @property {string} label The label of the segment.
|
|
2099
|
+
* @property {string|null} label The label of the segment.
|
|
2099
2100
|
* @property {number|null} score The score of the segment.
|
|
2100
2101
|
* @property {RawImage} mask The mask of the segment.
|
|
2101
|
-
*
|
|
2102
|
+
*
|
|
2102
2103
|
* @typedef {Object} ImageSegmentationPipelineOptions Parameters specific to image segmentation pipelines.
|
|
2103
2104
|
* @property {number} [threshold=0.5] Probability threshold to filter out predicted masks.
|
|
2104
2105
|
* @property {number} [mask_threshold=0.5] Threshold to use when turning the predicted masks into binary values.
|
|
@@ -2107,19 +2108,19 @@ export class ImageClassificationPipeline extends (/** @type {new (options: Image
|
|
|
2107
2108
|
* depending on model capabilities. If not set, the pipeline will attempt to resolve (in that order).
|
|
2108
2109
|
* @property {number[]} [label_ids_to_fuse=null] List of label ids to fuse. If not set, do not fuse any labels.
|
|
2109
2110
|
* @property {number[][]} [target_sizes=null] List of target sizes for the input images. If not set, use the original image sizes.
|
|
2110
|
-
*
|
|
2111
|
+
*
|
|
2111
2112
|
* @callback ImageSegmentationPipelineCallback Segment the input images.
|
|
2112
2113
|
* @param {ImagePipelineInputs} images The input images.
|
|
2113
2114
|
* @param {ImageSegmentationPipelineOptions} [options] The options to use for image segmentation.
|
|
2114
2115
|
* @returns {Promise<ImageSegmentationPipelineOutput[]>} The annotated segments.
|
|
2115
|
-
*
|
|
2116
|
+
*
|
|
2116
2117
|
* @typedef {ImagePipelineConstructorArgs & ImageSegmentationPipelineCallback & Disposable} ImageSegmentationPipelineType
|
|
2117
2118
|
*/
|
|
2118
2119
|
|
|
2119
2120
|
/**
|
|
2120
2121
|
* Image segmentation pipeline using any `AutoModelForXXXSegmentation`.
|
|
2121
2122
|
* This pipeline predicts masks of objects and their classes.
|
|
2122
|
-
*
|
|
2123
|
+
*
|
|
2123
2124
|
* **Example:** Perform image segmentation with `Xenova/detr-resnet-50-panoptic`.
|
|
2124
2125
|
* ```javascript
|
|
2125
2126
|
* const segmenter = await pipeline('image-segmentation', 'Xenova/detr-resnet-50-panoptic');
|
|
@@ -2165,14 +2166,30 @@ export class ImageSegmentationPipeline extends (/** @type {new (options: ImagePi
|
|
|
2165
2166
|
const preparedImages = await prepareImages(images);
|
|
2166
2167
|
const imageSizes = preparedImages.map(x => [x.height, x.width]);
|
|
2167
2168
|
|
|
2168
|
-
const
|
|
2169
|
-
|
|
2169
|
+
const inputs = await this.processor(preparedImages);
|
|
2170
|
+
|
|
2171
|
+
const { inputNames, outputNames } = this.model.sessions['model'];
|
|
2172
|
+
if (!inputNames.includes('pixel_values')) {
|
|
2173
|
+
if (inputNames.length !== 1) {
|
|
2174
|
+
throw Error(`Expected a single input name, but got ${inputNames.length} inputs: ${inputNames}.`);
|
|
2175
|
+
}
|
|
2176
|
+
|
|
2177
|
+
const newName = inputNames[0];
|
|
2178
|
+
if (newName in inputs) {
|
|
2179
|
+
throw Error(`Input name ${newName} already exists in the inputs.`);
|
|
2180
|
+
}
|
|
2181
|
+
// To ensure compatibility with certain background-removal models,
|
|
2182
|
+
// we may need to map the processor's `pixel_values` to the model's single input name
|
|
2183
|
+
inputs[newName] = inputs.pixel_values;
|
|
2184
|
+
}
|
|
2185
|
+
|
|
2186
|
+
const output = await this.model(inputs);
|
|
2170
2187
|
|
|
2171
2188
|
let fn = null;
|
|
2172
2189
|
if (subtask !== null) {
|
|
2173
2190
|
fn = this.subtasks_mapping[subtask];
|
|
2174
|
-
} else {
|
|
2175
|
-
for (
|
|
2191
|
+
} else if (this.processor.image_processor) {
|
|
2192
|
+
for (const [task, func] of Object.entries(this.subtasks_mapping)) {
|
|
2176
2193
|
if (func in this.processor.image_processor) {
|
|
2177
2194
|
fn = this.processor.image_processor[func].bind(this.processor.image_processor);
|
|
2178
2195
|
subtask = task;
|
|
@@ -2186,7 +2203,28 @@ export class ImageSegmentationPipeline extends (/** @type {new (options: ImagePi
|
|
|
2186
2203
|
|
|
2187
2204
|
/** @type {ImageSegmentationPipelineOutput[]} */
|
|
2188
2205
|
const annotation = [];
|
|
2189
|
-
if (subtask
|
|
2206
|
+
if (!subtask) {
|
|
2207
|
+
// We define an epsilon to safeguard against numerical/precision issues when detecting
|
|
2208
|
+
// the normalization mode of the output (i.e., sigmoid already applied, or not).
|
|
2209
|
+
// See https://github.com/microsoft/onnxruntime/issues/23943 for more information.
|
|
2210
|
+
const epsilon = 1e-5;
|
|
2211
|
+
|
|
2212
|
+
// Perform standard image segmentation
|
|
2213
|
+
const result = output[outputNames[0]];
|
|
2214
|
+
for (let i = 0; i < imageSizes.length; ++i) {
|
|
2215
|
+
const size = imageSizes[i];
|
|
2216
|
+
const item = result[i];
|
|
2217
|
+
if (item.data.some(x => x < -epsilon || x > 1 + epsilon)) {
|
|
2218
|
+
item.sigmoid_();
|
|
2219
|
+
}
|
|
2220
|
+
const mask = await RawImage.fromTensor(item.mul_(255).to('uint8')).resize(size[1], size[0]);
|
|
2221
|
+
annotation.push({
|
|
2222
|
+
label: null,
|
|
2223
|
+
score: null,
|
|
2224
|
+
mask
|
|
2225
|
+
});
|
|
2226
|
+
}
|
|
2227
|
+
} else if (subtask === 'panoptic' || subtask === 'instance') {
|
|
2190
2228
|
const processed = fn(
|
|
2191
2229
|
output,
|
|
2192
2230
|
threshold,
|
|
@@ -2242,29 +2280,86 @@ export class ImageSegmentationPipeline extends (/** @type {new (options: ImagePi
|
|
|
2242
2280
|
}
|
|
2243
2281
|
}
|
|
2244
2282
|
|
|
2283
|
+
|
|
2284
|
+
/**
|
|
2285
|
+
* @typedef {Object} BackgroundRemovalPipelineOptions Parameters specific to background removal pipelines.
|
|
2286
|
+
*
|
|
2287
|
+
* @callback BackgroundRemovalPipelineCallback Remove the background from the input images.
|
|
2288
|
+
* @param {ImagePipelineInputs} images The input images.
|
|
2289
|
+
* @param {BackgroundRemovalPipelineOptions} [options] The options to use for background removal.
|
|
2290
|
+
* @returns {Promise<RawImage[]>} The images with the background removed.
|
|
2291
|
+
*
|
|
2292
|
+
* @typedef {ImagePipelineConstructorArgs & BackgroundRemovalPipelineCallback & Disposable} BackgroundRemovalPipelineType
|
|
2293
|
+
*/
|
|
2294
|
+
|
|
2295
|
+
/**
|
|
2296
|
+
* Background removal pipeline using certain `AutoModelForXXXSegmentation`.
|
|
2297
|
+
* This pipeline removes the backgrounds of images.
|
|
2298
|
+
*
|
|
2299
|
+
* **Example:** Perform background removal with `Xenova/modnet`.
|
|
2300
|
+
* ```javascript
|
|
2301
|
+
* const segmenter = await pipeline('background-removal', 'Xenova/modnet');
|
|
2302
|
+
* const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/portrait-of-woman_small.jpg';
|
|
2303
|
+
* const output = await segmenter(url);
|
|
2304
|
+
* // [
|
|
2305
|
+
* // RawImage { data: Uint8ClampedArray(648000) [ ... ], width: 360, height: 450, channels: 4 }
|
|
2306
|
+
* // ]
|
|
2307
|
+
* ```
|
|
2308
|
+
*/
|
|
2309
|
+
export class BackgroundRemovalPipeline extends (/** @type {new (options: ImagePipelineConstructorArgs) => BackgroundRemovalPipelineType} */ (/** @type {any} */(ImageSegmentationPipeline))) {
|
|
2310
|
+
/**
|
|
2311
|
+
* Create a new BackgroundRemovalPipeline.
|
|
2312
|
+
* @param {ImagePipelineConstructorArgs} options An object used to instantiate the pipeline.
|
|
2313
|
+
*/
|
|
2314
|
+
constructor(options) {
|
|
2315
|
+
super(options);
|
|
2316
|
+
}
|
|
2317
|
+
|
|
2318
|
+
/** @type {BackgroundRemovalPipelineCallback} */
|
|
2319
|
+
async _call(images, options = {}) {
|
|
2320
|
+
const isBatched = Array.isArray(images);
|
|
2321
|
+
|
|
2322
|
+
if (isBatched && images.length !== 1) {
|
|
2323
|
+
throw Error("Background removal pipeline currently only supports a batch size of 1.");
|
|
2324
|
+
}
|
|
2325
|
+
|
|
2326
|
+
const preparedImages = await prepareImages(images);
|
|
2327
|
+
|
|
2328
|
+
// @ts-expect-error TS2339
|
|
2329
|
+
const masks = await super._call(images, options);
|
|
2330
|
+
const result = preparedImages.map((img, i) => {
|
|
2331
|
+
const cloned = img.clone();
|
|
2332
|
+
cloned.putAlpha(masks[i].mask);
|
|
2333
|
+
return cloned;
|
|
2334
|
+
});
|
|
2335
|
+
|
|
2336
|
+
return result;
|
|
2337
|
+
}
|
|
2338
|
+
}
|
|
2339
|
+
|
|
2245
2340
|
/**
|
|
2246
2341
|
* @typedef {Object} ZeroShotImageClassificationOutput
|
|
2247
2342
|
* @property {string} label The label identified by the model. It is one of the suggested `candidate_labels`.
|
|
2248
2343
|
* @property {number} score The score attributed by the model for that label (between 0 and 1).
|
|
2249
|
-
*
|
|
2344
|
+
*
|
|
2250
2345
|
* @typedef {Object} ZeroShotImageClassificationPipelineOptions Parameters specific to zero-shot image classification pipelines.
|
|
2251
2346
|
* @property {string} [hypothesis_template="This is a photo of {}"] The sentence used in conjunction with `candidate_labels`
|
|
2252
2347
|
* to attempt the image classification by replacing the placeholder with the candidate_labels.
|
|
2253
2348
|
* Then likelihood is estimated by using `logits_per_image`.
|
|
2254
|
-
*
|
|
2349
|
+
*
|
|
2255
2350
|
* @callback ZeroShotImageClassificationPipelineCallback Assign labels to the image(s) passed as inputs.
|
|
2256
2351
|
* @param {ImagePipelineInputs} images The input images.
|
|
2257
2352
|
* @param {string[]} candidate_labels The candidate labels for this image.
|
|
2258
2353
|
* @param {ZeroShotImageClassificationPipelineOptions} [options] The options to use for zero-shot image classification.
|
|
2259
2354
|
* @returns {Promise<ZeroShotImageClassificationOutput[]|ZeroShotImageClassificationOutput[][]>} An array of objects containing the predicted labels and scores.
|
|
2260
|
-
*
|
|
2355
|
+
*
|
|
2261
2356
|
* @typedef {TextImagePipelineConstructorArgs & ZeroShotImageClassificationPipelineCallback & Disposable} ZeroShotImageClassificationPipelineType
|
|
2262
2357
|
*/
|
|
2263
2358
|
|
|
2264
2359
|
/**
|
|
2265
2360
|
* Zero shot image classification pipeline. This pipeline predicts the class of
|
|
2266
2361
|
* an image when you provide an image and a set of `candidate_labels`.
|
|
2267
|
-
*
|
|
2362
|
+
*
|
|
2268
2363
|
* **Example:** Zero shot image classification w/ `Xenova/clip-vit-base-patch32`.
|
|
2269
2364
|
* ```javascript
|
|
2270
2365
|
* const classifier = await pipeline('zero-shot-image-classification', 'Xenova/clip-vit-base-patch32');
|
|
@@ -2294,7 +2389,7 @@ export class ZeroShotImageClassificationPipeline extends (/** @type {new (option
|
|
|
2294
2389
|
const isBatched = Array.isArray(images);
|
|
2295
2390
|
const preparedImages = await prepareImages(images);
|
|
2296
2391
|
|
|
2297
|
-
// Insert label into hypothesis template
|
|
2392
|
+
// Insert label into hypothesis template
|
|
2298
2393
|
const texts = candidate_labels.map(
|
|
2299
2394
|
x => hypothesis_template.replace('{}', x)
|
|
2300
2395
|
);
|
|
@@ -2341,23 +2436,23 @@ export class ZeroShotImageClassificationPipeline extends (/** @type {new (option
|
|
|
2341
2436
|
* @property {number} score The score attributed by the model for that label.
|
|
2342
2437
|
* @property {BoundingBox} box The bounding box of detected object in image's original size, or as a percentage if `percentage` is set to true.
|
|
2343
2438
|
* @typedef {ObjectDetectionPipelineSingle[]} ObjectDetectionPipelineOutput
|
|
2344
|
-
*
|
|
2439
|
+
*
|
|
2345
2440
|
* @typedef {Object} ObjectDetectionPipelineOptions Parameters specific to object detection pipelines.
|
|
2346
2441
|
* @property {number} [threshold=0.9] The threshold used to filter boxes by score.
|
|
2347
2442
|
* @property {boolean} [percentage=false] Whether to return the boxes coordinates in percentage (true) or in pixels (false).
|
|
2348
|
-
*
|
|
2443
|
+
*
|
|
2349
2444
|
* @callback ObjectDetectionPipelineCallback Detect objects (bounding boxes & classes) in the image(s) passed as inputs.
|
|
2350
2445
|
* @param {ImagePipelineInputs} images The input images.
|
|
2351
2446
|
* @param {ObjectDetectionPipelineOptions} [options] The options to use for object detection.
|
|
2352
|
-
* @returns {Promise<ObjectDetectionPipelineOutput|ObjectDetectionPipelineOutput[]>} A list of objects or a list of list of objects.
|
|
2353
|
-
*
|
|
2447
|
+
* @returns {Promise<ObjectDetectionPipelineOutput|ObjectDetectionPipelineOutput[]>} A list of objects or a list of list of objects.
|
|
2448
|
+
*
|
|
2354
2449
|
* @typedef {ImagePipelineConstructorArgs & ObjectDetectionPipelineCallback & Disposable} ObjectDetectionPipelineType
|
|
2355
2450
|
*/
|
|
2356
2451
|
|
|
2357
2452
|
/**
|
|
2358
2453
|
* Object detection pipeline using any `AutoModelForObjectDetection`.
|
|
2359
2454
|
* This pipeline predicts bounding boxes of objects and their classes.
|
|
2360
|
-
*
|
|
2455
|
+
*
|
|
2361
2456
|
* **Example:** Run object-detection with `Xenova/detr-resnet-50`.
|
|
2362
2457
|
* ```javascript
|
|
2363
2458
|
* const detector = await pipeline('object-detection', 'Xenova/detr-resnet-50');
|
|
@@ -2431,27 +2526,27 @@ export class ObjectDetectionPipeline extends (/** @type {new (options: ImagePipe
|
|
|
2431
2526
|
* @property {string} label Text query corresponding to the found object.
|
|
2432
2527
|
* @property {number} score Score corresponding to the object (between 0 and 1).
|
|
2433
2528
|
* @property {BoundingBox} box Bounding box of the detected object in image's original size, or as a percentage if `percentage` is set to true.
|
|
2434
|
-
*
|
|
2529
|
+
*
|
|
2435
2530
|
* @typedef {Object} ZeroShotObjectDetectionPipelineOptions Parameters specific to zero-shot object detection pipelines.
|
|
2436
2531
|
* @property {number} [threshold=0.1] The probability necessary to make a prediction.
|
|
2437
2532
|
* @property {number} [top_k=null] The number of top predictions that will be returned by the pipeline.
|
|
2438
2533
|
* If the provided number is `null` or higher than the number of predictions available, it will default
|
|
2439
2534
|
* to the number of predictions.
|
|
2440
2535
|
* @property {boolean} [percentage=false] Whether to return the boxes coordinates in percentage (true) or in pixels (false).
|
|
2441
|
-
*
|
|
2536
|
+
*
|
|
2442
2537
|
* @callback ZeroShotObjectDetectionPipelineCallback Detect objects (bounding boxes & classes) in the image(s) passed as inputs.
|
|
2443
2538
|
* @param {ImagePipelineInputs} images The input images.
|
|
2444
2539
|
* @param {string[]} candidate_labels What the model should recognize in the image.
|
|
2445
2540
|
* @param {ZeroShotObjectDetectionPipelineOptions} [options] The options to use for zero-shot object detection.
|
|
2446
2541
|
* @returns {Promise<ZeroShotObjectDetectionOutput[]|ZeroShotObjectDetectionOutput[][]>} An array of objects containing the predicted labels, scores, and bounding boxes.
|
|
2447
|
-
*
|
|
2542
|
+
*
|
|
2448
2543
|
* @typedef {TextImagePipelineConstructorArgs & ZeroShotObjectDetectionPipelineCallback & Disposable} ZeroShotObjectDetectionPipelineType
|
|
2449
2544
|
*/
|
|
2450
2545
|
|
|
2451
2546
|
/**
|
|
2452
2547
|
* Zero-shot object detection pipeline. This pipeline predicts bounding boxes of
|
|
2453
2548
|
* objects when you provide an image and a set of `candidate_labels`.
|
|
2454
|
-
*
|
|
2549
|
+
*
|
|
2455
2550
|
* **Example:** Zero-shot object detection w/ `Xenova/owlvit-base-patch32`.
|
|
2456
2551
|
* ```javascript
|
|
2457
2552
|
* const detector = await pipeline('zero-shot-object-detection', 'Xenova/owlvit-base-patch32');
|
|
@@ -2481,7 +2576,7 @@ export class ObjectDetectionPipeline extends (/** @type {new (options: ImagePipe
|
|
|
2481
2576
|
* // }
|
|
2482
2577
|
* // ]
|
|
2483
2578
|
* ```
|
|
2484
|
-
*
|
|
2579
|
+
*
|
|
2485
2580
|
* **Example:** Zero-shot object detection w/ `Xenova/owlvit-base-patch32` (returning top 4 matches and setting a threshold).
|
|
2486
2581
|
* ```javascript
|
|
2487
2582
|
* const detector = await pipeline('zero-shot-object-detection', 'Xenova/owlvit-base-patch32');
|
|
@@ -2554,7 +2649,7 @@ export class ZeroShotObjectDetectionPipeline extends (/** @type {new (options: T
|
|
|
2554
2649
|
const output = await this.model({ ...text_inputs, pixel_values });
|
|
2555
2650
|
|
|
2556
2651
|
let result;
|
|
2557
|
-
if('post_process_grounded_object_detection' in this.processor) {
|
|
2652
|
+
if ('post_process_grounded_object_detection' in this.processor) {
|
|
2558
2653
|
// @ts-ignore
|
|
2559
2654
|
const processed = this.processor.post_process_grounded_object_detection(
|
|
2560
2655
|
output,
|
|
@@ -2596,13 +2691,13 @@ export class ZeroShotObjectDetectionPipeline extends (/** @type {new (options: T
|
|
|
2596
2691
|
* @typedef {Object} DocumentQuestionAnsweringSingle
|
|
2597
2692
|
* @property {string} answer The generated text.
|
|
2598
2693
|
* @typedef {DocumentQuestionAnsweringSingle[]} DocumentQuestionAnsweringOutput
|
|
2599
|
-
*
|
|
2694
|
+
*
|
|
2600
2695
|
* @callback DocumentQuestionAnsweringPipelineCallback Answer the question given as input by using the document.
|
|
2601
2696
|
* @param {ImageInput} image The image of the document to use.
|
|
2602
2697
|
* @param {string} question A question to ask of the document.
|
|
2603
2698
|
* @param {Partial<import('./generation/configuration_utils.js').GenerationConfig>} [options] Additional keyword arguments to pass along to the generate method of the model.
|
|
2604
2699
|
* @returns {Promise<DocumentQuestionAnsweringOutput|DocumentQuestionAnsweringOutput[]>} An object (or array of objects) containing the answer(s).
|
|
2605
|
-
*
|
|
2700
|
+
*
|
|
2606
2701
|
* @typedef {TextImagePipelineConstructorArgs & DocumentQuestionAnsweringPipelineCallback & Disposable} DocumentQuestionAnsweringPipelineType
|
|
2607
2702
|
*/
|
|
2608
2703
|
|
|
@@ -2610,7 +2705,7 @@ export class ZeroShotObjectDetectionPipeline extends (/** @type {new (options: T
|
|
|
2610
2705
|
* Document Question Answering pipeline using any `AutoModelForDocumentQuestionAnswering`.
|
|
2611
2706
|
* The inputs/outputs are similar to the (extractive) question answering pipeline; however,
|
|
2612
2707
|
* the pipeline takes an image (and optional OCR'd words/boxes) as input instead of text context.
|
|
2613
|
-
*
|
|
2708
|
+
*
|
|
2614
2709
|
* **Example:** Answer questions about a document with `Xenova/donut-base-finetuned-docvqa`.
|
|
2615
2710
|
* ```javascript
|
|
2616
2711
|
* const qa_pipeline = await pipeline('document-question-answering', 'Xenova/donut-base-finetuned-docvqa');
|
|
@@ -2680,22 +2775,22 @@ export class DocumentQuestionAnsweringPipeline extends (/** @type {new (options:
|
|
|
2680
2775
|
* @typedef {Object} TextToAudioOutput
|
|
2681
2776
|
* @property {Float32Array} audio The generated audio waveform.
|
|
2682
2777
|
* @property {number} sampling_rate The sampling rate of the generated audio waveform.
|
|
2683
|
-
*
|
|
2778
|
+
*
|
|
2684
2779
|
* @typedef {Object} TextToAudioPipelineOptions Parameters specific to text-to-audio pipelines.
|
|
2685
2780
|
* @property {Tensor|Float32Array|string|URL} [speaker_embeddings=null] The speaker embeddings (if the model requires it).
|
|
2686
|
-
*
|
|
2781
|
+
*
|
|
2687
2782
|
* @callback TextToAudioPipelineCallback Generates speech/audio from the inputs.
|
|
2688
2783
|
* @param {string|string[]} texts The text(s) to generate.
|
|
2689
2784
|
* @param {TextToAudioPipelineOptions} options Parameters passed to the model generation/forward method.
|
|
2690
2785
|
* @returns {Promise<TextToAudioOutput>} An object containing the generated audio and sampling rate.
|
|
2691
|
-
*
|
|
2786
|
+
*
|
|
2692
2787
|
* @typedef {TextToAudioPipelineConstructorArgs & TextToAudioPipelineCallback & Disposable} TextToAudioPipelineType
|
|
2693
2788
|
*/
|
|
2694
2789
|
|
|
2695
2790
|
/**
|
|
2696
2791
|
* Text-to-audio generation pipeline using any `AutoModelForTextToWaveform` or `AutoModelForTextToSpectrogram`.
|
|
2697
2792
|
* This pipeline generates an audio file from an input text and optional other conditional inputs.
|
|
2698
|
-
*
|
|
2793
|
+
*
|
|
2699
2794
|
* **Example:** Generate audio from text with `Xenova/speecht5_tts`.
|
|
2700
2795
|
* ```javascript
|
|
2701
2796
|
* const synthesizer = await pipeline('text-to-speech', 'Xenova/speecht5_tts', { quantized: false });
|
|
@@ -2706,17 +2801,17 @@ export class DocumentQuestionAnsweringPipeline extends (/** @type {new (options:
|
|
|
2706
2801
|
* // sampling_rate: 16000
|
|
2707
2802
|
* // }
|
|
2708
2803
|
* ```
|
|
2709
|
-
*
|
|
2804
|
+
*
|
|
2710
2805
|
* You can then save the audio to a .wav file with the `wavefile` package:
|
|
2711
2806
|
* ```javascript
|
|
2712
2807
|
* import wavefile from 'wavefile';
|
|
2713
2808
|
* import fs from 'fs';
|
|
2714
|
-
*
|
|
2809
|
+
*
|
|
2715
2810
|
* const wav = new wavefile.WaveFile();
|
|
2716
2811
|
* wav.fromScratch(1, out.sampling_rate, '32f', out.audio);
|
|
2717
2812
|
* fs.writeFileSync('out.wav', wav.toBuffer());
|
|
2718
2813
|
* ```
|
|
2719
|
-
*
|
|
2814
|
+
*
|
|
2720
2815
|
* **Example:** Multilingual speech generation with `Xenova/mms-tts-fra`. See [here](https://huggingface.co/models?pipeline_tag=text-to-speech&other=vits&sort=trending) for the full list of available languages (1107).
|
|
2721
2816
|
* ```javascript
|
|
2722
2817
|
* const synthesizer = await pipeline('text-to-speech', 'Xenova/mms-tts-fra');
|
|
@@ -2822,13 +2917,13 @@ export class TextToAudioPipeline extends (/** @type {new (options: TextToAudioPi
|
|
|
2822
2917
|
* @callback ImageToImagePipelineCallback Transform the image(s) passed as inputs.
|
|
2823
2918
|
* @param {ImagePipelineInputs} images The images to transform.
|
|
2824
2919
|
* @returns {Promise<RawImage|RawImage[]>} The transformed image or list of images.
|
|
2825
|
-
*
|
|
2920
|
+
*
|
|
2826
2921
|
* @typedef {ImagePipelineConstructorArgs & ImageToImagePipelineCallback & Disposable} ImageToImagePipelineType
|
|
2827
2922
|
*/
|
|
2828
2923
|
|
|
2829
2924
|
/**
|
|
2830
2925
|
* Image to Image pipeline using any `AutoModelForImageToImage`. This pipeline generates an image based on a previous image input.
|
|
2831
|
-
*
|
|
2926
|
+
*
|
|
2832
2927
|
* **Example:** Super-resolution w/ `Xenova/swin2SR-classical-sr-x2-64`
|
|
2833
2928
|
* ```javascript
|
|
2834
2929
|
* const upscaler = await pipeline('image-to-image', 'Xenova/swin2SR-classical-sr-x2-64');
|
|
@@ -2873,17 +2968,17 @@ export class ImageToImagePipeline extends (/** @type {new (options: ImagePipelin
|
|
|
2873
2968
|
* @typedef {Object} DepthEstimationPipelineOutput
|
|
2874
2969
|
* @property {Tensor} predicted_depth The raw depth map predicted by the model.
|
|
2875
2970
|
* @property {RawImage} depth The processed depth map as an image (with the same size as the input image).
|
|
2876
|
-
*
|
|
2971
|
+
*
|
|
2877
2972
|
* @callback DepthEstimationPipelineCallback Predicts the depth for the image(s) passed as inputs.
|
|
2878
2973
|
* @param {ImagePipelineInputs} images The images to compute depth for.
|
|
2879
2974
|
* @returns {Promise<DepthEstimationPipelineOutput|DepthEstimationPipelineOutput[]>} An image or a list of images containing result(s).
|
|
2880
|
-
*
|
|
2975
|
+
*
|
|
2881
2976
|
* @typedef {ImagePipelineConstructorArgs & DepthEstimationPipelineCallback & Disposable} DepthEstimationPipelineType
|
|
2882
2977
|
*/
|
|
2883
2978
|
|
|
2884
2979
|
/**
|
|
2885
2980
|
* Depth estimation pipeline using any `AutoModelForDepthEstimation`. This pipeline predicts the depth of an image.
|
|
2886
|
-
*
|
|
2981
|
+
*
|
|
2887
2982
|
* **Example:** Depth estimation w/ `Xenova/dpt-hybrid-midas`
|
|
2888
2983
|
* ```javascript
|
|
2889
2984
|
* const depth_estimator = await pipeline('depth-estimation', 'Xenova/dpt-hybrid-midas');
|
|
@@ -3134,6 +3229,16 @@ const SUPPORTED_TASKS = Object.freeze({
|
|
|
3134
3229
|
},
|
|
3135
3230
|
"type": "multimodal",
|
|
3136
3231
|
},
|
|
3232
|
+
"background-removal": {
|
|
3233
|
+
// no tokenizer
|
|
3234
|
+
"pipeline": BackgroundRemovalPipeline,
|
|
3235
|
+
"model": [AutoModelForImageSegmentation, AutoModelForSemanticSegmentation, AutoModelForUniversalSegmentation],
|
|
3236
|
+
"processor": AutoProcessor,
|
|
3237
|
+
"default": {
|
|
3238
|
+
"model": "Xenova/modnet",
|
|
3239
|
+
},
|
|
3240
|
+
"type": "image",
|
|
3241
|
+
},
|
|
3137
3242
|
|
|
3138
3243
|
"zero-shot-image-classification": {
|
|
3139
3244
|
"tokenizer": AutoTokenizer,
|
|
@@ -3258,7 +3363,7 @@ const TASK_ALIASES = Object.freeze({
|
|
|
3258
3363
|
|
|
3259
3364
|
/**
|
|
3260
3365
|
* Utility factory method to build a `Pipeline` object.
|
|
3261
|
-
*
|
|
3366
|
+
*
|
|
3262
3367
|
* @template {PipelineType} T The type of pipeline to return.
|
|
3263
3368
|
* @param {T} task The task defining which pipeline will be returned. Currently accepted tasks are:
|
|
3264
3369
|
* - `"audio-classification"`: will return a `AudioClassificationPipeline`.
|
|
@@ -3299,6 +3404,8 @@ export async function pipeline(
|
|
|
3299
3404
|
revision = 'main',
|
|
3300
3405
|
device = null,
|
|
3301
3406
|
dtype = null,
|
|
3407
|
+
subfolder = 'onnx',
|
|
3408
|
+
use_external_data_format = null,
|
|
3302
3409
|
model_file_name = null,
|
|
3303
3410
|
session_options = {},
|
|
3304
3411
|
} = {}
|
|
@@ -3329,6 +3436,8 @@ export async function pipeline(
|
|
|
3329
3436
|
revision,
|
|
3330
3437
|
device,
|
|
3331
3438
|
dtype,
|
|
3439
|
+
subfolder,
|
|
3440
|
+
use_external_data_format,
|
|
3332
3441
|
model_file_name,
|
|
3333
3442
|
session_options,
|
|
3334
3443
|
}
|