@huggingface/transformers 3.4.0 → 3.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -2
- package/dist/transformers.js +528 -201
- package/dist/transformers.js.map +1 -1
- package/dist/transformers.min.js +1 -1
- package/dist/transformers.min.js.map +1 -1
- package/dist/transformers.node.cjs +508 -200
- package/dist/transformers.node.cjs.map +1 -1
- package/dist/transformers.node.min.cjs +1 -1
- package/dist/transformers.node.min.cjs.map +1 -1
- package/dist/transformers.node.min.mjs +1 -1
- package/dist/transformers.node.min.mjs.map +1 -1
- package/dist/transformers.node.mjs +528 -201
- package/dist/transformers.node.mjs.map +1 -1
- package/dist/transformers.web.js +528 -201
- package/dist/transformers.web.js.map +1 -1
- package/dist/transformers.web.min.js +1 -1
- package/dist/transformers.web.min.js.map +1 -1
- package/package.json +1 -1
- package/src/configs.js +2 -0
- package/src/env.js +1 -1
- package/src/models/feature_extractors.js +1 -0
- package/src/models/snac/feature_extraction_snac.js +3 -0
- package/src/models.js +125 -2
- package/src/pipelines.js +140 -135
- package/src/tokenizers.js +44 -34
- package/src/utils/data-structures.js +74 -0
- package/src/utils/hub.js +36 -15
- package/src/utils/image.js +9 -1
- package/src/utils/tensor.js +6 -2
- package/types/configs.d.ts.map +1 -1
- package/types/models/feature_extractors.d.ts +1 -0
- package/types/models/snac/feature_extraction_snac.d.ts +4 -0
- package/types/models/snac/feature_extraction_snac.d.ts.map +1 -0
- package/types/models.d.ts +72 -0
- package/types/models.d.ts.map +1 -1
- package/types/pipelines.d.ts +2 -2
- package/types/pipelines.d.ts.map +1 -1
- package/types/tokenizers.d.ts +4 -1
- package/types/tokenizers.d.ts.map +1 -1
- package/types/tsconfig.tsbuildinfo +1 -1
- package/types/utils/data-structures.d.ts +26 -0
- package/types/utils/data-structures.d.ts.map +1 -1
- package/types/utils/hub.d.ts.map +1 -1
- package/types/utils/image.d.ts +2 -2
- package/types/utils/image.d.ts.map +1 -1
- package/types/utils/tensor.d.ts.map +1 -1
package/src/pipelines.js
CHANGED
|
@@ -1,15 +1,15 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* @file Pipelines provide a high-level, easy-to-use API for running machine learning models.
|
|
3
|
-
*
|
|
3
|
+
*
|
|
4
4
|
* **Example:** Instantiate pipeline using the `pipeline` function.
|
|
5
5
|
* ```javascript
|
|
6
6
|
* import { pipeline } from '@huggingface/transformers';
|
|
7
|
-
*
|
|
7
|
+
*
|
|
8
8
|
* const classifier = await pipeline('sentiment-analysis');
|
|
9
9
|
* const output = await classifier('I love transformers!');
|
|
10
10
|
* // [{'label': 'POSITIVE', 'score': 0.999817686}]
|
|
11
11
|
* ```
|
|
12
|
-
*
|
|
12
|
+
*
|
|
13
13
|
* @module pipelines
|
|
14
14
|
*/
|
|
15
15
|
|
|
@@ -78,7 +78,7 @@ import { RawImage } from './utils/image.js';
|
|
|
78
78
|
|
|
79
79
|
|
|
80
80
|
/**
|
|
81
|
-
* @typedef {string | RawImage | URL} ImageInput
|
|
81
|
+
* @typedef {string | RawImage | URL | Blob | HTMLCanvasElement | OffscreenCanvas} ImageInput
|
|
82
82
|
* @typedef {ImageInput|ImageInput[]} ImagePipelineInputs
|
|
83
83
|
*/
|
|
84
84
|
|
|
@@ -152,7 +152,7 @@ function get_bounding_box(box, asInteger) {
|
|
|
152
152
|
/**
|
|
153
153
|
* @callback DisposeType Disposes the item.
|
|
154
154
|
* @returns {Promise<void>} A promise that resolves when the item has been disposed.
|
|
155
|
-
*
|
|
155
|
+
*
|
|
156
156
|
* @typedef {Object} Disposable
|
|
157
157
|
* @property {DisposeType} dispose A function that disposes the pipeline and returns a promise that resolves when the pipeline has been disposed.
|
|
158
158
|
*/
|
|
@@ -189,7 +189,7 @@ export class Pipeline extends Callable {
|
|
|
189
189
|
* @property {string} task The task of the pipeline. Useful for specifying subtasks.
|
|
190
190
|
* @property {PreTrainedModel} model The model used by the pipeline.
|
|
191
191
|
* @property {PreTrainedTokenizer} tokenizer The tokenizer used by the pipeline.
|
|
192
|
-
*
|
|
192
|
+
*
|
|
193
193
|
* @typedef {ModelTokenizerConstructorArgs} TextPipelineConstructorArgs An object used to instantiate a text-based pipeline.
|
|
194
194
|
*/
|
|
195
195
|
|
|
@@ -198,7 +198,7 @@ export class Pipeline extends Callable {
|
|
|
198
198
|
* @property {string} task The task of the pipeline. Useful for specifying subtasks.
|
|
199
199
|
* @property {PreTrainedModel} model The model used by the pipeline.
|
|
200
200
|
* @property {Processor} processor The processor used by the pipeline.
|
|
201
|
-
*
|
|
201
|
+
*
|
|
202
202
|
* @typedef {ModelProcessorConstructorArgs} AudioPipelineConstructorArgs An object used to instantiate an audio-based pipeline.
|
|
203
203
|
* @typedef {ModelProcessorConstructorArgs} ImagePipelineConstructorArgs An object used to instantiate an image-based pipeline.
|
|
204
204
|
*/
|
|
@@ -210,7 +210,7 @@ export class Pipeline extends Callable {
|
|
|
210
210
|
* @property {PreTrainedModel} model The model used by the pipeline.
|
|
211
211
|
* @property {PreTrainedTokenizer} tokenizer The tokenizer used by the pipeline.
|
|
212
212
|
* @property {Processor} processor The processor used by the pipeline.
|
|
213
|
-
*
|
|
213
|
+
*
|
|
214
214
|
* @typedef {ModelTokenizerProcessorConstructorArgs} TextAudioPipelineConstructorArgs An object used to instantiate a text- and audio-based pipeline.
|
|
215
215
|
* @typedef {ModelTokenizerProcessorConstructorArgs} TextImagePipelineConstructorArgs An object used to instantiate a text- and image-based pipeline.
|
|
216
216
|
*/
|
|
@@ -220,15 +220,15 @@ export class Pipeline extends Callable {
|
|
|
220
220
|
* @property {string} label The label predicted.
|
|
221
221
|
* @property {number} score The corresponding probability.
|
|
222
222
|
* @typedef {TextClassificationSingle[]} TextClassificationOutput
|
|
223
|
-
*
|
|
223
|
+
*
|
|
224
224
|
* @typedef {Object} TextClassificationPipelineOptions Parameters specific to text classification pipelines.
|
|
225
225
|
* @property {number} [top_k=1] The number of top predictions to be returned.
|
|
226
|
-
*
|
|
226
|
+
*
|
|
227
227
|
* @callback TextClassificationPipelineCallback Classify the text(s) given as inputs.
|
|
228
228
|
* @param {string|string[]} texts The input text(s) to be classified.
|
|
229
229
|
* @param {TextClassificationPipelineOptions} [options] The options to use for text classification.
|
|
230
230
|
* @returns {Promise<TextClassificationOutput|TextClassificationOutput[]>} An array or object containing the predicted labels and scores.
|
|
231
|
-
*
|
|
231
|
+
*
|
|
232
232
|
* @typedef {TextPipelineConstructorArgs & TextClassificationPipelineCallback & Disposable} TextClassificationPipelineType
|
|
233
233
|
*/
|
|
234
234
|
|
|
@@ -241,7 +241,7 @@ export class Pipeline extends Callable {
|
|
|
241
241
|
* const output = await classifier('I love transformers!');
|
|
242
242
|
* // [{ label: 'POSITIVE', score: 0.999788761138916 }]
|
|
243
243
|
* ```
|
|
244
|
-
*
|
|
244
|
+
*
|
|
245
245
|
* **Example:** Multilingual sentiment-analysis w/ `Xenova/bert-base-multilingual-uncased-sentiment` (and return top 5 classes).
|
|
246
246
|
* ```javascript
|
|
247
247
|
* const classifier = await pipeline('sentiment-analysis', 'Xenova/bert-base-multilingual-uncased-sentiment');
|
|
@@ -254,7 +254,7 @@ export class Pipeline extends Callable {
|
|
|
254
254
|
* // { label: '2 stars', score: 0.0009423971059732139 }
|
|
255
255
|
* // ]
|
|
256
256
|
* ```
|
|
257
|
-
*
|
|
257
|
+
*
|
|
258
258
|
* **Example:** Toxic comment classification w/ `Xenova/toxic-bert` (and return all classes).
|
|
259
259
|
* ```javascript
|
|
260
260
|
* const classifier = await pipeline('text-classification', 'Xenova/toxic-bert');
|
|
@@ -339,21 +339,21 @@ export class TextClassificationPipeline extends (/** @type {new (options: TextPi
|
|
|
339
339
|
* @property {number} [start] The index of the start of the corresponding entity in the sentence.
|
|
340
340
|
* @property {number} [end] The index of the end of the corresponding entity in the sentence.
|
|
341
341
|
* @typedef {TokenClassificationSingle[]} TokenClassificationOutput
|
|
342
|
-
*
|
|
342
|
+
*
|
|
343
343
|
* @typedef {Object} TokenClassificationPipelineOptions Parameters specific to token classification pipelines.
|
|
344
344
|
* @property {string[]} [ignore_labels] A list of labels to ignore.
|
|
345
|
-
*
|
|
345
|
+
*
|
|
346
346
|
* @callback TokenClassificationPipelineCallback Classify each token of the text(s) given as inputs.
|
|
347
347
|
* @param {string|string[]} texts One or several texts (or one list of texts) for token classification.
|
|
348
348
|
* @param {TokenClassificationPipelineOptions} [options] The options to use for token classification.
|
|
349
349
|
* @returns {Promise<TokenClassificationOutput|TokenClassificationOutput[]>} The result.
|
|
350
|
-
*
|
|
350
|
+
*
|
|
351
351
|
* @typedef {TextPipelineConstructorArgs & TokenClassificationPipelineCallback & Disposable} TokenClassificationPipelineType
|
|
352
352
|
*/
|
|
353
353
|
|
|
354
354
|
/**
|
|
355
355
|
* Named Entity Recognition pipeline using any `ModelForTokenClassification`.
|
|
356
|
-
*
|
|
356
|
+
*
|
|
357
357
|
* **Example:** Perform named entity recognition with `Xenova/bert-base-NER`.
|
|
358
358
|
* ```javascript
|
|
359
359
|
* const classifier = await pipeline('token-classification', 'Xenova/bert-base-NER');
|
|
@@ -363,7 +363,7 @@ export class TextClassificationPipeline extends (/** @type {new (options: TextPi
|
|
|
363
363
|
* // { entity: 'B-LOC', score: 0.9994474053382874, index: 9, word: 'London' }
|
|
364
364
|
* // ]
|
|
365
365
|
* ```
|
|
366
|
-
*
|
|
366
|
+
*
|
|
367
367
|
* **Example:** Perform named entity recognition with `Xenova/bert-base-NER` (and return all labels).
|
|
368
368
|
* ```javascript
|
|
369
369
|
* const classifier = await pipeline('token-classification', 'Xenova/bert-base-NER');
|
|
@@ -459,22 +459,22 @@ export class TokenClassificationPipeline extends (/** @type {new (options: TextP
|
|
|
459
459
|
* @property {number} [start] The character start index of the answer (in the tokenized version of the input).
|
|
460
460
|
* @property {number} [end] The character end index of the answer (in the tokenized version of the input).
|
|
461
461
|
* @property {string} answer The answer to the question.
|
|
462
|
-
*
|
|
462
|
+
*
|
|
463
463
|
* @typedef {Object} QuestionAnsweringPipelineOptions Parameters specific to question answering pipelines.
|
|
464
464
|
* @property {number} [top_k=1] The number of top answer predictions to be returned.
|
|
465
|
-
*
|
|
465
|
+
*
|
|
466
466
|
* @callback QuestionAnsweringPipelineCallback Answer the question(s) given as inputs by using the context(s).
|
|
467
467
|
* @param {string|string[]} question One or several question(s) (must be used in conjunction with the `context` argument).
|
|
468
468
|
* @param {string|string[]} context One or several context(s) associated with the question(s) (must be used in conjunction with the `question` argument).
|
|
469
469
|
* @param {QuestionAnsweringPipelineOptions} [options] The options to use for question answering.
|
|
470
470
|
* @returns {Promise<QuestionAnsweringOutput|QuestionAnsweringOutput[]>} An array or object containing the predicted answers and scores.
|
|
471
|
-
*
|
|
471
|
+
*
|
|
472
472
|
* @typedef {TextPipelineConstructorArgs & QuestionAnsweringPipelineCallback & Disposable} QuestionAnsweringPipelineType
|
|
473
473
|
*/
|
|
474
474
|
|
|
475
475
|
/**
|
|
476
476
|
* Question Answering pipeline using any `ModelForQuestionAnswering`.
|
|
477
|
-
*
|
|
477
|
+
*
|
|
478
478
|
* **Example:** Run question answering with `Xenova/distilbert-base-uncased-distilled-squad`.
|
|
479
479
|
* ```javascript
|
|
480
480
|
* const answerer = await pipeline('question-answering', 'Xenova/distilbert-base-uncased-distilled-squad');
|
|
@@ -599,10 +599,10 @@ export class QuestionAnsweringPipeline extends (/** @type {new (options: TextPip
|
|
|
599
599
|
* @property {number} token The predicted token id (to replace the masked one).
|
|
600
600
|
* @property {string} token_str The predicted token (to replace the masked one).
|
|
601
601
|
* @typedef {FillMaskSingle[]} FillMaskOutput
|
|
602
|
-
*
|
|
602
|
+
*
|
|
603
603
|
* @typedef {Object} FillMaskPipelineOptions Parameters specific to fill mask pipelines.
|
|
604
604
|
* @property {number} [top_k=5] When passed, overrides the number of predictions to return.
|
|
605
|
-
*
|
|
605
|
+
*
|
|
606
606
|
* @callback FillMaskPipelineCallback Fill the masked token in the text(s) given as inputs.
|
|
607
607
|
* @param {string|string[]} texts One or several texts (or one list of prompts) with masked tokens.
|
|
608
608
|
* @param {FillMaskPipelineOptions} [options] The options to use for masked language modelling.
|
|
@@ -610,13 +610,13 @@ export class QuestionAnsweringPipeline extends (/** @type {new (options: TextPip
|
|
|
610
610
|
* and the sequence with the predicted token filled in, or an array of such arrays (one for each input text).
|
|
611
611
|
* If only one input text is given, the output will be an array of objects.
|
|
612
612
|
* @throws {Error} When the mask token is not found in the input text.
|
|
613
|
-
*
|
|
613
|
+
*
|
|
614
614
|
* @typedef {TextPipelineConstructorArgs & FillMaskPipelineCallback & Disposable} FillMaskPipelineType
|
|
615
615
|
*/
|
|
616
616
|
|
|
617
617
|
/**
|
|
618
618
|
* Masked language modeling prediction pipeline using any `ModelWithLMHead`.
|
|
619
|
-
*
|
|
619
|
+
*
|
|
620
620
|
* **Example:** Perform masked language modelling (a.k.a. "fill-mask") with `Xenova/bert-base-cased`.
|
|
621
621
|
* ```javascript
|
|
622
622
|
* const unmasker = await pipeline('fill-mask', 'Xenova/bert-base-cased');
|
|
@@ -629,7 +629,7 @@ export class QuestionAnsweringPipeline extends (/** @type {new (options: TextPip
|
|
|
629
629
|
* // { token_str: 'life', score: 0.01859794743359089, token: 1297, sequence: 'The goal of life is life.' }
|
|
630
630
|
* // ]
|
|
631
631
|
* ```
|
|
632
|
-
*
|
|
632
|
+
*
|
|
633
633
|
* **Example:** Perform masked language modelling (a.k.a. "fill-mask") with `Xenova/bert-base-cased` (and return top result).
|
|
634
634
|
* ```javascript
|
|
635
635
|
* const unmasker = await pipeline('fill-mask', 'Xenova/bert-base-cased');
|
|
@@ -706,18 +706,18 @@ export class FillMaskPipeline extends (/** @type {new (options: TextPipelineCons
|
|
|
706
706
|
* @typedef {Object} Text2TextGenerationSingle
|
|
707
707
|
* @property {string} generated_text The generated text.
|
|
708
708
|
* @typedef {Text2TextGenerationSingle[]} Text2TextGenerationOutput
|
|
709
|
-
*
|
|
709
|
+
*
|
|
710
710
|
* @callback Text2TextGenerationPipelineCallback Generate the output text(s) using text(s) given as inputs.
|
|
711
711
|
* @param {string|string[]} texts Input text for the encoder.
|
|
712
712
|
* @param {Partial<import('./generation/configuration_utils.js').GenerationConfig>} [options] Additional keyword arguments to pass along to the generate method of the model.
|
|
713
713
|
* @returns {Promise<Text2TextGenerationOutput|Text2TextGenerationOutput[]>}
|
|
714
|
-
*
|
|
714
|
+
*
|
|
715
715
|
* @typedef {TextPipelineConstructorArgs & Text2TextGenerationPipelineCallback & Disposable} Text2TextGenerationPipelineType
|
|
716
716
|
*/
|
|
717
717
|
|
|
718
718
|
/**
|
|
719
719
|
* Text2TextGenerationPipeline class for generating text using a model that performs text-to-text generation tasks.
|
|
720
|
-
*
|
|
720
|
+
*
|
|
721
721
|
* **Example:** Text-to-text generation w/ `Xenova/LaMini-Flan-T5-783M`.
|
|
722
722
|
* ```javascript
|
|
723
723
|
* const generator = await pipeline('text2text-generation', 'Xenova/LaMini-Flan-T5-783M');
|
|
@@ -793,18 +793,18 @@ export class Text2TextGenerationPipeline extends (/** @type {new (options: TextP
|
|
|
793
793
|
* @typedef {Object} SummarizationSingle
|
|
794
794
|
* @property {string} summary_text The summary text.
|
|
795
795
|
* @typedef {SummarizationSingle[]} SummarizationOutput
|
|
796
|
-
*
|
|
796
|
+
*
|
|
797
797
|
* @callback SummarizationPipelineCallback Summarize the text(s) given as inputs.
|
|
798
798
|
* @param {string|string[]} texts One or several articles (or one list of articles) to summarize.
|
|
799
799
|
* @param {import('./generation/configuration_utils.js').GenerationConfig} [options] Additional keyword arguments to pass along to the generate method of the model.
|
|
800
800
|
* @returns {Promise<SummarizationOutput|SummarizationOutput[]>}
|
|
801
|
-
*
|
|
801
|
+
*
|
|
802
802
|
* @typedef {TextPipelineConstructorArgs & SummarizationPipelineCallback & Disposable} SummarizationPipelineType
|
|
803
803
|
*/
|
|
804
804
|
|
|
805
805
|
/**
|
|
806
806
|
* A pipeline for summarization tasks, inheriting from Text2TextGenerationPipeline.
|
|
807
|
-
*
|
|
807
|
+
*
|
|
808
808
|
* **Example:** Summarization w/ `Xenova/distilbart-cnn-6-6`.
|
|
809
809
|
* ```javascript
|
|
810
810
|
* const generator = await pipeline('summarization', 'Xenova/distilbart-cnn-6-6');
|
|
@@ -840,23 +840,23 @@ export class SummarizationPipeline extends (/** @type {new (options: TextPipelin
|
|
|
840
840
|
* @typedef {Object} TranslationSingle
|
|
841
841
|
* @property {string} translation_text The translated text.
|
|
842
842
|
* @typedef {TranslationSingle[]} TranslationOutput
|
|
843
|
-
*
|
|
843
|
+
*
|
|
844
844
|
* @callback TranslationPipelineCallback Translate the text(s) given as inputs.
|
|
845
845
|
* @param {string|string[]} texts Texts to be translated.
|
|
846
846
|
* @param {import('./generation/configuration_utils.js').GenerationConfig} [options] Additional keyword arguments to pass along to the generate method of the model.
|
|
847
847
|
* @returns {Promise<TranslationOutput|TranslationOutput[]>}
|
|
848
|
-
*
|
|
848
|
+
*
|
|
849
849
|
* @typedef {TextPipelineConstructorArgs & TranslationPipelineCallback & Disposable} TranslationPipelineType
|
|
850
850
|
*/
|
|
851
851
|
|
|
852
852
|
/**
|
|
853
853
|
* Translates text from one language to another.
|
|
854
|
-
*
|
|
854
|
+
*
|
|
855
855
|
* **Example:** Multilingual translation w/ `Xenova/nllb-200-distilled-600M`.
|
|
856
|
-
*
|
|
856
|
+
*
|
|
857
857
|
* See [here](https://github.com/facebookresearch/flores/blob/main/flores200/README.md#languages-in-flores-200)
|
|
858
858
|
* for the full list of languages and their corresponding codes.
|
|
859
|
-
*
|
|
859
|
+
*
|
|
860
860
|
* ```javascript
|
|
861
861
|
* const translator = await pipeline('translation', 'Xenova/nllb-200-distilled-600M');
|
|
862
862
|
* const output = await translator('जीवन एक चॉकलेट बॉक्स की तरह है।', {
|
|
@@ -865,12 +865,12 @@ export class SummarizationPipeline extends (/** @type {new (options: TextPipelin
|
|
|
865
865
|
* });
|
|
866
866
|
* // [{ translation_text: 'La vie est comme une boîte à chocolat.' }]
|
|
867
867
|
* ```
|
|
868
|
-
*
|
|
868
|
+
*
|
|
869
869
|
* **Example:** Multilingual translation w/ `Xenova/m2m100_418M`.
|
|
870
|
-
*
|
|
870
|
+
*
|
|
871
871
|
* See [here](https://huggingface.co/facebook/m2m100_418M#languages-covered)
|
|
872
872
|
* for the full list of languages and their corresponding codes.
|
|
873
|
-
*
|
|
873
|
+
*
|
|
874
874
|
* ```javascript
|
|
875
875
|
* const translator = await pipeline('translation', 'Xenova/m2m100_418M');
|
|
876
876
|
* const output = await translator('生活就像一盒巧克力。', {
|
|
@@ -879,12 +879,12 @@ export class SummarizationPipeline extends (/** @type {new (options: TextPipelin
|
|
|
879
879
|
* });
|
|
880
880
|
* // [{ translation_text: 'Life is like a box of chocolate.' }]
|
|
881
881
|
* ```
|
|
882
|
-
*
|
|
882
|
+
*
|
|
883
883
|
* **Example:** Multilingual translation w/ `Xenova/mbart-large-50-many-to-many-mmt`.
|
|
884
|
-
*
|
|
884
|
+
*
|
|
885
885
|
* See [here](https://huggingface.co/facebook/mbart-large-50-many-to-many-mmt#languages-covered)
|
|
886
886
|
* for the full list of languages and their corresponding codes.
|
|
887
|
-
*
|
|
887
|
+
*
|
|
888
888
|
* ```javascript
|
|
889
889
|
* const translator = await pipeline('translation', 'Xenova/mbart-large-50-many-to-many-mmt');
|
|
890
890
|
* const output = await translator('संयुक्त राष्ट्र के प्रमुख का कहना है कि सीरिया में कोई सैन्य समाधान नहीं है', {
|
|
@@ -913,21 +913,21 @@ function isChat(x) {
|
|
|
913
913
|
|
|
914
914
|
/**
|
|
915
915
|
* @typedef {import('./tokenizers.js').Message[]} Chat
|
|
916
|
-
*
|
|
916
|
+
*
|
|
917
917
|
* @typedef {Object} TextGenerationSingle
|
|
918
918
|
* @property {string|Chat} generated_text The generated text.
|
|
919
919
|
* @typedef {TextGenerationSingle[]} TextGenerationOutput
|
|
920
|
-
*
|
|
920
|
+
*
|
|
921
921
|
* @typedef {Object} TextGenerationSpecificParams Parameters specific to text-generation pipelines.
|
|
922
922
|
* @property {boolean} [add_special_tokens] Whether or not to add special tokens when tokenizing the sequences.
|
|
923
923
|
* @property {boolean} [return_full_text=true] If set to `false` only added text is returned, otherwise the full text is returned.
|
|
924
924
|
* @typedef {import('./generation/configuration_utils.js').GenerationConfig & TextGenerationSpecificParams} TextGenerationConfig
|
|
925
|
-
*
|
|
925
|
+
*
|
|
926
926
|
* @callback TextGenerationPipelineCallback Complete the prompt(s) given as inputs.
|
|
927
927
|
* @param {string|string[]|Chat|Chat[]} texts One or several prompts (or one list of prompts) to complete.
|
|
928
928
|
* @param {Partial<TextGenerationConfig>} [options] Additional keyword arguments to pass along to the generate method of the model.
|
|
929
929
|
* @returns {Promise<TextGenerationOutput|TextGenerationOutput[]>} An array or object containing the generated texts.
|
|
930
|
-
*
|
|
930
|
+
*
|
|
931
931
|
* @typedef {TextPipelineConstructorArgs & TextGenerationPipelineCallback & Disposable} TextGenerationPipelineType
|
|
932
932
|
*/
|
|
933
933
|
|
|
@@ -935,7 +935,7 @@ function isChat(x) {
|
|
|
935
935
|
* Language generation pipeline using any `ModelWithLMHead` or `ModelForCausalLM`.
|
|
936
936
|
* This pipeline predicts the words that will follow a specified text prompt.
|
|
937
937
|
* NOTE: For the full list of generation parameters, see [`GenerationConfig`](./utils/generation#module_utils/generation.GenerationConfig).
|
|
938
|
-
*
|
|
938
|
+
*
|
|
939
939
|
* **Example:** Text generation with `Xenova/distilgpt2` (default settings).
|
|
940
940
|
* ```javascript
|
|
941
941
|
* const generator = await pipeline('text-generation', 'Xenova/distilgpt2');
|
|
@@ -943,7 +943,7 @@ function isChat(x) {
|
|
|
943
943
|
* const output = await generator(text);
|
|
944
944
|
* // [{ generated_text: "I enjoy walking with my cute dog, and I love to play with the other dogs." }]
|
|
945
945
|
* ```
|
|
946
|
-
*
|
|
946
|
+
*
|
|
947
947
|
* **Example:** Text generation with `Xenova/distilgpt2` (custom settings).
|
|
948
948
|
* ```javascript
|
|
949
949
|
* const generator = await pipeline('text-generation', 'Xenova/distilgpt2');
|
|
@@ -962,7 +962,7 @@ function isChat(x) {
|
|
|
962
962
|
* // "generated_text": "Once upon a time, there was an abundance of information about the most important and influential"
|
|
963
963
|
* // }]
|
|
964
964
|
* ```
|
|
965
|
-
*
|
|
965
|
+
*
|
|
966
966
|
* **Example:** Run code generation with `Xenova/codegen-350M-mono`.
|
|
967
967
|
* ```javascript
|
|
968
968
|
* const generator = await pipeline('text-generation', 'Xenova/codegen-350M-mono');
|
|
@@ -1081,7 +1081,7 @@ export class TextGenerationPipeline extends (/** @type {new (options: TextPipeli
|
|
|
1081
1081
|
* @property {string} sequence The sequence for which this is the output.
|
|
1082
1082
|
* @property {string[]} labels The labels sorted by order of likelihood.
|
|
1083
1083
|
* @property {number[]} scores The probabilities for each of the labels.
|
|
1084
|
-
*
|
|
1084
|
+
*
|
|
1085
1085
|
* @typedef {Object} ZeroShotClassificationPipelineOptions Parameters specific to zero-shot classification pipelines.
|
|
1086
1086
|
* @property {string} [hypothesis_template="This example is {}."] The template used to turn each
|
|
1087
1087
|
* candidate label into an NLI-style hypothesis. The candidate label will replace the {} placeholder.
|
|
@@ -1089,14 +1089,14 @@ export class TextGenerationPipeline extends (/** @type {new (options: TextPipeli
|
|
|
1089
1089
|
* If `false`, the scores are normalized such that the sum of the label likelihoods for each sequence
|
|
1090
1090
|
* is 1. If `true`, the labels are considered independent and probabilities are normalized for each
|
|
1091
1091
|
* candidate by doing a softmax of the entailment score vs. the contradiction score.
|
|
1092
|
-
*
|
|
1092
|
+
*
|
|
1093
1093
|
* @callback ZeroShotClassificationPipelineCallback Classify the sequence(s) given as inputs.
|
|
1094
1094
|
* @param {string|string[]} texts The sequence(s) to classify, will be truncated if the model input is too large.
|
|
1095
1095
|
* @param {string|string[]} candidate_labels The set of possible class labels to classify each sequence into.
|
|
1096
1096
|
* Can be a single label, a string of comma-separated labels, or a list of labels.
|
|
1097
1097
|
* @param {ZeroShotClassificationPipelineOptions} [options] The options to use for zero-shot classification.
|
|
1098
1098
|
* @returns {Promise<ZeroShotClassificationOutput|ZeroShotClassificationOutput[]>} An array or object containing the predicted labels and scores.
|
|
1099
|
-
*
|
|
1099
|
+
*
|
|
1100
1100
|
* @typedef {TextPipelineConstructorArgs & ZeroShotClassificationPipelineCallback & Disposable} ZeroShotClassificationPipelineType
|
|
1101
1101
|
*/
|
|
1102
1102
|
|
|
@@ -1105,7 +1105,7 @@ export class TextGenerationPipeline extends (/** @type {new (options: TextPipeli
|
|
|
1105
1105
|
* trained on NLI (natural language inference) tasks. Equivalent of `text-classification`
|
|
1106
1106
|
* pipelines, but these models don't require a hardcoded number of potential classes, they
|
|
1107
1107
|
* can be chosen at runtime. It usually means it's slower but it is **much** more flexible.
|
|
1108
|
-
*
|
|
1108
|
+
*
|
|
1109
1109
|
* **Example:** Zero shot classification with `Xenova/mobilebert-uncased-mnli`.
|
|
1110
1110
|
* ```javascript
|
|
1111
1111
|
* const classifier = await pipeline('zero-shot-classification', 'Xenova/mobilebert-uncased-mnli');
|
|
@@ -1118,7 +1118,7 @@ export class TextGenerationPipeline extends (/** @type {new (options: TextPipeli
|
|
|
1118
1118
|
* // scores: [ 0.5562091040482018, 0.1843621307860853, 0.13942646639336376, 0.12000229877234923 ]
|
|
1119
1119
|
* // }
|
|
1120
1120
|
* ```
|
|
1121
|
-
*
|
|
1121
|
+
*
|
|
1122
1122
|
* **Example:** Zero shot classification with `Xenova/nli-deberta-v3-xsmall` (multi-label).
|
|
1123
1123
|
* ```javascript
|
|
1124
1124
|
* const classifier = await pipeline('zero-shot-classification', 'Xenova/nli-deberta-v3-xsmall');
|
|
@@ -1232,20 +1232,20 @@ export class ZeroShotClassificationPipeline extends (/** @type {new (options: Te
|
|
|
1232
1232
|
* @property {'none'|'mean'|'cls'} [pooling="none"] The pooling method to use.
|
|
1233
1233
|
* @property {boolean} [normalize=false] Whether or not to normalize the embeddings in the last dimension.
|
|
1234
1234
|
* @property {boolean} [quantize=false] Whether or not to quantize the embeddings.
|
|
1235
|
-
* @property {'binary'|'ubinary'} [precision='binary'] The precision to use for quantization.
|
|
1236
|
-
*
|
|
1235
|
+
* @property {'binary'|'ubinary'} [precision='binary'] The precision to use for quantization.
|
|
1236
|
+
*
|
|
1237
1237
|
* @callback FeatureExtractionPipelineCallback Extract the features of the input(s).
|
|
1238
1238
|
* @param {string|string[]} texts One or several texts (or one list of texts) to get the features of.
|
|
1239
1239
|
* @param {FeatureExtractionPipelineOptions} [options] The options to use for feature extraction.
|
|
1240
1240
|
* @returns {Promise<Tensor>} The features computed by the model.
|
|
1241
|
-
*
|
|
1241
|
+
*
|
|
1242
1242
|
* @typedef {TextPipelineConstructorArgs & FeatureExtractionPipelineCallback & Disposable} FeatureExtractionPipelineType
|
|
1243
1243
|
*/
|
|
1244
1244
|
|
|
1245
1245
|
/**
|
|
1246
1246
|
* Feature extraction pipeline using no model head. This pipeline extracts the hidden
|
|
1247
1247
|
* states from the base transformer, which can be used as features in downstream tasks.
|
|
1248
|
-
*
|
|
1248
|
+
*
|
|
1249
1249
|
* **Example:** Run feature extraction with `bert-base-uncased` (without pooling/normalization).
|
|
1250
1250
|
* ```javascript
|
|
1251
1251
|
* const extractor = await pipeline('feature-extraction', 'Xenova/bert-base-uncased', { revision: 'default' });
|
|
@@ -1256,7 +1256,7 @@ export class ZeroShotClassificationPipeline extends (/** @type {new (options: Te
|
|
|
1256
1256
|
* // dims: [1, 8, 768]
|
|
1257
1257
|
* // }
|
|
1258
1258
|
* ```
|
|
1259
|
-
*
|
|
1259
|
+
*
|
|
1260
1260
|
* **Example:** Run feature extraction with `bert-base-uncased` (with pooling/normalization).
|
|
1261
1261
|
* ```javascript
|
|
1262
1262
|
* const extractor = await pipeline('feature-extraction', 'Xenova/bert-base-uncased', { revision: 'default' });
|
|
@@ -1267,7 +1267,7 @@ export class ZeroShotClassificationPipeline extends (/** @type {new (options: Te
|
|
|
1267
1267
|
* // dims: [1, 768]
|
|
1268
1268
|
* // }
|
|
1269
1269
|
* ```
|
|
1270
|
-
*
|
|
1270
|
+
*
|
|
1271
1271
|
* **Example:** Calculating embeddings with `sentence-transformers` models.
|
|
1272
1272
|
* ```javascript
|
|
1273
1273
|
* const extractor = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2');
|
|
@@ -1348,19 +1348,19 @@ export class FeatureExtractionPipeline extends (/** @type {new (options: TextPip
|
|
|
1348
1348
|
/**
|
|
1349
1349
|
* @typedef {Object} ImageFeatureExtractionPipelineOptions Parameters specific to image feature extraction pipelines.
|
|
1350
1350
|
* @property {boolean} [pool=null] Whether or not to return the pooled output. If set to `false`, the model will return the raw hidden states.
|
|
1351
|
-
*
|
|
1351
|
+
*
|
|
1352
1352
|
* @callback ImageFeatureExtractionPipelineCallback Extract the features of the input(s).
|
|
1353
1353
|
* @param {ImagePipelineInputs} images One or several images (or one list of images) to get the features of.
|
|
1354
1354
|
* @param {ImageFeatureExtractionPipelineOptions} [options] The options to use for image feature extraction.
|
|
1355
1355
|
* @returns {Promise<Tensor>} The image features computed by the model.
|
|
1356
|
-
*
|
|
1356
|
+
*
|
|
1357
1357
|
* @typedef {ImagePipelineConstructorArgs & ImageFeatureExtractionPipelineCallback & Disposable} ImageFeatureExtractionPipelineType
|
|
1358
1358
|
*/
|
|
1359
1359
|
|
|
1360
1360
|
/**
|
|
1361
1361
|
* Image feature extraction pipeline using no model head. This pipeline extracts the hidden
|
|
1362
1362
|
* states from the base transformer, which can be used as features in downstream tasks.
|
|
1363
|
-
*
|
|
1363
|
+
*
|
|
1364
1364
|
* **Example:** Perform image feature extraction with `Xenova/vit-base-patch16-224-in21k`.
|
|
1365
1365
|
* ```javascript
|
|
1366
1366
|
* const image_feature_extractor = await pipeline('image-feature-extraction', 'Xenova/vit-base-patch16-224-in21k');
|
|
@@ -1373,7 +1373,7 @@ export class FeatureExtractionPipeline extends (/** @type {new (options: TextPip
|
|
|
1373
1373
|
* // size: 151296
|
|
1374
1374
|
* // }
|
|
1375
1375
|
* ```
|
|
1376
|
-
*
|
|
1376
|
+
*
|
|
1377
1377
|
* **Example:** Compute image embeddings with `Xenova/clip-vit-base-patch32`.
|
|
1378
1378
|
* ```javascript
|
|
1379
1379
|
* const image_feature_extractor = await pipeline('image-feature-extraction', 'Xenova/clip-vit-base-patch32');
|
|
@@ -1429,12 +1429,12 @@ export class ImageFeatureExtractionPipeline extends (/** @type {new (options: Im
|
|
|
1429
1429
|
* @property {string} label The label predicted.
|
|
1430
1430
|
* @property {number} score The corresponding probability.
|
|
1431
1431
|
* @typedef {AudioClassificationSingle[]} AudioClassificationOutput
|
|
1432
|
-
*
|
|
1432
|
+
*
|
|
1433
1433
|
* @typedef {Object} AudioClassificationPipelineOptions Parameters specific to audio classification pipelines.
|
|
1434
1434
|
* @property {number} [top_k=5] The number of top labels that will be returned by the pipeline.
|
|
1435
1435
|
* If the provided number is `null` or higher than the number of labels available in the model configuration,
|
|
1436
1436
|
* it will default to the number of labels.
|
|
1437
|
-
*
|
|
1437
|
+
*
|
|
1438
1438
|
* @callback AudioClassificationPipelineCallback Classify the sequence(s) given as inputs.
|
|
1439
1439
|
* @param {AudioPipelineInputs} audio The input audio file(s) to be classified. The input is either:
|
|
1440
1440
|
* - `string` or `URL` that is the filename/URL of the audio file, the file will be read at the processor's sampling rate
|
|
@@ -1443,14 +1443,14 @@ export class ImageFeatureExtractionPipeline extends (/** @type {new (options: Im
|
|
|
1443
1443
|
* - `Float32Array` or `Float64Array` of shape `(n, )`, representing the raw audio at the correct sampling rate (no further check will be done).
|
|
1444
1444
|
* @param {AudioClassificationPipelineOptions} [options] The options to use for audio classification.
|
|
1445
1445
|
* @returns {Promise<AudioClassificationOutput|AudioClassificationOutput[]>} An array or object containing the predicted labels and scores.
|
|
1446
|
-
*
|
|
1446
|
+
*
|
|
1447
1447
|
* @typedef {AudioPipelineConstructorArgs & AudioClassificationPipelineCallback & Disposable} AudioClassificationPipelineType
|
|
1448
1448
|
*/
|
|
1449
1449
|
|
|
1450
1450
|
/**
|
|
1451
1451
|
* Audio classification pipeline using any `AutoModelForAudioClassification`.
|
|
1452
1452
|
* This pipeline predicts the class of a raw waveform or an audio file.
|
|
1453
|
-
*
|
|
1453
|
+
*
|
|
1454
1454
|
* **Example:** Perform audio classification with `Xenova/wav2vec2-large-xlsr-53-gender-recognition-librispeech`.
|
|
1455
1455
|
* ```javascript
|
|
1456
1456
|
* const classifier = await pipeline('audio-classification', 'Xenova/wav2vec2-large-xlsr-53-gender-recognition-librispeech');
|
|
@@ -1461,7 +1461,7 @@ export class ImageFeatureExtractionPipeline extends (/** @type {new (options: Im
|
|
|
1461
1461
|
* // { label: 'female', score: 0.001845747814513743 }
|
|
1462
1462
|
* // ]
|
|
1463
1463
|
* ```
|
|
1464
|
-
*
|
|
1464
|
+
*
|
|
1465
1465
|
* **Example:** Perform audio classification with `Xenova/ast-finetuned-audioset-10-10-0.4593` and return top 4 results.
|
|
1466
1466
|
* ```javascript
|
|
1467
1467
|
* const classifier = await pipeline('audio-classification', 'Xenova/ast-finetuned-audioset-10-10-0.4593');
|
|
@@ -1526,12 +1526,12 @@ export class AudioClassificationPipeline extends (/** @type {new (options: Audio
|
|
|
1526
1526
|
* @typedef {Object} ZeroShotAudioClassificationOutput
|
|
1527
1527
|
* @property {string} label The label identified by the model. It is one of the suggested `candidate_label`.
|
|
1528
1528
|
* @property {number} score The score attributed by the model for that label (between 0 and 1).
|
|
1529
|
-
*
|
|
1529
|
+
*
|
|
1530
1530
|
* @typedef {Object} ZeroShotAudioClassificationPipelineOptions Parameters specific to zero-shot audio classification pipelines.
|
|
1531
1531
|
* @property {string} [hypothesis_template="This is a sound of {}."] The sentence used in conjunction with `candidate_labels`
|
|
1532
1532
|
* to attempt the audio classification by replacing the placeholder with the candidate_labels.
|
|
1533
1533
|
* Then likelihood is estimated by using `logits_per_audio`.
|
|
1534
|
-
*
|
|
1534
|
+
*
|
|
1535
1535
|
* @callback ZeroShotAudioClassificationPipelineCallback Classify the sequence(s) given as inputs.
|
|
1536
1536
|
* @param {AudioPipelineInputs} audio The input audio file(s) to be classified. The input is either:
|
|
1537
1537
|
* - `string` or `URL` that is the filename/URL of the audio file, the file will be read at the processor's sampling rate
|
|
@@ -1541,14 +1541,14 @@ export class AudioClassificationPipeline extends (/** @type {new (options: Audio
|
|
|
1541
1541
|
* @param {string[]} candidate_labels The candidate labels for this audio.
|
|
1542
1542
|
* @param {ZeroShotAudioClassificationPipelineOptions} [options] The options to use for zero-shot audio classification.
|
|
1543
1543
|
* @returns {Promise<ZeroShotAudioClassificationOutput[]|ZeroShotAudioClassificationOutput[][]>} An array of objects containing the predicted labels and scores.
|
|
1544
|
-
*
|
|
1544
|
+
*
|
|
1545
1545
|
* @typedef {TextAudioPipelineConstructorArgs & ZeroShotAudioClassificationPipelineCallback & Disposable} ZeroShotAudioClassificationPipelineType
|
|
1546
1546
|
*/
|
|
1547
1547
|
|
|
1548
1548
|
/**
|
|
1549
1549
|
* Zero shot audio classification pipeline using `ClapModel`. This pipeline predicts the class of an audio when you
|
|
1550
1550
|
* provide an audio and a set of `candidate_labels`.
|
|
1551
|
-
*
|
|
1551
|
+
*
|
|
1552
1552
|
* **Example**: Perform zero-shot audio classification with `Xenova/clap-htsat-unfused`.
|
|
1553
1553
|
* ```javascript
|
|
1554
1554
|
* const classifier = await pipeline('zero-shot-audio-classification', 'Xenova/clap-htsat-unfused');
|
|
@@ -1581,7 +1581,7 @@ export class ZeroShotAudioClassificationPipeline extends (/** @type {new (option
|
|
|
1581
1581
|
audio = [/** @type {AudioInput} */ (audio)];
|
|
1582
1582
|
}
|
|
1583
1583
|
|
|
1584
|
-
// Insert label into hypothesis template
|
|
1584
|
+
// Insert label into hypothesis template
|
|
1585
1585
|
const texts = candidate_labels.map(
|
|
1586
1586
|
x => hypothesis_template.replace('{}', x)
|
|
1587
1587
|
);
|
|
@@ -1625,7 +1625,7 @@ export class ZeroShotAudioClassificationPipeline extends (/** @type {new (option
|
|
|
1625
1625
|
* @property {string} text The recognized text.
|
|
1626
1626
|
* @property {Chunk[]} [chunks] When using `return_timestamps`, the `chunks` will become a list
|
|
1627
1627
|
* containing all the various text chunks identified by the model.
|
|
1628
|
-
*
|
|
1628
|
+
*
|
|
1629
1629
|
* @typedef {Object} AutomaticSpeechRecognitionSpecificParams Parameters specific to automatic-speech-recognition pipelines.
|
|
1630
1630
|
* @property {boolean|'word'} [return_timestamps] Whether to return timestamps or not. Default is `false`.
|
|
1631
1631
|
* @property {number} [chunk_length_s] The length of audio chunks to process in seconds. Default is 0 (no chunking).
|
|
@@ -1635,7 +1635,7 @@ export class ZeroShotAudioClassificationPipeline extends (/** @type {new (option
|
|
|
1635
1635
|
* @property {string} [task] The task to perform. Default is `null`, meaning it should be auto-detected.
|
|
1636
1636
|
* @property {number} [num_frames] The number of frames in the input audio.
|
|
1637
1637
|
* @typedef {import('./generation/configuration_utils.js').GenerationConfig & AutomaticSpeechRecognitionSpecificParams} AutomaticSpeechRecognitionConfig
|
|
1638
|
-
*
|
|
1638
|
+
*
|
|
1639
1639
|
* @callback AutomaticSpeechRecognitionPipelineCallback Transcribe the audio sequence(s) given as inputs to text.
|
|
1640
1640
|
* @param {AudioPipelineInputs} audio The input audio file(s) to be transcribed. The input is either:
|
|
1641
1641
|
* - `string` or `URL` that is the filename/URL of the audio file, the file will be read at the processor's sampling rate
|
|
@@ -1644,7 +1644,7 @@ export class ZeroShotAudioClassificationPipeline extends (/** @type {new (option
|
|
|
1644
1644
|
* - `Float32Array` or `Float64Array` of shape `(n, )`, representing the raw audio at the correct sampling rate (no further check will be done).
|
|
1645
1645
|
* @param {Partial<AutomaticSpeechRecognitionConfig>} [options] Additional keyword arguments to pass along to the generate method of the model.
|
|
1646
1646
|
* @returns {Promise<AutomaticSpeechRecognitionOutput|AutomaticSpeechRecognitionOutput[]>} An object containing the transcription text and optionally timestamps if `return_timestamps` is `true`.
|
|
1647
|
-
*
|
|
1647
|
+
*
|
|
1648
1648
|
* @typedef {TextAudioPipelineConstructorArgs & AutomaticSpeechRecognitionPipelineCallback & Disposable} AutomaticSpeechRecognitionPipelineType
|
|
1649
1649
|
*/
|
|
1650
1650
|
|
|
@@ -1658,7 +1658,7 @@ export class ZeroShotAudioClassificationPipeline extends (/** @type {new (option
|
|
|
1658
1658
|
* const output = await transcriber(url);
|
|
1659
1659
|
* // { text: " And so my fellow Americans ask not what your country can do for you, ask what you can do for your country." }
|
|
1660
1660
|
* ```
|
|
1661
|
-
*
|
|
1661
|
+
*
|
|
1662
1662
|
* **Example:** Transcribe English w/ timestamps.
|
|
1663
1663
|
* ```javascript
|
|
1664
1664
|
* const transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en');
|
|
@@ -1672,7 +1672,7 @@ export class ZeroShotAudioClassificationPipeline extends (/** @type {new (option
|
|
|
1672
1672
|
* // ]
|
|
1673
1673
|
* // }
|
|
1674
1674
|
* ```
|
|
1675
|
-
*
|
|
1675
|
+
*
|
|
1676
1676
|
* **Example:** Transcribe English w/ word-level timestamps.
|
|
1677
1677
|
* ```javascript
|
|
1678
1678
|
* const transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en');
|
|
@@ -1691,7 +1691,7 @@ export class ZeroShotAudioClassificationPipeline extends (/** @type {new (option
|
|
|
1691
1691
|
* // ]
|
|
1692
1692
|
* // }
|
|
1693
1693
|
* ```
|
|
1694
|
-
*
|
|
1694
|
+
*
|
|
1695
1695
|
* **Example:** Transcribe French.
|
|
1696
1696
|
* ```javascript
|
|
1697
1697
|
* const transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-small');
|
|
@@ -1699,7 +1699,7 @@ export class ZeroShotAudioClassificationPipeline extends (/** @type {new (option
|
|
|
1699
1699
|
* const output = await transcriber(url, { language: 'french', task: 'transcribe' });
|
|
1700
1700
|
* // { text: " J'adore, j'aime, je n'aime pas, je déteste." }
|
|
1701
1701
|
* ```
|
|
1702
|
-
*
|
|
1702
|
+
*
|
|
1703
1703
|
* **Example:** Translate French to English.
|
|
1704
1704
|
* ```javascript
|
|
1705
1705
|
* const transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-small');
|
|
@@ -1707,7 +1707,7 @@ export class ZeroShotAudioClassificationPipeline extends (/** @type {new (option
|
|
|
1707
1707
|
* const output = await transcriber(url, { language: 'french', task: 'translate' });
|
|
1708
1708
|
* // { text: " I love, I like, I don't like, I hate." }
|
|
1709
1709
|
* ```
|
|
1710
|
-
*
|
|
1710
|
+
*
|
|
1711
1711
|
* **Example:** Transcribe/translate audio longer than 30 seconds.
|
|
1712
1712
|
* ```javascript
|
|
1713
1713
|
* const transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en');
|
|
@@ -1930,18 +1930,18 @@ export class AutomaticSpeechRecognitionPipeline extends (/** @type {new (options
|
|
|
1930
1930
|
* @typedef {Object} ImageToTextSingle
|
|
1931
1931
|
* @property {string} generated_text The generated text.
|
|
1932
1932
|
* @typedef {ImageToTextSingle[]} ImageToTextOutput
|
|
1933
|
-
*
|
|
1933
|
+
*
|
|
1934
1934
|
* @callback ImageToTextPipelineCallback Assign labels to the image(s) passed as inputs.
|
|
1935
1935
|
* @param {ImagePipelineInputs} texts The images to be captioned.
|
|
1936
1936
|
* @param {Partial<import('./generation/configuration_utils.js').GenerationConfig>} [options] Additional keyword arguments to pass along to the generate method of the model.
|
|
1937
1937
|
* @returns {Promise<ImageToTextOutput|ImageToTextOutput[]>} An object (or array of objects) containing the generated text(s).
|
|
1938
|
-
*
|
|
1938
|
+
*
|
|
1939
1939
|
* @typedef {TextImagePipelineConstructorArgs & ImageToTextPipelineCallback & Disposable} ImageToTextPipelineType
|
|
1940
1940
|
*/
|
|
1941
1941
|
|
|
1942
1942
|
/**
|
|
1943
1943
|
* Image To Text pipeline using a `AutoModelForVision2Seq`. This pipeline predicts a caption for a given image.
|
|
1944
|
-
*
|
|
1944
|
+
*
|
|
1945
1945
|
* **Example:** Generate a caption for an image w/ `Xenova/vit-gpt2-image-captioning`.
|
|
1946
1946
|
* ```javascript
|
|
1947
1947
|
* const captioner = await pipeline('image-to-text', 'Xenova/vit-gpt2-image-captioning');
|
|
@@ -1949,7 +1949,7 @@ export class AutomaticSpeechRecognitionPipeline extends (/** @type {new (options
|
|
|
1949
1949
|
* const output = await captioner(url);
|
|
1950
1950
|
* // [{ generated_text: 'a cat laying on a couch with another cat' }]
|
|
1951
1951
|
* ```
|
|
1952
|
-
*
|
|
1952
|
+
*
|
|
1953
1953
|
* **Example:** Optical Character Recognition (OCR) w/ `Xenova/trocr-small-handwritten`.
|
|
1954
1954
|
* ```javascript
|
|
1955
1955
|
* const captioner = await pipeline('image-to-text', 'Xenova/trocr-small-handwritten');
|
|
@@ -1995,22 +1995,22 @@ export class ImageToTextPipeline extends (/** @type {new (options: TextImagePipe
|
|
|
1995
1995
|
* @property {string} label The label identified by the model.
|
|
1996
1996
|
* @property {number} score The score attributed by the model for that label.
|
|
1997
1997
|
* @typedef {ImageClassificationSingle[]} ImageClassificationOutput
|
|
1998
|
-
*
|
|
1998
|
+
*
|
|
1999
1999
|
* @typedef {Object} ImageClassificationPipelineOptions Parameters specific to image classification pipelines.
|
|
2000
|
-
* @property {number} [top_k=1] The number of top labels that will be returned by the pipeline.
|
|
2001
|
-
*
|
|
2000
|
+
* @property {number} [top_k=1] The number of top labels that will be returned by the pipeline.
|
|
2001
|
+
*
|
|
2002
2002
|
* @callback ImageClassificationPipelineCallback Assign labels to the image(s) passed as inputs.
|
|
2003
2003
|
* @param {ImagePipelineInputs} images The input images(s) to be classified.
|
|
2004
2004
|
* @param {ImageClassificationPipelineOptions} [options] The options to use for image classification.
|
|
2005
2005
|
* @returns {Promise<ImageClassificationOutput|ImageClassificationOutput[]>} An array or object containing the predicted labels and scores.
|
|
2006
|
-
*
|
|
2006
|
+
*
|
|
2007
2007
|
* @typedef {ImagePipelineConstructorArgs & ImageClassificationPipelineCallback & Disposable} ImageClassificationPipelineType
|
|
2008
2008
|
*/
|
|
2009
2009
|
|
|
2010
2010
|
/**
|
|
2011
2011
|
* Image classification pipeline using any `AutoModelForImageClassification`.
|
|
2012
2012
|
* This pipeline predicts the class of an image.
|
|
2013
|
-
*
|
|
2013
|
+
*
|
|
2014
2014
|
* **Example:** Classify an image.
|
|
2015
2015
|
* ```javascript
|
|
2016
2016
|
* const classifier = await pipeline('image-classification', 'Xenova/vit-base-patch16-224');
|
|
@@ -2020,7 +2020,7 @@ export class ImageToTextPipeline extends (/** @type {new (options: TextImagePipe
|
|
|
2020
2020
|
* // { label: 'tiger, Panthera tigris', score: 0.632695734500885 },
|
|
2021
2021
|
* // ]
|
|
2022
2022
|
* ```
|
|
2023
|
-
*
|
|
2023
|
+
*
|
|
2024
2024
|
* **Example:** Classify an image and return top `n` classes.
|
|
2025
2025
|
* ```javascript
|
|
2026
2026
|
* const classifier = await pipeline('image-classification', 'Xenova/vit-base-patch16-224');
|
|
@@ -2032,7 +2032,7 @@ export class ImageToTextPipeline extends (/** @type {new (options: TextImagePipe
|
|
|
2032
2032
|
* // { label: 'lion, king of beasts, Panthera leo', score: 0.00045060308184474707 },
|
|
2033
2033
|
* // ]
|
|
2034
2034
|
* ```
|
|
2035
|
-
*
|
|
2035
|
+
*
|
|
2036
2036
|
* **Example:** Classify an image and return all classes.
|
|
2037
2037
|
* ```javascript
|
|
2038
2038
|
* const classifier = await pipeline('image-classification', 'Xenova/vit-base-patch16-224');
|
|
@@ -2099,7 +2099,7 @@ export class ImageClassificationPipeline extends (/** @type {new (options: Image
|
|
|
2099
2099
|
* @property {string|null} label The label of the segment.
|
|
2100
2100
|
* @property {number|null} score The score of the segment.
|
|
2101
2101
|
* @property {RawImage} mask The mask of the segment.
|
|
2102
|
-
*
|
|
2102
|
+
*
|
|
2103
2103
|
* @typedef {Object} ImageSegmentationPipelineOptions Parameters specific to image segmentation pipelines.
|
|
2104
2104
|
* @property {number} [threshold=0.5] Probability threshold to filter out predicted masks.
|
|
2105
2105
|
* @property {number} [mask_threshold=0.5] Threshold to use when turning the predicted masks into binary values.
|
|
@@ -2108,19 +2108,19 @@ export class ImageClassificationPipeline extends (/** @type {new (options: Image
|
|
|
2108
2108
|
* depending on model capabilities. If not set, the pipeline will attempt to resolve (in that order).
|
|
2109
2109
|
* @property {number[]} [label_ids_to_fuse=null] List of label ids to fuse. If not set, do not fuse any labels.
|
|
2110
2110
|
* @property {number[][]} [target_sizes=null] List of target sizes for the input images. If not set, use the original image sizes.
|
|
2111
|
-
*
|
|
2111
|
+
*
|
|
2112
2112
|
* @callback ImageSegmentationPipelineCallback Segment the input images.
|
|
2113
2113
|
* @param {ImagePipelineInputs} images The input images.
|
|
2114
2114
|
* @param {ImageSegmentationPipelineOptions} [options] The options to use for image segmentation.
|
|
2115
2115
|
* @returns {Promise<ImageSegmentationPipelineOutput[]>} The annotated segments.
|
|
2116
|
-
*
|
|
2116
|
+
*
|
|
2117
2117
|
* @typedef {ImagePipelineConstructorArgs & ImageSegmentationPipelineCallback & Disposable} ImageSegmentationPipelineType
|
|
2118
2118
|
*/
|
|
2119
2119
|
|
|
2120
2120
|
/**
|
|
2121
2121
|
* Image segmentation pipeline using any `AutoModelForXXXSegmentation`.
|
|
2122
2122
|
* This pipeline predicts masks of objects and their classes.
|
|
2123
|
-
*
|
|
2123
|
+
*
|
|
2124
2124
|
* **Example:** Perform image segmentation with `Xenova/detr-resnet-50-panoptic`.
|
|
2125
2125
|
* ```javascript
|
|
2126
2126
|
* const segmenter = await pipeline('image-segmentation', 'Xenova/detr-resnet-50-panoptic');
|
|
@@ -2204,12 +2204,17 @@ export class ImageSegmentationPipeline extends (/** @type {new (options: ImagePi
|
|
|
2204
2204
|
/** @type {ImageSegmentationPipelineOutput[]} */
|
|
2205
2205
|
const annotation = [];
|
|
2206
2206
|
if (!subtask) {
|
|
2207
|
+
// We define an epsilon to safeguard against numerical/precision issues when detecting
|
|
2208
|
+
// the normalization mode of the output (i.e., sigmoid already applied, or not).
|
|
2209
|
+
// See https://github.com/microsoft/onnxruntime/issues/23943 for more information.
|
|
2210
|
+
const epsilon = 1e-5;
|
|
2211
|
+
|
|
2207
2212
|
// Perform standard image segmentation
|
|
2208
2213
|
const result = output[outputNames[0]];
|
|
2209
2214
|
for (let i = 0; i < imageSizes.length; ++i) {
|
|
2210
2215
|
const size = imageSizes[i];
|
|
2211
2216
|
const item = result[i];
|
|
2212
|
-
if (item.data.some(x => x < 0 || x > 1)) {
|
|
2217
|
+
if (item.data.some(x => x < -epsilon || x > 1 + epsilon)) {
|
|
2213
2218
|
item.sigmoid_();
|
|
2214
2219
|
}
|
|
2215
2220
|
const mask = await RawImage.fromTensor(item.mul_(255).to('uint8')).resize(size[1], size[0]);
|
|
@@ -2278,19 +2283,19 @@ export class ImageSegmentationPipeline extends (/** @type {new (options: ImagePi
|
|
|
2278
2283
|
|
|
2279
2284
|
/**
|
|
2280
2285
|
* @typedef {Object} BackgroundRemovalPipelineOptions Parameters specific to image segmentation pipelines.
|
|
2281
|
-
*
|
|
2286
|
+
*
|
|
2282
2287
|
* @callback BackgroundRemovalPipelineCallback Segment the input images.
|
|
2283
2288
|
* @param {ImagePipelineInputs} images The input images.
|
|
2284
2289
|
* @param {BackgroundRemovalPipelineOptions} [options] The options to use for image segmentation.
|
|
2285
2290
|
* @returns {Promise<RawImage[]>} The images with the background removed.
|
|
2286
|
-
*
|
|
2291
|
+
*
|
|
2287
2292
|
* @typedef {ImagePipelineConstructorArgs & BackgroundRemovalPipelineCallback & Disposable} BackgroundRemovalPipelineType
|
|
2288
2293
|
*/
|
|
2289
2294
|
|
|
2290
2295
|
/**
|
|
2291
2296
|
* Background removal pipeline using certain `AutoModelForXXXSegmentation`.
|
|
2292
2297
|
* This pipeline removes the backgrounds of images.
|
|
2293
|
-
*
|
|
2298
|
+
*
|
|
2294
2299
|
* **Example:** Perform background removal with `Xenova/modnet`.
|
|
2295
2300
|
* ```javascript
|
|
2296
2301
|
* const segmenter = await pipeline('background-removal', 'Xenova/modnet');
|
|
@@ -2301,7 +2306,7 @@ export class ImageSegmentationPipeline extends (/** @type {new (options: ImagePi
|
|
|
2301
2306
|
* // ]
|
|
2302
2307
|
* ```
|
|
2303
2308
|
*/
|
|
2304
|
-
export class BackgroundRemovalPipeline extends (/** @type {new (options: ImagePipelineConstructorArgs) => ImageSegmentationPipelineType} */ (ImageSegmentationPipeline)) {
|
|
2309
|
+
export class BackgroundRemovalPipeline extends (/** @type {new (options: ImagePipelineConstructorArgs) => BackgroundRemovalPipelineType} */ (/** @type {any} */(ImageSegmentationPipeline))) {
|
|
2305
2310
|
/**
|
|
2306
2311
|
* Create a new BackgroundRemovalPipeline.
|
|
2307
2312
|
* @param {ImagePipelineConstructorArgs} options An object used to instantiate the pipeline.
|
|
@@ -2336,25 +2341,25 @@ export class BackgroundRemovalPipeline extends (/** @type {new (options: ImagePi
|
|
|
2336
2341
|
* @typedef {Object} ZeroShotImageClassificationOutput
|
|
2337
2342
|
* @property {string} label The label identified by the model. It is one of the suggested `candidate_label`.
|
|
2338
2343
|
* @property {number} score The score attributed by the model for that label (between 0 and 1).
|
|
2339
|
-
*
|
|
2344
|
+
*
|
|
2340
2345
|
* @typedef {Object} ZeroShotImageClassificationPipelineOptions Parameters specific to zero-shot image classification pipelines.
|
|
2341
2346
|
* @property {string} [hypothesis_template="This is a photo of {}"] The sentence used in conjunction with `candidate_labels`
|
|
2342
2347
|
* to attempt the image classification by replacing the placeholder with the candidate_labels.
|
|
2343
2348
|
* Then likelihood is estimated by using `logits_per_image`.
|
|
2344
|
-
*
|
|
2349
|
+
*
|
|
2345
2350
|
* @callback ZeroShotImageClassificationPipelineCallback Assign labels to the image(s) passed as inputs.
|
|
2346
2351
|
* @param {ImagePipelineInputs} images The input images.
|
|
2347
2352
|
* @param {string[]} candidate_labels The candidate labels for this image.
|
|
2348
2353
|
* @param {ZeroShotImageClassificationPipelineOptions} [options] The options to use for zero-shot image classification.
|
|
2349
2354
|
* @returns {Promise<ZeroShotImageClassificationOutput[]|ZeroShotImageClassificationOutput[][]>} An array of objects containing the predicted labels and scores.
|
|
2350
|
-
*
|
|
2355
|
+
*
|
|
2351
2356
|
* @typedef {TextImagePipelineConstructorArgs & ZeroShotImageClassificationPipelineCallback & Disposable} ZeroShotImageClassificationPipelineType
|
|
2352
2357
|
*/
|
|
2353
2358
|
|
|
2354
2359
|
/**
|
|
2355
2360
|
* Zero shot image classification pipeline. This pipeline predicts the class of
|
|
2356
2361
|
* an image when you provide an image and a set of `candidate_labels`.
|
|
2357
|
-
*
|
|
2362
|
+
*
|
|
2358
2363
|
* **Example:** Zero shot image classification w/ `Xenova/clip-vit-base-patch32`.
|
|
2359
2364
|
* ```javascript
|
|
2360
2365
|
* const classifier = await pipeline('zero-shot-image-classification', 'Xenova/clip-vit-base-patch32');
|
|
@@ -2384,7 +2389,7 @@ export class ZeroShotImageClassificationPipeline extends (/** @type {new (option
|
|
|
2384
2389
|
const isBatched = Array.isArray(images);
|
|
2385
2390
|
const preparedImages = await prepareImages(images);
|
|
2386
2391
|
|
|
2387
|
-
// Insert label into hypothesis template
|
|
2392
|
+
// Insert label into hypothesis template
|
|
2388
2393
|
const texts = candidate_labels.map(
|
|
2389
2394
|
x => hypothesis_template.replace('{}', x)
|
|
2390
2395
|
);
|
|
@@ -2431,23 +2436,23 @@ export class ZeroShotImageClassificationPipeline extends (/** @type {new (option
|
|
|
2431
2436
|
* @property {number} score The score attributed by the model for that label.
|
|
2432
2437
|
* @property {BoundingBox} box The bounding box of detected object in image's original size, or as a percentage if `percentage` is set to true.
|
|
2433
2438
|
* @typedef {ObjectDetectionPipelineSingle[]} ObjectDetectionPipelineOutput
|
|
2434
|
-
*
|
|
2439
|
+
*
|
|
2435
2440
|
* @typedef {Object} ObjectDetectionPipelineOptions Parameters specific to object detection pipelines.
|
|
2436
2441
|
* @property {number} [threshold=0.9] The threshold used to filter boxes by score.
|
|
2437
2442
|
* @property {boolean} [percentage=false] Whether to return the boxes coordinates in percentage (true) or in pixels (false).
|
|
2438
|
-
*
|
|
2443
|
+
*
|
|
2439
2444
|
* @callback ObjectDetectionPipelineCallback Detect objects (bounding boxes & classes) in the image(s) passed as inputs.
|
|
2440
2445
|
* @param {ImagePipelineInputs} images The input images.
|
|
2441
2446
|
* @param {ObjectDetectionPipelineOptions} [options] The options to use for object detection.
|
|
2442
|
-
* @returns {Promise<ObjectDetectionPipelineOutput|ObjectDetectionPipelineOutput[]>} A list of objects or a list of list of objects.
|
|
2443
|
-
*
|
|
2447
|
+
* @returns {Promise<ObjectDetectionPipelineOutput|ObjectDetectionPipelineOutput[]>} A list of objects or a list of list of objects.
|
|
2448
|
+
*
|
|
2444
2449
|
* @typedef {ImagePipelineConstructorArgs & ObjectDetectionPipelineCallback & Disposable} ObjectDetectionPipelineType
|
|
2445
2450
|
*/
|
|
2446
2451
|
|
|
2447
2452
|
/**
|
|
2448
2453
|
* Object detection pipeline using any `AutoModelForObjectDetection`.
|
|
2449
2454
|
* This pipeline predicts bounding boxes of objects and their classes.
|
|
2450
|
-
*
|
|
2455
|
+
*
|
|
2451
2456
|
* **Example:** Run object-detection with `Xenova/detr-resnet-50`.
|
|
2452
2457
|
* ```javascript
|
|
2453
2458
|
* const detector = await pipeline('object-detection', 'Xenova/detr-resnet-50');
|
|
@@ -2521,27 +2526,27 @@ export class ObjectDetectionPipeline extends (/** @type {new (options: ImagePipe
|
|
|
2521
2526
|
* @property {string} label Text query corresponding to the found object.
|
|
2522
2527
|
* @property {number} score Score corresponding to the object (between 0 and 1).
|
|
2523
2528
|
* @property {BoundingBox} box Bounding box of the detected object in image's original size, or as a percentage if `percentage` is set to true.
|
|
2524
|
-
*
|
|
2529
|
+
*
|
|
2525
2530
|
* @typedef {Object} ZeroShotObjectDetectionPipelineOptions Parameters specific to zero-shot object detection pipelines.
|
|
2526
2531
|
* @property {number} [threshold=0.1] The probability necessary to make a prediction.
|
|
2527
2532
|
* @property {number} [top_k=null] The number of top predictions that will be returned by the pipeline.
|
|
2528
2533
|
* If the provided number is `null` or higher than the number of predictions available, it will default
|
|
2529
2534
|
* to the number of predictions.
|
|
2530
2535
|
* @property {boolean} [percentage=false] Whether to return the boxes coordinates in percentage (true) or in pixels (false).
|
|
2531
|
-
*
|
|
2536
|
+
*
|
|
2532
2537
|
* @callback ZeroShotObjectDetectionPipelineCallback Detect objects (bounding boxes & classes) in the image(s) passed as inputs.
|
|
2533
2538
|
* @param {ImagePipelineInputs} images The input images.
|
|
2534
2539
|
* @param {string[]} candidate_labels What the model should recognize in the image.
|
|
2535
2540
|
* @param {ZeroShotObjectDetectionPipelineOptions} [options] The options to use for zero-shot object detection.
|
|
2536
2541
|
* @returns {Promise<ZeroShotObjectDetectionOutput[]|ZeroShotObjectDetectionOutput[][]>} An array of objects containing the predicted labels, scores, and bounding boxes.
|
|
2537
|
-
*
|
|
2542
|
+
*
|
|
2538
2543
|
* @typedef {TextImagePipelineConstructorArgs & ZeroShotObjectDetectionPipelineCallback & Disposable} ZeroShotObjectDetectionPipelineType
|
|
2539
2544
|
*/
|
|
2540
2545
|
|
|
2541
2546
|
/**
|
|
2542
2547
|
* Zero-shot object detection pipeline. This pipeline predicts bounding boxes of
|
|
2543
2548
|
* objects when you provide an image and a set of `candidate_labels`.
|
|
2544
|
-
*
|
|
2549
|
+
*
|
|
2545
2550
|
* **Example:** Zero-shot object detection w/ `Xenova/owlvit-base-patch32`.
|
|
2546
2551
|
* ```javascript
|
|
2547
2552
|
* const detector = await pipeline('zero-shot-object-detection', 'Xenova/owlvit-base-patch32');
|
|
@@ -2571,7 +2576,7 @@ export class ObjectDetectionPipeline extends (/** @type {new (options: ImagePipe
|
|
|
2571
2576
|
* // }
|
|
2572
2577
|
* // ]
|
|
2573
2578
|
* ```
|
|
2574
|
-
*
|
|
2579
|
+
*
|
|
2575
2580
|
* **Example:** Zero-shot object detection w/ `Xenova/owlvit-base-patch32` (returning top 4 matches and setting a threshold).
|
|
2576
2581
|
* ```javascript
|
|
2577
2582
|
* const detector = await pipeline('zero-shot-object-detection', 'Xenova/owlvit-base-patch32');
|
|
@@ -2686,13 +2691,13 @@ export class ZeroShotObjectDetectionPipeline extends (/** @type {new (options: T
|
|
|
2686
2691
|
* @typedef {Object} DocumentQuestionAnsweringSingle
|
|
2687
2692
|
* @property {string} answer The generated text.
|
|
2688
2693
|
* @typedef {DocumentQuestionAnsweringSingle[]} DocumentQuestionAnsweringOutput
|
|
2689
|
-
*
|
|
2694
|
+
*
|
|
2690
2695
|
* @callback DocumentQuestionAnsweringPipelineCallback Answer the question given as input by using the document.
|
|
2691
2696
|
* @param {ImageInput} image The image of the document to use.
|
|
2692
2697
|
* @param {string} question A question to ask of the document.
|
|
2693
2698
|
* @param {Partial<import('./generation/configuration_utils.js').GenerationConfig>} [options] Additional keyword arguments to pass along to the generate method of the model.
|
|
2694
2699
|
* @returns {Promise<DocumentQuestionAnsweringOutput|DocumentQuestionAnsweringOutput[]>} An object (or array of objects) containing the answer(s).
|
|
2695
|
-
*
|
|
2700
|
+
*
|
|
2696
2701
|
* @typedef {TextImagePipelineConstructorArgs & DocumentQuestionAnsweringPipelineCallback & Disposable} DocumentQuestionAnsweringPipelineType
|
|
2697
2702
|
*/
|
|
2698
2703
|
|
|
@@ -2700,7 +2705,7 @@ export class ZeroShotObjectDetectionPipeline extends (/** @type {new (options: T
|
|
|
2700
2705
|
* Document Question Answering pipeline using any `AutoModelForDocumentQuestionAnswering`.
|
|
2701
2706
|
* The inputs/outputs are similar to the (extractive) question answering pipeline; however,
|
|
2702
2707
|
* the pipeline takes an image (and optional OCR'd words/boxes) as input instead of text context.
|
|
2703
|
-
*
|
|
2708
|
+
*
|
|
2704
2709
|
* **Example:** Answer questions about a document with `Xenova/donut-base-finetuned-docvqa`.
|
|
2705
2710
|
* ```javascript
|
|
2706
2711
|
* const qa_pipeline = await pipeline('document-question-answering', 'Xenova/donut-base-finetuned-docvqa');
|
|
@@ -2770,22 +2775,22 @@ export class DocumentQuestionAnsweringPipeline extends (/** @type {new (options:
|
|
|
2770
2775
|
* @typedef {Object} TextToAudioOutput
|
|
2771
2776
|
* @property {Float32Array} audio The generated audio waveform.
|
|
2772
2777
|
* @property {number} sampling_rate The sampling rate of the generated audio waveform.
|
|
2773
|
-
*
|
|
2778
|
+
*
|
|
2774
2779
|
* @typedef {Object} TextToAudioPipelineOptions Parameters specific to text-to-audio pipelines.
|
|
2775
2780
|
* @property {Tensor|Float32Array|string|URL} [speaker_embeddings=null] The speaker embeddings (if the model requires it).
|
|
2776
|
-
*
|
|
2781
|
+
*
|
|
2777
2782
|
* @callback TextToAudioPipelineCallback Generates speech/audio from the inputs.
|
|
2778
2783
|
* @param {string|string[]} texts The text(s) to generate.
|
|
2779
2784
|
* @param {TextToAudioPipelineOptions} options Parameters passed to the model generation/forward method.
|
|
2780
2785
|
* @returns {Promise<TextToAudioOutput>} An object containing the generated audio and sampling rate.
|
|
2781
|
-
*
|
|
2786
|
+
*
|
|
2782
2787
|
* @typedef {TextToAudioPipelineConstructorArgs & TextToAudioPipelineCallback & Disposable} TextToAudioPipelineType
|
|
2783
2788
|
*/
|
|
2784
2789
|
|
|
2785
2790
|
/**
|
|
2786
2791
|
* Text-to-audio generation pipeline using any `AutoModelForTextToWaveform` or `AutoModelForTextToSpectrogram`.
|
|
2787
2792
|
* This pipeline generates an audio file from an input text and optional other conditional inputs.
|
|
2788
|
-
*
|
|
2793
|
+
*
|
|
2789
2794
|
* **Example:** Generate audio from text with `Xenova/speecht5_tts`.
|
|
2790
2795
|
* ```javascript
|
|
2791
2796
|
* const synthesizer = await pipeline('text-to-speech', 'Xenova/speecht5_tts', { quantized: false });
|
|
@@ -2796,17 +2801,17 @@ export class DocumentQuestionAnsweringPipeline extends (/** @type {new (options:
|
|
|
2796
2801
|
* // sampling_rate: 16000
|
|
2797
2802
|
* // }
|
|
2798
2803
|
* ```
|
|
2799
|
-
*
|
|
2804
|
+
*
|
|
2800
2805
|
* You can then save the audio to a .wav file with the `wavefile` package:
|
|
2801
2806
|
* ```javascript
|
|
2802
2807
|
* import wavefile from 'wavefile';
|
|
2803
2808
|
* import fs from 'fs';
|
|
2804
|
-
*
|
|
2809
|
+
*
|
|
2805
2810
|
* const wav = new wavefile.WaveFile();
|
|
2806
2811
|
* wav.fromScratch(1, out.sampling_rate, '32f', out.audio);
|
|
2807
2812
|
* fs.writeFileSync('out.wav', wav.toBuffer());
|
|
2808
2813
|
* ```
|
|
2809
|
-
*
|
|
2814
|
+
*
|
|
2810
2815
|
* **Example:** Multilingual speech generation with `Xenova/mms-tts-fra`. See [here](https://huggingface.co/models?pipeline_tag=text-to-speech&other=vits&sort=trending) for the full list of available languages (1107).
|
|
2811
2816
|
* ```javascript
|
|
2812
2817
|
* const synthesizer = await pipeline('text-to-speech', 'Xenova/mms-tts-fra');
|
|
@@ -2912,13 +2917,13 @@ export class TextToAudioPipeline extends (/** @type {new (options: TextToAudioPi
|
|
|
2912
2917
|
* @callback ImageToImagePipelineCallback Transform the image(s) passed as inputs.
|
|
2913
2918
|
* @param {ImagePipelineInputs} images The images to transform.
|
|
2914
2919
|
* @returns {Promise<RawImage|RawImage[]>} The transformed image or list of images.
|
|
2915
|
-
*
|
|
2920
|
+
*
|
|
2916
2921
|
* @typedef {ImagePipelineConstructorArgs & ImageToImagePipelineCallback & Disposable} ImageToImagePipelineType
|
|
2917
2922
|
*/
|
|
2918
2923
|
|
|
2919
2924
|
/**
|
|
2920
2925
|
* Image to Image pipeline using any `AutoModelForImageToImage`. This pipeline generates an image based on a previous image input.
|
|
2921
|
-
*
|
|
2926
|
+
*
|
|
2922
2927
|
* **Example:** Super-resolution with `Xenova/swin2SR-classical-sr-x2-64`.
|
|
2923
2928
|
* ```javascript
|
|
2924
2929
|
* const upscaler = await pipeline('image-to-image', 'Xenova/swin2SR-classical-sr-x2-64');
|
|
@@ -2963,17 +2968,17 @@ export class ImageToImagePipeline extends (/** @type {new (options: ImagePipelin
|
|
|
2963
2968
|
* @typedef {Object} DepthEstimationPipelineOutput
|
|
2964
2969
|
* @property {Tensor} predicted_depth The raw depth map predicted by the model.
|
|
2965
2970
|
* @property {RawImage} depth The processed depth map as an image (with the same size as the input image).
|
|
2966
|
-
*
|
|
2971
|
+
*
|
|
2967
2972
|
* @callback DepthEstimationPipelineCallback Predicts the depth for the image(s) passed as inputs.
|
|
2968
2973
|
* @param {ImagePipelineInputs} images The images to compute depth for.
|
|
2969
2974
|
* @returns {Promise<DepthEstimationPipelineOutput|DepthEstimationPipelineOutput[]>} An image or a list of images containing result(s).
|
|
2970
|
-
*
|
|
2975
|
+
*
|
|
2971
2976
|
* @typedef {ImagePipelineConstructorArgs & DepthEstimationPipelineCallback & Disposable} DepthEstimationPipelineType
|
|
2972
2977
|
*/
|
|
2973
2978
|
|
|
2974
2979
|
/**
|
|
2975
2980
|
* Depth estimation pipeline using any `AutoModelForDepthEstimation`. This pipeline predicts the depth of an image.
|
|
2976
|
-
*
|
|
2981
|
+
*
|
|
2977
2982
|
* **Example:** Depth estimation with `Xenova/dpt-hybrid-midas`.
|
|
2978
2983
|
* ```javascript
|
|
2979
2984
|
* const depth_estimator = await pipeline('depth-estimation', 'Xenova/dpt-hybrid-midas');
|
|
@@ -3358,7 +3363,7 @@ const TASK_ALIASES = Object.freeze({
|
|
|
3358
3363
|
|
|
3359
3364
|
/**
|
|
3360
3365
|
* Utility factory method to build a `Pipeline` object.
|
|
3361
|
-
*
|
|
3366
|
+
*
|
|
3362
3367
|
* @template {PipelineType} T The type of pipeline to return.
|
|
3363
3368
|
* @param {T} task The task defining which pipeline will be returned. Currently accepted tasks are:
|
|
3364
3369
|
* - `"audio-classification"`: will return an `AudioClassificationPipeline`.
|