@huggingface/transformers 3.3.3 → 3.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95)
  1. package/README.md +13 -3
  2. package/dist/ort-wasm-simd-threaded.jsep.mjs +124 -115
  3. package/dist/ort-wasm-simd-threaded.jsep.wasm +0 -0
  4. package/dist/transformers.js +2778 -1592
  5. package/dist/transformers.js.map +1 -1
  6. package/dist/transformers.min.js +1 -1
  7. package/dist/transformers.min.js.map +1 -1
  8. package/dist/{transformers.cjs → transformers.node.cjs} +1699 -2530
  9. package/dist/transformers.node.cjs.map +1 -0
  10. package/dist/transformers.node.min.cjs +2 -0
  11. package/dist/transformers.node.min.cjs.map +1 -0
  12. package/dist/transformers.node.min.mjs +2 -0
  13. package/dist/transformers.node.min.mjs.map +1 -0
  14. package/dist/{transformers.mjs → transformers.node.mjs} +1738 -2510
  15. package/dist/transformers.node.mjs.map +1 -0
  16. package/dist/transformers.web.js +35876 -0
  17. package/dist/transformers.web.js.map +1 -0
  18. package/dist/transformers.web.min.js +2 -0
  19. package/dist/transformers.web.min.js.map +1 -0
  20. package/package.json +6 -6
  21. package/src/backends/onnx.js +14 -15
  22. package/src/configs.js +6 -1
  23. package/src/env.js +1 -1
  24. package/src/generation/streamers.js +4 -3
  25. package/src/models/dac/feature_extraction_dac.js +3 -0
  26. package/src/models/encodec/feature_extraction_encodec.js +32 -0
  27. package/src/models/feature_extractors.js +3 -0
  28. package/src/models/idefics3/image_processing_idefics3.js +1 -1
  29. package/src/models/image_processors.js +1 -0
  30. package/src/models/processors.js +2 -0
  31. package/src/models/smolvlm/image_processing_smolvlm.js +2 -0
  32. package/src/models/smolvlm/processing_smolvlm.js +2 -0
  33. package/src/models/snac/feature_extraction_snac.js +3 -0
  34. package/src/models/ultravox/processing_ultravox.js +54 -0
  35. package/src/models/whisper/common_whisper.js +7 -1
  36. package/src/models/whisper/feature_extraction_whisper.js +18 -10
  37. package/src/models.js +546 -78
  38. package/src/pipelines.js +246 -137
  39. package/src/tokenizers.js +42 -28
  40. package/src/transformers.js +1 -0
  41. package/src/utils/audio.js +2 -0
  42. package/src/utils/hub.js +140 -80
  43. package/src/utils/image.js +9 -1
  44. package/src/utils/maths.js +1 -1
  45. package/src/utils/tensor.js +12 -5
  46. package/src/utils/video.js +128 -0
  47. package/types/backends/onnx.d.ts +2 -2
  48. package/types/backends/onnx.d.ts.map +1 -1
  49. package/types/configs.d.ts +1 -1
  50. package/types/configs.d.ts.map +1 -1
  51. package/types/generation/streamers.d.ts.map +1 -1
  52. package/types/models/dac/feature_extraction_dac.d.ts +4 -0
  53. package/types/models/dac/feature_extraction_dac.d.ts.map +1 -0
  54. package/types/models/encodec/feature_extraction_encodec.d.ts +13 -0
  55. package/types/models/encodec/feature_extraction_encodec.d.ts.map +1 -0
  56. package/types/models/feature_extractors.d.ts +3 -0
  57. package/types/models/florence2/processing_florence2.d.ts +1 -1
  58. package/types/models/florence2/processing_florence2.d.ts.map +1 -1
  59. package/types/models/image_processors.d.ts +1 -0
  60. package/types/models/processors.d.ts +2 -0
  61. package/types/models/smolvlm/image_processing_smolvlm.d.ts +2 -0
  62. package/types/models/smolvlm/image_processing_smolvlm.d.ts.map +1 -0
  63. package/types/models/smolvlm/processing_smolvlm.d.ts +2 -0
  64. package/types/models/smolvlm/processing_smolvlm.d.ts.map +1 -0
  65. package/types/models/snac/feature_extraction_snac.d.ts +4 -0
  66. package/types/models/snac/feature_extraction_snac.d.ts.map +1 -0
  67. package/types/models/ultravox/processing_ultravox.d.ts +16 -0
  68. package/types/models/ultravox/processing_ultravox.d.ts.map +1 -0
  69. package/types/models/whisper/common_whisper.d.ts.map +1 -1
  70. package/types/models/whisper/feature_extraction_whisper.d.ts +3 -1
  71. package/types/models/whisper/feature_extraction_whisper.d.ts.map +1 -1
  72. package/types/models.d.ts +180 -4
  73. package/types/models.d.ts.map +1 -1
  74. package/types/pipelines.d.ts +51 -5
  75. package/types/pipelines.d.ts.map +1 -1
  76. package/types/tokenizers.d.ts.map +1 -1
  77. package/types/transformers.d.ts +1 -0
  78. package/types/tsconfig.tsbuildinfo +1 -1
  79. package/types/utils/audio.d.ts.map +1 -1
  80. package/types/utils/hub.d.ts +19 -7
  81. package/types/utils/hub.d.ts.map +1 -1
  82. package/types/utils/image.d.ts +2 -2
  83. package/types/utils/image.d.ts.map +1 -1
  84. package/types/utils/maths.d.ts +2 -2
  85. package/types/utils/maths.d.ts.map +1 -1
  86. package/types/utils/tensor.d.ts +17 -18
  87. package/types/utils/tensor.d.ts.map +1 -1
  88. package/types/utils/video.d.ts +37 -0
  89. package/types/utils/video.d.ts.map +1 -0
  90. package/dist/transformers.cjs.map +0 -1
  91. package/dist/transformers.min.cjs +0 -2
  92. package/dist/transformers.min.cjs.map +0 -1
  93. package/dist/transformers.min.mjs +0 -2
  94. package/dist/transformers.min.mjs.map +0 -1
  95. package/dist/transformers.mjs.map +0 -1
package/src/pipelines.js CHANGED
@@ -1,15 +1,15 @@
 /**
  * @file Pipelines provide a high-level, easy to use, API for running machine learning models.
- *
+ *
  * **Example:** Instantiate pipeline using the `pipeline` function.
  * ```javascript
  * import { pipeline } from '@huggingface/transformers';
- *
+ *
  * const classifier = await pipeline('sentiment-analysis');
  * const output = await classifier('I love transformers!');
  * // [{'label': 'POSITIVE', 'score': 0.999817686}]
  * ```
- *
+ *
  * @module pipelines
  */

@@ -78,7 +78,7 @@ import { RawImage } from './utils/image.js';


 /**
- * @typedef {string | RawImage | URL} ImageInput
+ * @typedef {string | RawImage | URL | Blob | HTMLCanvasElement | OffscreenCanvas} ImageInput
  * @typedef {ImageInput|ImageInput[]} ImagePipelineInputs
  */

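The widened `ImageInput` typedef means browser-native sources (`Blob`, `HTMLCanvasElement`, `OffscreenCanvas`) can now be fed to image pipelines directly, without first converting to a `RawImage` or URL. A minimal sketch of what this enables, assuming a browser (or worker) context; the image URL and the `Xenova/vit-base-patch16-224` checkpoint are borrowed from the examples further down and may need substituting:

```javascript
import { pipeline } from '@huggingface/transformers';

const classifier = await pipeline('image-classification', 'Xenova/vit-base-patch16-224');

// A Blob, e.g. from a file <input> or a fetch() response
const response = await fetch('https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/tiger.jpg');
const fromBlob = await classifier(await response.blob());

// An OffscreenCanvas (or a regular HTMLCanvasElement) now works too
const canvas = new OffscreenCanvas(224, 224);
const ctx = canvas.getContext('2d');
ctx.fillStyle = 'orange';
ctx.fillRect(0, 0, 224, 224);
const fromCanvas = await classifier(canvas);
```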
@@ -152,7 +152,7 @@ function get_bounding_box(box, asInteger) {
 /**
  * @callback DisposeType Disposes the item.
  * @returns {Promise<void>} A promise that resolves when the item has been disposed.
- *
+ *
  * @typedef {Object} Disposable
  * @property {DisposeType} dispose A promise that resolves when the pipeline has been disposed.
  */
@@ -189,7 +189,7 @@ export class Pipeline extends Callable {
  * @property {string} task The task of the pipeline. Useful for specifying subtasks.
  * @property {PreTrainedModel} model The model used by the pipeline.
  * @property {PreTrainedTokenizer} tokenizer The tokenizer used by the pipeline.
- *
+ *
  * @typedef {ModelTokenizerConstructorArgs} TextPipelineConstructorArgs An object used to instantiate a text-based pipeline.
  */

@@ -198,7 +198,7 @@ export class Pipeline extends Callable {
  * @property {string} task The task of the pipeline. Useful for specifying subtasks.
  * @property {PreTrainedModel} model The model used by the pipeline.
  * @property {Processor} processor The processor used by the pipeline.
- *
+ *
  * @typedef {ModelProcessorConstructorArgs} AudioPipelineConstructorArgs An object used to instantiate an audio-based pipeline.
  * @typedef {ModelProcessorConstructorArgs} ImagePipelineConstructorArgs An object used to instantiate an image-based pipeline.
  */
@@ -210,7 +210,7 @@ export class Pipeline extends Callable {
  * @property {PreTrainedModel} model The model used by the pipeline.
  * @property {PreTrainedTokenizer} tokenizer The tokenizer used by the pipeline.
  * @property {Processor} processor The processor used by the pipeline.
- *
+ *
  * @typedef {ModelTokenizerProcessorConstructorArgs} TextAudioPipelineConstructorArgs An object used to instantiate a text- and audio-based pipeline.
  * @typedef {ModelTokenizerProcessorConstructorArgs} TextImagePipelineConstructorArgs An object used to instantiate a text- and image-based pipeline.
  */
@@ -220,15 +220,15 @@ export class Pipeline extends Callable {
  * @property {string} label The label predicted.
  * @property {number} score The corresponding probability.
  * @typedef {TextClassificationSingle[]} TextClassificationOutput
- *
+ *
  * @typedef {Object} TextClassificationPipelineOptions Parameters specific to text classification pipelines.
  * @property {number} [top_k=1] The number of top predictions to be returned.
- *
+ *
  * @callback TextClassificationPipelineCallback Classify the text(s) given as inputs.
  * @param {string|string[]} texts The input text(s) to be classified.
  * @param {TextClassificationPipelineOptions} [options] The options to use for text classification.
  * @returns {Promise<TextClassificationOutput|TextClassificationOutput[]>} An array or object containing the predicted labels and scores.
- *
+ *
  * @typedef {TextPipelineConstructorArgs & TextClassificationPipelineCallback & Disposable} TextClassificationPipelineType
  */

@@ -241,7 +241,7 @@ export class Pipeline extends Callable {
  * const output = await classifier('I love transformers!');
  * // [{ label: 'POSITIVE', score: 0.999788761138916 }]
  * ```
- *
+ *
  * **Example:** Multilingual sentiment-analysis w/ `Xenova/bert-base-multilingual-uncased-sentiment` (and return top 5 classes).
  * ```javascript
  * const classifier = await pipeline('sentiment-analysis', 'Xenova/bert-base-multilingual-uncased-sentiment');
@@ -254,7 +254,7 @@ export class Pipeline extends Callable {
  * // { label: '2 stars', score: 0.0009423971059732139 }
  * // ]
  * ```
- *
+ *
  * **Example:** Toxic comment classification w/ `Xenova/toxic-bert` (and return all classes).
  * ```javascript
  * const classifier = await pipeline('text-classification', 'Xenova/toxic-bert');
@@ -339,21 +339,21 @@ export class TextClassificationPipeline extends (/** @type {new (options: TextPi
  * @property {number} [start] The index of the start of the corresponding entity in the sentence.
  * @property {number} [end] The index of the end of the corresponding entity in the sentence.
  * @typedef {TokenClassificationSingle[]} TokenClassificationOutput
- *
+ *
  * @typedef {Object} TokenClassificationPipelineOptions Parameters specific to token classification pipelines.
  * @property {string[]} [ignore_labels] A list of labels to ignore.
- *
+ *
  * @callback TokenClassificationPipelineCallback Classify each token of the text(s) given as inputs.
  * @param {string|string[]} texts One or several texts (or one list of texts) for token classification.
  * @param {TokenClassificationPipelineOptions} [options] The options to use for token classification.
  * @returns {Promise<TokenClassificationOutput|TokenClassificationOutput[]>} The result.
- *
+ *
  * @typedef {TextPipelineConstructorArgs & TokenClassificationPipelineCallback & Disposable} TokenClassificationPipelineType
  */

 /**
  * Named Entity Recognition pipeline using any `ModelForTokenClassification`.
- *
+ *
  * **Example:** Perform named entity recognition with `Xenova/bert-base-NER`.
  * ```javascript
  * const classifier = await pipeline('token-classification', 'Xenova/bert-base-NER');
@@ -363,7 +363,7 @@ export class TextClassificationPipeline extends (/** @type {new (options: TextPi
  * // { entity: 'B-LOC', score: 0.9994474053382874, index: 9, word: 'London' }
  * // ]
  * ```
- *
+ *
  * **Example:** Perform named entity recognition with `Xenova/bert-base-NER` (and return all labels).
  * ```javascript
  * const classifier = await pipeline('token-classification', 'Xenova/bert-base-NER');
@@ -459,22 +459,22 @@ export class TokenClassificationPipeline extends (/** @type {new (options: TextP
  * @property {number} [start] The character start index of the answer (in the tokenized version of the input).
  * @property {number} [end] The character end index of the answer (in the tokenized version of the input).
  * @property {string} answer The answer to the question.
- *
+ *
  * @typedef {Object} QuestionAnsweringPipelineOptions Parameters specific to question answering pipelines.
  * @property {number} [top_k=1] The number of top answer predictions to be returned.
- *
+ *
  * @callback QuestionAnsweringPipelineCallback Answer the question(s) given as inputs by using the context(s).
  * @param {string|string[]} question One or several question(s) (must be used in conjunction with the `context` argument).
  * @param {string|string[]} context One or several context(s) associated with the question(s) (must be used in conjunction with the `question` argument).
  * @param {QuestionAnsweringPipelineOptions} [options] The options to use for question answering.
  * @returns {Promise<QuestionAnsweringOutput|QuestionAnsweringOutput[]>} An array or object containing the predicted answers and scores.
- *
+ *
  * @typedef {TextPipelineConstructorArgs & QuestionAnsweringPipelineCallback & Disposable} QuestionAnsweringPipelineType
  */

 /**
  * Question Answering pipeline using any `ModelForQuestionAnswering`.
- *
+ *
  * **Example:** Run question answering with `Xenova/distilbert-base-uncased-distilled-squad`.
  * ```javascript
  * const answerer = await pipeline('question-answering', 'Xenova/distilbert-base-uncased-distilled-squad');
@@ -599,10 +599,10 @@ export class QuestionAnsweringPipeline extends (/** @type {new (options: TextPip
  * @property {number} token The predicted token id (to replace the masked one).
  * @property {string} token_str The predicted token (to replace the masked one).
  * @typedef {FillMaskSingle[]} FillMaskOutput
- *
+ *
  * @typedef {Object} FillMaskPipelineOptions Parameters specific to fill mask pipelines.
  * @property {number} [top_k=5] When passed, overrides the number of predictions to return.
- *
+ *
  * @callback FillMaskPipelineCallback Fill the masked token in the text(s) given as inputs.
  * @param {string|string[]} texts One or several texts (or one list of prompts) with masked tokens.
  * @param {FillMaskPipelineOptions} [options] The options to use for masked language modelling.
@@ -610,13 +610,13 @@ export class QuestionAnsweringPipeline extends (/** @type {new (options: TextPip
  * and the sequence with the predicted token filled in, or an array of such arrays (one for each input text).
  * If only one input text is given, the output will be an array of objects.
  * @throws {Error} When the mask token is not found in the input text.
- *
+ *
  * @typedef {TextPipelineConstructorArgs & FillMaskPipelineCallback & Disposable} FillMaskPipelineType
  */

 /**
  * Masked language modeling prediction pipeline using any `ModelWithLMHead`.
- *
+ *
  * **Example:** Perform masked language modelling (a.k.a. "fill-mask") with `Xenova/bert-base-uncased`.
  * ```javascript
  * const unmasker = await pipeline('fill-mask', 'Xenova/bert-base-cased');
@@ -629,7 +629,7 @@ export class QuestionAnsweringPipeline extends (/** @type {new (options: TextPip
  * // { token_str: 'life', score: 0.01859794743359089, token: 1297, sequence: 'The goal of life is life.' }
  * // ]
  * ```
- *
+ *
  * **Example:** Perform masked language modelling (a.k.a. "fill-mask") with `Xenova/bert-base-cased` (and return top result).
  * ```javascript
  * const unmasker = await pipeline('fill-mask', 'Xenova/bert-base-cased');
@@ -706,18 +706,18 @@ export class FillMaskPipeline extends (/** @type {new (options: TextPipelineCons
  * @typedef {Object} Text2TextGenerationSingle
  * @property {string} generated_text The generated text.
  * @typedef {Text2TextGenerationSingle[]} Text2TextGenerationOutput
- *
+ *
  * @callback Text2TextGenerationPipelineCallback Generate the output text(s) using text(s) given as inputs.
  * @param {string|string[]} texts Input text for the encoder.
  * @param {Partial<import('./generation/configuration_utils.js').GenerationConfig>} [options] Additional keyword arguments to pass along to the generate method of the model.
  * @returns {Promise<Text2TextGenerationOutput|Text2TextGenerationOutput[]>}
- *
+ *
  * @typedef {TextPipelineConstructorArgs & Text2TextGenerationPipelineCallback & Disposable} Text2TextGenerationPipelineType
  */

 /**
  * Text2TextGenerationPipeline class for generating text using a model that performs text-to-text generation tasks.
- *
+ *
  * **Example:** Text-to-text generation w/ `Xenova/LaMini-Flan-T5-783M`.
  * ```javascript
  * const generator = await pipeline('text2text-generation', 'Xenova/LaMini-Flan-T5-783M');
@@ -793,18 +793,18 @@ export class Text2TextGenerationPipeline extends (/** @type {new (options: TextP
  * @typedef {Object} SummarizationSingle
  * @property {string} summary_text The summary text.
  * @typedef {SummarizationSingle[]} SummarizationOutput
- *
+ *
  * @callback SummarizationPipelineCallback Summarize the text(s) given as inputs.
  * @param {string|string[]} texts One or several articles (or one list of articles) to summarize.
  * @param {import('./generation/configuration_utils.js').GenerationConfig} [options] Additional keyword arguments to pass along to the generate method of the model.
  * @returns {Promise<SummarizationOutput|SummarizationOutput[]>}
- *
+ *
  * @typedef {TextPipelineConstructorArgs & SummarizationPipelineCallback & Disposable} SummarizationPipelineType
  */

 /**
  * A pipeline for summarization tasks, inheriting from Text2TextGenerationPipeline.
- *
+ *
  * **Example:** Summarization w/ `Xenova/distilbart-cnn-6-6`.
  * ```javascript
  * const generator = await pipeline('summarization', 'Xenova/distilbart-cnn-6-6');
@@ -840,23 +840,23 @@ export class SummarizationPipeline extends (/** @type {new (options: TextPipelin
  * @typedef {Object} TranslationSingle
  * @property {string} translation_text The translated text.
  * @typedef {TranslationSingle[]} TranslationOutput
- *
+ *
  * @callback TranslationPipelineCallback Translate the text(s) given as inputs.
  * @param {string|string[]} texts Texts to be translated.
  * @param {import('./generation/configuration_utils.js').GenerationConfig} [options] Additional keyword arguments to pass along to the generate method of the model.
  * @returns {Promise<TranslationOutput|TranslationOutput[]>}
- *
+ *
  * @typedef {TextPipelineConstructorArgs & TranslationPipelineCallback & Disposable} TranslationPipelineType
  */

 /**
  * Translates text from one language to another.
- *
+ *
  * **Example:** Multilingual translation w/ `Xenova/nllb-200-distilled-600M`.
- *
+ *
  * See [here](https://github.com/facebookresearch/flores/blob/main/flores200/README.md#languages-in-flores-200)
  * for the full list of languages and their corresponding codes.
- *
+ *
  * ```javascript
  * const translator = await pipeline('translation', 'Xenova/nllb-200-distilled-600M');
  * const output = await translator('जीवन एक चॉकलेट बॉक्स की तरह है।', {
@@ -865,12 +865,12 @@ export class SummarizationPipeline extends (/** @type {new (options: TextPipelin
  * });
  * // [{ translation_text: 'La vie est comme une boîte à chocolat.' }]
  * ```
- *
+ *
  * **Example:** Multilingual translation w/ `Xenova/m2m100_418M`.
- *
+ *
  * See [here](https://huggingface.co/facebook/m2m100_418M#languages-covered)
  * for the full list of languages and their corresponding codes.
- *
+ *
  * ```javascript
  * const translator = await pipeline('translation', 'Xenova/m2m100_418M');
  * const output = await translator('生活就像一盒巧克力。', {
@@ -879,12 +879,12 @@ export class SummarizationPipeline extends (/** @type {new (options: TextPipelin
  * });
  * // [{ translation_text: 'Life is like a box of chocolate.' }]
  * ```
- *
+ *
  * **Example:** Multilingual translation w/ `Xenova/mbart-large-50-many-to-many-mmt`.
- *
+ *
  * See [here](https://huggingface.co/facebook/mbart-large-50-many-to-many-mmt#languages-covered)
  * for the full list of languages and their corresponding codes.
- *
+ *
  * ```javascript
  * const translator = await pipeline('translation', 'Xenova/mbart-large-50-many-to-many-mmt');
  * const output = await translator('संयुक्त राष्ट्र के प्रमुख का कहना है कि सीरिया में कोई सैन्य समाधान नहीं है', {
@@ -913,21 +913,21 @@ function isChat(x) {

 /**
  * @typedef {import('./tokenizers.js').Message[]} Chat
- *
+ *
  * @typedef {Object} TextGenerationSingle
  * @property {string|Chat} generated_text The generated text.
  * @typedef {TextGenerationSingle[]} TextGenerationOutput
- *
+ *
  * @typedef {Object} TextGenerationSpecificParams Parameters specific to text-generation pipelines.
  * @property {boolean} [add_special_tokens] Whether or not to add special tokens when tokenizing the sequences.
  * @property {boolean} [return_full_text=true] If set to `false` only added text is returned, otherwise the full text is returned.
  * @typedef {import('./generation/configuration_utils.js').GenerationConfig & TextGenerationSpecificParams} TextGenerationConfig
- *
+ *
  * @callback TextGenerationPipelineCallback Complete the prompt(s) given as inputs.
  * @param {string|string[]|Chat|Chat[]} texts One or several prompts (or one list of prompts) to complete.
  * @param {Partial<TextGenerationConfig>} [options] Additional keyword arguments to pass along to the generate method of the model.
  * @returns {Promise<TextGenerationOutput|TextGenerationOutput[]>} An array or object containing the generated texts.
- *
+ *
  * @typedef {TextPipelineConstructorArgs & TextGenerationPipelineCallback & Disposable} TextGenerationPipelineType
  */

@@ -935,7 +935,7 @@ function isChat(x) {
  * Language generation pipeline using any `ModelWithLMHead` or `ModelForCausalLM`.
  * This pipeline predicts the words that will follow a specified text prompt.
  * NOTE: For the full list of generation parameters, see [`GenerationConfig`](./utils/generation#module_utils/generation.GenerationConfig).
- *
+ *
  * **Example:** Text generation with `Xenova/distilgpt2` (default settings).
  * ```javascript
  * const generator = await pipeline('text-generation', 'Xenova/distilgpt2');
@@ -943,7 +943,7 @@ function isChat(x) {
  * const output = await generator(text);
  * // [{ generated_text: "I enjoy walking with my cute dog, and I love to play with the other dogs." }]
  * ```
- *
+ *
  * **Example:** Text generation with `Xenova/distilgpt2` (custom settings).
  * ```javascript
  * const generator = await pipeline('text-generation', 'Xenova/distilgpt2');
@@ -962,7 +962,7 @@ function isChat(x) {
  * // "generated_text": "Once upon a time, there was an abundance of information about the most important and influential"
  * // }]
  * ```
- *
+ *
  * **Example:** Run code generation with `Xenova/codegen-350M-mono`.
  * ```javascript
  * const generator = await pipeline('text-generation', 'Xenova/codegen-350M-mono');
@@ -1081,7 +1081,7 @@ export class TextGenerationPipeline extends (/** @type {new (options: TextPipeli
  * @property {string} sequence The sequence for which this is the output.
  * @property {string[]} labels The labels sorted by order of likelihood.
  * @property {number[]} scores The probabilities for each of the labels.
- *
+ *
  * @typedef {Object} ZeroShotClassificationPipelineOptions Parameters specific to zero-shot classification pipelines.
  * @property {string} [hypothesis_template="This example is {}."] The template used to turn each
  * candidate label into an NLI-style hypothesis. The candidate label will replace the {} placeholder.
@@ -1089,14 +1089,14 @@ export class TextGenerationPipeline extends (/** @type {new (options: TextPipeli
  * If `false`, the scores are normalized such that the sum of the label likelihoods for each sequence
  * is 1. If `true`, the labels are considered independent and probabilities are normalized for each
  * candidate by doing a softmax of the entailment score vs. the contradiction score.
- *
+ *
  * @callback ZeroShotClassificationPipelineCallback Classify the sequence(s) given as inputs.
  * @param {string|string[]} texts The sequence(s) to classify, will be truncated if the model input is too large.
  * @param {string|string[]} candidate_labels The set of possible class labels to classify each sequence into.
  * Can be a single label, a string of comma-separated labels, or a list of labels.
  * @param {ZeroShotClassificationPipelineOptions} [options] The options to use for zero-shot classification.
  * @returns {Promise<ZeroShotClassificationOutput|ZeroShotClassificationOutput[]>} An array or object containing the predicted labels and scores.
- *
+ *
  * @typedef {TextPipelineConstructorArgs & ZeroShotClassificationPipelineCallback & Disposable} ZeroShotClassificationPipelineType
  */

@@ -1105,7 +1105,7 @@ export class TextGenerationPipeline extends (/** @type {new (options: TextPipeli
  * trained on NLI (natural language inference) tasks. Equivalent of `text-classification`
  * pipelines, but these models don't require a hardcoded number of potential classes, they
  * can be chosen at runtime. It usually means it's slower but it is **much** more flexible.
- *
+ *
  * **Example:** Zero shot classification with `Xenova/mobilebert-uncased-mnli`.
  * ```javascript
  * const classifier = await pipeline('zero-shot-classification', 'Xenova/mobilebert-uncased-mnli');
@@ -1118,7 +1118,7 @@ export class TextGenerationPipeline extends (/** @type {new (options: TextPipeli
  * // scores: [ 0.5562091040482018, 0.1843621307860853, 0.13942646639336376, 0.12000229877234923 ]
  * // }
  * ```
- *
+ *
  * **Example:** Zero shot classification with `Xenova/nli-deberta-v3-xsmall` (multi-label).
  * ```javascript
  * const classifier = await pipeline('zero-shot-classification', 'Xenova/nli-deberta-v3-xsmall');
@@ -1232,20 +1232,20 @@ export class ZeroShotClassificationPipeline extends (/** @type {new (options: Te
  * @property {'none'|'mean'|'cls'} [pooling="none"] The pooling method to use.
  * @property {boolean} [normalize=false] Whether or not to normalize the embeddings in the last dimension.
  * @property {boolean} [quantize=false] Whether or not to quantize the embeddings.
- * @property {'binary'|'ubinary'} [precision='binary'] The precision to use for quantization.
- *
+ * @property {'binary'|'ubinary'} [precision='binary'] The precision to use for quantization.
+ *
  * @callback FeatureExtractionPipelineCallback Extract the features of the input(s).
  * @param {string|string[]} texts One or several texts (or one list of texts) to get the features of.
  * @param {FeatureExtractionPipelineOptions} [options] The options to use for feature extraction.
  * @returns {Promise<Tensor>} The features computed by the model.
- *
+ *
  * @typedef {TextPipelineConstructorArgs & FeatureExtractionPipelineCallback & Disposable} FeatureExtractionPipelineType
  */

 /**
  * Feature extraction pipeline using no model head. This pipeline extracts the hidden
  * states from the base transformer, which can be used as features in downstream tasks.
- *
+ *
  * **Example:** Run feature extraction with `bert-base-uncased` (without pooling/normalization).
  * ```javascript
  * const extractor = await pipeline('feature-extraction', 'Xenova/bert-base-uncased', { revision: 'default' });
@@ -1256,7 +1256,7 @@ export class ZeroShotClassificationPipeline extends (/** @type {new (options: Te
  * // dims: [1, 8, 768]
  * // }
  * ```
- *
+ *
  * **Example:** Run feature extraction with `bert-base-uncased` (with pooling/normalization).
  * ```javascript
  * const extractor = await pipeline('feature-extraction', 'Xenova/bert-base-uncased', { revision: 'default' });
@@ -1267,7 +1267,7 @@ export class ZeroShotClassificationPipeline extends (/** @type {new (options: Te
  * // dims: [1, 768]
  * // }
  * ```
- *
+ *
  * **Example:** Calculating embeddings with `sentence-transformers` models.
  * ```javascript
  * const extractor = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2');
@@ -1348,19 +1348,19 @@ export class FeatureExtractionPipeline extends (/** @type {new (options: TextPip
 /**
  * @typedef {Object} ImageFeatureExtractionPipelineOptions Parameters specific to image feature extraction pipelines.
  * @property {boolean} [pool=null] Whether or not to return the pooled output. If set to `false`, the model will return the raw hidden states.
- *
+ *
  * @callback ImageFeatureExtractionPipelineCallback Extract the features of the input(s).
  * @param {ImagePipelineInputs} images One or several images (or one list of images) to get the features of.
  * @param {ImageFeatureExtractionPipelineOptions} [options] The options to use for image feature extraction.
  * @returns {Promise<Tensor>} The image features computed by the model.
- *
+ *
  * @typedef {ImagePipelineConstructorArgs & ImageFeatureExtractionPipelineCallback & Disposable} ImageFeatureExtractionPipelineType
  */

 /**
  * Image feature extraction pipeline using no model head. This pipeline extracts the hidden
  * states from the base transformer, which can be used as features in downstream tasks.
- *
+ *
  * **Example:** Perform image feature extraction with `Xenova/vit-base-patch16-224-in21k`.
  * ```javascript
  * const image_feature_extractor = await pipeline('image-feature-extraction', 'Xenova/vit-base-patch16-224-in21k');
@@ -1373,7 +1373,7 @@ export class FeatureExtractionPipeline extends (/** @type {new (options: TextPip
  * // size: 151296
  * // }
  * ```
- *
+ *
  * **Example:** Compute image embeddings with `Xenova/clip-vit-base-patch32`.
  * ```javascript
  * const image_feature_extractor = await pipeline('image-feature-extraction', 'Xenova/clip-vit-base-patch32');
@@ -1429,12 +1429,12 @@ export class ImageFeatureExtractionPipeline extends (/** @type {new (options: Im
  * @property {string} label The label predicted.
  * @property {number} score The corresponding probability.
  * @typedef {AudioClassificationSingle[]} AudioClassificationOutput
- *
+ *
  * @typedef {Object} AudioClassificationPipelineOptions Parameters specific to audio classification pipelines.
  * @property {number} [top_k=5] The number of top labels that will be returned by the pipeline.
  * If the provided number is `null` or higher than the number of labels available in the model configuration,
  * it will default to the number of labels.
- *
+ *
  * @callback AudioClassificationPipelineCallback Classify the sequence(s) given as inputs.
  * @param {AudioPipelineInputs} audio The input audio file(s) to be classified. The input is either:
  * - `string` or `URL` that is the filename/URL of the audio file, the file will be read at the processor's sampling rate
@@ -1443,14 +1443,14 @@ export class ImageFeatureExtractionPipeline extends (/** @type {new (options: Im
  * - `Float32Array` or `Float64Array` of shape `(n, )`, representing the raw audio at the correct sampling rate (no further check will be done).
  * @param {AudioClassificationPipelineOptions} [options] The options to use for audio classification.
  * @returns {Promise<AudioClassificationOutput|AudioClassificationOutput[]>} An array or object containing the predicted labels and scores.
- *
+ *
  * @typedef {AudioPipelineConstructorArgs & AudioClassificationPipelineCallback & Disposable} AudioClassificationPipelineType
  */

 /**
  * Audio classification pipeline using any `AutoModelForAudioClassification`.
  * This pipeline predicts the class of a raw waveform or an audio file.
- *
+ *
  * **Example:** Perform audio classification with `Xenova/wav2vec2-large-xlsr-53-gender-recognition-librispeech`.
  * ```javascript
  * const classifier = await pipeline('audio-classification', 'Xenova/wav2vec2-large-xlsr-53-gender-recognition-librispeech');
@@ -1461,7 +1461,7 @@ export class ImageFeatureExtractionPipeline extends (/** @type {new (options: Im
  * // { label: 'female', score: 0.001845747814513743 }
  * // ]
  * ```
- *
+ *
  * **Example:** Perform audio classification with `Xenova/ast-finetuned-audioset-10-10-0.4593` and return top 4 results.
  * ```javascript
  * const classifier = await pipeline('audio-classification', 'Xenova/ast-finetuned-audioset-10-10-0.4593');
@@ -1526,12 +1526,12 @@ export class AudioClassificationPipeline extends (/** @type {new (options: Audio
  * @typedef {Object} ZeroShotAudioClassificationOutput
  * @property {string} label The label identified by the model. It is one of the suggested `candidate_label`.
  * @property {number} score The score attributed by the model for that label (between 0 and 1).
- *
+ *
  * @typedef {Object} ZeroShotAudioClassificationPipelineOptions Parameters specific to zero-shot audio classification pipelines.
  * @property {string} [hypothesis_template="This is a sound of {}."] The sentence used in conjunction with `candidate_labels`
  * to attempt the audio classification by replacing the placeholder with the candidate_labels.
  * Then likelihood is estimated by using `logits_per_audio`.
- *
+ *
  * @callback ZeroShotAudioClassificationPipelineCallback Classify the sequence(s) given as inputs.
  * @param {AudioPipelineInputs} audio The input audio file(s) to be classified. The input is either:
  * - `string` or `URL` that is the filename/URL of the audio file, the file will be read at the processor's sampling rate
@@ -1541,14 +1541,14 @@ export class AudioClassificationPipeline extends (/** @type {new (options: Audio
  * @param {string[]} candidate_labels The candidate labels for this audio.
  * @param {ZeroShotAudioClassificationPipelineOptions} [options] The options to use for zero-shot audio classification.
  * @returns {Promise<ZeroShotAudioClassificationOutput[]|ZeroShotAudioClassificationOutput[][]>} An array of objects containing the predicted labels and scores.
- *
+ *
  * @typedef {TextAudioPipelineConstructorArgs & ZeroShotAudioClassificationPipelineCallback & Disposable} ZeroShotAudioClassificationPipelineType
  */

 /**
  * Zero shot audio classification pipeline using `ClapModel`. This pipeline predicts the class of an audio when you
  * provide an audio and a set of `candidate_labels`.
- *
+ *
  * **Example**: Perform zero-shot audio classification with `Xenova/clap-htsat-unfused`.
  * ```javascript
  * const classifier = await pipeline('zero-shot-audio-classification', 'Xenova/clap-htsat-unfused');
@@ -1581,7 +1581,7 @@ export class ZeroShotAudioClassificationPipeline extends (/** @type {new (option
             audio = [/** @type {AudioInput} */ (audio)];
         }

-        // Insert label into hypothesis template
+        // Insert label into hypothesis template
         const texts = candidate_labels.map(
             x => hypothesis_template.replace('{}', x)
         );
@@ -1625,7 +1625,7 @@ export class ZeroShotAudioClassificationPipeline extends (/** @type {new (option
  * @property {string} text The recognized text.
  * @property {Chunk[]} [chunks] When using `return_timestamps`, the `chunks` will become a list
  * containing all the various text chunks identified by the model.
- *
+ *
  * @typedef {Object} AutomaticSpeechRecognitionSpecificParams Parameters specific to automatic-speech-recognition pipelines.
  * @property {boolean|'word'} [return_timestamps] Whether to return timestamps or not. Default is `false`.
  * @property {number} [chunk_length_s] The length of audio chunks to process in seconds. Default is 0 (no chunking).
@@ -1635,7 +1635,7 @@ export class ZeroShotAudioClassificationPipeline extends (/** @type {new (option
  * @property {string} [task] The task to perform. Default is `null`, meaning it should be auto-detected.
  * @property {number} [num_frames] The number of frames in the input audio.
  * @typedef {import('./generation/configuration_utils.js').GenerationConfig & AutomaticSpeechRecognitionSpecificParams} AutomaticSpeechRecognitionConfig
- *
+ *
  * @callback AutomaticSpeechRecognitionPipelineCallback Transcribe the audio sequence(s) given as inputs to text.
  * @param {AudioPipelineInputs} audio The input audio file(s) to be transcribed. The input is either:
  * - `string` or `URL` that is the filename/URL of the audio file, the file will be read at the processor's sampling rate
@@ -1644,7 +1644,7 @@ export class ZeroShotAudioClassificationPipeline extends (/** @type {new (option
  * - `Float32Array` or `Float64Array` of shape `(n, )`, representing the raw audio at the correct sampling rate (no further check will be done).
  * @param {Partial<AutomaticSpeechRecognitionConfig>} [options] Additional keyword arguments to pass along to the generate method of the model.
  * @returns {Promise<AutomaticSpeechRecognitionOutput|AutomaticSpeechRecognitionOutput[]>} An object containing the transcription text and optionally timestamps if `return_timestamps` is `true`.
- *
+ *
  * @typedef {TextAudioPipelineConstructorArgs & AutomaticSpeechRecognitionPipelineCallback & Disposable} AutomaticSpeechRecognitionPipelineType
  */

@@ -1658,7 +1658,7 @@ export class ZeroShotAudioClassificationPipeline extends (/** @type {new (option
  * const output = await transcriber(url);
  * // { text: " And so my fellow Americans ask not what your country can do for you, ask what you can do for your country." }
  * ```
- *
+ *
  * **Example:** Transcribe English w/ timestamps.
  * ```javascript
  * const transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en');
@@ -1672,7 +1672,7 @@ export class ZeroShotAudioClassificationPipeline extends (/** @type {new (option
  * // ]
  * // }
  * ```
- *
+ *
  * **Example:** Transcribe English w/ word-level timestamps.
  * ```javascript
  * const transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en');
@@ -1691,7 +1691,7 @@ export class ZeroShotAudioClassificationPipeline extends (/** @type {new (option
  * // ]
  * // }
  * ```
- *
+ *
  * **Example:** Transcribe French.
  * ```javascript
  * const transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-small');
@@ -1699,7 +1699,7 @@ export class ZeroShotAudioClassificationPipeline extends (/** @type {new (option
  * const output = await transcriber(url, { language: 'french', task: 'transcribe' });
  * // { text: " J'adore, j'aime, je n'aime pas, je déteste." }
  * ```
- *
+ *
  * **Example:** Translate French to English.
  * ```javascript
  * const transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-small');
@@ -1707,7 +1707,7 @@ export class ZeroShotAudioClassificationPipeline extends (/** @type {new (option
  * const output = await transcriber(url, { language: 'french', task: 'translate' });
  * // { text: " I love, I like, I don't like, I hate." }
  * ```
- *
+ *
  * **Example:** Transcribe/translate audio longer than 30 seconds.
  * ```javascript
  * const transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en');
@@ -1730,6 +1730,7 @@ export class AutomaticSpeechRecognitionPipeline extends (/** @type {new (options
     async _call(audio, kwargs = {}) {
         switch (this.model.config.model_type) {
             case 'whisper':
+            case 'lite-whisper':
                 return this._call_whisper(audio, kwargs)
             case 'wav2vec2':
             case 'wav2vec2-bert':
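With `'lite-whisper'` now routed to `_call_whisper`, LiteWhisper checkpoints use the same ASR pipeline API as regular Whisper models. A hedged sketch; the model id below is hypothetical, and any ONNX conversion whose `config.json` declares `"model_type": "lite-whisper"` should behave the same way:

```javascript
import { pipeline } from '@huggingface/transformers';

// Hypothetical LiteWhisper checkpoint; substitute a real ONNX conversion.
const transcriber = await pipeline(
    'automatic-speech-recognition',
    'onnx-community/lite-whisper-large-v3-turbo-ONNX',
);

const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/jfk.wav';
const output = await transcriber(url);
// Same output shape as the Whisper examples above, e.g. { text: '...' }
```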
@@ -1929,18 +1930,18 @@ export class AutomaticSpeechRecognitionPipeline extends (/** @type {new (options
  * @typedef {Object} ImageToTextSingle
  * @property {string} generated_text The generated text.
  * @typedef {ImageToTextSingle[]} ImageToTextOutput
- *
+ *
  * @callback ImageToTextPipelineCallback Assign labels to the image(s) passed as inputs.
  * @param {ImagePipelineInputs} texts The images to be captioned.
  * @param {Partial<import('./generation/configuration_utils.js').GenerationConfig>} [options] Additional keyword arguments to pass along to the generate method of the model.
  * @returns {Promise<ImageToTextOutput|ImageToTextOutput[]>} An object (or array of objects) containing the generated text(s).
- *
+ *
  * @typedef {TextImagePipelineConstructorArgs & ImageToTextPipelineCallback & Disposable} ImageToTextPipelineType
  */

 /**
  * Image To Text pipeline using a `AutoModelForVision2Seq`. This pipeline predicts a caption for a given image.
- *
+ *
  * **Example:** Generate a caption for an image w/ `Xenova/vit-gpt2-image-captioning`.
  * ```javascript
  * const captioner = await pipeline('image-to-text', 'Xenova/vit-gpt2-image-captioning');
@@ -1948,7 +1949,7 @@ export class AutomaticSpeechRecognitionPipeline extends (/** @type {new (options
  * const output = await captioner(url);
  * // [{ generated_text: 'a cat laying on a couch with another cat' }]
  * ```
- *
+ *
  * **Example:** Optical Character Recognition (OCR) w/ `Xenova/trocr-small-handwritten`.
  * ```javascript
  * const captioner = await pipeline('image-to-text', 'Xenova/trocr-small-handwritten');
@@ -1994,22 +1995,22 @@ export class ImageToTextPipeline extends (/** @type {new (options: TextImagePipe
  * @property {string} label The label identified by the model.
  * @property {number} score The score attributed by the model for that label.
  * @typedef {ImageClassificationSingle[]} ImageClassificationOutput
- *
+ *
  * @typedef {Object} ImageClassificationPipelineOptions Parameters specific to image classification pipelines.
- * @property {number} [top_k=1] The number of top labels that will be returned by the pipeline.
- *
+ * @property {number} [top_k=1] The number of top labels that will be returned by the pipeline.
+ *
  * @callback ImageClassificationPipelineCallback Assign labels to the image(s) passed as inputs.
  * @param {ImagePipelineInputs} images The input images(s) to be classified.
  * @param {ImageClassificationPipelineOptions} [options] The options to use for image classification.
  * @returns {Promise<ImageClassificationOutput|ImageClassificationOutput[]>} An array or object containing the predicted labels and scores.
- *
+ *
  * @typedef {ImagePipelineConstructorArgs & ImageClassificationPipelineCallback & Disposable} ImageClassificationPipelineType
  */

 /**
  * Image classification pipeline using any `AutoModelForImageClassification`.
  * This pipeline predicts the class of an image.
- *
+ *
  * **Example:** Classify an image.
  * ```javascript
  * const classifier = await pipeline('image-classification', 'Xenova/vit-base-patch16-224');
@@ -2019,7 +2020,7 @@ export class ImageToTextPipeline extends (/** @type {new (options: TextImagePipe
  * // { label: 'tiger, Panthera tigris', score: 0.632695734500885 },
  * // ]
  * ```
- *
+ *
  * **Example:** Classify an image and return top `n` classes.
  * ```javascript
  * const classifier = await pipeline('image-classification', 'Xenova/vit-base-patch16-224');
@@ -2031,7 +2032,7 @@ export class ImageToTextPipeline extends (/** @type {new (options: TextImagePipe
  * // { label: 'lion, king of beasts, Panthera leo', score: 0.00045060308184474707 },
  * // ]
  * ```
- *
+ *
  * **Example:** Classify an image and return all classes.
  * ```javascript
  * const classifier = await pipeline('image-classification', 'Xenova/vit-base-patch16-224');
@@ -2095,10 +2096,10 @@ export class ImageClassificationPipeline extends (/** @type {new (options: Image

 /**
  * @typedef {Object} ImageSegmentationPipelineOutput
- * @property {string} label The label of the segment.
+ * @property {string|null} label The label of the segment.
  * @property {number|null} score The score of the segment.
  * @property {RawImage} mask The mask of the segment.
- *
+ *
  * @typedef {Object} ImageSegmentationPipelineOptions Parameters specific to image segmentation pipelines.
  * @property {number} [threshold=0.5] Probability threshold to filter out predicted masks.
  * @property {number} [mask_threshold=0.5] Threshold to use when turning the predicted masks into binary values.
@@ -2107,19 +2108,19 @@ export class ImageClassificationPipeline extends (/** @type {new (options: Image
  * depending on model capabilities. If not set, the pipeline will attempt to resolve (in that order).
  * @property {number[]} [label_ids_to_fuse=null] List of label ids to fuse. If not set, do not fuse any labels.
  * @property {number[][]} [target_sizes=null] List of target sizes for the input images. If not set, use the original image sizes.
- *
+ *
  * @callback ImageSegmentationPipelineCallback Segment the input images.
  * @param {ImagePipelineInputs} images The input images.
  * @param {ImageSegmentationPipelineOptions} [options] The options to use for image segmentation.
  * @returns {Promise<ImageSegmentationPipelineOutput[]>} The annotated segments.
- *
+ *
  * @typedef {ImagePipelineConstructorArgs & ImageSegmentationPipelineCallback & Disposable} ImageSegmentationPipelineType
  */

 /**
  * Image segmentation pipeline using any `AutoModelForXXXSegmentation`.
  * This pipeline predicts masks of objects and their classes.
- *
+ *
  * **Example:** Perform image segmentation with `Xenova/detr-resnet-50-panoptic`.
  * ```javascript
  * const segmenter = await pipeline('image-segmentation', 'Xenova/detr-resnet-50-panoptic');
@@ -2165,14 +2166,30 @@ export class ImageSegmentationPipeline extends (/** @type {new (options: ImagePi
         const preparedImages = await prepareImages(images);
         const imageSizes = preparedImages.map(x => [x.height, x.width]);

-        const { pixel_values, pixel_mask } = await this.processor(preparedImages);
-        const output = await this.model({ pixel_values, pixel_mask });
+        const inputs = await this.processor(preparedImages);
+
+        const { inputNames, outputNames } = this.model.sessions['model'];
+        if (!inputNames.includes('pixel_values')) {
+            if (inputNames.length !== 1) {
+                throw Error(`Expected a single input name, but got ${inputNames.length} inputs: ${inputNames}.`);
+            }
+
+            const newName = inputNames[0];
+            if (newName in inputs) {
+                throw Error(`Input name ${newName} already exists in the inputs.`);
+            }
+            // To ensure compatibility with certain background-removal models,
+            // we may need to perform a mapping of input to output names
+            inputs[newName] = inputs.pixel_values;
+        }
+
+        const output = await this.model(inputs);

         let fn = null;
         if (subtask !== null) {
             fn = this.subtasks_mapping[subtask];
-        } else {
-            for (let [task, func] of Object.entries(this.subtasks_mapping)) {
+        } else if (this.processor.image_processor) {
+            for (const [task, func] of Object.entries(this.subtasks_mapping)) {
                 if (func in this.processor.image_processor) {
                     fn = this.processor.image_processor[func].bind(this.processor.image_processor);
                     subtask = task;
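The remapping added above exists because some segmentation-style ONNX graphs (notably certain background-removal exports) name their single input something other than `pixel_values`; the pipeline reads the session's declared `inputNames` and aliases the processed tensor under the graph's own name. A standalone sketch of the same idea; `input_image` is a hypothetical input name used only for illustration:

```javascript
// Session metadata for a hypothetical graph whose sole input is 'input_image'.
const inputNames = ['input_image'];
const inputs = { pixel_values: 'processed-tensor-placeholder' };

if (!inputNames.includes('pixel_values')) {
    if (inputNames.length !== 1) {
        throw Error(`Expected a single input name, but got ${inputNames.length}.`);
    }
    // Alias the processed tensor under the name the graph actually declares.
    inputs[inputNames[0]] = inputs.pixel_values;
}

console.log(Object.keys(inputs)); // ['pixel_values', 'input_image']
```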
@@ -2186,7 +2203,28 @@ export class ImageSegmentationPipeline extends (/** @type {new (options: ImagePi

         /** @type {ImageSegmentationPipelineOutput[]} */
         const annotation = [];
-        if (subtask === 'panoptic' || subtask === 'instance') {
+        if (!subtask) {
+            // We define an epsilon to safeguard against numerical/precision issues when detecting
+            // the normalization mode of the output (i.e., sigmoid already applied, or not).
+            // See https://github.com/microsoft/onnxruntime/issues/23943 for more information.
+            const epsilon = 1e-5;
+
+            // Perform standard image segmentation
+            const result = output[outputNames[0]];
+            for (let i = 0; i < imageSizes.length; ++i) {
+                const size = imageSizes[i];
+                const item = result[i];
+                if (item.data.some(x => x < -epsilon || x > 1 + epsilon)) {
+                    item.sigmoid_();
+                }
+                const mask = await RawImage.fromTensor(item.mul_(255).to('uint8')).resize(size[1], size[0]);
+                annotation.push({
+                    label: null,
+                    score: null,
+                    mask
+                });
+            }
+        } else if (subtask === 'panoptic' || subtask === 'instance') {
             const processed = fn(
                 output,
                 threshold,
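The new no-subtask branch above covers models whose single output is already a per-pixel mask rather than class logits; the epsilon test guesses whether the graph applied a sigmoid (values in roughly [0, 1]) or emitted raw logits that still need one. A minimal standalone sketch of that detection, using plain arrays in place of the output tensor:

```javascript
// Decide whether mask values still need a sigmoid, tolerating small
// numerical noise around the [0, 1] boundaries.
const EPSILON = 1e-5;
const needsSigmoid = (values) => values.some((x) => x < -EPSILON || x > 1 + EPSILON);

console.log(needsSigmoid([0.1, 0.98, 1.000001])); // false: within tolerance, already normalized
console.log(needsSigmoid([-4.2, 0.3, 7.9]));      // true: raw logits, apply sigmoid first
```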
@@ -2242,29 +2280,86 @@ export class ImageSegmentationPipeline extends (/** @type {new (options: ImagePi
         }
     }

+
+/**
+ * @typedef {Object} BackgroundRemovalPipelineOptions Parameters specific to image segmentation pipelines.
+ *
+ * @callback BackgroundRemovalPipelineCallback Segment the input images.
+ * @param {ImagePipelineInputs} images The input images.
+ * @param {BackgroundRemovalPipelineOptions} [options] The options to use for image segmentation.
+ * @returns {Promise<RawImage[]>} The images with the background removed.
+ *
+ * @typedef {ImagePipelineConstructorArgs & BackgroundRemovalPipelineCallback & Disposable} BackgroundRemovalPipelineType
+ */
+
+/**
+ * Background removal pipeline using certain `AutoModelForXXXSegmentation`.
+ * This pipeline removes the backgrounds of images.
+ *
+ * **Example:** Perform background removal with `Xenova/modnet`.
+ * ```javascript
+ * const segmenter = await pipeline('background-removal', 'Xenova/modnet');
+ * const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/portrait-of-woman_small.jpg';
+ * const output = await segmenter(url);
+ * // [
+ * //   RawImage { data: Uint8ClampedArray(648000) [ ... ], width: 360, height: 450, channels: 4 }
+ * // ]
+ * ```
+ */
+export class BackgroundRemovalPipeline extends (/** @type {new (options: ImagePipelineConstructorArgs) => BackgroundRemovalPipelineType} */ (/** @type {any} */(ImageSegmentationPipeline))) {
+    /**
+     * Create a new BackgroundRemovalPipeline.
+     * @param {ImagePipelineConstructorArgs} options An object used to instantiate the pipeline.
+     */
+    constructor(options) {
+        super(options);
+    }
+
+    /** @type {BackgroundRemovalPipelineCallback} */
+    async _call(images, options = {}) {
+        const isBatched = Array.isArray(images);
+
+        if (isBatched && images.length !== 1) {
+            throw Error("Background removal pipeline currently only supports a batch size of 1.");
+        }
+
+        const preparedImages = await prepareImages(images);
+
+        // @ts-expect-error TS2339
+        const masks = await super._call(images, options);
+        const result = preparedImages.map((img, i) => {
+            const cloned = img.clone();
+            cloned.putAlpha(masks[i].mask);
+            return cloned;
+        });
+
+        return result;
+    }
+}
+
 /**
  * @typedef {Object} ZeroShotImageClassificationOutput
  * @property {string} label The label identified by the model. It is one of the suggested `candidate_label`.
  * @property {number} score The score attributed by the model for that label (between 0 and 1).
- *
+ *
  * @typedef {Object} ZeroShotImageClassificationPipelineOptions Parameters specific to zero-shot image classification pipelines.
  * @property {string} [hypothesis_template="This is a photo of {}"] The sentence used in conjunction with `candidate_labels`
  * to attempt the image classification by replacing the placeholder with the candidate_labels.
  * Then likelihood is estimated by using `logits_per_image`.
- *
+ *
  * @callback ZeroShotImageClassificationPipelineCallback Assign labels to the image(s) passed as inputs.
  * @param {ImagePipelineInputs} images The input images.
  * @param {string[]} candidate_labels The candidate labels for this image.
  * @param {ZeroShotImageClassificationPipelineOptions} [options] The options to use for zero-shot image classification.
  * @returns {Promise<ZeroShotImageClassificationOutput[]|ZeroShotImageClassificationOutput[][]>} An array of objects containing the predicted labels and scores.
- *
+ *
  * @typedef {TextImagePipelineConstructorArgs & ZeroShotImageClassificationPipelineCallback & Disposable} ZeroShotImageClassificationPipelineType
  */

 /**
  * Zero shot image classification pipeline. This pipeline predicts the class of
  * an image when you provide an image and a set of `candidate_labels`.
- *
+ *
  * **Example:** Zero shot image classification w/ `Xenova/clip-vit-base-patch32`.
  * ```javascript
  * const classifier = await pipeline('zero-shot-image-classification', 'Xenova/clip-vit-base-patch32');
@@ -2294,7 +2389,7 @@ export class ZeroShotImageClassificationPipeline extends (/** @type {new (option
 const isBatched = Array.isArray(images);
 const preparedImages = await prepareImages(images);

- // Insert label into hypothesis template
+ // Insert label into hypothesis template
 const texts = candidate_labels.map(
 x => hypothesis_template.replace('{}', x)
 );
@@ -2341,23 +2436,23 @@ export class ZeroShotImageClassificationPipeline extends (/** @type {new (option
 * @property {number} score The score attributed by the model for that label.
 * @property {BoundingBox} box The bounding box of detected object in image's original size, or as a percentage if `percentage` is set to true.
 * @typedef {ObjectDetectionPipelineSingle[]} ObjectDetectionPipelineOutput
- *
+ *
 * @typedef {Object} ObjectDetectionPipelineOptions Parameters specific to object detection pipelines.
 * @property {number} [threshold=0.9] The threshold used to filter boxes by score.
 * @property {boolean} [percentage=false] Whether to return the boxes coordinates in percentage (true) or in pixels (false).
- *
+ *
 * @callback ObjectDetectionPipelineCallback Detect objects (bounding boxes & classes) in the image(s) passed as inputs.
 * @param {ImagePipelineInputs} images The input images.
 * @param {ObjectDetectionPipelineOptions} [options] The options to use for object detection.
- * @returns {Promise<ObjectDetectionPipelineOutput|ObjectDetectionPipelineOutput[]>} A list of objects or a list of list of objects.
- *
+ * @returns {Promise<ObjectDetectionPipelineOutput|ObjectDetectionPipelineOutput[]>} A list of objects or a list of list of objects.
+ *
 * @typedef {ImagePipelineConstructorArgs & ObjectDetectionPipelineCallback & Disposable} ObjectDetectionPipelineType
 */

 /**
 * Object detection pipeline using any `AutoModelForObjectDetection`.
 * This pipeline predicts bounding boxes of objects and their classes.
- *
+ *
 * **Example:** Run object-detection with `Xenova/detr-resnet-50`.
 * ```javascript
 * const detector = await pipeline('object-detection', 'Xenova/detr-resnet-50');
@@ -2431,27 +2526,27 @@ export class ObjectDetectionPipeline extends (/** @type {new (options: ImagePipe
 * @property {string} label Text query corresponding to the found object.
 * @property {number} score Score corresponding to the object (between 0 and 1).
 * @property {BoundingBox} box Bounding box of the detected object in image's original size, or as a percentage if `percentage` is set to true.
- *
+ *
 * @typedef {Object} ZeroShotObjectDetectionPipelineOptions Parameters specific to zero-shot object detection pipelines.
 * @property {number} [threshold=0.1] The probability necessary to make a prediction.
 * @property {number} [top_k=null] The number of top predictions that will be returned by the pipeline.
 * If the provided number is `null` or higher than the number of predictions available, it will default
 * to the number of predictions.
 * @property {boolean} [percentage=false] Whether to return the boxes coordinates in percentage (true) or in pixels (false).
- *
+ *
 * @callback ZeroShotObjectDetectionPipelineCallback Detect objects (bounding boxes & classes) in the image(s) passed as inputs.
 * @param {ImagePipelineInputs} images The input images.
 * @param {string[]} candidate_labels What the model should recognize in the image.
 * @param {ZeroShotObjectDetectionPipelineOptions} [options] The options to use for zero-shot object detection.
 * @returns {Promise<ZeroShotObjectDetectionOutput[]|ZeroShotObjectDetectionOutput[][]>} An array of objects containing the predicted labels, scores, and bounding boxes.
- *
+ *
 * @typedef {TextImagePipelineConstructorArgs & ZeroShotObjectDetectionPipelineCallback & Disposable} ZeroShotObjectDetectionPipelineType
 */

 /**
 * Zero-shot object detection pipeline. This pipeline predicts bounding boxes of
 * objects when you provide an image and a set of `candidate_labels`.
- *
+ *
 * **Example:** Zero-shot object detection w/ `Xenova/owlvit-base-patch32`.
 * ```javascript
 * const detector = await pipeline('zero-shot-object-detection', 'Xenova/owlvit-base-patch32');
@@ -2481,7 +2576,7 @@ export class ObjectDetectionPipeline extends (/** @type {new (options: ImagePipe
 * // }
 * // ]
 * ```
- *
+ *
 * **Example:** Zero-shot object detection w/ `Xenova/owlvit-base-patch32` (returning top 4 matches and setting a threshold).
 * ```javascript
 * const detector = await pipeline('zero-shot-object-detection', 'Xenova/owlvit-base-patch32');
@@ -2554,7 +2649,7 @@ export class ZeroShotObjectDetectionPipeline extends (/** @type {new (options: T
 const output = await this.model({ ...text_inputs, pixel_values });

 let result;
- if('post_process_grounded_object_detection' in this.processor) {
+ if ('post_process_grounded_object_detection' in this.processor) {
 // @ts-ignore
 const processed = this.processor.post_process_grounded_object_detection(
 output,
@@ -2596,13 +2691,13 @@ export class ZeroShotObjectDetectionPipeline extends (/** @type {new (options: T
 * @typedef {Object} DocumentQuestionAnsweringSingle
 * @property {string} answer The generated text.
 * @typedef {DocumentQuestionAnsweringSingle[]} DocumentQuestionAnsweringOutput
- *
+ *
 * @callback DocumentQuestionAnsweringPipelineCallback Answer the question given as input by using the document.
 * @param {ImageInput} image The image of the document to use.
 * @param {string} question A question to ask of the document.
 * @param {Partial<import('./generation/configuration_utils.js').GenerationConfig>} [options] Additional keyword arguments to pass along to the generate method of the model.
 * @returns {Promise<DocumentQuestionAnsweringOutput|DocumentQuestionAnsweringOutput[]>} An object (or array of objects) containing the answer(s).
- *
+ *
 * @typedef {TextImagePipelineConstructorArgs & DocumentQuestionAnsweringPipelineCallback & Disposable} DocumentQuestionAnsweringPipelineType
 */

@@ -2610,7 +2705,7 @@ export class ZeroShotObjectDetectionPipeline extends (/** @type {new (options: T
 * Document Question Answering pipeline using any `AutoModelForDocumentQuestionAnswering`.
 * The inputs/outputs are similar to the (extractive) question answering pipeline; however,
 * the pipeline takes an image (and optional OCR'd words/boxes) as input instead of text context.
- *
+ *
 * **Example:** Answer questions about a document with `Xenova/donut-base-finetuned-docvqa`.
 * ```javascript
 * const qa_pipeline = await pipeline('document-question-answering', 'Xenova/donut-base-finetuned-docvqa');
@@ -2680,22 +2775,22 @@ export class DocumentQuestionAnsweringPipeline extends (/** @type {new (options:
 * @typedef {Object} TextToAudioOutput
 * @property {Float32Array} audio The generated audio waveform.
 * @property {number} sampling_rate The sampling rate of the generated audio waveform.
- *
+ *
 * @typedef {Object} TextToAudioPipelineOptions Parameters specific to text-to-audio pipelines.
 * @property {Tensor|Float32Array|string|URL} [speaker_embeddings=null] The speaker embeddings (if the model requires it).
- *
+ *
 * @callback TextToAudioPipelineCallback Generates speech/audio from the inputs.
 * @param {string|string[]} texts The text(s) to generate.
 * @param {TextToAudioPipelineOptions} options Parameters passed to the model generation/forward method.
 * @returns {Promise<TextToAudioOutput>} An object containing the generated audio and sampling rate.
- *
+ *
 * @typedef {TextToAudioPipelineConstructorArgs & TextToAudioPipelineCallback & Disposable} TextToAudioPipelineType
 */

 /**
 * Text-to-audio generation pipeline using any `AutoModelForTextToWaveform` or `AutoModelForTextToSpectrogram`.
 * This pipeline generates an audio file from an input text and optional other conditional inputs.
- *
+ *
 * **Example:** Generate audio from text with `Xenova/speecht5_tts`.
 * ```javascript
 * const synthesizer = await pipeline('text-to-speech', 'Xenova/speecht5_tts', { quantized: false });
@@ -2706,17 +2801,17 @@ export class DocumentQuestionAnsweringPipeline extends (/** @type {new (options:
 * // sampling_rate: 16000
 * // }
 * ```
- *
+ *
 * You can then save the audio to a .wav file with the `wavefile` package:
 * ```javascript
 * import wavefile from 'wavefile';
 * import fs from 'fs';
- *
+ *
 * const wav = new wavefile.WaveFile();
 * wav.fromScratch(1, out.sampling_rate, '32f', out.audio);
 * fs.writeFileSync('out.wav', wav.toBuffer());
 * ```
- *
+ *
 * **Example:** Multilingual speech generation with `Xenova/mms-tts-fra`. See [here](https://huggingface.co/models?pipeline_tag=text-to-speech&other=vits&sort=trending) for the full list of available languages (1107).
 * ```javascript
 * const synthesizer = await pipeline('text-to-speech', 'Xenova/mms-tts-fra');
@@ -2822,13 +2917,13 @@ export class TextToAudioPipeline extends (/** @type {new (options: TextToAudioPi
 * @callback ImageToImagePipelineCallback Transform the image(s) passed as inputs.
 * @param {ImagePipelineInputs} images The images to transform.
 * @returns {Promise<RawImage|RawImage[]>} The transformed image or list of images.
- *
+ *
 * @typedef {ImagePipelineConstructorArgs & ImageToImagePipelineCallback & Disposable} ImageToImagePipelineType
 */

 /**
 * Image to Image pipeline using any `AutoModelForImageToImage`. This pipeline generates an image based on a previous image input.
- *
+ *
 * **Example:** Super-resolution w/ `Xenova/swin2SR-classical-sr-x2-64`
 * ```javascript
 * const upscaler = await pipeline('image-to-image', 'Xenova/swin2SR-classical-sr-x2-64');
@@ -2873,17 +2968,17 @@ export class ImageToImagePipeline extends (/** @type {new (options: ImagePipelin
 * @typedef {Object} DepthEstimationPipelineOutput
 * @property {Tensor} predicted_depth The raw depth map predicted by the model.
 * @property {RawImage} depth The processed depth map as an image (with the same size as the input image).
- *
+ *
 * @callback DepthEstimationPipelineCallback Predicts the depth for the image(s) passed as inputs.
 * @param {ImagePipelineInputs} images The images to compute depth for.
 * @returns {Promise<DepthEstimationPipelineOutput|DepthEstimationPipelineOutput[]>} An image or a list of images containing result(s).
- *
+ *
 * @typedef {ImagePipelineConstructorArgs & DepthEstimationPipelineCallback & Disposable} DepthEstimationPipelineType
 */

 /**
 * Depth estimation pipeline using any `AutoModelForDepthEstimation`. This pipeline predicts the depth of an image.
- *
+ *
 * **Example:** Depth estimation w/ `Xenova/dpt-hybrid-midas`
 * ```javascript
 * const depth_estimator = await pipeline('depth-estimation', 'Xenova/dpt-hybrid-midas');
@@ -3134,6 +3229,16 @@ const SUPPORTED_TASKS = Object.freeze({
 },
 "type": "multimodal",
 },
+ "background-removal": {
+ // no tokenizer
+ "pipeline": BackgroundRemovalPipeline,
+ "model": [AutoModelForImageSegmentation, AutoModelForSemanticSegmentation, AutoModelForUniversalSegmentation],
+ "processor": AutoProcessor,
+ "default": {
+ "model": "Xenova/modnet",
+ },
+ "type": "image",
+ },
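A note on the registration above: because a `default` model is supplied, callers may omit the model id entirely, and since `"model"` is an array, the factory appears to try each auto class in order until one matches the checkpoint (as with other multi-class tasks; that resolution logic is not part of this hunk). A minimal sketch of the default-model path:

```javascript
import { pipeline } from '@huggingface/transformers';

// With no model id, the task default registered above resolves to Xenova/modnet.
const remover = await pipeline('background-removal');

// Equivalent to passing the default explicitly:
// const remover = await pipeline('background-removal', 'Xenova/modnet');
```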

 "zero-shot-image-classification": {
 "tokenizer": AutoTokenizer,
@@ -3258,7 +3363,7 @@ const TASK_ALIASES = Object.freeze({

 /**
 * Utility factory method to build a `Pipeline` object.
- *
+ *
 * @template {PipelineType} T The type of pipeline to return.
 * @param {T} task The task defining which pipeline will be returned. Currently accepted tasks are:
 * - `"audio-classification"`: will return a `AudioClassificationPipeline`.
@@ -3299,6 +3404,8 @@ export async function pipeline(
 revision = 'main',
 device = null,
 dtype = null,
+ subfolder = 'onnx',
+ use_external_data_format = null,
 model_file_name = null,
 session_options = {},
 } = {}
@@ -3329,6 +3436,8 @@ export async function pipeline(
 revision,
 device,
 dtype,
+ subfolder,
+ use_external_data_format,
 model_file_name,
 session_options,
 }
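The two hunks above add `subfolder` (defaulting to `'onnx'`) and `use_external_data_format` to the options accepted by the `pipeline()` factory and forward them to model loading. A hedged sketch of how a caller might use them; the model id is a placeholder, and the actual loading semantics live in the model/hub code rather than in these hunks:

```javascript
import { pipeline } from '@huggingface/transformers';

// New factory options in this release, forwarded verbatim to model loading:
// - subfolder: where the ONNX weights live within the repo (default 'onnx')
// - use_external_data_format: opt in to ONNX external-data weight files
const generator = await pipeline(
  'text-generation',
  'onnx-community/example-model', // placeholder model id for illustration
  {
    subfolder: '',                  // e.g. weights stored at the repo root
    use_external_data_format: true, // e.g. checkpoints split into external .onnx_data files
  },
);
```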