@huggingface/transformers 3.4.0 → 3.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/README.md +6 -2
  2. package/dist/transformers.js +315 -152
  3. package/dist/transformers.js.map +1 -1
  4. package/dist/transformers.min.js +1 -1
  5. package/dist/transformers.min.js.map +1 -1
  6. package/dist/transformers.node.cjs +303 -151
  7. package/dist/transformers.node.cjs.map +1 -1
  8. package/dist/transformers.node.min.cjs +1 -1
  9. package/dist/transformers.node.min.cjs.map +1 -1
  10. package/dist/transformers.node.min.mjs +1 -1
  11. package/dist/transformers.node.min.mjs.map +1 -1
  12. package/dist/transformers.node.mjs +315 -152
  13. package/dist/transformers.node.mjs.map +1 -1
  14. package/dist/transformers.web.js +315 -152
  15. package/dist/transformers.web.js.map +1 -1
  16. package/dist/transformers.web.min.js +1 -1
  17. package/dist/transformers.web.min.js.map +1 -1
  18. package/package.json +1 -1
  19. package/src/configs.js +2 -0
  20. package/src/env.js +1 -1
  21. package/src/models/feature_extractors.js +1 -0
  22. package/src/models/snac/feature_extraction_snac.js +3 -0
  23. package/src/models.js +90 -2
  24. package/src/pipelines.js +140 -135
  25. package/src/utils/image.js +9 -1
  26. package/src/utils/tensor.js +6 -2
  27. package/types/configs.d.ts.map +1 -1
  28. package/types/models/feature_extractors.d.ts +1 -0
  29. package/types/models/snac/feature_extraction_snac.d.ts +4 -0
  30. package/types/models/snac/feature_extraction_snac.d.ts.map +1 -0
  31. package/types/models.d.ts +48 -0
  32. package/types/models.d.ts.map +1 -1
  33. package/types/pipelines.d.ts +2 -2
  34. package/types/pipelines.d.ts.map +1 -1
  35. package/types/tsconfig.tsbuildinfo +1 -1
  36. package/types/utils/image.d.ts +2 -2
  37. package/types/utils/image.d.ts.map +1 -1
  38. package/types/utils/tensor.d.ts.map +1 -1
package/src/pipelines.js CHANGED
@@ -1,15 +1,15 @@
1
1
  /**
2
2
  * @file Pipelines provide a high-level, easy to use, API for running machine learning models.
3
- *
3
+ *
4
4
  * **Example:** Instantiate pipeline using the `pipeline` function.
5
5
  * ```javascript
6
6
  * import { pipeline } from '@huggingface/transformers';
7
- *
7
+ *
8
8
  * const classifier = await pipeline('sentiment-analysis');
9
9
  * const output = await classifier('I love transformers!');
10
10
  * // [{'label': 'POSITIVE', 'score': 0.999817686}]
11
11
  * ```
12
- *
12
+ *
13
13
  * @module pipelines
14
14
  */
15
15
 
@@ -78,7 +78,7 @@ import { RawImage } from './utils/image.js';
78
78
 
79
79
 
80
80
  /**
81
- * @typedef {string | RawImage | URL} ImageInput
81
+ * @typedef {string | RawImage | URL | Blob | HTMLCanvasElement | OffscreenCanvas} ImageInput
82
82
  * @typedef {ImageInput|ImageInput[]} ImagePipelineInputs
83
83
  */
84
84
 
@@ -152,7 +152,7 @@ function get_bounding_box(box, asInteger) {
152
152
  /**
153
153
  * @callback DisposeType Disposes the item.
154
154
  * @returns {Promise<void>} A promise that resolves when the item has been disposed.
155
- *
155
+ *
156
156
  * @typedef {Object} Disposable
157
157
  * @property {DisposeType} dispose A promise that resolves when the pipeline has been disposed.
158
158
  */
@@ -189,7 +189,7 @@ export class Pipeline extends Callable {
189
189
  * @property {string} task The task of the pipeline. Useful for specifying subtasks.
190
190
  * @property {PreTrainedModel} model The model used by the pipeline.
191
191
  * @property {PreTrainedTokenizer} tokenizer The tokenizer used by the pipeline.
192
- *
192
+ *
193
193
  * @typedef {ModelTokenizerConstructorArgs} TextPipelineConstructorArgs An object used to instantiate a text-based pipeline.
194
194
  */
195
195
 
@@ -198,7 +198,7 @@ export class Pipeline extends Callable {
198
198
  * @property {string} task The task of the pipeline. Useful for specifying subtasks.
199
199
  * @property {PreTrainedModel} model The model used by the pipeline.
200
200
  * @property {Processor} processor The processor used by the pipeline.
201
- *
201
+ *
202
202
  * @typedef {ModelProcessorConstructorArgs} AudioPipelineConstructorArgs An object used to instantiate an audio-based pipeline.
203
203
  * @typedef {ModelProcessorConstructorArgs} ImagePipelineConstructorArgs An object used to instantiate an image-based pipeline.
204
204
  */
@@ -210,7 +210,7 @@ export class Pipeline extends Callable {
210
210
  * @property {PreTrainedModel} model The model used by the pipeline.
211
211
  * @property {PreTrainedTokenizer} tokenizer The tokenizer used by the pipeline.
212
212
  * @property {Processor} processor The processor used by the pipeline.
213
- *
213
+ *
214
214
  * @typedef {ModelTokenizerProcessorConstructorArgs} TextAudioPipelineConstructorArgs An object used to instantiate a text- and audio-based pipeline.
215
215
  * @typedef {ModelTokenizerProcessorConstructorArgs} TextImagePipelineConstructorArgs An object used to instantiate a text- and image-based pipeline.
216
216
  */
@@ -220,15 +220,15 @@ export class Pipeline extends Callable {
220
220
  * @property {string} label The label predicted.
221
221
  * @property {number} score The corresponding probability.
222
222
  * @typedef {TextClassificationSingle[]} TextClassificationOutput
223
- *
223
+ *
224
224
  * @typedef {Object} TextClassificationPipelineOptions Parameters specific to text classification pipelines.
225
225
  * @property {number} [top_k=1] The number of top predictions to be returned.
226
- *
226
+ *
227
227
  * @callback TextClassificationPipelineCallback Classify the text(s) given as inputs.
228
228
  * @param {string|string[]} texts The input text(s) to be classified.
229
229
  * @param {TextClassificationPipelineOptions} [options] The options to use for text classification.
230
230
  * @returns {Promise<TextClassificationOutput|TextClassificationOutput[]>} An array or object containing the predicted labels and scores.
231
- *
231
+ *
232
232
  * @typedef {TextPipelineConstructorArgs & TextClassificationPipelineCallback & Disposable} TextClassificationPipelineType
233
233
  */
234
234
 
@@ -241,7 +241,7 @@ export class Pipeline extends Callable {
241
241
  * const output = await classifier('I love transformers!');
242
242
  * // [{ label: 'POSITIVE', score: 0.999788761138916 }]
243
243
  * ```
244
- *
244
+ *
245
245
  * **Example:** Multilingual sentiment-analysis w/ `Xenova/bert-base-multilingual-uncased-sentiment` (and return top 5 classes).
246
246
  * ```javascript
247
247
  * const classifier = await pipeline('sentiment-analysis', 'Xenova/bert-base-multilingual-uncased-sentiment');
@@ -254,7 +254,7 @@ export class Pipeline extends Callable {
254
254
  * // { label: '2 stars', score: 0.0009423971059732139 }
255
255
  * // ]
256
256
  * ```
257
- *
257
+ *
258
258
  * **Example:** Toxic comment classification w/ `Xenova/toxic-bert` (and return all classes).
259
259
  * ```javascript
260
260
  * const classifier = await pipeline('text-classification', 'Xenova/toxic-bert');
@@ -339,21 +339,21 @@ export class TextClassificationPipeline extends (/** @type {new (options: TextPi
339
339
  * @property {number} [start] The index of the start of the corresponding entity in the sentence.
340
340
  * @property {number} [end] The index of the end of the corresponding entity in the sentence.
341
341
  * @typedef {TokenClassificationSingle[]} TokenClassificationOutput
342
- *
342
+ *
343
343
  * @typedef {Object} TokenClassificationPipelineOptions Parameters specific to token classification pipelines.
344
344
  * @property {string[]} [ignore_labels] A list of labels to ignore.
345
- *
345
+ *
346
346
  * @callback TokenClassificationPipelineCallback Classify each token of the text(s) given as inputs.
347
347
  * @param {string|string[]} texts One or several texts (or one list of texts) for token classification.
348
348
  * @param {TokenClassificationPipelineOptions} [options] The options to use for token classification.
349
349
  * @returns {Promise<TokenClassificationOutput|TokenClassificationOutput[]>} The result.
350
- *
350
+ *
351
351
  * @typedef {TextPipelineConstructorArgs & TokenClassificationPipelineCallback & Disposable} TokenClassificationPipelineType
352
352
  */
353
353
 
354
354
  /**
355
355
  * Named Entity Recognition pipeline using any `ModelForTokenClassification`.
356
- *
356
+ *
357
357
  * **Example:** Perform named entity recognition with `Xenova/bert-base-NER`.
358
358
  * ```javascript
359
359
  * const classifier = await pipeline('token-classification', 'Xenova/bert-base-NER');
@@ -363,7 +363,7 @@ export class TextClassificationPipeline extends (/** @type {new (options: TextPi
363
363
  * // { entity: 'B-LOC', score: 0.9994474053382874, index: 9, word: 'London' }
364
364
  * // ]
365
365
  * ```
366
- *
366
+ *
367
367
  * **Example:** Perform named entity recognition with `Xenova/bert-base-NER` (and return all labels).
368
368
  * ```javascript
369
369
  * const classifier = await pipeline('token-classification', 'Xenova/bert-base-NER');
@@ -459,22 +459,22 @@ export class TokenClassificationPipeline extends (/** @type {new (options: TextP
459
459
  * @property {number} [start] The character start index of the answer (in the tokenized version of the input).
460
460
  * @property {number} [end] The character end index of the answer (in the tokenized version of the input).
461
461
  * @property {string} answer The answer to the question.
462
- *
462
+ *
463
463
  * @typedef {Object} QuestionAnsweringPipelineOptions Parameters specific to question answering pipelines.
464
464
  * @property {number} [top_k=1] The number of top answer predictions to be returned.
465
- *
465
+ *
466
466
  * @callback QuestionAnsweringPipelineCallback Answer the question(s) given as inputs by using the context(s).
467
467
  * @param {string|string[]} question One or several question(s) (must be used in conjunction with the `context` argument).
468
468
  * @param {string|string[]} context One or several context(s) associated with the question(s) (must be used in conjunction with the `question` argument).
469
469
  * @param {QuestionAnsweringPipelineOptions} [options] The options to use for question answering.
470
470
  * @returns {Promise<QuestionAnsweringOutput|QuestionAnsweringOutput[]>} An array or object containing the predicted answers and scores.
471
- *
471
+ *
472
472
  * @typedef {TextPipelineConstructorArgs & QuestionAnsweringPipelineCallback & Disposable} QuestionAnsweringPipelineType
473
473
  */
474
474
 
475
475
  /**
476
476
  * Question Answering pipeline using any `ModelForQuestionAnswering`.
477
- *
477
+ *
478
478
  * **Example:** Run question answering with `Xenova/distilbert-base-uncased-distilled-squad`.
479
479
  * ```javascript
480
480
  * const answerer = await pipeline('question-answering', 'Xenova/distilbert-base-uncased-distilled-squad');
@@ -599,10 +599,10 @@ export class QuestionAnsweringPipeline extends (/** @type {new (options: TextPip
599
599
  * @property {number} token The predicted token id (to replace the masked one).
600
600
  * @property {string} token_str The predicted token (to replace the masked one).
601
601
  * @typedef {FillMaskSingle[]} FillMaskOutput
602
- *
602
+ *
603
603
  * @typedef {Object} FillMaskPipelineOptions Parameters specific to fill mask pipelines.
604
604
  * @property {number} [top_k=5] When passed, overrides the number of predictions to return.
605
- *
605
+ *
606
606
  * @callback FillMaskPipelineCallback Fill the masked token in the text(s) given as inputs.
607
607
  * @param {string|string[]} texts One or several texts (or one list of prompts) with masked tokens.
608
608
  * @param {FillMaskPipelineOptions} [options] The options to use for masked language modelling.
@@ -610,13 +610,13 @@ export class QuestionAnsweringPipeline extends (/** @type {new (options: TextPip
610
610
  * and the sequence with the predicted token filled in, or an array of such arrays (one for each input text).
611
611
  * If only one input text is given, the output will be an array of objects.
612
612
  * @throws {Error} When the mask token is not found in the input text.
613
- *
613
+ *
614
614
  * @typedef {TextPipelineConstructorArgs & FillMaskPipelineCallback & Disposable} FillMaskPipelineType
615
615
  */
616
616
 
617
617
  /**
618
618
  * Masked language modeling prediction pipeline using any `ModelWithLMHead`.
619
- *
619
+ *
620
620
  * **Example:** Perform masked language modelling (a.k.a. "fill-mask") with `Xenova/bert-base-uncased`.
621
621
  * ```javascript
622
622
  * const unmasker = await pipeline('fill-mask', 'Xenova/bert-base-cased');
@@ -629,7 +629,7 @@ export class QuestionAnsweringPipeline extends (/** @type {new (options: TextPip
629
629
  * // { token_str: 'life', score: 0.01859794743359089, token: 1297, sequence: 'The goal of life is life.' }
630
630
  * // ]
631
631
  * ```
632
- *
632
+ *
633
633
  * **Example:** Perform masked language modelling (a.k.a. "fill-mask") with `Xenova/bert-base-cased` (and return top result).
634
634
  * ```javascript
635
635
  * const unmasker = await pipeline('fill-mask', 'Xenova/bert-base-cased');
@@ -706,18 +706,18 @@ export class FillMaskPipeline extends (/** @type {new (options: TextPipelineCons
706
706
  * @typedef {Object} Text2TextGenerationSingle
707
707
  * @property {string} generated_text The generated text.
708
708
  * @typedef {Text2TextGenerationSingle[]} Text2TextGenerationOutput
709
- *
709
+ *
710
710
  * @callback Text2TextGenerationPipelineCallback Generate the output text(s) using text(s) given as inputs.
711
711
  * @param {string|string[]} texts Input text for the encoder.
712
712
  * @param {Partial<import('./generation/configuration_utils.js').GenerationConfig>} [options] Additional keyword arguments to pass along to the generate method of the model.
713
713
  * @returns {Promise<Text2TextGenerationOutput|Text2TextGenerationOutput[]>}
714
- *
714
+ *
715
715
  * @typedef {TextPipelineConstructorArgs & Text2TextGenerationPipelineCallback & Disposable} Text2TextGenerationPipelineType
716
716
  */
717
717
 
718
718
  /**
719
719
  * Text2TextGenerationPipeline class for generating text using a model that performs text-to-text generation tasks.
720
- *
720
+ *
721
721
  * **Example:** Text-to-text generation w/ `Xenova/LaMini-Flan-T5-783M`.
722
722
  * ```javascript
723
723
  * const generator = await pipeline('text2text-generation', 'Xenova/LaMini-Flan-T5-783M');
@@ -793,18 +793,18 @@ export class Text2TextGenerationPipeline extends (/** @type {new (options: TextP
793
793
  * @typedef {Object} SummarizationSingle
794
794
  * @property {string} summary_text The summary text.
795
795
  * @typedef {SummarizationSingle[]} SummarizationOutput
796
- *
796
+ *
797
797
  * @callback SummarizationPipelineCallback Summarize the text(s) given as inputs.
798
798
  * @param {string|string[]} texts One or several articles (or one list of articles) to summarize.
799
799
  * @param {import('./generation/configuration_utils.js').GenerationConfig} [options] Additional keyword arguments to pass along to the generate method of the model.
800
800
  * @returns {Promise<SummarizationOutput|SummarizationOutput[]>}
801
- *
801
+ *
802
802
  * @typedef {TextPipelineConstructorArgs & SummarizationPipelineCallback & Disposable} SummarizationPipelineType
803
803
  */
804
804
 
805
805
  /**
806
806
  * A pipeline for summarization tasks, inheriting from Text2TextGenerationPipeline.
807
- *
807
+ *
808
808
  * **Example:** Summarization w/ `Xenova/distilbart-cnn-6-6`.
809
809
  * ```javascript
810
810
  * const generator = await pipeline('summarization', 'Xenova/distilbart-cnn-6-6');
@@ -840,23 +840,23 @@ export class SummarizationPipeline extends (/** @type {new (options: TextPipelin
840
840
  * @typedef {Object} TranslationSingle
841
841
  * @property {string} translation_text The translated text.
842
842
  * @typedef {TranslationSingle[]} TranslationOutput
843
- *
843
+ *
844
844
  * @callback TranslationPipelineCallback Translate the text(s) given as inputs.
845
845
  * @param {string|string[]} texts Texts to be translated.
846
846
  * @param {import('./generation/configuration_utils.js').GenerationConfig} [options] Additional keyword arguments to pass along to the generate method of the model.
847
847
  * @returns {Promise<TranslationOutput|TranslationOutput[]>}
848
- *
848
+ *
849
849
  * @typedef {TextPipelineConstructorArgs & TranslationPipelineCallback & Disposable} TranslationPipelineType
850
850
  */
851
851
 
852
852
  /**
853
853
  * Translates text from one language to another.
854
- *
854
+ *
855
855
  * **Example:** Multilingual translation w/ `Xenova/nllb-200-distilled-600M`.
856
- *
856
+ *
857
857
  * See [here](https://github.com/facebookresearch/flores/blob/main/flores200/README.md#languages-in-flores-200)
858
858
  * for the full list of languages and their corresponding codes.
859
- *
859
+ *
860
860
  * ```javascript
861
861
  * const translator = await pipeline('translation', 'Xenova/nllb-200-distilled-600M');
862
862
  * const output = await translator('जीवन एक चॉकलेट बॉक्स की तरह है।', {
@@ -865,12 +865,12 @@ export class SummarizationPipeline extends (/** @type {new (options: TextPipelin
865
865
  * });
866
866
  * // [{ translation_text: 'La vie est comme une boîte à chocolat.' }]
867
867
  * ```
868
- *
868
+ *
869
869
  * **Example:** Multilingual translation w/ `Xenova/m2m100_418M`.
870
- *
870
+ *
871
871
  * See [here](https://huggingface.co/facebook/m2m100_418M#languages-covered)
872
872
  * for the full list of languages and their corresponding codes.
873
- *
873
+ *
874
874
  * ```javascript
875
875
  * const translator = await pipeline('translation', 'Xenova/m2m100_418M');
876
876
  * const output = await translator('生活就像一盒巧克力。', {
@@ -879,12 +879,12 @@ export class SummarizationPipeline extends (/** @type {new (options: TextPipelin
879
879
  * });
880
880
  * // [{ translation_text: 'Life is like a box of chocolate.' }]
881
881
  * ```
882
- *
882
+ *
883
883
  * **Example:** Multilingual translation w/ `Xenova/mbart-large-50-many-to-many-mmt`.
884
- *
884
+ *
885
885
  * See [here](https://huggingface.co/facebook/mbart-large-50-many-to-many-mmt#languages-covered)
886
886
  * for the full list of languages and their corresponding codes.
887
- *
887
+ *
888
888
  * ```javascript
889
889
  * const translator = await pipeline('translation', 'Xenova/mbart-large-50-many-to-many-mmt');
890
890
  * const output = await translator('संयुक्त राष्ट्र के प्रमुख का कहना है कि सीरिया में कोई सैन्य समाधान नहीं है', {
@@ -913,21 +913,21 @@ function isChat(x) {
913
913
 
914
914
  /**
915
915
  * @typedef {import('./tokenizers.js').Message[]} Chat
916
- *
916
+ *
917
917
  * @typedef {Object} TextGenerationSingle
918
918
  * @property {string|Chat} generated_text The generated text.
919
919
  * @typedef {TextGenerationSingle[]} TextGenerationOutput
920
- *
920
+ *
921
921
  * @typedef {Object} TextGenerationSpecificParams Parameters specific to text-generation pipelines.
922
922
  * @property {boolean} [add_special_tokens] Whether or not to add special tokens when tokenizing the sequences.
923
923
  * @property {boolean} [return_full_text=true] If set to `false` only added text is returned, otherwise the full text is returned.
924
924
  * @typedef {import('./generation/configuration_utils.js').GenerationConfig & TextGenerationSpecificParams} TextGenerationConfig
925
- *
925
+ *
926
926
  * @callback TextGenerationPipelineCallback Complete the prompt(s) given as inputs.
927
927
  * @param {string|string[]|Chat|Chat[]} texts One or several prompts (or one list of prompts) to complete.
928
928
  * @param {Partial<TextGenerationConfig>} [options] Additional keyword arguments to pass along to the generate method of the model.
929
929
  * @returns {Promise<TextGenerationOutput|TextGenerationOutput[]>} An array or object containing the generated texts.
930
- *
930
+ *
931
931
  * @typedef {TextPipelineConstructorArgs & TextGenerationPipelineCallback & Disposable} TextGenerationPipelineType
932
932
  */
933
933
 
@@ -935,7 +935,7 @@ function isChat(x) {
935
935
  * Language generation pipeline using any `ModelWithLMHead` or `ModelForCausalLM`.
936
936
  * This pipeline predicts the words that will follow a specified text prompt.
937
937
  * NOTE: For the full list of generation parameters, see [`GenerationConfig`](./utils/generation#module_utils/generation.GenerationConfig).
938
- *
938
+ *
939
939
  * **Example:** Text generation with `Xenova/distilgpt2` (default settings).
940
940
  * ```javascript
941
941
  * const generator = await pipeline('text-generation', 'Xenova/distilgpt2');
@@ -943,7 +943,7 @@ function isChat(x) {
943
943
  * const output = await generator(text);
944
944
  * // [{ generated_text: "I enjoy walking with my cute dog, and I love to play with the other dogs." }]
945
945
  * ```
946
- *
946
+ *
947
947
  * **Example:** Text generation with `Xenova/distilgpt2` (custom settings).
948
948
  * ```javascript
949
949
  * const generator = await pipeline('text-generation', 'Xenova/distilgpt2');
@@ -962,7 +962,7 @@ function isChat(x) {
962
962
  * // "generated_text": "Once upon a time, there was an abundance of information about the most important and influential"
963
963
  * // }]
964
964
  * ```
965
- *
965
+ *
966
966
  * **Example:** Run code generation with `Xenova/codegen-350M-mono`.
967
967
  * ```javascript
968
968
  * const generator = await pipeline('text-generation', 'Xenova/codegen-350M-mono');
@@ -1081,7 +1081,7 @@ export class TextGenerationPipeline extends (/** @type {new (options: TextPipeli
1081
1081
  * @property {string} sequence The sequence for which this is the output.
1082
1082
  * @property {string[]} labels The labels sorted by order of likelihood.
1083
1083
  * @property {number[]} scores The probabilities for each of the labels.
1084
- *
1084
+ *
1085
1085
  * @typedef {Object} ZeroShotClassificationPipelineOptions Parameters specific to zero-shot classification pipelines.
1086
1086
  * @property {string} [hypothesis_template="This example is {}."] The template used to turn each
1087
1087
  * candidate label into an NLI-style hypothesis. The candidate label will replace the {} placeholder.
@@ -1089,14 +1089,14 @@ export class TextGenerationPipeline extends (/** @type {new (options: TextPipeli
1089
1089
  * If `false`, the scores are normalized such that the sum of the label likelihoods for each sequence
1090
1090
  * is 1. If `true`, the labels are considered independent and probabilities are normalized for each
1091
1091
  * candidate by doing a softmax of the entailment score vs. the contradiction score.
1092
- *
1092
+ *
1093
1093
  * @callback ZeroShotClassificationPipelineCallback Classify the sequence(s) given as inputs.
1094
1094
  * @param {string|string[]} texts The sequence(s) to classify, will be truncated if the model input is too large.
1095
1095
  * @param {string|string[]} candidate_labels The set of possible class labels to classify each sequence into.
1096
1096
  * Can be a single label, a string of comma-separated labels, or a list of labels.
1097
1097
  * @param {ZeroShotClassificationPipelineOptions} [options] The options to use for zero-shot classification.
1098
1098
  * @returns {Promise<ZeroShotClassificationOutput|ZeroShotClassificationOutput[]>} An array or object containing the predicted labels and scores.
1099
- *
1099
+ *
1100
1100
  * @typedef {TextPipelineConstructorArgs & ZeroShotClassificationPipelineCallback & Disposable} ZeroShotClassificationPipelineType
1101
1101
  */
1102
1102
 
@@ -1105,7 +1105,7 @@ export class TextGenerationPipeline extends (/** @type {new (options: TextPipeli
1105
1105
  * trained on NLI (natural language inference) tasks. Equivalent of `text-classification`
1106
1106
  * pipelines, but these models don't require a hardcoded number of potential classes, they
1107
1107
  * can be chosen at runtime. It usually means it's slower but it is **much** more flexible.
1108
- *
1108
+ *
1109
1109
  * **Example:** Zero shot classification with `Xenova/mobilebert-uncased-mnli`.
1110
1110
  * ```javascript
1111
1111
  * const classifier = await pipeline('zero-shot-classification', 'Xenova/mobilebert-uncased-mnli');
@@ -1118,7 +1118,7 @@ export class TextGenerationPipeline extends (/** @type {new (options: TextPipeli
1118
1118
  * // scores: [ 0.5562091040482018, 0.1843621307860853, 0.13942646639336376, 0.12000229877234923 ]
1119
1119
  * // }
1120
1120
  * ```
1121
- *
1121
+ *
1122
1122
  * **Example:** Zero shot classification with `Xenova/nli-deberta-v3-xsmall` (multi-label).
1123
1123
  * ```javascript
1124
1124
  * const classifier = await pipeline('zero-shot-classification', 'Xenova/nli-deberta-v3-xsmall');
@@ -1232,20 +1232,20 @@ export class ZeroShotClassificationPipeline extends (/** @type {new (options: Te
1232
1232
  * @property {'none'|'mean'|'cls'} [pooling="none"] The pooling method to use.
1233
1233
  * @property {boolean} [normalize=false] Whether or not to normalize the embeddings in the last dimension.
1234
1234
  * @property {boolean} [quantize=false] Whether or not to quantize the embeddings.
1235
- * @property {'binary'|'ubinary'} [precision='binary'] The precision to use for quantization.
1236
- *
1235
+ * @property {'binary'|'ubinary'} [precision='binary'] The precision to use for quantization.
1236
+ *
1237
1237
  * @callback FeatureExtractionPipelineCallback Extract the features of the input(s).
1238
1238
  * @param {string|string[]} texts One or several texts (or one list of texts) to get the features of.
1239
1239
  * @param {FeatureExtractionPipelineOptions} [options] The options to use for feature extraction.
1240
1240
  * @returns {Promise<Tensor>} The features computed by the model.
1241
- *
1241
+ *
1242
1242
  * @typedef {TextPipelineConstructorArgs & FeatureExtractionPipelineCallback & Disposable} FeatureExtractionPipelineType
1243
1243
  */
1244
1244
 
1245
1245
  /**
1246
1246
  * Feature extraction pipeline using no model head. This pipeline extracts the hidden
1247
1247
  * states from the base transformer, which can be used as features in downstream tasks.
1248
- *
1248
+ *
1249
1249
  * **Example:** Run feature extraction with `bert-base-uncased` (without pooling/normalization).
1250
1250
  * ```javascript
1251
1251
  * const extractor = await pipeline('feature-extraction', 'Xenova/bert-base-uncased', { revision: 'default' });
@@ -1256,7 +1256,7 @@ export class ZeroShotClassificationPipeline extends (/** @type {new (options: Te
1256
1256
  * // dims: [1, 8, 768]
1257
1257
  * // }
1258
1258
  * ```
1259
- *
1259
+ *
1260
1260
  * **Example:** Run feature extraction with `bert-base-uncased` (with pooling/normalization).
1261
1261
  * ```javascript
1262
1262
  * const extractor = await pipeline('feature-extraction', 'Xenova/bert-base-uncased', { revision: 'default' });
@@ -1267,7 +1267,7 @@ export class ZeroShotClassificationPipeline extends (/** @type {new (options: Te
1267
1267
  * // dims: [1, 768]
1268
1268
  * // }
1269
1269
  * ```
1270
- *
1270
+ *
1271
1271
  * **Example:** Calculating embeddings with `sentence-transformers` models.
1272
1272
  * ```javascript
1273
1273
  * const extractor = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2');
@@ -1348,19 +1348,19 @@ export class FeatureExtractionPipeline extends (/** @type {new (options: TextPip
1348
1348
  /**
1349
1349
  * @typedef {Object} ImageFeatureExtractionPipelineOptions Parameters specific to image feature extraction pipelines.
1350
1350
  * @property {boolean} [pool=null] Whether or not to return the pooled output. If set to `false`, the model will return the raw hidden states.
1351
- *
1351
+ *
1352
1352
  * @callback ImageFeatureExtractionPipelineCallback Extract the features of the input(s).
1353
1353
  * @param {ImagePipelineInputs} images One or several images (or one list of images) to get the features of.
1354
1354
  * @param {ImageFeatureExtractionPipelineOptions} [options] The options to use for image feature extraction.
1355
1355
  * @returns {Promise<Tensor>} The image features computed by the model.
1356
- *
1356
+ *
1357
1357
  * @typedef {ImagePipelineConstructorArgs & ImageFeatureExtractionPipelineCallback & Disposable} ImageFeatureExtractionPipelineType
1358
1358
  */
1359
1359
 
1360
1360
  /**
1361
1361
  * Image feature extraction pipeline using no model head. This pipeline extracts the hidden
1362
1362
  * states from the base transformer, which can be used as features in downstream tasks.
1363
- *
1363
+ *
1364
1364
  * **Example:** Perform image feature extraction with `Xenova/vit-base-patch16-224-in21k`.
1365
1365
  * ```javascript
1366
1366
  * const image_feature_extractor = await pipeline('image-feature-extraction', 'Xenova/vit-base-patch16-224-in21k');
@@ -1373,7 +1373,7 @@ export class FeatureExtractionPipeline extends (/** @type {new (options: TextPip
1373
1373
  * // size: 151296
1374
1374
  * // }
1375
1375
  * ```
1376
- *
1376
+ *
1377
1377
  * **Example:** Compute image embeddings with `Xenova/clip-vit-base-patch32`.
1378
1378
  * ```javascript
1379
1379
  * const image_feature_extractor = await pipeline('image-feature-extraction', 'Xenova/clip-vit-base-patch32');
@@ -1429,12 +1429,12 @@ export class ImageFeatureExtractionPipeline extends (/** @type {new (options: Im
1429
1429
  * @property {string} label The label predicted.
1430
1430
  * @property {number} score The corresponding probability.
1431
1431
  * @typedef {AudioClassificationSingle[]} AudioClassificationOutput
1432
- *
1432
+ *
1433
1433
  * @typedef {Object} AudioClassificationPipelineOptions Parameters specific to audio classification pipelines.
1434
1434
  * @property {number} [top_k=5] The number of top labels that will be returned by the pipeline.
1435
1435
  * If the provided number is `null` or higher than the number of labels available in the model configuration,
1436
1436
  * it will default to the number of labels.
1437
- *
1437
+ *
1438
1438
  * @callback AudioClassificationPipelineCallback Classify the sequence(s) given as inputs.
1439
1439
  * @param {AudioPipelineInputs} audio The input audio file(s) to be classified. The input is either:
1440
1440
  * - `string` or `URL` that is the filename/URL of the audio file, the file will be read at the processor's sampling rate
@@ -1443,14 +1443,14 @@ export class ImageFeatureExtractionPipeline extends (/** @type {new (options: Im
1443
1443
  * - `Float32Array` or `Float64Array` of shape `(n, )`, representing the raw audio at the correct sampling rate (no further check will be done).
1444
1444
  * @param {AudioClassificationPipelineOptions} [options] The options to use for audio classification.
1445
1445
  * @returns {Promise<AudioClassificationOutput|AudioClassificationOutput[]>} An array or object containing the predicted labels and scores.
1446
- *
1446
+ *
1447
1447
  * @typedef {AudioPipelineConstructorArgs & AudioClassificationPipelineCallback & Disposable} AudioClassificationPipelineType
1448
1448
  */
1449
1449
 
1450
1450
  /**
1451
1451
  * Audio classification pipeline using any `AutoModelForAudioClassification`.
1452
1452
  * This pipeline predicts the class of a raw waveform or an audio file.
1453
- *
1453
+ *
1454
1454
  * **Example:** Perform audio classification with `Xenova/wav2vec2-large-xlsr-53-gender-recognition-librispeech`.
1455
1455
  * ```javascript
1456
1456
  * const classifier = await pipeline('audio-classification', 'Xenova/wav2vec2-large-xlsr-53-gender-recognition-librispeech');
@@ -1461,7 +1461,7 @@ export class ImageFeatureExtractionPipeline extends (/** @type {new (options: Im
1461
1461
  * // { label: 'female', score: 0.001845747814513743 }
1462
1462
  * // ]
1463
1463
  * ```
1464
- *
1464
+ *
1465
1465
  * **Example:** Perform audio classification with `Xenova/ast-finetuned-audioset-10-10-0.4593` and return top 4 results.
1466
1466
  * ```javascript
1467
1467
  * const classifier = await pipeline('audio-classification', 'Xenova/ast-finetuned-audioset-10-10-0.4593');
@@ -1526,12 +1526,12 @@ export class AudioClassificationPipeline extends (/** @type {new (options: Audio
1526
1526
  * @typedef {Object} ZeroShotAudioClassificationOutput
1527
1527
  * @property {string} label The label identified by the model. It is one of the suggested `candidate_label`.
1528
1528
  * @property {number} score The score attributed by the model for that label (between 0 and 1).
1529
- *
1529
+ *
1530
1530
  * @typedef {Object} ZeroShotAudioClassificationPipelineOptions Parameters specific to zero-shot audio classification pipelines.
1531
1531
  * @property {string} [hypothesis_template="This is a sound of {}."] The sentence used in conjunction with `candidate_labels`
1532
1532
  * to attempt the audio classification by replacing the placeholder with the candidate_labels.
1533
1533
  * Then likelihood is estimated by using `logits_per_audio`.
1534
- *
1534
+ *
1535
1535
  * @callback ZeroShotAudioClassificationPipelineCallback Classify the sequence(s) given as inputs.
1536
1536
  * @param {AudioPipelineInputs} audio The input audio file(s) to be classified. The input is either:
1537
1537
  * - `string` or `URL` that is the filename/URL of the audio file, the file will be read at the processor's sampling rate
@@ -1541,14 +1541,14 @@ export class AudioClassificationPipeline extends (/** @type {new (options: Audio
1541
1541
  * @param {string[]} candidate_labels The candidate labels for this audio.
1542
1542
  * @param {ZeroShotAudioClassificationPipelineOptions} [options] The options to use for zero-shot audio classification.
1543
1543
  * @returns {Promise<ZeroShotAudioClassificationOutput[]|ZeroShotAudioClassificationOutput[][]>} An array of objects containing the predicted labels and scores.
1544
- *
1544
+ *
1545
1545
  * @typedef {TextAudioPipelineConstructorArgs & ZeroShotAudioClassificationPipelineCallback & Disposable} ZeroShotAudioClassificationPipelineType
1546
1546
  */
1547
1547
 
1548
1548
  /**
1549
1549
  * Zero shot audio classification pipeline using `ClapModel`. This pipeline predicts the class of an audio when you
1550
1550
  * provide an audio and a set of `candidate_labels`.
1551
- *
1551
+ *
1552
1552
  * **Example**: Perform zero-shot audio classification with `Xenova/clap-htsat-unfused`.
1553
1553
  * ```javascript
1554
1554
  * const classifier = await pipeline('zero-shot-audio-classification', 'Xenova/clap-htsat-unfused');
@@ -1581,7 +1581,7 @@ export class ZeroShotAudioClassificationPipeline extends (/** @type {new (option
1581
1581
  audio = [/** @type {AudioInput} */ (audio)];
1582
1582
  }
1583
1583
 
1584
- // Insert label into hypothesis template
1584
+ // Insert label into hypothesis template
1585
1585
  const texts = candidate_labels.map(
1586
1586
  x => hypothesis_template.replace('{}', x)
1587
1587
  );
@@ -1625,7 +1625,7 @@ export class ZeroShotAudioClassificationPipeline extends (/** @type {new (option
1625
1625
  * @property {string} text The recognized text.
1626
1626
  * @property {Chunk[]} [chunks] When using `return_timestamps`, the `chunks` will become a list
1627
1627
  * containing all the various text chunks identified by the model.
1628
- *
1628
+ *
1629
1629
  * @typedef {Object} AutomaticSpeechRecognitionSpecificParams Parameters specific to automatic-speech-recognition pipelines.
1630
1630
  * @property {boolean|'word'} [return_timestamps] Whether to return timestamps or not. Default is `false`.
1631
1631
  * @property {number} [chunk_length_s] The length of audio chunks to process in seconds. Default is 0 (no chunking).
@@ -1635,7 +1635,7 @@ export class ZeroShotAudioClassificationPipeline extends (/** @type {new (option
1635
1635
  * @property {string} [task] The task to perform. Default is `null`, meaning it should be auto-detected.
1636
1636
  * @property {number} [num_frames] The number of frames in the input audio.
1637
1637
  * @typedef {import('./generation/configuration_utils.js').GenerationConfig & AutomaticSpeechRecognitionSpecificParams} AutomaticSpeechRecognitionConfig
1638
- *
1638
+ *
1639
1639
  * @callback AutomaticSpeechRecognitionPipelineCallback Transcribe the audio sequence(s) given as inputs to text.
1640
1640
  * @param {AudioPipelineInputs} audio The input audio file(s) to be transcribed. The input is either:
1641
1641
  * - `string` or `URL` that is the filename/URL of the audio file, the file will be read at the processor's sampling rate
@@ -1644,7 +1644,7 @@ export class ZeroShotAudioClassificationPipeline extends (/** @type {new (option
1644
1644
  * - `Float32Array` or `Float64Array` of shape `(n, )`, representing the raw audio at the correct sampling rate (no further check will be done).
1645
1645
  * @param {Partial<AutomaticSpeechRecognitionConfig>} [options] Additional keyword arguments to pass along to the generate method of the model.
1646
1646
  * @returns {Promise<AutomaticSpeechRecognitionOutput|AutomaticSpeechRecognitionOutput[]>} An object containing the transcription text and optionally timestamps if `return_timestamps` is `true`.
1647
- *
1647
+ *
1648
1648
  * @typedef {TextAudioPipelineConstructorArgs & AutomaticSpeechRecognitionPipelineCallback & Disposable} AutomaticSpeechRecognitionPipelineType
1649
1649
  */
1650
1650
 
@@ -1658,7 +1658,7 @@ export class ZeroShotAudioClassificationPipeline extends (/** @type {new (option
1658
1658
  * const output = await transcriber(url);
1659
1659
  * // { text: " And so my fellow Americans ask not what your country can do for you, ask what you can do for your country." }
1660
1660
  * ```
1661
- *
1661
+ *
1662
1662
  * **Example:** Transcribe English w/ timestamps.
1663
1663
  * ```javascript
1664
1664
  * const transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en');
@@ -1672,7 +1672,7 @@ export class ZeroShotAudioClassificationPipeline extends (/** @type {new (option
1672
1672
  * // ]
1673
1673
  * // }
1674
1674
  * ```
1675
- *
1675
+ *
1676
1676
  * **Example:** Transcribe English w/ word-level timestamps.
1677
1677
  * ```javascript
1678
1678
  * const transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en');
@@ -1691,7 +1691,7 @@ export class ZeroShotAudioClassificationPipeline extends (/** @type {new (option
1691
1691
  * // ]
1692
1692
  * // }
1693
1693
  * ```
1694
- *
1694
+ *
1695
1695
  * **Example:** Transcribe French.
1696
1696
  * ```javascript
1697
1697
  * const transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-small');
@@ -1699,7 +1699,7 @@ export class ZeroShotAudioClassificationPipeline extends (/** @type {new (option
1699
1699
  * const output = await transcriber(url, { language: 'french', task: 'transcribe' });
1700
1700
  * // { text: " J'adore, j'aime, je n'aime pas, je déteste." }
1701
1701
  * ```
1702
- *
1702
+ *
1703
1703
  * **Example:** Translate French to English.
1704
1704
  * ```javascript
1705
1705
  * const transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-small');
@@ -1707,7 +1707,7 @@ export class ZeroShotAudioClassificationPipeline extends (/** @type {new (option
1707
1707
  * const output = await transcriber(url, { language: 'french', task: 'translate' });
1708
1708
  * // { text: " I love, I like, I don't like, I hate." }
1709
1709
  * ```
1710
- *
1710
+ *
1711
1711
  * **Example:** Transcribe/translate audio longer than 30 seconds.
1712
1712
  * ```javascript
1713
1713
  * const transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en');
@@ -1930,18 +1930,18 @@ export class AutomaticSpeechRecognitionPipeline extends (/** @type {new (options
1930
1930
  * @typedef {Object} ImageToTextSingle
1931
1931
  * @property {string} generated_text The generated text.
1932
1932
  * @typedef {ImageToTextSingle[]} ImageToTextOutput
1933
- *
1933
+ *
1934
1934
  * @callback ImageToTextPipelineCallback Assign labels to the image(s) passed as inputs.
1935
1935
  * @param {ImagePipelineInputs} texts The images to be captioned.
1936
1936
  * @param {Partial<import('./generation/configuration_utils.js').GenerationConfig>} [options] Additional keyword arguments to pass along to the generate method of the model.
1937
1937
  * @returns {Promise<ImageToTextOutput|ImageToTextOutput[]>} An object (or array of objects) containing the generated text(s).
1938
- *
1938
+ *
1939
1939
  * @typedef {TextImagePipelineConstructorArgs & ImageToTextPipelineCallback & Disposable} ImageToTextPipelineType
1940
1940
  */
1941
1941
 
1942
1942
  /**
1943
1943
  * Image To Text pipeline using a `AutoModelForVision2Seq`. This pipeline predicts a caption for a given image.
1944
- *
1944
+ *
1945
1945
  * **Example:** Generate a caption for an image w/ `Xenova/vit-gpt2-image-captioning`.
1946
1946
  * ```javascript
1947
1947
  * const captioner = await pipeline('image-to-text', 'Xenova/vit-gpt2-image-captioning');
@@ -1949,7 +1949,7 @@ export class AutomaticSpeechRecognitionPipeline extends (/** @type {new (options
1949
1949
  * const output = await captioner(url);
1950
1950
  * // [{ generated_text: 'a cat laying on a couch with another cat' }]
1951
1951
  * ```
1952
- *
1952
+ *
1953
1953
  * **Example:** Optical Character Recognition (OCR) w/ `Xenova/trocr-small-handwritten`.
1954
1954
  * ```javascript
1955
1955
  * const captioner = await pipeline('image-to-text', 'Xenova/trocr-small-handwritten');
@@ -1995,22 +1995,22 @@ export class ImageToTextPipeline extends (/** @type {new (options: TextImagePipe
1995
1995
  * @property {string} label The label identified by the model.
1996
1996
  * @property {number} score The score attributed by the model for that label.
1997
1997
  * @typedef {ImageClassificationSingle[]} ImageClassificationOutput
1998
- *
1998
+ *
1999
1999
  * @typedef {Object} ImageClassificationPipelineOptions Parameters specific to image classification pipelines.
2000
- * @property {number} [top_k=1] The number of top labels that will be returned by the pipeline.
2001
- *
2000
+ * @property {number} [top_k=1] The number of top labels that will be returned by the pipeline.
2001
+ *
2002
2002
  * @callback ImageClassificationPipelineCallback Assign labels to the image(s) passed as inputs.
2003
2003
  * @param {ImagePipelineInputs} images The input images(s) to be classified.
2004
2004
  * @param {ImageClassificationPipelineOptions} [options] The options to use for image classification.
2005
2005
  * @returns {Promise<ImageClassificationOutput|ImageClassificationOutput[]>} An array or object containing the predicted labels and scores.
2006
- *
2006
+ *
2007
2007
  * @typedef {ImagePipelineConstructorArgs & ImageClassificationPipelineCallback & Disposable} ImageClassificationPipelineType
2008
2008
  */
2009
2009
 
2010
2010
  /**
2011
2011
  * Image classification pipeline using any `AutoModelForImageClassification`.
2012
2012
  * This pipeline predicts the class of an image.
2013
- *
2013
+ *
2014
2014
  * **Example:** Classify an image.
2015
2015
  * ```javascript
2016
2016
  * const classifier = await pipeline('image-classification', 'Xenova/vit-base-patch16-224');
@@ -2020,7 +2020,7 @@ export class ImageToTextPipeline extends (/** @type {new (options: TextImagePipe
2020
2020
  * // { label: 'tiger, Panthera tigris', score: 0.632695734500885 },
2021
2021
  * // ]
2022
2022
  * ```
2023
- *
2023
+ *
2024
2024
  * **Example:** Classify an image and return top `n` classes.
2025
2025
  * ```javascript
2026
2026
  * const classifier = await pipeline('image-classification', 'Xenova/vit-base-patch16-224');
@@ -2032,7 +2032,7 @@ export class ImageToTextPipeline extends (/** @type {new (options: TextImagePipe
2032
2032
  * // { label: 'lion, king of beasts, Panthera leo', score: 0.00045060308184474707 },
2033
2033
  * // ]
2034
2034
  * ```
2035
- *
2035
+ *
2036
2036
  * **Example:** Classify an image and return all classes.
2037
2037
  * ```javascript
2038
2038
  * const classifier = await pipeline('image-classification', 'Xenova/vit-base-patch16-224');
@@ -2099,7 +2099,7 @@ export class ImageClassificationPipeline extends (/** @type {new (options: Image
2099
2099
  * @property {string|null} label The label of the segment.
2100
2100
  * @property {number|null} score The score of the segment.
2101
2101
  * @property {RawImage} mask The mask of the segment.
2102
- *
2102
+ *
2103
2103
  * @typedef {Object} ImageSegmentationPipelineOptions Parameters specific to image segmentation pipelines.
2104
2104
  * @property {number} [threshold=0.5] Probability threshold to filter out predicted masks.
2105
2105
  * @property {number} [mask_threshold=0.5] Threshold to use when turning the predicted masks into binary values.
@@ -2108,19 +2108,19 @@ export class ImageClassificationPipeline extends (/** @type {new (options: Image
2108
2108
  * depending on model capabilities. If not set, the pipeline will attempt to resolve (in that order).
2109
2109
  * @property {number[]} [label_ids_to_fuse=null] List of label ids to fuse. If not set, do not fuse any labels.
2110
2110
  * @property {number[][]} [target_sizes=null] List of target sizes for the input images. If not set, use the original image sizes.
2111
- *
2111
+ *
2112
2112
  * @callback ImageSegmentationPipelineCallback Segment the input images.
2113
2113
  * @param {ImagePipelineInputs} images The input images.
2114
2114
  * @param {ImageSegmentationPipelineOptions} [options] The options to use for image segmentation.
2115
2115
  * @returns {Promise<ImageSegmentationPipelineOutput[]>} The annotated segments.
2116
- *
2116
+ *
2117
2117
  * @typedef {ImagePipelineConstructorArgs & ImageSegmentationPipelineCallback & Disposable} ImageSegmentationPipelineType
2118
2118
  */
2119
2119
 
2120
2120
  /**
2121
2121
  * Image segmentation pipeline using any `AutoModelForXXXSegmentation`.
2122
2122
  * This pipeline predicts masks of objects and their classes.
2123
- *
2123
+ *
2124
2124
  * **Example:** Perform image segmentation with `Xenova/detr-resnet-50-panoptic`.
2125
2125
  * ```javascript
2126
2126
  * const segmenter = await pipeline('image-segmentation', 'Xenova/detr-resnet-50-panoptic');
@@ -2204,12 +2204,17 @@ export class ImageSegmentationPipeline extends (/** @type {new (options: ImagePi
2204
2204
  /** @type {ImageSegmentationPipelineOutput[]} */
2205
2205
  const annotation = [];
2206
2206
  if (!subtask) {
2207
+ // We define an epsilon to safeguard against numerical/precision issues when detecting
2208
+ // the normalization mode of the output (i.e., sigmoid already applied, or not).
2209
+ // See https://github.com/microsoft/onnxruntime/issues/23943 for more information.
2210
+ const epsilon = 1e-5;
2211
+
2207
2212
  // Perform standard image segmentation
2208
2213
  const result = output[outputNames[0]];
2209
2214
  for (let i = 0; i < imageSizes.length; ++i) {
2210
2215
  const size = imageSizes[i];
2211
2216
  const item = result[i];
2212
- if (item.data.some(x => x < 0 || x > 1)) {
2217
+ if (item.data.some(x => x < -epsilon || x > 1 + epsilon)) {
2213
2218
  item.sigmoid_();
2214
2219
  }
2215
2220
  const mask = await RawImage.fromTensor(item.mul_(255).to('uint8')).resize(size[1], size[0]);
@@ -2278,19 +2283,19 @@ export class ImageSegmentationPipeline extends (/** @type {new (options: ImagePi
2278
2283
 
2279
2284
  /**
2280
2285
  * @typedef {Object} BackgroundRemovalPipelineOptions Parameters specific to image segmentation pipelines.
2281
- *
2286
+ *
2282
2287
  * @callback BackgroundRemovalPipelineCallback Segment the input images.
2283
2288
  * @param {ImagePipelineInputs} images The input images.
2284
2289
  * @param {BackgroundRemovalPipelineOptions} [options] The options to use for image segmentation.
2285
2290
  * @returns {Promise<RawImage[]>} The images with the background removed.
2286
- *
2291
+ *
2287
2292
  * @typedef {ImagePipelineConstructorArgs & BackgroundRemovalPipelineCallback & Disposable} BackgroundRemovalPipelineType
2288
2293
  */
2289
2294
 
2290
2295
  /**
2291
2296
  * Background removal pipeline using certain `AutoModelForXXXSegmentation`.
2292
2297
  * This pipeline removes the backgrounds of images.
2293
- *
2298
+ *
2294
2299
  * **Example:** Perform background removal with `Xenova/modnet`.
2295
2300
  * ```javascript
2296
2301
  * const segmenter = await pipeline('background-removal', 'Xenova/modnet');
@@ -2301,7 +2306,7 @@ export class ImageSegmentationPipeline extends (/** @type {new (options: ImagePi
2301
2306
  * // ]
2302
2307
  * ```
2303
2308
  */
2304
- export class BackgroundRemovalPipeline extends (/** @type {new (options: ImagePipelineConstructorArgs) => ImageSegmentationPipelineType} */ (ImageSegmentationPipeline)) {
2309
+ export class BackgroundRemovalPipeline extends (/** @type {new (options: ImagePipelineConstructorArgs) => BackgroundRemovalPipelineType} */ (/** @type {any} */(ImageSegmentationPipeline))) {
2305
2310
  /**
2306
2311
  * Create a new BackgroundRemovalPipeline.
2307
2312
  * @param {ImagePipelineConstructorArgs} options An object used to instantiate the pipeline.
@@ -2336,25 +2341,25 @@ export class BackgroundRemovalPipeline extends (/** @type {new (options: ImagePi
2336
2341
  * @typedef {Object} ZeroShotImageClassificationOutput
2337
2342
  * @property {string} label The label identified by the model. It is one of the suggested `candidate_label`.
2338
2343
  * @property {number} score The score attributed by the model for that label (between 0 and 1).
2339
- *
2344
+ *
2340
2345
  * @typedef {Object} ZeroShotImageClassificationPipelineOptions Parameters specific to zero-shot image classification pipelines.
2341
2346
  * @property {string} [hypothesis_template="This is a photo of {}"] The sentence used in conjunction with `candidate_labels`
2342
2347
  * to attempt the image classification by replacing the placeholder with the candidate_labels.
2343
2348
  * Then likelihood is estimated by using `logits_per_image`.
2344
- *
2349
+ *
2345
2350
  * @callback ZeroShotImageClassificationPipelineCallback Assign labels to the image(s) passed as inputs.
2346
2351
  * @param {ImagePipelineInputs} images The input images.
2347
2352
  * @param {string[]} candidate_labels The candidate labels for this image.
2348
2353
  * @param {ZeroShotImageClassificationPipelineOptions} [options] The options to use for zero-shot image classification.
2349
2354
  * @returns {Promise<ZeroShotImageClassificationOutput[]|ZeroShotImageClassificationOutput[][]>} An array of objects containing the predicted labels and scores.
2350
- *
2355
+ *
2351
2356
  * @typedef {TextImagePipelineConstructorArgs & ZeroShotImageClassificationPipelineCallback & Disposable} ZeroShotImageClassificationPipelineType
2352
2357
  */
2353
2358
 
2354
2359
  /**
2355
2360
  * Zero shot image classification pipeline. This pipeline predicts the class of
2356
2361
  * an image when you provide an image and a set of `candidate_labels`.
2357
- *
2362
+ *
2358
2363
  * **Example:** Zero shot image classification w/ `Xenova/clip-vit-base-patch32`.
2359
2364
  * ```javascript
2360
2365
  * const classifier = await pipeline('zero-shot-image-classification', 'Xenova/clip-vit-base-patch32');
@@ -2384,7 +2389,7 @@ export class ZeroShotImageClassificationPipeline extends (/** @type {new (option
2384
2389
  const isBatched = Array.isArray(images);
2385
2390
  const preparedImages = await prepareImages(images);
2386
2391
 
2387
- // Insert label into hypothesis template
2392
+ // Insert label into hypothesis template
2388
2393
  const texts = candidate_labels.map(
2389
2394
  x => hypothesis_template.replace('{}', x)
2390
2395
  );
@@ -2431,23 +2436,23 @@ export class ZeroShotImageClassificationPipeline extends (/** @type {new (option
2431
2436
  * @property {number} score The score attributed by the model for that label.
2432
2437
  * @property {BoundingBox} box The bounding box of detected object in image's original size, or as a percentage if `percentage` is set to true.
2433
2438
  * @typedef {ObjectDetectionPipelineSingle[]} ObjectDetectionPipelineOutput
2434
- *
2439
+ *
2435
2440
  * @typedef {Object} ObjectDetectionPipelineOptions Parameters specific to object detection pipelines.
2436
2441
  * @property {number} [threshold=0.9] The threshold used to filter boxes by score.
2437
2442
  * @property {boolean} [percentage=false] Whether to return the boxes coordinates in percentage (true) or in pixels (false).
2438
- *
2443
+ *
2439
2444
  * @callback ObjectDetectionPipelineCallback Detect objects (bounding boxes & classes) in the image(s) passed as inputs.
2440
2445
  * @param {ImagePipelineInputs} images The input images.
2441
2446
  * @param {ObjectDetectionPipelineOptions} [options] The options to use for object detection.
2442
- * @returns {Promise<ObjectDetectionPipelineOutput|ObjectDetectionPipelineOutput[]>} A list of objects or a list of list of objects.
2443
- *
2447
+ * @returns {Promise<ObjectDetectionPipelineOutput|ObjectDetectionPipelineOutput[]>} A list of objects or a list of list of objects.
2448
+ *
2444
2449
  * @typedef {ImagePipelineConstructorArgs & ObjectDetectionPipelineCallback & Disposable} ObjectDetectionPipelineType
2445
2450
  */
2446
2451
 
2447
2452
  /**
2448
2453
  * Object detection pipeline using any `AutoModelForObjectDetection`.
2449
2454
  * This pipeline predicts bounding boxes of objects and their classes.
2450
- *
2455
+ *
2451
2456
  * **Example:** Run object-detection with `Xenova/detr-resnet-50`.
2452
2457
  * ```javascript
2453
2458
  * const detector = await pipeline('object-detection', 'Xenova/detr-resnet-50');
@@ -2521,27 +2526,27 @@ export class ObjectDetectionPipeline extends (/** @type {new (options: ImagePipe
2521
2526
  * @property {string} label Text query corresponding to the found object.
2522
2527
  * @property {number} score Score corresponding to the object (between 0 and 1).
2523
2528
  * @property {BoundingBox} box Bounding box of the detected object in image's original size, or as a percentage if `percentage` is set to true.
2524
- *
2529
+ *
2525
2530
  * @typedef {Object} ZeroShotObjectDetectionPipelineOptions Parameters specific to zero-shot object detection pipelines.
2526
2531
  * @property {number} [threshold=0.1] The probability necessary to make a prediction.
2527
2532
  * @property {number} [top_k=null] The number of top predictions that will be returned by the pipeline.
2528
2533
  * If the provided number is `null` or higher than the number of predictions available, it will default
2529
2534
  * to the number of predictions.
2530
2535
  * @property {boolean} [percentage=false] Whether to return the boxes coordinates in percentage (true) or in pixels (false).
2531
- *
2536
+ *
2532
2537
  * @callback ZeroShotObjectDetectionPipelineCallback Detect objects (bounding boxes & classes) in the image(s) passed as inputs.
2533
2538
  * @param {ImagePipelineInputs} images The input images.
2534
2539
  * @param {string[]} candidate_labels What the model should recognize in the image.
2535
2540
  * @param {ZeroShotObjectDetectionPipelineOptions} [options] The options to use for zero-shot object detection.
2536
2541
  * @returns {Promise<ZeroShotObjectDetectionOutput[]|ZeroShotObjectDetectionOutput[][]>} An array of objects containing the predicted labels, scores, and bounding boxes.
2537
- *
2542
+ *
2538
2543
  * @typedef {TextImagePipelineConstructorArgs & ZeroShotObjectDetectionPipelineCallback & Disposable} ZeroShotObjectDetectionPipelineType
2539
2544
  */
2540
2545
 
2541
2546
  /**
2542
2547
  * Zero-shot object detection pipeline. This pipeline predicts bounding boxes of
2543
2548
  * objects when you provide an image and a set of `candidate_labels`.
2544
- *
2549
+ *
2545
2550
  * **Example:** Zero-shot object detection w/ `Xenova/owlvit-base-patch32`.
2546
2551
  * ```javascript
2547
2552
  * const detector = await pipeline('zero-shot-object-detection', 'Xenova/owlvit-base-patch32');
@@ -2571,7 +2576,7 @@ export class ObjectDetectionPipeline extends (/** @type {new (options: ImagePipe
2571
2576
  * // }
2572
2577
  * // ]
2573
2578
  * ```
2574
- *
2579
+ *
2575
2580
  * **Example:** Zero-shot object detection w/ `Xenova/owlvit-base-patch32` (returning top 4 matches and setting a threshold).
2576
2581
  * ```javascript
2577
2582
  * const detector = await pipeline('zero-shot-object-detection', 'Xenova/owlvit-base-patch32');
@@ -2686,13 +2691,13 @@ export class ZeroShotObjectDetectionPipeline extends (/** @type {new (options: T
2686
2691
  * @typedef {Object} DocumentQuestionAnsweringSingle
2687
2692
  * @property {string} answer The generated text.
2688
2693
  * @typedef {DocumentQuestionAnsweringSingle[]} DocumentQuestionAnsweringOutput
2689
- *
2694
+ *
2690
2695
  * @callback DocumentQuestionAnsweringPipelineCallback Answer the question given as input by using the document.
2691
2696
  * @param {ImageInput} image The image of the document to use.
2692
2697
  * @param {string} question A question to ask of the document.
2693
2698
  * @param {Partial<import('./generation/configuration_utils.js').GenerationConfig>} [options] Additional keyword arguments to pass along to the generate method of the model.
2694
2699
  * @returns {Promise<DocumentQuestionAnsweringOutput|DocumentQuestionAnsweringOutput[]>} An object (or array of objects) containing the answer(s).
2695
- *
2700
+ *
2696
2701
  * @typedef {TextImagePipelineConstructorArgs & DocumentQuestionAnsweringPipelineCallback & Disposable} DocumentQuestionAnsweringPipelineType
2697
2702
  */
2698
2703
 
@@ -2700,7 +2705,7 @@ export class ZeroShotObjectDetectionPipeline extends (/** @type {new (options: T
2700
2705
  * Document Question Answering pipeline using any `AutoModelForDocumentQuestionAnswering`.
2701
2706
  * The inputs/outputs are similar to the (extractive) question answering pipeline; however,
2702
2707
  * the pipeline takes an image (and optional OCR'd words/boxes) as input instead of text context.
2703
- *
2708
+ *
2704
2709
  * **Example:** Answer questions about a document with `Xenova/donut-base-finetuned-docvqa`.
2705
2710
  * ```javascript
2706
2711
  * const qa_pipeline = await pipeline('document-question-answering', 'Xenova/donut-base-finetuned-docvqa');
@@ -2770,22 +2775,22 @@ export class DocumentQuestionAnsweringPipeline extends (/** @type {new (options:
2770
2775
  * @typedef {Object} TextToAudioOutput
2771
2776
  * @property {Float32Array} audio The generated audio waveform.
2772
2777
  * @property {number} sampling_rate The sampling rate of the generated audio waveform.
2773
- *
2778
+ *
2774
2779
  * @typedef {Object} TextToAudioPipelineOptions Parameters specific to text-to-audio pipelines.
2775
2780
  * @property {Tensor|Float32Array|string|URL} [speaker_embeddings=null] The speaker embeddings (if the model requires it).
2776
- *
2781
+ *
2777
2782
  * @callback TextToAudioPipelineCallback Generates speech/audio from the inputs.
2778
2783
  * @param {string|string[]} texts The text(s) to generate.
2779
2784
  * @param {TextToAudioPipelineOptions} options Parameters passed to the model generation/forward method.
2780
2785
  * @returns {Promise<TextToAudioOutput>} An object containing the generated audio and sampling rate.
2781
- *
2786
+ *
2782
2787
  * @typedef {TextToAudioPipelineConstructorArgs & TextToAudioPipelineCallback & Disposable} TextToAudioPipelineType
2783
2788
  */
2784
2789
 
2785
2790
  /**
2786
2791
  * Text-to-audio generation pipeline using any `AutoModelForTextToWaveform` or `AutoModelForTextToSpectrogram`.
2787
2792
  * This pipeline generates an audio file from an input text and optional other conditional inputs.
2788
- *
2793
+ *
2789
2794
  * **Example:** Generate audio from text with `Xenova/speecht5_tts`.
2790
2795
  * ```javascript
2791
2796
  * const synthesizer = await pipeline('text-to-speech', 'Xenova/speecht5_tts', { quantized: false });
@@ -2796,17 +2801,17 @@ export class DocumentQuestionAnsweringPipeline extends (/** @type {new (options:
2796
2801
  * // sampling_rate: 16000
2797
2802
  * // }
2798
2803
  * ```
2799
- *
2804
+ *
2800
2805
  * You can then save the audio to a .wav file with the `wavefile` package:
2801
2806
  * ```javascript
2802
2807
  * import wavefile from 'wavefile';
2803
2808
  * import fs from 'fs';
2804
- *
2809
+ *
2805
2810
  * const wav = new wavefile.WaveFile();
2806
2811
  * wav.fromScratch(1, out.sampling_rate, '32f', out.audio);
2807
2812
  * fs.writeFileSync('out.wav', wav.toBuffer());
2808
2813
  * ```
2809
- *
2814
+ *
2810
2815
  * **Example:** Multilingual speech generation with `Xenova/mms-tts-fra`. See [here](https://huggingface.co/models?pipeline_tag=text-to-speech&other=vits&sort=trending) for the full list of available languages (1107).
2811
2816
  * ```javascript
2812
2817
  * const synthesizer = await pipeline('text-to-speech', 'Xenova/mms-tts-fra');
@@ -2912,13 +2917,13 @@ export class TextToAudioPipeline extends (/** @type {new (options: TextToAudioPi
2912
2917
  * @callback ImageToImagePipelineCallback Transform the image(s) passed as inputs.
2913
2918
  * @param {ImagePipelineInputs} images The images to transform.
2914
2919
  * @returns {Promise<RawImage|RawImage[]>} The transformed image or list of images.
2915
- *
2920
+ *
2916
2921
  * @typedef {ImagePipelineConstructorArgs & ImageToImagePipelineCallback & Disposable} ImageToImagePipelineType
2917
2922
  */
2918
2923
 
2919
2924
  /**
2920
2925
  * Image to Image pipeline using any `AutoModelForImageToImage`. This pipeline generates an image based on a previous image input.
2921
- *
2926
+ *
2922
2927
  * **Example:** Super-resolution w/ `Xenova/swin2SR-classical-sr-x2-64`
2923
2928
  * ```javascript
2924
2929
  * const upscaler = await pipeline('image-to-image', 'Xenova/swin2SR-classical-sr-x2-64');
@@ -2963,17 +2968,17 @@ export class ImageToImagePipeline extends (/** @type {new (options: ImagePipelin
2963
2968
  * @typedef {Object} DepthEstimationPipelineOutput
2964
2969
  * @property {Tensor} predicted_depth The raw depth map predicted by the model.
2965
2970
  * @property {RawImage} depth The processed depth map as an image (with the same size as the input image).
2966
- *
2971
+ *
2967
2972
  * @callback DepthEstimationPipelineCallback Predicts the depth for the image(s) passed as inputs.
2968
2973
  * @param {ImagePipelineInputs} images The images to compute depth for.
2969
2974
  * @returns {Promise<DepthEstimationPipelineOutput|DepthEstimationPipelineOutput[]>} An image or a list of images containing result(s).
2970
- *
2975
+ *
2971
2976
  * @typedef {ImagePipelineConstructorArgs & DepthEstimationPipelineCallback & Disposable} DepthEstimationPipelineType
2972
2977
  */
2973
2978
 
2974
2979
  /**
2975
2980
  * Depth estimation pipeline using any `AutoModelForDepthEstimation`. This pipeline predicts the depth of an image.
2976
- *
2981
+ *
2977
2982
  * **Example:** Depth estimation w/ `Xenova/dpt-hybrid-midas`
2978
2983
  * ```javascript
2979
2984
  * const depth_estimator = await pipeline('depth-estimation', 'Xenova/dpt-hybrid-midas');
@@ -3358,7 +3363,7 @@ const TASK_ALIASES = Object.freeze({
3358
3363
 
3359
3364
  /**
3360
3365
  * Utility factory method to build a `Pipeline` object.
3361
- *
3366
+ *
3362
3367
  * @template {PipelineType} T The type of pipeline to return.
3363
3368
  * @param {T} task The task defining which pipeline will be returned. Currently accepted tasks are:
3364
3369
  * - `"audio-classification"`: will return a `AudioClassificationPipeline`.