@huggingface/transformers 3.0.0-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. package/LICENSE +202 -0
  2. package/README.md +376 -0
  3. package/dist/ort-wasm-simd-threaded.jsep.wasm +0 -0
  4. package/dist/transformers.cjs +30741 -0
  5. package/dist/transformers.cjs.map +1 -0
  6. package/dist/transformers.js +33858 -0
  7. package/dist/transformers.js.map +1 -0
  8. package/dist/transformers.min.cjs +173 -0
  9. package/dist/transformers.min.cjs.map +1 -0
  10. package/dist/transformers.min.js +231 -0
  11. package/dist/transformers.min.js.map +1 -0
  12. package/package.json +92 -0
  13. package/src/backends/onnx.js +151 -0
  14. package/src/configs.js +360 -0
  15. package/src/env.js +152 -0
  16. package/src/generation/configuration_utils.js +381 -0
  17. package/src/generation/logits_process.js +716 -0
  18. package/src/generation/logits_sampler.js +204 -0
  19. package/src/generation/parameters.js +35 -0
  20. package/src/generation/stopping_criteria.js +156 -0
  21. package/src/generation/streamers.js +212 -0
  22. package/src/models/whisper/common_whisper.js +151 -0
  23. package/src/models/whisper/generation_whisper.js +89 -0
  24. package/src/models.js +7028 -0
  25. package/src/ops/registry.js +92 -0
  26. package/src/pipelines.js +3341 -0
  27. package/src/processors.js +2614 -0
  28. package/src/tokenizers.js +4395 -0
  29. package/src/transformers.js +28 -0
  30. package/src/utils/audio.js +704 -0
  31. package/src/utils/constants.js +2 -0
  32. package/src/utils/core.js +149 -0
  33. package/src/utils/data-structures.js +445 -0
  34. package/src/utils/devices.js +11 -0
  35. package/src/utils/dtypes.js +62 -0
  36. package/src/utils/generic.js +35 -0
  37. package/src/utils/hub.js +671 -0
  38. package/src/utils/image.js +745 -0
  39. package/src/utils/maths.js +1050 -0
  40. package/src/utils/tensor.js +1378 -0
  41. package/types/backends/onnx.d.ts +26 -0
  42. package/types/backends/onnx.d.ts.map +1 -0
  43. package/types/configs.d.ts +59 -0
  44. package/types/configs.d.ts.map +1 -0
  45. package/types/env.d.ts +106 -0
  46. package/types/env.d.ts.map +1 -0
  47. package/types/generation/configuration_utils.d.ts +320 -0
  48. package/types/generation/configuration_utils.d.ts.map +1 -0
  49. package/types/generation/logits_process.d.ts +354 -0
  50. package/types/generation/logits_process.d.ts.map +1 -0
  51. package/types/generation/logits_sampler.d.ts +51 -0
  52. package/types/generation/logits_sampler.d.ts.map +1 -0
  53. package/types/generation/parameters.d.ts +47 -0
  54. package/types/generation/parameters.d.ts.map +1 -0
  55. package/types/generation/stopping_criteria.d.ts +81 -0
  56. package/types/generation/stopping_criteria.d.ts.map +1 -0
  57. package/types/generation/streamers.d.ts +81 -0
  58. package/types/generation/streamers.d.ts.map +1 -0
  59. package/types/models/whisper/common_whisper.d.ts +8 -0
  60. package/types/models/whisper/common_whisper.d.ts.map +1 -0
  61. package/types/models/whisper/generation_whisper.d.ts +76 -0
  62. package/types/models/whisper/generation_whisper.d.ts.map +1 -0
  63. package/types/models.d.ts +3845 -0
  64. package/types/models.d.ts.map +1 -0
  65. package/types/ops/registry.d.ts +11 -0
  66. package/types/ops/registry.d.ts.map +1 -0
  67. package/types/pipelines.d.ts +2403 -0
  68. package/types/pipelines.d.ts.map +1 -0
  69. package/types/processors.d.ts +917 -0
  70. package/types/processors.d.ts.map +1 -0
  71. package/types/tokenizers.d.ts +999 -0
  72. package/types/tokenizers.d.ts.map +1 -0
  73. package/types/transformers.d.ts +13 -0
  74. package/types/transformers.d.ts.map +1 -0
  75. package/types/utils/audio.d.ts +130 -0
  76. package/types/utils/audio.d.ts.map +1 -0
  77. package/types/utils/constants.d.ts +2 -0
  78. package/types/utils/constants.d.ts.map +1 -0
  79. package/types/utils/core.d.ts +91 -0
  80. package/types/utils/core.d.ts.map +1 -0
  81. package/types/utils/data-structures.d.ts +236 -0
  82. package/types/utils/data-structures.d.ts.map +1 -0
  83. package/types/utils/devices.d.ts +8 -0
  84. package/types/utils/devices.d.ts.map +1 -0
  85. package/types/utils/dtypes.d.ts +22 -0
  86. package/types/utils/dtypes.d.ts.map +1 -0
  87. package/types/utils/generic.d.ts +11 -0
  88. package/types/utils/generic.d.ts.map +1 -0
  89. package/types/utils/hub.d.ts +191 -0
  90. package/types/utils/hub.d.ts.map +1 -0
  91. package/types/utils/image.d.ts +119 -0
  92. package/types/utils/image.d.ts.map +1 -0
  93. package/types/utils/maths.d.ts +280 -0
  94. package/types/utils/maths.d.ts.map +1 -0
  95. package/types/utils/tensor.d.ts +392 -0
  96. package/types/utils/tensor.d.ts.map +1 -0
@@ -0,0 +1,2403 @@
1
+ /**
2
+ * @typedef {keyof typeof SUPPORTED_TASKS} TaskType
3
+ * @typedef {keyof typeof TASK_ALIASES} AliasType
4
+ * @typedef {TaskType | AliasType} PipelineType All possible pipeline types.
5
+ * @typedef {{[K in TaskType]: InstanceType<typeof SUPPORTED_TASKS[K]["pipeline"]>}} SupportedTasks A mapping of pipeline names to their corresponding pipeline classes.
6
+ * @typedef {{[K in AliasType]: InstanceType<typeof SUPPORTED_TASKS[TASK_ALIASES[K]]["pipeline"]>}} AliasTasks A mapping from pipeline aliases to their corresponding pipeline classes.
7
+ * @typedef {SupportedTasks & AliasTasks} AllTasks A mapping from all pipeline names and aliases to their corresponding pipeline classes.
8
+ */
9
+ /**
10
+ * Utility factory method to build a `Pipeline` object.
11
+ *
12
+ * @template {PipelineType} T The type of pipeline to return.
13
+ * @param {T} task The task defining which pipeline will be returned. Currently accepted tasks are:
14
+ * - `"audio-classification"`: will return a `AudioClassificationPipeline`.
15
+ * - `"automatic-speech-recognition"`: will return a `AutomaticSpeechRecognitionPipeline`.
16
+ * - `"depth-estimation"`: will return a `DepthEstimationPipeline`.
17
+ * - `"document-question-answering"`: will return a `DocumentQuestionAnsweringPipeline`.
18
+ * - `"feature-extraction"`: will return a `FeatureExtractionPipeline`.
19
+ * - `"fill-mask"`: will return a `FillMaskPipeline`.
20
+ * - `"image-classification"`: will return a `ImageClassificationPipeline`.
21
+ * - `"image-segmentation"`: will return a `ImageSegmentationPipeline`.
22
+ * - `"image-to-text"`: will return a `ImageToTextPipeline`.
23
+ * - `"object-detection"`: will return a `ObjectDetectionPipeline`.
24
+ * - `"question-answering"`: will return a `QuestionAnsweringPipeline`.
25
+ * - `"summarization"`: will return a `SummarizationPipeline`.
26
+ * - `"text2text-generation"`: will return a `Text2TextGenerationPipeline`.
27
+ * - `"text-classification"` (alias "sentiment-analysis" available): will return a `TextClassificationPipeline`.
28
+ * - `"text-generation"`: will return a `TextGenerationPipeline`.
29
+ * - `"token-classification"` (alias "ner" available): will return a `TokenClassificationPipeline`.
30
+ * - `"translation"`: will return a `TranslationPipeline`.
31
+ * - `"translation_xx_to_yy"`: will return a `TranslationPipeline`.
32
+ * - `"zero-shot-classification"`: will return a `ZeroShotClassificationPipeline`.
33
+ * - `"zero-shot-audio-classification"`: will return a `ZeroShotAudioClassificationPipeline`.
34
+ * - `"zero-shot-image-classification"`: will return a `ZeroShotImageClassificationPipeline`.
35
+ * - `"zero-shot-object-detection"`: will return a `ZeroShotObjectDetectionPipeline`.
36
+ * @param {string} [model=null] The name of the pre-trained model to use. If not specified, the default model for the task will be used.
37
+ * @param {import('./utils/hub.js').PretrainedModelOptions} [options] Optional parameters for the pipeline.
38
+ * @returns {Promise<AllTasks[T]>} A Pipeline object for the specified task.
39
+ * @throws {Error} If an unsupported pipeline is requested.
40
+ */
41
+ export function pipeline<T extends PipelineType>(task: T, model?: string, { progress_callback, config, cache_dir, local_files_only, revision, device, dtype, model_file_name, session_options, }?: import('./utils/hub.js').PretrainedModelOptions): Promise<AllTasks[T]>;
42
+ declare const Pipeline_base: new () => {
43
+ (...args: any[]): any;
44
+ _call(...args: any[]): any;
45
+ };
46
+ /**
47
+ * @callback DisposeType Disposes the item.
48
+ * @returns {Promise<void>} A promise that resolves when the item has been disposed.
49
+ *
50
+ * @typedef {Object} Disposable
51
+ * @property {DisposeType} dispose A promise that resolves when the pipeline has been disposed.
52
+ */
53
+ /**
54
+ * The Pipeline class is the class from which all pipelines inherit.
55
+ * Refer to this class for methods shared across different pipelines.
56
+ * @extends Callable
57
+ */
58
+ export class Pipeline extends Pipeline_base {
59
+ /**
60
+ * Create a new Pipeline.
61
+ * @param {Object} options An object containing the following properties:
62
+ * @param {string} [options.task] The task of the pipeline. Useful for specifying subtasks.
63
+ * @param {PreTrainedModel} [options.model] The model used by the pipeline.
64
+ * @param {PreTrainedTokenizer} [options.tokenizer=null] The tokenizer used by the pipeline (if any).
65
+ * @param {Processor} [options.processor=null] The processor used by the pipeline (if any).
66
+ */
67
+ constructor({ task, model, tokenizer, processor }: {
68
+ task?: string;
69
+ model?: PreTrainedModel;
70
+ tokenizer?: PreTrainedTokenizer;
71
+ processor?: Processor;
72
+ });
73
+ task: string;
74
+ model: PreTrainedModel;
75
+ tokenizer: PreTrainedTokenizer;
76
+ processor: Processor;
77
+ dispose(): Promise<void>;
78
+ }
79
+ declare const TextClassificationPipeline_base: new (options: TextPipelineConstructorArgs) => TextClassificationPipelineType;
80
+ /**
81
+ * @typedef {Object} ModelTokenizerConstructorArgs
82
+ * @property {string} task The task of the pipeline. Useful for specifying subtasks.
83
+ * @property {PreTrainedModel} model The model used by the pipeline.
84
+ * @property {PreTrainedTokenizer} tokenizer The tokenizer used by the pipeline.
85
+ *
86
+ * @typedef {ModelTokenizerConstructorArgs} TextPipelineConstructorArgs An object used to instantiate a text-based pipeline.
87
+ */
88
+ /**
89
+ * @typedef {Object} ModelProcessorConstructorArgs
90
+ * @property {string} task The task of the pipeline. Useful for specifying subtasks.
91
+ * @property {PreTrainedModel} model The model used by the pipeline.
92
+ * @property {Processor} processor The processor used by the pipeline.
93
+ *
94
+ * @typedef {ModelProcessorConstructorArgs} AudioPipelineConstructorArgs An object used to instantiate an audio-based pipeline.
95
+ * @typedef {ModelProcessorConstructorArgs} ImagePipelineConstructorArgs An object used to instantiate an image-based pipeline.
96
+ */
97
+ /**
98
+ * @typedef {Object} ModelTokenizerProcessorConstructorArgs
99
+ * @property {string} task The task of the pipeline. Useful for specifying subtasks.
100
+ * @property {PreTrainedModel} model The model used by the pipeline.
101
+ * @property {PreTrainedTokenizer} tokenizer The tokenizer used by the pipeline.
102
+ * @property {Processor} processor The processor used by the pipeline.
103
+ *
104
+ * @typedef {ModelTokenizerProcessorConstructorArgs} TextAudioPipelineConstructorArgs An object used to instantiate a text- and audio-based pipeline.
105
+ * @typedef {ModelTokenizerProcessorConstructorArgs} TextImagePipelineConstructorArgs An object used to instantiate a text- and image-based pipeline.
106
+ */
107
+ /**
108
+ * @typedef {Object} TextClassificationSingle
109
+ * @property {string} label The label predicted.
110
+ * @property {number} score The corresponding probability.
111
+ * @typedef {TextClassificationSingle[]} TextClassificationOutput
112
+ *
113
+ * @typedef {Object} TextClassificationPipelineOptions Parameters specific to text classification pipelines.
114
+ * @property {number} [top_k=1] The number of top predictions to be returned.
115
+ *
116
+ * @callback TextClassificationPipelineCallback Classify the text(s) given as inputs.
117
+ * @param {string|string[]} texts The input text(s) to be classified.
118
+ * @param {TextClassificationPipelineOptions} [options] The options to use for text classification.
119
+ * @returns {Promise<TextClassificationOutput|TextClassificationOutput[]>} An array or object containing the predicted labels and scores.
120
+ *
121
+ * @typedef {TextPipelineConstructorArgs & TextClassificationPipelineCallback & Disposable} TextClassificationPipelineType
122
+ */
123
+ /**
124
+ * Text classification pipeline using any `ModelForSequenceClassification`.
125
+ *
126
+ * **Example:** Sentiment-analysis w/ `Xenova/distilbert-base-uncased-finetuned-sst-2-english`.
127
+ * ```javascript
128
+ * const classifier = await pipeline('sentiment-analysis', 'Xenova/distilbert-base-uncased-finetuned-sst-2-english');
129
+ * const output = await classifier('I love transformers!');
130
+ * // [{ label: 'POSITIVE', score: 0.999788761138916 }]
131
+ * ```
132
+ *
133
+ * **Example:** Multilingual sentiment-analysis w/ `Xenova/bert-base-multilingual-uncased-sentiment` (and return top 5 classes).
134
+ * ```javascript
135
+ * const classifier = await pipeline('sentiment-analysis', 'Xenova/bert-base-multilingual-uncased-sentiment');
136
+ * const output = await classifier('Le meilleur film de tous les temps.', { top_k: 5 });
137
+ * // [
138
+ * // { label: '5 stars', score: 0.9610759615898132 },
139
+ * // { label: '4 stars', score: 0.03323351591825485 },
140
+ * // { label: '3 stars', score: 0.0036155181005597115 },
141
+ * // { label: '1 star', score: 0.0011325967498123646 },
142
+ * // { label: '2 stars', score: 0.0009423971059732139 }
143
+ * // ]
144
+ * ```
145
+ *
146
+ * **Example:** Toxic comment classification w/ `Xenova/toxic-bert` (and return all classes).
147
+ * ```javascript
148
+ * const classifier = await pipeline('text-classification', 'Xenova/toxic-bert');
149
+ * const output = await classifier('I hate you!', { top_k: null });
150
+ * // [
151
+ * // { label: 'toxic', score: 0.9593140482902527 },
152
+ * // { label: 'insult', score: 0.16187334060668945 },
153
+ * // { label: 'obscene', score: 0.03452680632472038 },
154
+ * // { label: 'identity_hate', score: 0.0223250575363636 },
155
+ * // { label: 'threat', score: 0.019197041168808937 },
156
+ * // { label: 'severe_toxic', score: 0.005651099607348442 }
157
+ * // ]
158
+ * ```
159
+ */
160
+ export class TextClassificationPipeline extends TextClassificationPipeline_base {
161
+ _call(texts: string | string[], options?: TextClassificationPipelineOptions): Promise<TextClassificationOutput | TextClassificationOutput[]>;
162
+ }
163
+ declare const TokenClassificationPipeline_base: new (options: TextPipelineConstructorArgs) => TokenClassificationPipelineType;
164
+ /**
165
+ * @typedef {Object} TokenClassificationSingle
166
+ * @property {string} word The token/word classified. This is obtained by decoding the selected tokens.
167
+ * @property {number} score The corresponding probability for `entity`.
168
+ * @property {string} entity The entity predicted for that token/word.
169
+ * @property {number} index The index of the corresponding token in the sentence.
170
+ * @property {number} [start] The index of the start of the corresponding entity in the sentence.
171
+ * @property {number} [end] The index of the end of the corresponding entity in the sentence.
172
+ * @typedef {TokenClassificationSingle[]} TokenClassificationOutput
173
+ *
174
+ * @typedef {Object} TokenClassificationPipelineOptions Parameters specific to token classification pipelines.
175
+ * @property {string[]} [ignore_labels] A list of labels to ignore.
176
+ *
177
+ * @callback TokenClassificationPipelineCallback Classify each token of the text(s) given as inputs.
178
+ * @param {string|string[]} texts One or several texts (or one list of texts) for token classification.
179
+ * @param {TokenClassificationPipelineOptions} [options] The options to use for token classification.
180
+ * @returns {Promise<TokenClassificationOutput|TokenClassificationOutput[]>} The result.
181
+ *
182
+ * @typedef {TextPipelineConstructorArgs & TokenClassificationPipelineCallback & Disposable} TokenClassificationPipelineType
183
+ */
184
+ /**
185
+ * Named Entity Recognition pipeline using any `ModelForTokenClassification`.
186
+ *
187
+ * **Example:** Perform named entity recognition with `Xenova/bert-base-NER`.
188
+ * ```javascript
189
+ * const classifier = await pipeline('token-classification', 'Xenova/bert-base-NER');
190
+ * const output = await classifier('My name is Sarah and I live in London');
191
+ * // [
192
+ * // { entity: 'B-PER', score: 0.9980202913284302, index: 4, word: 'Sarah' },
193
+ * // { entity: 'B-LOC', score: 0.9994474053382874, index: 9, word: 'London' }
194
+ * // ]
195
+ * ```
196
+ *
197
+ * **Example:** Perform named entity recognition with `Xenova/bert-base-NER` (and return all labels).
198
+ * ```javascript
199
+ * const classifier = await pipeline('token-classification', 'Xenova/bert-base-NER');
200
+ * const output = await classifier('Sarah lives in the United States of America', { ignore_labels: [] });
201
+ * // [
202
+ * // { entity: 'B-PER', score: 0.9966587424278259, index: 1, word: 'Sarah' },
203
+ * // { entity: 'O', score: 0.9987385869026184, index: 2, word: 'lives' },
204
+ * // { entity: 'O', score: 0.9990072846412659, index: 3, word: 'in' },
205
+ * // { entity: 'O', score: 0.9988298416137695, index: 4, word: 'the' },
206
+ * // { entity: 'B-LOC', score: 0.9995510578155518, index: 5, word: 'United' },
207
+ * // { entity: 'I-LOC', score: 0.9990395307540894, index: 6, word: 'States' },
208
+ * // { entity: 'I-LOC', score: 0.9986724853515625, index: 7, word: 'of' },
209
+ * // { entity: 'I-LOC', score: 0.9975294470787048, index: 8, word: 'America' }
210
+ * // ]
211
+ * ```
212
+ */
213
+ export class TokenClassificationPipeline extends TokenClassificationPipeline_base {
214
+ _call(texts: string | string[], options?: TokenClassificationPipelineOptions): Promise<TokenClassificationOutput | TokenClassificationOutput[]>;
215
+ }
216
+ declare const QuestionAnsweringPipeline_base: new (options: TextPipelineConstructorArgs) => QuestionAnsweringPipelineType;
217
+ /**
218
+ * @typedef {Object} QuestionAnsweringOutput
219
+ * @property {number} score The probability associated to the answer.
220
+ * @property {number} [start] The character start index of the answer (in the tokenized version of the input).
221
+ * @property {number} [end] The character end index of the answer (in the tokenized version of the input).
222
+ * @property {string} answer The answer to the question.
223
+ *
224
+ * @typedef {Object} QuestionAnsweringPipelineOptions Parameters specific to question answering pipelines.
225
+ * @property {number} [top_k=1] The number of top answer predictions to be returned.
226
+ *
227
+ * @callback QuestionAnsweringPipelineCallback Answer the question(s) given as inputs by using the context(s).
228
+ * @param {string|string[]} question One or several question(s) (must be used in conjunction with the `context` argument).
229
+ * @param {string|string[]} context One or several context(s) associated with the question(s) (must be used in conjunction with the `question` argument).
230
+ * @param {QuestionAnsweringPipelineOptions} [options] The options to use for question answering.
231
+ * @returns {Promise<QuestionAnsweringOutput|QuestionAnsweringOutput[]>} An array or object containing the predicted answers and scores.
232
+ *
233
+ * @typedef {TextPipelineConstructorArgs & QuestionAnsweringPipelineCallback & Disposable} QuestionAnsweringPipelineType
234
+ */
235
+ /**
236
+ * Question Answering pipeline using any `ModelForQuestionAnswering`.
237
+ *
238
+ * **Example:** Run question answering with `Xenova/distilbert-base-uncased-distilled-squad`.
239
+ * ```javascript
240
+ * const answerer = await pipeline('question-answering', 'Xenova/distilbert-base-uncased-distilled-squad');
241
+ * const question = 'Who was Jim Henson?';
242
+ * const context = 'Jim Henson was a nice puppet.';
243
+ * const output = await answerer(question, context);
244
+ * // {
245
+ * // answer: "a nice puppet",
246
+ * // score: 0.5768911502526741
247
+ * // }
248
+ * ```
249
+ */
250
+ export class QuestionAnsweringPipeline extends QuestionAnsweringPipeline_base {
251
+ _call(question: string | string[], context: string | string[], options?: QuestionAnsweringPipelineOptions): Promise<QuestionAnsweringOutput | QuestionAnsweringOutput[]>;
252
+ }
253
+ declare const FillMaskPipeline_base: new (options: TextPipelineConstructorArgs) => FillMaskPipelineType;
254
+ /**
255
+ * @typedef {Object} FillMaskSingle
256
+ * @property {string} sequence The corresponding input with the mask token prediction.
257
+ * @property {number} score The corresponding probability.
258
+ * @property {number} token The predicted token id (to replace the masked one).
259
+ * @property {string} token_str The predicted token (to replace the masked one).
260
+ * @typedef {FillMaskSingle[]} FillMaskOutput
261
+ *
262
+ * @typedef {Object} FillMaskPipelineOptions Parameters specific to fill mask pipelines.
263
+ * @property {number} [top_k=5] When passed, overrides the number of predictions to return.
264
+ *
265
+ * @callback FillMaskPipelineCallback Fill the masked token in the text(s) given as inputs.
266
+ * @param {string|string[]} texts One or several texts (or one list of prompts) with masked tokens.
267
+ * @param {FillMaskPipelineOptions} [options] The options to use for masked language modelling.
268
+ * @returns {Promise<FillMaskOutput|FillMaskOutput[]>} An array of objects containing the score, predicted token, predicted token string,
269
+ * and the sequence with the predicted token filled in, or an array of such arrays (one for each input text).
270
+ * If only one input text is given, the output will be an array of objects.
271
+ * @throws {Error} When the mask token is not found in the input text.
272
+ *
273
+ * @typedef {TextPipelineConstructorArgs & FillMaskPipelineCallback & Disposable} FillMaskPipelineType
274
+ */
275
+ /**
276
+ * Masked language modeling prediction pipeline using any `ModelWithLMHead`.
277
+ *
278
+ * **Example:** Perform masked language modelling (a.k.a. "fill-mask") with `Xenova/bert-base-uncased`.
279
+ * ```javascript
280
+ * const unmasker = await pipeline('fill-mask', 'Xenova/bert-base-cased');
281
+ * const output = await unmasker('The goal of life is [MASK].');
282
+ * // [
283
+ * // { token_str: 'survival', score: 0.06137419492006302, token: 8115, sequence: 'The goal of life is survival.' },
284
+ * // { token_str: 'love', score: 0.03902450203895569, token: 1567, sequence: 'The goal of life is love.' },
285
+ * // { token_str: 'happiness', score: 0.03253183513879776, token: 9266, sequence: 'The goal of life is happiness.' },
286
+ * // { token_str: 'freedom', score: 0.018736306577920914, token: 4438, sequence: 'The goal of life is freedom.' },
287
+ * // { token_str: 'life', score: 0.01859794743359089, token: 1297, sequence: 'The goal of life is life.' }
288
+ * // ]
289
+ * ```
290
+ *
291
+ * **Example:** Perform masked language modelling (a.k.a. "fill-mask") with `Xenova/bert-base-cased` (and return top result).
292
+ * ```javascript
293
+ * const unmasker = await pipeline('fill-mask', 'Xenova/bert-base-cased');
294
+ * const output = await unmasker('The Milky Way is a [MASK] galaxy.', { top_k: 1 });
295
+ * // [{ token_str: 'spiral', score: 0.6299987435340881, token: 14061, sequence: 'The Milky Way is a spiral galaxy.' }]
296
+ * ```
297
+ */
298
+ export class FillMaskPipeline extends FillMaskPipeline_base {
299
+ _call(texts: string | string[], options?: FillMaskPipelineOptions): Promise<FillMaskOutput | FillMaskOutput[]>;
300
+ }
301
+ declare const Text2TextGenerationPipeline_base: new (options: TextPipelineConstructorArgs) => Text2TextGenerationPipelineType;
302
+ /**
303
+ * @typedef {Object} Text2TextGenerationSingle
304
+ * @property {string} generated_text The generated text.
305
+ * @typedef {Text2TextGenerationSingle[]} Text2TextGenerationOutput
306
+ *
307
+ * @callback Text2TextGenerationPipelineCallback Generate the output text(s) using text(s) given as inputs.
308
+ * @param {string|string[]} texts Input text for the encoder.
309
+ * @param {Partial<import('./generation/configuration_utils.js').GenerationConfig>} [options] Additional keyword arguments to pass along to the generate method of the model.
310
+ * @returns {Promise<Text2TextGenerationOutput|Text2TextGenerationOutput[]>}
311
+ *
312
+ * @typedef {TextPipelineConstructorArgs & Text2TextGenerationPipelineCallback & Disposable} Text2TextGenerationPipelineType
313
+ */
314
+ /**
315
+ * Text2TextGenerationPipeline class for generating text using a model that performs text-to-text generation tasks.
316
+ *
317
+ * **Example:** Text-to-text generation w/ `Xenova/LaMini-Flan-T5-783M`.
318
+ * ```javascript
319
+ * const generator = await pipeline('text2text-generation', 'Xenova/LaMini-Flan-T5-783M');
320
+ * const output = await generator('how can I become more healthy?', {
321
+ * max_new_tokens: 100,
322
+ * });
323
+ * // [{ generated_text: "To become more healthy, you can: 1. Eat a balanced diet with plenty of fruits, vegetables, whole grains, lean proteins, and healthy fats. 2. Stay hydrated by drinking plenty of water. 3. Get enough sleep and manage stress levels. 4. Avoid smoking and excessive alcohol consumption. 5. Regularly exercise and maintain a healthy weight. 6. Practice good hygiene and sanitation. 7. Seek medical attention if you experience any health issues." }]
324
+ * ```
325
+ */
326
+ export class Text2TextGenerationPipeline extends Text2TextGenerationPipeline_base {
327
+ /** @type {'generated_text'} */
328
+ _key: 'generated_text';
329
+ _call(texts: string | string[], options?: Partial<import('./generation/configuration_utils.js').GenerationConfig>): Promise<Text2TextGenerationOutput | Text2TextGenerationOutput[]>;
330
+ }
331
+ declare const SummarizationPipeline_base: new (options: TextPipelineConstructorArgs) => SummarizationPipelineType;
332
+ /**
333
+ * @typedef {Object} SummarizationSingle
334
+ * @property {string} summary_text The summary text.
335
+ * @typedef {SummarizationSingle[]} SummarizationOutput
336
+ *
337
+ * @callback SummarizationPipelineCallback Summarize the text(s) given as inputs.
338
+ * @param {string|string[]} texts One or several articles (or one list of articles) to summarize.
339
+ * @param {import('./generation/configuration_utils.js').GenerationConfig} [options] Additional keyword arguments to pass along to the generate method of the model.
340
+ * @returns {Promise<SummarizationOutput|SummarizationOutput[]>}
341
+ *
342
+ * @typedef {TextPipelineConstructorArgs & SummarizationPipelineCallback & Disposable} SummarizationPipelineType
343
+ */
344
+ /**
345
+ * A pipeline for summarization tasks, inheriting from Text2TextGenerationPipeline.
346
+ *
347
+ * **Example:** Summarization w/ `Xenova/distilbart-cnn-6-6`.
348
+ * ```javascript
349
+ * const generator = await pipeline('summarization', 'Xenova/distilbart-cnn-6-6');
350
+ * const text = 'The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, ' +
351
+ * 'and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. ' +
352
+ * 'During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest ' +
353
+ * 'man-made structure in the world, a title it held for 41 years until the Chrysler Building in New ' +
354
+ * 'York City was finished in 1930. It was the first structure to reach a height of 300 metres. Due to ' +
355
+ * 'the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the ' +
356
+ * 'Chrysler Building by 5.2 metres (17 ft). Excluding transmitters, the Eiffel Tower is the second ' +
357
+ * 'tallest free-standing structure in France after the Millau Viaduct.';
358
+ * const output = await generator(text, {
359
+ * max_new_tokens: 100,
360
+ * });
361
+ * // [{ summary_text: ' The Eiffel Tower is about the same height as an 81-storey building and the tallest structure in Paris. It is the second tallest free-standing structure in France after the Millau Viaduct.' }]
362
+ * ```
363
+ */
364
+ export class SummarizationPipeline extends SummarizationPipeline_base {
365
+ /** @type {'summary_text'} */
366
+ _key: 'summary_text';
367
+ }
368
+ declare const TranslationPipeline_base: new (options: TextPipelineConstructorArgs) => TranslationPipelineType;
369
+ /**
370
+ * @typedef {Object} TranslationSingle
371
+ * @property {string} translation_text The translated text.
372
+ * @typedef {TranslationSingle[]} TranslationOutput
373
+ *
374
+ * @callback TranslationPipelineCallback Translate the text(s) given as inputs.
375
+ * @param {string|string[]} texts Texts to be translated.
376
+ * @param {import('./generation/configuration_utils.js').GenerationConfig} [options] Additional keyword arguments to pass along to the generate method of the model.
377
+ * @returns {Promise<TranslationOutput|TranslationOutput[]>}
378
+ *
379
+ * @typedef {TextPipelineConstructorArgs & TranslationPipelineCallback & Disposable} TranslationPipelineType
380
+ */
381
+ /**
382
+ * Translates text from one language to another.
383
+ *
384
+ * **Example:** Multilingual translation w/ `Xenova/nllb-200-distilled-600M`.
385
+ *
386
+ * See [here](https://github.com/facebookresearch/flores/blob/main/flores200/README.md#languages-in-flores-200)
387
+ * for the full list of languages and their corresponding codes.
388
+ *
389
+ * ```javascript
390
+ * const translator = await pipeline('translation', 'Xenova/nllb-200-distilled-600M');
391
+ * const output = await translator('जीवन एक चॉकलेट बॉक्स की तरह है।', {
392
+ * src_lang: 'hin_Deva', // Hindi
393
+ * tgt_lang: 'fra_Latn', // French
394
+ * });
395
+ * // [{ translation_text: 'La vie est comme une boîte à chocolat.' }]
396
+ * ```
397
+ *
398
+ * **Example:** Multilingual translation w/ `Xenova/m2m100_418M`.
399
+ *
400
+ * See [here](https://huggingface.co/facebook/m2m100_418M#languages-covered)
401
+ * for the full list of languages and their corresponding codes.
402
+ *
403
+ * ```javascript
404
+ * const translator = await pipeline('translation', 'Xenova/m2m100_418M');
405
+ * const output = await translator('生活就像一盒巧克力。', {
406
+ * src_lang: 'zh', // Chinese
407
+ * tgt_lang: 'en', // English
408
+ * });
409
+ * // [{ translation_text: 'Life is like a box of chocolate.' }]
410
+ * ```
411
+ *
412
+ * **Example:** Multilingual translation w/ `Xenova/mbart-large-50-many-to-many-mmt`.
413
+ *
414
+ * See [here](https://huggingface.co/facebook/mbart-large-50-many-to-many-mmt#languages-covered)
415
+ * for the full list of languages and their corresponding codes.
416
+ *
417
+ * ```javascript
418
+ * const translator = await pipeline('translation', 'Xenova/mbart-large-50-many-to-many-mmt');
419
+ * const output = await translator('संयुक्त राष्ट्र के प्रमुख का कहना है कि सीरिया में कोई सैन्य समाधान नहीं है', {
420
+ * src_lang: 'hi_IN', // Hindi
421
+ * tgt_lang: 'fr_XX', // French
422
+ * });
423
+ * // [{ translation_text: 'Le chef des Nations affirme qu 'il n 'y a military solution in Syria.' }]
424
+ * ```
425
+ */
426
+ export class TranslationPipeline extends TranslationPipeline_base {
427
+ /** @type {'translation_text'} */
428
+ _key: 'translation_text';
429
+ }
430
+ declare const TextGenerationPipeline_base: new (options: TextPipelineConstructorArgs) => TextGenerationPipelineType;
431
+ /**
432
+ * @typedef {import('./tokenizers.js').Message[]} Chat
433
+ *
434
+ * @typedef {Object} TextGenerationSingle
435
+ * @property {string|Chat} generated_text The generated text.
436
+ * @typedef {TextGenerationSingle[]} TextGenerationOutput
437
+ *
438
+ * @typedef {Object} TextGenerationSpecificParams Parameters specific to text-generation pipelines.
439
+ * @property {boolean} [add_special_tokens] Whether or not to add special tokens when tokenizing the sequences.
440
+ * @property {boolean} [return_full_text=true] If set to `false` only added text is returned, otherwise the full text is returned.
441
+ * @typedef {import('./generation/configuration_utils.js').GenerationConfig & TextGenerationSpecificParams} TextGenerationConfig
442
+ *
443
+ * @callback TextGenerationPipelineCallback Complete the prompt(s) given as inputs.
444
+ * @param {string|string[]|Chat|Chat[]} texts One or several prompts (or one list of prompts) to complete.
445
+ * @param {Partial<TextGenerationConfig>} [options] Additional keyword arguments to pass along to the generate method of the model.
446
+ * @returns {Promise<TextGenerationOutput|TextGenerationOutput[]>} An array or object containing the generated texts.
447
+ *
448
+ * @typedef {TextPipelineConstructorArgs & TextGenerationPipelineCallback & Disposable} TextGenerationPipelineType
449
+ */
450
+ /**
451
+ * Language generation pipeline using any `ModelWithLMHead` or `ModelForCausalLM`.
452
+ * This pipeline predicts the words that will follow a specified text prompt.
453
+ * NOTE: For the full list of generation parameters, see [`GenerationConfig`](./utils/generation#module_utils/generation.GenerationConfig).
454
+ *
455
+ * **Example:** Text generation with `Xenova/distilgpt2` (default settings).
456
+ * ```javascript
457
+ * const generator = await pipeline('text-generation', 'Xenova/distilgpt2');
458
+ * const text = 'I enjoy walking with my cute dog,';
459
+ * const output = await generator(text);
460
+ * // [{ generated_text: "I enjoy walking with my cute dog, and I love to play with the other dogs." }]
461
+ * ```
462
+ *
463
+ * **Example:** Text generation with `Xenova/distilgpt2` (custom settings).
464
+ * ```javascript
465
+ * const generator = await pipeline('text-generation', 'Xenova/distilgpt2');
466
+ * const text = 'Once upon a time, there was';
467
+ * const output = await generator(text, {
468
+ * temperature: 2,
469
+ * max_new_tokens: 10,
470
+ * repetition_penalty: 1.5,
471
+ * no_repeat_ngram_size: 2,
472
+ * num_beams: 2,
473
+ * num_return_sequences: 2,
474
+ * });
475
+ * // [{
476
+ * // "generated_text": "Once upon a time, there was an abundance of information about the history and activities that"
477
+ * // }, {
478
+ * // "generated_text": "Once upon a time, there was an abundance of information about the most important and influential"
479
+ * // }]
480
+ * ```
481
+ *
482
+ * **Example:** Run code generation with `Xenova/codegen-350M-mono`.
483
+ * ```javascript
484
+ * const generator = await pipeline('text-generation', 'Xenova/codegen-350M-mono');
485
+ * const text = 'def fib(n):';
486
+ * const output = await generator(text, {
487
+ * max_new_tokens: 44,
488
+ * });
489
+ * // [{
490
+ * // generated_text: 'def fib(n):\n' +
491
+ * // ' if n == 0:\n' +
492
+ * // ' return 0\n' +
493
+ * // ' elif n == 1:\n' +
494
+ * // ' return 1\n' +
495
+ * // ' else:\n' +
496
+ * // ' return fib(n-1) + fib(n-2)\n'
497
+ * // }]
498
+ * ```
499
+ */
500
export class TextGenerationPipeline extends TextGenerationPipeline_base {
    /**
     * Complete the prompt(s) given as inputs.
     * @param texts One or several prompts (or chat-message lists) to complete.
     * @param options Generation parameters passed along to the model's `generate` method
     * (see `TextGenerationConfig`, e.g. `max_new_tokens`, `temperature`, `return_full_text`).
     * @returns The generated text(s), one `TextGenerationOutput` per input.
     */
    _call(texts: string | string[] | Chat | Chat[], options?: Partial<TextGenerationConfig>): Promise<TextGenerationOutput | TextGenerationOutput[]>;
}
503
/** Synthetic base-class constructor for {@link ZeroShotClassificationPipeline}; instances are callable (see `ZeroShotClassificationPipelineType`). */
declare const ZeroShotClassificationPipeline_base: new (options: TextPipelineConstructorArgs) => ZeroShotClassificationPipelineType;
504
+ /**
505
+ * @typedef {Object} ZeroShotClassificationOutput
506
+ * @property {string} sequence The sequence for which this is the output.
507
+ * @property {string[]} labels The labels sorted by order of likelihood.
508
+ * @property {number[]} scores The probabilities for each of the labels.
509
+ *
510
+ * @typedef {Object} ZeroShotClassificationPipelineOptions Parameters specific to zero-shot classification pipelines.
511
+ * @property {string} [hypothesis_template="This example is {}."] The template used to turn each
512
+ * candidate label into an NLI-style hypothesis. The candidate label will replace the {} placeholder.
513
+ * @property {boolean} [multi_label=false] Whether or not multiple candidate labels can be true.
514
+ * If `false`, the scores are normalized such that the sum of the label likelihoods for each sequence
515
+ * is 1. If `true`, the labels are considered independent and probabilities are normalized for each
516
+ * candidate by doing a softmax of the entailment score vs. the contradiction score.
517
+ *
518
+ * @callback ZeroShotClassificationPipelineCallback Classify the sequence(s) given as inputs.
519
+ * @param {string|string[]} texts The sequence(s) to classify, will be truncated if the model input is too large.
520
+ * @param {string|string[]} candidate_labels The set of possible class labels to classify each sequence into.
521
+ * Can be a single label, a string of comma-separated labels, or a list of labels.
522
+ * @param {ZeroShotClassificationPipelineOptions} [options] The options to use for zero-shot classification.
523
+ * @returns {Promise<ZeroShotClassificationOutput|ZeroShotClassificationOutput[]>} An array or object containing the predicted labels and scores.
524
+ *
525
+ * @typedef {TextPipelineConstructorArgs & ZeroShotClassificationPipelineCallback & Disposable} ZeroShotClassificationPipelineType
526
+ */
527
+ /**
528
+ * NLI-based zero-shot classification pipeline using a `ModelForSequenceClassification`
529
+ * trained on NLI (natural language inference) tasks. Equivalent of `text-classification`
530
+ * pipelines, but these models don't require a hardcoded number of potential classes, they
531
+ * can be chosen at runtime. It usually means it's slower but it is **much** more flexible.
532
+ *
533
+ * **Example:** Zero shot classification with `Xenova/mobilebert-uncased-mnli`.
534
+ * ```javascript
535
+ * const classifier = await pipeline('zero-shot-classification', 'Xenova/mobilebert-uncased-mnli');
536
+ * const text = 'Last week I upgraded my iOS version and ever since then my phone has been overheating whenever I use your app.';
537
+ * const labels = [ 'mobile', 'billing', 'website', 'account access' ];
538
+ * const output = await classifier(text, labels);
539
+ * // {
540
+ * // sequence: 'Last week I upgraded my iOS version and ever since then my phone has been overheating whenever I use your app.',
541
+ * // labels: [ 'mobile', 'website', 'billing', 'account access' ],
542
+ * // scores: [ 0.5562091040482018, 0.1843621307860853, 0.13942646639336376, 0.12000229877234923 ]
543
+ * // }
544
+ * ```
545
+ *
546
+ * **Example:** Zero shot classification with `Xenova/nli-deberta-v3-xsmall` (multi-label).
547
+ * ```javascript
548
+ * const classifier = await pipeline('zero-shot-classification', 'Xenova/nli-deberta-v3-xsmall');
549
+ * const text = 'I have a problem with my iphone that needs to be resolved asap!';
550
+ * const labels = [ 'urgent', 'not urgent', 'phone', 'tablet', 'computer' ];
551
+ * const output = await classifier(text, labels, { multi_label: true });
552
+ * // {
553
+ * // sequence: 'I have a problem with my iphone that needs to be resolved asap!',
554
+ * // labels: [ 'urgent', 'phone', 'computer', 'tablet', 'not urgent' ],
555
+ * // scores: [ 0.9958870956360275, 0.9923963400697035, 0.002333537946160235, 0.0015134138567598765, 0.0010699384208377163 ]
556
+ * // }
557
+ * ```
558
+ */
559
export class ZeroShotClassificationPipeline extends ZeroShotClassificationPipeline_base {
    // Mapping from label name to label id.
    // NOTE(review): presumably populated from the model config's `label2id` — confirm in pipelines.js.
    label2id: {
        [k: string]: any;
    };
    // Id of the NLI "entailment" label, used to score each candidate hypothesis
    // (see the `multi_label` description above).
    entailment_id: any;
    // Id of the NLI "contradiction" label, used for the entailment-vs-contradiction
    // softmax when `multi_label` is true (see the `multi_label` description above).
    contradiction_id: any;
    /**
     * Classify the sequence(s) against the given candidate labels.
     * @param texts The sequence(s) to classify (truncated if too long for the model).
     * @param candidate_labels A single label, comma-separated labels, or a list of labels.
     * @param options Zero-shot options (`hypothesis_template`, `multi_label`).
     * @returns One result object per input sequence with `labels` sorted by likelihood and their `scores`.
     */
    _call(texts: string | string[], candidate_labels: string | string[], options?: ZeroShotClassificationPipelineOptions): Promise<ZeroShotClassificationOutput | ZeroShotClassificationOutput[]>;
}
567
/** Synthetic base-class constructor for {@link FeatureExtractionPipeline}; instances are callable (see `FeatureExtractionPipelineType`). */
declare const FeatureExtractionPipeline_base: new (options: TextPipelineConstructorArgs) => FeatureExtractionPipelineType;
568
+ /**
569
+ * @typedef {Object} FeatureExtractionPipelineOptions Parameters specific to feature extraction pipelines.
570
+ * @property {'none'|'mean'|'cls'} [pooling="none"] The pooling method to use.
571
+ * @property {boolean} [normalize=false] Whether or not to normalize the embeddings in the last dimension.
572
+ * @property {boolean} [quantize=false] Whether or not to quantize the embeddings.
573
+ * @property {'binary'|'ubinary'} [precision='binary'] The precision to use for quantization.
574
+ *
575
+ * @callback FeatureExtractionPipelineCallback Extract the features of the input(s).
576
+ * @param {string|string[]} texts One or several texts (or one list of texts) to get the features of.
577
+ * @param {FeatureExtractionPipelineOptions} [options] The options to use for feature extraction.
578
+ * @returns {Promise<Tensor>} The features computed by the model.
579
+ *
580
+ * @typedef {TextPipelineConstructorArgs & FeatureExtractionPipelineCallback & Disposable} FeatureExtractionPipelineType
581
+ */
582
+ /**
583
+ * Feature extraction pipeline using no model head. This pipeline extracts the hidden
584
+ * states from the base transformer, which can be used as features in downstream tasks.
585
+ *
586
+ * **Example:** Run feature extraction with `bert-base-uncased` (without pooling/normalization).
587
+ * ```javascript
588
+ * const extractor = await pipeline('feature-extraction', 'Xenova/bert-base-uncased', { revision: 'default' });
589
+ * const output = await extractor('This is a simple test.');
590
+ * // Tensor {
591
+ * // type: 'float32',
592
+ * // data: Float32Array [0.05939924716949463, 0.021655935794115067, ...],
593
+ * // dims: [1, 8, 768]
594
+ * // }
595
+ * ```
596
+ *
597
+ * **Example:** Run feature extraction with `bert-base-uncased` (with pooling/normalization).
598
+ * ```javascript
599
+ * const extractor = await pipeline('feature-extraction', 'Xenova/bert-base-uncased', { revision: 'default' });
600
+ * const output = await extractor('This is a simple test.', { pooling: 'mean', normalize: true });
601
+ * // Tensor {
602
+ * // type: 'float32',
603
+ * // data: Float32Array [0.03373778983950615, -0.010106077417731285, ...],
604
+ * // dims: [1, 768]
605
+ * // }
606
+ * ```
607
+ *
608
+ * **Example:** Calculating embeddings with `sentence-transformers` models.
609
+ * ```javascript
610
+ * const extractor = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2');
611
+ * const output = await extractor('This is a simple test.', { pooling: 'mean', normalize: true });
612
+ * // Tensor {
613
+ * // type: 'float32',
614
+ * // data: Float32Array [0.09094982594251633, -0.014774246141314507, ...],
615
+ * // dims: [1, 384]
616
+ * // }
617
+ * ```
618
+ * **Example:** Calculating binary embeddings with `sentence-transformers` models.
619
+ * ```javascript
620
+ * const extractor = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2');
621
+ * const output = await extractor('This is a simple test.', { pooling: 'mean', quantize: true, precision: 'binary' });
622
+ * // Tensor {
623
+ * // type: 'int8',
624
+ * // data: Int8Array [49, 108, 24, ...],
625
+ * // dims: [1, 48]
626
+ * // }
627
+ * ```
628
+ */
629
export class FeatureExtractionPipeline extends FeatureExtractionPipeline_base {
    /**
     * Extract features (hidden states / embeddings) for the given text(s).
     * @param texts One or several texts to embed.
     * @param options Extraction options: `pooling` ('none'|'mean'|'cls'), `normalize`,
     * `quantize`, and `precision` ('binary'|'ubinary') — see the typedef above.
     * @returns A `Tensor` of features computed by the model.
     */
    _call(texts: string | string[], options?: FeatureExtractionPipelineOptions): Promise<Tensor>;
}
632
/** Synthetic base-class constructor for {@link ImageFeatureExtractionPipeline}; instances are callable (see `ImageFeatureExtractionPipelineType`). */
declare const ImageFeatureExtractionPipeline_base: new (options: ImagePipelineConstructorArgs) => ImageFeatureExtractionPipelineType;
633
+ /**
634
+ * @typedef {Object} ImageFeatureExtractionPipelineOptions Parameters specific to image feature extraction pipelines.
635
+ * @property {boolean} [pool=null] Whether or not to return the pooled output. If set to `false`, the model will return the raw hidden states.
636
+ *
637
+ * @callback ImageFeatureExtractionPipelineCallback Extract the features of the input(s).
638
+ * @param {ImagePipelineInputs} images One or several images (or one list of images) to get the features of.
639
+ * @param {ImageFeatureExtractionPipelineOptions} [options] The options to use for image feature extraction.
640
+ * @returns {Promise<Tensor>} The image features computed by the model.
641
+ *
642
+ * @typedef {ImagePipelineConstructorArgs & ImageFeatureExtractionPipelineCallback & Disposable} ImageFeatureExtractionPipelineType
643
+ */
644
+ /**
645
+ * Image feature extraction pipeline using no model head. This pipeline extracts the hidden
646
+ * states from the base transformer, which can be used as features in downstream tasks.
647
+ *
648
+ * **Example:** Perform image feature extraction with `Xenova/vit-base-patch16-224-in21k`.
649
+ * ```javascript
650
+ * const image_feature_extractor = await pipeline('image-feature-extraction', 'Xenova/vit-base-patch16-224-in21k');
651
+ * const url = 'https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/cats.png';
652
+ * const features = await image_feature_extractor(url);
653
+ * // Tensor {
654
+ * // dims: [ 1, 197, 768 ],
655
+ * // type: 'float32',
656
+ * // data: Float32Array(151296) [ ... ],
657
+ * // size: 151296
658
+ * // }
659
+ * ```
660
+ *
661
+ * **Example:** Compute image embeddings with `Xenova/clip-vit-base-patch32`.
662
+ * ```javascript
663
+ * const image_feature_extractor = await pipeline('image-feature-extraction', 'Xenova/clip-vit-base-patch32');
664
+ * const url = 'https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/cats.png';
665
+ * const features = await image_feature_extractor(url);
666
+ * // Tensor {
667
+ * // dims: [ 1, 512 ],
668
+ * // type: 'float32',
669
+ * // data: Float32Array(512) [ ... ],
670
+ * // size: 512
671
+ * // }
672
+ * ```
673
+ */
674
export class ImageFeatureExtractionPipeline extends ImageFeatureExtractionPipeline_base {
    /**
     * Extract features (hidden states) for the given image(s).
     * @param images One or several images (or one list of images).
     * @param options Extraction options: `pool` — if falsy, the raw hidden states are returned.
     * @returns A `Tensor` of image features computed by the model.
     */
    _call(images: ImagePipelineInputs, options?: ImageFeatureExtractionPipelineOptions): Promise<Tensor>;
}
677
/** Synthetic base-class constructor for {@link AudioClassificationPipeline}; instances are callable (see `AudioClassificationPipelineType`). */
declare const AudioClassificationPipeline_base: new (options: AudioPipelineConstructorArgs) => AudioClassificationPipelineType;
678
+ /**
679
+ * @typedef {Object} AudioClassificationSingle
680
+ * @property {string} label The label predicted.
681
+ * @property {number} score The corresponding probability.
682
+ * @typedef {AudioClassificationSingle[]} AudioClassificationOutput
683
+ *
684
+ * @typedef {Object} AudioClassificationPipelineOptions Parameters specific to audio classification pipelines.
685
+ * @property {number} [top_k=5] The number of top labels that will be returned by the pipeline.
686
+ * If the provided number is `null` or higher than the number of labels available in the model configuration,
687
+ * it will default to the number of labels.
688
+ *
689
+ * @callback AudioClassificationPipelineCallback Classify the sequence(s) given as inputs.
690
+ * @param {AudioPipelineInputs} audio The input audio file(s) to be classified. The input is either:
691
+ * - `string` or `URL` that is the filename/URL of the audio file, the file will be read at the processor's sampling rate
692
+ * to get the waveform using the [`AudioContext`](https://developer.mozilla.org/en-US/docs/Web/API/AudioContext) API.
693
+ * If `AudioContext` is not available, you should pass the raw waveform in as a Float32Array of shape `(n, )`.
694
+ * - `Float32Array` or `Float64Array` of shape `(n, )`, representing the raw audio at the correct sampling rate (no further check will be done).
695
+ * @param {AudioClassificationPipelineOptions} [options] The options to use for audio classification.
696
+ * @returns {Promise<AudioClassificationOutput|AudioClassificationOutput[]>} An array or object containing the predicted labels and scores.
697
+ *
698
+ * @typedef {AudioPipelineConstructorArgs & AudioClassificationPipelineCallback & Disposable} AudioClassificationPipelineType
699
+ */
700
+ /**
701
+ * Audio classification pipeline using any `AutoModelForAudioClassification`.
702
+ * This pipeline predicts the class of a raw waveform or an audio file.
703
+ *
704
+ * **Example:** Perform audio classification with `Xenova/wav2vec2-large-xlsr-53-gender-recognition-librispeech`.
705
+ * ```javascript
706
+ * const classifier = await pipeline('audio-classification', 'Xenova/wav2vec2-large-xlsr-53-gender-recognition-librispeech');
707
+ * const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/jfk.wav';
708
+ * const output = await classifier(url);
709
+ * // [
710
+ * // { label: 'male', score: 0.9981542229652405 },
711
+ * // { label: 'female', score: 0.001845747814513743 }
712
+ * // ]
713
+ * ```
714
+ *
715
+ * **Example:** Perform audio classification with `Xenova/ast-finetuned-audioset-10-10-0.4593` and return top 4 results.
716
+ * ```javascript
717
+ * const classifier = await pipeline('audio-classification', 'Xenova/ast-finetuned-audioset-10-10-0.4593');
718
+ * const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/cat_meow.wav';
719
+ * const output = await classifier(url, { top_k: 4 });
720
+ * // [
721
+ * // { label: 'Meow', score: 0.5617874264717102 },
722
+ * // { label: 'Cat', score: 0.22365376353263855 },
723
+ * // { label: 'Domestic animals, pets', score: 0.1141069084405899 },
724
+ * // { label: 'Animal', score: 0.08985692262649536 },
725
+ * // ]
726
+ * ```
727
+ */
728
export class AudioClassificationPipeline extends AudioClassificationPipeline_base {
    /**
     * Classify the given audio input(s).
     * @param audio Audio filename/URL (decoded via `AudioContext` at the processor's
     * sampling rate), or a raw `Float32Array`/`Float64Array` waveform of shape `(n,)`.
     * @param options Classification options (`top_k`, default 5).
     * @returns The predicted labels and scores, one output array per input.
     */
    _call(audio: AudioPipelineInputs, options?: AudioClassificationPipelineOptions): Promise<AudioClassificationOutput | AudioClassificationOutput[]>;
}
731
/** Synthetic base-class constructor for {@link ZeroShotAudioClassificationPipeline}; instances are callable (see `ZeroShotAudioClassificationPipelineType`). */
declare const ZeroShotAudioClassificationPipeline_base: new (options: TextAudioPipelineConstructorArgs) => ZeroShotAudioClassificationPipelineType;
732
+ /**
733
+ * @typedef {Object} ZeroShotAudioClassificationOutput
734
+ * @property {string} label The label identified by the model. It is one of the suggested `candidate_label`.
735
+ * @property {number} score The score attributed by the model for that label (between 0 and 1).
736
+ *
737
+ * @typedef {Object} ZeroShotAudioClassificationPipelineOptions Parameters specific to zero-shot audio classification pipelines.
738
+ * @property {string} [hypothesis_template="This is a sound of {}."] The sentence used in conjunction with `candidate_labels`
739
+ * to attempt the audio classification by replacing the placeholder with the candidate_labels.
740
+ * Then likelihood is estimated by using `logits_per_audio`.
741
+ *
742
+ * @callback ZeroShotAudioClassificationPipelineCallback Classify the sequence(s) given as inputs.
743
+ * @param {AudioPipelineInputs} audio The input audio file(s) to be classified. The input is either:
744
+ * - `string` or `URL` that is the filename/URL of the audio file, the file will be read at the processor's sampling rate
745
+ * to get the waveform using the [`AudioContext`](https://developer.mozilla.org/en-US/docs/Web/API/AudioContext) API.
746
+ * If `AudioContext` is not available, you should pass the raw waveform in as a Float32Array of shape `(n, )`.
747
+ * - `Float32Array` or `Float64Array` of shape `(n, )`, representing the raw audio at the correct sampling rate (no further check will be done).
748
+ * @param {string[]} candidate_labels The candidate labels for this audio.
749
+ * @param {ZeroShotAudioClassificationPipelineOptions} [options] The options to use for zero-shot audio classification.
750
+ * @returns {Promise<ZeroShotAudioClassificationOutput[]|ZeroShotAudioClassificationOutput[][]>} An array of objects containing the predicted labels and scores.
751
+ *
752
+ * @typedef {TextAudioPipelineConstructorArgs & ZeroShotAudioClassificationPipelineCallback & Disposable} ZeroShotAudioClassificationPipelineType
753
+ */
754
+ /**
755
+ * Zero shot audio classification pipeline using `ClapModel`. This pipeline predicts the class of an audio when you
756
+ * provide an audio and a set of `candidate_labels`.
757
+ *
758
+ * **Example**: Perform zero-shot audio classification with `Xenova/clap-htsat-unfused`.
759
+ * ```javascript
760
+ * const classifier = await pipeline('zero-shot-audio-classification', 'Xenova/clap-htsat-unfused');
761
+ * const audio = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/dog_barking.wav';
762
+ * const candidate_labels = ['dog', 'vacuum cleaner'];
763
+ * const scores = await classifier(audio, candidate_labels);
764
+ * // [
765
+ * // { score: 0.9993992447853088, label: 'dog' },
766
+ * // { score: 0.0006007603369653225, label: 'vacuum cleaner' }
767
+ * // ]
768
+ * ```
769
+ */
770
export class ZeroShotAudioClassificationPipeline extends ZeroShotAudioClassificationPipeline_base {
    /**
     * Classify the given audio against the provided candidate labels.
     * @param audio Audio filename/URL, or a raw `Float32Array`/`Float64Array` waveform of shape `(n,)`.
     * @param candidate_labels The candidate labels for this audio; each is substituted
     * into `hypothesis_template` to form the text inputs.
     * @param options Options (`hypothesis_template`, default "This is a sound of {}.").
     * @returns Objects containing the predicted label and score for each candidate.
     */
    _call(audio: AudioPipelineInputs, candidate_labels: string[], options?: ZeroShotAudioClassificationPipelineOptions): Promise<ZeroShotAudioClassificationOutput[] | ZeroShotAudioClassificationOutput[][]>;
}
773
/** Synthetic base-class constructor for {@link AutomaticSpeechRecognitionPipeline}; instances are callable (see `AutomaticSpeechRecognitionPipelineType`). */
declare const AutomaticSpeechRecognitionPipeline_base: new (options: TextAudioPipelineConstructorArgs) => AutomaticSpeechRecognitionPipelineType;
774
+ /**
775
+ * @typedef {Object} Chunk
776
+ * @property {[number, number]} timestamp The start and end timestamp of the chunk in seconds.
777
+ * @property {string} text The recognized text.
778
+ */
779
+ /**
780
+ * @typedef {Object} AutomaticSpeechRecognitionOutput
781
+ * @property {string} text The recognized text.
782
+ * @property {Chunk[]} [chunks] When using `return_timestamps`, the `chunks` will become a list
783
+ * containing all the various text chunks identified by the model.
784
+ *
785
+ * @typedef {Object} AutomaticSpeechRecognitionSpecificParams Parameters specific to automatic-speech-recognition pipelines.
786
+ * @property {boolean|'word'} [return_timestamps] Whether to return timestamps or not. Default is `false`.
787
+ * @property {number} [chunk_length_s] The length of audio chunks to process in seconds. Default is 0 (no chunking).
788
+ * @property {number} [stride_length_s] The length of overlap between consecutive audio chunks in seconds. If not provided, defaults to `chunk_length_s / 6`.
789
+ * @property {boolean} [force_full_sequences] Whether to force outputting full sequences or not. Default is `false`.
790
+ * @property {string} [language] The source language. Default is `null`, meaning it should be auto-detected. Use this to potentially improve performance if the source language is known.
791
+ * @property {string} [task] The task to perform. Default is `null`, meaning it should be auto-detected.
792
+ * @property {number} [num_frames] The number of frames in the input audio.
793
+ * @typedef {import('./generation/configuration_utils.js').GenerationConfig & AutomaticSpeechRecognitionSpecificParams} AutomaticSpeechRecognitionConfig
794
+ *
795
+ * @callback AutomaticSpeechRecognitionPipelineCallback Transcribe the audio sequence(s) given as inputs to text.
796
+ * @param {AudioPipelineInputs} audio The input audio file(s) to be transcribed. The input is either:
797
+ * - `string` or `URL` that is the filename/URL of the audio file, the file will be read at the processor's sampling rate
798
+ * to get the waveform using the [`AudioContext`](https://developer.mozilla.org/en-US/docs/Web/API/AudioContext) API.
799
+ * If `AudioContext` is not available, you should pass the raw waveform in as a Float32Array of shape `(n, )`.
800
+ * - `Float32Array` or `Float64Array` of shape `(n, )`, representing the raw audio at the correct sampling rate (no further check will be done).
801
+ * @param {Partial<AutomaticSpeechRecognitionConfig>} [options] Additional keyword arguments to pass along to the generate method of the model.
802
+ * @returns {Promise<AutomaticSpeechRecognitionOutput|AutomaticSpeechRecognitionOutput[]>} An object containing the transcription text and optionally timestamps if `return_timestamps` is `true`.
803
+ *
804
+ * @typedef {TextAudioPipelineConstructorArgs & AutomaticSpeechRecognitionPipelineCallback & Disposable} AutomaticSpeechRecognitionPipelineType
805
+ */
806
+ /**
807
+ * Pipeline that aims at extracting spoken text contained within some audio.
808
+ *
809
+ * **Example:** Transcribe English.
810
+ * ```javascript
811
+ * const transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en');
812
+ * const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/jfk.wav';
813
+ * const output = await transcriber(url);
814
+ * // { text: " And so my fellow Americans ask not what your country can do for you, ask what you can do for your country." }
815
+ * ```
816
+ *
817
+ * **Example:** Transcribe English w/ timestamps.
818
+ * ```javascript
819
+ * const transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en');
820
+ * const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/jfk.wav';
821
+ * const output = await transcriber(url, { return_timestamps: true });
822
+ * // {
823
+ * // text: " And so my fellow Americans ask not what your country can do for you, ask what you can do for your country."
824
+ * // chunks: [
825
+ * // { timestamp: [0, 8], text: " And so my fellow Americans ask not what your country can do for you" }
826
+ * // { timestamp: [8, 11], text: " ask what you can do for your country." }
827
+ * // ]
828
+ * // }
829
+ * ```
830
+ *
831
+ * **Example:** Transcribe English w/ word-level timestamps.
832
+ * ```javascript
833
+ * const transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en');
834
+ * const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/jfk.wav';
835
+ * const output = await transcriber(url, { return_timestamps: 'word' });
836
+ * // {
837
+ * // "text": " And so my fellow Americans ask not what your country can do for you ask what you can do for your country.",
838
+ * // "chunks": [
839
+ * // { "text": " And", "timestamp": [0, 0.78] },
840
+ * // { "text": " so", "timestamp": [0.78, 1.06] },
841
+ * // { "text": " my", "timestamp": [1.06, 1.46] },
842
+ * // ...
843
+ * // { "text": " for", "timestamp": [9.72, 9.92] },
844
+ * // { "text": " your", "timestamp": [9.92, 10.22] },
845
+ * // { "text": " country.", "timestamp": [10.22, 13.5] }
846
+ * // ]
847
+ * // }
848
+ * ```
849
+ *
850
+ * **Example:** Transcribe French.
851
+ * ```javascript
852
+ * const transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-small');
853
+ * const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/french-audio.mp3';
854
+ * const output = await transcriber(url, { language: 'french', task: 'transcribe' });
855
+ * // { text: " J'adore, j'aime, je n'aime pas, je déteste." }
856
+ * ```
857
+ *
858
+ * **Example:** Translate French to English.
859
+ * ```javascript
860
+ * const transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-small');
861
+ * const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/french-audio.mp3';
862
+ * const output = await transcriber(url, { language: 'french', task: 'translate' });
863
+ * // { text: " I love, I like, I don't like, I hate." }
864
+ * ```
865
+ *
866
+ * **Example:** Transcribe/translate audio longer than 30 seconds.
867
+ * ```javascript
868
+ * const transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en');
869
+ * const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/ted_60.wav';
870
+ * const output = await transcriber(url, { chunk_length_s: 30, stride_length_s: 5 });
871
+ * // { text: " So in college, I was a government major, which means [...] So I'd start off light and I'd bump it up" }
872
+ * ```
873
+ */
874
export class AutomaticSpeechRecognitionPipeline extends AutomaticSpeechRecognitionPipeline_base {
    /**
     * Transcribe the audio input(s) to text.
     * @param audio Audio filename/URL, or a raw `Float32Array`/`Float64Array` waveform of shape `(n,)`.
     * @param options Generation/decoding options: `return_timestamps` (incl. `'word'`),
     * `chunk_length_s`/`stride_length_s` for long audio, `language`, `task`, etc.
     * @returns The transcription `{ text }`, plus `chunks` when `return_timestamps` is set.
     */
    _call(audio: AudioPipelineInputs, options?: Partial<AutomaticSpeechRecognitionConfig>): Promise<AutomaticSpeechRecognitionOutput | AutomaticSpeechRecognitionOutput[]>;
    /**
     * Transcription path for wav2vec2-family models.
     * NOTE(review): dispatch between this and `_call_whisper` is presumably based on
     * the model type — confirm against the implementation in pipelines.js.
     * @type {AutomaticSpeechRecognitionPipelineCallback}
     * @private
     */
    private _call_wav2vec2;
    /**
     * Transcription path for Whisper-family models.
     * @type {AutomaticSpeechRecognitionPipelineCallback}
     * @private
     */
    private _call_whisper;
}
887
/** Synthetic base-class constructor for {@link ImageToTextPipeline}; instances are callable (see `ImageToTextPipelineType`). */
declare const ImageToTextPipeline_base: new (options: TextImagePipelineConstructorArgs) => ImageToTextPipelineType;
888
+ /**
889
+ * @typedef {Object} ImageToTextSingle
890
+ * @property {string} generated_text The generated text.
891
+ * @typedef {ImageToTextSingle[]} ImageToTextOutput
892
+ *
893
+ * @callback ImageToTextPipelineCallback Assign labels to the image(s) passed as inputs.
894
+ * @param {ImagePipelineInputs} texts The images to be captioned.
895
+ * @param {Partial<import('./generation/configuration_utils.js').GenerationConfig>} [options] Additional keyword arguments to pass along to the generate method of the model.
896
+ * @returns {Promise<ImageToTextOutput|ImageToTextOutput[]>} An object (or array of objects) containing the generated text(s).
897
+ *
898
+ * @typedef {TextImagePipelineConstructorArgs & ImageToTextPipelineCallback & Disposable} ImageToTextPipelineType
899
+ */
900
+ /**
901
+ * Image To Text pipeline using a `AutoModelForVision2Seq`. This pipeline predicts a caption for a given image.
902
+ *
903
+ * **Example:** Generate a caption for an image w/ `Xenova/vit-gpt2-image-captioning`.
904
+ * ```javascript
905
+ * const captioner = await pipeline('image-to-text', 'Xenova/vit-gpt2-image-captioning');
906
+ * const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/cats.jpg';
907
+ * const output = await captioner(url);
908
+ * // [{ generated_text: 'a cat laying on a couch with another cat' }]
909
+ * ```
910
+ *
911
+ * **Example:** Optical Character Recognition (OCR) w/ `Xenova/trocr-small-handwritten`.
912
+ * ```javascript
913
+ * const captioner = await pipeline('image-to-text', 'Xenova/trocr-small-handwritten');
914
+ * const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/handwriting.jpg';
915
+ * const output = await captioner(url);
916
+ * // [{ generated_text: 'Mr. Brown commented icily.' }]
917
+ * ```
918
+ */
919
export class ImageToTextPipeline extends ImageToTextPipeline_base {
    /**
     * Generate text (e.g. a caption, or OCR output) for the given image(s).
     * @param texts The input image(s) to be captioned. (Parameter is named `texts`
     * upstream but actually carries image inputs.)
     * @param options Generation parameters passed along to the model's `generate` method.
     * @returns The generated text(s), shaped `{ generated_text }`.
     */
    _call(texts: ImagePipelineInputs, options?: Partial<import('./generation/configuration_utils.js').GenerationConfig>): Promise<ImageToTextOutput | ImageToTextOutput[]>;
}
922
/** Synthetic base-class constructor for {@link ImageClassificationPipeline}; instances are callable (see `ImageClassificationPipelineType`). */
declare const ImageClassificationPipeline_base: new (options: ImagePipelineConstructorArgs) => ImageClassificationPipelineType;
923
+ /**
924
+ * @typedef {Object} ImageClassificationSingle
925
+ * @property {string} label The label identified by the model.
926
+ * @property {number} score The score attributed by the model for that label.
927
+ * @typedef {ImageClassificationSingle[]} ImageClassificationOutput
928
+ *
929
+ * @typedef {Object} ImageClassificationPipelineOptions Parameters specific to image classification pipelines.
930
+ * @property {number} [top_k=1] The number of top labels that will be returned by the pipeline.
931
+ *
932
+ * @callback ImageClassificationPipelineCallback Assign labels to the image(s) passed as inputs.
933
+ * @param {ImagePipelineInputs} images The input images(s) to be classified.
934
+ * @param {ImageClassificationPipelineOptions} [options] The options to use for image classification.
935
+ * @returns {Promise<ImageClassificationOutput|ImageClassificationOutput[]>} An array or object containing the predicted labels and scores.
936
+ *
937
+ * @typedef {ImagePipelineConstructorArgs & ImageClassificationPipelineCallback & Disposable} ImageClassificationPipelineType
938
+ */
939
+ /**
940
+ * Image classification pipeline using any `AutoModelForImageClassification`.
941
+ * This pipeline predicts the class of an image.
942
+ *
943
+ * **Example:** Classify an image.
944
+ * ```javascript
945
+ * const classifier = await pipeline('image-classification', 'Xenova/vit-base-patch16-224');
946
+ * const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/tiger.jpg';
947
+ * const output = await classifier(url);
948
+ * // [
949
+ * // { label: 'tiger, Panthera tigris', score: 0.632695734500885 },
950
+ * // ]
951
+ * ```
952
+ *
953
+ * **Example:** Classify an image and return top `n` classes.
954
+ * ```javascript
955
+ * const classifier = await pipeline('image-classification', 'Xenova/vit-base-patch16-224');
956
+ * const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/tiger.jpg';
957
+ * const output = await classifier(url, { top_k: 3 });
958
+ * // [
959
+ * // { label: 'tiger, Panthera tigris', score: 0.632695734500885 },
960
+ * // { label: 'tiger cat', score: 0.3634825646877289 },
961
+ * // { label: 'lion, king of beasts, Panthera leo', score: 0.00045060308184474707 },
962
+ * // ]
963
+ * ```
964
+ *
965
+ * **Example:** Classify an image and return all classes.
966
+ * ```javascript
967
+ * const classifier = await pipeline('image-classification', 'Xenova/vit-base-patch16-224');
968
+ * const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/tiger.jpg';
969
+ * const output = await classifier(url, { top_k: 0 });
970
+ * // [
971
+ * // { label: 'tiger, Panthera tigris', score: 0.632695734500885 },
972
+ * // { label: 'tiger cat', score: 0.3634825646877289 },
973
+ * // { label: 'lion, king of beasts, Panthera leo', score: 0.00045060308184474707 },
974
+ * // { label: 'jaguar, panther, Panthera onca, Felis onca', score: 0.00035465499968267977 },
975
+ * // ...
976
+ * // ]
977
+ * ```
978
+ */
979
+ export class ImageClassificationPipeline extends ImageClassificationPipeline_base {
980
+ _call(images: ImagePipelineInputs, options?: ImageClassificationPipelineOptions): Promise<ImageClassificationOutput | ImageClassificationOutput[]>;
981
+ }
982
+ declare const ImageSegmentationPipeline_base: new (options: ImagePipelineConstructorArgs) => ImageSegmentationPipelineType;
983
+ /**
984
+ * @typedef {Object} ImageSegmentationPipelineOutput
985
+ * @property {string} label The label of the segment.
986
+ * @property {number|null} score The score of the segment.
987
+ * @property {RawImage} mask The mask of the segment.
988
+ *
989
+ * @typedef {Object} ImageSegmentationPipelineOptions Parameters specific to image segmentation pipelines.
990
+ * @property {number} [threshold=0.5] Probability threshold to filter out predicted masks.
991
+ * @property {number} [mask_threshold=0.5] Threshold to use when turning the predicted masks into binary values.
992
+ * @property {number} [overlap_mask_area_threshold=0.8] Mask overlap threshold to eliminate small, disconnected segments.
993
+ * @property {null|string} [subtask=null] Segmentation task to be performed. One of [`panoptic`, `instance`, and `semantic`],
994
+ * depending on model capabilities. If not set, the pipeline will attempt to resolve (in that order).
995
+ * @property {number[]} [label_ids_to_fuse=null] List of label ids to fuse. If not set, do not fuse any labels.
996
+ * @property {number[][]} [target_sizes=null] List of target sizes for the input images. If not set, use the original image sizes.
997
+ *
998
+ * @callback ImageSegmentationPipelineCallback Segment the input images.
999
+ * @param {ImagePipelineInputs} images The input images.
1000
+ * @param {ImageSegmentationPipelineOptions} [options] The options to use for image segmentation.
1001
+ * @returns {Promise<ImageSegmentationPipelineOutput[]>} The annotated segments.
1002
+ *
1003
+ * @typedef {ImagePipelineConstructorArgs & ImageSegmentationPipelineCallback & Disposable} ImageSegmentationPipelineType
1004
+ */
1005
+ /**
1006
+ * Image segmentation pipeline using any `AutoModelForXXXSegmentation`.
1007
+ * This pipeline predicts masks of objects and their classes.
1008
+ *
1009
+ * **Example:** Perform image segmentation with `Xenova/detr-resnet-50-panoptic`.
1010
+ * ```javascript
1011
+ * const segmenter = await pipeline('image-segmentation', 'Xenova/detr-resnet-50-panoptic');
1012
+ * const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/cats.jpg';
1013
+ * const output = await segmenter(url);
1014
+ * // [
1015
+ * // { label: 'remote', score: 0.9984649419784546, mask: RawImage { ... } },
1016
+ * // { label: 'cat', score: 0.9994316101074219, mask: RawImage { ... } }
1017
+ * // ]
1018
+ * ```
1019
+ */
1020
+ export class ImageSegmentationPipeline extends ImageSegmentationPipeline_base {
1021
+ subtasks_mapping: {
1022
+ panoptic: string;
1023
+ instance: string;
1024
+ semantic: string;
1025
+ };
1026
+ _call(images: ImagePipelineInputs, options?: ImageSegmentationPipelineOptions): Promise<ImageSegmentationPipelineOutput[]>;
1027
+ }
1028
+ declare const ZeroShotImageClassificationPipeline_base: new (options: TextImagePipelineConstructorArgs) => ZeroShotImageClassificationPipelineType;
1029
+ /**
1030
+ * @typedef {Object} ZeroShotImageClassificationOutput
1031
+ * @property {string} label The label identified by the model. It is one of the suggested `candidate_label`.
1032
+ * @property {number} score The score attributed by the model for that label (between 0 and 1).
1033
+ *
1034
+ * @typedef {Object} ZeroShotImageClassificationPipelineOptions Parameters specific to zero-shot image classification pipelines.
1035
+ * @property {string} [hypothesis_template="This is a photo of {}"] The sentence used in conjunction with `candidate_labels`
1036
+ * to attempt the image classification by replacing the placeholder with the candidate_labels.
1037
+ * Then likelihood is estimated by using `logits_per_image`.
1038
+ *
1039
+ * @callback ZeroShotImageClassificationPipelineCallback Assign labels to the image(s) passed as inputs.
1040
+ * @param {ImagePipelineInputs} images The input images.
1041
+ * @param {string[]} candidate_labels The candidate labels for this image.
1042
+ * @param {ZeroShotImageClassificationPipelineOptions} [options] The options to use for zero-shot image classification.
1043
+ * @returns {Promise<ZeroShotImageClassificationOutput[]|ZeroShotImageClassificationOutput[][]>} An array of objects containing the predicted labels and scores.
1044
+ *
1045
+ * @typedef {TextImagePipelineConstructorArgs & ZeroShotImageClassificationPipelineCallback & Disposable} ZeroShotImageClassificationPipelineType
1046
+ */
1047
+ /**
1048
+ * Zero shot image classification pipeline. This pipeline predicts the class of
1049
+ * an image when you provide an image and a set of `candidate_labels`.
1050
+ *
1051
+ * **Example:** Zero shot image classification w/ `Xenova/clip-vit-base-patch32`.
1052
+ * ```javascript
1053
+ * const classifier = await pipeline('zero-shot-image-classification', 'Xenova/clip-vit-base-patch32');
1054
+ * const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/tiger.jpg';
1055
+ * const output = await classifier(url, ['tiger', 'horse', 'dog']);
1056
+ * // [
1057
+ * // { score: 0.9993917942047119, label: 'tiger' },
1058
+ * // { score: 0.0003519294841680676, label: 'horse' },
1059
+ * // { score: 0.0002562698791734874, label: 'dog' }
1060
+ * // ]
1061
+ * ```
1062
+ */
1063
+ export class ZeroShotImageClassificationPipeline extends ZeroShotImageClassificationPipeline_base {
1064
+ _call(images: ImagePipelineInputs, candidate_labels: string[], options?: ZeroShotImageClassificationPipelineOptions): Promise<ZeroShotImageClassificationOutput[] | ZeroShotImageClassificationOutput[][]>;
1065
+ }
1066
+ declare const ObjectDetectionPipeline_base: new (options: ImagePipelineConstructorArgs) => ObjectDetectionPipelineType;
1067
+ /**
1068
+ * @typedef {Object} ObjectDetectionPipelineSingle
1069
+ * @property {string} label The class label identified by the model.
1070
+ * @property {number} score The score attributed by the model for that label.
1071
+ * @property {BoundingBox} box The bounding box of detected object in image's original size, or as a percentage if `percentage` is set to true.
1072
+ * @typedef {ObjectDetectionPipelineSingle[]} ObjectDetectionPipelineOutput
1073
+ *
1074
+ * @typedef {Object} ObjectDetectionPipelineOptions Parameters specific to object detection pipelines.
1075
+ * @property {number} [threshold=0.9] The threshold used to filter boxes by score.
1076
+ * @property {boolean} [percentage=false] Whether to return the boxes coordinates in percentage (true) or in pixels (false).
1077
+ *
1078
+ * @callback ObjectDetectionPipelineCallback Detect objects (bounding boxes & classes) in the image(s) passed as inputs.
1079
+ * @param {ImagePipelineInputs} images The input images.
1080
+ * @param {ObjectDetectionPipelineOptions} [options] The options to use for object detection.
1081
+ * @returns {Promise<ObjectDetectionPipelineOutput|ObjectDetectionPipelineOutput[]>} A list of objects or a list of list of objects.
1082
+ *
1083
+ * @typedef {ImagePipelineConstructorArgs & ObjectDetectionPipelineCallback & Disposable} ObjectDetectionPipelineType
1084
+ */
1085
+ /**
1086
+ * Object detection pipeline using any `AutoModelForObjectDetection`.
1087
+ * This pipeline predicts bounding boxes of objects and their classes.
1088
+ *
1089
+ * **Example:** Run object-detection with `Xenova/detr-resnet-50`.
1090
+ * ```javascript
1091
+ * const detector = await pipeline('object-detection', 'Xenova/detr-resnet-50');
1092
+ * const img = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/cats.jpg';
1093
+ * const output = await detector(img, { threshold: 0.9 });
1094
+ * // [{
1095
+ * // score: 0.9976370930671692,
1096
+ * // label: "remote",
1097
+ * // box: { xmin: 31, ymin: 68, xmax: 190, ymax: 118 }
1098
+ * // },
1099
+ * // ...
1100
+ * // {
1101
+ * // score: 0.9984092116355896,
1102
+ * // label: "cat",
1103
+ * // box: { xmin: 331, ymin: 19, xmax: 649, ymax: 371 }
1104
+ * // }]
1105
+ * ```
1106
+ */
1107
+ export class ObjectDetectionPipeline extends ObjectDetectionPipeline_base {
1108
+ _call(images: ImagePipelineInputs, options?: ObjectDetectionPipelineOptions): Promise<ObjectDetectionPipelineOutput | ObjectDetectionPipelineOutput[]>;
1109
+ }
1110
+ declare const ZeroShotObjectDetectionPipeline_base: new (options: TextImagePipelineConstructorArgs) => ZeroShotObjectDetectionPipelineType;
1111
+ /**
1112
+ * @typedef {Object} ZeroShotObjectDetectionOutput
1113
+ * @property {string} label Text query corresponding to the found object.
1114
+ * @property {number} score Score corresponding to the object (between 0 and 1).
1115
+ * @property {BoundingBox} box Bounding box of the detected object in image's original size, or as a percentage if `percentage` is set to true.
1116
+ *
1117
+ * @typedef {Object} ZeroShotObjectDetectionPipelineOptions Parameters specific to zero-shot object detection pipelines.
1118
+ * @property {number} [threshold=0.1] The probability necessary to make a prediction.
1119
+ * @property {number} [top_k=null] The number of top predictions that will be returned by the pipeline.
1120
+ * If the provided number is `null` or higher than the number of predictions available, it will default
1121
+ * to the number of predictions.
1122
+ * @property {boolean} [percentage=false] Whether to return the boxes coordinates in percentage (true) or in pixels (false).
1123
+ *
1124
+ * @callback ZeroShotObjectDetectionPipelineCallback Detect objects (bounding boxes & classes) in the image(s) passed as inputs.
1125
+ * @param {ImagePipelineInputs} images The input images.
1126
+ * @param {string[]} candidate_labels What the model should recognize in the image.
1127
+ * @param {ZeroShotObjectDetectionPipelineOptions} [options] The options to use for zero-shot object detection.
1128
+ * @returns {Promise<ZeroShotObjectDetectionOutput[]|ZeroShotObjectDetectionOutput[][]>} An array of objects containing the predicted labels, scores, and bounding boxes.
1129
+ *
1130
+ * @typedef {TextImagePipelineConstructorArgs & ZeroShotObjectDetectionPipelineCallback & Disposable} ZeroShotObjectDetectionPipelineType
1131
+ */
1132
+ /**
1133
+ * Zero-shot object detection pipeline. This pipeline predicts bounding boxes of
1134
+ * objects when you provide an image and a set of `candidate_labels`.
1135
+ *
1136
+ * **Example:** Zero-shot object detection w/ `Xenova/owlvit-base-patch32`.
1137
+ * ```javascript
1138
+ * const detector = await pipeline('zero-shot-object-detection', 'Xenova/owlvit-base-patch32');
1139
+ * const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/astronaut.png';
1140
+ * const candidate_labels = ['human face', 'rocket', 'helmet', 'american flag'];
1141
+ * const output = await detector(url, candidate_labels);
1142
+ * // [
1143
+ * // {
1144
+ * // score: 0.24392342567443848,
1145
+ * // label: 'human face',
1146
+ * // box: { xmin: 180, ymin: 67, xmax: 274, ymax: 175 }
1147
+ * // },
1148
+ * // {
1149
+ * // score: 0.15129457414150238,
1150
+ * // label: 'american flag',
1151
+ * // box: { xmin: 0, ymin: 4, xmax: 106, ymax: 513 }
1152
+ * // },
1153
+ * // {
1154
+ * // score: 0.13649864494800568,
1155
+ * // label: 'helmet',
1156
+ * // box: { xmin: 277, ymin: 337, xmax: 511, ymax: 511 }
1157
+ * // },
1158
+ * // {
1159
+ * // score: 0.10262022167444229,
1160
+ * // label: 'rocket',
1161
+ * // box: { xmin: 352, ymin: -1, xmax: 463, ymax: 287 }
1162
+ * // }
1163
+ * // ]
1164
+ * ```
1165
+ *
1166
+ * **Example:** Zero-shot object detection w/ `Xenova/owlvit-base-patch32` (returning top 4 matches and setting a threshold).
1167
+ * ```javascript
1168
+ * const detector = await pipeline('zero-shot-object-detection', 'Xenova/owlvit-base-patch32');
1169
+ * const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/beach.png';
1170
+ * const candidate_labels = ['hat', 'book', 'sunglasses', 'camera'];
1171
+ * const output = await detector(url, candidate_labels, { top_k: 4, threshold: 0.05 });
1172
+ * // [
1173
+ * // {
1174
+ * // score: 0.1606510728597641,
1175
+ * // label: 'sunglasses',
1176
+ * // box: { xmin: 347, ymin: 229, xmax: 429, ymax: 264 }
1177
+ * // },
1178
+ * // {
1179
+ * // score: 0.08935828506946564,
1180
+ * // label: 'hat',
1181
+ * // box: { xmin: 38, ymin: 174, xmax: 258, ymax: 364 }
1182
+ * // },
1183
+ * // {
1184
+ * // score: 0.08530698716640472,
1185
+ * // label: 'camera',
1186
+ * // box: { xmin: 187, ymin: 350, xmax: 260, ymax: 411 }
1187
+ * // },
1188
+ * // {
1189
+ * // score: 0.08349756896495819,
1190
+ * // label: 'book',
1191
+ * // box: { xmin: 261, ymin: 280, xmax: 494, ymax: 425 }
1192
+ * // }
1193
+ * // ]
1194
+ * ```
1195
+ */
1196
+ export class ZeroShotObjectDetectionPipeline extends ZeroShotObjectDetectionPipeline_base {
1197
+ _call(images: ImagePipelineInputs, candidate_labels: string[], options?: ZeroShotObjectDetectionPipelineOptions): Promise<ZeroShotObjectDetectionOutput[] | ZeroShotObjectDetectionOutput[][]>;
1198
+ }
1199
+ declare const DocumentQuestionAnsweringPipeline_base: new (options: TextImagePipelineConstructorArgs) => DocumentQuestionAnsweringPipelineType;
1200
+ /**
1201
+ * @typedef {Object} DocumentQuestionAnsweringSingle
1202
+ * @property {string} answer The generated text.
1203
+ * @typedef {DocumentQuestionAnsweringSingle[]} DocumentQuestionAnsweringOutput
1204
+ *
1205
+ * @callback DocumentQuestionAnsweringPipelineCallback Answer the question given as input by using the document.
1206
+ * @param {ImageInput} image The image of the document to use.
1207
+ * @param {string} question A question to ask of the document.
1208
+ * @param {Partial<import('./generation/configuration_utils.js').GenerationConfig>} [options] Additional keyword arguments to pass along to the generate method of the model.
1209
+ * @returns {Promise<DocumentQuestionAnsweringOutput|DocumentQuestionAnsweringOutput[]>} An object (or array of objects) containing the answer(s).
1210
+ *
1211
+ * @typedef {TextImagePipelineConstructorArgs & DocumentQuestionAnsweringPipelineCallback & Disposable} DocumentQuestionAnsweringPipelineType
1212
+ */
1213
+ /**
1214
+ * Document Question Answering pipeline using any `AutoModelForDocumentQuestionAnswering`.
1215
+ * The inputs/outputs are similar to the (extractive) question answering pipeline; however,
1216
+ * the pipeline takes an image (and optional OCR'd words/boxes) as input instead of text context.
1217
+ *
1218
+ * **Example:** Answer questions about a document with `Xenova/donut-base-finetuned-docvqa`.
1219
+ * ```javascript
1220
+ * const qa_pipeline = await pipeline('document-question-answering', 'Xenova/donut-base-finetuned-docvqa');
1221
+ * const image = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/invoice.png';
1222
+ * const question = 'What is the invoice number?';
1223
+ * const output = await qa_pipeline(image, question);
1224
+ * // [{ answer: 'us-001' }]
1225
+ * ```
1226
+ */
1227
+ export class DocumentQuestionAnsweringPipeline extends DocumentQuestionAnsweringPipeline_base {
1228
+ _call(image: ImageInput, question: string, options?: Partial<import('./generation/configuration_utils.js').GenerationConfig>): Promise<DocumentQuestionAnsweringOutput | DocumentQuestionAnsweringOutput[]>;
1229
+ }
1230
+ declare const TextToAudioPipeline_base: new (options: TextToAudioPipelineConstructorArgs) => TextToAudioPipelineType;
1231
+ /**
1232
+ * @typedef {Object} VocoderOptions
1233
+ * @property {PreTrainedModel} [vocoder] The vocoder used by the pipeline (if the model uses one). If not provided, use the default HifiGan vocoder.
1234
+ * @typedef {TextAudioPipelineConstructorArgs & VocoderOptions} TextToAudioPipelineConstructorArgs
1235
+ */
1236
+ /**
1237
+ * @typedef {Object} TextToAudioOutput
1238
+ * @property {Float32Array} audio The generated audio waveform.
1239
+ * @property {number} sampling_rate The sampling rate of the generated audio waveform.
1240
+ *
1241
+ * @typedef {Object} TextToAudioPipelineOptions Parameters specific to text-to-audio pipelines.
1242
+ * @property {Tensor|Float32Array|string|URL} [speaker_embeddings=null] The speaker embeddings (if the model requires it).
1243
+ *
1244
+ * @callback TextToAudioPipelineCallback Generates speech/audio from the inputs.
1245
+ * @param {string|string[]} texts The text(s) to generate.
1246
+ * @param {TextToAudioPipelineOptions} options Parameters passed to the model generation/forward method.
1247
+ * @returns {Promise<TextToAudioOutput>} An object containing the generated audio and sampling rate.
1248
+ *
1249
+ * @typedef {TextToAudioPipelineConstructorArgs & TextToAudioPipelineCallback & Disposable} TextToAudioPipelineType
1250
+ */
1251
+ /**
1252
+ * Text-to-audio generation pipeline using any `AutoModelForTextToWaveform` or `AutoModelForTextToSpectrogram`.
1253
+ * This pipeline generates an audio file from an input text and optional other conditional inputs.
1254
+ *
1255
+ * **Example:** Generate audio from text with `Xenova/speecht5_tts`.
1256
+ * ```javascript
1257
+ * const synthesizer = await pipeline('text-to-speech', 'Xenova/speecht5_tts', { quantized: false });
1258
+ * const speaker_embeddings = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/speaker_embeddings.bin';
1259
+ * const out = await synthesizer('Hello, my dog is cute', { speaker_embeddings });
1260
+ * // {
1261
+ * // audio: Float32Array(26112) [-0.00005657337896991521, 0.00020583874720614403, ...],
1262
+ * // sampling_rate: 16000
1263
+ * // }
1264
+ * ```
1265
+ *
1266
+ * You can then save the audio to a .wav file with the `wavefile` package:
1267
+ * ```javascript
1268
+ * import wavefile from 'wavefile';
1269
+ * import fs from 'fs';
1270
+ *
1271
+ * const wav = new wavefile.WaveFile();
1272
+ * wav.fromScratch(1, out.sampling_rate, '32f', out.audio);
1273
+ * fs.writeFileSync('out.wav', wav.toBuffer());
1274
+ * ```
1275
+ *
1276
+ * **Example:** Multilingual speech generation with `Xenova/mms-tts-fra`. See [here](https://huggingface.co/models?pipeline_tag=text-to-speech&other=vits&sort=trending) for the full list of available languages (1107).
1277
+ * ```javascript
1278
+ * const synthesizer = await pipeline('text-to-speech', 'Xenova/mms-tts-fra');
1279
+ * const out = await synthesizer('Bonjour');
1280
+ * // {
1281
+ * // audio: Float32Array(23808) [-0.00037693005288019776, 0.0003325853613205254, ...],
1282
+ * // sampling_rate: 16000
1283
+ * // }
1284
+ * ```
1285
+ */
1286
+ export class TextToAudioPipeline extends TextToAudioPipeline_base {
1287
+ DEFAULT_VOCODER_ID: string;
1288
+ vocoder: PreTrainedModel;
1289
+ _call(texts: string | string[], options: TextToAudioPipelineOptions): Promise<TextToAudioOutput>;
1290
+ _call_text_to_waveform(text_inputs: any): Promise<{
1291
+ audio: any;
1292
+ sampling_rate: any;
1293
+ }>;
1294
+ _call_text_to_spectrogram(text_inputs: any, { speaker_embeddings }: {
1295
+ speaker_embeddings: any;
1296
+ }): Promise<{
1297
+ audio: any;
1298
+ sampling_rate: any;
1299
+ }>;
1300
+ }
1301
+ declare const ImageToImagePipeline_base: new (options: ImagePipelineConstructorArgs) => ImageToImagePipelineType;
1302
+ /**
1303
+ * @callback ImageToImagePipelineCallback Transform the image(s) passed as inputs.
1304
+ * @param {ImagePipelineInputs} images The images to transform.
1305
+ * @returns {Promise<RawImage|RawImage[]>} The transformed image or list of images.
1306
+ *
1307
+ * @typedef {ImagePipelineConstructorArgs & ImageToImagePipelineCallback & Disposable} ImageToImagePipelineType
1308
+ */
1309
+ /**
1310
+ * Image to Image pipeline using any `AutoModelForImageToImage`. This pipeline generates an image based on a previous image input.
1311
+ *
1312
+ * **Example:** Super-resolution w/ `Xenova/swin2SR-classical-sr-x2-64`
1313
+ * ```javascript
1314
+ * const upscaler = await pipeline('image-to-image', 'Xenova/swin2SR-classical-sr-x2-64');
1315
+ * const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/butterfly.jpg';
1316
+ * const output = await upscaler(url);
1317
+ * // RawImage {
1318
+ * // data: Uint8Array(786432) [ 41, 31, 24, 43, ... ],
1319
+ * // width: 512,
1320
+ * // height: 512,
1321
+ * // channels: 3
1322
+ * // }
1323
+ * ```
1324
+ */
1325
+ export class ImageToImagePipeline extends ImageToImagePipeline_base {
1326
+ _call(images: ImagePipelineInputs): Promise<RawImage | RawImage[]>;
1327
+ }
1328
+ declare const DepthEstimationPipeline_base: new (options: ImagePipelineConstructorArgs) => DepthEstimationPipelineType;
1329
+ /**
1330
+ * @typedef {Object} DepthEstimationPipelineOutput
1331
+ * @property {Tensor} predicted_depth The raw depth map predicted by the model.
1332
+ * @property {RawImage} depth The processed depth map as an image (with the same size as the input image).
1333
+ *
1334
+ * @callback DepthEstimationPipelineCallback Predicts the depth for the image(s) passed as inputs.
1335
+ * @param {ImagePipelineInputs} images The images to compute depth for.
1336
+ * @returns {Promise<DepthEstimationPipelineOutput|DepthEstimationPipelineOutput[]>} An image or a list of images containing result(s).
1337
+ *
1338
+ * @typedef {ImagePipelineConstructorArgs & DepthEstimationPipelineCallback & Disposable} DepthEstimationPipelineType
1339
+ */
1340
+ /**
1341
+ * Depth estimation pipeline using any `AutoModelForDepthEstimation`. This pipeline predicts the depth of an image.
1342
+ *
1343
+ * **Example:** Depth estimation w/ `Xenova/dpt-hybrid-midas`
1344
+ * ```javascript
1345
+ * const depth_estimator = await pipeline('depth-estimation', 'Xenova/dpt-hybrid-midas');
1346
+ * const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/cats.jpg';
1347
+ * const out = await depth_estimator(url);
1348
+ * // {
1349
+ * // predicted_depth: Tensor {
1350
+ * // dims: [ 384, 384 ],
1351
+ * // type: 'float32',
1352
+ * // data: Float32Array(147456) [ 542.859130859375, 545.2833862304688, 546.1649169921875, ... ],
1353
+ * // size: 147456
1354
+ * // },
1355
+ * // depth: RawImage {
1356
+ * // data: Uint8Array(307200) [ 86, 86, 86, ... ],
1357
+ * // width: 640,
1358
+ * // height: 480,
1359
+ * // channels: 1
1360
+ * // }
1361
+ * // }
1362
+ * ```
1363
+ */
1364
+ export class DepthEstimationPipeline extends DepthEstimationPipeline_base {
1365
+ _call(images: ImagePipelineInputs): Promise<DepthEstimationPipelineOutput | DepthEstimationPipelineOutput[]>;
1366
+ }
1367
+ export type ImageInput = string | RawImage | URL;
1368
+ export type ImagePipelineInputs = ImageInput | ImageInput[];
1369
+ export type AudioInput = string | URL | Float32Array | Float64Array;
1370
+ export type AudioPipelineInputs = AudioInput | AudioInput[];
1371
+ export type BoundingBox = {
1372
+ /**
1373
+ * The minimum x coordinate of the bounding box.
1374
+ */
1375
+ xmin: number;
1376
+ /**
1377
+ * The minimum y coordinate of the bounding box.
1378
+ */
1379
+ ymin: number;
1380
+ /**
1381
+ * The maximum x coordinate of the bounding box.
1382
+ */
1383
+ xmax: number;
1384
+ /**
1385
+ * The maximum y coordinate of the bounding box.
1386
+ */
1387
+ ymax: number;
1388
+ };
1389
+ export type TaskType = keyof typeof SUPPORTED_TASKS;
1390
+ export type AliasType = keyof typeof TASK_ALIASES;
1391
+ /**
1392
+ * All possible pipeline types.
1393
+ */
1394
+ export type PipelineType = TaskType | AliasType;
1395
+ /**
1396
+ * A mapping of pipeline names to their corresponding pipeline classes.
1397
+ */
1398
+ export type SupportedTasks = {
1399
+ "text-classification": TextClassificationPipeline;
1400
+ "token-classification": TokenClassificationPipeline;
1401
+ "question-answering": QuestionAnsweringPipeline;
1402
+ "fill-mask": FillMaskPipeline;
1403
+ summarization: SummarizationPipeline;
1404
+ translation: TranslationPipeline;
1405
+ "text2text-generation": Text2TextGenerationPipeline;
1406
+ "text-generation": TextGenerationPipeline;
1407
+ "zero-shot-classification": ZeroShotClassificationPipeline;
1408
+ "audio-classification": AudioClassificationPipeline;
1409
+ "zero-shot-audio-classification": ZeroShotAudioClassificationPipeline;
1410
+ "automatic-speech-recognition": AutomaticSpeechRecognitionPipeline;
1411
+ "text-to-audio": TextToAudioPipeline;
1412
+ "image-to-text": ImageToTextPipeline;
1413
+ "image-classification": ImageClassificationPipeline;
1414
+ "image-segmentation": ImageSegmentationPipeline;
1415
+ "zero-shot-image-classification": ZeroShotImageClassificationPipeline;
1416
+ "object-detection": ObjectDetectionPipeline;
1417
+ "zero-shot-object-detection": ZeroShotObjectDetectionPipeline;
1418
+ "document-question-answering": DocumentQuestionAnsweringPipeline;
1419
+ "image-to-image": ImageToImagePipeline;
1420
+ "depth-estimation": DepthEstimationPipeline;
1421
+ "feature-extraction": FeatureExtractionPipeline;
1422
+ "image-feature-extraction": ImageFeatureExtractionPipeline;
1423
+ };
1424
+ /**
1425
+ * A mapping from pipeline aliases to their corresponding pipeline classes.
1426
+ */
1427
+ export type AliasTasks = {
1428
+ embeddings: FeatureExtractionPipeline;
1429
+ "sentiment-analysis": TextClassificationPipeline;
1430
+ ner: TokenClassificationPipeline;
1431
+ asr: AutomaticSpeechRecognitionPipeline;
1432
+ "text-to-speech": TextToAudioPipeline;
1433
+ };
1434
+ /**
1435
+ * A mapping from all pipeline names and aliases to their corresponding pipeline classes.
1436
+ */
1437
+ export type AllTasks = SupportedTasks & AliasTasks;
1438
+ /**
1439
+ * Disposes the item.
1440
+ */
1441
+ export type DisposeType = () => Promise<void>;
1442
+ export type Disposable = {
1443
+ /**
1444
+ * A promise that resolves when the pipeline has been disposed.
1445
+ */
1446
+ dispose: DisposeType;
1447
+ };
1448
+ export type ModelTokenizerConstructorArgs = {
1449
+ /**
1450
+ * The task of the pipeline. Useful for specifying subtasks.
1451
+ */
1452
+ task: string;
1453
+ /**
1454
+ * The model used by the pipeline.
1455
+ */
1456
+ model: PreTrainedModel;
1457
+ /**
1458
+ * The tokenizer used by the pipeline.
1459
+ */
1460
+ tokenizer: PreTrainedTokenizer;
1461
+ };
1462
+ /**
1463
+ * An object used to instantiate a text-based pipeline.
1464
+ */
1465
+ export type TextPipelineConstructorArgs = ModelTokenizerConstructorArgs;
1466
+ export type ModelProcessorConstructorArgs = {
1467
+ /**
1468
+ * The task of the pipeline. Useful for specifying subtasks.
1469
+ */
1470
+ task: string;
1471
+ /**
1472
+ * The model used by the pipeline.
1473
+ */
1474
+ model: PreTrainedModel;
1475
+ /**
1476
+ * The processor used by the pipeline.
1477
+ */
1478
+ processor: Processor;
1479
+ };
1480
+ /**
1481
+ * An object used to instantiate an audio-based pipeline.
1482
+ */
1483
+ export type AudioPipelineConstructorArgs = ModelProcessorConstructorArgs;
1484
+ /**
1485
+ * An object used to instantiate an image-based pipeline.
1486
+ */
1487
+ export type ImagePipelineConstructorArgs = ModelProcessorConstructorArgs;
1488
+ export type ModelTokenizerProcessorConstructorArgs = {
1489
+ /**
1490
+ * The task of the pipeline. Useful for specifying subtasks.
1491
+ */
1492
+ task: string;
1493
+ /**
1494
+ * The model used by the pipeline.
1495
+ */
1496
+ model: PreTrainedModel;
1497
+ /**
1498
+ * The tokenizer used by the pipeline.
1499
+ */
1500
+ tokenizer: PreTrainedTokenizer;
1501
+ /**
1502
+ * The processor used by the pipeline.
1503
+ */
1504
+ processor: Processor;
1505
+ };
1506
+ /**
1507
+ * An object used to instantiate a text- and audio-based pipeline.
1508
+ */
1509
+ export type TextAudioPipelineConstructorArgs = ModelTokenizerProcessorConstructorArgs;
1510
+ /**
1511
+ * An object used to instantiate a text- and image-based pipeline.
1512
+ */
1513
+ export type TextImagePipelineConstructorArgs = ModelTokenizerProcessorConstructorArgs;
1514
+ export type TextClassificationSingle = {
1515
+ /**
1516
+ * The label predicted.
1517
+ */
1518
+ label: string;
1519
+ /**
1520
+ * The corresponding probability.
1521
+ */
1522
+ score: number;
1523
+ };
1524
+ export type TextClassificationOutput = TextClassificationSingle[];
1525
+ /**
1526
+ * Parameters specific to text classification pipelines.
1527
+ */
1528
+ export type TextClassificationPipelineOptions = {
1529
+ /**
1530
+ * The number of top predictions to be returned.
1531
+ */
1532
+ top_k?: number;
1533
+ };
1534
+ /**
1535
+ * Classify the text(s) given as inputs.
1536
+ */
1537
+ export type TextClassificationPipelineCallback = (texts: string | string[], options?: TextClassificationPipelineOptions) => Promise<TextClassificationOutput | TextClassificationOutput[]>;
1538
+ export type TextClassificationPipelineType = TextPipelineConstructorArgs & TextClassificationPipelineCallback & Disposable;
1539
+ export type TokenClassificationSingle = {
1540
+ /**
1541
+ * The token/word classified. This is obtained by decoding the selected tokens.
1542
+ */
1543
+ word: string;
1544
+ /**
1545
+ * The corresponding probability for `entity`.
1546
+ */
1547
+ score: number;
1548
+ /**
1549
+ * The entity predicted for that token/word.
1550
+ */
1551
+ entity: string;
1552
+ /**
1553
+ * The index of the corresponding token in the sentence.
1554
+ */
1555
+ index: number;
1556
+ /**
1557
+ * The index of the start of the corresponding entity in the sentence.
1558
+ */
1559
+ start?: number;
1560
+ /**
1561
+ * The index of the end of the corresponding entity in the sentence.
1562
+ */
1563
+ end?: number;
1564
+ };
1565
+ export type TokenClassificationOutput = TokenClassificationSingle[];
1566
+ /**
1567
+ * Parameters specific to token classification pipelines.
1568
+ */
1569
+ export type TokenClassificationPipelineOptions = {
1570
+ /**
1571
+ * A list of labels to ignore.
1572
+ */
1573
+ ignore_labels?: string[];
1574
+ };
1575
+ /**
1576
+ * Classify each token of the text(s) given as inputs.
1577
+ */
1578
+ export type TokenClassificationPipelineCallback = (texts: string | string[], options?: TokenClassificationPipelineOptions) => Promise<TokenClassificationOutput | TokenClassificationOutput[]>;
1579
+ export type TokenClassificationPipelineType = TextPipelineConstructorArgs & TokenClassificationPipelineCallback & Disposable;
1580
+ export type QuestionAnsweringOutput = {
1581
+ /**
1582
+ * The probability associated to the answer.
1583
+ */
1584
+ score: number;
1585
+ /**
1586
+ * The character start index of the answer (in the tokenized version of the input).
1587
+ */
1588
+ start?: number;
1589
+ /**
1590
+ * The character end index of the answer (in the tokenized version of the input).
1591
+ */
1592
+ end?: number;
1593
+ /**
1594
+ * The answer to the question.
1595
+ */
1596
+ answer: string;
1597
+ };
1598
+ /**
1599
+ * Parameters specific to question answering pipelines.
1600
+ */
1601
+ export type QuestionAnsweringPipelineOptions = {
1602
+ /**
1603
+ * The number of top answer predictions to be returned.
1604
+ */
1605
+ top_k?: number;
1606
+ };
1607
+ /**
1608
+ * Answer the question(s) given as inputs by using the context(s).
1609
+ */
1610
+ export type QuestionAnsweringPipelineCallback = (question: string | string[], context: string | string[], options?: QuestionAnsweringPipelineOptions) => Promise<QuestionAnsweringOutput | QuestionAnsweringOutput[]>;
1611
+ export type QuestionAnsweringPipelineType = TextPipelineConstructorArgs & QuestionAnsweringPipelineCallback & Disposable;
1612
+ export type FillMaskSingle = {
1613
+ /**
1614
+ * The corresponding input with the mask token prediction.
1615
+ */
1616
+ sequence: string;
1617
+ /**
1618
+ * The corresponding probability.
1619
+ */
1620
+ score: number;
1621
+ /**
1622
+ * The predicted token id (to replace the masked one).
1623
+ */
1624
+ token: number;
1625
+ /**
1626
+ * The predicted token (to replace the masked one).
1627
+ */
1628
+ token_str: string;
1629
+ };
1630
+ export type FillMaskOutput = FillMaskSingle[];
1631
+ /**
1632
+ * Parameters specific to fill mask pipelines.
1633
+ */
1634
+ export type FillMaskPipelineOptions = {
1635
+ /**
1636
+ * When passed, overrides the number of predictions to return.
1637
+ */
1638
+ top_k?: number;
1639
+ };
1640
+ /**
1641
+ * Fill the masked token in the text(s) given as inputs.
1642
+ */
1643
+ export type FillMaskPipelineCallback = (texts: string | string[], options?: FillMaskPipelineOptions) => Promise<FillMaskOutput | FillMaskOutput[]>;
1644
+ export type FillMaskPipelineType = TextPipelineConstructorArgs & FillMaskPipelineCallback & Disposable;
1645
+ export type Text2TextGenerationSingle = {
1646
+ /**
1647
+ * The generated text.
1648
+ */
1649
+ generated_text: string;
1650
+ };
1651
+ export type Text2TextGenerationOutput = Text2TextGenerationSingle[];
1652
+ /**
1653
+ * Generate the output text(s) using text(s) given as inputs.
1654
+ */
1655
+ export type Text2TextGenerationPipelineCallback = (texts: string | string[], options?: Partial<import('./generation/configuration_utils.js').GenerationConfig>) => Promise<Text2TextGenerationOutput | Text2TextGenerationOutput[]>;
1656
+ export type Text2TextGenerationPipelineType = TextPipelineConstructorArgs & Text2TextGenerationPipelineCallback & Disposable;
1657
+ export type SummarizationSingle = {
1658
+ /**
1659
+ * The summary text.
1660
+ */
1661
+ summary_text: string;
1662
+ };
1663
+ export type SummarizationOutput = SummarizationSingle[];
1664
+ /**
1665
+ * Summarize the text(s) given as inputs.
1666
+ */
1667
+ export type SummarizationPipelineCallback = (texts: string | string[], options?: import('./generation/configuration_utils.js').GenerationConfig) => Promise<SummarizationOutput | SummarizationOutput[]>;
1668
+ export type SummarizationPipelineType = TextPipelineConstructorArgs & SummarizationPipelineCallback & Disposable;
1669
+ export type TranslationSingle = {
1670
+ /**
1671
+ * The translated text.
1672
+ */
1673
+ translation_text: string;
1674
+ };
1675
+ export type TranslationOutput = TranslationSingle[];
1676
+ /**
1677
+ * Translate the text(s) given as inputs.
1678
+ */
1679
+ export type TranslationPipelineCallback = (texts: string | string[], options?: import('./generation/configuration_utils.js').GenerationConfig) => Promise<TranslationOutput | TranslationOutput[]>;
1680
+ export type TranslationPipelineType = TextPipelineConstructorArgs & TranslationPipelineCallback & Disposable;
1681
+ export type Chat = import('./tokenizers.js').Message[];
1682
+ export type TextGenerationSingle = {
1683
+ /**
1684
+ * The generated text.
1685
+ */
1686
+ generated_text: string | Chat;
1687
+ };
1688
+ export type TextGenerationOutput = TextGenerationSingle[];
1689
+ /**
1690
+ * Parameters specific to text-generation pipelines.
1691
+ */
1692
+ export type TextGenerationSpecificParams = {
1693
+ /**
1694
+ * Whether or not to add special tokens when tokenizing the sequences.
1695
+ */
1696
+ add_special_tokens?: boolean;
1697
+ /**
1698
+ * If set to `false` only added text is returned, otherwise the full text is returned.
1699
+ */
1700
+ return_full_text?: boolean;
1701
+ };
1702
+ export type TextGenerationConfig = import('./generation/configuration_utils.js').GenerationConfig & TextGenerationSpecificParams;
1703
+ /**
1704
+ * Complete the prompt(s) given as inputs.
1705
+ */
1706
+ export type TextGenerationPipelineCallback = (texts: string | string[] | Chat | Chat[], options?: Partial<TextGenerationConfig>) => Promise<TextGenerationOutput | TextGenerationOutput[]>;
1707
+ export type TextGenerationPipelineType = TextPipelineConstructorArgs & TextGenerationPipelineCallback & Disposable;
1708
+ export type ZeroShotClassificationOutput = {
1709
+ /**
1710
+ * The sequence for which this is the output.
1711
+ */
1712
+ sequence: string;
1713
+ /**
1714
+ * The labels sorted by order of likelihood.
1715
+ */
1716
+ labels: string[];
1717
+ /**
1718
+ * The probabilities for each of the labels.
1719
+ */
1720
+ scores: number[];
1721
+ };
1722
+ /**
1723
+ * Parameters specific to zero-shot classification pipelines.
1724
+ */
1725
+ export type ZeroShotClassificationPipelineOptions = {
1726
+ /**
1727
+ * The template used to turn each
1728
+ * candidate label into an NLI-style hypothesis. The candidate label will replace the {} placeholder.
1729
+ */
1730
+ hypothesis_template?: string;
1731
+ /**
1732
+ * Whether or not multiple candidate labels can be true.
1733
+ * If `false`, the scores are normalized such that the sum of the label likelihoods for each sequence
1734
+ * is 1. If `true`, the labels are considered independent and probabilities are normalized for each
1735
+ * candidate by doing a softmax of the entailment score vs. the contradiction score.
1736
+ */
1737
+ multi_label?: boolean;
1738
+ };
1739
+ /**
1740
+ * Classify the sequence(s) given as inputs.
1741
+ */
1742
+ export type ZeroShotClassificationPipelineCallback = (texts: string | string[], candidate_labels: string | string[], options?: ZeroShotClassificationPipelineOptions) => Promise<ZeroShotClassificationOutput | ZeroShotClassificationOutput[]>;
1743
+ export type ZeroShotClassificationPipelineType = TextPipelineConstructorArgs & ZeroShotClassificationPipelineCallback & Disposable;
1744
+ /**
1745
+ * Parameters specific to feature extraction pipelines.
1746
+ */
1747
+ export type FeatureExtractionPipelineOptions = {
1748
+ /**
1749
+ * The pooling method to use.
1750
+ */
1751
+ pooling?: 'none' | 'mean' | 'cls';
1752
+ /**
1753
+ * Whether or not to normalize the embeddings in the last dimension.
1754
+ */
1755
+ normalize?: boolean;
1756
+ /**
1757
+ * Whether or not to quantize the embeddings.
1758
+ */
1759
+ quantize?: boolean;
1760
+ /**
1761
+ * The precision to use for quantization.
1762
+ */
1763
+ precision?: 'binary' | 'ubinary';
1764
+ };
1765
+ /**
1766
+ * Extract the features of the input(s).
1767
+ */
1768
+ export type FeatureExtractionPipelineCallback = (texts: string | string[], options?: FeatureExtractionPipelineOptions) => Promise<Tensor>;
1769
+ export type FeatureExtractionPipelineType = TextPipelineConstructorArgs & FeatureExtractionPipelineCallback & Disposable;
1770
+ /**
1771
+ * Parameters specific to image feature extraction pipelines.
1772
+ */
1773
+ export type ImageFeatureExtractionPipelineOptions = {
1774
+ /**
1775
+ * Whether or not to return the pooled output. If set to `false`, the model will return the raw hidden states.
1776
+ */
1777
+ pool?: boolean;
1778
+ };
1779
+ /**
1780
+ * Extract the features of the input(s).
1781
+ */
1782
+ export type ImageFeatureExtractionPipelineCallback = (images: ImagePipelineInputs, options?: ImageFeatureExtractionPipelineOptions) => Promise<Tensor>;
1783
+ export type ImageFeatureExtractionPipelineType = ImagePipelineConstructorArgs & ImageFeatureExtractionPipelineCallback & Disposable;
1784
+ export type AudioClassificationSingle = {
1785
+ /**
1786
+ * The label predicted.
1787
+ */
1788
+ label: string;
1789
+ /**
1790
+ * The corresponding probability.
1791
+ */
1792
+ score: number;
1793
+ };
1794
+ export type AudioClassificationOutput = AudioClassificationSingle[];
1795
+ /**
1796
+ * Parameters specific to audio classification pipelines.
1797
+ */
1798
+ export type AudioClassificationPipelineOptions = {
1799
+ /**
1800
+ * The number of top labels that will be returned by the pipeline.
1801
+ * If the provided number is `null` or higher than the number of labels available in the model configuration,
1802
+ * it will default to the number of labels.
1803
+ */
1804
+ top_k?: number;
1805
+ };
1806
+ /**
1807
+ * Classify the sequence(s) given as inputs.
1808
+ */
1809
+ export type AudioClassificationPipelineCallback = (audio: AudioPipelineInputs, options?: AudioClassificationPipelineOptions) => Promise<AudioClassificationOutput | AudioClassificationOutput[]>;
1810
+ export type AudioClassificationPipelineType = AudioPipelineConstructorArgs & AudioClassificationPipelineCallback & Disposable;
1811
+ export type ZeroShotAudioClassificationOutput = {
1812
+ /**
1813
+ * The label identified by the model. It is one of the suggested `candidate_label`.
1814
+ */
1815
+ label: string;
1816
+ /**
1817
+ * The score attributed by the model for that label (between 0 and 1).
1818
+ */
1819
+ score: number;
1820
+ };
1821
+ /**
1822
+ * Parameters specific to zero-shot audio classification pipelines.
1823
+ */
1824
+ export type ZeroShotAudioClassificationPipelineOptions = {
1825
+ /**
1826
+ * The sentence used in conjunction with `candidate_labels`
1827
+ * to attempt the audio classification by replacing the placeholder with the candidate_labels.
1828
+ * Then likelihood is estimated by using `logits_per_audio`.
1829
+ */
1830
+ hypothesis_template?: string;
1831
+ };
1832
+ /**
1833
+ * Classify the sequence(s) given as inputs.
1834
+ */
1835
+ export type ZeroShotAudioClassificationPipelineCallback = (audio: AudioPipelineInputs, candidate_labels: string[], options?: ZeroShotAudioClassificationPipelineOptions) => Promise<ZeroShotAudioClassificationOutput[] | ZeroShotAudioClassificationOutput[][]>;
1836
+ export type ZeroShotAudioClassificationPipelineType = TextAudioPipelineConstructorArgs & ZeroShotAudioClassificationPipelineCallback & Disposable;
1837
+ export type Chunk = {
1838
+ /**
1839
+ * The start and end timestamp of the chunk in seconds.
1840
+ */
1841
+ timestamp: [number, number];
1842
+ /**
1843
+ * The recognized text.
1844
+ */
1845
+ text: string;
1846
+ };
1847
+ export type AutomaticSpeechRecognitionOutput = {
1848
+ /**
1849
+ * The recognized text.
1850
+ */
1851
+ text: string;
1852
+ /**
1853
+ * When using `return_timestamps`, the `chunks` will become a list
1854
+ * containing all the various text chunks identified by the model.
1855
+ */
1856
+ chunks?: Chunk[];
1857
+ };
1858
+ /**
1859
+ * Parameters specific to automatic-speech-recognition pipelines.
1860
+ */
1861
+ export type AutomaticSpeechRecognitionSpecificParams = {
1862
+ /**
1863
+ * Whether to return timestamps or not. Default is `false`.
1864
+ */
1865
+ return_timestamps?: boolean | 'word';
1866
+ /**
1867
+ * The length of audio chunks to process in seconds. Default is 0 (no chunking).
1868
+ */
1869
+ chunk_length_s?: number;
1870
+ /**
1871
+ * The length of overlap between consecutive audio chunks in seconds. If not provided, defaults to `chunk_length_s / 6`.
1872
+ */
1873
+ stride_length_s?: number;
1874
+ /**
1875
+ * Whether to force outputting full sequences or not. Default is `false`.
1876
+ */
1877
+ force_full_sequences?: boolean;
1878
+ /**
1879
+ * The source language. Default is `null`, meaning it should be auto-detected. Use this to potentially improve performance if the source language is known.
1880
+ */
1881
+ language?: string;
1882
+ /**
1883
+ * The task to perform. Default is `null`, meaning it should be auto-detected.
1884
+ */
1885
+ task?: string;
1886
+ /**
1887
+ * The number of frames in the input audio.
1888
+ */
1889
+ num_frames?: number;
1890
+ };
1891
+ export type AutomaticSpeechRecognitionConfig = import('./generation/configuration_utils.js').GenerationConfig & AutomaticSpeechRecognitionSpecificParams;
1892
+ /**
1893
+ * Transcribe the audio sequence(s) given as inputs to text.
1894
+ */
1895
+ export type AutomaticSpeechRecognitionPipelineCallback = (audio: AudioPipelineInputs, options?: Partial<AutomaticSpeechRecognitionConfig>) => Promise<AutomaticSpeechRecognitionOutput | AutomaticSpeechRecognitionOutput[]>;
1896
+ export type AutomaticSpeechRecognitionPipelineType = TextAudioPipelineConstructorArgs & AutomaticSpeechRecognitionPipelineCallback & Disposable;
1897
+ export type ImageToTextSingle = {
1898
+ /**
1899
+ * The generated text.
1900
+ */
1901
+ generated_text: string;
1902
+ };
1903
+ export type ImageToTextOutput = ImageToTextSingle[];
1904
+ /**
1905
+ * Generate text from the image(s) passed as inputs.
1906
+ */
1907
+ export type ImageToTextPipelineCallback = (texts: ImagePipelineInputs, options?: Partial<import('./generation/configuration_utils.js').GenerationConfig>) => Promise<ImageToTextOutput | ImageToTextOutput[]>;
1908
+ export type ImageToTextPipelineType = TextImagePipelineConstructorArgs & ImageToTextPipelineCallback & Disposable;
1909
+ export type ImageClassificationSingle = {
1910
+ /**
1911
+ * The label identified by the model.
1912
+ */
1913
+ label: string;
1914
+ /**
1915
+ * The score attributed by the model for that label.
1916
+ */
1917
+ score: number;
1918
+ };
1919
+ export type ImageClassificationOutput = ImageClassificationSingle[];
1920
+ /**
1921
+ * Parameters specific to image classification pipelines.
1922
+ */
1923
+ export type ImageClassificationPipelineOptions = {
1924
+ /**
1925
+ * The number of top labels that will be returned by the pipeline.
1926
+ */
1927
+ top_k?: number;
1928
+ };
1929
+ /**
1930
+ * Assign labels to the image(s) passed as inputs.
1931
+ */
1932
+ export type ImageClassificationPipelineCallback = (images: ImagePipelineInputs, options?: ImageClassificationPipelineOptions) => Promise<ImageClassificationOutput | ImageClassificationOutput[]>;
1933
+ export type ImageClassificationPipelineType = ImagePipelineConstructorArgs & ImageClassificationPipelineCallback & Disposable;
1934
+ export type ImageSegmentationPipelineOutput = {
1935
+ /**
1936
+ * The label of the segment.
1937
+ */
1938
+ label: string;
1939
+ /**
1940
+ * The score of the segment.
1941
+ */
1942
+ score: number | null;
1943
+ /**
1944
+ * The mask of the segment.
1945
+ */
1946
+ mask: RawImage;
1947
+ };
1948
+ /**
1949
+ * Parameters specific to image segmentation pipelines.
1950
+ */
1951
+ export type ImageSegmentationPipelineOptions = {
1952
+ /**
1953
+ * Probability threshold to filter out predicted masks.
1954
+ */
1955
+ threshold?: number;
1956
+ /**
1957
+ * Threshold to use when turning the predicted masks into binary values.
1958
+ */
1959
+ mask_threshold?: number;
1960
+ /**
1961
+ * Mask overlap threshold to eliminate small, disconnected segments.
1962
+ */
1963
+ overlap_mask_area_threshold?: number;
1964
+ /**
1965
+ * Segmentation task to be performed. One of [`panoptic`, `instance`, and `semantic`],
1966
+ * depending on model capabilities. If not set, the pipeline will attempt to resolve (in that order).
1967
+ */
1968
+ subtask?: null | string;
1969
+ /**
1970
+ * List of label ids to fuse. If not set, do not fuse any labels.
1971
+ */
1972
+ label_ids_to_fuse?: number[];
1973
+ /**
1974
+ * List of target sizes for the input images. If not set, use the original image sizes.
1975
+ */
1976
+ target_sizes?: number[][];
1977
+ };
1978
+ /**
1979
+ * Segment the input images.
1980
+ */
1981
+ export type ImageSegmentationPipelineCallback = (images: ImagePipelineInputs, options?: ImageSegmentationPipelineOptions) => Promise<ImageSegmentationPipelineOutput[]>;
1982
+ export type ImageSegmentationPipelineType = ImagePipelineConstructorArgs & ImageSegmentationPipelineCallback & Disposable;
1983
+ export type ZeroShotImageClassificationOutput = {
1984
+ /**
1985
+ * The label identified by the model. It is one of the suggested `candidate_label`.
1986
+ */
1987
+ label: string;
1988
+ /**
1989
+ * The score attributed by the model for that label (between 0 and 1).
1990
+ */
1991
+ score: number;
1992
+ };
1993
+ /**
1994
+ * Parameters specific to zero-shot image classification pipelines.
1995
+ */
1996
+ export type ZeroShotImageClassificationPipelineOptions = {
1997
+ /**
1998
+ * The sentence used in conjunction with `candidate_labels`
1999
+ * to attempt the image classification by replacing the placeholder with the candidate_labels.
2000
+ * Then likelihood is estimated by using `logits_per_image`.
2001
+ */
2002
+ hypothesis_template?: string;
2003
+ };
2004
+ /**
2005
+ * Assign labels to the image(s) passed as inputs.
2006
+ */
2007
+ export type ZeroShotImageClassificationPipelineCallback = (images: ImagePipelineInputs, candidate_labels: string[], options?: ZeroShotImageClassificationPipelineOptions) => Promise<ZeroShotImageClassificationOutput[] | ZeroShotImageClassificationOutput[][]>;
2008
+ export type ZeroShotImageClassificationPipelineType = TextImagePipelineConstructorArgs & ZeroShotImageClassificationPipelineCallback & Disposable;
2009
+ export type ObjectDetectionPipelineSingle = {
2010
+ /**
2011
+ * The class label identified by the model.
2012
+ */
2013
+ label: string;
2014
+ /**
2015
+ * The score attributed by the model for that label.
2016
+ */
2017
+ score: number;
2018
+ /**
2019
+ * The bounding box of detected object in image's original size, or as a percentage if `percentage` is set to true.
2020
+ */
2021
+ box: BoundingBox;
2022
+ };
2023
+ export type ObjectDetectionPipelineOutput = ObjectDetectionPipelineSingle[];
2024
+ /**
2025
+ * Parameters specific to object detection pipelines.
2026
+ */
2027
+ export type ObjectDetectionPipelineOptions = {
2028
+ /**
2029
+ * The threshold used to filter boxes by score.
2030
+ */
2031
+ threshold?: number;
2032
+ /**
2033
+ * Whether to return the boxes coordinates in percentage (true) or in pixels (false).
2034
+ */
2035
+ percentage?: boolean;
2036
+ };
2037
+ /**
2038
+ * Detect objects (bounding boxes & classes) in the image(s) passed as inputs.
2039
+ */
2040
+ export type ObjectDetectionPipelineCallback = (images: ImagePipelineInputs, options?: ObjectDetectionPipelineOptions) => Promise<ObjectDetectionPipelineOutput | ObjectDetectionPipelineOutput[]>;
2041
+ export type ObjectDetectionPipelineType = ImagePipelineConstructorArgs & ObjectDetectionPipelineCallback & Disposable;
2042
+ export type ZeroShotObjectDetectionOutput = {
2043
+ /**
2044
+ * Text query corresponding to the found object.
2045
+ */
2046
+ label: string;
2047
+ /**
2048
+ * Score corresponding to the object (between 0 and 1).
2049
+ */
2050
+ score: number;
2051
+ /**
2052
+ * Bounding box of the detected object in image's original size, or as a percentage if `percentage` is set to true.
2053
+ */
2054
+ box: BoundingBox;
2055
+ };
2056
+ /**
2057
+ * Parameters specific to zero-shot object detection pipelines.
2058
+ */
2059
+ export type ZeroShotObjectDetectionPipelineOptions = {
2060
+ /**
2061
+ * The probability necessary to make a prediction.
2062
+ */
2063
+ threshold?: number;
2064
+ /**
2065
+ * The number of top predictions that will be returned by the pipeline.
2066
+ * If the provided number is `null` or higher than the number of predictions available, it will default
2067
+ * to the number of predictions.
2068
+ */
2069
+ top_k?: number;
2070
+ /**
2071
+ * Whether to return the boxes coordinates in percentage (true) or in pixels (false).
2072
+ */
2073
+ percentage?: boolean;
2074
+ };
2075
+ /**
2076
+ * Detect objects (bounding boxes & classes) in the image(s) passed as inputs.
2077
+ */
2078
+ export type ZeroShotObjectDetectionPipelineCallback = (images: ImagePipelineInputs, candidate_labels: string[], options?: ZeroShotObjectDetectionPipelineOptions) => Promise<ZeroShotObjectDetectionOutput[] | ZeroShotObjectDetectionOutput[][]>;
2079
+ export type ZeroShotObjectDetectionPipelineType = TextImagePipelineConstructorArgs & ZeroShotObjectDetectionPipelineCallback & Disposable;
2080
+ export type DocumentQuestionAnsweringSingle = {
2081
+ /**
2082
+ * The answer to the question.
2083
+ */
2084
+ answer: string;
2085
+ };
2086
+ export type DocumentQuestionAnsweringOutput = DocumentQuestionAnsweringSingle[];
2087
+ /**
2088
+ * Answer the question given as input by using the document.
2089
+ */
2090
+ export type DocumentQuestionAnsweringPipelineCallback = (image: ImageInput, question: string, options?: Partial<import('./generation/configuration_utils.js').GenerationConfig>) => Promise<DocumentQuestionAnsweringOutput | DocumentQuestionAnsweringOutput[]>;
2091
+ export type DocumentQuestionAnsweringPipelineType = TextImagePipelineConstructorArgs & DocumentQuestionAnsweringPipelineCallback & Disposable;
2092
+ export type VocoderOptions = {
2093
+ /**
2094
+ * The vocoder used by the pipeline (if the model uses one). If not provided, use the default HifiGan vocoder.
2095
+ */
2096
+ vocoder?: PreTrainedModel;
2097
+ };
2098
+ export type TextToAudioPipelineConstructorArgs = TextAudioPipelineConstructorArgs & VocoderOptions;
2099
+ export type TextToAudioOutput = {
2100
+ /**
2101
+ * The generated audio waveform.
2102
+ */
2103
+ audio: Float32Array;
2104
+ /**
2105
+ * The sampling rate of the generated audio waveform.
2106
+ */
2107
+ sampling_rate: number;
2108
+ };
2109
+ /**
2110
+ * Parameters specific to text-to-audio pipelines.
2111
+ */
2112
+ export type TextToAudioPipelineOptions = {
2113
+ /**
2114
+ * The speaker embeddings (if the model requires it).
2115
+ */
2116
+ speaker_embeddings?: Tensor | Float32Array | string | URL;
2117
+ };
2118
+ /**
2119
+ * Generates speech/audio from the inputs.
2120
+ */
2121
+ export type TextToAudioPipelineCallback = (texts: string | string[], options: TextToAudioPipelineOptions) => Promise<TextToAudioOutput>;
2122
+ export type TextToAudioPipelineType = TextToAudioPipelineConstructorArgs & TextToAudioPipelineCallback & Disposable;
2123
+ /**
2124
+ * Transform the image(s) passed as inputs.
2125
+ */
2126
+ export type ImageToImagePipelineCallback = (images: ImagePipelineInputs) => Promise<RawImage | RawImage[]>;
2127
+ export type ImageToImagePipelineType = ImagePipelineConstructorArgs & ImageToImagePipelineCallback & Disposable;
2128
+ export type DepthEstimationPipelineOutput = {
2129
+ /**
2130
+ * The raw depth map predicted by the model.
2131
+ */
2132
+ predicted_depth: Tensor;
2133
+ /**
2134
+ * The processed depth map as an image (with the same size as the input image).
2135
+ */
2136
+ depth: RawImage;
2137
+ };
2138
+ /**
2139
+ * Predicts the depth for the image(s) passed as inputs.
2140
+ */
2141
+ export type DepthEstimationPipelineCallback = (images: ImagePipelineInputs) => Promise<DepthEstimationPipelineOutput | DepthEstimationPipelineOutput[]>;
2142
+ export type DepthEstimationPipelineType = ImagePipelineConstructorArgs & DepthEstimationPipelineCallback & Disposable;
2143
+ import { PreTrainedModel } from './models.js';
2144
+ import { PreTrainedTokenizer } from './tokenizers.js';
2145
+ import { Processor } from './processors.js';
2146
+ import { Tensor } from './utils/tensor.js';
2147
+ import { RawImage } from './utils/image.js';
2148
+ declare const SUPPORTED_TASKS: Readonly<{
2149
+ "text-classification": {
2150
+ tokenizer: typeof AutoTokenizer;
2151
+ pipeline: typeof TextClassificationPipeline;
2152
+ model: typeof AutoModelForSequenceClassification;
2153
+ default: {
2154
+ model: string;
2155
+ };
2156
+ type: string;
2157
+ };
2158
+ "token-classification": {
2159
+ tokenizer: typeof AutoTokenizer;
2160
+ pipeline: typeof TokenClassificationPipeline;
2161
+ model: typeof AutoModelForTokenClassification;
2162
+ default: {
2163
+ model: string;
2164
+ };
2165
+ type: string;
2166
+ };
2167
+ "question-answering": {
2168
+ tokenizer: typeof AutoTokenizer;
2169
+ pipeline: typeof QuestionAnsweringPipeline;
2170
+ model: typeof AutoModelForQuestionAnswering;
2171
+ default: {
2172
+ model: string;
2173
+ };
2174
+ type: string;
2175
+ };
2176
+ "fill-mask": {
2177
+ tokenizer: typeof AutoTokenizer;
2178
+ pipeline: typeof FillMaskPipeline;
2179
+ model: typeof AutoModelForMaskedLM;
2180
+ default: {
2181
+ model: string;
2182
+ };
2183
+ type: string;
2184
+ };
2185
+ summarization: {
2186
+ tokenizer: typeof AutoTokenizer;
2187
+ pipeline: typeof SummarizationPipeline;
2188
+ model: typeof AutoModelForSeq2SeqLM;
2189
+ default: {
2190
+ model: string;
2191
+ };
2192
+ type: string;
2193
+ };
2194
+ translation: {
2195
+ tokenizer: typeof AutoTokenizer;
2196
+ pipeline: typeof TranslationPipeline;
2197
+ model: typeof AutoModelForSeq2SeqLM;
2198
+ default: {
2199
+ model: string;
2200
+ };
2201
+ type: string;
2202
+ };
2203
+ "text2text-generation": {
2204
+ tokenizer: typeof AutoTokenizer;
2205
+ pipeline: typeof Text2TextGenerationPipeline;
2206
+ model: typeof AutoModelForSeq2SeqLM;
2207
+ default: {
2208
+ model: string;
2209
+ };
2210
+ type: string;
2211
+ };
2212
+ "text-generation": {
2213
+ tokenizer: typeof AutoTokenizer;
2214
+ pipeline: typeof TextGenerationPipeline;
2215
+ model: typeof AutoModelForCausalLM;
2216
+ default: {
2217
+ model: string;
2218
+ };
2219
+ type: string;
2220
+ };
2221
+ "zero-shot-classification": {
2222
+ tokenizer: typeof AutoTokenizer;
2223
+ pipeline: typeof ZeroShotClassificationPipeline;
2224
+ model: typeof AutoModelForSequenceClassification;
2225
+ default: {
2226
+ model: string;
2227
+ };
2228
+ type: string;
2229
+ };
2230
+ "audio-classification": {
2231
+ pipeline: typeof AudioClassificationPipeline;
2232
+ model: typeof AutoModelForAudioClassification;
2233
+ processor: typeof AutoProcessor;
2234
+ default: {
2235
+ model: string;
2236
+ };
2237
+ type: string;
2238
+ };
2239
+ "zero-shot-audio-classification": {
2240
+ tokenizer: typeof AutoTokenizer;
2241
+ pipeline: typeof ZeroShotAudioClassificationPipeline;
2242
+ model: typeof AutoModel;
2243
+ processor: typeof AutoProcessor;
2244
+ default: {
2245
+ model: string;
2246
+ };
2247
+ type: string;
2248
+ };
2249
+ "automatic-speech-recognition": {
2250
+ tokenizer: typeof AutoTokenizer;
2251
+ pipeline: typeof AutomaticSpeechRecognitionPipeline;
2252
+ model: (typeof AutoModelForSpeechSeq2Seq | typeof AutoModelForCTC)[];
2253
+ processor: typeof AutoProcessor;
2254
+ default: {
2255
+ model: string;
2256
+ };
2257
+ type: string;
2258
+ };
2259
+ "text-to-audio": {
2260
+ tokenizer: typeof AutoTokenizer;
2261
+ pipeline: typeof TextToAudioPipeline;
2262
+ model: (typeof AutoModelForTextToSpectrogram | typeof AutoModelForTextToWaveform)[];
2263
+ processor: (typeof AutoProcessor)[];
2264
+ default: {
2265
+ model: string;
2266
+ };
2267
+ type: string;
2268
+ };
2269
+ "image-to-text": {
2270
+ tokenizer: typeof AutoTokenizer;
2271
+ pipeline: typeof ImageToTextPipeline;
2272
+ model: typeof AutoModelForVision2Seq;
2273
+ processor: typeof AutoProcessor;
2274
+ default: {
2275
+ model: string;
2276
+ };
2277
+ type: string;
2278
+ };
2279
+ "image-classification": {
2280
+ pipeline: typeof ImageClassificationPipeline;
2281
+ model: typeof AutoModelForImageClassification;
2282
+ processor: typeof AutoProcessor;
2283
+ default: {
2284
+ model: string;
2285
+ };
2286
+ type: string;
2287
+ };
2288
+ "image-segmentation": {
2289
+ pipeline: typeof ImageSegmentationPipeline;
2290
+ model: (typeof AutoModelForImageSegmentation)[];
2291
+ processor: typeof AutoProcessor;
2292
+ default: {
2293
+ model: string;
2294
+ };
2295
+ type: string;
2296
+ };
2297
+ "zero-shot-image-classification": {
2298
+ tokenizer: typeof AutoTokenizer;
2299
+ pipeline: typeof ZeroShotImageClassificationPipeline;
2300
+ model: typeof AutoModel;
2301
+ processor: typeof AutoProcessor;
2302
+ default: {
2303
+ model: string;
2304
+ };
2305
+ type: string;
2306
+ };
2307
+ "object-detection": {
2308
+ pipeline: typeof ObjectDetectionPipeline;
2309
+ model: typeof AutoModelForObjectDetection;
2310
+ processor: typeof AutoProcessor;
2311
+ default: {
2312
+ model: string;
2313
+ };
2314
+ type: string;
2315
+ };
2316
+ "zero-shot-object-detection": {
2317
+ tokenizer: typeof AutoTokenizer;
2318
+ pipeline: typeof ZeroShotObjectDetectionPipeline;
2319
+ model: typeof AutoModelForZeroShotObjectDetection;
2320
+ processor: typeof AutoProcessor;
2321
+ default: {
2322
+ model: string;
2323
+ };
2324
+ type: string;
2325
+ };
2326
+ "document-question-answering": {
2327
+ tokenizer: typeof AutoTokenizer;
2328
+ pipeline: typeof DocumentQuestionAnsweringPipeline;
2329
+ model: typeof AutoModelForDocumentQuestionAnswering;
2330
+ processor: typeof AutoProcessor;
2331
+ default: {
2332
+ model: string;
2333
+ };
2334
+ type: string;
2335
+ };
2336
+ "image-to-image": {
2337
+ pipeline: typeof ImageToImagePipeline;
2338
+ model: typeof AutoModelForImageToImage;
2339
+ processor: typeof AutoProcessor;
2340
+ default: {
2341
+ model: string;
2342
+ };
2343
+ type: string;
2344
+ };
2345
+ "depth-estimation": {
2346
+ pipeline: typeof DepthEstimationPipeline;
2347
+ model: typeof AutoModelForDepthEstimation;
2348
+ processor: typeof AutoProcessor;
2349
+ default: {
2350
+ model: string;
2351
+ };
2352
+ type: string;
2353
+ };
2354
+ "feature-extraction": {
2355
+ tokenizer: typeof AutoTokenizer;
2356
+ pipeline: typeof FeatureExtractionPipeline;
2357
+ model: typeof AutoModel;
2358
+ default: {
2359
+ model: string;
2360
+ };
2361
+ type: string;
2362
+ };
2363
+ "image-feature-extraction": {
2364
+ processor: typeof AutoProcessor;
2365
+ pipeline: typeof ImageFeatureExtractionPipeline;
2366
+ model: (typeof AutoModel)[];
2367
+ default: {
2368
+ model: string;
2369
+ };
2370
+ type: string;
2371
+ };
2372
+ }>;
2373
+ declare const TASK_ALIASES: Readonly<{
2374
+ "sentiment-analysis": "text-classification";
2375
+ ner: "token-classification";
2376
+ asr: "automatic-speech-recognition";
2377
+ "text-to-speech": "text-to-audio";
2378
+ embeddings: "feature-extraction";
2379
+ }>;
2380
+ import { AutoTokenizer } from './tokenizers.js';
2381
+ import { AutoModelForSequenceClassification } from './models.js';
2382
+ import { AutoModelForTokenClassification } from './models.js';
2383
+ import { AutoModelForQuestionAnswering } from './models.js';
2384
+ import { AutoModelForMaskedLM } from './models.js';
2385
+ import { AutoModelForSeq2SeqLM } from './models.js';
2386
+ import { AutoModelForCausalLM } from './models.js';
2387
+ import { AutoModelForAudioClassification } from './models.js';
2388
+ import { AutoProcessor } from './processors.js';
2389
+ import { AutoModel } from './models.js';
2390
+ import { AutoModelForSpeechSeq2Seq } from './models.js';
2391
+ import { AutoModelForCTC } from './models.js';
2392
+ import { AutoModelForTextToSpectrogram } from './models.js';
2393
+ import { AutoModelForTextToWaveform } from './models.js';
2394
+ import { AutoModelForVision2Seq } from './models.js';
2395
+ import { AutoModelForImageClassification } from './models.js';
2396
+ import { AutoModelForImageSegmentation } from './models.js';
2397
+ import { AutoModelForObjectDetection } from './models.js';
2398
+ import { AutoModelForZeroShotObjectDetection } from './models.js';
2399
+ import { AutoModelForDocumentQuestionAnswering } from './models.js';
2400
+ import { AutoModelForImageToImage } from './models.js';
2401
+ import { AutoModelForDepthEstimation } from './models.js';
2402
+ export {};
2403
+ //# sourceMappingURL=pipelines.d.ts.map