@huggingface/inference 2.6.7 → 2.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129) hide show
  1. package/README.md +126 -27
  2. package/dist/index.cjs +78 -12
  3. package/dist/index.js +76 -12
  4. package/dist/src/HfInference.d.ts +28 -0
  5. package/dist/src/HfInference.d.ts.map +1 -0
  6. package/dist/src/index.d.ts +5 -0
  7. package/dist/src/index.d.ts.map +1 -0
  8. package/dist/src/lib/InferenceOutputError.d.ts +4 -0
  9. package/dist/src/lib/InferenceOutputError.d.ts.map +1 -0
  10. package/dist/src/lib/getDefaultTask.d.ts +12 -0
  11. package/dist/src/lib/getDefaultTask.d.ts.map +1 -0
  12. package/dist/src/lib/isUrl.d.ts +2 -0
  13. package/dist/src/lib/isUrl.d.ts.map +1 -0
  14. package/dist/src/lib/makeRequestOptions.d.ts +18 -0
  15. package/dist/src/lib/makeRequestOptions.d.ts.map +1 -0
  16. package/dist/src/tasks/audio/audioClassification.d.ts +24 -0
  17. package/dist/src/tasks/audio/audioClassification.d.ts.map +1 -0
  18. package/dist/src/tasks/audio/audioToAudio.d.ts +28 -0
  19. package/dist/src/tasks/audio/audioToAudio.d.ts.map +1 -0
  20. package/dist/src/tasks/audio/automaticSpeechRecognition.d.ts +19 -0
  21. package/dist/src/tasks/audio/automaticSpeechRecognition.d.ts.map +1 -0
  22. package/dist/src/tasks/audio/textToSpeech.d.ts +14 -0
  23. package/dist/src/tasks/audio/textToSpeech.d.ts.map +1 -0
  24. package/dist/src/tasks/custom/request.d.ts +13 -0
  25. package/dist/src/tasks/custom/request.d.ts.map +1 -0
  26. package/dist/src/tasks/custom/streamingRequest.d.ts +13 -0
  27. package/dist/src/tasks/custom/streamingRequest.d.ts.map +1 -0
  28. package/dist/src/tasks/cv/imageClassification.d.ts +24 -0
  29. package/dist/src/tasks/cv/imageClassification.d.ts.map +1 -0
  30. package/dist/src/tasks/cv/imageSegmentation.d.ts +28 -0
  31. package/dist/src/tasks/cv/imageSegmentation.d.ts.map +1 -0
  32. package/dist/src/tasks/cv/imageToImage.d.ts +55 -0
  33. package/dist/src/tasks/cv/imageToImage.d.ts.map +1 -0
  34. package/dist/src/tasks/cv/imageToText.d.ts +18 -0
  35. package/dist/src/tasks/cv/imageToText.d.ts.map +1 -0
  36. package/dist/src/tasks/cv/objectDetection.d.ts +33 -0
  37. package/dist/src/tasks/cv/objectDetection.d.ts.map +1 -0
  38. package/dist/src/tasks/cv/textToImage.d.ts +36 -0
  39. package/dist/src/tasks/cv/textToImage.d.ts.map +1 -0
  40. package/dist/src/tasks/cv/zeroShotImageClassification.d.ts +26 -0
  41. package/dist/src/tasks/cv/zeroShotImageClassification.d.ts.map +1 -0
  42. package/dist/src/tasks/index.d.ts +32 -0
  43. package/dist/src/tasks/index.d.ts.map +1 -0
  44. package/dist/src/tasks/multimodal/documentQuestionAnswering.d.ts +35 -0
  45. package/dist/src/tasks/multimodal/documentQuestionAnswering.d.ts.map +1 -0
  46. package/dist/src/tasks/multimodal/visualQuestionAnswering.d.ts +27 -0
  47. package/dist/src/tasks/multimodal/visualQuestionAnswering.d.ts.map +1 -0
  48. package/dist/src/tasks/nlp/chatCompletion.d.ts +7 -0
  49. package/dist/src/tasks/nlp/chatCompletion.d.ts.map +1 -0
  50. package/dist/src/tasks/nlp/chatCompletionStream.d.ts +7 -0
  51. package/dist/src/tasks/nlp/chatCompletionStream.d.ts.map +1 -0
  52. package/dist/src/tasks/nlp/featureExtraction.d.ts +19 -0
  53. package/dist/src/tasks/nlp/featureExtraction.d.ts.map +1 -0
  54. package/dist/src/tasks/nlp/fillMask.d.ts +27 -0
  55. package/dist/src/tasks/nlp/fillMask.d.ts.map +1 -0
  56. package/dist/src/tasks/nlp/questionAnswering.d.ts +30 -0
  57. package/dist/src/tasks/nlp/questionAnswering.d.ts.map +1 -0
  58. package/dist/src/tasks/nlp/sentenceSimilarity.d.ts +19 -0
  59. package/dist/src/tasks/nlp/sentenceSimilarity.d.ts.map +1 -0
  60. package/dist/src/tasks/nlp/summarization.d.ts +48 -0
  61. package/dist/src/tasks/nlp/summarization.d.ts.map +1 -0
  62. package/dist/src/tasks/nlp/tableQuestionAnswering.d.ts +36 -0
  63. package/dist/src/tasks/nlp/tableQuestionAnswering.d.ts.map +1 -0
  64. package/dist/src/tasks/nlp/textClassification.d.ts +22 -0
  65. package/dist/src/tasks/nlp/textClassification.d.ts.map +1 -0
  66. package/dist/src/tasks/nlp/textGeneration.d.ts +8 -0
  67. package/dist/src/tasks/nlp/textGeneration.d.ts.map +1 -0
  68. package/dist/src/tasks/nlp/textGenerationStream.d.ts +81 -0
  69. package/dist/src/tasks/nlp/textGenerationStream.d.ts.map +1 -0
  70. package/dist/src/tasks/nlp/tokenClassification.d.ts +51 -0
  71. package/dist/src/tasks/nlp/tokenClassification.d.ts.map +1 -0
  72. package/dist/src/tasks/nlp/translation.d.ts +19 -0
  73. package/dist/src/tasks/nlp/translation.d.ts.map +1 -0
  74. package/dist/src/tasks/nlp/zeroShotClassification.d.ts +28 -0
  75. package/dist/src/tasks/nlp/zeroShotClassification.d.ts.map +1 -0
  76. package/dist/src/tasks/tabular/tabularClassification.d.ts +20 -0
  77. package/dist/src/tasks/tabular/tabularClassification.d.ts.map +1 -0
  78. package/dist/src/tasks/tabular/tabularRegression.d.ts +20 -0
  79. package/dist/src/tasks/tabular/tabularRegression.d.ts.map +1 -0
  80. package/dist/src/types.d.ts +69 -0
  81. package/dist/src/types.d.ts.map +1 -0
  82. package/dist/src/utils/base64FromBytes.d.ts +2 -0
  83. package/dist/src/utils/base64FromBytes.d.ts.map +1 -0
  84. package/dist/src/utils/distributive-omit.d.ts +9 -0
  85. package/dist/src/utils/distributive-omit.d.ts.map +1 -0
  86. package/dist/src/utils/isBackend.d.ts +2 -0
  87. package/dist/src/utils/isBackend.d.ts.map +1 -0
  88. package/dist/src/utils/isFrontend.d.ts +2 -0
  89. package/dist/src/utils/isFrontend.d.ts.map +1 -0
  90. package/dist/src/utils/omit.d.ts +5 -0
  91. package/dist/src/utils/omit.d.ts.map +1 -0
  92. package/dist/src/utils/pick.d.ts +5 -0
  93. package/dist/src/utils/pick.d.ts.map +1 -0
  94. package/dist/src/utils/toArray.d.ts +2 -0
  95. package/dist/src/utils/toArray.d.ts.map +1 -0
  96. package/dist/src/utils/typedInclude.d.ts +2 -0
  97. package/dist/src/utils/typedInclude.d.ts.map +1 -0
  98. package/dist/src/vendor/fetch-event-source/parse.d.ts +69 -0
  99. package/dist/src/vendor/fetch-event-source/parse.d.ts.map +1 -0
  100. package/dist/src/vendor/fetch-event-source/parse.spec.d.ts +2 -0
  101. package/dist/src/vendor/fetch-event-source/parse.spec.d.ts.map +1 -0
  102. package/dist/test/HfInference.spec.d.ts +2 -0
  103. package/dist/test/HfInference.spec.d.ts.map +1 -0
  104. package/dist/test/expect-closeto.d.ts +2 -0
  105. package/dist/test/expect-closeto.d.ts.map +1 -0
  106. package/dist/test/test-files.d.ts +2 -0
  107. package/dist/test/test-files.d.ts.map +1 -0
  108. package/dist/test/vcr.d.ts +2 -0
  109. package/dist/test/vcr.d.ts.map +1 -0
  110. package/package.json +9 -7
  111. package/src/HfInference.ts +4 -4
  112. package/src/lib/makeRequestOptions.ts +17 -7
  113. package/src/tasks/custom/request.ts +5 -0
  114. package/src/tasks/custom/streamingRequest.ts +8 -0
  115. package/src/tasks/cv/imageToImage.ts +1 -1
  116. package/src/tasks/cv/zeroShotImageClassification.ts +1 -1
  117. package/src/tasks/index.ts +2 -0
  118. package/src/tasks/multimodal/documentQuestionAnswering.ts +1 -1
  119. package/src/tasks/multimodal/visualQuestionAnswering.ts +1 -1
  120. package/src/tasks/nlp/chatCompletion.ts +32 -0
  121. package/src/tasks/nlp/chatCompletionStream.ts +17 -0
  122. package/src/tasks/nlp/textGeneration.ts +2 -202
  123. package/src/tasks/nlp/textGenerationStream.ts +2 -1
  124. package/src/types.ts +14 -3
  125. package/src/utils/base64FromBytes.ts +11 -0
  126. package/src/utils/{distributive-omit.d.ts → distributive-omit.ts} +0 -2
  127. package/src/utils/isBackend.ts +6 -0
  128. package/src/utils/isFrontend.ts +3 -0
  129. package/dist/index.d.ts +0 -1536
package/dist/index.d.ts DELETED
@@ -1,1536 +0,0 @@
1
-
2
- export interface Options {
3
- /**
4
- * (Default: true) Boolean. If a request 503s and wait_for_model is set to false, the request will be retried with the same parameters but with wait_for_model set to true.
5
- */
6
- retry_on_error?: boolean;
7
- /**
8
- * (Default: true). Boolean. There is a cache layer on Inference API (serverless) to speed up requests we have already seen. Most models can use those results as-is because models are deterministic (meaning the results will be the same anyway). However, if you use a non-deterministic model, you can set this parameter to prevent the caching mechanism from being used, resulting in a real new query.
9
- */
10
- use_cache?: boolean;
11
- /**
12
- * (Default: false). Boolean. Do not load the model if it's not already available.
13
- */
14
- dont_load_model?: boolean;
15
- /**
16
- * (Default: false). Boolean to use GPU instead of CPU for inference (requires Startup plan at least).
17
- */
18
- use_gpu?: boolean;
19
-
20
- /**
21
- * (Default: false) Boolean. If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error as it will limit hanging in your application to known places.
22
- */
23
- wait_for_model?: boolean;
24
- /**
25
- * Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
26
- */
27
- fetch?: typeof fetch;
28
- /**
29
- * Abort Controller signal to use for request interruption.
30
- */
31
- signal?: AbortSignal;
32
-
33
- /**
34
- * Credentials to use for the request. If this is a string, it will be passed straight on. If it's a boolean, true will be "include" and false will not send credentials at all (which defaults to "same-origin" inside browsers).
35
- */
36
- includeCredentials?: string | boolean;
37
- }
38
-
39
- export type InferenceTask =
40
- | "audio-classification"
41
- | "audio-to-audio"
42
- | "automatic-speech-recognition"
43
- | "depth-estimation"
44
- | "document-question-answering"
45
- | "feature-extraction"
46
- | "fill-mask"
47
- | "graph-ml"
48
- | "image-classification"
49
- | "image-feature-extraction"
50
- | "image-segmentation"
51
- | "image-text-to-text"
52
- | "image-to-3d"
53
- | "image-to-image"
54
- | "image-to-text"
55
- | "image-to-video"
56
- | "mask-generation"
57
- | "multiple-choice"
58
- | "object-detection"
59
- | "question-answering"
60
- | "reinforcement-learning"
61
- | "robotics"
62
- | "sentence-similarity"
63
- | "summarization"
64
- | "table-question-answering"
65
- | "table-to-text"
66
- | "tabular-classification"
67
- | "tabular-regression"
68
- | "tabular-to-text"
69
- | "text-classification"
70
- | "text-generation"
71
- | "text-retrieval"
72
- | "text-to-3d"
73
- | "text-to-audio"
74
- | "text-to-image"
75
- | "text-to-speech"
76
- | "text-to-video"
77
- | "text2text-generation"
78
- | "time-series-forecasting"
79
- | "token-classification"
80
- | "translation"
81
- | "unconditional-image-generation"
82
- | "video-classification"
83
- | "visual-question-answering"
84
- | "voice-activity-detection"
85
- | "zero-shot-classification"
86
- | "zero-shot-image-classification"
87
- | "zero-shot-object-detection";
88
-
89
- export interface BaseArgs {
90
- /**
91
- * The access token to use. Without it, you'll get rate-limited quickly.
92
- *
93
- * Can be created for free in hf.co/settings/token
94
- */
95
- accessToken?: string;
96
- /**
97
- * The model to use. Can be a full URL for a dedicated inference endpoint.
98
- *
99
- * If not specified, will call huggingface.co/api/tasks to get the default model for the task.
100
- */
101
- model?: string;
102
- }
103
-
104
- export type RequestArgs = BaseArgs &
105
- ({ data: Blob | ArrayBuffer } | { inputs: unknown }) & {
106
- parameters?: Record<string, unknown>;
107
- accessToken?: string;
108
- };
109
-
110
- export class InferenceOutputError extends TypeError {}
111
- export type AudioClassificationArgs = BaseArgs & {
112
- /**
113
- * Binary audio data
114
- */
115
- data: Blob | ArrayBuffer;
116
- };
117
- export type AudioClassificationReturn = AudioClassificationOutputValue[];
118
- export interface AudioClassificationOutputValue {
119
- /**
120
- * The label for the class (model specific)
121
- */
122
- label: string;
123
-
124
- /**
125
- * A float that represents how likely it is that the audio file belongs to this class.
126
- */
127
- score: number;
128
- }
129
- /**
130
- * This task reads some audio input and outputs the likelihood of classes.
131
- * Recommended model: superb/hubert-large-superb-er
132
- */
133
- export function audioClassification(
134
- args: AudioClassificationArgs,
135
- options?: Options
136
- ): Promise<AudioClassificationReturn>;
137
- export type AudioToAudioArgs = BaseArgs & {
138
- /**
139
- * Binary audio data
140
- */
141
- data: Blob | ArrayBuffer;
142
- };
143
- export type AudioToAudioReturn = AudioToAudioOutputValue[];
144
- export interface AudioToAudioOutputValue {
145
- /**
146
- * The label for the audio output (model specific)
147
- */
148
- label: string;
149
-
150
- /**
151
- * Base64 encoded audio output.
152
- */
153
- blob: string;
154
-
155
- /**
156
- * Content-type for blob, e.g. audio/flac
157
- */
158
- "content-type": string;
159
- }
160
- /**
161
- * This task reads some audio input and outputs one or multiple audio files.
162
- * Example model: speechbrain/sepformer-wham does audio source separation.
163
- */
164
- export function audioToAudio(args: AudioToAudioArgs, options?: Options): Promise<AudioToAudioReturn>;
165
- export type AutomaticSpeechRecognitionArgs = BaseArgs & {
166
- /**
167
- * Binary audio data
168
- */
169
- data: Blob | ArrayBuffer;
170
- };
171
- export interface AutomaticSpeechRecognitionOutput {
172
- /**
173
- * The text that was recognized from the audio
174
- */
175
- text: string;
176
- }
177
- /**
178
- * This task reads some audio input and outputs the words spoken in the audio file.
179
- * Recommended model (english language): facebook/wav2vec2-large-960h-lv60-self
180
- */
181
- export function automaticSpeechRecognition(
182
- args: AutomaticSpeechRecognitionArgs,
183
- options?: Options
184
- ): Promise<AutomaticSpeechRecognitionOutput>;
185
- export type TextToSpeechArgs = BaseArgs & {
186
- /**
187
- * The text to generate an audio from
188
- */
189
- inputs: string;
190
- };
191
- export type TextToSpeechOutput = Blob;
192
- /**
193
- * This task synthesizes audio of a voice pronouncing a given text.
194
- * Recommended model: espnet/kan-bayashi_ljspeech_vits
195
- */
196
- export function textToSpeech(args: TextToSpeechArgs, options?: Options): Promise<TextToSpeechOutput>;
197
- /**
198
- * Primitive to make custom calls to Inference Endpoints
199
- */
200
- export function request<T>(
201
- args: RequestArgs,
202
- options?: Options & {
203
- /** When a model can be used for multiple tasks, and we want to run a non-default task */
204
- task?: string | InferenceTask;
205
- /** To load default model if needed */
206
- taskHint?: InferenceTask;
207
- }
208
- ): Promise<T>;
209
- /**
210
- * Primitive to make custom inference calls that expect server-sent events, and returns the response through a generator
211
- */
212
- export function streamingRequest<T>(
213
- args: RequestArgs,
214
- options?: Options & {
215
- /** When a model can be used for multiple tasks, and we want to run a non-default task */
216
- task?: string | InferenceTask;
217
- /** To load default model if needed */
218
- taskHint?: InferenceTask;
219
- }
220
- ): AsyncGenerator<T>;
221
- export type ImageClassificationArgs = BaseArgs & {
222
- /**
223
- * Binary image data
224
- */
225
- data: Blob | ArrayBuffer;
226
- };
227
- export type ImageClassificationOutput = ImageClassificationOutputValue[];
228
- export interface ImageClassificationOutputValue {
229
- /**
230
- * The label for the class (model specific)
231
- */
232
- label: string;
233
- /**
234
- * A float that represents how likely it is that the image file belongs to this class.
235
- */
236
- score: number;
237
- }
238
- /**
239
- * This task reads some image input and outputs the likelihood of classes.
240
- * Recommended model: google/vit-base-patch16-224
241
- */
242
- export function imageClassification(
243
- args: ImageClassificationArgs,
244
- options?: Options
245
- ): Promise<ImageClassificationOutput>;
246
- export type ImageSegmentationArgs = BaseArgs & {
247
- /**
248
- * Binary image data
249
- */
250
- data: Blob | ArrayBuffer;
251
- };
252
- export type ImageSegmentationOutput = ImageSegmentationOutputValue[];
253
- export interface ImageSegmentationOutputValue {
254
- /**
255
- * The label for the class (model specific) of a segment.
256
- */
257
- label: string;
258
- /**
259
- * A str (base64 str of a single channel black-and-white img) representing the mask of a segment.
260
- */
261
- mask: string;
262
- /**
263
- * A float that represents how likely it is that the detected object belongs to the given class.
264
- */
265
- score: number;
266
- }
267
- /**
268
- * This task reads some image input and outputs, for each detected segment, its class label, mask, and score.
269
- * Recommended model: facebook/detr-resnet-50-panoptic
270
- */
271
- export function imageSegmentation(
272
- args: ImageSegmentationArgs,
273
- options?: Options
274
- ): Promise<ImageSegmentationOutput>;
275
- export type ImageToImageArgs = BaseArgs & {
276
- /**
277
- * The initial image condition
278
- *
279
- **/
280
- inputs: Blob | ArrayBuffer;
281
-
282
- parameters?: {
283
- /**
284
- * The text prompt to guide the image generation.
285
- */
286
- prompt?: string;
287
- /**
288
- * strength param only works for SD img2img and alt diffusion img2img models
289
- * Conceptually, indicates how much to transform the reference `image`. Must be between 0 and 1. `image`
290
- * will be used as a starting point, adding more noise to it the larger the `strength`. The number of
291
- * denoising steps depends on the amount of noise initially added. When `strength` is 1, added noise will
292
- * be maximum and the denoising process will run for the full number of iterations specified in
293
- * `num_inference_steps`. A value of 1, therefore, essentially ignores `image`.
294
- **/
295
- strength?: number;
296
- /**
297
- * An optional negative prompt for the image generation
298
- */
299
- negative_prompt?: string;
300
- /**
301
- * The height in pixels of the generated image
302
- */
303
- height?: number;
304
- /**
305
- * The width in pixels of the generated image
306
- */
307
- width?: number;
308
- /**
309
- * The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference.
310
- */
311
- num_inference_steps?: number;
312
- /**
313
- * Guidance scale: Higher guidance scale encourages to generate images that are closely linked to the text `prompt`, usually at the expense of lower image quality.
314
- */
315
- guidance_scale?: number;
316
- /**
317
- * guess_mode only works for ControlNet models and defaults to False. In this mode, the ControlNet encoder will try its best to recognize the content of the input image even if
318
- * you remove all prompts. The `guidance_scale` between 3.0 and 5.0 is recommended.
319
- */
320
- guess_mode?: boolean;
321
- };
322
- };
323
- export type ImageToImageOutput = Blob;
324
- /**
325
- * This task reads an image input (optionally guided by a text prompt) and outputs an image.
326
- * Recommended model: lllyasviel/sd-controlnet-depth
327
- */
328
- export function imageToImage(args: ImageToImageArgs, options?: Options): Promise<ImageToImageOutput>;
329
- export type ImageToTextArgs = BaseArgs & {
330
- /**
331
- * Binary image data
332
- */
333
- data: Blob | ArrayBuffer;
334
- };
335
- export interface ImageToTextOutput {
336
- /**
337
- * The generated caption
338
- */
339
- generated_text: string;
340
- }
341
- /**
342
- * This task reads some image input and outputs the text caption.
343
- */
344
- export function imageToText(args: ImageToTextArgs, options?: Options): Promise<ImageToTextOutput>;
345
- export type ObjectDetectionArgs = BaseArgs & {
346
- /**
347
- * Binary image data
348
- */
349
- data: Blob | ArrayBuffer;
350
- };
351
- export type ObjectDetectionOutput = ObjectDetectionOutputValue[];
352
- export interface ObjectDetectionOutputValue {
353
- /**
354
- * A dict (with keys [xmin,ymin,xmax,ymax]) representing the bounding box of a detected object.
355
- */
356
- box: {
357
- xmax: number;
358
- xmin: number;
359
- ymax: number;
360
- ymin: number;
361
- };
362
- /**
363
- * The label for the class (model specific) of a detected object.
364
- */
365
- label: string;
366
-
367
- /**
368
- * A float that represents how likely it is that the detected object belongs to the given class.
369
- */
370
- score: number;
371
- }
372
- /**
373
- * This task reads some image input and outputs the likelihood of classes & bounding boxes of detected objects.
374
- * Recommended model: facebook/detr-resnet-50
375
- */
376
- export function objectDetection(args: ObjectDetectionArgs, options?: Options): Promise<ObjectDetectionOutput>;
377
- export type TextToImageArgs = BaseArgs & {
378
- /**
379
- * The text to generate an image from
380
- */
381
- inputs: string;
382
-
383
- parameters?: {
384
- /**
385
- * An optional negative prompt for the image generation
386
- */
387
- negative_prompt?: string;
388
- /**
389
- * The height in pixels of the generated image
390
- */
391
- height?: number;
392
- /**
393
- * The width in pixels of the generated image
394
- */
395
- width?: number;
396
- /**
397
- * The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference.
398
- */
399
- num_inference_steps?: number;
400
- /**
401
- * Guidance scale: Higher guidance scale encourages to generate images that are closely linked to the text `prompt`, usually at the expense of lower image quality.
402
- */
403
- guidance_scale?: number;
404
- };
405
- };
406
- export type TextToImageOutput = Blob;
407
- /**
408
- * This task reads some text input and outputs an image.
409
- * Recommended model: stabilityai/stable-diffusion-2
410
- */
411
- export function textToImage(args: TextToImageArgs, options?: Options): Promise<TextToImageOutput>;
412
- export type ZeroShotImageClassificationArgs = BaseArgs & {
413
- inputs: {
414
- /**
415
- * Binary image data
416
- */
417
- image: Blob | ArrayBuffer;
418
- };
419
- parameters: {
420
- /**
421
- * A list of strings that are potential classes for inputs. (max 10)
422
- */
423
- candidate_labels: string[];
424
- };
425
- };
426
- export type ZeroShotImageClassificationOutput = ZeroShotImageClassificationOutputValue[];
427
- export interface ZeroShotImageClassificationOutputValue {
428
- label: string;
429
- score: number;
430
- }
431
- /**
432
- * Classify an image to specified classes.
433
- * Recommended model: openai/clip-vit-large-patch14-336
434
- */
435
- export function zeroShotImageClassification(
436
- args: ZeroShotImageClassificationArgs,
437
- options?: Options
438
- ): Promise<ZeroShotImageClassificationOutput>;
439
- export type DocumentQuestionAnsweringArgs = BaseArgs & {
440
- inputs: {
441
- /**
442
- * Raw image
443
- *
444
- * You can use native `File` in browsers, or `new Blob([buffer])` in node, or for a base64 image `new Blob([btoa(base64String)])`, or even `await (await fetch('...')).blob()`
445
- **/
446
- image: Blob | ArrayBuffer;
447
- question: string;
448
- };
449
- };
450
- export interface DocumentQuestionAnsweringOutput {
451
- /**
452
- * A string that’s the answer within the document.
453
- */
454
- answer: string;
455
- /**
456
- * ?
457
- */
458
- end?: number;
459
- /**
460
- * A float that represents how likely that the answer is correct
461
- */
462
- score?: number;
463
- /**
464
- * ?
465
- */
466
- start?: number;
467
- }
468
- /**
469
- * Answers a question on a document image. Recommended model: impira/layoutlm-document-qa.
470
- */
471
- export function documentQuestionAnswering(
472
- args: DocumentQuestionAnsweringArgs,
473
- options?: Options
474
- ): Promise<DocumentQuestionAnsweringOutput>;
475
- export type VisualQuestionAnsweringArgs = BaseArgs & {
476
- inputs: {
477
- /**
478
- * Raw image
479
- *
480
- * You can use native `File` in browsers, or `new Blob([buffer])` in node, or for a base64 image `new Blob([btoa(base64String)])`, or even `await (await fetch('...')).blob()`
481
- **/
482
- image: Blob | ArrayBuffer;
483
- question: string;
484
- };
485
- };
486
- export interface VisualQuestionAnsweringOutput {
487
- /**
488
- * A string that’s the answer to a visual question.
489
- */
490
- answer: string;
491
- /**
492
- * Answer correctness score.
493
- */
494
- score: number;
495
- }
496
- /**
497
- * Answers a question on an image. Recommended model: dandelin/vilt-b32-finetuned-vqa.
498
- */
499
- export function visualQuestionAnswering(
500
- args: VisualQuestionAnsweringArgs,
501
- options?: Options
502
- ): Promise<VisualQuestionAnsweringOutput>;
503
- export type FeatureExtractionArgs = BaseArgs & {
504
- /**
505
- * The inputs is a string or a list of strings to get the features from.
506
- *
507
- * inputs: "That is a happy person",
508
- *
509
- */
510
- inputs: string | string[];
511
- };
512
- /**
513
- * Returned values are a multidimensional array of floats. The dimensions depend on whether you sent a string or a list of strings, and on whether an automatic reduction (usually mean_pooling, for instance) was applied for you. This should be explained in the model's README.
514
- */
515
- export type FeatureExtractionOutput = (number | number[] | number[][])[];
516
- /**
517
- * This task reads some text and outputs raw float values, that are usually consumed as part of a semantic database/semantic search.
518
- */
519
- export function featureExtraction(
520
- args: FeatureExtractionArgs,
521
- options?: Options
522
- ): Promise<FeatureExtractionOutput>;
523
- export type FillMaskArgs = BaseArgs & {
524
- inputs: string;
525
- };
526
- export type FillMaskOutput = {
527
- /**
528
- * The probability for this token.
529
- */
530
- score: number;
531
- /**
532
- * The actual sequence of tokens that ran against the model (may contain special tokens)
533
- */
534
- sequence: string;
535
- /**
536
- * The id of the token
537
- */
538
- token: number;
539
- /**
540
- * The string representation of the token
541
- */
542
- token_str: string;
543
- }[];
544
- /**
545
- * Tries to fill in a hole with a missing word (token to be precise). That’s the base task for BERT models.
546
- */
547
- export function fillMask(args: FillMaskArgs, options?: Options): Promise<FillMaskOutput>;
548
- export type QuestionAnsweringArgs = BaseArgs & {
549
- inputs: {
550
- context: string;
551
- question: string;
552
- };
553
- };
554
- export interface QuestionAnsweringOutput {
555
- /**
556
- * A string that’s the answer within the text.
557
- */
558
- answer: string;
559
- /**
560
- * The index (string wise) of the stop of the answer within context.
561
- */
562
- end: number;
563
- /**
564
- * A float that represents how likely that the answer is correct
565
- */
566
- score: number;
567
- /**
568
- * The index (string wise) of the start of the answer within context.
569
- */
570
- start: number;
571
- }
572
- /**
573
- * Want to have a nice know-it-all bot that can answer any question? Recommended model: deepset/roberta-base-squad2
574
- */
575
- export function questionAnswering(
576
- args: QuestionAnsweringArgs,
577
- options?: Options
578
- ): Promise<QuestionAnsweringOutput>;
579
- export type SentenceSimilarityArgs = BaseArgs & {
580
- /**
581
- * The inputs vary based on the model.
582
- *
583
- * For example when using sentence-transformers/paraphrase-xlm-r-multilingual-v1 the inputs will have a `source_sentence` string and
584
- * a `sentences` array of strings
585
- */
586
- inputs: Record<string, unknown> | Record<string, unknown>[];
587
- };
588
- /**
589
- * Returned values are a list of floats
590
- */
591
- export type SentenceSimilarityOutput = number[];
592
- /**
593
- * Calculate the semantic similarity between one text and a list of other sentences by comparing their embeddings.
594
- */
595
- export function sentenceSimilarity(
596
- args: SentenceSimilarityArgs,
597
- options?: Options
598
- ): Promise<SentenceSimilarityOutput>;
599
- export type SummarizationArgs = BaseArgs & {
600
- /**
601
- * A string to be summarized
602
- */
603
- inputs: string;
604
- parameters?: {
605
- /**
606
- * (Default: None). Integer to define the maximum length in tokens of the output summary.
607
- */
608
- max_length?: number;
609
- /**
610
- * (Default: None). Float (0-120.0). The amount of time in seconds that the query should take maximum. Network can cause some overhead so it will be a soft limit.
611
- */
612
- max_time?: number;
613
- /**
614
- * (Default: None). Integer to define the minimum length in tokens of the output summary.
615
- */
616
- min_length?: number;
617
- /**
618
- * (Default: None). Float (0.0-100.0). The more a token is used within generation the more it is penalized to not be picked in successive generation passes.
619
- */
620
- repetition_penalty?: number;
621
- /**
622
- * (Default: 1.0). Float (0.0-100.0). The temperature of the sampling operation. 1 means regular sampling, 0 means always take the highest score, 100.0 is getting closer to uniform probability.
623
- */
624
- temperature?: number;
625
- /**
626
- * (Default: None). Integer to define the top tokens considered within the sample operation to create new text.
627
- */
628
- top_k?: number;
629
- /**
630
- * (Default: None). Float to define the tokens that are within the sample operation of text generation. Add tokens in the sample from most probable to least probable until the sum of the probabilities is greater than top_p.
631
- */
632
- top_p?: number;
633
- };
634
- };
635
- export interface SummarizationOutput {
636
- /**
637
- * The string after summarization
638
- */
639
- summary_text: string;
640
- }
641
- /**
642
- * This task is well known to summarize longer text into shorter text. Be careful, some models have a maximum length of input. That means that the summary cannot handle full books for instance. Be careful when choosing your model.
643
- */
644
- export function summarization(args: SummarizationArgs, options?: Options): Promise<SummarizationOutput>;
645
- export type TableQuestionAnsweringArgs = BaseArgs & {
646
- inputs: {
647
- /**
648
- * The query in plain text that you want to ask the table
649
- */
650
- query: string;
651
- /**
652
- * A table of data represented as a dict of list where entries are headers and the lists are all the values, all lists must have the same size.
653
- */
654
- table: Record<string, string[]>;
655
- };
656
- };
657
- export interface TableQuestionAnsweringOutput {
658
- /**
659
- * The aggregator used to get the answer
660
- */
661
- aggregator: string;
662
- /**
663
- * The plaintext answer
664
- */
665
- answer: string;
666
- /**
667
- * A list of the contents of the cells
668
- */
669
- cells: string[];
670
- /**
671
- * a list of coordinates of the cells referenced in the answer
672
- */
673
- coordinates: number[][];
674
- }
675
- /**
676
- * Don’t know SQL? Don’t want to dive into a large spreadsheet? Ask questions in plain english! Recommended model: google/tapas-base-finetuned-wtq.
677
- */
678
- export function tableQuestionAnswering(
679
- args: TableQuestionAnsweringArgs,
680
- options?: Options
681
- ): Promise<TableQuestionAnsweringOutput>;
682
- export type TextClassificationArgs = BaseArgs & {
683
- /**
684
- * A string to be classified
685
- */
686
- inputs: string;
687
- };
688
- export type TextClassificationOutput = {
689
- /**
690
- * The label for the class (model specific)
691
- */
692
- label: string;
693
- /**
694
- * A float that represents how likely it is that the text belongs to this class.
695
- */
696
- score: number;
697
- }[];
698
- /**
699
- * Usually used for sentiment-analysis this will output the likelihood of classes of an input. Recommended model: distilbert-base-uncased-finetuned-sst-2-english
700
- */
701
- export function textClassification(
702
- args: TextClassificationArgs,
703
- options?: Options
704
- ): Promise<TextClassificationOutput>;
705
- /**
706
- * The reason why the generation was stopped.
707
- *
708
- * length: The generated sequence reached the maximum allowed length
709
- *
710
- * eos_token: The model generated an end-of-sentence (EOS) token
711
- *
712
- * stop_sequence: One of the sequence in stop_sequences was generated
713
- */
714
- export type TextGenerationFinishReason = "length" | "eos_token" | "stop_sequence";
715
- /**
716
- * Inputs for Text Generation inference
717
- */
718
- export interface TextGenerationInput {
719
- /**
720
- * The text to initialize generation with
721
- */
722
- inputs: string;
723
- /**
724
- * Additional inference parameters
725
- */
726
- parameters?: TextGenerationParameters;
727
- /**
728
- * Whether to stream output tokens
729
- */
730
- stream?: boolean;
731
- [property: string]: unknown;
732
- }
733
- /**
734
- * Additional inference parameters
735
- *
736
- * Additional inference parameters for Text Generation
737
- */
738
- export interface TextGenerationParameters {
739
- /**
740
- * The number of sampling queries to run. Only the best one (in terms of total logprob) will
741
- * be returned.
742
- */
743
- best_of?: number;
744
- /**
745
- * Whether or not to output decoder input details
746
- */
747
- decoder_input_details?: boolean;
748
- /**
749
- * Whether or not to output details
750
- */
751
- details?: boolean;
752
- /**
753
- * Whether to use logits sampling instead of greedy decoding when generating new tokens.
754
- */
755
- do_sample?: boolean;
756
- /**
757
- * The maximum number of tokens to generate.
758
- */
759
- max_new_tokens?: number;
760
- /**
761
- * The parameter for repetition penalty. A value of 1.0 means no penalty. See [this
762
- * paper](https://hf.co/papers/1909.05858) for more details.
763
- */
764
- repetition_penalty?: number;
765
- /**
766
- * Whether to prepend the prompt to the generated text.
767
- */
768
- return_full_text?: boolean;
769
- /**
770
- * The random sampling seed.
771
- */
772
- seed?: number;
773
- /**
774
- * Stop generating tokens if a member of `stop_sequences` is generated.
775
- */
776
- stop_sequences?: string[];
777
- /**
778
- * The value used to modulate the logits distribution.
779
- */
780
- temperature?: number;
781
- /**
782
- * The number of highest probability vocabulary tokens to keep for top-k-filtering.
783
- */
784
- top_k?: number;
785
- /**
786
- * If set to < 1, only the smallest set of most probable tokens with probabilities that add
787
- * up to `top_p` or higher are kept for generation.
788
- */
789
- top_p?: number;
790
- /**
791
- * Truncate input tokens to the given size.
792
- */
793
- truncate?: number;
794
- /**
795
- * Typical Decoding mass. See [Typical Decoding for Natural Language
796
- * Generation](https://hf.co/papers/2202.00666) for more information
797
- */
798
- typical_p?: number;
799
- /**
800
- * Watermarking with [A Watermark for Large Language Models](https://hf.co/papers/2301.10226)
801
- */
802
- watermark?: boolean;
803
- [property: string]: unknown;
804
- }
805
- /**
806
- * Outputs for Text Generation inference
807
- */
808
- export interface TextGenerationOutput {
809
- /**
810
- * When enabled, details about the generation
811
- */
812
- details?: TextGenerationOutputDetails;
813
- /**
814
- * The generated text
815
- */
816
- generated_text: string;
817
- [property: string]: unknown;
818
- }
819
- /**
820
- * When enabled, details about the generation
821
- */
822
- export interface TextGenerationOutputDetails {
823
- /**
824
- * Details about additional sequences when best_of is provided
825
- */
826
- best_of_sequences?: TextGenerationOutputSequenceDetails[];
827
- /**
828
- * The reason why the generation was stopped.
829
- */
830
- finish_reason: TextGenerationFinishReason;
831
- /**
832
- * The number of generated tokens
833
- */
834
- generated_tokens: number;
835
- prefill: TextGenerationPrefillToken[];
836
- /**
837
- * The random seed used for generation
838
- */
839
- seed?: number;
840
- /**
841
- * The generated tokens and associated details
842
- */
843
- tokens: TextGenerationOutputToken[];
844
- /**
845
- * Most likely tokens
846
- */
847
- top_tokens?: Array<TextGenerationOutputToken[]>;
848
- [property: string]: unknown;
849
- }
850
- export interface TextGenerationOutputSequenceDetails {
851
- finish_reason: TextGenerationFinishReason;
852
- /**
853
- * The generated text
854
- */
855
- generated_text: string;
856
- /**
857
- * The number of generated tokens
858
- */
859
- generated_tokens: number;
860
- prefill: TextGenerationPrefillToken[];
861
- /**
862
- * The random seed used for generation
863
- */
864
- seed?: number;
865
- /**
866
- * The generated tokens and associated details
867
- */
868
- tokens: TextGenerationOutputToken[];
869
- /**
870
- * Most likely tokens
871
- */
872
- top_tokens?: Array<TextGenerationOutputToken[]>;
873
- [property: string]: unknown;
874
- }
875
- export interface TextGenerationPrefillToken {
876
- id: number;
877
- logprob: number;
878
- /**
879
- * The text associated with that token
880
- */
881
- text: string;
882
- [property: string]: unknown;
883
- }
884
- /**
885
- * Generated token.
886
- */
887
- export interface TextGenerationOutputToken {
888
- id: number;
889
- logprob?: number;
890
- /**
891
- * Whether or not that token is a special one
892
- */
893
- special: boolean;
894
- /**
895
- * The text associated with that token
896
- */
897
- text: string;
898
- [property: string]: unknown;
899
- }
900
- /**
901
- * Use to continue text from a prompt. This is a very generic task. Recommended model: gpt2 (it’s a simple model, but fun to play with).
902
- */
903
- export function textGeneration(
904
- args: BaseArgs & TextGenerationInput,
905
- options?: Options
906
- ): Promise<TextGenerationOutput>;
907
- export type TextGenerationStreamFinishReason =
908
- /** number of generated tokens == `max_new_tokens` */
909
- | "length"
910
- /** the model generated its end of sequence token */
911
- | "eos_token"
912
- /** the model generated a text included in `stop_sequences` */
913
- | "stop_sequence";
914
- export interface TextGenerationStreamToken {
915
- /** Token ID from the model tokenizer */
916
- id: number;
917
- /** Token text */
918
- text: string;
919
- /** Logprob */
920
- logprob: number;
921
- /**
922
- * Is the token a special token
923
- * Can be used to ignore tokens when concatenating
924
- */
925
- special: boolean;
926
- }
927
- export interface TextGenerationStreamPrefillToken {
928
- /** Token ID from the model tokenizer */
929
- id: number;
930
- /** Token text */
931
- text: string;
932
- /**
933
- * Logprob
934
- * Optional since the logprob of the first token cannot be computed
935
- */
936
- logprob?: number;
937
- }
938
- export interface TextGenerationStreamBestOfSequence {
939
- /** Generated text */
940
- generated_text: string;
941
- /** Generation finish reason */
942
- finish_reason: TextGenerationStreamFinishReason;
943
- /** Number of generated tokens */
944
- generated_tokens: number;
945
- /** Sampling seed if sampling was activated */
946
- seed?: number;
947
- /** Prompt tokens */
948
- prefill: TextGenerationStreamPrefillToken[];
949
- /** Generated tokens */
950
- tokens: TextGenerationStreamToken[];
951
- }
952
- export interface TextGenerationStreamDetails {
953
- /** Generation finish reason */
954
- finish_reason: TextGenerationStreamFinishReason;
955
- /** Number of generated tokens */
956
- generated_tokens: number;
957
- /** Sampling seed if sampling was activated */
958
- seed?: number;
959
- /** Prompt tokens */
960
- prefill: TextGenerationStreamPrefillToken[];
961
- /** */
962
- tokens: TextGenerationStreamToken[];
963
- /** Additional sequences when using the `best_of` parameter */
964
- best_of_sequences?: TextGenerationStreamBestOfSequence[];
965
- }
966
- export interface TextGenerationStreamOutput {
967
- /** Generated token, one at a time */
968
- token: TextGenerationStreamToken;
969
- /**
970
- * Complete generated text
971
- * Only available when the generation is finished
972
- */
973
- generated_text: string | null;
974
- /**
975
- * Generation details
976
- * Only available when the generation is finished
977
- */
978
- details: TextGenerationStreamDetails | null;
979
- }
980
- /**
981
- * Use to continue text from a prompt. Same as `textGeneration` but returns generator that can be read one token at a time
982
- */
983
- export function textGenerationStream(
984
- args: BaseArgs & TextGenerationInput,
985
- options?: Options
986
- ): AsyncGenerator<TextGenerationStreamOutput>;
987
- export type TokenClassificationArgs = BaseArgs & {
988
- /**
989
- * A string to be classified
990
- */
991
- inputs: string;
992
- parameters?: {
993
- /**
994
- * (Default: simple). There are several aggregation strategies:
995
- *
996
- * none: Every token gets classified without further aggregation.
997
- *
998
- * simple: Entities are grouped according to the default schema (B-, I- tags get merged when the tag is similar).
999
- *
1000
- * first: Same as the simple strategy except words cannot end up with different tags. Words will use the tag of the first token when there is ambiguity.
1001
- *
1002
- * average: Same as the simple strategy except words cannot end up with different tags. Scores are averaged across tokens and then the maximum label is applied.
1003
- *
1004
- * max: Same as the simple strategy except words cannot end up with different tags. Word entity will be the token with the maximum score.
1005
- */
1006
- aggregation_strategy?: "none" | "simple" | "first" | "average" | "max";
1007
- };
1008
- };
1009
- export type TokenClassificationOutput = TokenClassificationOutputValue[];
1010
- export interface TokenClassificationOutputValue {
1011
- /**
1012
- * The offset stringwise where the answer is located. Useful to disambiguate if word occurs multiple times.
1013
- */
1014
- end: number;
1015
- /**
1016
- * The type for the entity being recognized (model specific).
1017
- */
1018
- entity_group: string;
1019
- /**
1020
- * How likely the entity was recognized.
1021
- */
1022
- score: number;
1023
- /**
1024
- * The offset stringwise where the answer is located. Useful to disambiguate if word occurs multiple times.
1025
- */
1026
- start: number;
1027
- /**
1028
- * The string that was captured
1029
- */
1030
- word: string;
1031
- }
1032
- /**
1033
- * Usually used for sentence parsing, either grammatical, or Named Entity Recognition (NER) to understand keywords contained within text. Recommended model: dbmdz/bert-large-cased-finetuned-conll03-english
1034
- */
1035
- export function tokenClassification(
1036
- args: TokenClassificationArgs,
1037
- options?: Options
1038
- ): Promise<TokenClassificationOutput>;
1039
- export type TranslationArgs = BaseArgs & {
1040
- /**
1041
- * A string to be translated
1042
- */
1043
- inputs: string | string[];
1044
- };
1045
- export type TranslationOutput = TranslationOutputValue | TranslationOutputValue[];
1046
- export interface TranslationOutputValue {
1047
- /**
1048
- * The string after translation
1049
- */
1050
- translation_text: string;
1051
- }
1052
- /**
1053
- * This task is well known to translate text from one language to another. Recommended model: Helsinki-NLP/opus-mt-ru-en.
1054
- */
1055
- export function translation(args: TranslationArgs, options?: Options): Promise<TranslationOutput>;
1056
- export type ZeroShotClassificationArgs = BaseArgs & {
1057
- /**
1058
- * a string or list of strings
1059
- */
1060
- inputs: string | string[];
1061
- parameters: {
1062
- /**
1063
- * a list of strings that are potential classes for inputs. (max 10 candidate_labels, for more, simply run multiple requests, results are going to be misleading if using too many candidate_labels anyway. If you want to keep the exact same, you can simply run multi_label=True and do the scaling on your end.
1064
- */
1065
- candidate_labels: string[];
1066
- /**
1067
- * (Default: false) Boolean that is set to True if classes can overlap
1068
- */
1069
- multi_label?: boolean;
1070
- };
1071
- };
1072
- export type ZeroShotClassificationOutput = ZeroShotClassificationOutputValue[];
1073
- export interface ZeroShotClassificationOutputValue {
1074
- labels: string[];
1075
- scores: number[];
1076
- sequence: string;
1077
- }
1078
- /**
1079
- * This task is super useful to try out classification with zero code, you simply pass a sentence/paragraph and the possible labels for that sentence, and you get a result. Recommended model: facebook/bart-large-mnli.
1080
- */
1081
- export function zeroShotClassification(
1082
- args: ZeroShotClassificationArgs,
1083
- options?: Options
1084
- ): Promise<ZeroShotClassificationOutput>;
1085
- export type TabularClassificationArgs = BaseArgs & {
1086
- inputs: {
1087
- /**
1088
- * A table of data represented as a dict of list where entries are headers and the lists are all the values, all lists must have the same size.
1089
- */
1090
- data: Record<string, string[]>;
1091
- };
1092
- };
1093
- /**
1094
- * A list of predicted labels for each row
1095
- */
1096
- export type TabularClassificationOutput = number[];
1097
- /**
1098
- * Predicts target label for a given set of features in tabular form.
1099
- * Typically, you will want to train a classification model on your training data and use it with your new data of the same format.
1100
- * Example model: vvmnnnkv/wine-quality
1101
- */
1102
- export function tabularClassification(
1103
- args: TabularClassificationArgs,
1104
- options?: Options
1105
- ): Promise<TabularClassificationOutput>;
1106
- export type TabularRegressionArgs = BaseArgs & {
1107
- inputs: {
1108
- /**
1109
- * A table of data represented as a dict of list where entries are headers and the lists are all the values, all lists must have the same size.
1110
- */
1111
- data: Record<string, string[]>;
1112
- };
1113
- };
1114
- /**
1115
- * a list of predicted values for each row
1116
- */
1117
- export type TabularRegressionOutput = number[];
1118
- /**
1119
- * Predicts target value for a given set of features in tabular form.
1120
- * Typically, you will want to train a regression model on your training data and use it with your new data of the same format.
1121
- * Example model: scikit-learn/Fish-Weight
1122
- */
1123
- export function tabularRegression(
1124
- args: TabularRegressionArgs,
1125
- options?: Options
1126
- ): Promise<TabularRegressionOutput>;
1127
- export class HfInference {
1128
- constructor(accessToken?: string, defaultOptions?: Options);
1129
- /**
1130
- * Returns copy of HfInference tied to a specified endpoint.
1131
- */
1132
- endpoint(endpointUrl: string): HfInferenceEndpoint;
1133
- /**
1134
- * This task reads some audio input and outputs the likelihood of classes.
1135
- * Recommended model: superb/hubert-large-superb-er
1136
- */
1137
- audioClassification(
1138
- args: Omit<AudioClassificationArgs, 'accessToken'>,
1139
- options?: Options
1140
- ): Promise<AudioClassificationReturn>;
1141
- /**
1142
- * This task reads some audio input and outputs one or multiple audio files.
1143
- * Example model: speechbrain/sepformer-wham does audio source separation.
1144
- */
1145
- audioToAudio(args: Omit<AudioToAudioArgs, 'accessToken'>, options?: Options): Promise<AudioToAudioReturn>;
1146
- /**
1147
- * This task reads some audio input and outputs the said words within the audio files.
1148
- * Recommended model (english language): facebook/wav2vec2-large-960h-lv60-self
1149
- */
1150
- automaticSpeechRecognition(
1151
- args: Omit<AutomaticSpeechRecognitionArgs, 'accessToken'>,
1152
- options?: Options
1153
- ): Promise<AutomaticSpeechRecognitionOutput>;
1154
- /**
1155
- * This task synthesize an audio of a voice pronouncing a given text.
1156
- * Recommended model: espnet/kan-bayashi_ljspeech_vits
1157
- */
1158
- textToSpeech(args: Omit<TextToSpeechArgs, 'accessToken'>, options?: Options): Promise<TextToSpeechOutput>;
1159
- /**
1160
- * Primitive to make custom calls to Inference Endpoints
1161
- */
1162
- request<T>(
1163
- args: Omit<RequestArgs, 'accessToken'>,
1164
- options?: Options & {
1165
- /** When a model can be used for multiple tasks, and we want to run a non-default task */
1166
- task?: string | InferenceTask;
1167
- /** To load default model if needed */
1168
- taskHint?: InferenceTask;
1169
- }
1170
- ): Promise<T>;
1171
- /**
1172
- * Primitive to make custom inference calls that expect server-sent events, and returns the response through a generator
1173
- */
1174
- streamingRequest<T>(
1175
- args: Omit<RequestArgs, 'accessToken'>,
1176
- options?: Options & {
1177
- /** When a model can be used for multiple tasks, and we want to run a non-default task */
1178
- task?: string | InferenceTask;
1179
- /** To load default model if needed */
1180
- taskHint?: InferenceTask;
1181
- }
1182
- ): AsyncGenerator<T>;
1183
- /**
1184
- * This task reads some image input and outputs the likelihood of classes.
1185
- * Recommended model: google/vit-base-patch16-224
1186
- */
1187
- imageClassification(
1188
- args: Omit<ImageClassificationArgs, 'accessToken'>,
1189
- options?: Options
1190
- ): Promise<ImageClassificationOutput>;
1191
- /**
1192
- * This task reads some image input and outputs the likelihood of classes & bounding boxes of detected objects.
1193
- * Recommended model: facebook/detr-resnet-50-panoptic
1194
- */
1195
- imageSegmentation(
1196
- args: Omit<ImageSegmentationArgs, 'accessToken'>,
1197
- options?: Options
1198
- ): Promise<ImageSegmentationOutput>;
1199
- /**
1200
- * This task reads some text input and outputs an image.
1201
- * Recommended model: lllyasviel/sd-controlnet-depth
1202
- */
1203
- imageToImage(args: Omit<ImageToImageArgs, 'accessToken'>, options?: Options): Promise<ImageToImageOutput>;
1204
- /**
1205
- * This task reads some image input and outputs the text caption.
1206
- */
1207
- imageToText(args: Omit<ImageToTextArgs, 'accessToken'>, options?: Options): Promise<ImageToTextOutput>;
1208
- /**
1209
- * This task reads some image input and outputs the likelihood of classes & bounding boxes of detected objects.
1210
- * Recommended model: facebook/detr-resnet-50
1211
- */
1212
- objectDetection(args: Omit<ObjectDetectionArgs, 'accessToken'>, options?: Options): Promise<ObjectDetectionOutput>;
1213
- /**
1214
- * This task reads some text input and outputs an image.
1215
- * Recommended model: stabilityai/stable-diffusion-2
1216
- */
1217
- textToImage(args: Omit<TextToImageArgs, 'accessToken'>, options?: Options): Promise<TextToImageOutput>;
1218
- /**
1219
- * Classify an image to specified classes.
1220
- * Recommended model: openai/clip-vit-large-patch14-336
1221
- */
1222
- zeroShotImageClassification(
1223
- args: Omit<ZeroShotImageClassificationArgs, 'accessToken'>,
1224
- options?: Options
1225
- ): Promise<ZeroShotImageClassificationOutput>;
1226
- /**
1227
- * Answers a question on a document image. Recommended model: impira/layoutlm-document-qa.
1228
- */
1229
- documentQuestionAnswering(
1230
- args: Omit<DocumentQuestionAnsweringArgs, 'accessToken'>,
1231
- options?: Options
1232
- ): Promise<DocumentQuestionAnsweringOutput>;
1233
- /**
1234
- * Answers a question on an image. Recommended model: dandelin/vilt-b32-finetuned-vqa.
1235
- */
1236
- visualQuestionAnswering(
1237
- args: Omit<VisualQuestionAnsweringArgs, 'accessToken'>,
1238
- options?: Options
1239
- ): Promise<VisualQuestionAnsweringOutput>;
1240
- /**
1241
- * This task reads some text and outputs raw float values, that are usually consumed as part of a semantic database/semantic search.
1242
- */
1243
- featureExtraction(
1244
- args: Omit<FeatureExtractionArgs, 'accessToken'>,
1245
- options?: Options
1246
- ): Promise<FeatureExtractionOutput>;
1247
- /**
1248
- * Tries to fill in a hole with a missing word (token to be precise). That’s the base task for BERT models.
1249
- */
1250
- fillMask(args: Omit<FillMaskArgs, 'accessToken'>, options?: Options): Promise<FillMaskOutput>;
1251
- /**
1252
- * Want to have a nice know-it-all bot that can answer any question?. Recommended model: deepset/roberta-base-squad2
1253
- */
1254
- questionAnswering(
1255
- args: Omit<QuestionAnsweringArgs, 'accessToken'>,
1256
- options?: Options
1257
- ): Promise<QuestionAnsweringOutput>;
1258
- /**
1259
- * Calculate the semantic similarity between one text and a list of other sentences by comparing their embeddings.
1260
- */
1261
- sentenceSimilarity(
1262
- args: Omit<SentenceSimilarityArgs, 'accessToken'>,
1263
- options?: Options
1264
- ): Promise<SentenceSimilarityOutput>;
1265
- /**
1266
- * This task is well known to summarize longer text into shorter text. Be careful, some models have a maximum length of input. That means that the summary cannot handle full books for instance. Be careful when choosing your model.
1267
- */
1268
- summarization(args: Omit<SummarizationArgs, 'accessToken'>, options?: Options): Promise<SummarizationOutput>;
1269
- /**
1270
- * Don’t know SQL? Don’t want to dive into a large spreadsheet? Ask questions in plain english! Recommended model: google/tapas-base-finetuned-wtq.
1271
- */
1272
- tableQuestionAnswering(
1273
- args: Omit<TableQuestionAnsweringArgs, 'accessToken'>,
1274
- options?: Options
1275
- ): Promise<TableQuestionAnsweringOutput>;
1276
- /**
1277
- * Usually used for sentiment-analysis this will output the likelihood of classes of an input. Recommended model: distilbert-base-uncased-finetuned-sst-2-english
1278
- */
1279
- textClassification(
1280
- args: Omit<TextClassificationArgs, 'accessToken'>,
1281
- options?: Options
1282
- ): Promise<TextClassificationOutput>;
1283
- /**
1284
- * Use to continue text from a prompt. This is a very generic task. Recommended model: gpt2 (it’s a simple model, but fun to play with).
1285
- */
1286
- textGeneration(
1287
- args: Omit<BaseArgs, 'accessToken'> & TextGenerationInput,
1288
- options?: Options
1289
- ): Promise<TextGenerationOutput>;
1290
- /**
1291
- * Use to continue text from a prompt. Same as `textGeneration` but returns generator that can be read one token at a time
1292
- */
1293
- textGenerationStream(
1294
- args: Omit<BaseArgs, 'accessToken'> & TextGenerationInput,
1295
- options?: Options
1296
- ): AsyncGenerator<TextGenerationStreamOutput>;
1297
- /**
1298
- * Usually used for sentence parsing, either grammatical, or Named Entity Recognition (NER) to understand keywords contained within text. Recommended model: dbmdz/bert-large-cased-finetuned-conll03-english
1299
- */
1300
- tokenClassification(
1301
- args: Omit<TokenClassificationArgs, 'accessToken'>,
1302
- options?: Options
1303
- ): Promise<TokenClassificationOutput>;
1304
- /**
1305
- * This task is well known to translate text from one language to another. Recommended model: Helsinki-NLP/opus-mt-ru-en.
1306
- */
1307
- translation(args: Omit<TranslationArgs, 'accessToken'>, options?: Options): Promise<TranslationOutput>;
1308
- /**
1309
- * This task is super useful to try out classification with zero code, you simply pass a sentence/paragraph and the possible labels for that sentence, and you get a result. Recommended model: facebook/bart-large-mnli.
1310
- */
1311
- zeroShotClassification(
1312
- args: Omit<ZeroShotClassificationArgs, 'accessToken'>,
1313
- options?: Options
1314
- ): Promise<ZeroShotClassificationOutput>;
1315
- /**
1316
- * Predicts target label for a given set of features in tabular form.
1317
- * Typically, you will want to train a classification model on your training data and use it with your new data of the same format.
1318
- * Example model: vvmnnnkv/wine-quality
1319
- */
1320
- tabularClassification(
1321
- args: Omit<TabularClassificationArgs, 'accessToken'>,
1322
- options?: Options
1323
- ): Promise<TabularClassificationOutput>;
1324
- /**
1325
- * Predicts target value for a given set of features in tabular form.
1326
- * Typically, you will want to train a regression model on your training data and use it with your new data of the same format.
1327
- * Example model: scikit-learn/Fish-Weight
1328
- */
1329
- tabularRegression(
1330
- args: Omit<TabularRegressionArgs, 'accessToken'>,
1331
- options?: Options
1332
- ): Promise<TabularRegressionOutput>;
1333
- }
1334
- export class HfInferenceEndpoint {
1335
- constructor(endpointUrl: string, accessToken?: string, defaultOptions?: Options);
1336
- /**
1337
- * This task reads some audio input and outputs the likelihood of classes.
1338
- * Recommended model: superb/hubert-large-superb-er
1339
- */
1340
- audioClassification(
1341
- args: Omit<AudioClassificationArgs, 'accessToken' | 'model'>,
1342
- options?: Options
1343
- ): Promise<AudioClassificationReturn>;
1344
- /**
1345
- * This task reads some audio input and outputs one or multiple audio files.
1346
- * Example model: speechbrain/sepformer-wham does audio source separation.
1347
- */
1348
- audioToAudio(args: Omit<AudioToAudioArgs, 'accessToken' | 'model'>, options?: Options): Promise<AudioToAudioReturn>;
1349
- /**
1350
- * This task reads some audio input and outputs the said words within the audio files.
1351
- * Recommended model (english language): facebook/wav2vec2-large-960h-lv60-self
1352
- */
1353
- automaticSpeechRecognition(
1354
- args: Omit<AutomaticSpeechRecognitionArgs, 'accessToken' | 'model'>,
1355
- options?: Options
1356
- ): Promise<AutomaticSpeechRecognitionOutput>;
1357
- /**
1358
- * This task synthesize an audio of a voice pronouncing a given text.
1359
- * Recommended model: espnet/kan-bayashi_ljspeech_vits
1360
- */
1361
- textToSpeech(args: Omit<TextToSpeechArgs, 'accessToken' | 'model'>, options?: Options): Promise<TextToSpeechOutput>;
1362
- /**
1363
- * Primitive to make custom calls to Inference Endpoints
1364
- */
1365
- request<T>(
1366
- args: Omit<RequestArgs, 'accessToken' | 'model'>,
1367
- options?: Options & {
1368
- /** When a model can be used for multiple tasks, and we want to run a non-default task */
1369
- task?: string | InferenceTask;
1370
- /** To load default model if needed */
1371
- taskHint?: InferenceTask;
1372
- }
1373
- ): Promise<T>;
1374
- /**
1375
- * Primitive to make custom inference calls that expect server-sent events, and returns the response through a generator
1376
- */
1377
- streamingRequest<T>(
1378
- args: Omit<RequestArgs, 'accessToken' | 'model'>,
1379
- options?: Options & {
1380
- /** When a model can be used for multiple tasks, and we want to run a non-default task */
1381
- task?: string | InferenceTask;
1382
- /** To load default model if needed */
1383
- taskHint?: InferenceTask;
1384
- }
1385
- ): AsyncGenerator<T>;
1386
- /**
1387
- * This task reads some image input and outputs the likelihood of classes.
1388
- * Recommended model: google/vit-base-patch16-224
1389
- */
1390
- imageClassification(
1391
- args: Omit<ImageClassificationArgs, 'accessToken' | 'model'>,
1392
- options?: Options
1393
- ): Promise<ImageClassificationOutput>;
1394
- /**
1395
- * This task reads some image input and outputs the likelihood of classes & bounding boxes of detected objects.
1396
- * Recommended model: facebook/detr-resnet-50-panoptic
1397
- */
1398
- imageSegmentation(
1399
- args: Omit<ImageSegmentationArgs, 'accessToken' | 'model'>,
1400
- options?: Options
1401
- ): Promise<ImageSegmentationOutput>;
1402
- /**
1403
- * This task reads some text input and outputs an image.
1404
- * Recommended model: lllyasviel/sd-controlnet-depth
1405
- */
1406
- imageToImage(args: Omit<ImageToImageArgs, 'accessToken' | 'model'>, options?: Options): Promise<ImageToImageOutput>;
1407
- /**
1408
- * This task reads some image input and outputs the text caption.
1409
- */
1410
- imageToText(args: Omit<ImageToTextArgs, 'accessToken' | 'model'>, options?: Options): Promise<ImageToTextOutput>;
1411
- /**
1412
- * This task reads some image input and outputs the likelihood of classes & bounding boxes of detected objects.
1413
- * Recommended model: facebook/detr-resnet-50
1414
- */
1415
- objectDetection(args: Omit<ObjectDetectionArgs, 'accessToken' | 'model'>, options?: Options): Promise<ObjectDetectionOutput>;
1416
- /**
1417
- * This task reads some text input and outputs an image.
1418
- * Recommended model: stabilityai/stable-diffusion-2
1419
- */
1420
- textToImage(args: Omit<TextToImageArgs, 'accessToken' | 'model'>, options?: Options): Promise<TextToImageOutput>;
1421
- /**
1422
- * Classify an image to specified classes.
1423
- * Recommended model: openai/clip-vit-large-patch14-336
1424
- */
1425
- zeroShotImageClassification(
1426
- args: Omit<ZeroShotImageClassificationArgs, 'accessToken' | 'model'>,
1427
- options?: Options
1428
- ): Promise<ZeroShotImageClassificationOutput>;
1429
- /**
1430
- * Answers a question on a document image. Recommended model: impira/layoutlm-document-qa.
1431
- */
1432
- documentQuestionAnswering(
1433
- args: Omit<DocumentQuestionAnsweringArgs, 'accessToken' | 'model'>,
1434
- options?: Options
1435
- ): Promise<DocumentQuestionAnsweringOutput>;
1436
- /**
1437
- * Answers a question on an image. Recommended model: dandelin/vilt-b32-finetuned-vqa.
1438
- */
1439
- visualQuestionAnswering(
1440
- args: Omit<VisualQuestionAnsweringArgs, 'accessToken' | 'model'>,
1441
- options?: Options
1442
- ): Promise<VisualQuestionAnsweringOutput>;
1443
- /**
1444
- * This task reads some text and outputs raw float values, that are usually consumed as part of a semantic database/semantic search.
1445
- */
1446
- featureExtraction(
1447
- args: Omit<FeatureExtractionArgs, 'accessToken' | 'model'>,
1448
- options?: Options
1449
- ): Promise<FeatureExtractionOutput>;
1450
- /**
1451
- * Tries to fill in a hole with a missing word (token to be precise). That’s the base task for BERT models.
1452
- */
1453
- fillMask(args: Omit<FillMaskArgs, 'accessToken' | 'model'>, options?: Options): Promise<FillMaskOutput>;
1454
- /**
1455
- * Want to have a nice know-it-all bot that can answer any question?. Recommended model: deepset/roberta-base-squad2
1456
- */
1457
- questionAnswering(
1458
- args: Omit<QuestionAnsweringArgs, 'accessToken' | 'model'>,
1459
- options?: Options
1460
- ): Promise<QuestionAnsweringOutput>;
1461
- /**
1462
- * Calculate the semantic similarity between one text and a list of other sentences by comparing their embeddings.
1463
- */
1464
- sentenceSimilarity(
1465
- args: Omit<SentenceSimilarityArgs, 'accessToken' | 'model'>,
1466
- options?: Options
1467
- ): Promise<SentenceSimilarityOutput>;
1468
- /**
1469
- * This task is well known to summarize longer text into shorter text. Be careful, some models have a maximum length of input. That means that the summary cannot handle full books for instance. Be careful when choosing your model.
1470
- */
1471
- summarization(args: Omit<SummarizationArgs, 'accessToken' | 'model'>, options?: Options): Promise<SummarizationOutput>;
1472
- /**
1473
- * Don’t know SQL? Don’t want to dive into a large spreadsheet? Ask questions in plain english! Recommended model: google/tapas-base-finetuned-wtq.
1474
- */
1475
- tableQuestionAnswering(
1476
- args: Omit<TableQuestionAnsweringArgs, 'accessToken' | 'model'>,
1477
- options?: Options
1478
- ): Promise<TableQuestionAnsweringOutput>;
1479
- /**
1480
- * Usually used for sentiment-analysis this will output the likelihood of classes of an input. Recommended model: distilbert-base-uncased-finetuned-sst-2-english
1481
- */
1482
- textClassification(
1483
- args: Omit<TextClassificationArgs, 'accessToken' | 'model'>,
1484
- options?: Options
1485
- ): Promise<TextClassificationOutput>;
1486
- /**
1487
- * Use to continue text from a prompt. This is a very generic task. Recommended model: gpt2 (it’s a simple model, but fun to play with).
1488
- */
1489
- textGeneration(
1490
- args: Omit<BaseArgs, 'accessToken' | 'model'> & TextGenerationInput,
1491
- options?: Options
1492
- ): Promise<TextGenerationOutput>;
1493
- /**
1494
- * Use to continue text from a prompt. Same as `textGeneration` but returns generator that can be read one token at a time
1495
- */
1496
- textGenerationStream(
1497
- args: Omit<BaseArgs, 'accessToken' | 'model'> & TextGenerationInput,
1498
- options?: Options
1499
- ): AsyncGenerator<TextGenerationStreamOutput>;
1500
- /**
1501
- * Usually used for sentence parsing, either grammatical, or Named Entity Recognition (NER) to understand keywords contained within text. Recommended model: dbmdz/bert-large-cased-finetuned-conll03-english
1502
- */
1503
- tokenClassification(
1504
- args: Omit<TokenClassificationArgs, 'accessToken' | 'model'>,
1505
- options?: Options
1506
- ): Promise<TokenClassificationOutput>;
1507
- /**
1508
- * This task is well known to translate text from one language to another. Recommended model: Helsinki-NLP/opus-mt-ru-en.
1509
- */
1510
- translation(args: Omit<TranslationArgs, 'accessToken' | 'model'>, options?: Options): Promise<TranslationOutput>;
1511
- /**
1512
- * This task is super useful to try out classification with zero code, you simply pass a sentence/paragraph and the possible labels for that sentence, and you get a result. Recommended model: facebook/bart-large-mnli.
1513
- */
1514
- zeroShotClassification(
1515
- args: Omit<ZeroShotClassificationArgs, 'accessToken' | 'model'>,
1516
- options?: Options
1517
- ): Promise<ZeroShotClassificationOutput>;
1518
- /**
1519
- * Predicts target label for a given set of features in tabular form.
1520
- * Typically, you will want to train a classification model on your training data and use it with your new data of the same format.
1521
- * Example model: vvmnnnkv/wine-quality
1522
- */
1523
- tabularClassification(
1524
- args: Omit<TabularClassificationArgs, 'accessToken' | 'model'>,
1525
- options?: Options
1526
- ): Promise<TabularClassificationOutput>;
1527
- /**
1528
- * Predicts target value for a given set of features in tabular form.
1529
- * Typically, you will want to train a regression model on your training data and use it with your new data of the same format.
1530
- * Example model: scikit-learn/Fish-Weight
1531
- */
1532
- tabularRegression(
1533
- args: Omit<TabularRegressionArgs, 'accessToken' | 'model'>,
1534
- options?: Options
1535
- ): Promise<TabularRegressionOutput>;
1536
- }