@huggingface/inference 1.4.1 → 1.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,527 @@
1
+ type Options = {
2
+ /**
3
+ * (Default: true) Boolean. If a request 503s and wait_for_model is set to false, the request will be retried with the same parameters but with wait_for_model set to true.
4
+ */
5
+ retry_on_error?: boolean;
6
+ /**
7
+ * (Default: true). Boolean. There is a cache layer on the inference API to speed up requests we have already seen. Most models can use those results as-is because models are deterministic (meaning the results will be the same anyway). However, if you use a non-deterministic model, you can set this parameter to false to prevent the caching mechanism from being used, resulting in a real new query.
8
+ */
9
+ use_cache?: boolean;
10
+ /**
11
+ * (Default: false). Boolean to use GPU instead of CPU for inference (requires Startup plan at least).
12
+ */
13
+ use_gpu?: boolean;
14
+ /**
15
+ * (Default: false) Boolean. If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error as it will limit hanging in your application to known places.
16
+ */
17
+ wait_for_model?: boolean;
18
+ };
19
+ type Args = {
20
+ model: string;
21
+ };
22
+ type FillMaskArgs = Args & {
23
+ inputs: string;
24
+ };
25
+ type FillMaskReturn = {
26
+ /**
27
+ * The probability for this token.
28
+ */
29
+ score: number;
30
+ /**
31
+ * The actual sequence of tokens that ran against the model (may contain special tokens)
32
+ */
33
+ sequence: string;
34
+ /**
35
+ * The id of the token
36
+ */
37
+ token: number;
38
+ /**
39
+ * The string representation of the token
40
+ */
41
+ token_str: string;
42
+ }[];
43
+ type SummarizationArgs = Args & {
44
+ /**
45
+ * A string to be summarized
46
+ */
47
+ inputs: string;
48
+ parameters?: {
49
+ /**
50
+ * (Default: None). Integer to define the maximum length in tokens of the output summary.
51
+ */
52
+ max_length?: number;
53
+ /**
54
+ * (Default: None). Float (0-120.0). The amount of time in seconds that the query should take maximum. Network can cause some overhead so it will be a soft limit.
55
+ */
56
+ max_time?: number;
57
+ /**
58
+ * (Default: None). Integer to define the minimum length in tokens of the output summary.
59
+ */
60
+ min_length?: number;
61
+ /**
62
+ * (Default: None). Float (0.0-100.0). The more a token is used within generation the more it is penalized to not be picked in successive generation passes.
63
+ */
64
+ repetition_penalty?: number;
65
+ /**
66
+ * (Default: 1.0). Float (0.0-100.0). The temperature of the sampling operation. 1 means regular sampling, 0 means always take the highest score, 100.0 is getting closer to uniform probability.
67
+ */
68
+ temperature?: number;
69
+ /**
70
+ * (Default: None). Integer to define the top tokens considered within the sample operation to create new text.
71
+ */
72
+ top_k?: number;
73
+ /**
74
+ * (Default: None). Float to define the tokens that are within the sample operation of text generation. Tokens are added to the sample from most probable to least probable until the sum of the probabilities is greater than top_p.
75
+ */
76
+ top_p?: number;
77
+ };
78
+ };
79
+ type SummarizationReturn = {
80
+ /**
81
+ * The summarized text
82
+ */
83
+ summary_text: string;
84
+ };
85
+ type QuestionAnswerArgs = Args & {
86
+ inputs: {
87
+ context: string;
88
+ question: string;
89
+ };
90
+ };
91
+ type QuestionAnswerReturn = {
92
+ /**
93
+ * A string that’s the answer within the text.
94
+ */
95
+ answer: string;
96
+ /**
97
+ * The index (string wise) of the stop of the answer within context.
98
+ */
99
+ end: number;
100
+ /**
101
+ * A float that represents how likely it is that the answer is correct
102
+ */
103
+ score: number;
104
+ /**
105
+ * The index (string wise) of the start of the answer within context.
106
+ */
107
+ start: number;
108
+ };
109
+ type TableQuestionAnswerArgs = Args & {
110
+ inputs: {
111
+ /**
112
+ * The query in plain text that you want to ask the table
113
+ */
114
+ query: string;
115
+ /**
116
+ * A table of data represented as a dict of list where entries are headers and the lists are all the values, all lists must have the same size.
117
+ */
118
+ table: Record<string, string[]>;
119
+ };
120
+ };
121
+ type TableQuestionAnswerReturn = {
122
+ /**
123
+ * The aggregator used to get the answer
124
+ */
125
+ aggregator: string;
126
+ /**
127
+ * The plaintext answer
128
+ */
129
+ answer: string;
130
+ /**
131
+ * A list of the contents of the cells referenced in the answer
132
+ */
133
+ cells: string[];
134
+ /**
135
+ * a list of coordinates of the cells referenced in the answer
136
+ */
137
+ coordinates: number[][];
138
+ };
139
+ type TextClassificationArgs = Args & {
140
+ /**
141
+ * A string to be classified
142
+ */
143
+ inputs: string;
144
+ };
145
+ type TextClassificationReturn = {
146
+ /**
147
+ * The label for the class (model specific)
148
+ */
149
+ label: string;
150
+ /**
151
+ * A float that represents how likely it is that the text belongs to this class.
152
+ */
153
+ score: number;
154
+ }[];
155
+ type TextGenerationArgs = Args & {
156
+ /**
157
+ * A string to be generated from
158
+ */
159
+ inputs: string;
160
+ parameters?: {
161
+ /**
162
+ * (Optional: True). Bool. Whether or not to use sampling, use greedy decoding otherwise.
163
+ */
164
+ do_sample?: boolean;
165
+ /**
166
+ * (Default: None). Int (0-250). The amount of new tokens to be generated. This does not include the input length; it is an estimate of the size of the generated text you want. Each new token slows down the request, so look for a balance between response time and length of text generated.
167
+ */
168
+ max_new_tokens?: number;
169
+ /**
170
+ * (Default: None). Float (0-120.0). The amount of time in seconds that the query should take maximum. Network can cause some overhead so it will be a soft limit. Use that in combination with max_new_tokens for best results.
171
+ */
172
+ max_time?: number;
173
+ /**
174
+ * (Default: 1). Integer. The number of propositions you want to be returned.
175
+ */
176
+ num_return_sequences?: number;
177
+ /**
178
+ * (Default: None). Float (0.0-100.0). The more a token is used within generation the more it is penalized to not be picked in successive generation passes.
179
+ */
180
+ repetition_penalty?: number;
181
+ /**
182
+ * (Default: True). Bool. If set to False, the return results will not contain the original query making it easier for prompting.
183
+ */
184
+ return_full_text?: boolean;
185
+ /**
186
+ * (Default: 1.0). Float (0.0-100.0). The temperature of the sampling operation. 1 means regular sampling, 0 means always take the highest score, 100.0 is getting closer to uniform probability.
187
+ */
188
+ temperature?: number;
189
+ /**
190
+ * (Default: None). Integer to define the top tokens considered within the sample operation to create new text.
191
+ */
192
+ top_k?: number;
193
+ /**
194
+ * (Default: None). Float to define the tokens that are within the sample operation of text generation. Tokens are added to the sample from most probable to least probable until the sum of the probabilities is greater than top_p.
195
+ */
196
+ top_p?: number;
197
+ };
198
+ };
199
+ type TextGenerationReturn = {
200
+ /**
201
+ * The continued string
202
+ */
203
+ generated_text: string;
204
+ };
205
+ type TokenClassificationArgs = Args & {
206
+ /**
207
+ * A string to be classified
208
+ */
209
+ inputs: string;
210
+ parameters?: {
211
+ /**
212
+ * (Default: simple). There are several aggregation strategies:
213
+ *
214
+ * none: Every token gets classified without further aggregation.
215
+ *
216
+ * simple: Entities are grouped according to the default schema (B-, I- tags get merged when the tag is similar).
217
+ *
218
+ * first: Same as the simple strategy except words cannot end up with different tags. Words will use the tag of the first token when there is ambiguity.
219
+ *
220
+ * average: Same as the simple strategy except words cannot end up with different tags. Scores are averaged across tokens and then the maximum label is applied.
221
+ *
222
+ * max: Same as the simple strategy except words cannot end up with different tags. Word entity will be the token with the maximum score.
223
+ */
224
+ aggregation_strategy?: "none" | "simple" | "first" | "average" | "max";
225
+ };
226
+ };
227
+ type TokenClassificationReturnValue = {
228
+ /**
229
+ * The offset stringwise where the answer is located. Useful to disambiguate if word occurs multiple times.
230
+ */
231
+ end: number;
232
+ /**
233
+ * The type for the entity being recognized (model specific).
234
+ */
235
+ entity_group: string;
236
+ /**
237
+ * How likely the entity was recognized.
238
+ */
239
+ score: number;
240
+ /**
241
+ * The offset stringwise where the answer is located. Useful to disambiguate if word occurs multiple times.
242
+ */
243
+ start: number;
244
+ /**
245
+ * The string that was captured
246
+ */
247
+ word: string;
248
+ };
249
+ type TokenClassificationReturn = TokenClassificationReturnValue[];
250
+ type TranslationArgs = Args & {
251
+ /**
252
+ * A string to be translated
253
+ */
254
+ inputs: string;
255
+ };
256
+ type TranslationReturn = {
257
+ /**
258
+ * The string after translation
259
+ */
260
+ translation_text: string;
261
+ };
262
+ type ZeroShotClassificationArgs = Args & {
263
+ /**
264
+ * a string or list of strings
265
+ */
266
+ inputs: string | string[];
267
+ parameters: {
268
+ /**
269
+ * A list of strings that are potential classes for inputs. (Max 10 candidate_labels; for more, simply run multiple requests. Results are going to be misleading if using too many candidate_labels anyway. If you want to keep the exact same, you can simply run multi_label=True and do the scaling on your end.)
270
+ */
271
+ candidate_labels: string[];
272
+ /**
273
+ * (Default: false) Boolean that is set to True if classes can overlap
274
+ */
275
+ multi_label?: boolean;
276
+ };
277
+ };
278
+ type ZeroShotClassificationReturnValue = {
279
+ labels: string[];
280
+ scores: number[];
281
+ sequence: string;
282
+ };
283
+ type ZeroShotClassificationReturn = ZeroShotClassificationReturnValue[];
284
+ type ConversationalArgs = Args & {
285
+ inputs: {
286
+ /**
287
+ * A list of strings corresponding to the earlier replies from the model.
288
+ */
289
+ generated_responses?: string[];
290
+ /**
291
+ * A list of strings corresponding to the earlier replies from the user. Should be the same length as generated_responses.
292
+ */
293
+ past_user_inputs?: string[];
294
+ /**
295
+ * The last input from the user in the conversation.
296
+ */
297
+ text: string;
298
+ };
299
+ parameters?: {
300
+ /**
301
+ * (Default: None). Integer to define the maximum length in tokens of the generated response.
302
+ */
303
+ max_length?: number;
304
+ /**
305
+ * (Default: None). Float (0-120.0). The amount of time in seconds that the query should take maximum. Network can cause some overhead so it will be a soft limit.
306
+ */
307
+ max_time?: number;
308
+ /**
309
+ * (Default: None). Integer to define the minimum length in tokens of the generated response.
310
+ */
311
+ min_length?: number;
312
+ /**
313
+ * (Default: None). Float (0.0-100.0). The more a token is used within generation the more it is penalized to not be picked in successive generation passes.
314
+ */
315
+ repetition_penalty?: number;
316
+ /**
317
+ * (Default: 1.0). Float (0.0-100.0). The temperature of the sampling operation. 1 means regular sampling, 0 means always take the highest score, 100.0 is getting closer to uniform probability.
318
+ */
319
+ temperature?: number;
320
+ /**
321
+ * (Default: None). Integer to define the top tokens considered within the sample operation to create new text.
322
+ */
323
+ top_k?: number;
324
+ /**
325
+ * (Default: None). Float to define the tokens that are within the sample operation of text generation. Tokens are added to the sample from most probable to least probable until the sum of the probabilities is greater than top_p.
326
+ */
327
+ top_p?: number;
328
+ };
329
+ };
330
+ type ConversationalReturn = {
331
+ conversation: {
332
+ generated_responses: string[];
333
+ past_user_inputs: string[];
334
+ };
335
+ generated_text: string;
336
+ warnings: string[];
337
+ };
338
+ type FeatureExtractionArgs = Args & {
339
+ /**
340
+ * The inputs vary based on the model. For example when using sentence-transformers/paraphrase-xlm-r-multilingual-v1 the inputs will look like this:
341
+ *
342
+ * inputs: {
343
+ * "source_sentence": "That is a happy person",
344
+ * "sentences": ["That is a happy dog", "That is a very happy person", "Today is a sunny day"]
345
+ */
346
+ inputs: Record<string, any> | Record<string, any>[];
347
+ };
348
+ /**
349
+ * Returned values are a list of floats, or a list of lists of floats (depending on whether you sent a string or a list of strings, and whether the automatic reduction, usually mean_pooling for instance, was applied for you or not). This should be explained on the model's README.
350
+ */
351
+ type FeatureExtractionReturn = (number | number[])[];
352
+ type ImageClassificationArgs = Args & {
353
+ /**
354
+ * Binary image data
355
+ */
356
+ data: any;
357
+ };
358
+ type ImageClassificationReturnValue = {
359
+ /**
360
+ * The label for the class (model specific)
361
+ */
362
+ label: string;
363
+ /**
364
+ * A float that represents how likely it is that the image file belongs to this class.
365
+ */
366
+ score: number;
367
+ };
368
+ type ImageClassificationReturn = ImageClassificationReturnValue[];
369
+ type ObjectDetectionArgs = Args & {
370
+ /**
371
+ * Binary image data
372
+ */
373
+ data: any;
374
+ };
375
+ type ObjectDetectionReturnValue = {
376
+ /**
377
+ * A dict (with keys [xmin,ymin,xmax,ymax]) representing the bounding box of a detected object.
378
+ */
379
+ box: {
380
+ xmax: number;
381
+ xmin: number;
382
+ ymax: number;
383
+ ymin: number;
384
+ };
385
+ /**
386
+ * The label for the class (model specific) of a detected object.
387
+ */
388
+ label: string;
389
+ /**
390
+ * A float that represents how likely it is that the detected object belongs to the given class.
391
+ */
392
+ score: number;
393
+ };
394
+ type ObjectDetectionReturn = ObjectDetectionReturnValue[];
395
+ type ImageSegmentationArgs = Args & {
396
+ /**
397
+ * Binary image data
398
+ */
399
+ data: any;
400
+ };
401
+ type ImageSegmentationReturnValue = {
402
+ /**
403
+ * The label for the class (model specific) of a segment.
404
+ */
405
+ label: string;
406
+ /**
407
+ * A str (base64 str of a single channel black-and-white img) representing the mask of a segment.
408
+ */
409
+ mask: string;
410
+ /**
411
+ * A float that represents how likely it is that the detected object belongs to the given class.
412
+ */
413
+ score: number;
414
+ };
415
+ type ImageSegmentationReturn = ImageSegmentationReturnValue[];
416
+ type AutomaticSpeechRecognitionArgs = Args & {
417
+ /**
418
+ * Binary audio data
419
+ */
420
+ data: any;
421
+ };
422
+ type AutomaticSpeechRecognitionReturn = {
423
+ /**
424
+ * The text that was recognized from the audio
425
+ */
426
+ text: string;
427
+ };
428
+ type AudioClassificationArgs = Args & {
429
+ /**
430
+ * Binary audio data
431
+ */
432
+ data: any;
433
+ };
434
+ type AudioClassificationReturnValue = {
435
+ /**
436
+ * The label for the class (model specific)
437
+ */
438
+ label: string;
439
+ /**
440
+ * A float that represents how likely it is that the audio file belongs to this class.
441
+ */
442
+ score: number;
443
+ };
444
+ type AudioClassificationReturn = AudioClassificationReturnValue[];
445
+ declare class HuggingFace {
446
+ private readonly apiKey;
447
+ private readonly defaultOptions;
448
+ constructor(apiKey: string, defaultOptions?: Options);
449
+ /**
450
+ * Tries to fill in a hole with a missing word (token to be precise). That’s the base task for BERT models.
451
+ */
452
+ fillMask(args: FillMaskArgs, options?: Options): Promise<FillMaskReturn>;
453
+ /**
454
+ * This task is well known to summarize longer text into shorter text. Be careful, some models have a maximum length of input. That means that the summary cannot handle full books for instance. Be careful when choosing your model.
455
+ */
456
+ summarization(args: SummarizationArgs, options?: Options): Promise<SummarizationReturn>;
457
+ /**
458
+ * Want to have a nice know-it-all bot that can answer any question? Recommended model: deepset/roberta-base-squad2
459
+ */
460
+ questionAnswer(args: QuestionAnswerArgs, options?: Options): Promise<QuestionAnswerReturn>;
461
+ /**
462
+ * Don’t know SQL? Don’t want to dive into a large spreadsheet? Ask questions in plain English! Recommended model: google/tapas-base-finetuned-wtq.
463
+ */
464
+ tableQuestionAnswer(args: TableQuestionAnswerArgs, options?: Options): Promise<TableQuestionAnswerReturn>;
465
+ /**
466
+ * Usually used for sentiment-analysis this will output the likelihood of classes of an input. Recommended model: distilbert-base-uncased-finetuned-sst-2-english
467
+ */
468
+ textClassification(args: TextClassificationArgs, options?: Options): Promise<TextClassificationReturn>;
469
+ /**
470
+ * Use to continue text from a prompt. This is a very generic task. Recommended model: gpt2 (it’s a simple model, but fun to play with).
471
+ */
472
+ textGeneration(args: TextGenerationArgs, options?: Options): Promise<TextGenerationReturn>;
473
+ /**
474
+ * Usually used for sentence parsing, either grammatical, or Named Entity Recognition (NER) to understand keywords contained within text. Recommended model: dbmdz/bert-large-cased-finetuned-conll03-english
475
+ */
476
+ tokenClassification(args: TokenClassificationArgs, options?: Options): Promise<TokenClassificationReturn>;
477
+ /**
478
+ * This task is well known to translate text from one language to another. Recommended model: Helsinki-NLP/opus-mt-ru-en.
479
+ */
480
+ translation(args: TranslationArgs, options?: Options): Promise<TranslationReturn>;
481
+ /**
482
+ * This task is super useful to try out classification with zero code, you simply pass a sentence/paragraph and the possible labels for that sentence, and you get a result. Recommended model: facebook/bart-large-mnli.
483
+ */
484
+ zeroShotClassification(args: ZeroShotClassificationArgs, options?: Options): Promise<ZeroShotClassificationReturn>;
485
+ /**
486
+ * This task corresponds to any chatbot like structure. Models tend to have shorter max_length, so please check with caution when using a given model if you need long range dependency or not. Recommended model: microsoft/DialoGPT-large.
487
+ *
488
+ */
489
+ conversational(args: ConversationalArgs, options?: Options): Promise<ConversationalReturn>;
490
+ /**
491
+ * This task reads some text and outputs raw float values, that are usually consumed as part of a semantic database/semantic search.
492
+ */
493
+ featureExtraction(args: FeatureExtractionArgs, options?: Options): Promise<FeatureExtractionReturn>;
494
+ /**
495
+ * This task reads some audio input and outputs the said words within the audio files.
496
+ * Recommended model (english language): facebook/wav2vec2-large-960h-lv60-self
497
+ */
498
+ automaticSpeechRecognition(args: AutomaticSpeechRecognitionArgs, options?: Options): Promise<AutomaticSpeechRecognitionReturn>;
499
+ /**
500
+ * This task reads some audio input and outputs the likelihood of classes.
501
+ * Recommended model: superb/hubert-large-superb-er
502
+ */
503
+ audioClassification(args: AudioClassificationArgs, options?: Options): Promise<AudioClassificationReturn>;
504
+ /**
505
+ * This task reads some image input and outputs the likelihood of classes.
506
+ * Recommended model: google/vit-base-patch16-224
507
+ */
508
+ imageClassification(args: ImageClassificationArgs, options?: Options): Promise<ImageClassificationReturn>;
509
+ /**
510
+ * This task reads some image input and outputs the likelihood of classes & bounding boxes of detected objects.
511
+ * Recommended model: facebook/detr-resnet-50
512
+ */
513
+ objectDetection(args: ObjectDetectionArgs, options?: Options): Promise<ObjectDetectionReturn>;
514
+ /**
515
+ * This task reads some image input and outputs the likelihood of classes & masks of the detected segments.
516
+ * Recommended model: facebook/detr-resnet-50-panoptic
517
+ */
518
+ imageSegmentation(args: ImageSegmentationArgs, options?: Options): Promise<ImageSegmentationReturn>;
519
+ request(args: Args & {
520
+ data?: any;
521
+ }, options?: Options & {
522
+ binary?: boolean;
523
+ }): Promise<any>;
524
+ private static toArray;
525
+ }
526
+
527
+ export { Args, AudioClassificationArgs, AudioClassificationReturn, AudioClassificationReturnValue, AutomaticSpeechRecognitionArgs, AutomaticSpeechRecognitionReturn, ConversationalArgs, ConversationalReturn, FeatureExtractionArgs, FeatureExtractionReturn, FillMaskArgs, FillMaskReturn, HuggingFace, ImageClassificationArgs, ImageClassificationReturn, ImageClassificationReturnValue, ImageSegmentationArgs, ImageSegmentationReturn, ImageSegmentationReturnValue, ObjectDetectionArgs, ObjectDetectionReturn, ObjectDetectionReturnValue, Options, QuestionAnswerArgs, QuestionAnswerReturn, SummarizationArgs, SummarizationReturn, TableQuestionAnswerArgs, TableQuestionAnswerReturn, TextClassificationArgs, TextClassificationReturn, TextGenerationArgs, TextGenerationReturn, TokenClassificationArgs, TokenClassificationReturn, TokenClassificationReturnValue, TranslationArgs, TranslationReturn, ZeroShotClassificationArgs, ZeroShotClassificationReturn, ZeroShotClassificationReturnValue, HuggingFace as default };