@huggingface/inference 1.6.0 → 1.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -191,4 +191,4 @@ HF_ACCESS_TOKEN="your access token" npm run test
 
  We have an informative documentation project called [Tasks](https://huggingface.co/tasks) to list available models for each task and explain how each task works in detail.
 
- It also contain demos, example outputs and other resources should you want to dig more into the ML-side of things.
+ It also contains demos, example outputs, and other resources should you want to dig deeper into the ML side of things.
package/dist/index.d.ts CHANGED
@@ -7,6 +7,10 @@ interface Options {
  * (Default: true). Boolean. There is a cache layer on the inference API to speedup requests we have already seen. Most models can use those results as is as models are deterministic (meaning the results will be the same anyway). However if you use a non deterministic model, you can set this parameter to prevent the caching mechanism from being used resulting in a real new query.
  */
  use_cache?: boolean;
+ /**
+ * (Default: false). Boolean. Do not load the model if it's not already available.
+ */
+ dont_load_model?: boolean;
  /**
  * (Default: false). Boolean to use GPU instead of CPU for inference (requires Startup plan at least).
  */
@@ -344,7 +348,7 @@ type FeatureExtractionArgs = Args & {
  * "sentences": ["That is a happy dog", "That is a very happy person", "Today is a sunny day"]
  * }
  */
- inputs: Record<string, any> | Record<string, any>[];
+ inputs: Record<string, unknown> | Record<string, unknown>[];
  };
  /**
  * Returned values are a list of floats, or a list of list of floats (depending on if you sent a string or a list of string, and if the automatic reduction, usually mean_pooling for instance was applied for you or not. This should be explained on the model's README.
@@ -354,7 +358,7 @@ type ImageClassificationArgs = Args & {
  /**
  * Binary image data
  */
- data: any;
+ data: Blob | ArrayBuffer;
  };
  interface ImageClassificationReturnValue {
  /**
@@ -371,7 +375,7 @@ type ObjectDetectionArgs = Args & {
  /**
  * Binary image data
  */
- data: any;
+ data: Blob | ArrayBuffer;
  };
  interface ObjectDetectionReturnValue {
  /**
@@ -397,7 +401,7 @@ type ImageSegmentationArgs = Args & {
  /**
  * Binary image data
  */
- data: any;
+ data: Blob | ArrayBuffer;
  };
  interface ImageSegmentationReturnValue {
  /**
@@ -418,7 +422,7 @@ type AutomaticSpeechRecognitionArgs = Args & {
  /**
  * Binary audio data
  */
- data: any;
+ data: Blob | ArrayBuffer;
  };
  interface AutomaticSpeechRecognitionReturn {
  /**
@@ -430,7 +434,7 @@ type AudioClassificationArgs = Args & {
  /**
  * Binary audio data
  */
- data: any;
+ data: Blob | ArrayBuffer;
  };
  interface AudioClassificationReturnValue {
  /**
@@ -533,13 +537,14 @@ declare class HfInference {
  * Recommended model: stabilityai/stable-diffusion-2
  */
  textToImage(args: TextToImageArgs, options?: Options): Promise<TextToImageReturn>;
- request(args: Args & {
- data?: any;
+ request<T>(args: Args & {
+ data?: Blob | ArrayBuffer;
  }, options?: Options & {
  binary?: boolean;
  blob?: boolean;
- }): Promise<any>;
- private static toArray;
+ /** For internal HF use, which is why it's not exposed in {@link Options} */
+ includeCredentials?: boolean;
+ }): Promise<T>;
  }

  export { Args, AudioClassificationArgs, AudioClassificationReturn, AudioClassificationReturnValue, AutomaticSpeechRecognitionArgs, AutomaticSpeechRecognitionReturn, ConversationalArgs, ConversationalReturn, FeatureExtractionArgs, FeatureExtractionReturn, FillMaskArgs, FillMaskReturn, HfInference, ImageClassificationArgs, ImageClassificationReturn, ImageClassificationReturnValue, ImageSegmentationArgs, ImageSegmentationReturn, ImageSegmentationReturnValue, ObjectDetectionArgs, ObjectDetectionReturn, ObjectDetectionReturnValue, Options, QuestionAnswerArgs, QuestionAnswerReturn, SummarizationArgs, SummarizationReturn, TableQuestionAnswerArgs, TableQuestionAnswerReturn, TextClassificationArgs, TextClassificationReturn, TextGenerationArgs, TextGenerationReturn, TextToImageArgs, TextToImageReturn, TokenClassificationArgs, TokenClassificationReturn, TokenClassificationReturnValue, TranslationArgs, TranslationReturn, ZeroShotClassificationArgs, ZeroShotClassificationReturn, ZeroShotClassificationReturnValue };
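Taken together, the declaration changes above tighten the public surface: `data` on the image and audio tasks is now `Blob | ArrayBuffer` instead of `any`, `request` takes a type parameter, and a new `dont_load_model` option is exposed. A minimal usage sketch of the new option against one of the binary-input tasks (the model id is an arbitrary example, and `imageClassification` is assumed to be the task helper that pairs with `ImageClassificationArgs`):

```ts
import { HfInference } from "@huggingface/inference";

const hf = new HfInference("hf_...your access token...");

// Fetching an image just to obtain a Blob; any Blob or ArrayBuffer works now
// that `data` is no longer typed as `any`.
const image = await (await fetch("https://example.com/cat.png")).blob();

// dont_load_model: true asks the API not to load the model if it is not
// already available, and use_cache: false bypasses the inference cache.
const labels = await hf.imageClassification(
  { model: "google/vit-base-patch16-224", data: image },
  { dont_load_model: true, use_cache: false }
);
```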
package/dist/index.js CHANGED
@@ -23,6 +23,14 @@ __export(src_exports, {
  });
  module.exports = __toCommonJS(src_exports);

+ // src/utils/to-array.ts
+ function toArray(obj) {
+ if (Array.isArray(obj)) {
+ return obj;
+ }
+ return [obj];
+ }
+
  // src/HfInference.ts
  var HfInference = class {
  apiKey;
@@ -71,7 +79,7 @@ var HfInference = class {
  * Usually used for sentence parsing, either grammatical, or Named Entity Recognition (NER) to understand keywords contained within text. Recommended model: dbmdz/bert-large-cased-finetuned-conll03-english
  */
  async tokenClassification(args, options) {
- return HfInference.toArray(await this.request(args, options));
+ return toArray(await this.request(args, options));
  }
  /**
  * This task is well known to translate text from one language to another. Recommended model: Helsinki-NLP/opus-mt-ru-en.
@@ -83,7 +91,9 @@ var HfInference = class {
  * This task is super useful to try out classification with zero code, you simply pass a sentence/paragraph and the possible labels for that sentence, and you get a result. Recommended model: facebook/bart-large-mnli.
  */
  async zeroShotClassification(args, options) {
- return HfInference.toArray(await this.request(args, options));
+ return toArray(
+ await this.request(args, options)
+ );
  }
  /**
  * This task corresponds to any chatbot like structure. Models tend to have shorter max_length, so please check with caution when using a given model if you need long range dependency or not. Recommended model: microsoft/DialoGPT-large.
@@ -168,8 +178,16 @@ var HfInference = class {
  if (!options?.binary) {
  headers["Content-Type"] = "application/json";
  }
- if (options?.binary && mergedOptions.wait_for_model) {
- headers["X-Wait-For-Model"] = "true";
+ if (options?.binary) {
+ if (mergedOptions.wait_for_model) {
+ headers["X-Wait-For-Model"] = "true";
+ }
+ if (mergedOptions.use_cache === false) {
+ headers["X-Use-Cache"] = "false";
+ }
+ if (mergedOptions.dont_load_model) {
+ headers["X-Load-Model"] = "0";
+ }
  }
  const response = await fetch(`https://api-inference.huggingface.co/models/${model}`, {
  headers,
@@ -177,7 +195,8 @@ var HfInference = class {
  body: options?.binary ? args.data : JSON.stringify({
  ...otherArgs,
  options: mergedOptions
- })
+ }),
+ credentials: options?.includeCredentials ? "include" : "same-origin"
  });
  if (mergedOptions.retry_on_error !== false && response.status === 503 && !mergedOptions.wait_for_model) {
  return this.request(args, {
@@ -197,12 +216,6 @@ var HfInference = class {
  }
  return output;
  }
- static toArray(obj) {
- if (Array.isArray(obj)) {
- return obj;
- }
- return [obj];
- }
  };
  // Annotate the CommonJS export names for ESM import in node:
  0 && (module.exports = {
package/dist/index.mjs CHANGED
@@ -1,3 +1,11 @@
+ // src/utils/to-array.ts
+ function toArray(obj) {
+ if (Array.isArray(obj)) {
+ return obj;
+ }
+ return [obj];
+ }
+
  // src/HfInference.ts
  var HfInference = class {
  apiKey;
@@ -46,7 +54,7 @@ var HfInference = class {
  * Usually used for sentence parsing, either grammatical, or Named Entity Recognition (NER) to understand keywords contained within text. Recommended model: dbmdz/bert-large-cased-finetuned-conll03-english
  */
  async tokenClassification(args, options) {
- return HfInference.toArray(await this.request(args, options));
+ return toArray(await this.request(args, options));
  }
  /**
  * This task is well known to translate text from one language to another. Recommended model: Helsinki-NLP/opus-mt-ru-en.
@@ -58,7 +66,9 @@ var HfInference = class {
  * This task is super useful to try out classification with zero code, you simply pass a sentence/paragraph and the possible labels for that sentence, and you get a result. Recommended model: facebook/bart-large-mnli.
  */
  async zeroShotClassification(args, options) {
- return HfInference.toArray(await this.request(args, options));
+ return toArray(
+ await this.request(args, options)
+ );
  }
  /**
  * This task corresponds to any chatbot like structure. Models tend to have shorter max_length, so please check with caution when using a given model if you need long range dependency or not. Recommended model: microsoft/DialoGPT-large.
@@ -143,8 +153,16 @@ var HfInference = class {
  if (!options?.binary) {
  headers["Content-Type"] = "application/json";
  }
- if (options?.binary && mergedOptions.wait_for_model) {
- headers["X-Wait-For-Model"] = "true";
+ if (options?.binary) {
+ if (mergedOptions.wait_for_model) {
+ headers["X-Wait-For-Model"] = "true";
+ }
+ if (mergedOptions.use_cache === false) {
+ headers["X-Use-Cache"] = "false";
+ }
+ if (mergedOptions.dont_load_model) {
+ headers["X-Load-Model"] = "0";
+ }
  }
  const response = await fetch(`https://api-inference.huggingface.co/models/${model}`, {
  headers,
@@ -152,7 +170,8 @@ var HfInference = class {
  body: options?.binary ? args.data : JSON.stringify({
  ...otherArgs,
  options: mergedOptions
- })
+ }),
+ credentials: options?.includeCredentials ? "include" : "same-origin"
  });
  if (mergedOptions.retry_on_error !== false && response.status === 503 && !mergedOptions.wait_for_model) {
  return this.request(args, {
@@ -172,12 +191,6 @@ var HfInference = class {
  }
  return output;
  }
- static toArray(obj) {
- if (Array.isArray(obj)) {
- return obj;
- }
- return [obj];
- }
  };
  export {
  HfInference
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@huggingface/inference",
- "version": "1.6.0",
+ "version": "1.6.2",
  "license": "MIT",
  "author": "Tim Mikeladze <tim.mikeladze@gmail.com>",
  "description": "Typescript wrapper for the Hugging Face Inference API",
@@ -48,9 +48,12 @@
  "resolutions": {},
  "scripts": {
  "build": "tsup src/index.ts --format cjs,esm --clean --dts",
- "format": "prettier --write . && eslint --quiet --fix --ext .cjs,.ts .",
+ "lint": "eslint --quiet --fix --ext .cjs,.ts .",
+ "lint:check": "eslint --ext .cjs,.ts .",
+ "format": "prettier --write .",
+ "format:check": "prettier --check .",
  "test": "vitest run",
- "test:ci": "pnpm run test -- --coverage",
+ "test:browser": "vitest run --browser.name=chrome --browser.headless",
  "type-check": "tsc"
  }
  }
@@ -1,3 +1,5 @@
+ import { toArray } from "./utils/to-array";
+
  export interface Options {
  /**
  * (Default: true) Boolean. If a request 503s and wait_for_model is set to false, the request will be retried with the same parameters but with wait_for_model set to true.
@@ -6,11 +8,15 @@ export interface Options {
  /**
  * (Default: true). Boolean. There is a cache layer on the inference API to speedup requests we have already seen. Most models can use those results as is as models are deterministic (meaning the results will be the same anyway). However if you use a non deterministic model, you can set this parameter to prevent the caching mechanism from being used resulting in a real new query.
  */
- use_cache?: boolean;
+ use_cache?: boolean;
+ /**
+ * (Default: false). Boolean. Do not load the model if it's not already available.
+ */
+ dont_load_model?: boolean;
  /**
  * (Default: false). Boolean to use GPU instead of CPU for inference (requires Startup plan at least).
  */
- use_gpu?: boolean;
+ use_gpu?: boolean;

  /**
  * (Default: false) Boolean. If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error as it will limit hanging in your application to known places.
@@ -30,15 +36,15 @@ export type FillMaskReturn = {
  /**
  * The probability for this token.
  */
- score: number;
+ score: number;
  /**
  * The actual sequence of tokens that ran against the model (may contain special tokens)
  */
- sequence: string;
+ sequence: string;
  /**
  * The id of the token
  */
- token: number;
+ token: number;
  /**
  * The string representation of the token
  */
@@ -49,20 +55,20 @@ export type SummarizationArgs = Args & {
  /**
  * A string to be summarized
  */
- inputs: string;
+ inputs: string;
  parameters?: {
  /**
  * (Default: None). Integer to define the maximum length in tokens of the output summary.
  */
- max_length?: number;
+ max_length?: number;
  /**
  * (Default: None). Float (0-120.0). The amount of time in seconds that the query should take maximum. Network can cause some overhead so it will be a soft limit.
  */
- max_time?: number;
+ max_time?: number;
  /**
  * (Default: None). Integer to define the minimum length in tokens of the output summary.
  */
- min_length?: number;
+ min_length?: number;
  /**
  * (Default: None). Float (0.0-100.0). The more a token is used within generation the more it is penalized to not be picked in successive generation passes.
  */
@@ -70,15 +76,15 @@ export type SummarizationArgs = Args & {
  /**
  * (Default: 1.0). Float (0.0-100.0). The temperature of the sampling operation. 1 means regular sampling, 0 means always take the highest score, 100.0 is getting closer to uniform probability.
  */
- temperature?: number;
+ temperature?: number;
  /**
  * (Default: None). Integer to define the top tokens considered within the sample operation to create new text.
  */
- top_k?: number;
+ top_k?: number;
  /**
  * (Default: None). Float to define the tokens that are within the sample operation of text generation. Add tokens in the sample for more probable to least probable until the sum of the probabilities is greater than top_p.
  */
- top_p?: number;
+ top_p?: number;
  };
  };

@@ -91,7 +97,7 @@ export interface SummarizationReturn {

  export type QuestionAnswerArgs = Args & {
  inputs: {
- context: string;
+ context: string;
  question: string;
  };
  };
@@ -104,15 +110,15 @@ export interface QuestionAnswerReturn {
  /**
  * The index (string wise) of the stop of the answer within context.
  */
- end: number;
+ end: number;
  /**
  * A float that represents how likely that the answer is correct
  */
- score: number;
+ score: number;
  /**
  * The index (string wise) of the start of the answer within context.
  */
- start: number;
+ start: number;
  }

  export type TableQuestionAnswerArgs = Args & {
@@ -132,15 +138,15 @@ export interface TableQuestionAnswerReturn {
  /**
  * The aggregator used to get the answer
  */
- aggregator: string;
+ aggregator: string;
  /**
  * The plaintext answer
  */
- answer: string;
+ answer: string;
  /**
  * A list of coordinates of the cells contents
  */
- cells: string[];
+ cells: string[];
  /**
  * a list of coordinates of the cells referenced in the answer
  */
@@ -169,20 +175,20 @@ export type TextGenerationArgs = Args & {
  /**
  * A string to be generated from
  */
- inputs: string;
+ inputs: string;
  parameters?: {
  /**
  * (Optional: True). Bool. Whether or not to use sampling, use greedy decoding otherwise.
  */
- do_sample?: boolean;
+ do_sample?: boolean;
  /**
  * (Default: None). Int (0-250). The amount of new tokens to be generated, this does not include the input length it is a estimate of the size of generated text you want. Each new tokens slows down the request, so look for balance between response times and length of text generated.
  */
- max_new_tokens?: number;
+ max_new_tokens?: number;
  /**
  * (Default: None). Float (0-120.0). The amount of time in seconds that the query should take maximum. Network can cause some overhead so it will be a soft limit. Use that in combination with max_new_tokens for best results.
  */
- max_time?: number;
+ max_time?: number;
  /**
  * (Default: 1). Integer. The number of proposition you want to be returned.
  */
@@ -190,23 +196,23 @@ export type TextGenerationArgs = Args & {
  /**
  * (Default: None). Float (0.0-100.0). The more a token is used within generation the more it is penalized to not be picked in successive generation passes.
  */
- repetition_penalty?: number;
+ repetition_penalty?: number;
  /**
  * (Default: True). Bool. If set to False, the return results will not contain the original query making it easier for prompting.
  */
- return_full_text?: boolean;
+ return_full_text?: boolean;
  /**
  * (Default: 1.0). Float (0.0-100.0). The temperature of the sampling operation. 1 means regular sampling, 0 means always take the highest score, 100.0 is getting closer to uniform probability.
  */
- temperature?: number;
+ temperature?: number;
  /**
  * (Default: None). Integer to define the top tokens considered within the sample operation to create new text.
  */
- top_k?: number;
+ top_k?: number;
  /**
  * (Default: None). Float to define the tokens that are within the sample operation of text generation. Add tokens in the sample for more probable to least probable until the sum of the probabilities is greater than top_p.
  */
- top_p?: number;
+ top_p?: number;
  };
  };

@@ -221,7 +227,7 @@ export type TokenClassificationArgs = Args & {
  /**
  * A string to be classified
  */
- inputs: string;
+ inputs: string;
  parameters?: {
  /**
  * (Default: simple). There are several aggregation strategies:
@@ -244,7 +250,7 @@ export interface TokenClassificationReturnValue {
  /**
  * The offset stringwise where the answer is located. Useful to disambiguate if word occurs multiple times.
  */
- end: number;
+ end: number;
  /**
  * The type for the entity being recognized (model specific).
  */
@@ -252,15 +258,15 @@ export interface TokenClassificationReturnValue {
  /**
  * How likely the entity was recognized.
  */
- score: number;
+ score: number;
  /**
  * The offset stringwise where the answer is located. Useful to disambiguate if word occurs multiple times.
  */
- start: number;
+ start: number;
  /**
  * The string that was captured
  */
- word: string;
+ word: string;
  }

  export type TokenClassificationReturn = TokenClassificationReturnValue[];
@@ -283,7 +289,7 @@ export type ZeroShotClassificationArgs = Args & {
  /**
  * a string or list of strings
  */
- inputs: string | string[];
+ inputs: string | string[];
  parameters: {
  /**
  * a list of strings that are potential classes for inputs. (max 10 candidate_labels, for more, simply run multiple requests, results are going to be misleading if using too many candidate_labels anyway. If you want to keep the exact same, you can simply run multi_label=True and do the scaling on your end.
@@ -292,13 +298,13 @@ export type ZeroShotClassificationArgs = Args & {
  /**
  * (Default: false) Boolean that is set to True if classes can overlap
  */
- multi_label?: boolean;
+ multi_label?: boolean;
  };
  };

  export interface ZeroShotClassificationReturnValue {
- labels: string[];
- scores: number[];
+ labels: string[];
+ scores: number[];
  sequence: string;
  }

@@ -313,25 +319,25 @@ export type ConversationalArgs = Args & {
  /**
  * A list of strings corresponding to the earlier replies from the user. Should be of the same length of generated_responses.
  */
- past_user_inputs?: string[];
+ past_user_inputs?: string[];
  /**
  * The last input from the user in the conversation.
  */
- text: string;
+ text: string;
  };
  parameters?: {
  /**
  * (Default: None). Integer to define the maximum length in tokens of the output summary.
  */
- max_length?: number;
+ max_length?: number;
  /**
  * (Default: None). Float (0-120.0). The amount of time in seconds that the query should take maximum. Network can cause some overhead so it will be a soft limit.
  */
- max_time?: number;
+ max_time?: number;
  /**
  * (Default: None). Integer to define the minimum length in tokens of the output summary.
  */
- min_length?: number;
+ min_length?: number;
  /**
  * (Default: None). Float (0.0-100.0). The more a token is used within generation the more it is penalized to not be picked in successive generation passes.
  */
@@ -339,25 +345,25 @@ export type ConversationalArgs = Args & {
  /**
  * (Default: 1.0). Float (0.0-100.0). The temperature of the sampling operation. 1 means regular sampling, 0 means always take the highest score, 100.0 is getting closer to uniform probability.
  */
- temperature?: number;
+ temperature?: number;
  /**
  * (Default: None). Integer to define the top tokens considered within the sample operation to create new text.
  */
- top_k?: number;
+ top_k?: number;
  /**
  * (Default: None). Float to define the tokens that are within the sample operation of text generation. Add tokens in the sample for more probable to least probable until the sum of the probabilities is greater than top_p.
  */
- top_p?: number;
+ top_p?: number;
  };
  };

  export interface ConversationalReturn {
  conversation: {
  generated_responses: string[];
- past_user_inputs: string[];
+ past_user_inputs: string[];
  };
  generated_text: string;
- warnings: string[];
+ warnings: string[];
  }

  export type FeatureExtractionArgs = Args & {
@@ -369,7 +375,7 @@ export type FeatureExtractionArgs = Args & {
  * "sentences": ["That is a happy dog", "That is a very happy person", "Today is a sunny day"]
  * &#125;
  */
- inputs: Record<string, any> | Record<string, any>[];
+ inputs: Record<string, unknown> | Record<string, unknown>[];
  };

  /**
@@ -381,7 +387,7 @@ export type ImageClassificationArgs = Args & {
  /**
  * Binary image data
  */
- data: any;
+ data: Blob | ArrayBuffer;
  };

  export interface ImageClassificationReturnValue {
@@ -401,7 +407,7 @@ export type ObjectDetectionArgs = Args & {
  /**
  * Binary image data
  */
- data: any;
+ data: Blob | ArrayBuffer;
  };

  export interface ObjectDetectionReturnValue {
@@ -431,7 +437,7 @@ export type ImageSegmentationArgs = Args & {
  /**
  * Binary image data
  */
- data: any;
+ data: Blob | ArrayBuffer;
  };

  export interface ImageSegmentationReturnValue {
@@ -442,7 +448,7 @@ export interface ImageSegmentationReturnValue {
  /**
  * A str (base64 str of a single channel black-and-white img) representing the mask of a segment.
  */
- mask: string;
+ mask: string;
  /**
  * A float that represents how likely it is that the detected object belongs to the given class.
  */
@@ -455,7 +461,7 @@ export type AutomaticSpeechRecognitionArgs = Args & {
  /**
  * Binary audio data
  */
- data: any;
+ data: Blob | ArrayBuffer;
  };

  export interface AutomaticSpeechRecognitionReturn {
@@ -469,7 +475,7 @@ export type AudioClassificationArgs = Args & {
  /**
  * Binary audio data
  */
- data: any;
+ data: Blob | ArrayBuffer;
  };

  export interface AudioClassificationReturnValue {
@@ -501,7 +507,7 @@ export type TextToImageArgs = Args & {
  export type TextToImageReturn = Blob;

  export class HfInference {
- private readonly apiKey: string;
+ private readonly apiKey: string;
  private readonly defaultOptions: Options;

  constructor(apiKey = "", defaultOptions: Options = {}) {
@@ -520,7 +526,7 @@ export class HfInference {
  * This task is well known to summarize longer text into shorter text. Be careful, some models have a maximum length of input. That means that the summary cannot handle full books for instance. Be careful when choosing your model.
  */
  public async summarization(args: SummarizationArgs, options?: Options): Promise<SummarizationReturn> {
- return (await this.request(args, options))?.[0];
+ return (await this.request<SummarizationReturn[]>(args, options))?.[0];
  }

  /**
@@ -544,14 +550,14 @@ export class HfInference {
  * Usually used for sentiment-analysis this will output the likelihood of classes of an input. Recommended model: distilbert-base-uncased-finetuned-sst-2-english
  */
  public async textClassification(args: TextClassificationArgs, options?: Options): Promise<TextClassificationReturn> {
- return (await this.request(args, options))?.[0];
+ return (await this.request<TextClassificationReturn[]>(args, options))?.[0];
  }

  /**
  * Use to continue text from a prompt. This is a very generic task. Recommended model: gpt2 (it’s a simple model, but fun to play with).
  */
  public async textGeneration(args: TextGenerationArgs, options?: Options): Promise<TextGenerationReturn> {
- return (await this.request(args, options))?.[0];
+ return (await this.request<TextGenerationReturn[]>(args, options))?.[0];
  }

  /**
@@ -561,14 +567,14 @@ export class HfInference {
  args: TokenClassificationArgs,
  options?: Options
  ): Promise<TokenClassificationReturn> {
- return HfInference.toArray(await this.request(args, options));
+ return toArray(await this.request(args, options));
  }

  /**
  * This task is well known to translate text from one language to another. Recommended model: Helsinki-NLP/opus-mt-ru-en.
  */
  public async translation(args: TranslationArgs, options?: Options): Promise<TranslationReturn> {
- return (await this.request(args, options))?.[0];
+ return (await this.request<TranslationReturn[]>(args, options))?.[0];
  }

  /**
@@ -578,7 +584,9 @@ export class HfInference {
  args: ZeroShotClassificationArgs,
  options?: Options
  ): Promise<ZeroShotClassificationReturn> {
- return HfInference.toArray(await this.request(args, options));
+ return toArray(
+ await this.request<ZeroShotClassificationReturnValue | ZeroShotClassificationReturnValue[]>(args, options)
+ );
  }

  /**
@@ -671,13 +679,15 @@ export class HfInference {
  });
  }

- public async request(
- args: Args & { data?: any },
+ public async request<T>(
+ args: Args & { data?: Blob | ArrayBuffer },
  options?: Options & {
  binary?: boolean;
- blob?: boolean;
+ blob?: boolean;
+ /** For internal HF use, which is why it's not exposed in {@link Options} */
+ includeCredentials?: boolean;
  }
- ): Promise<any> {
+ ): Promise<T> {
  const mergedOptions = { ...this.defaultOptions, ...options };
  const { model, ...otherArgs } = args;

@@ -690,19 +700,28 @@ export class HfInference {
  headers["Content-Type"] = "application/json";
  }

- if (options?.binary && mergedOptions.wait_for_model) {
- headers["X-Wait-For-Model"] = "true";
+ if (options?.binary) {
+ if (mergedOptions.wait_for_model) {
+ headers["X-Wait-For-Model"] = "true";
+ }
+ if (mergedOptions.use_cache === false) {
+ headers["X-Use-Cache"] = "false";
+ }
+ if (mergedOptions.dont_load_model) {
+ headers["X-Load-Model"] = "0";
+ }
  }

  const response = await fetch(`https://api-inference.huggingface.co/models/${model}`, {
  headers,
  method: "POST",
- body: options?.binary
+ body: options?.binary
  ? args.data
  : JSON.stringify({
  ...otherArgs,
  options: mergedOptions,
  }),
+ credentials: options?.includeCredentials ? "include" : "same-origin",
  });

  if (mergedOptions.retry_on_error !== false && response.status === 503 && !mergedOptions.wait_for_model) {
@@ -716,7 +735,7 @@ export class HfInference {
  if (!response.ok) {
  throw new Error("An error occurred while fetching the blob");
  }
- return await response.blob();
+ return (await response.blob()) as T;
  }

  const output = await response.json();
@@ -725,11 +744,4 @@ export class HfInference {
  }
  return output;
  }
-
- private static toArray(obj: any): any[] {
- if (Array.isArray(obj)) {
- return obj;
- }
- return [obj];
- }
  }
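Beyond the typing changes, this version alters the request path itself: for binary requests, `use_cache: false` and `dont_load_model` are now forwarded as `X-Use-Cache` and `X-Load-Model` headers (alongside the existing `X-Wait-For-Model`), and every request sets `credentials` explicitly, using `include` only when the internal `includeCredentials` flag is passed. A hedged sketch of a direct `request<T>` call that exercises the binary branch (the model id and the shape chosen for `T` are illustrative assumptions, not something this diff guarantees):

```ts
import { HfInference } from "@huggingface/inference";

const hf = new HfInference("hf_...your access token...");

// Any Blob or ArrayBuffer is accepted for `data` now that it is no longer `any`.
const audio = await (await fetch("https://example.com/sample.flac")).blob();

// With binary: true these options travel as headers instead of in the JSON body:
// wait_for_model -> X-Wait-For-Model, use_cache: false -> X-Use-Cache,
// dont_load_model -> X-Load-Model: 0 (fail fast if the model is not already loaded).
const result = await hf.request<{ text: string }>(
  { model: "facebook/wav2vec2-base-960h", data: audio },
  { binary: true, use_cache: false, dont_load_model: true }
);
```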
@@ -0,0 +1,7 @@
+ const isBrowser = typeof window !== "undefined" && typeof window.document !== "undefined";
+
+ const isWebWorker =
+ typeof self === "object" && self.constructor && self.constructor.name === "DedicatedWorkerGlobalScope";
+
+ export const isFrontend = isBrowser || isWebWorker;
+ export const isBackend = !isBrowser && !isWebWorker;
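The hunk above adds small environment predicates; its file path is not shown in this diff, and the diff does not show where the library consumes them. A purely illustrative sketch of how such flags are typically used, with a hypothetical import path:

```ts
// Hypothetical import path; the new module's location is not visible in this diff.
import { isFrontend, isBackend } from "./utils/env-predicates";

// Illustrative only: branch on the runtime environment, for example to decide
// whether browser-only APIs or Node-only facilities are available.
if (isBackend) {
  console.log("running under Node.js or another non-browser runtime");
} else if (isFrontend) {
  console.log("running in a browser window or a dedicated web worker");
}
```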
@@ -0,0 +1,6 @@
+ export function toArray<T>(obj: T): T extends unknown[] ? T : T[] {
+ if (Array.isArray(obj)) {
+ return obj as T extends unknown[] ? T : T[];
+ }
+ return [obj] as T extends unknown[] ? T : T[];
+ }
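This new module replaces the former private static `HfInference.toArray`; the conditional return type means arrays pass through unchanged while single values get wrapped. A quick illustration (the relative import specifier matches the `./utils/to-array` path used by the source file above):

```ts
import { toArray } from "./utils/to-array";

const wrapped = toArray("hello");      // ["hello"], a single value is wrapped in an array
const passedThrough = toArray([1, 2]); // [1, 2], an existing array is returned as-is
```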