@huggingface/inference 1.6.0 → 1.6.1
- package/README.md +1 -1
- package/dist/index.d.ts +15 -10
- package/dist/index.js +24 -11
- package/dist/index.mjs +24 -11
- package/package.json +6 -3
- package/src/HfInference.ts +84 -72
- package/src/utils.ts +6 -0
package/README.md
CHANGED
```diff
@@ -191,4 +191,4 @@ HF_ACCESS_TOKEN="your access token" npm run test
 
 We have an informative documentation project called [Tasks](https://huggingface.co/tasks) to list available models for each task and explain how each task works in detail.
 
-It also
+It also contains demos, example outputs, and other resources should you want to dig deeper into the ML side of things.
```
package/dist/index.d.ts
CHANGED
```diff
@@ -7,6 +7,10 @@ interface Options {
      * (Default: true). Boolean. There is a cache layer on the inference API to speedup requests we have already seen. Most models can use those results as is as models are deterministic (meaning the results will be the same anyway). However if you use a non deterministic model, you can set this parameter to prevent the caching mechanism from being used resulting in a real new query.
      */
     use_cache?: boolean;
+    /**
+     * (Default: false). Boolean. Do not load the model if it's not already available.
+     */
+    dont_load_model?: boolean;
     /**
      * (Default: false). Boolean to use GPU instead of CPU for inference (requires Startup plan at least).
      */
@@ -344,7 +348,7 @@ type FeatureExtractionArgs = Args & {
      * "sentences": ["That is a happy dog", "That is a very happy person", "Today is a sunny day"]
      * }
      */
-    inputs: Record<string,
+    inputs: Record<string, unknown> | Record<string, unknown>[];
 };
 /**
  * Returned values are a list of floats, or a list of list of floats (depending on if you sent a string or a list of string, and if the automatic reduction, usually mean_pooling for instance was applied for you or not. This should be explained on the model's README.
@@ -354,7 +358,7 @@ type ImageClassificationArgs = Args & {
     /**
      * Binary image data
      */
-    data:
+    data: Blob | ArrayBuffer;
 };
 interface ImageClassificationReturnValue {
     /**
@@ -371,7 +375,7 @@ type ObjectDetectionArgs = Args & {
     /**
      * Binary image data
      */
-    data:
+    data: Blob | ArrayBuffer;
 };
 interface ObjectDetectionReturnValue {
     /**
@@ -397,7 +401,7 @@ type ImageSegmentationArgs = Args & {
     /**
      * Binary image data
      */
-    data:
+    data: Blob | ArrayBuffer;
 };
 interface ImageSegmentationReturnValue {
     /**
@@ -418,7 +422,7 @@ type AutomaticSpeechRecognitionArgs = Args & {
     /**
      * Binary audio data
      */
-    data:
+    data: Blob | ArrayBuffer;
 };
 interface AutomaticSpeechRecognitionReturn {
     /**
@@ -430,7 +434,7 @@ type AudioClassificationArgs = Args & {
     /**
      * Binary audio data
      */
-    data:
+    data: Blob | ArrayBuffer;
 };
 interface AudioClassificationReturnValue {
     /**
@@ -533,13 +537,14 @@ declare class HfInference {
      * Recommended model: stabilityai/stable-diffusion-2
      */
     textToImage(args: TextToImageArgs, options?: Options): Promise<TextToImageReturn>;
-    request(args: Args & {
-        data?:
+    request<T>(args: Args & {
+        data?: Blob | ArrayBuffer;
     }, options?: Options & {
         binary?: boolean;
         blob?: boolean;
-
-
+        /** For internal HF use, which is why it's not exposed in {@link Options} */
+        includeCredentials?: boolean;
+    }): Promise<T>;
 }
 
 export { Args, AudioClassificationArgs, AudioClassificationReturn, AudioClassificationReturnValue, AutomaticSpeechRecognitionArgs, AutomaticSpeechRecognitionReturn, ConversationalArgs, ConversationalReturn, FeatureExtractionArgs, FeatureExtractionReturn, FillMaskArgs, FillMaskReturn, HfInference, ImageClassificationArgs, ImageClassificationReturn, ImageClassificationReturnValue, ImageSegmentationArgs, ImageSegmentationReturn, ImageSegmentationReturnValue, ObjectDetectionArgs, ObjectDetectionReturn, ObjectDetectionReturnValue, Options, QuestionAnswerArgs, QuestionAnswerReturn, SummarizationArgs, SummarizationReturn, TableQuestionAnswerArgs, TableQuestionAnswerReturn, TextClassificationArgs, TextClassificationReturn, TextGenerationArgs, TextGenerationReturn, TextToImageArgs, TextToImageReturn, TokenClassificationArgs, TokenClassificationReturn, TokenClassificationReturnValue, TranslationArgs, TranslationReturn, ZeroShotClassificationArgs, ZeroShotClassificationReturn, ZeroShotClassificationReturnValue };
```
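The notable API changes in the typings are the new `dont_load_model` option and the now-generic `request<T>`. A minimal usage sketch — the access token and image URL are placeholders, and the model name is only a plausible example, not something taken from this diff:

```ts
import { HfInference } from "@huggingface/inference";

// Placeholder token and image URL, for illustration only.
const hf = new HfInference("hf_xxx");
const data = await (await fetch("https://example.com/cat.jpg")).blob();

// `dont_load_model` is new in 1.6.1; per the request logic further below,
// binary requests translate it into the `X-Load-Model: 0` header so a model
// that is not already loaded is not spun up just for this call.
const labels = await hf.imageClassification(
  { model: "google/vit-base-patch16-224", data },
  { dont_load_model: true }
);
console.log(labels);
```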
package/dist/index.js
CHANGED
```diff
@@ -23,6 +23,14 @@ __export(src_exports, {
 });
 module.exports = __toCommonJS(src_exports);
 
+// src/utils.ts
+function toArray(obj) {
+  if (Array.isArray(obj)) {
+    return obj;
+  }
+  return [obj];
+}
+
 // src/HfInference.ts
 var HfInference = class {
   apiKey;
@@ -71,7 +79,7 @@ var HfInference = class {
    * Usually used for sentence parsing, either grammatical, or Named Entity Recognition (NER) to understand keywords contained within text. Recommended model: dbmdz/bert-large-cased-finetuned-conll03-english
    */
   async tokenClassification(args, options) {
-    return
+    return toArray(await this.request(args, options));
   }
   /**
    * This task is well known to translate text from one language to another. Recommended model: Helsinki-NLP/opus-mt-ru-en.
@@ -83,7 +91,9 @@ var HfInference = class {
    * This task is super useful to try out classification with zero code, you simply pass a sentence/paragraph and the possible labels for that sentence, and you get a result. Recommended model: facebook/bart-large-mnli.
    */
   async zeroShotClassification(args, options) {
-    return
+    return toArray(
+      await this.request(args, options)
+    );
   }
   /**
    * This task corresponds to any chatbot like structure. Models tend to have shorter max_length, so please check with caution when using a given model if you need long range dependency or not. Recommended model: microsoft/DialoGPT-large.
@@ -168,8 +178,16 @@ var HfInference = class {
     if (!options?.binary) {
       headers["Content-Type"] = "application/json";
     }
-    if (options?.binary
-
+    if (options?.binary) {
+      if (mergedOptions.wait_for_model) {
+        headers["X-Wait-For-Model"] = "true";
+      }
+      if (mergedOptions.use_cache === false) {
+        headers["X-Use-Cache"] = "false";
+      }
+      if (mergedOptions.dont_load_model) {
+        headers["X-Load-Model"] = "0";
+      }
     }
     const response = await fetch(`https://api-inference.huggingface.co/models/${model}`, {
       headers,
@@ -177,7 +195,8 @@ var HfInference = class {
       body: options?.binary ? args.data : JSON.stringify({
         ...otherArgs,
         options: mergedOptions
-      })
+      }),
+      credentials: options?.includeCredentials ? "include" : "same-origin"
     });
     if (mergedOptions.retry_on_error !== false && response.status === 503 && !mergedOptions.wait_for_model) {
       return this.request(args, {
@@ -197,12 +216,6 @@ var HfInference = class {
     }
     return output;
   }
-  static toArray(obj) {
-    if (Array.isArray(obj)) {
-      return obj;
-    }
-    return [obj];
-  }
 };
 // Annotate the CommonJS export names for ESM import in node:
 0 && (module.exports = {
```
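The bundled `toArray` helper — extracted to `src/utils.ts` and no longer a private static method — is what lets `tokenClassification` and `zeroShotClassification` always return arrays. A typed sketch of the same behavior (the generic signature here is an assumption; the bundled output above is untyped):

```ts
// Wrap a single value in an array so callers always receive T[],
// whether the API returned one result object or a list of them.
function toArray<T>(obj: T | T[]): T[] {
  return Array.isArray(obj) ? obj : [obj];
}

console.log(toArray("x"));        // ["x"]
console.log(toArray(["x", "y"])); // ["x", "y"]
```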
package/dist/index.mjs
CHANGED
```diff
@@ -1,3 +1,11 @@
+// src/utils.ts
+function toArray(obj) {
+  if (Array.isArray(obj)) {
+    return obj;
+  }
+  return [obj];
+}
+
 // src/HfInference.ts
 var HfInference = class {
   apiKey;
@@ -46,7 +54,7 @@ var HfInference = class {
    * Usually used for sentence parsing, either grammatical, or Named Entity Recognition (NER) to understand keywords contained within text. Recommended model: dbmdz/bert-large-cased-finetuned-conll03-english
    */
   async tokenClassification(args, options) {
-    return
+    return toArray(await this.request(args, options));
   }
   /**
    * This task is well known to translate text from one language to another. Recommended model: Helsinki-NLP/opus-mt-ru-en.
@@ -58,7 +66,9 @@ var HfInference = class {
    * This task is super useful to try out classification with zero code, you simply pass a sentence/paragraph and the possible labels for that sentence, and you get a result. Recommended model: facebook/bart-large-mnli.
    */
   async zeroShotClassification(args, options) {
-    return
+    return toArray(
+      await this.request(args, options)
+    );
   }
   /**
    * This task corresponds to any chatbot like structure. Models tend to have shorter max_length, so please check with caution when using a given model if you need long range dependency or not. Recommended model: microsoft/DialoGPT-large.
@@ -143,8 +153,16 @@ var HfInference = class {
     if (!options?.binary) {
       headers["Content-Type"] = "application/json";
     }
-    if (options?.binary
-
+    if (options?.binary) {
+      if (mergedOptions.wait_for_model) {
+        headers["X-Wait-For-Model"] = "true";
+      }
+      if (mergedOptions.use_cache === false) {
+        headers["X-Use-Cache"] = "false";
+      }
+      if (mergedOptions.dont_load_model) {
+        headers["X-Load-Model"] = "0";
+      }
     }
     const response = await fetch(`https://api-inference.huggingface.co/models/${model}`, {
       headers,
@@ -152,7 +170,8 @@ var HfInference = class {
       body: options?.binary ? args.data : JSON.stringify({
         ...otherArgs,
         options: mergedOptions
-      })
+      }),
+      credentials: options?.includeCredentials ? "include" : "same-origin"
     });
     if (mergedOptions.retry_on_error !== false && response.status === 503 && !mergedOptions.wait_for_model) {
       return this.request(args, {
@@ -172,12 +191,6 @@ var HfInference = class {
     }
     return output;
   }
-  static toArray(obj) {
-    if (Array.isArray(obj)) {
-      return obj;
-    }
-    return [obj];
-  }
 };
 export {
   HfInference
```
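The other behavioral change visible in both bundles is that inference options are now forwarded as HTTP headers on binary requests (JSON requests already carried them in the request body under `options`). A small sketch of the mapping, using the header names from the hunks above; `mergedOptions` here is a hypothetical stand-in for the merged options object in `request()`:

```ts
// Stand-in for mergedOptions inside request().
const mergedOptions = { wait_for_model: true, use_cache: false, dont_load_model: true };
const headers: Record<string, string> = {};

if (mergedOptions.wait_for_model) headers["X-Wait-For-Model"] = "true";
if (mergedOptions.use_cache === false) headers["X-Use-Cache"] = "false";
if (mergedOptions.dont_load_model) headers["X-Load-Model"] = "0";

console.log(headers);
// { "X-Wait-For-Model": "true", "X-Use-Cache": "false", "X-Load-Model": "0" }
```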
package/package.json
CHANGED
```diff
@@ -1,6 +1,6 @@
 {
   "name": "@huggingface/inference",
-  "version": "1.6.0",
+  "version": "1.6.1",
   "license": "MIT",
   "author": "Tim Mikeladze <tim.mikeladze@gmail.com>",
   "description": "Typescript wrapper for the Hugging Face Inference API",
@@ -48,9 +48,12 @@
   "resolutions": {},
   "scripts": {
     "build": "tsup src/index.ts --format cjs,esm --clean --dts",
-    "
+    "lint": "eslint --quiet --fix --ext .cjs,.ts .",
+    "lint:check": "eslint --ext .cjs,.ts .",
+    "format": "prettier --write .",
+    "format:check": "prettier --check .",
     "test": "vitest run",
-    "test:
+    "test:browser": "vitest run --browser.name=chrome --browser.headless",
     "type-check": "tsc"
   }
 }
```
package/src/HfInference.ts
CHANGED
```diff
@@ -1,3 +1,5 @@
+import { toArray } from "./utils";
+
 export interface Options {
   /**
    * (Default: true) Boolean. If a request 503s and wait_for_model is set to false, the request will be retried with the same parameters but with wait_for_model set to true.
@@ -6,11 +8,15 @@ export interface Options {
   /**
    * (Default: true). Boolean. There is a cache layer on the inference API to speedup requests we have already seen. Most models can use those results as is as models are deterministic (meaning the results will be the same anyway). However if you use a non deterministic model, you can set this parameter to prevent the caching mechanism from being used resulting in a real new query.
    */
-  use_cache?:
+  use_cache?: boolean;
+  /**
+   * (Default: false). Boolean. Do not load the model if it's not already available.
+   */
+  dont_load_model?: boolean;
   /**
    * (Default: false). Boolean to use GPU instead of CPU for inference (requires Startup plan at least).
    */
-  use_gpu?:
+  use_gpu?: boolean;
 
   /**
    * (Default: false) Boolean. If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error as it will limit hanging in your application to known places.
@@ -30,15 +36,15 @@ export type FillMaskReturn = {
   /**
    * The probability for this token.
    */
-  score:
+  score: number;
   /**
    * The actual sequence of tokens that ran against the model (may contain special tokens)
    */
-  sequence:
+  sequence: string;
   /**
    * The id of the token
    */
-  token:
+  token: number;
   /**
    * The string representation of the token
    */
@@ -49,20 +55,20 @@ export type SummarizationArgs = Args & {
   /**
    * A string to be summarized
    */
-  inputs:
+  inputs: string;
   parameters?: {
     /**
      * (Default: None). Integer to define the maximum length in tokens of the output summary.
      */
-    max_length?:
+    max_length?: number;
     /**
      * (Default: None). Float (0-120.0). The amount of time in seconds that the query should take maximum. Network can cause some overhead so it will be a soft limit.
      */
-    max_time?:
+    max_time?: number;
     /**
      * (Default: None). Integer to define the minimum length in tokens of the output summary.
      */
-    min_length?:
+    min_length?: number;
     /**
      * (Default: None). Float (0.0-100.0). The more a token is used within generation the more it is penalized to not be picked in successive generation passes.
      */
@@ -70,15 +76,15 @@ export type SummarizationArgs = Args & {
     /**
      * (Default: 1.0). Float (0.0-100.0). The temperature of the sampling operation. 1 means regular sampling, 0 means always take the highest score, 100.0 is getting closer to uniform probability.
      */
-    temperature?:
+    temperature?: number;
     /**
      * (Default: None). Integer to define the top tokens considered within the sample operation to create new text.
      */
-    top_k?:
+    top_k?: number;
     /**
      * (Default: None). Float to define the tokens that are within the sample operation of text generation. Add tokens in the sample for more probable to least probable until the sum of the probabilities is greater than top_p.
      */
-    top_p?:
+    top_p?: number;
   };
 };
 
@@ -91,7 +97,7 @@ export interface SummarizationReturn {
 
 export type QuestionAnswerArgs = Args & {
   inputs: {
-    context:
+    context: string;
     question: string;
   };
 };
@@ -104,15 +110,15 @@ export interface QuestionAnswerReturn {
   /**
    * The index (string wise) of the stop of the answer within context.
    */
-  end:
+  end: number;
   /**
    * A float that represents how likely that the answer is correct
    */
-  score:
+  score: number;
   /**
    * The index (string wise) of the start of the answer within context.
    */
-  start:
+  start: number;
 }
 
 export type TableQuestionAnswerArgs = Args & {
@@ -132,15 +138,15 @@ export interface TableQuestionAnswerReturn {
   /**
    * The aggregator used to get the answer
    */
-  aggregator:
+  aggregator: string;
   /**
    * The plaintext answer
    */
-  answer:
+  answer: string;
   /**
    * A list of coordinates of the cells contents
    */
-  cells:
+  cells: string[];
   /**
    * a list of coordinates of the cells referenced in the answer
    */
@@ -169,20 +175,20 @@ export type TextGenerationArgs = Args & {
   /**
    * A string to be generated from
    */
-  inputs:
+  inputs: string;
   parameters?: {
     /**
      * (Optional: True). Bool. Whether or not to use sampling, use greedy decoding otherwise.
      */
-    do_sample?:
+    do_sample?: boolean;
     /**
      * (Default: None). Int (0-250). The amount of new tokens to be generated, this does not include the input length it is a estimate of the size of generated text you want. Each new tokens slows down the request, so look for balance between response times and length of text generated.
      */
-    max_new_tokens?:
+    max_new_tokens?: number;
     /**
      * (Default: None). Float (0-120.0). The amount of time in seconds that the query should take maximum. Network can cause some overhead so it will be a soft limit. Use that in combination with max_new_tokens for best results.
      */
-    max_time?:
+    max_time?: number;
     /**
      * (Default: 1). Integer. The number of proposition you want to be returned.
      */
@@ -190,23 +196,23 @@ export type TextGenerationArgs = Args & {
     /**
      * (Default: None). Float (0.0-100.0). The more a token is used within generation the more it is penalized to not be picked in successive generation passes.
      */
-    repetition_penalty?:
+    repetition_penalty?: number;
     /**
      * (Default: True). Bool. If set to False, the return results will not contain the original query making it easier for prompting.
      */
-    return_full_text?:
+    return_full_text?: boolean;
     /**
      * (Default: 1.0). Float (0.0-100.0). The temperature of the sampling operation. 1 means regular sampling, 0 means always take the highest score, 100.0 is getting closer to uniform probability.
      */
-    temperature?:
+    temperature?: number;
     /**
      * (Default: None). Integer to define the top tokens considered within the sample operation to create new text.
      */
-    top_k?:
+    top_k?: number;
     /**
      * (Default: None). Float to define the tokens that are within the sample operation of text generation. Add tokens in the sample for more probable to least probable until the sum of the probabilities is greater than top_p.
      */
-    top_p?:
+    top_p?: number;
   };
 };
 
@@ -221,7 +227,7 @@ export type TokenClassificationArgs = Args & {
   /**
    * A string to be classified
    */
-  inputs:
+  inputs: string;
   parameters?: {
     /**
      * (Default: simple). There are several aggregation strategies:
@@ -244,7 +250,7 @@ export interface TokenClassificationReturnValue {
   /**
    * The offset stringwise where the answer is located. Useful to disambiguate if word occurs multiple times.
    */
-  end:
+  end: number;
   /**
    * The type for the entity being recognized (model specific).
    */
@@ -252,15 +258,15 @@ export interface TokenClassificationReturnValue {
   /**
    * How likely the entity was recognized.
   */
-  score:
+  score: number;
   /**
    * The offset stringwise where the answer is located. Useful to disambiguate if word occurs multiple times.
    */
-  start:
+  start: number;
   /**
    * The string that was captured
    */
-  word:
+  word: string;
 }
 
 export type TokenClassificationReturn = TokenClassificationReturnValue[];
@@ -283,7 +289,7 @@ export type ZeroShotClassificationArgs = Args & {
   /**
    * a string or list of strings
    */
-  inputs:
+  inputs: string | string[];
   parameters: {
     /**
      * a list of strings that are potential classes for inputs. (max 10 candidate_labels, for more, simply run multiple requests, results are going to be misleading if using too many candidate_labels anyway. If you want to keep the exact same, you can simply run multi_label=True and do the scaling on your end.
@@ -292,13 +298,13 @@ export type ZeroShotClassificationArgs = Args & {
     /**
      * (Default: false) Boolean that is set to True if classes can overlap
      */
-    multi_label?:
+    multi_label?: boolean;
   };
 };
 
 export interface ZeroShotClassificationReturnValue {
-  labels:
-  scores:
+  labels: string[];
+  scores: number[];
   sequence: string;
 }
 
@@ -313,25 +319,25 @@ export type ConversationalArgs = Args & {
     /**
      * A list of strings corresponding to the earlier replies from the user. Should be of the same length of generated_responses.
      */
-    past_user_inputs?:
+    past_user_inputs?: string[];
    /**
     * The last input from the user in the conversation.
     */
-    text:
+    text: string;
   };
   parameters?: {
     /**
      * (Default: None). Integer to define the maximum length in tokens of the output summary.
      */
-    max_length?:
+    max_length?: number;
     /**
      * (Default: None). Float (0-120.0). The amount of time in seconds that the query should take maximum. Network can cause some overhead so it will be a soft limit.
      */
-    max_time?:
+    max_time?: number;
     /**
      * (Default: None). Integer to define the minimum length in tokens of the output summary.
      */
-    min_length?:
+    min_length?: number;
     /**
      * (Default: None). Float (0.0-100.0). The more a token is used within generation the more it is penalized to not be picked in successive generation passes.
      */
@@ -339,25 +345,25 @@ export type ConversationalArgs = Args & {
     /**
      * (Default: 1.0). Float (0.0-100.0). The temperature of the sampling operation. 1 means regular sampling, 0 means always take the highest score, 100.0 is getting closer to uniform probability.
      */
-    temperature?:
+    temperature?: number;
     /**
      * (Default: None). Integer to define the top tokens considered within the sample operation to create new text.
      */
-    top_k?:
+    top_k?: number;
     /**
      * (Default: None). Float to define the tokens that are within the sample operation of text generation. Add tokens in the sample for more probable to least probable until the sum of the probabilities is greater than top_p.
      */
-    top_p?:
+    top_p?: number;
   };
 };
 
 export interface ConversationalReturn {
   conversation: {
     generated_responses: string[];
-    past_user_inputs:
+    past_user_inputs: string[];
   };
   generated_text: string;
-  warnings:
+  warnings: string[];
 }
 
 export type FeatureExtractionArgs = Args & {
@@ -369,7 +375,7 @@ export type FeatureExtractionArgs = Args & {
    * "sentences": ["That is a happy dog", "That is a very happy person", "Today is a sunny day"]
    * }
    */
-  inputs: Record<string,
+  inputs: Record<string, unknown> | Record<string, unknown>[];
 };
 
 /**
@@ -381,7 +387,7 @@ export type ImageClassificationArgs = Args & {
   /**
    * Binary image data
   */
-  data:
+  data: Blob | ArrayBuffer;
 };
 
 export interface ImageClassificationReturnValue {
@@ -401,7 +407,7 @@ export type ObjectDetectionArgs = Args & {
   /**
    * Binary image data
   */
-  data:
+  data: Blob | ArrayBuffer;
 };
 
 export interface ObjectDetectionReturnValue {
@@ -431,7 +437,7 @@ export type ImageSegmentationArgs = Args & {
   /**
    * Binary image data
   */
-  data:
+  data: Blob | ArrayBuffer;
 };
 
 export interface ImageSegmentationReturnValue {
@@ -442,7 +448,7 @@ export interface ImageSegmentationReturnValue {
   /**
    * A str (base64 str of a single channel black-and-white img) representing the mask of a segment.
   */
-  mask:
+  mask: string;
   /**
    * A float that represents how likely it is that the detected object belongs to the given class.
   */
@@ -455,7 +461,7 @@ export type AutomaticSpeechRecognitionArgs = Args & {
   /**
    * Binary audio data
  */
-  data:
+  data: Blob | ArrayBuffer;
 };
 
 export interface AutomaticSpeechRecognitionReturn {
@@ -469,7 +475,7 @@ export type AudioClassificationArgs = Args & {
   /**
    * Binary audio data
  */
-  data:
+  data: Blob | ArrayBuffer;
 };
 
 export interface AudioClassificationReturnValue {
@@ -501,7 +507,7 @@ export type TextToImageArgs = Args & {
 export type TextToImageReturn = Blob;
 
 export class HfInference {
-  private readonly apiKey:
+  private readonly apiKey: string;
   private readonly defaultOptions: Options;
 
   constructor(apiKey = "", defaultOptions: Options = {}) {
@@ -561,7 +567,7 @@ export class HfInference {
     args: TokenClassificationArgs,
     options?: Options
   ): Promise<TokenClassificationReturn> {
-    return
+    return toArray(await this.request(args, options));
   }
 
   /**
@@ -578,7 +584,9 @@ export class HfInference {
     args: ZeroShotClassificationArgs,
     options?: Options
   ): Promise<ZeroShotClassificationReturn> {
-    return
+    return toArray(
+      await this.request<ZeroShotClassificationReturnValue | ZeroShotClassificationReturnValue[]>(args, options)
+    );
   }
 
   /**
@@ -671,13 +679,15 @@ export class HfInference {
     });
   }
 
-  public async request(
-    args: Args & { data?:
+  public async request<T>(
+    args: Args & { data?: Blob | ArrayBuffer },
     options?: Options & {
       binary?: boolean;
-      blob?:
+      blob?: boolean;
+      /** For internal HF use, which is why it's not exposed in {@link Options} */
+      includeCredentials?: boolean;
     }
-  ): Promise<
+  ): Promise<T> {
     const mergedOptions = { ...this.defaultOptions, ...options };
     const { model, ...otherArgs } = args;
 
@@ -690,19 +700,28 @@ export class HfInference {
       headers["Content-Type"] = "application/json";
     }
 
-    if (options?.binary
-
+    if (options?.binary) {
+      if (mergedOptions.wait_for_model) {
+        headers["X-Wait-For-Model"] = "true";
+      }
+      if (mergedOptions.use_cache === false) {
+        headers["X-Use-Cache"] = "false";
+      }
+      if (mergedOptions.dont_load_model) {
+        headers["X-Load-Model"] = "0";
+      }
     }
 
     const response = await fetch(`https://api-inference.huggingface.co/models/${model}`, {
       headers,
       method: "POST",
-      body:
+      body: options?.binary
         ? args.data
         : JSON.stringify({
             ...otherArgs,
            options: mergedOptions,
          }),
+      credentials: options?.includeCredentials ? "include" : "same-origin",
     });
 
     if (mergedOptions.retry_on_error !== false && response.status === 503 && !mergedOptions.wait_for_model) {
@@ -716,7 +735,7 @@ export class HfInference {
     if (!response.ok) {
       throw new Error("An error occurred while fetching the blob");
     }
-    return await response.blob();
+    return (await response.blob()) as T;
   }
 
   const output = await response.json();
@@ -725,11 +744,4 @@ export class HfInference {
     }
     return output;
   }
-
-  private static toArray(obj: any): any[] {
-    if (Array.isArray(obj)) {
-      return obj;
-    }
-    return [obj];
-  }
 }
```
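Taken together, the source changes make `request` usable with a caller-supplied response type instead of an untyped result. A sketch — the model name and the assumed response shape are illustrative, not part of the diff:

```ts
import { HfInference } from "@huggingface/inference";

// Assumed response shape for a text-classification model; not from the diff.
interface ClassificationResult {
  label: string;
  score: number;
}

const hf = new HfInference("hf_xxx"); // placeholder token

const args = {
  model: "distilbert-base-uncased-finetuned-sst-2-english",
  inputs: "I love this library!",
};

// `request<T>` types the raw JSON response; the new `includeCredentials`
// option (marked for internal Hugging Face use) would additionally send
// the request with fetch credentials: "include".
const out = await hf.request<ClassificationResult[][]>(args);
console.log(out[0]);
```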