@huggingface/inference 2.6.5 → 2.6.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +15 -0
- package/dist/index.cjs +18 -11
- package/dist/index.js +18 -11
- package/package.json +1 -1
- package/src/lib/makeRequestOptions.ts +19 -11
package/README.md
CHANGED
@@ -506,6 +506,21 @@ const gpt2 = hf.endpoint('https://xyz.eu-west-1.aws.endpoints.huggingface.cloud/
 const { generated_text } = await gpt2.textGeneration({inputs: 'The answer to the universe is'});
 ```
 
+By default, all calls to the inference endpoint will wait until the model is
+loaded. When [scaling to
+0](https://huggingface.co/docs/inference-endpoints/en/autoscaling#scaling-to-0)
+is enabled on the endpoint, this can result in non-trivial waiting time. If
+you'd rather disable this behavior and handle the endpoint's returned 500 HTTP
+errors yourself, you can do so like so:
+
+```typescript
+const gpt2 = hf.endpoint('https://xyz.eu-west-1.aws.endpoints.huggingface.cloud/gpt2');
+const { generated_text } = await gpt2.textGeneration(
+  {inputs: 'The answer to the universe is'},
+  {retry_on_error: false},
+);
+```
+
 ## Running tests
 
 ```console
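The passage added to the README stops at disabling the retry behavior and leaves the actual error handling to the reader. A minimal sketch of that follow-up, assuming the client surfaces the failed request as a thrown error (the exact error shape is not specified in this diff):

```typescript
import { HfInference } from "@huggingface/inference";

const hf = new HfInference("hf_...");
const gpt2 = hf.endpoint("https://xyz.eu-west-1.aws.endpoints.huggingface.cloud/gpt2");

try {
  const { generated_text } = await gpt2.textGeneration(
    { inputs: "The answer to the universe is" },
    { retry_on_error: false }, // don't poll a scaled-to-zero endpoint until it warms up
  );
  console.log(generated_text);
} catch (err) {
  // With retry_on_error disabled, a cold endpoint's 500 surfaces here;
  // back off and retry on your own schedule instead of blocking the call.
  console.error("endpoint not ready:", err);
}
```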
package/dist/index.cjs
CHANGED
@@ -132,7 +132,15 @@ var tasks = null;
 async function makeRequestOptions(args, options) {
   const { accessToken, model: _model, ...otherArgs } = args;
   let { model } = args;
-  const {
+  const {
+    forceTask: task,
+    includeCredentials,
+    taskHint,
+    wait_for_model,
+    use_cache,
+    dont_load_model,
+    ...otherOptions
+  } = options ?? {};
   const headers = {};
   if (accessToken) {
     headers["Authorization"] = `Bearer ${accessToken}`;
@@ -155,16 +163,15 @@ async function makeRequestOptions(args, options) {
   const binary = "data" in args && !!args.data;
   if (!binary) {
     headers["Content-Type"] = "application/json";
-  }
-
-
-
-
-
-
-
-
-  }
+  }
+  if (wait_for_model) {
+    headers["X-Wait-For-Model"] = "true";
+  }
+  if (use_cache === false) {
+    headers["X-Use-Cache"] = "false";
+  }
+  if (dont_load_model) {
+    headers["X-Load-Model"] = "0";
   }
   const url = (() => {
     if (isUrl(model)) {
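The substance of the bundle change is a small option-to-header mapping. A standalone sketch of just that logic, distilled from the diff above (the helper name `optionHeaders` is mine, for illustration):

```typescript
// Distilled from makeRequestOptions: each request option maps to an
// Inference API header exactly as in the added code above.
function optionHeaders(options: {
  wait_for_model?: boolean;
  use_cache?: boolean;
  dont_load_model?: boolean;
}): Record<string, string> {
  const headers: Record<string, string> = {};
  if (options.wait_for_model) {
    headers["X-Wait-For-Model"] = "true"; // block until the model is loaded
  }
  if (options.use_cache === false) {
    headers["X-Use-Cache"] = "false"; // opt out of cached inference results
  }
  if (options.dont_load_model) {
    headers["X-Load-Model"] = "0"; // don't trigger a load of an unloaded model
  }
  return headers;
}

// optionHeaders({ wait_for_model: true, use_cache: false })
// → { "X-Wait-For-Model": "true", "X-Use-Cache": "false" }
```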
package/dist/index.js
CHANGED
@@ -81,7 +81,15 @@ var tasks = null;
 async function makeRequestOptions(args, options) {
   const { accessToken, model: _model, ...otherArgs } = args;
   let { model } = args;
-  const {
+  const {
+    forceTask: task,
+    includeCredentials,
+    taskHint,
+    wait_for_model,
+    use_cache,
+    dont_load_model,
+    ...otherOptions
+  } = options ?? {};
   const headers = {};
   if (accessToken) {
     headers["Authorization"] = `Bearer ${accessToken}`;
@@ -104,16 +112,15 @@ async function makeRequestOptions(args, options) {
   const binary = "data" in args && !!args.data;
   if (!binary) {
     headers["Content-Type"] = "application/json";
-  }
-
-
-
-
-
-
-
-
-  }
+  }
+  if (wait_for_model) {
+    headers["X-Wait-For-Model"] = "true";
+  }
+  if (use_cache === false) {
+    headers["X-Use-Cache"] = "false";
+  }
+  if (dont_load_model) {
+    headers["X-Load-Model"] = "0";
   }
   const url = (() => {
     if (isUrl(model)) {
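`dist/index.js` carries the same fix as `dist/index.cjs`, so the caller-facing effect is identical for ESM consumers: the `wait_for_model`, `use_cache`, and `dont_load_model` request options take effect per call. A usage sketch (the option names are exactly those destructured in the diff; the model and prompt are placeholders):

```typescript
import { HfInference } from "@huggingface/inference";

const hf = new HfInference("hf_...");

// wait_for_model sends X-Wait-For-Model: true, so the call blocks until
// the model is loaded instead of erroring while it warms up;
// use_cache: false sends X-Use-Cache: false to skip cached results.
const { generated_text } = await hf.textGeneration(
  { model: "gpt2", inputs: "The answer to the universe is" },
  { wait_for_model: true, use_cache: false },
);
console.log(generated_text);
```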
package/package.json
CHANGED
-  "version": "2.6.5",
+  "version": "2.6.6",

package/src/lib/makeRequestOptions.ts
CHANGED
@@ -27,7 +27,15 @@ export async function makeRequestOptions(
   // eslint-disable-next-line @typescript-eslint/no-unused-vars
   const { accessToken, model: _model, ...otherArgs } = args;
   let { model } = args;
-  const {
+  const {
+    forceTask: task,
+    includeCredentials,
+    taskHint,
+    wait_for_model,
+    use_cache,
+    dont_load_model,
+    ...otherOptions
+  } = options ?? {};
 
   const headers: Record<string, string> = {};
   if (accessToken) {
@@ -57,16 +65,16 @@ export async function makeRequestOptions(
 
   if (!binary) {
     headers["Content-Type"] = "application/json";
-  }
-
-
-
-
-
-
-
-
-
+  }
+
+  if (wait_for_model) {
+    headers["X-Wait-For-Model"] = "true";
+  }
+  if (use_cache === false) {
+    headers["X-Use-Cache"] = "false";
+  }
+  if (dont_load_model) {
+    headers["X-Load-Model"] = "0";
   }
 
   const url = (() => {
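For the wire-level picture, the three headers set in `makeRequestOptions` travel on an ordinary POST. A raw-fetch sketch of an equivalent request, assuming the serverless Inference API URL shape (`https://api-inference.huggingface.co/models/<model>`) that this library targets when no endpoint URL is given:

```typescript
// The X- headers below are the ones makeRequestOptions derives from
// { wait_for_model: true, use_cache: false } in the diff above.
const res = await fetch("https://api-inference.huggingface.co/models/gpt2", {
  method: "POST",
  headers: {
    Authorization: "Bearer hf_...",
    "Content-Type": "application/json",
    "X-Wait-For-Model": "true",
    "X-Use-Cache": "false",
  },
  body: JSON.stringify({ inputs: "The answer to the universe is" }),
});
const [{ generated_text }] = await res.json();
console.log(generated_text);
```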