@huggingface/inference 2.6.5 → 2.6.6

This diff shows the content of publicly released package versions as published to their respective public registries, and is provided for informational purposes only.
package/README.md CHANGED
@@ -506,6 +506,21 @@ const gpt2 = hf.endpoint('https://xyz.eu-west-1.aws.endpoints.huggingface.cloud/
 const { generated_text } = await gpt2.textGeneration({inputs: 'The answer to the universe is'});
 ```
 
+By default, all calls to the inference endpoint will wait until the model is
+loaded. When [scaling to
+0](https://huggingface.co/docs/inference-endpoints/en/autoscaling#scaling-to-0)
+is enabled on the endpoint, this can result in non-trivial waiting time. If
+you'd rather disable this behavior and handle the endpoint's returned 500 HTTP
+errors yourself, you can do so as follows:
+
+```typescript
+const gpt2 = hf.endpoint('https://xyz.eu-west-1.aws.endpoints.huggingface.cloud/gpt2');
+const { generated_text } = await gpt2.textGeneration(
+  {inputs: 'The answer to the universe is'},
+  {retry_on_error: false},
+);
+```
+
 ## Running tests
 
 ```console
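
The README addition above shows only the happy path. For completeness, here is a minimal sketch of what handling the failure yourself could look like once `retry_on_error: false` is set; the `try/catch` shape and log messages are illustrative assumptions, not part of this diff:

```typescript
// Hypothetical error handling for retry_on_error: false.
// `hf` is an HfInference client as in the README above; the error's exact
// shape is not specified by this diff, so it is treated opaquely here.
const gpt2 = hf.endpoint('https://xyz.eu-west-1.aws.endpoints.huggingface.cloud/gpt2');

try {
  const { generated_text } = await gpt2.textGeneration(
    { inputs: 'The answer to the universe is' },
    { retry_on_error: false },
  );
  console.log(generated_text);
} catch (err) {
  // With scale-to-zero enabled, a cold endpoint answers 500 while the model
  // loads; back off and retry on whatever schedule fits the application.
  console.warn('Endpoint not ready yet, retrying later:', err);
}
```
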
package/dist/index.cjs CHANGED
@@ -132,7 +132,15 @@ var tasks = null;
 async function makeRequestOptions(args, options) {
   const { accessToken, model: _model, ...otherArgs } = args;
   let { model } = args;
-  const { forceTask: task, includeCredentials, taskHint, ...otherOptions } = options ?? {};
+  const {
+    forceTask: task,
+    includeCredentials,
+    taskHint,
+    wait_for_model,
+    use_cache,
+    dont_load_model,
+    ...otherOptions
+  } = options ?? {};
   const headers = {};
   if (accessToken) {
     headers["Authorization"] = `Bearer ${accessToken}`;
@@ -155,16 +163,15 @@ async function makeRequestOptions(args, options) {
   const binary = "data" in args && !!args.data;
   if (!binary) {
     headers["Content-Type"] = "application/json";
-  } else {
-    if (options?.wait_for_model) {
-      headers["X-Wait-For-Model"] = "true";
-    }
-    if (options?.use_cache === false) {
-      headers["X-Use-Cache"] = "false";
-    }
-    if (options?.dont_load_model) {
-      headers["X-Load-Model"] = "0";
-    }
+  }
+  if (wait_for_model) {
+    headers["X-Wait-For-Model"] = "true";
+  }
+  if (use_cache === false) {
+    headers["X-Use-Cache"] = "false";
+  }
+  if (dont_load_model) {
+    headers["X-Load-Model"] = "0";
   }
   const url = (() => {
     if (isUrl(model)) {
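
Note what this hunk changes beyond style: the three option-to-header mappings used to live inside the `else` branch, so they only applied to binary payloads (`"data" in args`), and JSON requests silently dropped `wait_for_model`, `use_cache`, and `dont_load_model`. After the change, the headers are set for every request. A sketch of a JSON call that now takes effect, with the option names taken from the hunk (`hf` being a client instance, and the model and prompt illustrative):

```typescript
// These options now translate into headers on JSON requests as well:
//   wait_for_model: true  ->  X-Wait-For-Model: true
//   use_cache: false      ->  X-Use-Cache: false
//   dont_load_model: true ->  X-Load-Model: 0
const { generated_text } = await hf.textGeneration(
  { model: 'gpt2', inputs: 'The answer to the universe is' },
  { wait_for_model: true, use_cache: false },
);
```
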
package/dist/index.js CHANGED
@@ -81,7 +81,15 @@ var tasks = null;
 async function makeRequestOptions(args, options) {
   const { accessToken, model: _model, ...otherArgs } = args;
   let { model } = args;
-  const { forceTask: task, includeCredentials, taskHint, ...otherOptions } = options ?? {};
+  const {
+    forceTask: task,
+    includeCredentials,
+    taskHint,
+    wait_for_model,
+    use_cache,
+    dont_load_model,
+    ...otherOptions
+  } = options ?? {};
   const headers = {};
   if (accessToken) {
     headers["Authorization"] = `Bearer ${accessToken}`;
@@ -104,16 +112,15 @@ async function makeRequestOptions(args, options) {
   const binary = "data" in args && !!args.data;
   if (!binary) {
     headers["Content-Type"] = "application/json";
-  } else {
-    if (options?.wait_for_model) {
-      headers["X-Wait-For-Model"] = "true";
-    }
-    if (options?.use_cache === false) {
-      headers["X-Use-Cache"] = "false";
-    }
-    if (options?.dont_load_model) {
-      headers["X-Load-Model"] = "0";
-    }
+  }
+  if (wait_for_model) {
+    headers["X-Wait-For-Model"] = "true";
+  }
+  if (use_cache === false) {
+    headers["X-Use-Cache"] = "false";
+  }
+  if (dont_load_model) {
+    headers["X-Load-Model"] = "0";
   }
   const url = (() => {
     if (isUrl(model)) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@huggingface/inference",
-  "version": "2.6.5",
+  "version": "2.6.6",
   "packageManager": "pnpm@8.10.5",
   "license": "MIT",
   "author": "Tim Mikeladze <tim.mikeladze@gmail.com>",
package/src/lib/makeRequestOptions.ts CHANGED
@@ -27,7 +27,15 @@ export async function makeRequestOptions(
   // eslint-disable-next-line @typescript-eslint/no-unused-vars
   const { accessToken, model: _model, ...otherArgs } = args;
   let { model } = args;
-  const { forceTask: task, includeCredentials, taskHint, ...otherOptions } = options ?? {};
+  const {
+    forceTask: task,
+    includeCredentials,
+    taskHint,
+    wait_for_model,
+    use_cache,
+    dont_load_model,
+    ...otherOptions
+  } = options ?? {};
 
   const headers: Record<string, string> = {};
   if (accessToken) {
@@ -57,16 +65,16 @@ export async function makeRequestOptions(
 
   if (!binary) {
     headers["Content-Type"] = "application/json";
-  } else {
-    if (options?.wait_for_model) {
-      headers["X-Wait-For-Model"] = "true";
-    }
-    if (options?.use_cache === false) {
-      headers["X-Use-Cache"] = "false";
-    }
-    if (options?.dont_load_model) {
-      headers["X-Load-Model"] = "0";
-    }
+  }
+
+  if (wait_for_model) {
+    headers["X-Wait-For-Model"] = "true";
+  }
+  if (use_cache === false) {
+    headers["X-Use-Cache"] = "false";
+  }
+  if (dont_load_model) {
+    headers["X-Load-Model"] = "0";
   }
 
   const url = (() => {
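
Distilled from the hunks above, here is the post-change header logic in isolation. This is a standalone sketch for reading convenience, not an API exported by the package; only the option names, header names, and branch conditions come from the diff:

```typescript
// Standalone restatement of the 2.6.6 header logic.
interface HeaderOptions {
  wait_for_model?: boolean;
  use_cache?: boolean;
  dont_load_model?: boolean;
}

function buildHeaders(binary: boolean, opts: HeaderOptions): Record<string, string> {
  const headers: Record<string, string> = {};
  if (!binary) {
    headers["Content-Type"] = "application/json";
  }
  // No longer nested under `else`: the mappings below now apply to
  // JSON requests too, not only to binary payloads.
  if (opts.wait_for_model) {
    headers["X-Wait-For-Model"] = "true";
  }
  if (opts.use_cache === false) {
    headers["X-Use-Cache"] = "false";
  }
  if (opts.dont_load_model) {
    headers["X-Load-Model"] = "0";
  }
  return headers;
}
```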