@huggingface/inference 2.6.5 → 2.6.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +15 -0
- package/dist/index.cjs +18 -11
- package/dist/index.js +18 -11
- package/package.json +1 -1
- package/src/lib/makeRequestOptions.ts +19 -11
package/README.md
CHANGED
@@ -506,6 +506,21 @@ const gpt2 = hf.endpoint('https://xyz.eu-west-1.aws.endpoints.huggingface.cloud/
 const { generated_text } = await gpt2.textGeneration({inputs: 'The answer to the universe is'});
 ```
 
+By default, all calls to the inference endpoint will wait until the model is
+loaded. When [scaling to
+0](https://huggingface.co/docs/inference-endpoints/en/autoscaling#scaling-to-0)
+is enabled on the endpoint, this can result in non-trivial waiting time. If
+you'd rather disable this behavior and handle the endpoint's returned 500 HTTP
+errors yourself, you can do so like so:
+
+```typescript
+const gpt2 = hf.endpoint('https://xyz.eu-west-1.aws.endpoints.huggingface.cloud/gpt2');
+const { generated_text } = await gpt2.textGeneration(
+  {inputs: 'The answer to the universe is'},
+  {retry_on_error: false},
+);
+```
+
 ## Running tests
 
 ```console
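The passage added to the README stops at disabling the retry behavior and leaves the actual error handling to the reader. A minimal sketch of that follow-up, assuming the client surfaces the failed request as a thrown error (the exact error shape is not specified in this diff):

```typescript
import { HfInference } from "@huggingface/inference";

const hf = new HfInference("hf_...");
const gpt2 = hf.endpoint("https://xyz.eu-west-1.aws.endpoints.huggingface.cloud/gpt2");

try {
  const { generated_text } = await gpt2.textGeneration(
    { inputs: "The answer to the universe is" },
    { retry_on_error: false }, // don't poll a scaled-to-zero endpoint until it warms up
  );
  console.log(generated_text);
} catch (err) {
  // With retry_on_error disabled, a cold endpoint's 500 surfaces here;
  // back off and retry on your own schedule instead of blocking the call.
  console.error("endpoint not ready:", err);
}
```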
package/dist/index.cjs
CHANGED
@@ -132,7 +132,15 @@ var tasks = null;
 async function makeRequestOptions(args, options) {
   const { accessToken, model: _model, ...otherArgs } = args;
   let { model } = args;
-  const {
+  const {
+    forceTask: task,
+    includeCredentials,
+    taskHint,
+    wait_for_model,
+    use_cache,
+    dont_load_model,
+    ...otherOptions
+  } = options ?? {};
   const headers = {};
   if (accessToken) {
     headers["Authorization"] = `Bearer ${accessToken}`;
@@ -155,16 +163,15 @@ async function makeRequestOptions(args, options) {
   const binary = "data" in args && !!args.data;
   if (!binary) {
     headers["Content-Type"] = "application/json";
-  }
-
-
-
-
-
-
-
-
-  }
+  }
+  if (wait_for_model) {
+    headers["X-Wait-For-Model"] = "true";
+  }
+  if (use_cache === false) {
+    headers["X-Use-Cache"] = "false";
+  }
+  if (dont_load_model) {
+    headers["X-Load-Model"] = "0";
   }
   const url = (() => {
     if (isUrl(model)) {
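The substance of the bundle change is a small option-to-header mapping. A standalone sketch of just that logic, distilled from the diff above (the helper name `optionHeaders` is mine, for illustration):

```typescript
// Distilled from makeRequestOptions: each request option maps to an
// Inference API header exactly as in the added code above.
function optionHeaders(options: {
  wait_for_model?: boolean;
  use_cache?: boolean;
  dont_load_model?: boolean;
}): Record<string, string> {
  const headers: Record<string, string> = {};
  if (options.wait_for_model) {
    headers["X-Wait-For-Model"] = "true"; // block until the model is loaded
  }
  if (options.use_cache === false) {
    headers["X-Use-Cache"] = "false"; // opt out of cached inference results
  }
  if (options.dont_load_model) {
    headers["X-Load-Model"] = "0"; // don't trigger a load of an unloaded model
  }
  return headers;
}

// optionHeaders({ wait_for_model: true, use_cache: false })
// → { "X-Wait-For-Model": "true", "X-Use-Cache": "false" }
```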
package/dist/index.js
CHANGED
@@ -81,7 +81,15 @@ var tasks = null;
 async function makeRequestOptions(args, options) {
   const { accessToken, model: _model, ...otherArgs } = args;
   let { model } = args;
-  const {
+  const {
+    forceTask: task,
+    includeCredentials,
+    taskHint,
+    wait_for_model,
+    use_cache,
+    dont_load_model,
+    ...otherOptions
+  } = options ?? {};
   const headers = {};
   if (accessToken) {
     headers["Authorization"] = `Bearer ${accessToken}`;
@@ -104,16 +112,15 @@ async function makeRequestOptions(args, options) {
   const binary = "data" in args && !!args.data;
   if (!binary) {
     headers["Content-Type"] = "application/json";
-  }
-
-
-
-
-
-
-
-
-  }
+  }
+  if (wait_for_model) {
+    headers["X-Wait-For-Model"] = "true";
+  }
+  if (use_cache === false) {
+    headers["X-Use-Cache"] = "false";
+  }
+  if (dont_load_model) {
+    headers["X-Load-Model"] = "0";
   }
   const url = (() => {
     if (isUrl(model)) {
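`dist/index.js` carries the same fix as `dist/index.cjs`, so the caller-facing effect is identical for ESM consumers: the `wait_for_model`, `use_cache`, and `dont_load_model` request options take effect per call. A usage sketch (the option names are exactly those destructured in the diff; the model and prompt are placeholders):

```typescript
import { HfInference } from "@huggingface/inference";

const hf = new HfInference("hf_...");

// wait_for_model sends X-Wait-For-Model: true, so the call blocks until
// the model is loaded instead of erroring while it warms up;
// use_cache: false sends X-Use-Cache: false to skip cached results.
const { generated_text } = await hf.textGeneration(
  { model: "gpt2", inputs: "The answer to the universe is" },
  { wait_for_model: true, use_cache: false },
);
console.log(generated_text);
```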
package/package.json
CHANGED
-  "version": "2.6.5",
+  "version": "2.6.6",

package/src/lib/makeRequestOptions.ts
CHANGED
@@ -27,7 +27,15 @@ export async function makeRequestOptions(
   // eslint-disable-next-line @typescript-eslint/no-unused-vars
   const { accessToken, model: _model, ...otherArgs } = args;
   let { model } = args;
-  const {
+  const {
+    forceTask: task,
+    includeCredentials,
+    taskHint,
+    wait_for_model,
+    use_cache,
+    dont_load_model,
+    ...otherOptions
+  } = options ?? {};
 
   const headers: Record<string, string> = {};
   if (accessToken) {
@@ -57,16 +65,16 @@ export async function makeRequestOptions(
 
   if (!binary) {
     headers["Content-Type"] = "application/json";
-  }
-
-
-
-
-
-
-
-
-
+  }
+
+  if (wait_for_model) {
+    headers["X-Wait-For-Model"] = "true";
+  }
+  if (use_cache === false) {
+    headers["X-Use-Cache"] = "false";
+  }
+  if (dont_load_model) {
+    headers["X-Load-Model"] = "0";
   }
 
   const url = (() => {
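For the wire-level picture, the three headers set in `makeRequestOptions` travel on an ordinary POST. A raw-fetch sketch of an equivalent request, assuming the serverless Inference API URL shape (`https://api-inference.huggingface.co/models/<model>`) that this library targets when no endpoint URL is given:

```typescript
// The X- headers below are the ones makeRequestOptions derives from
// { wait_for_model: true, use_cache: false } in the diff above.
const res = await fetch("https://api-inference.huggingface.co/models/gpt2", {
  method: "POST",
  headers: {
    Authorization: "Bearer hf_...",
    "Content-Type": "application/json",
    "X-Wait-For-Model": "true",
    "X-Use-Cache": "false",
  },
  body: JSON.stringify({ inputs: "The answer to the universe is" }),
});
const [{ generated_text }] = await res.json();
console.log(generated_text);
```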