@huggingface/inference 1.6.0 → 1.6.1
- package/README.md +1 -1
- package/dist/index.d.ts +15 -10
- package/dist/index.js +24 -11
- package/dist/index.mjs +24 -11
- package/package.json +6 -3
- package/src/HfInference.ts +84 -72
- package/src/utils.ts +6 -0
package/README.md
CHANGED
```diff
@@ -191,4 +191,4 @@ HF_ACCESS_TOKEN="your access token" npm run test
 
 We have an informative documentation project called [Tasks](https://huggingface.co/tasks) to list available models for each task and explain how each task works in detail.
 
-It also
+It also contains demos, example outputs, and other resources should you want to dig deeper into the ML side of things.
```
package/dist/index.d.ts
CHANGED
```diff
@@ -7,6 +7,10 @@ interface Options {
      * (Default: true). Boolean. There is a cache layer on the inference API to speedup requests we have already seen. Most models can use those results as is as models are deterministic (meaning the results will be the same anyway). However if you use a non deterministic model, you can set this parameter to prevent the caching mechanism from being used resulting in a real new query.
      */
     use_cache?: boolean;
+    /**
+     * (Default: false). Boolean. Do not load the model if it's not already available.
+     */
+    dont_load_model?: boolean;
     /**
      * (Default: false). Boolean to use GPU instead of CPU for inference (requires Startup plan at least).
      */
@@ -344,7 +348,7 @@ type FeatureExtractionArgs = Args & {
      * "sentences": ["That is a happy dog", "That is a very happy person", "Today is a sunny day"]
      * }
      */
-    inputs: Record<string,
+    inputs: Record<string, unknown> | Record<string, unknown>[];
 };
 /**
  * Returned values are a list of floats, or a list of list of floats (depending on if you sent a string or a list of string, and if the automatic reduction, usually mean_pooling for instance was applied for you or not. This should be explained on the model's README.
@@ -354,7 +358,7 @@ type ImageClassificationArgs = Args & {
     /**
      * Binary image data
      */
-    data:
+    data: Blob | ArrayBuffer;
 };
 interface ImageClassificationReturnValue {
     /**
@@ -371,7 +375,7 @@ type ObjectDetectionArgs = Args & {
     /**
      * Binary image data
      */
-    data:
+    data: Blob | ArrayBuffer;
 };
 interface ObjectDetectionReturnValue {
     /**
@@ -397,7 +401,7 @@ type ImageSegmentationArgs = Args & {
     /**
      * Binary image data
      */
-    data:
+    data: Blob | ArrayBuffer;
 };
 interface ImageSegmentationReturnValue {
     /**
@@ -418,7 +422,7 @@ type AutomaticSpeechRecognitionArgs = Args & {
     /**
      * Binary audio data
      */
-    data:
+    data: Blob | ArrayBuffer;
 };
 interface AutomaticSpeechRecognitionReturn {
     /**
@@ -430,7 +434,7 @@ type AudioClassificationArgs = Args & {
     /**
      * Binary audio data
      */
-    data:
+    data: Blob | ArrayBuffer;
 };
 interface AudioClassificationReturnValue {
     /**
@@ -533,13 +537,14 @@ declare class HfInference {
      * Recommended model: stabilityai/stable-diffusion-2
      */
     textToImage(args: TextToImageArgs, options?: Options): Promise<TextToImageReturn>;
-    request(args: Args & {
-        data?:
+    request<T>(args: Args & {
+        data?: Blob | ArrayBuffer;
     }, options?: Options & {
         binary?: boolean;
         blob?: boolean;
-
-
+        /** For internal HF use, which is why it's not exposed in {@link Options} */
+        includeCredentials?: boolean;
+    }): Promise<T>;
 }
 
 export { Args, AudioClassificationArgs, AudioClassificationReturn, AudioClassificationReturnValue, AutomaticSpeechRecognitionArgs, AutomaticSpeechRecognitionReturn, ConversationalArgs, ConversationalReturn, FeatureExtractionArgs, FeatureExtractionReturn, FillMaskArgs, FillMaskReturn, HfInference, ImageClassificationArgs, ImageClassificationReturn, ImageClassificationReturnValue, ImageSegmentationArgs, ImageSegmentationReturn, ImageSegmentationReturnValue, ObjectDetectionArgs, ObjectDetectionReturn, ObjectDetectionReturnValue, Options, QuestionAnswerArgs, QuestionAnswerReturn, SummarizationArgs, SummarizationReturn, TableQuestionAnswerArgs, TableQuestionAnswerReturn, TextClassificationArgs, TextClassificationReturn, TextGenerationArgs, TextGenerationReturn, TextToImageArgs, TextToImageReturn, TokenClassificationArgs, TokenClassificationReturn, TokenClassificationReturnValue, TranslationArgs, TranslationReturn, ZeroShotClassificationArgs, ZeroShotClassificationReturn, ZeroShotClassificationReturnValue };
```
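The notable API changes in the typings are the new `dont_load_model` option and the now-generic `request<T>`. A minimal usage sketch — the access token and image URL are placeholders, and the model name is only a plausible example, not something taken from this diff:

```ts
import { HfInference } from "@huggingface/inference";

// Placeholder token and image URL, for illustration only.
const hf = new HfInference("hf_xxx");
const data = await (await fetch("https://example.com/cat.jpg")).blob();

// `dont_load_model` is new in 1.6.1; per the request logic further below,
// binary requests translate it into the `X-Load-Model: 0` header so a model
// that is not already loaded is not spun up just for this call.
const labels = await hf.imageClassification(
  { model: "google/vit-base-patch16-224", data },
  { dont_load_model: true }
);
console.log(labels);
```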
package/dist/index.js
CHANGED
```diff
@@ -23,6 +23,14 @@ __export(src_exports, {
 });
 module.exports = __toCommonJS(src_exports);
 
+// src/utils.ts
+function toArray(obj) {
+  if (Array.isArray(obj)) {
+    return obj;
+  }
+  return [obj];
+}
+
 // src/HfInference.ts
 var HfInference = class {
   apiKey;
@@ -71,7 +79,7 @@ var HfInference = class {
    * Usually used for sentence parsing, either grammatical, or Named Entity Recognition (NER) to understand keywords contained within text. Recommended model: dbmdz/bert-large-cased-finetuned-conll03-english
    */
   async tokenClassification(args, options) {
-    return
+    return toArray(await this.request(args, options));
   }
   /**
    * This task is well known to translate text from one language to another. Recommended model: Helsinki-NLP/opus-mt-ru-en.
@@ -83,7 +91,9 @@ var HfInference = class {
    * This task is super useful to try out classification with zero code, you simply pass a sentence/paragraph and the possible labels for that sentence, and you get a result. Recommended model: facebook/bart-large-mnli.
    */
   async zeroShotClassification(args, options) {
-    return
+    return toArray(
+      await this.request(args, options)
+    );
   }
   /**
    * This task corresponds to any chatbot like structure. Models tend to have shorter max_length, so please check with caution when using a given model if you need long range dependency or not. Recommended model: microsoft/DialoGPT-large.
@@ -168,8 +178,16 @@ var HfInference = class {
     if (!options?.binary) {
       headers["Content-Type"] = "application/json";
     }
-    if (options?.binary
-
+    if (options?.binary) {
+      if (mergedOptions.wait_for_model) {
+        headers["X-Wait-For-Model"] = "true";
+      }
+      if (mergedOptions.use_cache === false) {
+        headers["X-Use-Cache"] = "false";
+      }
+      if (mergedOptions.dont_load_model) {
+        headers["X-Load-Model"] = "0";
+      }
     }
     const response = await fetch(`https://api-inference.huggingface.co/models/${model}`, {
       headers,
@@ -177,7 +195,8 @@ var HfInference = class {
       body: options?.binary ? args.data : JSON.stringify({
         ...otherArgs,
         options: mergedOptions
-      })
+      }),
+      credentials: options?.includeCredentials ? "include" : "same-origin"
     });
     if (mergedOptions.retry_on_error !== false && response.status === 503 && !mergedOptions.wait_for_model) {
       return this.request(args, {
@@ -197,12 +216,6 @@ var HfInference = class {
     }
     return output;
   }
-  static toArray(obj) {
-    if (Array.isArray(obj)) {
-      return obj;
-    }
-    return [obj];
-  }
 };
 // Annotate the CommonJS export names for ESM import in node:
 0 && (module.exports = {
```
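The bundled `toArray` helper — extracted to `src/utils.ts` and no longer a private static method — is what lets `tokenClassification` and `zeroShotClassification` always return arrays. A typed sketch of the same behavior (the generic signature here is an assumption; the bundled output above is untyped):

```ts
// Wrap a single value in an array so callers always receive T[],
// whether the API returned one result object or a list of them.
function toArray<T>(obj: T | T[]): T[] {
  return Array.isArray(obj) ? obj : [obj];
}

console.log(toArray("x"));        // ["x"]
console.log(toArray(["x", "y"])); // ["x", "y"]
```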
package/dist/index.mjs
CHANGED
```diff
@@ -1,3 +1,11 @@
+// src/utils.ts
+function toArray(obj) {
+  if (Array.isArray(obj)) {
+    return obj;
+  }
+  return [obj];
+}
+
 // src/HfInference.ts
 var HfInference = class {
   apiKey;
@@ -46,7 +54,7 @@ var HfInference = class {
    * Usually used for sentence parsing, either grammatical, or Named Entity Recognition (NER) to understand keywords contained within text. Recommended model: dbmdz/bert-large-cased-finetuned-conll03-english
    */
   async tokenClassification(args, options) {
-    return
+    return toArray(await this.request(args, options));
   }
   /**
    * This task is well known to translate text from one language to another. Recommended model: Helsinki-NLP/opus-mt-ru-en.
@@ -58,7 +66,9 @@ var HfInference = class {
    * This task is super useful to try out classification with zero code, you simply pass a sentence/paragraph and the possible labels for that sentence, and you get a result. Recommended model: facebook/bart-large-mnli.
    */
   async zeroShotClassification(args, options) {
-    return
+    return toArray(
+      await this.request(args, options)
+    );
   }
   /**
    * This task corresponds to any chatbot like structure. Models tend to have shorter max_length, so please check with caution when using a given model if you need long range dependency or not. Recommended model: microsoft/DialoGPT-large.
@@ -143,8 +153,16 @@ var HfInference = class {
     if (!options?.binary) {
       headers["Content-Type"] = "application/json";
     }
-    if (options?.binary
-
+    if (options?.binary) {
+      if (mergedOptions.wait_for_model) {
+        headers["X-Wait-For-Model"] = "true";
+      }
+      if (mergedOptions.use_cache === false) {
+        headers["X-Use-Cache"] = "false";
+      }
+      if (mergedOptions.dont_load_model) {
+        headers["X-Load-Model"] = "0";
+      }
     }
     const response = await fetch(`https://api-inference.huggingface.co/models/${model}`, {
       headers,
@@ -152,7 +170,8 @@ var HfInference = class {
       body: options?.binary ? args.data : JSON.stringify({
         ...otherArgs,
         options: mergedOptions
-      })
+      }),
+      credentials: options?.includeCredentials ? "include" : "same-origin"
     });
     if (mergedOptions.retry_on_error !== false && response.status === 503 && !mergedOptions.wait_for_model) {
       return this.request(args, {
@@ -172,12 +191,6 @@ var HfInference = class {
     }
     return output;
   }
-  static toArray(obj) {
-    if (Array.isArray(obj)) {
-      return obj;
-    }
-    return [obj];
-  }
 };
 export {
   HfInference
```
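The other behavioral change visible in both bundles is that inference options are now forwarded as HTTP headers on binary requests (JSON requests already carried them in the request body under `options`). A small sketch of the mapping, using the header names from the hunks above; `mergedOptions` here is a hypothetical stand-in for the merged options object in `request()`:

```ts
// Stand-in for mergedOptions inside request().
const mergedOptions = { wait_for_model: true, use_cache: false, dont_load_model: true };
const headers: Record<string, string> = {};

if (mergedOptions.wait_for_model) headers["X-Wait-For-Model"] = "true";
if (mergedOptions.use_cache === false) headers["X-Use-Cache"] = "false";
if (mergedOptions.dont_load_model) headers["X-Load-Model"] = "0";

console.log(headers);
// { "X-Wait-For-Model": "true", "X-Use-Cache": "false", "X-Load-Model": "0" }
```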
package/package.json
CHANGED
```diff
@@ -1,6 +1,6 @@
 {
   "name": "@huggingface/inference",
-  "version": "1.6.0",
+  "version": "1.6.1",
   "license": "MIT",
   "author": "Tim Mikeladze <tim.mikeladze@gmail.com>",
   "description": "Typescript wrapper for the Hugging Face Inference API",
@@ -48,9 +48,12 @@
   "resolutions": {},
   "scripts": {
     "build": "tsup src/index.ts --format cjs,esm --clean --dts",
-    "
+    "lint": "eslint --quiet --fix --ext .cjs,.ts .",
+    "lint:check": "eslint --ext .cjs,.ts .",
+    "format": "prettier --write .",
+    "format:check": "prettier --check .",
     "test": "vitest run",
-    "test:
+    "test:browser": "vitest run --browser.name=chrome --browser.headless",
     "type-check": "tsc"
   }
 }
```
package/src/HfInference.ts
CHANGED
```diff
@@ -1,3 +1,5 @@
+import { toArray } from "./utils";
+
 export interface Options {
   /**
    * (Default: true) Boolean. If a request 503s and wait_for_model is set to false, the request will be retried with the same parameters but with wait_for_model set to true.
@@ -6,11 +8,15 @@ export interface Options {
   /**
    * (Default: true). Boolean. There is a cache layer on the inference API to speedup requests we have already seen. Most models can use those results as is as models are deterministic (meaning the results will be the same anyway). However if you use a non deterministic model, you can set this parameter to prevent the caching mechanism from being used resulting in a real new query.
    */
-  use_cache?:
+  use_cache?: boolean;
+  /**
+   * (Default: false). Boolean. Do not load the model if it's not already available.
+   */
+  dont_load_model?: boolean;
   /**
    * (Default: false). Boolean to use GPU instead of CPU for inference (requires Startup plan at least).
    */
-  use_gpu?:
+  use_gpu?: boolean;
 
   /**
    * (Default: false) Boolean. If the model is not ready, wait for it instead of receiving 503. It limits the number of requests required to get your inference done. It is advised to only set this flag to true after receiving a 503 error as it will limit hanging in your application to known places.
@@ -30,15 +36,15 @@ export type FillMaskReturn = {
   /**
    * The probability for this token.
    */
-  score:
+  score: number;
   /**
    * The actual sequence of tokens that ran against the model (may contain special tokens)
    */
-  sequence:
+  sequence: string;
   /**
    * The id of the token
    */
-  token:
+  token: number;
   /**
    * The string representation of the token
    */
@@ -49,20 +55,20 @@ export type SummarizationArgs = Args & {
   /**
    * A string to be summarized
    */
-  inputs:
+  inputs: string;
   parameters?: {
     /**
      * (Default: None). Integer to define the maximum length in tokens of the output summary.
      */
-    max_length?:
+    max_length?: number;
     /**
      * (Default: None). Float (0-120.0). The amount of time in seconds that the query should take maximum. Network can cause some overhead so it will be a soft limit.
      */
-    max_time?:
+    max_time?: number;
     /**
      * (Default: None). Integer to define the minimum length in tokens of the output summary.
      */
-    min_length?:
+    min_length?: number;
     /**
      * (Default: None). Float (0.0-100.0). The more a token is used within generation the more it is penalized to not be picked in successive generation passes.
      */
@@ -70,15 +76,15 @@ export type SummarizationArgs = Args & {
     /**
      * (Default: 1.0). Float (0.0-100.0). The temperature of the sampling operation. 1 means regular sampling, 0 means always take the highest score, 100.0 is getting closer to uniform probability.
      */
-    temperature?:
+    temperature?: number;
     /**
      * (Default: None). Integer to define the top tokens considered within the sample operation to create new text.
      */
-    top_k?:
+    top_k?: number;
     /**
      * (Default: None). Float to define the tokens that are within the sample operation of text generation. Add tokens in the sample for more probable to least probable until the sum of the probabilities is greater than top_p.
      */
-    top_p?:
+    top_p?: number;
   };
 };
 
@@ -91,7 +97,7 @@ export interface SummarizationReturn {
 
 export type QuestionAnswerArgs = Args & {
   inputs: {
-    context:
+    context: string;
     question: string;
   };
 };
@@ -104,15 +110,15 @@ export interface QuestionAnswerReturn {
   /**
    * The index (string wise) of the stop of the answer within context.
    */
-  end:
+  end: number;
   /**
    * A float that represents how likely that the answer is correct
    */
-  score:
+  score: number;
   /**
    * The index (string wise) of the start of the answer within context.
    */
-  start:
+  start: number;
 }
 
 export type TableQuestionAnswerArgs = Args & {
@@ -132,15 +138,15 @@ export interface TableQuestionAnswerReturn {
   /**
    * The aggregator used to get the answer
    */
-  aggregator:
+  aggregator: string;
   /**
    * The plaintext answer
    */
-  answer:
+  answer: string;
   /**
    * A list of coordinates of the cells contents
    */
-  cells:
+  cells: string[];
   /**
    * a list of coordinates of the cells referenced in the answer
    */
@@ -169,20 +175,20 @@ export type TextGenerationArgs = Args & {
   /**
    * A string to be generated from
    */
-  inputs:
+  inputs: string;
   parameters?: {
     /**
      * (Optional: True). Bool. Whether or not to use sampling, use greedy decoding otherwise.
      */
-    do_sample?:
+    do_sample?: boolean;
     /**
      * (Default: None). Int (0-250). The amount of new tokens to be generated, this does not include the input length it is a estimate of the size of generated text you want. Each new tokens slows down the request, so look for balance between response times and length of text generated.
      */
-    max_new_tokens?:
+    max_new_tokens?: number;
     /**
      * (Default: None). Float (0-120.0). The amount of time in seconds that the query should take maximum. Network can cause some overhead so it will be a soft limit. Use that in combination with max_new_tokens for best results.
      */
-    max_time?:
+    max_time?: number;
     /**
      * (Default: 1). Integer. The number of proposition you want to be returned.
      */
@@ -190,23 +196,23 @@ export type TextGenerationArgs = Args & {
     /**
      * (Default: None). Float (0.0-100.0). The more a token is used within generation the more it is penalized to not be picked in successive generation passes.
      */
-    repetition_penalty?:
+    repetition_penalty?: number;
     /**
      * (Default: True). Bool. If set to False, the return results will not contain the original query making it easier for prompting.
      */
-    return_full_text?:
+    return_full_text?: boolean;
     /**
      * (Default: 1.0). Float (0.0-100.0). The temperature of the sampling operation. 1 means regular sampling, 0 means always take the highest score, 100.0 is getting closer to uniform probability.
      */
-    temperature?:
+    temperature?: number;
     /**
      * (Default: None). Integer to define the top tokens considered within the sample operation to create new text.
      */
-    top_k?:
+    top_k?: number;
     /**
      * (Default: None). Float to define the tokens that are within the sample operation of text generation. Add tokens in the sample for more probable to least probable until the sum of the probabilities is greater than top_p.
      */
-    top_p?:
+    top_p?: number;
   };
 };
 
@@ -221,7 +227,7 @@ export type TokenClassificationArgs = Args & {
   /**
    * A string to be classified
    */
-  inputs:
+  inputs: string;
   parameters?: {
     /**
      * (Default: simple). There are several aggregation strategies:
@@ -244,7 +250,7 @@ export interface TokenClassificationReturnValue {
   /**
    * The offset stringwise where the answer is located. Useful to disambiguate if word occurs multiple times.
    */
-  end:
+  end: number;
   /**
    * The type for the entity being recognized (model specific).
    */
@@ -252,15 +258,15 @@ export interface TokenClassificationReturnValue {
   /**
    * How likely the entity was recognized.
   */
-  score:
+  score: number;
   /**
    * The offset stringwise where the answer is located. Useful to disambiguate if word occurs multiple times.
    */
-  start:
+  start: number;
   /**
    * The string that was captured
    */
-  word:
+  word: string;
 }
 
 export type TokenClassificationReturn = TokenClassificationReturnValue[];
@@ -283,7 +289,7 @@ export type ZeroShotClassificationArgs = Args & {
   /**
    * a string or list of strings
    */
-  inputs:
+  inputs: string | string[];
   parameters: {
     /**
      * a list of strings that are potential classes for inputs. (max 10 candidate_labels, for more, simply run multiple requests, results are going to be misleading if using too many candidate_labels anyway. If you want to keep the exact same, you can simply run multi_label=True and do the scaling on your end.
@@ -292,13 +298,13 @@ export type ZeroShotClassificationArgs = Args & {
     /**
      * (Default: false) Boolean that is set to True if classes can overlap
      */
-    multi_label?:
+    multi_label?: boolean;
   };
 };
 
 export interface ZeroShotClassificationReturnValue {
-  labels:
-  scores:
+  labels: string[];
+  scores: number[];
   sequence: string;
 }
 
@@ -313,25 +319,25 @@ export type ConversationalArgs = Args & {
     /**
      * A list of strings corresponding to the earlier replies from the user. Should be of the same length of generated_responses.
      */
-    past_user_inputs?:
+    past_user_inputs?: string[];
    /**
     * The last input from the user in the conversation.
     */
-    text:
+    text: string;
   };
   parameters?: {
     /**
      * (Default: None). Integer to define the maximum length in tokens of the output summary.
      */
-    max_length?:
+    max_length?: number;
     /**
      * (Default: None). Float (0-120.0). The amount of time in seconds that the query should take maximum. Network can cause some overhead so it will be a soft limit.
      */
-    max_time?:
+    max_time?: number;
     /**
      * (Default: None). Integer to define the minimum length in tokens of the output summary.
      */
-    min_length?:
+    min_length?: number;
     /**
      * (Default: None). Float (0.0-100.0). The more a token is used within generation the more it is penalized to not be picked in successive generation passes.
      */
@@ -339,25 +345,25 @@ export type ConversationalArgs = Args & {
     /**
      * (Default: 1.0). Float (0.0-100.0). The temperature of the sampling operation. 1 means regular sampling, 0 means always take the highest score, 100.0 is getting closer to uniform probability.
      */
-    temperature?:
+    temperature?: number;
     /**
      * (Default: None). Integer to define the top tokens considered within the sample operation to create new text.
      */
-    top_k?:
+    top_k?: number;
     /**
      * (Default: None). Float to define the tokens that are within the sample operation of text generation. Add tokens in the sample for more probable to least probable until the sum of the probabilities is greater than top_p.
      */
-    top_p?:
+    top_p?: number;
   };
 };
 
 export interface ConversationalReturn {
   conversation: {
     generated_responses: string[];
-    past_user_inputs:
+    past_user_inputs: string[];
   };
   generated_text: string;
-  warnings:
+  warnings: string[];
 }
 
 export type FeatureExtractionArgs = Args & {
@@ -369,7 +375,7 @@ export type FeatureExtractionArgs = Args & {
    * "sentences": ["That is a happy dog", "That is a very happy person", "Today is a sunny day"]
    * }
    */
-  inputs: Record<string,
+  inputs: Record<string, unknown> | Record<string, unknown>[];
 };
 
 /**
@@ -381,7 +387,7 @@ export type ImageClassificationArgs = Args & {
   /**
    * Binary image data
   */
-  data:
+  data: Blob | ArrayBuffer;
 };
 
 export interface ImageClassificationReturnValue {
@@ -401,7 +407,7 @@ export type ObjectDetectionArgs = Args & {
   /**
    * Binary image data
   */
-  data:
+  data: Blob | ArrayBuffer;
 };
 
 export interface ObjectDetectionReturnValue {
@@ -431,7 +437,7 @@ export type ImageSegmentationArgs = Args & {
   /**
    * Binary image data
   */
-  data:
+  data: Blob | ArrayBuffer;
 };
 
 export interface ImageSegmentationReturnValue {
@@ -442,7 +448,7 @@ export interface ImageSegmentationReturnValue {
   /**
    * A str (base64 str of a single channel black-and-white img) representing the mask of a segment.
   */
-  mask:
+  mask: string;
   /**
    * A float that represents how likely it is that the detected object belongs to the given class.
   */
@@ -455,7 +461,7 @@ export type AutomaticSpeechRecognitionArgs = Args & {
   /**
    * Binary audio data
  */
-  data:
+  data: Blob | ArrayBuffer;
 };
 
 export interface AutomaticSpeechRecognitionReturn {
@@ -469,7 +475,7 @@ export type AudioClassificationArgs = Args & {
   /**
    * Binary audio data
  */
-  data:
+  data: Blob | ArrayBuffer;
 };
 
 export interface AudioClassificationReturnValue {
@@ -501,7 +507,7 @@ export type TextToImageArgs = Args & {
 export type TextToImageReturn = Blob;
 
 export class HfInference {
-  private readonly apiKey:
+  private readonly apiKey: string;
   private readonly defaultOptions: Options;
 
   constructor(apiKey = "", defaultOptions: Options = {}) {
@@ -561,7 +567,7 @@ export class HfInference {
     args: TokenClassificationArgs,
     options?: Options
   ): Promise<TokenClassificationReturn> {
-    return
+    return toArray(await this.request(args, options));
   }
 
   /**
@@ -578,7 +584,9 @@ export class HfInference {
     args: ZeroShotClassificationArgs,
     options?: Options
   ): Promise<ZeroShotClassificationReturn> {
-    return
+    return toArray(
+      await this.request<ZeroShotClassificationReturnValue | ZeroShotClassificationReturnValue[]>(args, options)
+    );
   }
 
   /**
@@ -671,13 +679,15 @@ export class HfInference {
     });
   }
 
-  public async request(
-    args: Args & { data?:
+  public async request<T>(
+    args: Args & { data?: Blob | ArrayBuffer },
     options?: Options & {
       binary?: boolean;
-      blob?:
+      blob?: boolean;
+      /** For internal HF use, which is why it's not exposed in {@link Options} */
+      includeCredentials?: boolean;
     }
-  ): Promise<
+  ): Promise<T> {
     const mergedOptions = { ...this.defaultOptions, ...options };
     const { model, ...otherArgs } = args;
 
@@ -690,19 +700,28 @@ export class HfInference {
       headers["Content-Type"] = "application/json";
     }
 
-    if (options?.binary
-
+    if (options?.binary) {
+      if (mergedOptions.wait_for_model) {
+        headers["X-Wait-For-Model"] = "true";
+      }
+      if (mergedOptions.use_cache === false) {
+        headers["X-Use-Cache"] = "false";
+      }
+      if (mergedOptions.dont_load_model) {
+        headers["X-Load-Model"] = "0";
+      }
     }
 
     const response = await fetch(`https://api-inference.huggingface.co/models/${model}`, {
       headers,
       method: "POST",
-      body:
+      body: options?.binary
         ? args.data
         : JSON.stringify({
             ...otherArgs,
            options: mergedOptions,
          }),
+      credentials: options?.includeCredentials ? "include" : "same-origin",
     });
 
     if (mergedOptions.retry_on_error !== false && response.status === 503 && !mergedOptions.wait_for_model) {
@@ -716,7 +735,7 @@ export class HfInference {
     if (!response.ok) {
       throw new Error("An error occurred while fetching the blob");
     }
-    return await response.blob();
+    return (await response.blob()) as T;
   }
 
   const output = await response.json();
@@ -725,11 +744,4 @@ export class HfInference {
     }
     return output;
   }
-
-  private static toArray(obj: any): any[] {
-    if (Array.isArray(obj)) {
-      return obj;
-    }
-    return [obj];
-  }
 }
```
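Taken together, the source changes make `request` usable with a caller-supplied response type instead of an untyped result. A sketch — the model name and the assumed response shape are illustrative, not part of the diff:

```ts
import { HfInference } from "@huggingface/inference";

// Assumed response shape for a text-classification model; not from the diff.
interface ClassificationResult {
  label: string;
  score: number;
}

const hf = new HfInference("hf_xxx"); // placeholder token

const args = {
  model: "distilbert-base-uncased-finetuned-sst-2-english",
  inputs: "I love this library!",
};

// `request<T>` types the raw JSON response; the new `includeCredentials`
// option (marked for internal Hugging Face use) would additionally send
// the request with fetch credentials: "include".
const out = await hf.request<ClassificationResult[][]>(args);
console.log(out[0]);
```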