@huggingface/inference 1.7.0 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -4,8 +4,6 @@ A Typescript powered wrapper for the Hugging Face Inference API. Learn more abou
  
  Check out the [full documentation](https://huggingface.co/docs/huggingface.js/inference/README) or try out a live [interactive notebook](https://observablehq.com/@huggingface/hello-huggingface-js-inference).
  
- 
- 
  ## Install
  
  ```console
@@ -18,7 +16,7 @@ pnpm add @huggingface/inference
  
  ## Usage
  
- ❗**Important note:** Using an API key is optional to get started, however you will be rate limited eventually. Join [Hugging Face](https://huggingface.co/join) and then visit [access tokens](https://huggingface.co/settings/tokens) to generate your API key for **free**.
+ ❗**Important note:** Using an API key is optional to get started, however you will be rate limited eventually. Join [Hugging Face](https://huggingface.co/join) and then visit [access tokens](https://huggingface.co/settings/tokens) to generate your API key for **free**.
  
  Your API key should be kept private. If you need to protect it in front-end applications, we suggest setting up a proxy server that stores the API key.
  
@@ -76,9 +74,10 @@ await hf.textGeneration({
    inputs: 'The answer to the universe is'
  })
  
- for await const (output of hf.textGenerationStream({
+ for await (const output of hf.textGenerationStream({
    model: "google/flan-t5-xxl",
-   inputs: 'repeat "one two three four"'
+   inputs: 'repeat "one two three four"',
+   parameters: { max_new_tokens: 250 }
  })) {
    console.log(output.token.text, output.generated_text);
  }
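
A minimal sketch of consuming the corrected streaming call end to end, assuming Node 18+ and a placeholder token; `max_new_tokens` mirrors the README change above:

```ts
import { HfInference } from "@huggingface/inference";

const hf = new HfInference("hf_..."); // placeholder token; optional, but avoids rate limits

let finalText = "";
for await (const output of hf.textGenerationStream({
  model: "google/flan-t5-xxl",
  inputs: 'repeat "one two three four"',
  parameters: { max_new_tokens: 250 },
})) {
  // generated_text is null until the final event (see the typing change in index.d.ts below)
  if (output.generated_text !== null) {
    finalText = output.generated_text;
  }
}
console.log(finalText);
```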
@@ -153,8 +152,15 @@ await hf.imageSegmentation({
  
  await hf.textToImage({
    inputs: 'award winning high resolution photo of a giant tortoise/((ladybird)) hybrid, [trending on artstation]',
-   negative_prompt: 'blurry',
    model: 'stabilityai/stable-diffusion-2',
+   parameters: {
+     negative_prompt: 'blurry',
+   }
+ })
+ 
+ await hf.imageToText({
+   data: readFileSync('test/cats.png'),
+   model: 'nlpconnect/vit-gpt2-image-captioning'
  })
  ```
  
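
The two new README snippets compose; a hedged sketch for Node (file paths and prompt are illustrative, and `Buffer.from(await blob.arrayBuffer())` is one way to persist the returned `Blob`):

```ts
import { readFileSync, writeFileSync } from "node:fs";
import { HfInference } from "@huggingface/inference";

const hf = new HfInference("hf_..."); // placeholder token

// textToImage resolves to a Blob; convert it to a Buffer to write it out
const image = await hf.textToImage({
  inputs: "a giant tortoise/((ladybird)) hybrid",
  model: "stabilityai/stable-diffusion-2",
  parameters: { negative_prompt: "blurry" },
});
writeFileSync("tortoise.png", Buffer.from(await image.arrayBuffer()));

// imageToText accepts Blob | ArrayBuffer, so raw file bytes work directly
const caption = await hf.imageToText({
  data: readFileSync("test/cats.png"),
  model: "nlpconnect/vit-gpt2-image-captioning",
});
console.log(caption.generated_text);
```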
@@ -187,6 +193,7 @@ await hf.textToImage({
  - [x] Object detection
  - [x] Image segmentation
  - [x] Text to image
+ - [x] Image to text
  
  ## Running tests
  
package/dist/index.d.ts CHANGED
@@ -273,12 +273,12 @@ interface TextGenerationStreamReturn {
     * Complete generated text
     * Only available when the generation is finished
     */
-   generated_text?: string;
+   generated_text: string | null;
    /**
     * Generation details
     * Only available when the generation is finished
     */
-   details?: TextGenerationStreamDetails;
+   details: TextGenerationStreamDetails | null;
  }
  type TokenClassificationArgs = Args & {
    /**
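
Because `generated_text` and `details` went from optional to explicitly nullable, consumers narrow with a null check rather than optional chaining; a small sketch (the handler name is hypothetical):

```ts
import type { TextGenerationStreamReturn } from "@huggingface/inference";

// Hypothetical per-event handler showing the narrowing the new types require
function onStreamEvent(output: TextGenerationStreamReturn): void {
  // Both fields are always present now, but null until generation finishes
  if (output.generated_text !== null) {
    console.log("final text:", output.generated_text);
  }
  if (output.details !== null) {
    console.log("finish reason:", output.details.finish_reason);
  }
}
```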
@@ -526,12 +526,42 @@ type TextToImageArgs = Args & {
     * The text to generate an image from
     */
    inputs: string;
+   parameters?: {
+     /**
+      * An optional negative prompt for the image generation
+      */
+     negative_prompt?: string;
+     /**
+      * The height in pixels of the generated image
+      */
+     height?: number;
+     /**
+      * The width in pixels of the generated image
+      */
+     width?: number;
+     /**
+      * The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference.
+      */
+     num_inference_steps?: number;
+     /**
+      * Guidance scale: Higher guidance scale encourages to generate images that are closely linked to the text `prompt`, usually at the expense of lower image quality.
+      */
+     guidance_scale?: number;
+   };
+ };
+ type TextToImageReturn = Blob;
+ type ImageToTextArgs = Args & {
    /**
-    * An optional negative prompt for the image generation
+    * Binary image data
     */
-   negative_prompt?: string;
+   data: Blob | ArrayBuffer;
  };
- type TextToImageReturn = Blob;
+ interface ImageToTextReturn {
+   /**
+    * The generated caption
+    */
+   generated_text: string;
+ }
  declare class HfInference {
    private readonly apiKey;
    private readonly defaultOptions;
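
A sketch of the widened `TextToImageArgs` with every new parameter populated; the values are illustrative, not model recommendations:

```ts
import { HfInference } from "@huggingface/inference";
import type { TextToImageArgs } from "@huggingface/inference";

const args: TextToImageArgs = {
  model: "stabilityai/stable-diffusion-2",
  inputs: "watercolor lighthouse at dusk",
  parameters: {
    negative_prompt: "blurry",
    width: 768,
    height: 768,
    num_inference_steps: 30, // more steps: higher quality, slower inference
    guidance_scale: 7.5, // higher: closer prompt adherence, possibly lower image quality
  },
};

const image: Blob = await new HfInference("hf_...").textToImage(args);
```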
@@ -615,6 +645,10 @@ declare class HfInference {
     * Recommended model: stabilityai/stable-diffusion-2
     */
    textToImage(args: TextToImageArgs, options?: Options): Promise<TextToImageReturn>;
+   /**
+    * This task reads some image input and outputs the text caption.
+    */
+   imageToText(args: ImageToTextArgs, options?: Options): Promise<ImageToTextReturn>;
    /**
     * Helper that prepares request arguments
     */
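
And a browser-flavored sketch of the new `imageToText` method (the image URL is a placeholder; omitting the key gives anonymous, rate-limited access):

```ts
import { HfInference } from "@huggingface/inference";

const hf = new HfInference(); // anonymous access

// Any Blob works, e.g. one fetched over the network
const imageBlob = await (await fetch("https://example.com/cat.png")).blob();
const { generated_text } = await hf.imageToText({
  data: imageBlob,
  model: "nlpconnect/vit-gpt2-image-captioning",
});
console.log(generated_text);
```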
@@ -640,4 +674,4 @@ declare class HfInference {
    }): AsyncGenerator<T>;
  }
  
- export { Args, AudioClassificationArgs, AudioClassificationReturn, AudioClassificationReturnValue, AutomaticSpeechRecognitionArgs, AutomaticSpeechRecognitionReturn, ConversationalArgs, ConversationalReturn, FeatureExtractionArgs, FeatureExtractionReturn, FillMaskArgs, FillMaskReturn, HfInference, ImageClassificationArgs, ImageClassificationReturn, ImageClassificationReturnValue, ImageSegmentationArgs, ImageSegmentationReturn, ImageSegmentationReturnValue, ObjectDetectionArgs, ObjectDetectionReturn, ObjectDetectionReturnValue, Options, QuestionAnswerArgs, QuestionAnswerReturn, SummarizationArgs, SummarizationReturn, TableQuestionAnswerArgs, TableQuestionAnswerReturn, TextClassificationArgs, TextClassificationReturn, TextGenerationArgs, TextGenerationReturn, TextGenerationStreamBestOfSequence, TextGenerationStreamDetails, TextGenerationStreamFinishReason, TextGenerationStreamPrefillToken, TextGenerationStreamReturn, TextGenerationStreamToken, TextToImageArgs, TextToImageReturn, TokenClassificationArgs, TokenClassificationReturn, TokenClassificationReturnValue, TranslationArgs, TranslationReturn, ZeroShotClassificationArgs, ZeroShotClassificationReturn, ZeroShotClassificationReturnValue };
+ export { Args, AudioClassificationArgs, AudioClassificationReturn, AudioClassificationReturnValue, AutomaticSpeechRecognitionArgs, AutomaticSpeechRecognitionReturn, ConversationalArgs, ConversationalReturn, FeatureExtractionArgs, FeatureExtractionReturn, FillMaskArgs, FillMaskReturn, HfInference, ImageClassificationArgs, ImageClassificationReturn, ImageClassificationReturnValue, ImageSegmentationArgs, ImageSegmentationReturn, ImageSegmentationReturnValue, ImageToTextArgs, ImageToTextReturn, ObjectDetectionArgs, ObjectDetectionReturn, ObjectDetectionReturnValue, Options, QuestionAnswerArgs, QuestionAnswerReturn, SummarizationArgs, SummarizationReturn, TableQuestionAnswerArgs, TableQuestionAnswerReturn, TextClassificationArgs, TextClassificationReturn, TextGenerationArgs, TextGenerationReturn, TextGenerationStreamBestOfSequence, TextGenerationStreamDetails, TextGenerationStreamFinishReason, TextGenerationStreamPrefillToken, TextGenerationStreamReturn, TextGenerationStreamToken, TextToImageArgs, TextToImageReturn, TokenClassificationArgs, TokenClassificationReturn, TokenClassificationReturnValue, TranslationArgs, TranslationReturn, ZeroShotClassificationArgs, ZeroShotClassificationReturn, ZeroShotClassificationReturnValue };
package/dist/index.js CHANGED
@@ -1,3 +1,4 @@
+ "use strict";
  var __defProp = Object.defineProperty;
  var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
  var __getOwnPropNames = Object.getOwnPropertyNames;
@@ -24,7 +25,7 @@ __export(src_exports, {
  });
  module.exports = __toCommonJS(src_exports);
  
- // src/utils/to-array.ts
+ // src/utils/toArray.ts
  function toArray(obj) {
    if (Array.isArray(obj)) {
      return obj;
@@ -386,6 +387,15 @@ var HfInference = class {
    }
    return res;
  }
+ /**
+  * This task reads some image input and outputs the text caption.
+  */
+ async imageToText(args, options) {
+   return (await this.request(args, {
+     ...options,
+     binary: true
+   }))?.[0];
+ }
  /**
   * Helper that prepares request arguments
   */
@@ -456,13 +466,24 @@ var HfInference = class {
      });
    }
    if (!response.ok) {
+     if (response.headers.get("Content-Type")?.startsWith("application/json")) {
+       const output = await response.json();
+       if (output.error) {
+         throw new Error(output.error);
+       }
+     }
      throw new Error(`Server response contains error: ${response.status}`);
    }
    if (response.headers.get("content-type") !== "text/event-stream") {
-     throw new Error(`Server does not support event stream content type`);
+     throw new Error(
+       `Server does not support event stream content type, it returned ` + response.headers.get("content-type")
+     );
+   }
+   if (!response.body) {
+     return;
    }
    const reader = response.body.getReader();
-   const events = [];
+   let events = [];
    const onEvent = (event) => {
      events.push(event);
    };
@@ -481,12 +502,12 @@ var HfInference = class {
        if (done)
          return;
        onChunk(value);
-       while (events.length > 0) {
-         const event = events.shift();
+       for (const event of events) {
          if (event.data.length > 0) {
            yield JSON.parse(event.data);
          }
        }
+       events = [];
      }
    } finally {
      reader.releaseLock();
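
Standalone, the upgraded error path amounts to this pattern; a minimal sketch assuming a standard `fetch` `Response` (the helper name is hypothetical):

```ts
// Prefer the API's own JSON error message (e.g. a "model is loading" notice)
// over the generic status-code error, falling back when the body isn't JSON.
async function throwOnError(response: Response): Promise<void> {
  if (response.ok) return;
  if (response.headers.get("Content-Type")?.startsWith("application/json")) {
    const output = await response.json();
    if (output.error) {
      throw new Error(output.error);
    }
  }
  throw new Error(`Server response contains error: ${response.status}`);
}
```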
package/dist/index.mjs CHANGED
@@ -1,4 +1,4 @@
- // src/utils/to-array.ts
+ // src/utils/toArray.ts
  function toArray(obj) {
    if (Array.isArray(obj)) {
      return obj;
@@ -360,6 +360,15 @@ var HfInference = class {
    }
    return res;
  }
+ /**
+  * This task reads some image input and outputs the text caption.
+  */
+ async imageToText(args, options) {
+   return (await this.request(args, {
+     ...options,
+     binary: true
+   }))?.[0];
+ }
  /**
   * Helper that prepares request arguments
   */
@@ -430,13 +439,24 @@ var HfInference = class {
      });
    }
    if (!response.ok) {
+     if (response.headers.get("Content-Type")?.startsWith("application/json")) {
+       const output = await response.json();
+       if (output.error) {
+         throw new Error(output.error);
+       }
+     }
      throw new Error(`Server response contains error: ${response.status}`);
    }
    if (response.headers.get("content-type") !== "text/event-stream") {
-     throw new Error(`Server does not support event stream content type`);
+     throw new Error(
+       `Server does not support event stream content type, it returned ` + response.headers.get("content-type")
+     );
+   }
+   if (!response.body) {
+     return;
    }
    const reader = response.body.getReader();
-   const events = [];
+   let events = [];
    const onEvent = (event) => {
      events.push(event);
    };
@@ -455,12 +475,12 @@ var HfInference = class {
        if (done)
          return;
        onChunk(value);
-       while (events.length > 0) {
-         const event = events.shift();
+       for (const event of events) {
          if (event.data.length > 0) {
            yield JSON.parse(event.data);
          }
        }
+       events = [];
      }
    } finally {
      reader.releaseLock();
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "@huggingface/inference",
-   "version": "1.7.0",
+   "version": "1.8.0",
    "license": "MIT",
    "author": "Tim Mikeladze <tim.mikeladze@gmail.com>",
    "description": "Typescript wrapper for the Hugging Face Inference API",
@@ -40,10 +40,9 @@
    },
    "devDependencies": {
      "@types/node": "18.13.0",
-     "tsup": "^6.6.3",
      "typescript": "4.9.5",
      "vite": "^4.1.4",
-     "vitest": "^0.29.2"
+     "vitest": "^0.29.8"
    },
    "resolutions": {},
    "scripts": {
@@ -52,8 +51,8 @@
      "lint:check": "eslint --ext .cjs,.ts .",
      "format": "prettier --write .",
      "format:check": "prettier --check .",
-     "test": "vitest run",
-     "test:browser": "vitest run --browser.name=chrome --browser.headless",
+     "test": "vitest run --config vitest.config.ts",
+     "test:browser": "vitest run --browser.name=chrome --browser.headless --config vitest.config.ts",
      "type-check": "tsc"
    }
  }
package/src/HfInference.ts CHANGED
@@ -1,4 +1,4 @@
- import { toArray } from "./utils/to-array";
+ import { toArray } from "./utils/toArray";
  import type { EventSourceMessage } from "./vendor/fetch-event-source/parse";
  import { getLines, getMessages } from "./vendor/fetch-event-source/parse";
  
@@ -299,12 +299,12 @@ export interface TextGenerationStreamReturn {
     * Complete generated text
     * Only available when the generation is finished
     */
-   generated_text?: string;
+   generated_text: string | null;
    /**
     * Generation details
     * Only available when the generation is finished
     */
-   details?: TextGenerationStreamDetails;
+   details: TextGenerationStreamDetails | null;
  }
  
  export type TokenClassificationArgs = Args & {
@@ -582,13 +582,45 @@ export type TextToImageArgs = Args & {
     */
    inputs: string;
  
+   parameters?: {
+     /**
+      * An optional negative prompt for the image generation
+      */
+     negative_prompt?: string;
+     /**
+      * The height in pixels of the generated image
+      */
+     height?: number;
+     /**
+      * The width in pixels of the generated image
+      */
+     width?: number;
+     /**
+      * The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference.
+      */
+     num_inference_steps?: number;
+     /**
+      * Guidance scale: Higher guidance scale encourages to generate images that are closely linked to the text `prompt`, usually at the expense of lower image quality.
+      */
+     guidance_scale?: number;
+   };
+ };
+ 
+ export type TextToImageReturn = Blob;
+ 
+ export type ImageToTextArgs = Args & {
    /**
-    * An optional negative prompt for the image generation
+    * Binary image data
     */
-   negative_prompt?: string;
+   data: Blob | ArrayBuffer;
  };
  
- export type TextToImageReturn = Blob;
+ export interface ImageToTextReturn {
+   /**
+    * The generated caption
+    */
+   generated_text: string;
+ }
  
  export class HfInference {
    private readonly apiKey: string;
@@ -928,6 +960,18 @@ export class HfInference {
    return res;
  }
  
+ /**
+  * This task reads some image input and outputs the text caption.
+  */
+ public async imageToText(args: ImageToTextArgs, options?: Options): Promise<ImageToTextReturn> {
+   return (
+     await this.request<[ImageToTextReturn]>(args, {
+       ...options,
+       binary: true,
+     })
+   )?.[0];
+ }
+ 
  /**
   * Helper that prepares request arguments
   */
@@ -1038,14 +1082,27 @@ export class HfInference {
      });
    }
    if (!response.ok) {
+     if (response.headers.get("Content-Type")?.startsWith("application/json")) {
+       const output = await response.json();
+       if (output.error) {
+         throw new Error(output.error);
+       }
+     }
+ 
      throw new Error(`Server response contains error: ${response.status}`);
    }
    if (response.headers.get("content-type") !== "text/event-stream") {
-     throw new Error(`Server does not support event stream content type`);
+     throw new Error(
+       `Server does not support event stream content type, it returned ` + response.headers.get("content-type")
+     );
+   }
+ 
+   if (!response.body) {
+     return;
    }
  
    const reader = response.body.getReader();
-   const events: EventSourceMessage[] = [];
+   let events: EventSourceMessage[] = [];
  
    const onEvent = (event: EventSourceMessage) => {
      // accumulate events in array
@@ -1065,12 +1122,12 @@ export class HfInference {
      const { done, value } = await reader.read();
      if (done) return;
      onChunk(value);
-     while (events.length > 0) {
-       const event = events.shift();
+     for (const event of events) {
        if (event.data.length > 0) {
          yield JSON.parse(event.data) as T;
        }
      }
+     events = [];
    }
  } finally {
    reader.releaseLock();
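
The loop rewrite above (drain the accumulated events with `for...of`, then reset the array) generalizes to this pattern; a sketch with hypothetical names, not the library's internals verbatim:

```ts
// Hypothetical generic drain loop: a parser may emit several complete SSE
// events per network chunk; we yield them all, then reset the buffer.
async function* drainEvents<T>(
  reader: ReadableStreamDefaultReader<Uint8Array>,
  parseChunk: (chunk: Uint8Array, onEvent: (data: string) => void) => void
): AsyncGenerator<T> {
  let pending: string[] = [];
  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) return;
      parseChunk(value, (data) => pending.push(data));
      for (const data of pending) {
        if (data.length > 0) yield JSON.parse(data) as T;
      }
      // Reassigning (instead of shift() inside the loop) keeps iteration simple
      pending = [];
    }
  } finally {
    reader.releaseLock();
  }
}
```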
package/src/utils/omit.ts ADDED
@@ -0,0 +1,11 @@
+ import { pick } from "./pick";
+ import { typedInclude } from "./typedInclude";
+ 
+ /**
+  * Return copy of object, omitting blocklisted array of props
+  */
+ export function omit<T extends object, K extends keyof T>(o: T, props: K[] | K): Pick<T, Exclude<keyof T, K>> {
+   const propsArr = Array.isArray(props) ? props : [props];
+   const letsKeep = (Object.keys(o) as (keyof T)[]).filter((prop) => !typedInclude(propsArr, prop));
+   return pick(o, letsKeep);
+ }
package/src/utils/pick.ts ADDED
@@ -0,0 +1,16 @@
+ /**
+  * Return copy of object, only keeping allowlisted properties.
+  *
+  * This doesn't add {p: undefined} anymore, for props not in the o object.
+  */
+ export function pick<T, K extends keyof T>(o: T, props: K[] | ReadonlyArray<K>): Pick<T, K> {
+   // inspired by stackoverflow.com/questions/25553910/one-liner-to-take-some-properties-from-object-in-es-6
+   return Object.assign(
+     {},
+     ...props.map((prop) => {
+       if (o[prop] !== undefined) {
+         return { [prop]: o[prop] };
+       }
+     })
+   );
+ }
package/src/utils/typedInclude.ts ADDED
@@ -0,0 +1,3 @@
+ export function typedInclude<V, T extends V>(arr: readonly T[], v: V): v is T {
+   return arr.includes(v as T);
+ }
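
Taken together, the three new utilities compose like this; a hypothetical usage sketch (the relative imports assume a file alongside them in `src/utils`):

```ts
import { omit } from "./omit";
import { pick } from "./pick";
import { typedInclude } from "./typedInclude";

const args = { model: "gpt2", inputs: "hello", parameters: { top_k: 5 } };

const body = omit(args, "model"); // { inputs, parameters }
const routing = pick(args, ["model"]); // { model: "gpt2" }, with no { p: undefined } filler

// typedInclude narrows a plain string to the array's literal union
const KNOWN_KEYS = ["model", "inputs"] as const;
const key: string = process.argv[2] ?? "";
if (typedInclude(KNOWN_KEYS, key)) {
  console.log(pick(args, [key])); // key: "model" | "inputs"
}
```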
package/src/utils/env-predicates.ts DELETED
@@ -1,7 +0,0 @@
- const isBrowser = typeof window !== "undefined" && typeof window.document !== "undefined";
- 
- const isWebWorker =
-   typeof self === "object" && self.constructor && self.constructor.name === "DedicatedWorkerGlobalScope";
- 
- export const isFrontend = isBrowser || isWebWorker;
- export const isBackend = !isBrowser && !isWebWorker;