npm - @huggingface/inference - Versions diffs - 1.6.2 → 1.7.0 - Mend

@huggingface/inference 1.6.2 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/README.md +7 -0
package/dist/index.d.ts +94 -1
package/dist/index.js +299 -22
package/dist/index.mjs +297 -21
package/package.json +1 -1
package/src/HfInference.ts +355 -23
package/src/vendor/fetch-event-source/parse.spec.ts +389 -0
package/src/vendor/fetch-event-source/parse.ts +216 -0

package/src/HfInference.ts CHANGED Viewed

@@ -1,4 +1,8 @@
 import { toArray } from "./utils/to-array";
+import type { EventSourceMessage } from "./vendor/fetch-event-source/parse";
+import { getLines, getMessages } from "./vendor/fetch-event-source/parse";
+const HF_INFERENCE_API_BASE_URL = "https://api-inference.huggingface.co/models/";
 export interface Options {
 	/**
@@ -223,6 +227,86 @@ export interface TextGenerationReturn {
 	generated_text: string;
 }
+export interface TextGenerationStreamToken {
+	/** Token ID from the model tokenizer */
+	id: number;
+	/** Token text */
+	text: string;
+	/** Logprob */
+	logprob: number;
+	/**
+	 * Is the token a special token
+	 * Can be used to ignore tokens when concatenating
+	 */
+	special: boolean;
+}
+export interface TextGenerationStreamPrefillToken {
+	/** Token ID from the model tokenizer */
+	id: number;
+	/** Token text */
+	text: string;
+	/**
+	 * Logprob
+	 * Optional since the logprob of the first token cannot be computed
+	 */
+	logprob?: number;
+}
+export interface TextGenerationStreamBestOfSequence {
+	/** Generated text */
+	generated_text: string;
+	/** Generation finish reason */
+	finish_reason: TextGenerationStreamFinishReason;
+	/** Number of generated tokens */
+	generated_tokens: number;
+	/** Sampling seed if sampling was activated */
+	seed?: number;
+	/** Prompt tokens */
+	prefill: TextGenerationStreamPrefillToken[];
+	/** Generated tokens */
+	tokens: TextGenerationStreamToken[];
+}
+export enum TextGenerationStreamFinishReason {
+	/** number of generated tokens == `max_new_tokens` */
+	Length = "length",
+	/** the model generated its end of sequence token */
+	EndOfSequenceToken = "eos_token",
+	/** the model generated a text included in `stop_sequences` */
+	StopSequence = "stop_sequence",
+}
+export interface TextGenerationStreamDetails {
+	/** Generation finish reason */
+	finish_reason: TextGenerationStreamFinishReason;
+	/** Number of generated tokens */
+	generated_tokens: number;
+	/** Sampling seed if sampling was activated */
+	seed?: number;
+	/** Prompt tokens */
+	prefill: TextGenerationStreamPrefillToken[];
+	/** */
+	tokens: TextGenerationStreamToken[];
+	/** Additional sequences when using the `best_of` parameter */
+	best_of_sequences?: TextGenerationStreamBestOfSequence[];
+}
+export interface TextGenerationStreamReturn {
+	/** Generated token, one at a time */
+	token: TextGenerationStreamToken;
+	/**
+	 * Complete generated text
+	 * Only available when the generation is finished
+	 */
+	generated_text?: string;
+	/**
+	 * Generation details
+	 * Only available when the generation is finished
+	 */
+	details?: TextGenerationStreamDetails;
+}
 export type TokenClassificationArgs = Args & {
 	/**
 	 * A string to be classified
@@ -519,21 +603,52 @@ export class HfInference {
 	 * Tries to fill in a hole with a missing word (token to be precise). That’s the base task for BERT models.
 	 */
 	public async fillMask(args: FillMaskArgs, options?: Options): Promise<FillMaskReturn> {
-		return this.request(args, options);
+		const res = await this.request<FillMaskReturn>(args, options);
+		const isValidOutput =
+			Array.isArray(res) &&
+			res.every(
+				(x) =>
+					typeof x.score === "number" &&
+					typeof x.sequence === "string" &&
+					typeof x.token === "number" &&
+					typeof x.token_str === "string"
+			);
+		if (!isValidOutput) {
+			throw new TypeError(
+				"Invalid inference output: output must be of type Array<score: number, sequence:string, token:number, token_str:string>"
+			);
+		}
+		return res;
 	}
 	/**
 	 * This task is well known to summarize longer text into shorter text. Be careful, some models have a maximum length of input. That means that the summary cannot handle full books for instance. Be careful when choosing your model.
 	 */
 	public async summarization(args: SummarizationArgs, options?: Options): Promise<SummarizationReturn> {
-		return (await this.request<SummarizationReturn[]>(args, options))?.[0];
+		const res = await this.request<SummarizationReturn[]>(args, options);
+		const isValidOutput = Array.isArray(res) && res.every((x) => typeof x.summary_text === "string");
+		if (!isValidOutput) {
+			throw new TypeError("Invalid inference output: output must be of type Array<summary_text: string>");
+		}
+		return res?.[0];
 	}
 	/**
 	 * Want to have a nice know-it-all bot that can answer any question?. Recommended model: deepset/roberta-base-squad2
 	 */
 	public async questionAnswer(args: QuestionAnswerArgs, options?: Options): Promise<QuestionAnswerReturn> {
-		return await this.request(args, options);
+		const res = await this.request<QuestionAnswerReturn>(args, options);
+		const isValidOutput =
+			typeof res.answer === "string" &&
+			typeof res.end === "number" &&
+			typeof res.score === "number" &&
+			typeof res.start === "number";
+		if (!isValidOutput) {
+			throw new TypeError(
+				"Invalid inference output: output must be of type <answer: string, end: number, score: number, start: number>"
+			);
+		}
+		return res;
 	}
 	/**
@@ -543,21 +658,55 @@ export class HfInference {
 		args: TableQuestionAnswerArgs,
 		options?: Options
 	): Promise<TableQuestionAnswerReturn> {
-		return await this.request(args, options);
+		const res = await this.request<TableQuestionAnswerReturn>(args, options);
+		const isValidOutput =
+			typeof res.aggregator === "string" &&
+			typeof res.answer === "string" &&
+			Array.isArray(res.cells) &&
+			res.cells.every((x) => typeof x === "string") &&
+			Array.isArray(res.coordinates) &&
+			res.coordinates.every((coord) => Array.isArray(coord) && coord.every((x) => typeof x === "number"));
+		if (!isValidOutput) {
+			throw new TypeError(
+				"Invalid inference output: output must be of type <aggregator: string, answer: string, cells: string[], coordinates: number[][]>"
+			);
+		}
+		return res;
 	}
 	/**
 	 * Usually used for sentiment-analysis this will output the likelihood of classes of an input. Recommended model: distilbert-base-uncased-finetuned-sst-2-english
 	 */
 	public async textClassification(args: TextClassificationArgs, options?: Options): Promise<TextClassificationReturn> {
-		return (await this.request<TextClassificationReturn[]>(args, options))?.[0];
+		const res = (await this.request<TextClassificationReturn[]>(args, options))?.[0];
+		const isValidOutput =
+			Array.isArray(res) && res.every((x) => typeof x.label === "string" && typeof x.score === "number");
+		if (!isValidOutput) {
+			throw new TypeError("Invalid inference output: output must be of type Array<label: string, score: number>");
+		}
+		return res;
 	}
 	/**
 	 * Use to continue text from a prompt. This is a very generic task. Recommended model: gpt2 (it’s a simple model, but fun to play with).
 	 */
 	public async textGeneration(args: TextGenerationArgs, options?: Options): Promise<TextGenerationReturn> {
-		return (await this.request<TextGenerationReturn[]>(args, options))?.[0];
+		const res = await this.request<TextGenerationReturn[]>(args, options);
+		const isValidOutput = Array.isArray(res) && res.every((x) => typeof x.generated_text === "string");
+		if (!isValidOutput) {
+			throw new TypeError("Invalid inference output: output must be of type Array<generated_text: string>");
+		}
+		return res?.[0];
+	}
+	/**
+	 * Use to continue text from a prompt. Same as `textGeneration` but returns generator that can be read one token at a time
+	 */
+	public async *textGenerationStream(
+		args: TextGenerationArgs,
+		options?: Options
+	): AsyncGenerator<TextGenerationStreamReturn> {
+		yield* this.streamingRequest<TextGenerationStreamReturn>(args, options);
 	}
 	/**
@@ -567,14 +716,35 @@ export class HfInference {
 		args: TokenClassificationArgs,
 		options?: Options
 	): Promise<TokenClassificationReturn> {
-		return toArray(await this.request(args, options));
+		const res = toArray(await this.request<TokenClassificationReturnValue | TokenClassificationReturn>(args, options));
+		const isValidOutput =
+			Array.isArray(res) &&
+			res.every(
+				(x) =>
+					typeof x.end === "number" &&
+					typeof x.entity_group === "string" &&
+					typeof x.score === "number" &&
+					typeof x.start === "number" &&
+					typeof x.word === "string"
+			);
+		if (!isValidOutput) {
+			throw new TypeError(
+				"Invalid inference output: output must be of type Array<end: number, entity_group: string, score: number, start: number, word: string>"
+			);
+		}
+		return res;
 	}
 	/**
 	 * This task is well known to translate text from one language to another. Recommended model: Helsinki-NLP/opus-mt-ru-en.
 	 */
 	public async translation(args: TranslationArgs, options?: Options): Promise<TranslationReturn> {
-		return (await this.request<TranslationReturn[]>(args, options))?.[0];
+		const res = await this.request<TranslationReturn[]>(args, options);
+		const isValidOutput = Array.isArray(res) && res.every((x) => typeof x.translation_text === "string");
+		if (!isValidOutput) {
+			throw new TypeError("Invalid inference output: output must be of type Array<translation_text: string>");
+		}
+		return res?.[0];
 	}
 	/**
@@ -584,9 +754,25 @@ export class HfInference {
 		args: ZeroShotClassificationArgs,
 		options?: Options
 	): Promise<ZeroShotClassificationReturn> {
-		return toArray(
-			await this.request<ZeroShotClassificationReturnValue | ZeroShotClassificationReturnValue[]>(args, options)
+		const res = toArray(
+			await this.request<ZeroShotClassificationReturnValue | ZeroShotClassificationReturn>(args, options)
 		);
+		const isValidOutput =
+			Array.isArray(res) &&
+			res.every(
+				(x) =>
+					Array.isArray(x.labels) &&
+					x.labels.every((_label) => typeof _label === "string") &&
+					Array.isArray(x.scores) &&
+					x.scores.every((_score) => typeof _score === "number") &&
+					typeof x.sequence === "string"
+			);
+		if (!isValidOutput) {
+			throw new TypeError(
+				"Invalid inference output: output must be of type Array<labels: string[], scores: number[], sequence: string>"
+			);
+		}
+		return res;
 	}
 	/**
@@ -594,14 +780,29 @@ export class HfInference {
 	 *
 	 */
 	public async conversational(args: ConversationalArgs, options?: Options): Promise<ConversationalReturn> {
-		return await this.request(args, options);
+		const res = await this.request<ConversationalReturn>(args, options);
+		const isValidOutput =
+			Array.isArray(res.conversation.generated_responses) &&
+			res.conversation.generated_responses.every((x) => typeof x === "string") &&
+			Array.isArray(res.conversation.past_user_inputs) &&
+			res.conversation.past_user_inputs.every((x) => typeof x === "string") &&
+			typeof res.generated_text === "string" &&
+			Array.isArray(res.warnings) &&
+			res.warnings.every((x) => typeof x === "string");
+		if (!isValidOutput) {
+			throw new TypeError(
+				"Invalid inference output: output must be of type <conversation: {generated_responses: string[], past_user_inputs: string[]}, generated_text: string, warnings: string[]>"
+			);
+		}
+		return res;
 	}
 	/**
 	 * This task reads some text and outputs raw float values, that are usually consumed as part of a semantic database/semantic search.
 	 */
 	public async featureExtraction(args: FeatureExtractionArgs, options?: Options): Promise<FeatureExtractionReturn> {
-		return await this.request(args, options);
+		const res = await this.request<FeatureExtractionReturn>(args, options);
+		return res;
 	}
 	/**
@@ -612,10 +813,15 @@ export class HfInference {
 		args: AutomaticSpeechRecognitionArgs,
 		options?: Options
 	): Promise<AutomaticSpeechRecognitionReturn> {
-		return await this.request(args, {
+		const res = await this.request<AutomaticSpeechRecognitionReturn>(args, {
 			...options,
 			binary: true,
 		});
+		const isValidOutput = typeof res.text === "string";
+		if (!isValidOutput) {
+			throw new TypeError("Invalid inference output: output must be of type <text: string>");
+		}
+		return res;
 	}
 	/**
@@ -626,10 +832,16 @@ export class HfInference {
 		args: AudioClassificationArgs,
 		options?: Options
 	): Promise<AudioClassificationReturn> {
-		return await this.request(args, {
+		const res = await this.request<AudioClassificationReturn>(args, {
 			...options,
 			binary: true,
 		});
+		const isValidOutput =
+			Array.isArray(res) && res.every((x) => typeof x.label === "string" && typeof x.score === "number");
+		if (!isValidOutput) {
+			throw new TypeError("Invalid inference output: output must be of type Array<label: string, score: number>");
+		}
+		return res;
 	}
 	/**
@@ -640,10 +852,16 @@ export class HfInference {
 		args: ImageClassificationArgs,
 		options?: Options
 	): Promise<ImageClassificationReturn> {
-		return await this.request(args, {
+		const res = await this.request<ImageClassificationReturn>(args, {
 			...options,
 			binary: true,
 		});
+		const isValidOutput =
+			Array.isArray(res) && res.every((x) => typeof x.label === "string" && typeof x.score === "number");
+		if (!isValidOutput) {
+			throw new TypeError("Invalid inference output: output must be of type Array<label: string, score: number>");
+		}
+		return res;
 	}
 	/**
@@ -651,10 +869,27 @@ export class HfInference {
 	 * Recommended model: facebook/detr-resnet-50
 	 */
 	public async objectDetection(args: ObjectDetectionArgs, options?: Options): Promise<ObjectDetectionReturn> {
-		return await this.request(args, {
+		const res = await this.request<ObjectDetectionReturn>(args, {
 			...options,
 			binary: true,
 		});
+		const isValidOutput =
+			Array.isArray(res) &&
+			res.every(
+				(x) =>
+					typeof x.label === "string" &&
+					typeof x.score === "number" &&
+					typeof x.box.xmin === "number" &&
+					typeof x.box.ymin === "number" &&
+					typeof x.box.xmax === "number" &&
+					typeof x.box.ymax === "number"
+			);
+		if (!isValidOutput) {
+			throw new TypeError(
+				"Invalid inference output: output must be of type Array<{label:string; score:number; box:{xmin:number; ymin:number; xmax:number; ymax:number}}>"
+			);
+		}
+		return res;
 	}
 	/**
@@ -662,10 +897,19 @@ export class HfInference {
 	 * Recommended model: facebook/detr-resnet-50-panoptic
 	 */
 	public async imageSegmentation(args: ImageSegmentationArgs, options?: Options): Promise<ImageSegmentationReturn> {
-		return await this.request(args, {
+		const res = await this.request<ImageSegmentationReturn>(args, {
 			...options,
 			binary: true,
 		});
+		const isValidOutput =
+			Array.isArray(res) &&
+			res.every((x) => typeof x.label === "string" && typeof x.mask === "string" && typeof x.score === "number");
+		if (!isValidOutput) {
+			throw new TypeError(
+				"Invalid inference output: output must be of type Array<label: string, mask: string, score: number>"
+			);
+		}
+		return res;
 	}
 	/**
@@ -673,21 +917,32 @@ export class HfInference {
 	 * Recommended model: stabilityai/stable-diffusion-2
 	 */
 	public async textToImage(args: TextToImageArgs, options?: Options): Promise<TextToImageReturn> {
-		return await this.request(args, {
+		const res = await this.request<TextToImageReturn>(args, {
 			...options,
 			blob: true,
 		});
+		const isValidOutput = res && res instanceof Blob;
+		if (!isValidOutput) {
+			throw new TypeError("Invalid inference output: output must be of type object & of instance Blob");
+		}
+		return res;
 	}
-	public async request<T>(
-		args: Args & { data?: Blob | ArrayBuffer },
+	/**
+	 * Helper that prepares request arguments
+	 */
+	private makeRequestOptions(
+		args: Args & {
+			data?: Blob | ArrayBuffer;
+			stream?: boolean;
+		},
 		options?: Options & {
 			binary?: boolean;
 			blob?: boolean;
 			/** For internal HF use, which is why it's not exposed in {@link Options} */
 			includeCredentials?: boolean;
 		}
-	): Promise<T> {
+	) {
 		const mergedOptions = { ...this.defaultOptions, ...options };
 		const { model, ...otherArgs } = args;
@@ -712,7 +967,8 @@ export class HfInference {
 			}
 		}
-		const response = await fetch(`https://api-inference.huggingface.co/models/${model}`, {
+		const url = `${HF_INFERENCE_API_BASE_URL}${model}`;
+		const info: RequestInit = {
 			headers,
 			method: "POST",
 			body: options?.binary
@@ -722,7 +978,22 @@ export class HfInference {
 						options: mergedOptions,
 				  }),
 			credentials: options?.includeCredentials ? "include" : "same-origin",
-		});
+		};
+		return { url, info, mergedOptions };
+	}
+	public async request<T>(
+		args: Args & { data?: Blob | ArrayBuffer },
+		options?: Options & {
+			binary?: boolean;
+			blob?: boolean;
+			/** For internal HF use, which is why it's not exposed in {@link Options} */
+			includeCredentials?: boolean;
+		}
+	): Promise<T> {
+		const { url, info, mergedOptions } = this.makeRequestOptions(args, options);
+		const response = await fetch(url, info);
 		if (mergedOptions.retry_on_error !== false && response.status === 503 && !mergedOptions.wait_for_model) {
 			return this.request(args, {
@@ -744,4 +1015,65 @@ export class HfInference {
 		}
 		return output;
 	}
+	/**
+	 * Make request that uses server-sent events and returns response as a generator
+	 */
+	public async *streamingRequest<T>(
+		args: Args & { data?: Blob | ArrayBuffer },
+		options?: Options & {
+			binary?: boolean;
+			blob?: boolean;
+			/** For internal HF use, which is why it's not exposed in {@link Options} */
+			includeCredentials?: boolean;
+		}
+	): AsyncGenerator<T> {
+		const { url, info, mergedOptions } = this.makeRequestOptions({ ...args, stream: true }, options);
+		const response = await fetch(url, info);
+		if (mergedOptions.retry_on_error !== false && response.status === 503 && !mergedOptions.wait_for_model) {
+			return this.streamingRequest(args, {
+				...mergedOptions,
+				wait_for_model: true,
+			});
+		}
+		if (!response.ok) {
+			throw new Error(`Server response contains error: ${response.status}`);
+		}
+		if (response.headers.get("content-type") !== "text/event-stream") {
+			throw new Error(`Server does not support event stream content type`);
+		}
+		const reader = response.body.getReader();
+		const events: EventSourceMessage[] = [];
+		const onEvent = (event: EventSourceMessage) => {
+			// accumulate events in array
+			events.push(event);
+		};
+		const onChunk = getLines(
+			getMessages(
+				() => {},
+				() => {},
+				onEvent
+			)
+		);
+		try {
+			while (true) {
+				const { done, value } = await reader.read();
+				if (done) return;
+				onChunk(value);
+				while (events.length > 0) {
+					const event = events.shift();
+					if (event.data.length > 0) {
+						yield JSON.parse(event.data) as T;
+					}
+				}
+			}
+		} finally {
+			reader.releaseLock();
+		}
+	}
 }