npm - @huggingface/inference - Versions diffs - 1.6.3 → 1.7.0 - Mend

@huggingface/inference 1.6.3 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/README.md +7 -0
package/dist/index.d.ts +94 -1
package/dist/index.js +177 -5
package/dist/index.mjs +175 -4
package/package.json +1 -1
package/src/HfInference.ts +182 -5
package/src/vendor/fetch-event-source/parse.spec.ts +389 -0
package/src/vendor/fetch-event-source/parse.ts +216 -0

package/src/HfInference.ts CHANGED Viewed

@@ -1,4 +1,8 @@
 import { toArray } from "./utils/to-array";
+import type { EventSourceMessage } from "./vendor/fetch-event-source/parse";
+import { getLines, getMessages } from "./vendor/fetch-event-source/parse";
+const HF_INFERENCE_API_BASE_URL = "https://api-inference.huggingface.co/models/";
 export interface Options {
 	/**
@@ -223,6 +227,86 @@ export interface TextGenerationReturn {
 	generated_text: string;
 }
+export interface TextGenerationStreamToken {
+	/** Token ID from the model tokenizer */
+	id: number;
+	/** Text of the token */
+	text: string;
+	/** Log-probability of the token */
+	logprob: number;
+	/**
+	 * Whether the token is a special token.
+	 * Can be used to skip such tokens when concatenating the token texts.
+	 */
+	special: boolean;
+}
+export interface TextGenerationStreamPrefillToken {
+	/** Token ID from the model tokenizer */
+	id: number;
+	/** Text of the token */
+	text: string;
+	/**
+	 * Log-probability of the token.
+	 * Optional since the logprob of the first token cannot be computed.
+	 */
+	logprob?: number;
+}
+export interface TextGenerationStreamBestOfSequence {
+	/** Generated text of this sequence */
+	generated_text: string;
+	/** Reason why generation of this sequence finished */
+	finish_reason: TextGenerationStreamFinishReason;
+	/** Number of generated tokens */
+	generated_tokens: number;
+	/** Sampling seed; only present if sampling was activated */
+	seed?: number;
+	/** Prompt tokens */
+	prefill: TextGenerationStreamPrefillToken[];
+	/** Generated tokens */
+	tokens: TextGenerationStreamToken[];
+}
+export enum TextGenerationStreamFinishReason {
+	/** The number of generated tokens reached `max_new_tokens` */
+	Length = "length",
+	/** The model generated its end-of-sequence token */
+	EndOfSequenceToken = "eos_token",
+	/** The model generated a text included in `stop_sequences` */
+	StopSequence = "stop_sequence",
+}
+export interface TextGenerationStreamDetails {
+	/** Reason why the generation finished */
+	finish_reason: TextGenerationStreamFinishReason;
+	/** Number of generated tokens */
+	generated_tokens: number;
+	/** Sampling seed; only present if sampling was activated */
+	seed?: number;
+	/** Prompt tokens */
+	prefill: TextGenerationStreamPrefillToken[];
+	/** Generated tokens */
+	tokens: TextGenerationStreamToken[];
+	/** Additional sequences when using the `best_of` parameter */
+	best_of_sequences?: TextGenerationStreamBestOfSequence[];
+}
+export interface TextGenerationStreamReturn {
+	/** The generated token; the stream delivers tokens one at a time */
+	token: TextGenerationStreamToken;
+	/**
+	 * Complete generated text.
+	 * Only available when the generation is finished.
+	 */
+	generated_text?: string;
+	/**
+	 * Generation details.
+	 * Only available when the generation is finished.
+	 */
+	details?: TextGenerationStreamDetails;
+}
 export type TokenClassificationArgs = Args & {
 	/**
 	 * A string to be classified
@@ -615,6 +699,16 @@ export class HfInference {
 		return res?.[0];
 	}
+	/**
+	 * Use to continue text from a prompt. Same as `textGeneration`, but returns an async generator (delegating to `streamingRequest`) that can be read one token at a time
+	 */
+	public async *textGenerationStream(
+		args: TextGenerationArgs,
+		options?: Options
+	): AsyncGenerator<TextGenerationStreamReturn> {
+		yield* this.streamingRequest<TextGenerationStreamReturn>(args, options);
+	}
 	/**
 	 * Usually used for sentence parsing, either grammatical, or Named Entity Recognition (NER) to understand keywords contained within text. Recommended model: dbmdz/bert-large-cased-finetuned-conll03-english
 	 */
@@ -834,15 +928,21 @@ export class HfInference {
 		return res;
 	}
-	public async request<T>(
-		args: Args & { data?: Blob | ArrayBuffer },
+	/**
+	 * Helper that prepares request arguments
+	 */
+	private makeRequestOptions(
+		args: Args & {
+			data?: Blob | ArrayBuffer;
+			stream?: boolean;
+		},
 		options?: Options & {
 			binary?: boolean;
 			blob?: boolean;
 			/** For internal HF use, which is why it's not exposed in {@link Options} */
 			includeCredentials?: boolean;
 		}
-	): Promise<T> {
+	) {
 		const mergedOptions = { ...this.defaultOptions, ...options };
 		const { model, ...otherArgs } = args;
@@ -867,7 +967,8 @@ export class HfInference {
 			}
 		}
-		const response = await fetch(`https://api-inference.huggingface.co/models/${model}`, {
+		const url = `${HF_INFERENCE_API_BASE_URL}${model}`;
+		const info: RequestInit = {
 			headers,
 			method: "POST",
 			body: options?.binary
@@ -877,7 +978,22 @@ export class HfInference {
 						options: mergedOptions,
 				  }),
 			credentials: options?.includeCredentials ? "include" : "same-origin",
-		});
+		};
+		return { url, info, mergedOptions };
+	}
+	public async request<T>(
+		args: Args & { data?: Blob | ArrayBuffer },
+		options?: Options & {
+			binary?: boolean;
+			blob?: boolean;
+			/** For internal HF use, which is why it's not exposed in {@link Options} */
+			includeCredentials?: boolean;
+		}
+	): Promise<T> {
+		const { url, info, mergedOptions } = this.makeRequestOptions(args, options);
+		const response = await fetch(url, info);
 		if (mergedOptions.retry_on_error !== false && response.status === 503 && !mergedOptions.wait_for_model) {
 			return this.request(args, {
@@ -899,4 +1015,65 @@ export class HfInference {
 		}
 		return output;
 	}
+	/**
+	 * Make a request that uses server-sent events and return the response as an async generator.
+	 *
+	 * The request is built by `makeRequestOptions` with `stream: true` added to the arguments.
+	 * Every received event whose `data` field is non-empty is parsed as JSON and yielded as `T`.
+	 * If the server answers with status 503 and retrying is allowed (`retry_on_error !== false`
+	 * and `wait_for_model` not already set), the request is issued again with
+	 * `wait_for_model: true` and the events of that second request are yielded.
+	 *
+	 * @throws Error when the response status is not OK, when the response is not a
+	 * `text/event-stream`, or when the response has no body to read from.
+	 */
+	public async *streamingRequest<T>(
+		args: Args & { data?: Blob | ArrayBuffer },
+		options?: Options & {
+			binary?: boolean;
+			blob?: boolean;
+			/** For internal HF use, which is why it's not exposed in {@link Options} */
+			includeCredentials?: boolean;
+		}
+	): AsyncGenerator<T> {
+		const { url, info, mergedOptions } = this.makeRequestOptions({ ...args, stream: true }, options);
+		const response = await fetch(url, info);
+		if (mergedOptions.retry_on_error !== false && response.status === 503 && !mergedOptions.wait_for_model) {
+			// Delegate with `yield*`: a plain `return <generator>` would end this generator
+			// immediately and the retried request's events would never reach the consumer.
+			return yield* this.streamingRequest<T>(args, {
+				...mergedOptions,
+				wait_for_model: true,
+			});
+		}
+		if (!response.ok) {
+			throw new Error(`Server response contains error: ${response.status}`);
+		}
+		// The header may carry parameters (e.g. "text/event-stream; charset=utf-8") and
+		// media types are case-insensitive, so compare only the normalized prefix.
+		const contentType = response.headers.get("content-type")?.trim().toLowerCase();
+		if (!contentType?.startsWith("text/event-stream")) {
+			throw new Error(`Server does not support event stream content type`);
+		}
+		// `Response.body` is nullable; fail with a clear message instead of a TypeError.
+		if (!response.body) {
+			throw new Error(`Server response does not contain a body`);
+		}
+		const reader = response.body.getReader();
+		const events: EventSourceMessage[] = [];
+		const onEvent = (event: EventSourceMessage) => {
+			// accumulate events in array; they are drained after each chunk below
+			events.push(event);
+		};
+		// Pipeline: raw bytes -> lines -> messages -> onEvent. The id/retry callbacks are unused.
+		const onChunk = getLines(
+			getMessages(
+				() => {},
+				() => {},
+				onEvent
+			)
+		);
+		try {
+			while (true) {
+				const { done, value } = await reader.read();
+				if (done) return;
+				onChunk(value);
+				// `onChunk` is synchronous, so all events of this chunk are queued by now;
+				// take them out of the queue in arrival order and yield their payloads.
+				for (const event of events.splice(0)) {
+					if (event.data.length > 0) {
+						yield JSON.parse(event.data) as T;
+					}
+				}
+			}
+		} finally {
+			// Runs on normal completion, on errors, and when the consumer stops iterating early.
+			reader.releaseLock();
+		}
+	}
 }

package/src/vendor/fetch-event-source/parse.spec.ts ADDED Viewed

@@ -0,0 +1,389 @@
+import { expect, it, describe } from "vitest";
+const fail = (msg: string) => { throw new Error(msg) };
+/**
+ This file is a part of fetch-event-source package (as of v2.0.1)
+ https://github.com/Azure/fetch-event-source/blob/v2.0.1/src/parse.spec.ts
+ Full package can be used after it is made compatible with nodejs:
+ https://github.com/Azure/fetch-event-source/issues/20
+ Below is the fetch-event-source package license:
+ MIT License
+ Copyright (c) Microsoft Corporation.
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE
+ */
+import * as parse from './parse';
+describe('parse', () => {
+    const encoder = new TextEncoder();
+    const decoder = new TextDecoder();
+    describe('getLines', () => {
+        it('single line', () => {
+            // arrange:
+            let lineNum = 0;
+            const next = parse.getLines((line, fieldLength) => {
+                ++lineNum;
+                expect(decoder.decode(line)).toEqual('id: abc');
+                expect(fieldLength).toEqual(2);
+            });
+            // act:
+            next(encoder.encode('id: abc\n'));
+            // assert:
+            expect(lineNum).toBe(1);
+        });
+        it('multiple lines', () => {
+            // arrange:
+            let lineNum = 0;
+            const next = parse.getLines((line, fieldLength) => {
+                ++lineNum;
+                expect(decoder.decode(line)).toEqual(lineNum === 1 ? 'id: abc' : 'data: def');
+                expect(fieldLength).toEqual(lineNum === 1 ? 2 : 4);
+            });
+            // act:
+            next(encoder.encode('id: abc\n'));
+            next(encoder.encode('data: def\n'));
+            // assert:
+            expect(lineNum).toBe(2);
+        });
+        it('single line split across multiple arrays', () => {
+            // arrange:
+            let lineNum = 0;
+            const next = parse.getLines((line, fieldLength) => {
+                ++lineNum;
+                expect(decoder.decode(line)).toEqual('id: abc');
+                expect(fieldLength).toEqual(2);
+            });
+            // act:
+            next(encoder.encode('id: a'));
+            next(encoder.encode('bc\n'));
+            // assert:
+            expect(lineNum).toBe(1);
+        });
+        it('multiple lines split across multiple arrays', () => {
+            // arrange:
+            let lineNum = 0;
+            const next = parse.getLines((line, fieldLength) => {
+                ++lineNum;
+                expect(decoder.decode(line)).toEqual(lineNum === 1 ? 'id: abc' : 'data: def');
+                expect(fieldLength).toEqual(lineNum === 1 ? 2 : 4);
+            });
+            // act:
+            next(encoder.encode('id: ab'));
+            next(encoder.encode('c\nda'));
+            next(encoder.encode('ta: def\n'));
+            // assert:
+            expect(lineNum).toBe(2);
+        });
+        it('new line', () => {
+            // arrange:
+            let lineNum = 0;
+            const next = parse.getLines((line, fieldLength) => {
+                ++lineNum;
+                expect(decoder.decode(line)).toEqual('');
+                expect(fieldLength).toEqual(-1);
+            });
+            // act:
+            next(encoder.encode('\n'));
+            // assert:
+            expect(lineNum).toBe(1);
+        });
+        it('comment line', () => {
+            // arrange:
+            let lineNum = 0;
+            const next = parse.getLines((line, fieldLength) => {
+                ++lineNum;
+                expect(decoder.decode(line)).toEqual(': this is a comment');
+                expect(fieldLength).toEqual(0);
+            });
+            // act:
+            next(encoder.encode(': this is a comment\n'));
+            // assert:
+            expect(lineNum).toBe(1);
+        });
+        it('line with no field', () => {
+            // arrange:
+            let lineNum = 0;
+            const next = parse.getLines((line, fieldLength) => {
+                ++lineNum;
+                expect(decoder.decode(line)).toEqual('this is an invalid line');
+                expect(fieldLength).toEqual(-1);
+            });
+            // act:
+            next(encoder.encode('this is an invalid line\n'));
+            // assert:
+            expect(lineNum).toBe(1);
+        });
+        it('line with multiple colons', () => {
+            // arrange:
+            let lineNum = 0;
+            const next = parse.getLines((line, fieldLength) => {
+                ++lineNum;
+                expect(decoder.decode(line)).toEqual('id: abc: def');
+                expect(fieldLength).toEqual(2);
+            });
+            // act:
+            next(encoder.encode('id: abc: def\n'));
+            // assert:
+            expect(lineNum).toBe(1);
+        });
+        it('single byte array with multiple lines separated by \\n', () => {
+            // arrange:
+            let lineNum = 0;
+            const next = parse.getLines((line, fieldLength) => {
+                ++lineNum;
+                expect(decoder.decode(line)).toEqual(lineNum === 1 ? 'id: abc' : 'data: def');
+                expect(fieldLength).toEqual(lineNum === 1 ? 2 : 4);
+            });
+            // act:
+            next(encoder.encode('id: abc\ndata: def\n'));
+            // assert:
+            expect(lineNum).toBe(2);
+        });
+        it('single byte array with multiple lines separated by \\r', () => {
+            // arrange:
+            let lineNum = 0;
+            const next = parse.getLines((line, fieldLength) => {
+                ++lineNum;
+                expect(decoder.decode(line)).toEqual(lineNum === 1 ? 'id: abc' : 'data: def');
+                expect(fieldLength).toEqual(lineNum === 1 ? 2 : 4);
+            });
+            // act:
+            next(encoder.encode('id: abc\rdata: def\r'));
+            // assert:
+            expect(lineNum).toBe(2);
+        });
+        it('single byte array with multiple lines separated by \\r\\n', () => {
+            // arrange:
+            let lineNum = 0;
+            const next = parse.getLines((line, fieldLength) => {
+                ++lineNum;
+                expect(decoder.decode(line)).toEqual(lineNum === 1 ? 'id: abc' : 'data: def');
+                expect(fieldLength).toEqual(lineNum === 1 ? 2 : 4);
+            });
+            // act:
+            next(encoder.encode('id: abc\r\ndata: def\r\n'));
+            // assert:
+            expect(lineNum).toBe(2);
+        });
+    });
+    describe('getMessages', () => {
+        it('happy path', () => {
+            // arrange:
+            let msgNum = 0;
+            const next = parse.getMessages(id => {
+                expect(id).toEqual('abc');
+            }, retry => {
+                expect(retry).toEqual(42);
+            }, msg => {
+                ++msgNum;
+                expect(msg).toEqual({
+                    retry: 42,
+                    id: 'abc',
+                    event: 'def',
+                    data: 'ghi'
+                });
+            });
+            // act:
+            next(encoder.encode('retry: 42'), 5);
+            next(encoder.encode('id: abc'), 2);
+            next(encoder.encode('event:def'), 5);
+            next(encoder.encode('data:ghi'), 4);
+            next(encoder.encode(''), -1);
+            // assert:
+            expect(msgNum).toBe(1);
+        });
+        it('skip unknown fields', () => {
+            let msgNum = 0;
+            const next = parse.getMessages(id => {
+                expect(id).toEqual('abc');
+            }, _retry => {
+                fail('retry should not be called');
+            }, msg => {
+                ++msgNum;
+                expect(msg).toEqual({
+                    id: 'abc',
+                    data: '',
+                    event: '',
+                    retry: undefined,
+                });
+            });
+            // act:
+            next(encoder.encode('id: abc'), 2);
+            next(encoder.encode('foo: null'), 3);
+            next(encoder.encode(''), -1);
+            // assert:
+            expect(msgNum).toBe(1);
+        });
+        it('ignore non-integer retry', () => {
+            let msgNum = 0;
+            const next = parse.getMessages(_id => {
+                fail('id should not be called');
+            }, _retry => {
+                fail('retry should not be called');
+            }, msg => {
+                ++msgNum;
+                expect(msg).toEqual({
+                    id: '',
+                    data: '',
+                    event: '',
+                    retry: undefined,
+                });
+            });
+            // act:
+            next(encoder.encode('retry: def'), 5);
+            next(encoder.encode(''), -1);
+            // assert:
+            expect(msgNum).toBe(1);
+        });
+        it('skip comment-only messages', () => {
+            // arrange:
+            let msgNum = 0;
+            const next = parse.getMessages(id => {
+                expect(id).toEqual('123');
+            }, _retry => {
+                fail('retry should not be called');
+            }, msg => {
+                ++msgNum;
+                expect(msg).toEqual({
+                    retry: undefined,
+                    id: '123',
+                    event: 'foo ',
+                    data: '',
+                });
+            });
+            // act:
+            next(encoder.encode('id:123'), 2);
+            next(encoder.encode(':'), 0);
+            next(encoder.encode(':    '), 0);
+            next(encoder.encode('event: foo '), 5);
+            next(encoder.encode(''), -1);
+            // assert:
+            expect(msgNum).toBe(1);
+        });
+        it('should append data split across multiple lines', () => {
+            // arrange:
+            let msgNum = 0;
+            const next = parse.getMessages(_id => {
+                fail('id should not be called');
+            }, _retry => {
+                fail('retry should not be called');
+            }, msg => {
+                ++msgNum;
+                expect(msg).toEqual({
+                    data: 'YHOO\n+2\n\n10',
+                    id: '',
+                    event: '',
+                    retry: undefined,
+                });
+            });
+            // act:
+            next(encoder.encode('data:YHOO'), 4);
+            next(encoder.encode('data: +2'), 4);
+            next(encoder.encode('data'), 4);
+            next(encoder.encode('data: 10'), 4);
+            next(encoder.encode(''), -1);
+            // assert:
+            expect(msgNum).toBe(1);
+        });
+        it('should reset id if sent multiple times', () => {
+            // arrange:
+            const expectedIds = ['foo', ''];
+            let idsIdx = 0;
+            let msgNum = 0;
+            const next = parse.getMessages(id => {
+                expect(id).toEqual(expectedIds[idsIdx]);
+                ++idsIdx;
+            }, _retry => {
+                fail('retry should not be called');
+            }, msg => {
+                ++msgNum;
+                expect(msg).toEqual({
+                    data: '',
+                    id: '',
+                    event: '',
+                    retry: undefined,
+                });
+            });
+            // act:
+            next(encoder.encode('id: foo'), 2);
+            next(encoder.encode('id'), 2);
+            next(encoder.encode(''), -1);
+            // assert:
+            expect(idsIdx).toBe(2);
+            expect(msgNum).toBe(1);
+        });
+    });
+});