@huggingface/inference 1.6.3 → 1.7.1
This diff represents the changes between publicly released versions of the package, as published to their respective public registries. It is provided for informational purposes only.
- package/README.md +8 -0
- package/dist/index.d.ts +94 -1
- package/dist/index.js +183 -5
- package/dist/index.mjs +181 -4
- package/package.json +1 -1
- package/src/HfInference.ts +189 -5
- package/src/vendor/fetch-event-source/parse.spec.ts +389 -0
- package/src/vendor/fetch-event-source/parse.ts +216 -0
package/README.md
CHANGED
```diff
@@ -76,6 +76,14 @@ await hf.textGeneration({
   inputs: 'The answer to the universe is'
 })
 
+for await (const output of hf.textGenerationStream({
+  model: "google/flan-t5-xxl",
+  inputs: 'repeat "one two three four"',
+  parameters: { max_new_tokens: 250 }
+})) {
+  console.log(output.token.text, output.generated_text);
+}
+
 await hf.tokenClassification({
   model: 'dbmdz/bert-large-cased-finetuned-conll03-english',
   inputs: 'My name is Sarah Jessica Parker but you can call me Jessica'
```
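The new README example prints each token as it arrives. As a complementary sketch (not part of the package README), here is one way to accumulate the stream into a single string, using the `special` flag that the new token type exposes so that control tokens such as the end-of-sequence marker are skipped:

```ts
import { HfInference } from "@huggingface/inference";

const hf = new HfInference("your access token");

let text = "";
for await (const output of hf.textGenerationStream({
  model: "google/flan-t5-xxl",
  inputs: 'repeat "one two three four"',
  parameters: { max_new_tokens: 250 },
})) {
  // Skip special tokens (e.g. the end-of-sequence token) when concatenating
  if (!output.token.special) {
    text += output.token.text;
  }
}
console.log(text);
```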
package/dist/index.d.ts
CHANGED
```diff
@@ -206,6 +206,80 @@ interface TextGenerationReturn {
      */
     generated_text: string;
 }
+interface TextGenerationStreamToken {
+    /** Token ID from the model tokenizer */
+    id: number;
+    /** Token text */
+    text: string;
+    /** Logprob */
+    logprob: number;
+    /**
+     * Is the token a special token
+     * Can be used to ignore tokens when concatenating
+     */
+    special: boolean;
+}
+interface TextGenerationStreamPrefillToken {
+    /** Token ID from the model tokenizer */
+    id: number;
+    /** Token text */
+    text: string;
+    /**
+     * Logprob
+     * Optional since the logprob of the first token cannot be computed
+     */
+    logprob?: number;
+}
+interface TextGenerationStreamBestOfSequence {
+    /** Generated text */
+    generated_text: string;
+    /** Generation finish reason */
+    finish_reason: TextGenerationStreamFinishReason;
+    /** Number of generated tokens */
+    generated_tokens: number;
+    /** Sampling seed if sampling was activated */
+    seed?: number;
+    /** Prompt tokens */
+    prefill: TextGenerationStreamPrefillToken[];
+    /** Generated tokens */
+    tokens: TextGenerationStreamToken[];
+}
+declare enum TextGenerationStreamFinishReason {
+    /** number of generated tokens == `max_new_tokens` */
+    Length = "length",
+    /** the model generated its end of sequence token */
+    EndOfSequenceToken = "eos_token",
+    /** the model generated a text included in `stop_sequences` */
+    StopSequence = "stop_sequence"
+}
+interface TextGenerationStreamDetails {
+    /** Generation finish reason */
+    finish_reason: TextGenerationStreamFinishReason;
+    /** Number of generated tokens */
+    generated_tokens: number;
+    /** Sampling seed if sampling was activated */
+    seed?: number;
+    /** Prompt tokens */
+    prefill: TextGenerationStreamPrefillToken[];
+    /** */
+    tokens: TextGenerationStreamToken[];
+    /** Additional sequences when using the `best_of` parameter */
+    best_of_sequences?: TextGenerationStreamBestOfSequence[];
+}
+interface TextGenerationStreamReturn {
+    /** Generated token, one at a time */
+    token: TextGenerationStreamToken;
+    /**
+     * Complete generated text
+     * Only available when the generation is finished
+     */
+    generated_text?: string;
+    /**
+     * Generation details
+     * Only available when the generation is finished
+     */
+    details?: TextGenerationStreamDetails;
+}
 type TokenClassificationArgs = Args & {
     /**
      * A string to be classified
@@ -486,6 +560,10 @@ declare class HfInference {
      * Use to continue text from a prompt. This is a very generic task. Recommended model: gpt2 (it’s a simple model, but fun to play with).
      */
    textGeneration(args: TextGenerationArgs, options?: Options): Promise<TextGenerationReturn>;
+    /**
+     * Use to continue text from a prompt. Same as `textGeneration` but returns generator that can be read one token at a time
+     */
+    textGenerationStream(args: TextGenerationArgs, options?: Options): AsyncGenerator<TextGenerationStreamReturn>;
    /**
      * Usually used for sentence parsing, either grammatical, or Named Entity Recognition (NER) to understand keywords contained within text. Recommended model: dbmdz/bert-large-cased-finetuned-conll03-english
      */
@@ -537,6 +615,10 @@ declare class HfInference {
      * Recommended model: stabilityai/stable-diffusion-2
      */
    textToImage(args: TextToImageArgs, options?: Options): Promise<TextToImageReturn>;
+    /**
+     * Helper that prepares request arguments
+     */
+    private makeRequestOptions;
    request<T>(args: Args & {
        data?: Blob | ArrayBuffer;
    }, options?: Options & {
@@ -545,6 +627,17 @@ declare class HfInference {
        /** For internal HF use, which is why it's not exposed in {@link Options} */
        includeCredentials?: boolean;
    }): Promise<T>;
+    /**
+     * Make request that uses server-sent events and returns response as a generator
+     */
+    streamingRequest<T>(args: Args & {
+        data?: Blob | ArrayBuffer;
+    }, options?: Options & {
+        binary?: boolean;
+        blob?: boolean;
+        /** For internal HF use, which is why it's not exposed in {@link Options} */
+        includeCredentials?: boolean;
+    }): AsyncGenerator<T>;
 }
 
-export { Args, AudioClassificationArgs, AudioClassificationReturn, AudioClassificationReturnValue, AutomaticSpeechRecognitionArgs, AutomaticSpeechRecognitionReturn, ConversationalArgs, ConversationalReturn, FeatureExtractionArgs, FeatureExtractionReturn, FillMaskArgs, FillMaskReturn, HfInference, ImageClassificationArgs, ImageClassificationReturn, ImageClassificationReturnValue, ImageSegmentationArgs, ImageSegmentationReturn, ImageSegmentationReturnValue, ObjectDetectionArgs, ObjectDetectionReturn, ObjectDetectionReturnValue, Options, QuestionAnswerArgs, QuestionAnswerReturn, SummarizationArgs, SummarizationReturn, TableQuestionAnswerArgs, TableQuestionAnswerReturn, TextClassificationArgs, TextClassificationReturn, TextGenerationArgs, TextGenerationReturn, TextToImageArgs, TextToImageReturn, TokenClassificationArgs, TokenClassificationReturn, TokenClassificationReturnValue, TranslationArgs, TranslationReturn, ZeroShotClassificationArgs, ZeroShotClassificationReturn, ZeroShotClassificationReturnValue };
+export { Args, AudioClassificationArgs, AudioClassificationReturn, AudioClassificationReturnValue, AutomaticSpeechRecognitionArgs, AutomaticSpeechRecognitionReturn, ConversationalArgs, ConversationalReturn, FeatureExtractionArgs, FeatureExtractionReturn, FillMaskArgs, FillMaskReturn, HfInference, ImageClassificationArgs, ImageClassificationReturn, ImageClassificationReturnValue, ImageSegmentationArgs, ImageSegmentationReturn, ImageSegmentationReturnValue, ObjectDetectionArgs, ObjectDetectionReturn, ObjectDetectionReturnValue, Options, QuestionAnswerArgs, QuestionAnswerReturn, SummarizationArgs, SummarizationReturn, TableQuestionAnswerArgs, TableQuestionAnswerReturn, TextClassificationArgs, TextClassificationReturn, TextGenerationArgs, TextGenerationReturn, TextGenerationStreamBestOfSequence, TextGenerationStreamDetails, TextGenerationStreamFinishReason, TextGenerationStreamPrefillToken, TextGenerationStreamReturn, TextGenerationStreamToken, TextToImageArgs, TextToImageReturn, TokenClassificationArgs, TokenClassificationReturn, TokenClassificationReturnValue, TranslationArgs, TranslationReturn, ZeroShotClassificationArgs, ZeroShotClassificationReturn, ZeroShotClassificationReturnValue };
```
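Since `TextGenerationStreamFinishReason` is a `declare enum` (a value, not just a type), consumers can branch on how a generation ended. A minimal sketch of hypothetical consumer code against these declarations; per the doc comments, only the final event carries `generated_text` and `details`:

```ts
import {
  HfInference,
  TextGenerationStreamFinishReason,
  TextGenerationStreamReturn,
} from "@huggingface/inference";

async function streamToString(hf: HfInference, inputs: string): Promise<string> {
  let last: TextGenerationStreamReturn | undefined;
  for await (const output of hf.textGenerationStream({ model: "google/flan-t5-xxl", inputs })) {
    // Every event carries `token`; the final event also carries
    // `generated_text` and `details`.
    last = output;
  }
  if (last?.details?.finish_reason === TextGenerationStreamFinishReason.Length) {
    console.warn("Hit max_new_tokens; output may be truncated");
  }
  return last?.generated_text ?? "";
}
```

Because the return type is `AsyncGenerator<TextGenerationStreamReturn>`, a plain `for await` loop is all a consumer needs; no manual stream-reader handling is required.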
package/dist/index.js
CHANGED
```diff
@@ -19,7 +19,8 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
 // src/index.ts
 var src_exports = {};
 __export(src_exports, {
-  HfInference: () => HfInference
+  HfInference: () => HfInference,
+  TextGenerationStreamFinishReason: () => TextGenerationStreamFinishReason
 });
 module.exports = __toCommonJS(src_exports);
 
@@ -31,7 +32,113 @@ function toArray(obj) {
   return [obj];
 }
 
+// src/vendor/fetch-event-source/parse.ts
+function getLines(onLine) {
+  let buffer;
+  let position;
+  let fieldLength;
+  let discardTrailingNewline = false;
+  return function onChunk(arr) {
+    if (buffer === void 0) {
+      buffer = arr;
+      position = 0;
+      fieldLength = -1;
+    } else {
+      buffer = concat(buffer, arr);
+    }
+    const bufLength = buffer.length;
+    let lineStart = 0;
+    while (position < bufLength) {
+      if (discardTrailingNewline) {
+        if (buffer[position] === 10 /* NewLine */) {
+          lineStart = ++position;
+        }
+        discardTrailingNewline = false;
+      }
+      let lineEnd = -1;
+      for (; position < bufLength && lineEnd === -1; ++position) {
+        switch (buffer[position]) {
+          case 58 /* Colon */:
+            if (fieldLength === -1) {
+              fieldLength = position - lineStart;
+            }
+            break;
+          case 13 /* CarriageReturn */:
+            discardTrailingNewline = true;
+          case 10 /* NewLine */:
+            lineEnd = position;
+            break;
+        }
+      }
+      if (lineEnd === -1) {
+        break;
+      }
+      onLine(buffer.subarray(lineStart, lineEnd), fieldLength);
+      lineStart = position;
+      fieldLength = -1;
+    }
+    if (lineStart === bufLength) {
+      buffer = void 0;
+    } else if (lineStart !== 0) {
+      buffer = buffer.subarray(lineStart);
+      position -= lineStart;
+    }
+  };
+}
+function getMessages(onId, onRetry, onMessage) {
+  let message = newMessage();
+  const decoder = new TextDecoder();
+  return function onLine(line, fieldLength) {
+    if (line.length === 0) {
+      onMessage?.(message);
+      message = newMessage();
+    } else if (fieldLength > 0) {
+      const field = decoder.decode(line.subarray(0, fieldLength));
+      const valueOffset = fieldLength + (line[fieldLength + 1] === 32 /* Space */ ? 2 : 1);
+      const value = decoder.decode(line.subarray(valueOffset));
+      switch (field) {
+        case "data":
+          message.data = message.data ? message.data + "\n" + value : value;
+          break;
+        case "event":
+          message.event = value;
+          break;
+        case "id":
+          onId(message.id = value);
+          break;
+        case "retry":
+          const retry = parseInt(value, 10);
+          if (!isNaN(retry)) {
+            onRetry(message.retry = retry);
+          }
+          break;
+      }
+    }
+  };
+}
+function concat(a, b) {
+  const res = new Uint8Array(a.length + b.length);
+  res.set(a);
+  res.set(b, a.length);
+  return res;
+}
+function newMessage() {
+  return {
+    data: "",
+    event: "",
+    id: "",
+    retry: void 0
+  };
+}
+
 // src/HfInference.ts
+var HF_INFERENCE_API_BASE_URL = "https://api-inference.huggingface.co/models/";
+var TextGenerationStreamFinishReason = /* @__PURE__ */ ((TextGenerationStreamFinishReason2) => {
+  TextGenerationStreamFinishReason2["Length"] = "length";
+  TextGenerationStreamFinishReason2["EndOfSequenceToken"] = "eos_token";
+  TextGenerationStreamFinishReason2["StopSequence"] = "stop_sequence";
+  return TextGenerationStreamFinishReason2;
+})(TextGenerationStreamFinishReason || {});
 var HfInference = class {
   apiKey;
   defaultOptions;
@@ -113,6 +220,12 @@ var HfInference = class {
     }
     return res?.[0];
   }
+  /**
+   * Use to continue text from a prompt. Same as `textGeneration` but returns generator that can be read one token at a time
+   */
+  async *textGenerationStream(args, options) {
+    yield* this.streamingRequest(args, options);
+  }
   /**
    * Usually used for sentence parsing, either grammatical, or Named Entity Recognition (NER) to understand keywords contained within text. Recommended model: dbmdz/bert-large-cased-finetuned-conll03-english
    */
@@ -273,7 +386,10 @@ var HfInference = class {
     }
     return res;
   }
-  async request(args, options) {
+  /**
+   * Helper that prepares request arguments
+   */
+  makeRequestOptions(args, options) {
     const mergedOptions = { ...this.defaultOptions, ...options };
     const { model, ...otherArgs } = args;
     const headers = {};
@@ -294,7 +410,8 @@ var HfInference = class {
         headers["X-Load-Model"] = "0";
       }
     }
-    const response = await fetch(`https://api-inference.huggingface.co/models/${model}`, {
+    const url = `${HF_INFERENCE_API_BASE_URL}${model}`;
+    const info = {
       headers,
       method: "POST",
      body: options?.binary ? args.data : JSON.stringify({
@@ -302,7 +419,12 @@ var HfInference = class {
        options: mergedOptions
      }),
      credentials: options?.includeCredentials ? "include" : "same-origin"
-    });
+    };
+    return { url, info, mergedOptions };
+  }
+  async request(args, options) {
+    const { url, info, mergedOptions } = this.makeRequestOptions(args, options);
+    const response = await fetch(url, info);
     if (mergedOptions.retry_on_error !== false && response.status === 503 && !mergedOptions.wait_for_model) {
       return this.request(args, {
         ...mergedOptions,
@@ -321,8 +443,64 @@
     }
     return output;
   }
+  /**
+   * Make request that uses server-sent events and returns response as a generator
+   */
+  async *streamingRequest(args, options) {
+    const { url, info, mergedOptions } = this.makeRequestOptions({ ...args, stream: true }, options);
+    const response = await fetch(url, info);
+    if (mergedOptions.retry_on_error !== false && response.status === 503 && !mergedOptions.wait_for_model) {
+      return this.streamingRequest(args, {
+        ...mergedOptions,
+        wait_for_model: true
+      });
+    }
+    if (!response.ok) {
+      if (response.headers.get("Content-Type")?.startsWith("application/json")) {
+        const output = await response.json();
+        if (output.error) {
+          throw new Error(output.error);
+        }
+      }
+      throw new Error(`Server response contains error: ${response.status}`);
+    }
+    if (response.headers.get("content-type") !== "text/event-stream") {
+      throw new Error(`Server does not support event stream content type`);
+    }
+    const reader = response.body.getReader();
+    const events = [];
+    const onEvent = (event) => {
+      events.push(event);
+    };
+    const onChunk = getLines(
+      getMessages(
+        () => {
+        },
+        () => {
+        },
+        onEvent
+      )
+    );
+    try {
+      while (true) {
+        const { done, value } = await reader.read();
+        if (done)
+          return;
+        onChunk(value);
+        while (events.length > 0) {
+          const event = events.shift();
+          if (event.data.length > 0) {
+            yield JSON.parse(event.data);
+          }
+        }
+      }
+    } finally {
+      reader.releaseLock();
+    }
+  }
 };
 // Annotate the CommonJS export names for ESM import in node:
 0 && (module.exports = {
-  HfInference
+  HfInference,
+  TextGenerationStreamFinishReason
 });
```
package/dist/index.mjs
CHANGED
```diff
@@ -6,7 +6,113 @@ function toArray(obj) {
   return [obj];
 }
 
+// src/vendor/fetch-event-source/parse.ts
+function getLines(onLine) {
+  let buffer;
+  let position;
+  let fieldLength;
+  let discardTrailingNewline = false;
+  return function onChunk(arr) {
+    if (buffer === void 0) {
+      buffer = arr;
+      position = 0;
+      fieldLength = -1;
+    } else {
+      buffer = concat(buffer, arr);
+    }
+    const bufLength = buffer.length;
+    let lineStart = 0;
+    while (position < bufLength) {
+      if (discardTrailingNewline) {
+        if (buffer[position] === 10 /* NewLine */) {
+          lineStart = ++position;
+        }
+        discardTrailingNewline = false;
+      }
+      let lineEnd = -1;
+      for (; position < bufLength && lineEnd === -1; ++position) {
+        switch (buffer[position]) {
+          case 58 /* Colon */:
+            if (fieldLength === -1) {
+              fieldLength = position - lineStart;
+            }
+            break;
+          case 13 /* CarriageReturn */:
+            discardTrailingNewline = true;
+          case 10 /* NewLine */:
+            lineEnd = position;
+            break;
+        }
+      }
+      if (lineEnd === -1) {
+        break;
+      }
+      onLine(buffer.subarray(lineStart, lineEnd), fieldLength);
+      lineStart = position;
+      fieldLength = -1;
+    }
+    if (lineStart === bufLength) {
+      buffer = void 0;
+    } else if (lineStart !== 0) {
+      buffer = buffer.subarray(lineStart);
+      position -= lineStart;
+    }
+  };
+}
+function getMessages(onId, onRetry, onMessage) {
+  let message = newMessage();
+  const decoder = new TextDecoder();
+  return function onLine(line, fieldLength) {
+    if (line.length === 0) {
+      onMessage?.(message);
+      message = newMessage();
+    } else if (fieldLength > 0) {
+      const field = decoder.decode(line.subarray(0, fieldLength));
+      const valueOffset = fieldLength + (line[fieldLength + 1] === 32 /* Space */ ? 2 : 1);
+      const value = decoder.decode(line.subarray(valueOffset));
+      switch (field) {
+        case "data":
+          message.data = message.data ? message.data + "\n" + value : value;
+          break;
+        case "event":
+          message.event = value;
+          break;
+        case "id":
+          onId(message.id = value);
+          break;
+        case "retry":
+          const retry = parseInt(value, 10);
+          if (!isNaN(retry)) {
+            onRetry(message.retry = retry);
+          }
+          break;
+      }
+    }
+  };
+}
+function concat(a, b) {
+  const res = new Uint8Array(a.length + b.length);
+  res.set(a);
+  res.set(b, a.length);
+  return res;
+}
+function newMessage() {
+  return {
+    data: "",
+    event: "",
+    id: "",
+    retry: void 0
+  };
+}
+
 // src/HfInference.ts
+var HF_INFERENCE_API_BASE_URL = "https://api-inference.huggingface.co/models/";
+var TextGenerationStreamFinishReason = /* @__PURE__ */ ((TextGenerationStreamFinishReason2) => {
+  TextGenerationStreamFinishReason2["Length"] = "length";
+  TextGenerationStreamFinishReason2["EndOfSequenceToken"] = "eos_token";
+  TextGenerationStreamFinishReason2["StopSequence"] = "stop_sequence";
+  return TextGenerationStreamFinishReason2;
+})(TextGenerationStreamFinishReason || {});
 var HfInference = class {
   apiKey;
   defaultOptions;
@@ -88,6 +194,12 @@ var HfInference = class {
     }
     return res?.[0];
   }
+  /**
+   * Use to continue text from a prompt. Same as `textGeneration` but returns generator that can be read one token at a time
+   */
+  async *textGenerationStream(args, options) {
+    yield* this.streamingRequest(args, options);
+  }
   /**
    * Usually used for sentence parsing, either grammatical, or Named Entity Recognition (NER) to understand keywords contained within text. Recommended model: dbmdz/bert-large-cased-finetuned-conll03-english
    */
@@ -248,7 +360,10 @@ var HfInference = class {
     }
     return res;
   }
-  async request(args, options) {
+  /**
+   * Helper that prepares request arguments
+   */
+  makeRequestOptions(args, options) {
     const mergedOptions = { ...this.defaultOptions, ...options };
     const { model, ...otherArgs } = args;
     const headers = {};
@@ -269,7 +384,8 @@ var HfInference = class {
        headers["X-Load-Model"] = "0";
      }
    }
-    const response = await fetch(`https://api-inference.huggingface.co/models/${model}`, {
+    const url = `${HF_INFERENCE_API_BASE_URL}${model}`;
+    const info = {
      headers,
      method: "POST",
      body: options?.binary ? args.data : JSON.stringify({
@@ -277,7 +393,12 @@ var HfInference = class {
        options: mergedOptions
      }),
      credentials: options?.includeCredentials ? "include" : "same-origin"
-    });
+    };
+    return { url, info, mergedOptions };
+  }
+  async request(args, options) {
+    const { url, info, mergedOptions } = this.makeRequestOptions(args, options);
+    const response = await fetch(url, info);
     if (mergedOptions.retry_on_error !== false && response.status === 503 && !mergedOptions.wait_for_model) {
       return this.request(args, {
         ...mergedOptions,
@@ -296,7 +417,63 @@ var HfInference = class {
     }
     return output;
   }
+  /**
+   * Make request that uses server-sent events and returns response as a generator
+   */
+  async *streamingRequest(args, options) {
+    const { url, info, mergedOptions } = this.makeRequestOptions({ ...args, stream: true }, options);
+    const response = await fetch(url, info);
+    if (mergedOptions.retry_on_error !== false && response.status === 503 && !mergedOptions.wait_for_model) {
+      return this.streamingRequest(args, {
+        ...mergedOptions,
+        wait_for_model: true
+      });
+    }
+    if (!response.ok) {
+      if (response.headers.get("Content-Type")?.startsWith("application/json")) {
+        const output = await response.json();
+        if (output.error) {
+          throw new Error(output.error);
+        }
+      }
+      throw new Error(`Server response contains error: ${response.status}`);
+    }
+    if (response.headers.get("content-type") !== "text/event-stream") {
+      throw new Error(`Server does not support event stream content type`);
+    }
+    const reader = response.body.getReader();
+    const events = [];
+    const onEvent = (event) => {
+      events.push(event);
+    };
+    const onChunk = getLines(
+      getMessages(
+        () => {
+        },
+        () => {
+        },
+        onEvent
+      )
+    );
+    try {
+      while (true) {
+        const { done, value } = await reader.read();
+        if (done)
+          return;
+        onChunk(value);
+        while (events.length > 0) {
+          const event = events.shift();
+          if (event.data.length > 0) {
+            yield JSON.parse(event.data);
+          }
+        }
+      }
+    } finally {
+      reader.releaseLock();
+    }
+  }
 };
 export {
-  HfInference
+  HfInference,
+  TextGenerationStreamFinishReason
 };
```
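The `.mjs` bundle mirrors the CommonJS build above; the practical upshot of the shared `makeRequestOptions` refactor is that `streamingRequest` POSTs the same payload as `request` plus `stream: true`, and requires the server to answer with `text/event-stream`. A rough sketch of the equivalent raw request, for illustration only (assumptions: the header and body shapes below match what `makeRequestOptions` produces with no default options set, and `HF_TOKEN` is a hypothetical variable holding your own token):

```ts
const HF_TOKEN = process.env.HF_TOKEN; // assumption: token supplied via env (Node)

const response = await fetch("https://api-inference.huggingface.co/models/google/flan-t5-xxl", {
  method: "POST",
  headers: { Authorization: `Bearer ${HF_TOKEN}` },
  body: JSON.stringify({
    inputs: 'repeat "one two three four"',
    parameters: { max_new_tokens: 250 },
    options: {}, // mergedOptions is empty when no options are passed
    stream: true, // injected by streamingRequest before the request is made
  }),
});
console.log(response.headers.get("content-type")); // "text/event-stream" on success
```

Note that `streamingRequest` retries a 503 recursively with `wait_for_model: true`, matching the behavior `request` already had for models that are still loading.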