@huggingface/tasks 0.5.2 → 0.6.1

This diff shows the published contents of two package versions as they appear in their public registries. It is provided for informational purposes only.
@@ -0,0 +1,158 @@
+/**
+ * Inference code generated from the JSON schema spec in ./spec
+ *
+ * Using src/scripts/inference-codegen
+ */
+
+/**
+ * Inputs for ChatCompletion inference
+ */
+export interface ChatCompletionInput {
+  /**
+   * Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing
+   * frequency in the text so far, decreasing the model's likelihood to repeat the same line
+   * verbatim.
+   */
+  frequency_penalty?: number;
+  /**
+   * The maximum number of tokens that can be generated in the chat completion.
+   */
+  max_tokens?: number;
+  messages: ChatCompletionInputMessage[];
+  /**
+   * The random sampling seed.
+   */
+  seed?: number;
+  /**
+   * Stop generating tokens if a stop token is generated.
+   */
+  stop?: ChatCompletionInputStopReason;
+  /**
+   * If set, partial message deltas will be sent.
+   */
+  stream?: boolean;
+  /**
+   * The value used to modulate the logits distribution.
+   */
+  temperature?: number;
+  /**
+   * If set to < 1, only the smallest set of most probable tokens with probabilities that add
+   * up to `top_p` or higher are kept for generation.
+   */
+  top_p?: number;
+  [property: string]: unknown;
+}
+
+export interface ChatCompletionInputMessage {
+  /**
+   * The content of the message.
+   */
+  content: string;
+  role: ChatCompletionMessageRole;
+  [property: string]: unknown;
+}
+
+/**
+ * The role of the message author.
+ */
+export type ChatCompletionMessageRole = "assistant" | "system" | "user";
+
+/**
+ * Stop generating tokens if a stop token is generated.
+ */
+export type ChatCompletionInputStopReason = string[] | string;
+
+/**
+ * Outputs for Chat Completion inference
+ */
+export interface ChatCompletionOutput {
+  /**
+   * A list of chat completion choices.
+   */
+  choices: ChatCompletionOutputChoice[];
+  /**
+   * The Unix timestamp (in seconds) of when the chat completion was created.
+   */
+  created: number;
+  [property: string]: unknown;
+}
+
+export interface ChatCompletionOutputChoice {
+  /**
+   * The reason why the generation was stopped.
+   */
+  finish_reason: ChatCompletionFinishReason;
+  /**
+   * The index of the choice in the list of choices.
+   */
+  index: number;
+  message: ChatCompletionOutputChoiceMessage;
+  [property: string]: unknown;
+}
+
+/**
+ * The reason why the generation was stopped.
+ *
+ * The generated sequence reached the maximum allowed length
+ *
+ * The model generated an end-of-sentence (EOS) token
+ *
+ * One of the sequences in stop_sequences was generated
+ */
+export type ChatCompletionFinishReason = "length" | "eos_token" | "stop_sequence";
+
+export interface ChatCompletionOutputChoiceMessage {
+  /**
+   * The content of the chat completion message.
+   */
+  content: string;
+  role: ChatCompletionMessageRole;
+  [property: string]: unknown;
+}
+
+/**
+ * Chat Completion Stream Output
+ */
+export interface ChatCompletionStreamOutput {
+  /**
+   * A list of chat completion choices.
+   */
+  choices: ChatCompletionStreamOutputChoice[];
+  /**
+   * The Unix timestamp (in seconds) of when the chat completion was created. Each chunk has
+   * the same timestamp.
+   */
+  created: number;
+  [property: string]: unknown;
+}
+
+export interface ChatCompletionStreamOutputChoice {
+  /**
+   * A chat completion delta generated by streamed model responses.
+   */
+  delta: ChatCompletionStreamOutputDelta;
+  /**
+   * The reason why the generation was stopped.
+   */
+  finish_reason?: ChatCompletionFinishReason;
+  /**
+   * The index of the choice in the list of choices.
+   */
+  index: number;
+  [property: string]: unknown;
+}
+
+/**
+ * A chat completion delta generated by streamed model responses.
+ */
+export interface ChatCompletionStreamOutputDelta {
+  /**
+   * The contents of the chunk message.
+   */
+  content?: string;
+  /**
+   * The role of the author of this message.
+   */
+  role?: string;
+  [property: string]: unknown;
+}
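For orientation, here is a minimal usage sketch of the generated types. The types are exactly those added above and are re-exported from the package root (see the index hunk further down); the endpoint URL and the plain fetch wiring are illustrative assumptions, not part of @huggingface/tasks.

import type { ChatCompletionInput, ChatCompletionOutput } from "@huggingface/tasks";

async function complete(): Promise<string> {
  const input: ChatCompletionInput = {
    messages: [
      { role: "system", content: "You are a helpful assistant." },
      { role: "user", content: "What does temperature do?" },
    ],
    max_tokens: 256,
    temperature: 0.7,
  };
  // Hypothetical server implementing this schema; not defined by the package.
  const res = await fetch("https://example.com/v1/chat/completions", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(input),
  });
  const output = (await res.json()) as ChatCompletionOutput;
  return output.choices[0].message.content;
}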
@@ -0,0 +1,63 @@
+{
+  "title": "ChatCompletionInput",
+  "$id": "/inference/schemas/chat-completion/input.json",
+  "$schema": "http://json-schema.org/draft-06/schema#",
+  "description": "Inputs for ChatCompletion inference",
+  "type": "object",
+  "properties": {
+    "messages": {
+      "type": "array",
+      "title": "ChatCompletionInputMessage",
+      "items": {
+        "type": "object",
+        "properties": {
+          "role": {
+            "$ref": "#/definitions/Role"
+          },
+          "content": {
+            "type": "string",
+            "description": "The content of the message."
+          }
+        },
+        "required": ["role", "content"]
+      }
+    },
+    "frequency_penalty": {
+      "type": "number",
+      "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim."
+    },
+    "max_tokens": {
+      "type": "integer",
+      "description": "The maximum number of tokens that can be generated in the chat completion."
+    },
+    "seed": {
+      "type": "integer",
+      "description": "The random sampling seed."
+    },
+    "stop": {
+      "oneOf": [{ "type": "string" }, { "type": "array", "items": { "type": "string" } }],
+      "title": "ChatCompletionInputStopReason",
+      "description": "Stop generating tokens if a stop token is generated."
+    },
+    "stream": {
+      "type": "boolean",
+      "description": "If set, partial message deltas will be sent."
+    },
+    "temperature": {
+      "type": "number",
+      "description": "The value used to modulate the logits distribution."
+    },
+    "top_p": {
+      "type": "number",
+      "description": "If set to < 1, only the smallest set of most probable tokens with probabilities that add up to `top_p` or higher are kept for generation."
+    }
+  },
+  "required": ["messages"],
+  "definitions": {
+    "Role": {
+      "oneOf": [{ "const": "assistant" }, { "const": "system" }, { "const": "user" }],
+      "title": "ChatCompletionMessageRole",
+      "description": "The role of the message author."
+    }
+  }
+}
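This spec declares JSON Schema draft-06, so a validator has to register that meta-schema before compiling. A sketch with Ajv follows; the tooling choice is an assumption, the package itself only ships the spec file.

// Sketch only: assumes Ajv is installed and JSON module resolution is enabled.
import Ajv from "ajv";
import draft6MetaSchema from "ajv/dist/refs/json-schema-draft-06.json";
import inputSchema from "./input.json";

const ajv = new Ajv();
ajv.addMetaSchema(draft6MetaSchema); // the spec declares draft-06
const validate = ajv.compile(inputSchema);

// Valid: `messages` is the only required property.
console.log(validate({ messages: [{ role: "user", content: "Hi" }] })); // true
// Invalid: `role` must be one of assistant | system | user.
console.log(validate({ messages: [{ role: "bot", content: "Hi" }] })); // false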
@@ -0,0 +1,58 @@
+{
+  "$id": "/inference/schemas/chat-completion/output.json",
+  "$schema": "http://json-schema.org/draft-06/schema#",
+  "description": "Outputs for Chat Completion inference",
+  "title": "ChatCompletionOutput",
+  "type": "object",
+  "properties": {
+    "choices": {
+      "type": "array",
+      "description": "A list of chat completion choices.",
+      "title": "ChatCompletionOutputChoice",
+      "items": {
+        "type": "object",
+        "properties": {
+          "finish_reason": {
+            "$ref": "#/definitions/FinishReason",
+            "description": "The reason why the generation was stopped."
+          },
+          "index": {
+            "type": "integer",
+            "description": "The index of the choice in the list of choices."
+          },
+          "message": {
+            "type": "object",
+            "properties": {
+              "role": {
+                "$ref": "/inference/schemas/chat-completion/input.json#/definitions/Role"
+              },
+              "content": {
+                "type": "string",
+                "description": "The content of the chat completion message."
+              }
+            },
+            "title": "ChatCompletionOutputChoiceMessage",
+            "required": ["content", "role"]
+          }
+        },
+        "required": ["finish_reason", "index", "message"]
+      }
+    },
+    "created": {
+      "type": "integer",
+      "description": "The Unix timestamp (in seconds) of when the chat completion was created."
+    }
+  },
+  "required": ["choices", "created"],
+  "definitions": {
+    "FinishReason": {
+      "type": "string",
+      "title": "ChatCompletionFinishReason",
+      "oneOf": [
+        { "const": "length", "description": "The generated sequence reached the maximum allowed length" },
+        { "const": "eos_token", "description": "The model generated an end-of-sentence (EOS) token" },
+        { "const": "stop_sequence", "description": "One of the sequences in stop_sequences was generated" }
+      ]
+    }
+  }
+}
@@ -0,0 +1,48 @@
+{
+  "$id": "/inference/schemas/chat-completion/output_stream.json",
+  "$schema": "http://json-schema.org/draft-06/schema#",
+  "description": "Chat Completion Stream Output",
+  "title": "ChatCompletionStreamOutput",
+  "type": "object",
+  "properties": {
+    "choices": {
+      "type": "array",
+      "title": "ChatCompletionStreamOutputChoice",
+      "description": "A list of chat completion choices.",
+      "items": {
+        "type": "object",
+        "properties": {
+          "delta": {
+            "type": "object",
+            "title": "ChatCompletionStreamOutputDelta",
+            "description": "A chat completion delta generated by streamed model responses.",
+            "properties": {
+              "content": {
+                "type": "string",
+                "description": "The contents of the chunk message."
+              },
+              "role": {
+                "type": "string",
+                "description": "The role of the author of this message."
+              }
+            }
+          },
+          "finish_reason": {
+            "$ref": "/inference/schemas/chat-completion/output.json#/definitions/FinishReason",
+            "description": "The reason why the generation was stopped."
+          },
+          "index": {
+            "type": "integer",
+            "description": "The index of the choice in the list of choices."
+          }
+        },
+        "required": ["delta", "index"]
+      }
+    },
+    "created": {
+      "type": "integer",
+      "description": "The Unix timestamp (in seconds) of when the chat completion was created. Each chunk has the same timestamp."
+    }
+  },
+  "required": ["choices", "created"]
+}
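Since `delta.content` is optional (a first chunk may carry only the role), a consumer has to fold chunks defensively. A sketch, assuming the chunks were already parsed from the transport (for example, server-sent events):

import type { ChatCompletionStreamOutput } from "@huggingface/tasks";

function accumulate(chunks: ChatCompletionStreamOutput[]): string {
  let text = "";
  for (const chunk of chunks) {
    for (const choice of chunk.choices) {
      text += choice.delta.content ?? ""; // role-only deltas contribute nothing
    }
  }
  return text;
}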
@@ -38,6 +38,17 @@ import zeroShotObjectDetection from "./zero-shot-object-detection/data";
 
 export type * from "./audio-classification/inference";
 export type * from "./automatic-speech-recognition/inference";
+export type {
+  ChatCompletionInput,
+  ChatCompletionInputMessage,
+  ChatCompletionOutput,
+  ChatCompletionOutputChoice,
+  ChatCompletionFinishReason,
+  ChatCompletionOutputChoiceMessage,
+  ChatCompletionStreamOutput,
+  ChatCompletionStreamOutputChoice,
+  ChatCompletionStreamOutputDelta,
+} from "./chat-completion/inference";
 export type * from "./document-question-answering/inference";
 export type * from "./feature-extraction/inference";
 export type * from "./fill-mask/inference";
@@ -73,14 +84,14 @@ export type {
   TextClassificationParameters,
 } from "./text-classification/inference";
 export type {
-  FinishReason,
-  PrefillToken,
+  TextGenerationFinishReason,
+  TextGenerationPrefillToken,
   TextGenerationInput,
   TextGenerationOutput,
   TextGenerationOutputDetails,
   TextGenerationParameters,
-  TextGenerationSequenceDetails,
-  Token,
+  TextGenerationOutputSequenceDetails,
+  TextGenerationOutputToken,
 } from "./text-generation/inference";
 export type * from "./video-classification/inference";
 export type * from "./visual-question-answering/inference";
@@ -16,6 +16,10 @@ export interface TextGenerationInput {
    * Additional inference parameters
    */
   parameters?: TextGenerationParameters;
+  /**
+   * Whether to stream output tokens
+   */
+  stream?: boolean;
   [property: string]: unknown;
 }
 
@@ -114,16 +118,16 @@ export interface TextGenerationOutputDetails {
   /**
    * Details about additional sequences when best_of is provided
    */
-  best_of_sequences?: TextGenerationSequenceDetails[];
+  best_of_sequences?: TextGenerationOutputSequenceDetails[];
   /**
    * The reason why the generation was stopped.
    */
-  finish_reason: FinishReason;
+  finish_reason: TextGenerationFinishReason;
   /**
    * The number of generated tokens
    */
   generated_tokens: number;
-  prefill: PrefillToken[];
+  prefill: TextGenerationPrefillToken[];
   /**
    * The random seed used for generation
    */
@@ -131,24 +135,25 @@ export interface TextGenerationOutputDetails {
   /**
    * The generated tokens and associated details
    */
-  tokens: Token[];
+  tokens: TextGenerationOutputToken[];
+  /**
+   * Most likely tokens
+   */
+  top_tokens?: Array<TextGenerationOutputToken[]>;
   [property: string]: unknown;
 }
 
-export interface TextGenerationSequenceDetails {
-  /**
-   * The reason why the generation was stopped.
-   */
-  finish_reason: FinishReason;
+export interface TextGenerationOutputSequenceDetails {
+  finish_reason: TextGenerationFinishReason;
   /**
    * The generated text
    */
-  generated_text: number;
+  generated_text: string;
   /**
    * The number of generated tokens
    */
   generated_tokens: number;
-  prefill: PrefillToken[];
+  prefill: TextGenerationPrefillToken[];
   /**
    * The random seed used for generation
    */
@@ -156,20 +161,26 @@ export interface TextGenerationSequenceDetails {
   /**
    * The generated tokens and associated details
    */
-  tokens: Token[];
+  tokens: TextGenerationOutputToken[];
+  /**
+   * Most likely tokens
+   */
+  top_tokens?: Array<TextGenerationOutputToken[]>;
   [property: string]: unknown;
 }
 
 /**
- * The generated sequence reached the maximum allowed length
+ * The reason why the generation was stopped.
+ *
+ * length: The generated sequence reached the maximum allowed length
  *
- * The model generated an end-of-sentence (EOS) token
+ * eos_token: The model generated an end-of-sentence (EOS) token
  *
- * One of the sequence in stop_sequences was generated
+ * stop_sequence: One of the sequences in stop_sequences was generated
  */
-export type FinishReason = "length" | "eos_token" | "stop_sequence";
+export type TextGenerationFinishReason = "length" | "eos_token" | "stop_sequence";
 
-export interface PrefillToken {
+export interface TextGenerationPrefillToken {
   id: number;
   logprob: number;
   /**
@@ -179,9 +190,12 @@ export interface PrefillToken {
   [property: string]: unknown;
 }
 
-export interface Token {
+/**
+ * Generated token.
+ */
+export interface TextGenerationOutputToken {
   id: number;
-  logprob: number;
+  logprob?: number;
   /**
    * Whether or not that token is a special one
    */
209
+
210
+ /**
211
+ * Text Generation Stream Output
212
+ */
213
+ export interface TextGenerationStreamOutput {
214
+ /**
215
+ * Generation details. Only available when the generation is finished.
216
+ */
217
+ details?: TextGenerationStreamDetails;
218
+ /**
219
+ * The complete generated text. Only available when the generation is finished.
220
+ */
221
+ generated_text?: string;
222
+ /**
223
+ * The token index within the stream. Optional to support older clients that omit it.
224
+ */
225
+ index?: number;
226
+ /**
227
+ * Generated token.
228
+ */
229
+ token: TextGenerationOutputToken;
230
+ [property: string]: unknown;
231
+ }
232
+
233
+ /**
234
+ * Generation details. Only available when the generation is finished.
235
+ */
236
+ export interface TextGenerationStreamDetails {
237
+ /**
238
+ * The reason why the generation was stopped.
239
+ */
240
+ finish_reason: TextGenerationFinishReason;
241
+ /**
242
+ * The number of generated tokens
243
+ */
244
+ generated_tokens: number;
245
+ /**
246
+ * The random seed used for generation
247
+ */
248
+ seed: number;
249
+ [property: string]: unknown;
250
+ }
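A sketch of consuming these events. The import uses the module path from this diff because the streaming types are not added to the package-root export list above; the `special` flag on the token comes from the unchanged part of the interface.

import type { TextGenerationStreamOutput } from "./text-generation/inference";

function collect(events: TextGenerationStreamOutput[]): string {
  let text = "";
  for (const ev of events) {
    // The final event may carry the complete text; prefer it when present.
    if (ev.generated_text != null) return ev.generated_text;
    if (!ev.token.special) text += ev.token.text; // skip special tokens
  }
  return text;
}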
@@ -12,6 +12,10 @@
     "parameters": {
       "description": "Additional inference parameters",
       "$ref": "#/$defs/TextGenerationParameters"
+    },
+    "stream": {
+      "description": "Whether to stream output tokens",
+      "type": "boolean"
     }
   },
   "$defs": {