npm - @huggingface/tasks - Versions diffs - 0.12.8 → 0.12.10 - Mend

@huggingface/tasks 0.12.8 → 0.12.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34)

package/dist/index.cjs +125 -2
package/dist/index.js +125 -2
package/dist/src/model-libraries-snippets.d.ts +1 -0
package/dist/src/model-libraries-snippets.d.ts.map +1 -1
package/dist/src/model-libraries.d.ts +16 -2
package/dist/src/model-libraries.d.ts.map +1 -1
package/dist/src/tasks/automatic-speech-recognition/inference.d.ts +2 -2
package/dist/src/tasks/chat-completion/inference.d.ts +58 -21
package/dist/src/tasks/chat-completion/inference.d.ts.map +1 -1
package/dist/src/tasks/image-to-text/inference.d.ts +2 -2
package/dist/src/tasks/index.d.ts +1 -1
package/dist/src/tasks/index.d.ts.map +1 -1
package/dist/src/tasks/text-generation/inference.d.ts +62 -0
package/dist/src/tasks/text-generation/inference.d.ts.map +1 -1
package/dist/src/tasks/text-to-audio/inference.d.ts +2 -2
package/dist/src/tasks/text-to-speech/inference.d.ts +6 -8
package/dist/src/tasks/text-to-speech/inference.d.ts.map +1 -1
package/package.json +1 -1
package/src/model-libraries-snippets.ts +6 -0
package/src/model-libraries.ts +14 -0
package/src/tasks/automatic-speech-recognition/inference.ts +2 -2
package/src/tasks/chat-completion/inference.ts +66 -21
package/src/tasks/chat-completion/spec/input.json +163 -40
package/src/tasks/chat-completion/spec/output.json +28 -18
package/src/tasks/chat-completion/spec/stream_output.json +57 -14
package/src/tasks/common-definitions.json +2 -2
package/src/tasks/image-to-text/inference.ts +2 -2
package/src/tasks/index.ts +5 -3
package/src/tasks/text-generation/inference.ts +62 -0
package/src/tasks/text-generation/spec/input.json +24 -0
package/src/tasks/text-generation/spec/stream_output.json +7 -1
package/src/tasks/text-to-audio/inference.ts +2 -2
package/src/tasks/text-to-speech/inference.ts +6 -8
package/src/tasks/text-to-speech/spec/input.json +26 -2

package/src/tasks/chat-completion/spec/input.json CHANGED

@@ -4,7 +4,7 @@
 	"description": "Chat Completion Input.\n\nAuto-generated from TGI specs.\nFor more details, check out https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.",
 	"title": "ChatCompletionInput",
 	"type": "object",
-	"required": ["model", "messages"],
+	"required": ["messages"],
 	"properties": {
 		"frequency_penalty": {
 			"type": "number",
@@ -47,7 +47,8 @@
 		"model": {
 			"type": "string",
 			"description": "[UNUSED] ID of the model to use. See the model endpoint compatibility table for details on which models work with the Chat API.",
-			"example": "mistralai/Mistral-7B-Instruct-v0.2"
+			"example": "mistralai/Mistral-7B-Instruct-v0.2",
+			"nullable": true
 		},
 		"n": {
 			"type": "integer",
@@ -64,6 +65,15 @@
 			"example": 0.1,
 			"nullable": true
 		},
+		"response_format": {
+			"allOf": [
+				{
+					"$ref": "#/$defs/ChatCompletionInputGrammarType"
+				}
+			],
+			"default": "null",
+			"nullable": true
+		},
 		"seed": {
 			"type": "integer",
 			"format": "int64",
@@ -83,6 +93,14 @@
 		"stream": {
 			"type": "boolean"
 		},
+		"stream_options": {
+			"allOf": [
+				{
+					"$ref": "#/$defs/ChatCompletionInputStreamOptions"
+				}
+			],
+			"nullable": true
+		},
 		"temperature": {
 			"type": "number",
 			"format": "float",
@@ -93,7 +111,7 @@
 		"tool_choice": {
 			"allOf": [
 				{
-					"$ref": "#/$defs/ChatCompletionInputToolType"
+					"$ref": "#/$defs/ChatCompletionInputToolChoice"
 				}
 			],
 			"nullable": true
@@ -101,7 +119,7 @@
 		"tool_prompt": {
 			"type": "string",
 			"description": "A prompt to be appended before the tools",
-			"example": "\"You will be presented with a JSON schema representing a set of tools.\nIf the user request lacks of sufficient information to make a precise tool selection: Do not invent any tool's properties, instead notify with an error message.\n\nJSON Schema:\n\"",
+			"example": "Given the functions available, please respond with a JSON for a function call with its proper arguments that best answers the given prompt. Respond in the format {name: function name, parameters: dictionary of argument name and its value}.Do not use variables.",
 			"nullable": true
 		},
 		"tools": {
@@ -132,12 +150,10 @@
 	"$defs": {
 		"ChatCompletionInputMessage": {
 			"type": "object",
-			"required": ["role"],
+			"required": ["role", "content"],
 			"properties": {
 				"content": {
-					"type": "string",
-					"example": "My name is David and I",
-					"nullable": true
+					"$ref": "#/$defs/ChatCompletionInputMessageContent"
 				},
 				"name": {
 					"type": "string",
@@ -147,68 +163,160 @@
 				"role": {
 					"type": "string",
 					"example": "user"
+				}
+			},
+			"title": "ChatCompletionInputMessage"
+		},
+		"ChatCompletionInputMessageContent": {
+			"oneOf": [
+				{
+					"type": "string"
 				},
-				"tool_calls": {
+				{
 					"type": "array",
 					"items": {
-						"$ref": "#/$defs/ChatCompletionInputToolCall"
-					},
-					"nullable": true
+						"$ref": "#/$defs/ChatCompletionInputMessageChunk"
+					}
+				}
+			],
+			"title": "ChatCompletionInputMessageContent"
+		},
+		"ChatCompletionInputMessageChunk": {
+			"oneOf": [
+				{
+					"type": "object",
+					"required": ["text", "type"],
+					"properties": {
+						"text": {
+							"type": "string"
+						},
+						"type": {
+							"type": "string",
+							"enum": ["text"]
+						}
+					}
+				},
+				{
+					"type": "object",
+					"required": ["image_url", "type"],
+					"properties": {
+						"image_url": {
+							"$ref": "#/$defs/ChatCompletionInputUrl"
+						},
+						"type": {
+							"type": "string",
+							"enum": ["image_url"]
+						}
+					}
 				}
+			],
+			"discriminator": {
+				"propertyName": "type"
 			},
-			"title": "ChatCompletionInputMessage"
+			"title": "ChatCompletionInputMessageChunk"
 		},
-		"ChatCompletionInputToolCall": {
+		"ChatCompletionInputUrl": {
 			"type": "object",
-			"required": ["id", "type", "function"],
+			"required": ["url"],
 			"properties": {
-				"function": {
-					"$ref": "#/$defs/ChatCompletionInputFunctionDefinition"
-				},
-				"id": {
-					"type": "integer",
-					"format": "int32",
-					"minimum": 0
-				},
-				"type": {
+				"url": {
 					"type": "string"
 				}
 			},
-			"title": "ChatCompletionInputToolCall"
+			"title": "ChatCompletionInputUrl"
 		},
-		"ChatCompletionInputFunctionDefinition": {
+		"ChatCompletionInputGrammarType": {
+			"oneOf": [
+				{
+					"type": "object",
+					"required": ["type", "value"],
+					"properties": {
+						"type": {
+							"type": "string",
+							"enum": ["json"]
+						},
+						"value": {
+							"description": "A string that represents a [JSON Schema](https://json-schema.org/).\n\nJSON Schema is a declarative language that allows to annotate JSON documents\nwith types and descriptions."
+						}
+					}
+				},
+				{
+					"type": "object",
+					"required": ["type", "value"],
+					"properties": {
+						"type": {
+							"type": "string",
+							"enum": ["regex"]
+						},
+						"value": {
+							"type": "string"
+						}
+					}
+				}
+			],
+			"discriminator": {
+				"propertyName": "type"
+			},
+			"title": "ChatCompletionInputGrammarType"
+		},
+		"ChatCompletionInputStreamOptions": {
 			"type": "object",
-			"required": ["name", "arguments"],
+			"required": ["include_usage"],
 			"properties": {
-				"arguments": {},
-				"description": {
-					"type": "string",
-					"nullable": true
-				},
-				"name": {
-					"type": "string"
+				"include_usage": {
+					"type": "boolean",
+					"description": "If set, an additional chunk will be streamed before the data: [DONE] message. The usage field on this chunk shows the token usage statistics for the entire request, and the choices field will always be an empty array. All other chunks will also include a usage field, but with a null value.",
+					"example": "true"
 				}
 			},
-			"title": "ChatCompletionInputFunctionDefinition"
+			"title": "ChatCompletionInputStreamOptions"
+		},
+		"ChatCompletionInputToolChoice": {
+			"allOf": [
+				{
+					"$ref": "#/$defs/ChatCompletionInputToolType"
+				}
+			],
+			"nullable": true,
+			"title": "ChatCompletionInputToolChoice"
 		},
 		"ChatCompletionInputToolType": {
 			"oneOf": [
 				{
 					"type": "object",
-					"required": ["FunctionName"],
+					"default": null,
+					"nullable": true
+				},
+				{
+					"type": "string"
+				},
+				{
+					"type": "object",
+					"required": ["function"],
 					"properties": {
-						"FunctionName": {
-							"type": "string"
+						"function": {
+							"$ref": "#/$defs/ChatCompletionInputFunctionName"
 						}
 					}
 				},
 				{
-					"type": "string",
-					"enum": ["OneOf"]
+					"type": "object",
+					"default": null,
+					"nullable": true
 				}
 			],
 			"title": "ChatCompletionInputToolType"
 		},
+		"ChatCompletionInputFunctionName": {
+			"type": "object",
+			"required": ["name"],
+			"properties": {
+				"name": {
+					"type": "string"
+				}
+			},
+			"title": "ChatCompletionInputFunctionName"
+		},
 		"ChatCompletionInputTool": {
 			"type": "object",
 			"required": ["type", "function"],
@@ -222,6 +330,21 @@
 				}
 			},
 			"title": "ChatCompletionInputTool"
+		},
+		"ChatCompletionInputFunctionDefinition": {
+			"type": "object",
+			"required": ["name", "arguments"],
+			"properties": {
+				"arguments": {},
+				"description": {
+					"type": "string",
+					"nullable": true
+				},
+				"name": {
+					"type": "string"
+				}
+			},
+			"title": "ChatCompletionInputFunctionDefinition"
 		}
 	}
 }

package/src/tasks/chat-completion/spec/output.json CHANGED

@@ -4,7 +4,7 @@
 	"description": "Chat Completion Output.\n\nAuto-generated from TGI specs.\nFor more details, check out https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.",
 	"title": "ChatCompletionOutput",
 	"type": "object",
-	"required": ["id", "object", "created", "model", "system_fingerprint", "choices", "usage"],
+	"required": ["id", "created", "model", "system_fingerprint", "choices", "usage"],
 	"properties": {
 		"choices": {
 			"type": "array",
@@ -25,9 +25,6 @@
 			"type": "string",
 			"example": "mistralai/Mistral-7B-Instruct-v0.2"
 		},
-		"object": {
-			"type": "string"
-		},
 		"system_fingerprint": {
 			"type": "string"
 		},
@@ -110,32 +107,47 @@
 			"title": "ChatCompletionOutputTopLogprob"
 		},
 		"ChatCompletionOutputMessage": {
+			"oneOf": [
+				{
+					"$ref": "#/$defs/ChatCompletionOutputTextMessage"
+				},
+				{
+					"$ref": "#/$defs/ChatCompletionOutputToolCallMessage"
+				}
+			],
+			"title": "ChatCompletionOutputMessage"
+		},
+		"ChatCompletionOutputTextMessage": {
 			"type": "object",
-			"required": ["role"],
+			"required": ["role", "content"],
 			"properties": {
 				"content": {
 					"type": "string",
-					"example": "My name is David and I",
-					"nullable": true
-				},
-				"name": {
-					"type": "string",
-					"example": "\"David\"",
-					"nullable": true
+					"example": "My name is David and I"
 				},
 				"role": {
 					"type": "string",
 					"example": "user"
+				}
+			},
+			"title": "ChatCompletionOutputTextMessage"
+		},
+		"ChatCompletionOutputToolCallMessage": {
+			"type": "object",
+			"required": ["role", "tool_calls"],
+			"properties": {
+				"role": {
+					"type": "string",
+					"example": "assistant"
 				},
 				"tool_calls": {
 					"type": "array",
 					"items": {
 						"$ref": "#/$defs/ChatCompletionOutputToolCall"
-					},
-					"nullable": true
+					}
 				}
 			},
-			"title": "ChatCompletionOutputMessage"
+			"title": "ChatCompletionOutputToolCallMessage"
 		},
 		"ChatCompletionOutputToolCall": {
 			"type": "object",
@@ -145,9 +157,7 @@
 					"$ref": "#/$defs/ChatCompletionOutputFunctionDefinition"
 				},
 				"id": {
-					"type": "integer",
-					"format": "int32",
-					"minimum": 0
+					"type": "string"
 				},
 				"type": {
 					"type": "string"

package/src/tasks/chat-completion/spec/stream_output.json CHANGED

@@ -4,7 +4,7 @@
 	"description": "Chat Completion Stream Output.\n\nAuto-generated from TGI specs.\nFor more details, check out https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.",
 	"title": "ChatCompletionStreamOutput",
 	"type": "object",
-	"required": ["id", "object", "created", "model", "system_fingerprint", "choices"],
+	"required": ["id", "created", "model", "system_fingerprint", "choices"],
 	"properties": {
 		"choices": {
 			"type": "array",
@@ -25,11 +25,16 @@
 			"type": "string",
 			"example": "mistralai/Mistral-7B-Instruct-v0.2"
 		},
-		"object": {
-			"type": "string"
-		},
 		"system_fingerprint": {
 			"type": "string"
+		},
+		"usage": {
+			"allOf": [
+				{
+					"$ref": "#/$defs/ChatCompletionStreamOutputUsage"
+				}
+			],
+			"nullable": true
 		}
 	},
 	"$defs": {
@@ -61,28 +66,44 @@
 			"title": "ChatCompletionStreamOutputChoice"
 		},
 		"ChatCompletionStreamOutputDelta": {
+			"oneOf": [
+				{
+					"$ref": "#/$defs/ChatCompletionStreamOutputTextMessage"
+				},
+				{
+					"$ref": "#/$defs/ChatCompletionStreamOutputToolCallDelta"
+				}
+			],
+			"title": "ChatCompletionStreamOutputDelta"
+		},
+		"ChatCompletionStreamOutputTextMessage": {
 			"type": "object",
-			"required": ["role"],
+			"required": ["role", "content"],
 			"properties": {
 				"content": {
 					"type": "string",
-					"example": "What is Deep Learning?",
-					"nullable": true
+					"example": "My name is David and I"
 				},
 				"role": {
 					"type": "string",
 					"example": "user"
+				}
+			},
+			"title": "ChatCompletionStreamOutputTextMessage"
+		},
+		"ChatCompletionStreamOutputToolCallDelta": {
+			"type": "object",
+			"required": ["role", "tool_calls"],
+			"properties": {
+				"role": {
+					"type": "string",
+					"example": "assistant"
 				},
 				"tool_calls": {
-					"allOf": [
-						{
-							"$ref": "#/$defs/ChatCompletionStreamOutputDeltaToolCall"
-						}
-					],
-					"nullable": true
+					"$ref": "#/$defs/ChatCompletionStreamOutputDeltaToolCall"
 				}
 			},
-			"title": "ChatCompletionStreamOutputDelta"
+			"title": "ChatCompletionStreamOutputToolCallDelta"
 		},
 		"ChatCompletionStreamOutputDeltaToolCall": {
 			"type": "object",
@@ -165,6 +186,28 @@
 				}
 			},
 			"title": "ChatCompletionStreamOutputTopLogprob"
+		},
+		"ChatCompletionStreamOutputUsage": {
+			"type": "object",
+			"required": ["prompt_tokens", "completion_tokens", "total_tokens"],
+			"properties": {
+				"completion_tokens": {
+					"type": "integer",
+					"format": "int32",
+					"minimum": 0
+				},
+				"prompt_tokens": {
+					"type": "integer",
+					"format": "int32",
+					"minimum": 0
+				},
+				"total_tokens": {
+					"type": "integer",
+					"format": "int32",
+					"minimum": 0
+				}
+			},
+			"title": "ChatCompletionStreamOutputUsage"
 		}
 	}
 }

package/src/tasks/common-definitions.json CHANGED

@@ -59,7 +59,7 @@
 				},
 				"max_new_tokens": {
 					"type": "integer",
-					"description": "The maximum number of tokens to generate. Takes precedence over maxLength."
+					"description": "The maximum number of tokens to generate. Takes precedence over max_length."
 				},
 				"min_length": {
 					"type": "integer",
@@ -67,7 +67,7 @@
 				},
 				"min_new_tokens": {
 					"type": "integer",
-					"description": "The minimum number of tokens to generate. Takes precedence over maxLength."
+					"description": "The minimum number of tokens to generate. Takes precedence over min_length."
 				},
 				"do_sample": {
 					"type": "boolean",

package/src/tasks/image-to-text/inference.ts CHANGED

@@ -72,7 +72,7 @@ export interface GenerationParameters {
 	 */
 	max_length?: number;
 	/**
-	 * The maximum number of tokens to generate. Takes precedence over maxLength.
+	 * The maximum number of tokens to generate. Takes precedence over max_length.
 	 */
 	max_new_tokens?: number;
 	/**
@@ -80,7 +80,7 @@ export interface GenerationParameters {
 	 */
 	min_length?: number;
 	/**
-	 * The minimum number of tokens to generate. Takes precedence over maxLength.
+	 * The minimum number of tokens to generate. Takes precedence over min_length.
 	 */
 	min_new_tokens?: number;
 	/**

package/src/tasks/index.ts CHANGED

@@ -39,6 +39,8 @@ import zeroShotImageClassification from "./zero-shot-image-classification/data";
 import zeroShotObjectDetection from "./zero-shot-object-detection/data";
 import imageTo3D from "./image-to-3d/data";
 import textTo3D from "./text-to-3d/data";
+import keypointDetection from "./keypoint-detection/data";
+import videoTextToText from "./video-text-to-text/data";
 export type * from "./audio-classification/inference";
 export type * from "./automatic-speech-recognition/inference";
@@ -71,7 +73,7 @@ export type * from "./sentence-similarity/inference";
 export type * from "./summarization/inference";
 export type * from "./table-question-answering/inference";
 export type { TextToImageInput, TextToImageOutput, TextToImageParameters } from "./text-to-image/inference";
-export type { TextToAudioParameters, TextToSpeechInput, TextToSpeechOutput } from "./text-to-speech/inference";
+export type { TextToSpeechParameters, TextToSpeechInput, TextToSpeechOutput } from "./text-to-speech/inference";
 export type * from "./token-classification/inference";
 export type { TranslationInput, TranslationOutput } from "./translation/inference";
 export type {
@@ -208,7 +210,7 @@ export const TASKS_DATA: Record<PipelineType, TaskData | undefined> = {
 	"image-text-to-text": getData("image-text-to-text", imageTextToText),
 	"image-to-text": getData("image-to-text", imageToText),
 	"image-to-video": undefined,
-	"keypoint-detection": getData("keypoint-detection", placeholder),
+	"keypoint-detection": getData("keypoint-detection", keypointDetection),
 	"mask-generation": getData("mask-generation", maskGeneration),
 	"multiple-choice": undefined,
 	"object-detection": getData("object-detection", objectDetection),
@@ -236,7 +238,7 @@ export const TASKS_DATA: Record<PipelineType, TaskData | undefined> = {
 	"token-classification": getData("token-classification", tokenClassification),
 	translation: getData("translation", translation),
 	"unconditional-image-generation": getData("unconditional-image-generation", unconditionalImageGeneration),
-	"video-text-to-text": getData("video-text-to-text", placeholder),
+	"video-text-to-text": getData("video-text-to-text", videoTextToText),
 	"visual-question-answering": getData("visual-question-answering", visualQuestionAnswering),
 	"voice-activity-detection": undefined,
 	"zero-shot-classification": getData("zero-shot-classification", zeroShotClassification),

package/src/tasks/text-generation/inference.ts CHANGED

@@ -19,23 +19,84 @@ export interface TextGenerationInput {
 }
 export interface TextGenerationInputGenerateParameters {
+	/**
+	 * Lora adapter id
+	 */
+	adapter_id?: string;
+	/**
+	 * Generate best_of sequences and return the one if the highest token logprobs.
+	 */
 	best_of?: number;
+	/**
+	 * Whether to return decoder input token logprobs and ids.
+	 */
 	decoder_input_details?: boolean;
+	/**
+	 * Whether to return generation details.
+	 */
 	details?: boolean;
+	/**
+	 * Activate logits sampling.
+	 */
 	do_sample?: boolean;
+	/**
+	 * The parameter for frequency penalty. 1.0 means no penalty
+	 * Penalize new tokens based on their existing frequency in the text so far,
+	 * decreasing the model's likelihood to repeat the same line verbatim.
+	 */
 	frequency_penalty?: number;
 	grammar?: TextGenerationInputGrammarType;
+	/**
+	 * Maximum number of tokens to generate.
+	 */
 	max_new_tokens?: number;
+	/**
+	 * The parameter for repetition penalty. 1.0 means no penalty.
+	 * See [this paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.
+	 */
 	repetition_penalty?: number;
+	/**
+	 * Whether to prepend the prompt to the generated text
+	 */
 	return_full_text?: boolean;
+	/**
+	 * Random sampling seed.
+	 */
 	seed?: number;
+	/**
+	 * Stop generating tokens if a member of `stop` is generated.
+	 */
 	stop?: string[];
+	/**
+	 * The value used to module the logits distribution.
+	 */
 	temperature?: number;
+	/**
+	 * The number of highest probability vocabulary tokens to keep for top-k-filtering.
+	 */
 	top_k?: number;
+	/**
+	 * The number of highest probability vocabulary tokens to keep for top-n-filtering.
+	 */
 	top_n_tokens?: number;
+	/**
+	 * Top-p value for nucleus sampling.
+	 */
 	top_p?: number;
+	/**
+	 * Truncate inputs tokens to the given size.
+	 */
 	truncate?: number;
+	/**
+	 * Typical Decoding mass
+	 * See [Typical Decoding for Natural Language Generation](https://arxiv.org/abs/2202.00666)
+	 * for more information.
+	 */
 	typical_p?: number;
+	/**
+	 * Watermarking with [A Watermark for Large Language
+	 * Models](https://arxiv.org/abs/2301.10226).
+	 */
 	watermark?: boolean;
 	[property: string]: unknown;
 }
@@ -125,6 +186,7 @@ export interface TextGenerationStreamOutput {
 export interface TextGenerationStreamOutputStreamDetails {
 	finish_reason: TextGenerationOutputFinishReason;
 	generated_tokens: number;
+	input_length: number;
 	seed?: number;
 	[property: string]: unknown;
 }