npm - @huggingface/tasks - Versions diffs - 0.13.16 → 0.14.0 - Mend

@huggingface/tasks 0.13.16 → 0.14.0

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (165)

package/src/tasks/automatic-speech-recognition/spec/input.json CHANGED Viewed

@@ -7,7 +7,8 @@
 	"properties": {
 		"inputs": {
 			"description": "The input audio data as a base64-encoded string. If no `parameters` are provided, you can also provide the audio data as a raw bytes payload.",
-			"type": "string"
+			"type": "string",
+			"comment": "type=binary"
 		},
 		"parameters": {
 			"description": "Additional inference parameters for Automatic Speech Recognition",

package/src/tasks/chat-completion/inference.ts CHANGED Viewed

@@ -3,7 +3,6 @@
  *
  * Using src/scripts/inference-codegen
  */
 /**
  * Chat Completion Input.
  *
@@ -105,30 +104,24 @@ export interface ChatCompletionInput {
 	top_p?: number;
 	[property: string]: unknown;
 }
 export interface ChatCompletionInputMessage {
 	content: ChatCompletionInputMessageContent;
 	name?: string;
 	role: string;
 	[property: string]: unknown;
 }
 export type ChatCompletionInputMessageContent = ChatCompletionInputMessageChunk[] | string;
 export interface ChatCompletionInputMessageChunk {
 	image_url?: ChatCompletionInputURL;
 	text?: string;
 	type: ChatCompletionInputMessageChunkType;
 	[property: string]: unknown;
 }
 export interface ChatCompletionInputURL {
 	url: string;
 	[property: string]: unknown;
 }
 export type ChatCompletionInputMessageChunkType = "text" | "image_url";
 export interface ChatCompletionInputGrammarType {
 	type: ChatCompletionInputGrammarTypeType;
 	/**
@@ -140,9 +133,7 @@ export interface ChatCompletionInputGrammarType {
 	value: unknown;
 	[property: string]: unknown;
 }
 export type ChatCompletionInputGrammarTypeType = "json" | "regex";
 export interface ChatCompletionInputStreamOptions {
 	/**
 	 * If set, an additional chunk will be streamed before the data: [DONE] message. The usage
@@ -153,13 +144,11 @@ export interface ChatCompletionInputStreamOptions {
 	include_usage: boolean;
 	[property: string]: unknown;
 }
 /**
  *
  * <https://platform.openai.com/docs/guides/function-calling/configuring-function-calling-behavior-using-the-tool_choice-parameter>
  */
 export type ChatCompletionInputToolChoice = ChatCompletionInputToolChoiceEnum | ChatCompletionInputToolChoiceObject;
 /**
  * Means the model can pick between generating a message or calling one or more tools.
  *
@@ -168,30 +157,25 @@ export type ChatCompletionInputToolChoice = ChatCompletionInputToolChoiceEnum |
  * Means the model must call one or more tools.
  */
 export type ChatCompletionInputToolChoiceEnum = "auto" | "none" | "required";
 export interface ChatCompletionInputToolChoiceObject {
 	function: ChatCompletionInputFunctionName;
 	[property: string]: unknown;
 }
 export interface ChatCompletionInputFunctionName {
 	name: string;
 	[property: string]: unknown;
 }
 export interface ChatCompletionInputTool {
 	function: ChatCompletionInputFunctionDefinition;
 	type: string;
 	[property: string]: unknown;
 }
 export interface ChatCompletionInputFunctionDefinition {
 	arguments: unknown;
 	description?: string;
 	name: string;
 	[property: string]: unknown;
 }
 /**
  * Chat Completion Output.
  *
@@ -208,7 +192,6 @@ export interface ChatCompletionOutput {
 	usage: ChatCompletionOutputUsage;
 	[property: string]: unknown;
 }
 export interface ChatCompletionOutputComplete {
 	finish_reason: string;
 	index: number;
@@ -216,53 +199,45 @@ export interface ChatCompletionOutputComplete {
 	message: ChatCompletionOutputMessage;
 	[property: string]: unknown;
 }
 export interface ChatCompletionOutputLogprobs {
 	content: ChatCompletionOutputLogprob[];
 	[property: string]: unknown;
 }
 export interface ChatCompletionOutputLogprob {
 	logprob: number;
 	token: string;
 	top_logprobs: ChatCompletionOutputTopLogprob[];
 	[property: string]: unknown;
 }
 export interface ChatCompletionOutputTopLogprob {
 	logprob: number;
 	token: string;
 	[property: string]: unknown;
 }
 export interface ChatCompletionOutputMessage {
 	content?: string;
 	role: string;
 	tool_calls?: ChatCompletionOutputToolCall[];
 	[property: string]: unknown;
 }
 export interface ChatCompletionOutputToolCall {
 	function: ChatCompletionOutputFunctionDefinition;
 	id: string;
 	type: string;
 	[property: string]: unknown;
 }
 export interface ChatCompletionOutputFunctionDefinition {
 	arguments: unknown;
 	description?: string;
 	name: string;
 	[property: string]: unknown;
 }
 export interface ChatCompletionOutputUsage {
 	completion_tokens: number;
 	prompt_tokens: number;
 	total_tokens: number;
 	[property: string]: unknown;
 }
 /**
  * Chat Completion Stream Output.
  *
@@ -279,7 +254,6 @@ export interface ChatCompletionStreamOutput {
 	usage?: ChatCompletionStreamOutputUsage;
 	[property: string]: unknown;
 }
 export interface ChatCompletionStreamOutputChoice {
 	delta: ChatCompletionStreamOutputDelta;
 	finish_reason?: string;
@@ -287,14 +261,12 @@ export interface ChatCompletionStreamOutputChoice {
 	logprobs?: ChatCompletionStreamOutputLogprobs;
 	[property: string]: unknown;
 }
 export interface ChatCompletionStreamOutputDelta {
 	content?: string;
 	role: string;
 	tool_calls?: ChatCompletionStreamOutputDeltaToolCall;
 	[property: string]: unknown;
 }
 export interface ChatCompletionStreamOutputDeltaToolCall {
 	function: ChatCompletionStreamOutputFunction;
 	id: string;
@@ -302,31 +274,26 @@ export interface ChatCompletionStreamOutputDeltaToolCall {
 	type: string;
 	[property: string]: unknown;
 }
 export interface ChatCompletionStreamOutputFunction {
 	arguments: string;
 	name?: string;
 	[property: string]: unknown;
 }
 export interface ChatCompletionStreamOutputLogprobs {
 	content: ChatCompletionStreamOutputLogprob[];
 	[property: string]: unknown;
 }
 export interface ChatCompletionStreamOutputLogprob {
 	logprob: number;
 	token: string;
 	top_logprobs: ChatCompletionStreamOutputTopLogprob[];
 	[property: string]: unknown;
 }
 export interface ChatCompletionStreamOutputTopLogprob {
 	logprob: number;
 	token: string;
 	[property: string]: unknown;
 }
 export interface ChatCompletionStreamOutputUsage {
 	completion_tokens: number;
 	prompt_tokens: number;

package/src/tasks/depth-estimation/inference.ts CHANGED Viewed

@@ -3,7 +3,6 @@
  *
  * Using src/scripts/inference-codegen
  */
 /**
  * Inputs for Depth Estimation inference
  */
@@ -15,10 +14,11 @@ export interface DepthEstimationInput {
 	/**
 	 * Additional inference parameters for Depth Estimation
 	 */
-	parameters?: { [key: string]: unknown };
+	parameters?: {
+		[key: string]: unknown;
+	};
 	[property: string]: unknown;
 }
 /**
  * Outputs of inference for the Depth Estimation task
  */

package/src/tasks/document-question-answering/spec/input.json CHANGED Viewed

@@ -11,7 +11,8 @@
 			"title": "DocumentQuestionAnsweringInputData",
 			"properties": {
 				"image": {
-					"description": "The image on which the question is asked"
+					"description": "The image on which the question is asked",
+					"comment": "type=binary"
 				},
 				"question": {
 					"type": "string",

package/src/tasks/feature-extraction/inference.ts CHANGED Viewed

@@ -3,9 +3,7 @@
  *
  * Using src/scripts/inference-codegen
  */
 export type FeatureExtractionOutput = Array<number[]>;
 /**
  * Feature Extraction Input.
  *
@@ -36,5 +34,4 @@ export interface FeatureExtractionInput {
 	truncation_direction?: FeatureExtractionInputTruncationDirection;
 	[property: string]: unknown;
 }
 export type FeatureExtractionInputTruncationDirection = "Left" | "Right";

package/src/tasks/image-classification/inference.ts CHANGED Viewed

@@ -11,7 +11,7 @@ export interface ImageClassificationInput {
 	 * The input image data as a base64-encoded string. If no `parameters` are provided, you can
 	 * also provide the image data as a raw bytes payload.
 	 */
-	inputs: string;
+	inputs: Blob;
 	/**
 	 * Additional inference parameters for Image Classification
 	 */

package/src/tasks/image-classification/spec/input.json CHANGED Viewed

@@ -7,7 +7,8 @@
 	"properties": {
 		"inputs": {
 			"type": "string",
-			"description": "The input image data as a base64-encoded string. If no `parameters` are provided, you can also provide the image data as a raw bytes payload."
+			"description": "The input image data as a base64-encoded string. If no `parameters` are provided, you can also provide the image data as a raw bytes payload.",
+			"comment": "type=binary"
 		},
 		"parameters": {
 			"description": "Additional inference parameters for Image Classification",

package/src/tasks/image-segmentation/inference.ts CHANGED Viewed

@@ -11,7 +11,7 @@ export interface ImageSegmentationInput {
 	 * The input image data as a base64-encoded string. If no `parameters` are provided, you can
 	 * also provide the image data as a raw bytes payload.
 	 */
-	inputs: string;
+	inputs: Blob;
 	/**
 	 * Additional inference parameters for Image Segmentation
 	 */

package/src/tasks/image-segmentation/spec/input.json CHANGED Viewed

@@ -7,7 +7,8 @@
 	"properties": {
 		"inputs": {
 			"type": "string",
-			"description": "The input image data as a base64-encoded string. If no `parameters` are provided, you can also provide the image data as a raw bytes payload."
+			"description": "The input image data as a base64-encoded string. If no `parameters` are provided, you can also provide the image data as a raw bytes payload.",
+			"comment": "type=binary"
 		},
 		"parameters": {
 			"description": "Additional inference parameters for Image Segmentation",

package/src/tasks/image-to-image/inference.ts CHANGED Viewed

@@ -3,7 +3,6 @@
  *
  * Using src/scripts/inference-codegen
  */
 /**
  * Inputs for Image To Image inference
  */
@@ -12,14 +11,13 @@ export interface ImageToImageInput {
 	 * The input image data as a base64-encoded string. If no `parameters` are provided, you can
 	 * also provide the image data as a raw bytes payload.
 	 */
-	inputs: string;
+	inputs: Blob;
 	/**
 	 * Additional inference parameters for Image To Image
 	 */
 	parameters?: ImageToImageParameters;
 	[property: string]: unknown;
 }
 /**
  * Additional inference parameters for Image To Image
  */
@@ -30,9 +28,9 @@ export interface ImageToImageParameters {
 	 */
 	guidance_scale?: number;
 	/**
-	 * One or several prompt to guide what NOT to include in image generation.
+	 * One prompt to guide what NOT to include in image generation.
 	 */
-	negative_prompt?: string[];
+	negative_prompt?: string;
 	/**
 	 * For diffusion models. The number of denoising steps. More denoising steps usually lead to
 	 * a higher quality image at the expense of slower inference.
@@ -44,7 +42,6 @@ export interface ImageToImageParameters {
 	target_size?: TargetSize;
 	[property: string]: unknown;
 }
 /**
  * The size in pixel of the output image.
  */
@@ -53,7 +50,6 @@ export interface TargetSize {
 	width: number;
 	[property: string]: unknown;
 }
 /**
  * Outputs of inference for the Image To Image task
  */

package/src/tasks/image-to-image/spec/input.json CHANGED Viewed

@@ -7,7 +7,8 @@
 	"properties": {
 		"inputs": {
 			"type": "string",
-			"description": "The input image data as a base64-encoded string. If no `parameters` are provided, you can also provide the image data as a raw bytes payload."
+			"description": "The input image data as a base64-encoded string. If no `parameters` are provided, you can also provide the image data as a raw bytes payload.",
+			"comment": "type=binary"
 		},
 		"parameters": {
 			"description": "Additional inference parameters for Image To Image",
@@ -24,11 +25,8 @@
 					"description": "For diffusion models. A higher guidance scale value encourages the model to generate images closely linked to the text prompt at the expense of lower image quality."
 				},
 				"negative_prompt": {
-					"type": "array",
-					"items": {
-						"type": "string"
-					},
-					"description": "One or several prompt to guide what NOT to include in image generation."
+					"type": "string",
+					"description": "One prompt to guide what NOT to include in image generation."
 				},
 				"num_inference_steps": {
 					"type": "integer",

package/src/tasks/image-to-text/inference.ts CHANGED Viewed

@@ -3,7 +3,6 @@
  *
  * Using src/scripts/inference-codegen
  */
 /**
  * Inputs for Image To Text inference
  */
@@ -11,14 +10,13 @@ export interface ImageToTextInput {
 	/**
 	 * The input image data
 	 */
-	inputs: unknown;
+	inputs: Blob;
 	/**
 	 * Additional inference parameters for Image To Text
 	 */
 	parameters?: ImageToTextParameters;
 	[property: string]: unknown;
 }
 /**
  * Additional inference parameters for Image To Text
  */
@@ -33,7 +31,6 @@ export interface ImageToTextParameters {
 	max_new_tokens?: number;
 	[property: string]: unknown;
 }
 /**
  * Parametrization of the text generation process
  */
@@ -120,12 +117,10 @@ export interface GenerationParameters {
 	use_cache?: boolean;
 	[property: string]: unknown;
 }
 /**
  * Controls the stopping condition for beam-based methods.
  */
 export type EarlyStoppingUnion = boolean | "never";
 /**
  * Outputs of inference for the Image To Text task
  */

package/src/tasks/image-to-text/spec/input.json CHANGED Viewed

@@ -6,7 +6,8 @@
 	"type": "object",
 	"properties": {
 		"inputs": {
-			"description": "The input image data"
+			"description": "The input image data",
+			"comment": "type=binary"
 		},
 		"parameters": {
 			"description": "Additional inference parameters for Image To Text",

package/src/tasks/index.ts CHANGED Viewed

@@ -73,6 +73,7 @@ export type * from "./sentence-similarity/inference.js";
 export type * from "./summarization/inference.js";
 export type * from "./table-question-answering/inference.js";
 export type { TextToImageInput, TextToImageOutput, TextToImageParameters } from "./text-to-image/inference.js";
+export type { TextToVideoParameters, TextToVideoOutput, TextToVideoInput } from "./text-to-video/inference.js";
 export type { TextToSpeechParameters, TextToSpeechInput, TextToSpeechOutput } from "./text-to-speech/inference.js";
 export type * from "./token-classification/inference.js";
 export type { TranslationInput, TranslationOutput } from "./translation/inference.js";

package/src/tasks/object-detection/inference.ts CHANGED Viewed

@@ -11,7 +11,7 @@ export interface ObjectDetectionInput {
 	 * The input image data as a base64-encoded string. If no `parameters` are provided, you can
 	 * also provide the image data as a raw bytes payload.
 	 */
-	inputs: string;
+	inputs: Blob;
 	/**
 	 * Additional inference parameters for Object Detection
 	 */

package/src/tasks/object-detection/spec/input.json CHANGED Viewed

@@ -7,7 +7,8 @@
 	"properties": {
 		"inputs": {
 			"type": "string",
-			"description": "The input image data as a base64-encoded string. If no `parameters` are provided, you can also provide the image data as a raw bytes payload."
+			"description": "The input image data as a base64-encoded string. If no `parameters` are provided, you can also provide the image data as a raw bytes payload.",
+			"comment": "type=binary"
 		},
 		"parameters": {
 			"description": "Additional inference parameters for Object Detection",

package/src/tasks/sentence-similarity/inference.ts CHANGED Viewed

@@ -3,9 +3,7 @@
  *
  * Using src/scripts/inference-codegen
  */
 export type SentenceSimilarityOutput = number[];
 /**
  * Inputs for Sentence similarity inference
  */
@@ -14,10 +12,11 @@ export interface SentenceSimilarityInput {
 	/**
 	 * Additional inference parameters for Sentence Similarity
 	 */
-	parameters?: { [key: string]: unknown };
+	parameters?: {
+		[key: string]: unknown;
+	};
 	[property: string]: unknown;
 }
 export interface SentenceSimilarityInputData {
 	/**
 	 * A list of strings which will be compared against the source_sentence.

package/src/tasks/summarization/inference.ts CHANGED Viewed

@@ -3,7 +3,6 @@
  *
  * Using src/scripts/inference-codegen
  */
 /**
  * Inputs for Summarization inference
  */
@@ -18,7 +17,6 @@ export interface SummarizationInput {
 	parameters?: SummarizationParameters;
 	[property: string]: unknown;
 }
 /**
  * Additional inference parameters for summarization.
  */
@@ -30,19 +28,19 @@ export interface SummarizationParameters {
 	/**
 	 * Additional parametrization of the text generation algorithm.
 	 */
-	generate_parameters?: { [key: string]: unknown };
+	generate_parameters?: {
+		[key: string]: unknown;
+	};
 	/**
 	 * The truncation strategy to use.
 	 */
 	truncation?: SummarizationTruncationStrategy;
 	[property: string]: unknown;
 }
 /**
  * The truncation strategy to use.
  */
 export type SummarizationTruncationStrategy = "do_not_truncate" | "longest_first" | "only_first" | "only_second";
 /**
  * Outputs of inference for the Summarization task
  */

package/src/tasks/text-generation/inference.ts CHANGED Viewed

@@ -3,7 +3,6 @@
  *
  * Using src/scripts/inference-codegen
  */
 /**
  * Text Generation Input.
  *
@@ -17,7 +16,6 @@ export interface TextGenerationInput {
 	stream?: boolean;
 	[property: string]: unknown;
 }
 export interface TextGenerationInputGenerateParameters {
 	/**
 	 * Lora adapter id
@@ -100,7 +98,6 @@ export interface TextGenerationInputGenerateParameters {
 	watermark?: boolean;
 	[property: string]: unknown;
 }
 export interface TextGenerationInputGrammarType {
 	type: Type;
 	/**
@@ -112,9 +109,7 @@ export interface TextGenerationInputGrammarType {
 	value: unknown;
 	[property: string]: unknown;
 }
 export type Type = "json" | "regex";
 /**
  * Text Generation Output.
  *
@@ -127,7 +122,6 @@ export interface TextGenerationOutput {
 	generated_text: string;
 	[property: string]: unknown;
 }
 export interface TextGenerationOutputDetails {
 	best_of_sequences?: TextGenerationOutputBestOfSequence[];
 	finish_reason: TextGenerationOutputFinishReason;
@@ -138,7 +132,6 @@ export interface TextGenerationOutputDetails {
 	top_tokens?: Array<TextGenerationOutputToken[]>;
 	[property: string]: unknown;
 }
 export interface TextGenerationOutputBestOfSequence {
 	finish_reason: TextGenerationOutputFinishReason;
 	generated_text: string;
@@ -149,16 +142,13 @@ export interface TextGenerationOutputBestOfSequence {
 	top_tokens?: Array<TextGenerationOutputToken[]>;
 	[property: string]: unknown;
 }
 export type TextGenerationOutputFinishReason = "length" | "eos_token" | "stop_sequence";
 export interface TextGenerationOutputPrefillToken {
 	id: number;
 	logprob: number;
 	text: string;
 	[property: string]: unknown;
 }
 export interface TextGenerationOutputToken {
 	id: number;
 	logprob: number;
@@ -166,7 +156,6 @@ export interface TextGenerationOutputToken {
 	text: string;
 	[property: string]: unknown;
 }
 /**
  * Text Generation Stream Output.
  *
@@ -182,7 +171,6 @@ export interface TextGenerationStreamOutput {
 	top_tokens?: TextGenerationStreamOutputToken[];
 	[property: string]: unknown;
 }
 export interface TextGenerationStreamOutputStreamDetails {
 	finish_reason: TextGenerationOutputFinishReason;
 	generated_tokens: number;
@@ -190,7 +178,6 @@ export interface TextGenerationStreamOutputStreamDetails {
 	seed?: number;
 	[property: string]: unknown;
 }
 export interface TextGenerationStreamOutputToken {
 	id: number;
 	logprob: number;

package/src/tasks/text-to-audio/inference.ts CHANGED Viewed

@@ -1,9 +1,22 @@
+/**
+ * Outputs of inference for the Text To Audio task
+ */
+export interface TextToAudioOutput {
+	/**
+	 * The generated audio waveform.
+	 */
+	audio: Blob;
+	/**
+	 * The sampling rate of the generated audio waveform.
+	 */
+	sampling_rate: number;
+	[property: string]: unknown;
+}
 /**
  * Inference code generated from the JSON schema spec in ./spec
  *
  * Using src/scripts/inference-codegen
  */
 /**
  * Inputs for Text To Audio inference
  */
@@ -18,7 +31,6 @@ export interface TextToAudioInput {
 	parameters?: TextToAudioParameters;
 	[property: string]: unknown;
 }
 /**
  * Additional inference parameters for Text To Audio
  */
@@ -29,7 +41,6 @@ export interface TextToAudioParameters {
 	generation_parameters?: GenerationParameters;
 	[property: string]: unknown;
 }
 /**
  * Parametrization of the text generation process
  */
@@ -116,24 +127,7 @@ export interface GenerationParameters {
 	use_cache?: boolean;
 	[property: string]: unknown;
 }
 /**
  * Controls the stopping condition for beam-based methods.
  */
 export type EarlyStoppingUnion = boolean | "never";
-/**
- * Outputs of inference for the Text To Audio task
- */
-export interface TextToAudioOutput {
-	/**
-	 * The generated audio waveform.
-	 */
-	audio: unknown;
-	samplingRate: unknown;
-	/**
-	 * The sampling rate of the generated audio waveform.
-	 */
-	sampling_rate?: number;
-	[property: string]: unknown;
-}