@huggingface/tasks 0.11.12 → 0.11.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. package/dist/index.cjs +64 -0
  2. package/dist/index.js +64 -0
  3. package/dist/src/model-libraries-snippets.d.ts +1 -0
  4. package/dist/src/model-libraries-snippets.d.ts.map +1 -1
  5. package/dist/src/model-libraries.d.ts +15 -2
  6. package/dist/src/model-libraries.d.ts.map +1 -1
  7. package/dist/src/pipelines.d.ts +12 -2
  8. package/dist/src/pipelines.d.ts.map +1 -1
  9. package/dist/src/tasks/audio-classification/inference.d.ts +3 -2
  10. package/dist/src/tasks/audio-classification/inference.d.ts.map +1 -1
  11. package/dist/src/tasks/automatic-speech-recognition/inference.d.ts +3 -2
  12. package/dist/src/tasks/automatic-speech-recognition/inference.d.ts.map +1 -1
  13. package/dist/src/tasks/image-classification/inference.d.ts +3 -2
  14. package/dist/src/tasks/image-classification/inference.d.ts.map +1 -1
  15. package/dist/src/tasks/image-segmentation/inference.d.ts +10 -6
  16. package/dist/src/tasks/image-segmentation/inference.d.ts.map +1 -1
  17. package/dist/src/tasks/image-to-image/inference.d.ts +6 -5
  18. package/dist/src/tasks/image-to-image/inference.d.ts.map +1 -1
  19. package/dist/src/tasks/index.d.ts +1 -1
  20. package/dist/src/tasks/index.d.ts.map +1 -1
  21. package/dist/src/tasks/keypoint-detection/data.d.ts +4 -0
  22. package/dist/src/tasks/keypoint-detection/data.d.ts.map +1 -0
  23. package/dist/src/tasks/object-detection/inference.d.ts +17 -4
  24. package/dist/src/tasks/object-detection/inference.d.ts.map +1 -1
  25. package/dist/src/tasks/summarization/inference.d.ts +13 -12
  26. package/dist/src/tasks/summarization/inference.d.ts.map +1 -1
  27. package/dist/src/tasks/text-to-image/inference.d.ts +2 -2
  28. package/dist/src/tasks/translation/inference.d.ts +21 -10
  29. package/dist/src/tasks/translation/inference.d.ts.map +1 -1
  30. package/package.json +1 -1
  31. package/src/model-libraries-snippets.ts +42 -0
  32. package/src/model-libraries.ts +13 -0
  33. package/src/pipelines.ts +12 -0
  34. package/src/tasks/audio-classification/inference.ts +3 -2
  35. package/src/tasks/audio-classification/spec/input.json +2 -1
  36. package/src/tasks/audio-classification/spec/output.json +1 -0
  37. package/src/tasks/automatic-speech-recognition/inference.ts +3 -2
  38. package/src/tasks/automatic-speech-recognition/spec/input.json +2 -1
  39. package/src/tasks/common-definitions.json +3 -20
  40. package/src/tasks/image-classification/inference.ts +3 -2
  41. package/src/tasks/image-classification/spec/input.json +2 -1
  42. package/src/tasks/image-classification/spec/output.json +1 -0
  43. package/src/tasks/image-segmentation/inference.ts +10 -6
  44. package/src/tasks/image-segmentation/spec/input.json +3 -12
  45. package/src/tasks/image-segmentation/spec/output.json +4 -3
  46. package/src/tasks/image-to-image/inference.ts +6 -5
  47. package/src/tasks/image-to-image/spec/input.json +3 -2
  48. package/src/tasks/image-to-image/spec/output.json +1 -1
  49. package/src/tasks/index.ts +3 -6
  50. package/src/tasks/keypoint-detection/about.md +59 -0
  51. package/src/tasks/keypoint-detection/data.ts +46 -0
  52. package/src/tasks/object-detection/inference.ts +17 -4
  53. package/src/tasks/object-detection/spec/input.json +2 -1
  54. package/src/tasks/object-detection/spec/output.json +10 -6
  55. package/src/tasks/summarization/inference.ts +13 -12
  56. package/src/tasks/summarization/spec/input.json +37 -2
  57. package/src/tasks/text-classification/spec/output.json +1 -0
  58. package/src/tasks/text-to-image/inference.ts +2 -2
  59. package/src/tasks/text-to-image/spec/input.json +1 -1
  60. package/src/tasks/text-to-image/spec/output.json +1 -1
  61. package/src/tasks/translation/inference.ts +21 -10
  62. package/src/tasks/translation/spec/input.json +45 -2
  63. package/src/tasks/zero-shot-classification/spec/output.json +1 -0
package/dist/src/tasks/summarization/inference.d.ts CHANGED
@@ -5,43 +5,44 @@
  */
  /**
  * Inputs for Summarization inference
- *
- * Inputs for Text2text Generation inference
  */
  export interface SummarizationInput {
  /**
- * The input text data
+ * The input text to summarize.
  */
  inputs: string;
  /**
- * Additional inference parameters
+ * Additional inference parameters.
  */
- parameters?: Text2TextGenerationParameters;
+ parameters?: SummarizationParameters;
  [property: string]: unknown;
  }
  /**
- * Additional inference parameters
+ * Additional inference parameters.
  *
- * Additional inference parameters for Text2text Generation
+ * Additional inference parameters for summarization.
  */
- export interface Text2TextGenerationParameters {
+ export interface SummarizationParameters {
  /**
  * Whether to clean up the potential extra spaces in the text output.
  */
  clean_up_tokenization_spaces?: boolean;
  /**
- * Additional parametrization of the text generation algorithm
+ * Additional parametrization of the text generation algorithm.
  */
  generate_parameters?: {
  [key: string]: unknown;
  };
  /**
- * The truncation strategy to use
+ * The truncation strategy to use.
  */
- truncation?: Text2TextGenerationTruncationStrategy;
+ truncation?: SummarizationTruncationStrategy;
  [property: string]: unknown;
  }
- export type Text2TextGenerationTruncationStrategy = "do_not_truncate" | "longest_first" | "only_first" | "only_second";
+ /**
+ * The truncation strategy to use.
+ */
+ export type SummarizationTruncationStrategy = "do_not_truncate" | "longest_first" | "only_first" | "only_second";
  /**
  * Outputs of inference for the Summarization task
  */
package/dist/src/tasks/summarization/inference.d.ts.map CHANGED
@@ -1 +1 @@
- {"version":3,"file":"inference.d.ts","sourceRoot":"","sources":["../../../../src/tasks/summarization/inference.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH;;;;GAIG;AACH,MAAM,WAAW,kBAAkB;IAClC;;OAEG;IACH,MAAM,EAAE,MAAM,CAAC;IACf;;OAEG;IACH,UAAU,CAAC,EAAE,6BAA6B,CAAC;IAC3C,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC;CAC5B;AAED;;;;GAIG;AACH,MAAM,WAAW,6BAA6B;IAC7C;;OAEG;IACH,4BAA4B,CAAC,EAAE,OAAO,CAAC;IACvC;;OAEG;IACH,mBAAmB,CAAC,EAAE;QAAE,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAA;KAAE,CAAC;IACjD;;OAEG;IACH,UAAU,CAAC,EAAE,qCAAqC,CAAC;IACnD,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC;CAC5B;AAED,MAAM,MAAM,qCAAqC,GAAG,iBAAiB,GAAG,eAAe,GAAG,YAAY,GAAG,aAAa,CAAC;AAEvH;;GAEG;AACH,MAAM,WAAW,mBAAmB;IACnC;;OAEG;IACH,YAAY,EAAE,MAAM,CAAC;IACrB,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC;CAC5B"}
+ {"version":3,"file":"inference.d.ts","sourceRoot":"","sources":["../../../../src/tasks/summarization/inference.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH;;GAEG;AACH,MAAM,WAAW,kBAAkB;IAClC;;OAEG;IACH,MAAM,EAAE,MAAM,CAAC;IACf;;OAEG;IACH,UAAU,CAAC,EAAE,uBAAuB,CAAC;IACrC,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC;CAC5B;AAED;;;;GAIG;AACH,MAAM,WAAW,uBAAuB;IACvC;;OAEG;IACH,4BAA4B,CAAC,EAAE,OAAO,CAAC;IACvC;;OAEG;IACH,mBAAmB,CAAC,EAAE;QAAE,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAA;KAAE,CAAC;IACjD;;OAEG;IACH,UAAU,CAAC,EAAE,+BAA+B,CAAC;IAC7C,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC;CAC5B;AAED;;GAEG;AACH,MAAM,MAAM,+BAA+B,GAAG,iBAAiB,GAAG,eAAe,GAAG,YAAY,GAAG,aAAa,CAAC;AAEjH;;GAEG;AACH,MAAM,WAAW,mBAAmB;IACnC;;OAEG;IACH,YAAY,EAAE,MAAM,CAAC;IACrB,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC;CAC5B"}
package/dist/src/tasks/text-to-image/inference.d.ts CHANGED
@@ -8,7 +8,7 @@
  */
  export interface TextToImageInput {
  /**
- * The input text data (sometimes called "prompt"
+ * The input text data (sometimes called "prompt")
  */
  inputs: string;
  /**
@@ -60,7 +60,7 @@ export interface TargetSize {
  */
  export interface TextToImageOutput {
  /**
- * The generated image
+ * The generated image returned as raw bytes in the payload.
  */
  image: unknown;
  [property: string]: unknown;
package/dist/src/tasks/translation/inference.d.ts CHANGED
@@ -5,43 +5,54 @@
  */
  /**
  * Inputs for Translation inference
- *
- * Inputs for Text2text Generation inference
  */
  export interface TranslationInput {
  /**
- * The input text data
+ * The text to translate.
  */
  inputs: string;
  /**
  * Additional inference parameters
  */
- parameters?: Text2TextGenerationParameters;
+ parameters?: TranslationParameters;
  [property: string]: unknown;
  }
  /**
  * Additional inference parameters
  *
- * Additional inference parameters for Text2text Generation
+ * Additional inference parameters for Translation
  */
- export interface Text2TextGenerationParameters {
+ export interface TranslationParameters {
  /**
  * Whether to clean up the potential extra spaces in the text output.
  */
  clean_up_tokenization_spaces?: boolean;
  /**
- * Additional parametrization of the text generation algorithm
+ * Additional parametrization of the text generation algorithm.
  */
  generate_parameters?: {
  [key: string]: unknown;
  };
  /**
- * The truncation strategy to use
+ * The source language of the text. Required for models that can translate from multiple
+ * languages.
+ */
+ src_lang?: string;
+ /**
+ * Target language to translate to. Required for models that can translate to multiple
+ * languages.
  */
- truncation?: Text2TextGenerationTruncationStrategy;
+ tgt_lang?: string;
+ /**
+ * The truncation strategy to use.
+ */
+ truncation?: TranslationTruncationStrategy;
  [property: string]: unknown;
  }
- export type Text2TextGenerationTruncationStrategy = "do_not_truncate" | "longest_first" | "only_first" | "only_second";
+ /**
+ * The truncation strategy to use.
+ */
+ export type TranslationTruncationStrategy = "do_not_truncate" | "longest_first" | "only_first" | "only_second";
  /**
  * Outputs of inference for the Translation task
  */
package/dist/src/tasks/translation/inference.d.ts.map CHANGED
@@ -1 +1 @@
- {"version":3,"file":"inference.d.ts","sourceRoot":"","sources":["../../../../src/tasks/translation/inference.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH;;;;GAIG;AACH,MAAM,WAAW,gBAAgB;IAChC;;OAEG;IACH,MAAM,EAAE,MAAM,CAAC;IACf;;OAEG;IACH,UAAU,CAAC,EAAE,6BAA6B,CAAC;IAC3C,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC;CAC5B;AAED;;;;GAIG;AACH,MAAM,WAAW,6BAA6B;IAC7C;;OAEG;IACH,4BAA4B,CAAC,EAAE,OAAO,CAAC;IACvC;;OAEG;IACH,mBAAmB,CAAC,EAAE;QAAE,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAA;KAAE,CAAC;IACjD;;OAEG;IACH,UAAU,CAAC,EAAE,qCAAqC,CAAC;IACnD,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC;CAC5B;AAED,MAAM,MAAM,qCAAqC,GAAG,iBAAiB,GAAG,eAAe,GAAG,YAAY,GAAG,aAAa,CAAC;AAEvH;;GAEG;AACH,MAAM,WAAW,iBAAiB;IACjC;;OAEG;IACH,gBAAgB,EAAE,MAAM,CAAC;IACzB,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC;CAC5B"}
+ {"version":3,"file":"inference.d.ts","sourceRoot":"","sources":["../../../../src/tasks/translation/inference.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAChC;;OAEG;IACH,MAAM,EAAE,MAAM,CAAC;IACf;;OAEG;IACH,UAAU,CAAC,EAAE,qBAAqB,CAAC;IACnC,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC;CAC5B;AAED;;;;GAIG;AACH,MAAM,WAAW,qBAAqB;IACrC;;OAEG;IACH,4BAA4B,CAAC,EAAE,OAAO,CAAC;IACvC;;OAEG;IACH,mBAAmB,CAAC,EAAE;QAAE,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAA;KAAE,CAAC;IACjD;;;OAGG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB;;;OAGG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB;;OAEG;IACH,UAAU,CAAC,EAAE,6BAA6B,CAAC;IAC3C,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC;CAC5B;AAED;;GAEG;AACH,MAAM,MAAM,6BAA6B,GAAG,iBAAiB,GAAG,eAAe,GAAG,YAAY,GAAG,aAAa,CAAC;AAE/G;;GAEG;AACH,MAAM,WAAW,iBAAiB;IACjC;;OAEG;IACH,gBAAgB,EAAE,MAAM,CAAC;IACzB,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC;CAC5B"}
package/package.json CHANGED
@@ -1,7 +1,7 @@
  {
  "name": "@huggingface/tasks",
  "packageManager": "pnpm@8.10.5",
- "version": "0.11.12",
+ "version": "0.11.13",
  "description": "List of ML tasks for huggingface.co/tasks",
  "repository": "https://github.com/huggingface/huggingface.js.git",
  "publishConfig": {
package/src/model-libraries-snippets.ts CHANGED
@@ -170,6 +170,48 @@ export const diffusers = (model: ModelData): string[] => {
  }
  };

+ export const diffusionkit = (model: ModelData): string[] => {
+ const sd3Snippet = `# Pipeline for Stable Diffusion 3
+ from diffusionkit.mlx import DiffusionPipeline
+
+ pipeline = DiffusionPipeline(
+ shift=3.0,
+ use_t5=False,
+ model_version=${model.id},
+ low_memory_mode=True,
+ a16=True,
+ w16=True,
+ )`;
+
+ const fluxSnippet = `# Pipeline for Flux
+ from diffusionkit.mlx import FluxPipeline
+
+ pipeline = FluxPipeline(
+ shift=1.0,
+ model_version=${model.id},
+ low_memory_mode=True,
+ a16=True,
+ w16=True,
+ )`;
+
+ const generateSnippet = `# Image Generation
+ HEIGHT = 512
+ WIDTH = 512
+ NUM_STEPS = ${model.tags.includes("flux") ? 4 : 50}
+ CFG_WEIGHT = ${model.tags.includes("flux") ? 0 : 5}
+
+ image, _ = pipeline.generate_image(
+ "a photo of a cat",
+ cfg_weight=CFG_WEIGHT,
+ num_steps=NUM_STEPS,
+ latent_size=(HEIGHT // 8, WIDTH // 8),
+ )`;
+
+ const pipelineSnippet = model.tags.includes("flux") ? fluxSnippet : sd3Snippet;
+
+ return [pipelineSnippet, generateSnippet];
+ };
+
  export const cartesia_pytorch = (model: ModelData): string[] => [
  `# pip install --no-binary :all: cartesia-pytorch
  from cartesia_pytorch import ReneLMHeadModel
package/src/model-libraries.ts CHANGED
@@ -181,6 +181,12 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = {
  filter: true,
  /// diffusers has its own more complex "countDownloads" query
  },
+ diffusionkit: {
+ prettyLabel: "DiffusionKit",
+ repoName: "DiffusionKit",
+ repoUrl: "https://github.com/argmaxinc/DiffusionKit",
+ snippets: snippets.diffusionkit,
+ },
  doctr: {
  prettyLabel: "docTR",
  repoName: "doctr",
@@ -440,6 +446,13 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = {
  snippets: snippets.pyannote_audio,
  filter: true,
  },
+ "py-feat": {
+ prettyLabel: "Py-Feat",
+ repoName: "Py-Feat",
+ repoUrl: "https://github.com/cosanlab/py-feat",
+ docsUrl: "https://py-feat.org/",
+ filter: false,
+ },
  pythae: {
  prettyLabel: "pythae",
  repoName: "pythae",
package/src/pipelines.ts CHANGED
@@ -656,6 +656,18 @@ export const PIPELINE_DATA = {
  name: "Video-Text-to-Text",
  modality: "multimodal",
  color: "blue",
+ hideInDatasets: false,
+ },
+ "keypoint-detection": {
+ name: "Keypoint Detection",
+ subtasks: [
+ {
+ type: "pose-estimation",
+ name: "Pose Estimation",
+ },
+ ],
+ modality: "cv",
+ color: "red",
  hideInDatasets: true,
  },
  other: {
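A small sketch of what the new pipeline entry looks like from the consumer side, assuming `PIPELINE_DATA` and `PipelineType` are re-exported from the package root:

```ts
import { PIPELINE_DATA, type PipelineType } from "@huggingface/tasks";

const task: PipelineType = "keypoint-detection"; // newly valid pipeline type
console.log(PIPELINE_DATA[task].name);           // "Keypoint Detection"
console.log(PIPELINE_DATA[task].subtasks?.[0]);  // { type: "pose-estimation", name: "Pose Estimation" }
```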
package/src/tasks/audio-classification/inference.ts CHANGED
@@ -8,9 +8,10 @@
  */
  export interface AudioClassificationInput {
  /**
- * The input audio data
+ * The input audio data as a base64-encoded string. If no `parameters` are provided, you can
+ * also provide the audio data as a raw bytes payload.
  */
- inputs: unknown;
+ inputs: string;
  /**
  * Additional inference parameters
  */
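With `inputs` now typed as a string, a JSON request body carries the audio as base64. A sketch only; `top_k` comes from the classification parameters, which are not part of this excerpt:

```ts
import { readFileSync } from "node:fs";
import type { AudioClassificationInput } from "@huggingface/tasks";

// Encode a local file as base64 to satisfy the stricter `inputs: string` type;
// sending the raw bytes directly remains possible when no parameters are used.
const request: AudioClassificationInput = {
	inputs: readFileSync("sample.flac").toString("base64"),
	parameters: { top_k: 3 },
};
```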
package/src/tasks/audio-classification/spec/input.json CHANGED
@@ -6,7 +6,8 @@
  "type": "object",
  "properties": {
  "inputs": {
- "description": "The input audio data"
+ "description": "The input audio data as a base64-encoded string. If no `parameters` are provided, you can also provide the audio data as a raw bytes payload.",
+ "type": "string"
  },
  "parameters": {
  "description": "Additional inference parameters",
package/src/tasks/audio-classification/spec/output.json CHANGED
@@ -5,6 +5,7 @@
  "description": "Outputs for Audio Classification inference",
  "type": "array",
  "items": {
+ "type": "object",
  "$ref": "/inference/schemas/common-definitions.json#/definitions/ClassificationOutput"
  }
  }
package/src/tasks/automatic-speech-recognition/inference.ts CHANGED
@@ -9,9 +9,10 @@
  */
  export interface AutomaticSpeechRecognitionInput {
  /**
- * The input audio data
+ * The input audio data as a base64-encoded string. If no `parameters` are provided, you can
+ * also provide the audio data as a raw bytes payload.
  */
- inputs: unknown;
+ inputs: string;
  /**
  * Additional inference parameters
  */
package/src/tasks/automatic-speech-recognition/spec/input.json CHANGED
@@ -6,7 +6,8 @@
  "type": "object",
  "properties": {
  "inputs": {
- "description": "The input audio data"
+ "description": "The input audio data as a base64-encoded string. If no `parameters` are provided, you can also provide the audio data as a raw bytes payload.",
+ "type": "string"
  },
  "parameters": {
  "description": "Additional inference parameters",
package/src/tasks/common-definitions.json CHANGED
@@ -7,17 +7,7 @@
  "title": "ClassificationOutputTransform",
  "type": "string",
  "description": "The function to apply to the model outputs in order to retrieve the scores.",
- "oneOf": [
- {
- "const": "sigmoid"
- },
- {
- "const": "softmax"
- },
- {
- "const": "none"
- }
- ]
+ "enum": ["sigmoid", "softmax", "none"]
  },
  "ClassificationOutput": {
  "title": "ClassificationOutput",
@@ -84,16 +74,9 @@
  "description": "Whether to use sampling instead of greedy decoding when generating new tokens."
  },
  "early_stopping": {
+ "type": ["boolean", "string"],
  "description": "Controls the stopping condition for beam-based methods.",
- "oneOf": [
- {
- "type": "boolean"
- },
- {
- "const": "never",
- "type": "string"
- }
- ]
+ "enum": ["never", true, false]
  },
  "num_beams": {
  "type": "integer",
package/src/tasks/image-classification/inference.ts CHANGED
@@ -8,9 +8,10 @@
  */
  export interface ImageClassificationInput {
  /**
- * The input image data
+ * The input image data as a base64-encoded string. If no `parameters` are provided, you can
+ * also provide the image data as a raw bytes payload.
  */
- inputs: unknown;
+ inputs: string;
  /**
  * Additional inference parameters
  */
package/src/tasks/image-classification/spec/input.json CHANGED
@@ -6,7 +6,8 @@
  "type": "object",
  "properties": {
  "inputs": {
- "description": "The input image data"
+ "type": "string",
+ "description": "The input image data as a base64-encoded string. If no `parameters` are provided, you can also provide the image data as a raw bytes payload."
  },
  "parameters": {
  "description": "Additional inference parameters",
package/src/tasks/image-classification/spec/output.json CHANGED
@@ -5,6 +5,7 @@
  "title": "ImageClassificationOutput",
  "type": "array",
  "items": {
+ "type": "object",
  "$ref": "/inference/schemas/common-definitions.json#/definitions/ClassificationOutput"
  }
  }
package/src/tasks/image-segmentation/inference.ts CHANGED
@@ -8,9 +8,10 @@
  */
  export interface ImageSegmentationInput {
  /**
- * The input image data
+ * The input image data as a base64-encoded string. If no `parameters` are provided, you can
+ * also provide the image data as a raw bytes payload.
  */
- inputs: unknown;
+ inputs: string;
  /**
  * Additional inference parameters
  */
@@ -41,6 +42,9 @@ export interface ImageSegmentationParameters {
  threshold?: number;
  [property: string]: unknown;
  }
+ /**
+ * Segmentation task to be performed, depending on model capabilities.
+ */
  export type ImageSegmentationSubtask = "instance" | "panoptic" | "semantic";
  export type ImageSegmentationOutput = ImageSegmentationOutputElement[];
  /**
@@ -50,15 +54,15 @@ export type ImageSegmentationOutput = ImageSegmentationOutputElement[];
  */
  export interface ImageSegmentationOutputElement {
  /**
- * The label of the predicted segment
+ * The label of the predicted segment.
  */
  label: string;
  /**
- * The corresponding mask as a black-and-white image
+ * The corresponding mask as a black-and-white image (base64-encoded).
  */
- mask: unknown;
+ mask: string;
  /**
- * The score or confidence degreee the model has
+ * The score or confidence degree the model has.
  */
  score?: number;
  [property: string]: unknown;
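Since `mask` is now a base64 string rather than `unknown`, consumers can decode it directly. A minimal sketch, assuming the output type is re-exported from the package root:

```ts
import type { ImageSegmentationOutput } from "@huggingface/tasks";

// Decode each segment's base64 mask into raw image bytes (Node.js Buffer used for illustration).
function decodeMasks(output: ImageSegmentationOutput) {
	return output.map((segment) => ({
		label: segment.label,
		score: segment.score,
		mask: Buffer.from(segment.mask, "base64"),
	}));
}
```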
package/src/tasks/image-segmentation/spec/input.json CHANGED
@@ -6,7 +6,8 @@
  "type": "object",
  "properties": {
  "inputs": {
- "description": "The input image data"
+ "type": "string",
+ "description": "The input image data as a base64-encoded string. If no `parameters` are provided, you can also provide the image data as a raw bytes payload."
  },
  "parameters": {
  "description": "Additional inference parameters",
@@ -31,17 +32,7 @@
  "title": "ImageSegmentationSubtask",
  "type": "string",
  "description": "Segmentation task to be performed, depending on model capabilities.",
- "oneOf": [
- {
- "const": "instance"
- },
- {
- "const": "panoptic"
- },
- {
- "const": "semantic"
- }
- ]
+ "enum": ["instance", "panoptic", "semantic"]
  },
  "threshold": {
  "type": "number",
package/src/tasks/image-segmentation/spec/output.json CHANGED
@@ -10,14 +10,15 @@
  "properties": {
  "label": {
  "type": "string",
- "description": "The label of the predicted segment"
+ "description": "The label of the predicted segment."
  },
  "mask": {
- "description": "The corresponding mask as a black-and-white image"
+ "type": "string",
+ "description": "The corresponding mask as a black-and-white image (base64-encoded)."
  },
  "score": {
  "type": "number",
- "description": "The score or confidence degreee the model has"
+ "description": "The score or confidence degree the model has."
  }
  },
  "required": ["label", "mask"]
package/src/tasks/image-to-image/inference.ts CHANGED
@@ -9,9 +9,10 @@
  */
  export interface ImageToImageInput {
  /**
- * The input image data
+ * The input image data as a base64-encoded string. If no `parameters` are provided, you can
+ * also provide the image data as a raw bytes payload.
  */
- inputs: unknown;
+ inputs: string;
  /**
  * Additional inference parameters
  */
@@ -40,14 +41,14 @@ export interface ImageToImageParameters {
  */
  num_inference_steps?: number;
  /**
- * The size in pixel of the output image
+ * The size in pixel of the output image.
  */
  target_size?: TargetSize;
  [property: string]: unknown;
  }

  /**
- * The size in pixel of the output image
+ * The size in pixel of the output image.
  */
  export interface TargetSize {
  height: number;
@@ -60,7 +61,7 @@ export interface TargetSize {
  */
  export interface ImageToImageOutput {
  /**
- * The output image
+ * The output image returned as raw bytes in the payload.
  */
  image?: unknown;
  [property: string]: unknown;
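A corresponding sketch of an `ImageToImageInput` with the documented `target_size`, again with illustrative values only and assuming a root re-export of the type:

```ts
import { readFileSync } from "node:fs";
import type { ImageToImageInput } from "@huggingface/tasks";

const request: ImageToImageInput = {
	// Base64-encoded source image; raw bytes are still accepted when no parameters are sent.
	inputs: readFileSync("input.png").toString("base64"),
	parameters: {
		num_inference_steps: 25,                   // shown in the diff above
		target_size: { width: 512, height: 512 },  // TargetSize requires both dimensions
	},
};
```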
package/src/tasks/image-to-image/spec/input.json CHANGED
@@ -6,7 +6,8 @@
  "type": "object",
  "properties": {
  "inputs": {
- "description": "The input image data"
+ "type": "string",
+ "description": "The input image data as a base64-encoded string. If no `parameters` are provided, you can also provide the image data as a raw bytes payload."
  },
  "parameters": {
  "description": "Additional inference parameters",
@@ -36,7 +37,7 @@
  },
  "target_size": {
  "type": "object",
- "description": "The size in pixel of the output image",
+ "description": "The size in pixel of the output image.",
  "properties": {
  "width": {
  "type": "integer"
package/src/tasks/image-to-image/spec/output.json CHANGED
@@ -6,7 +6,7 @@
  "type": "object",
  "properties": {
  "image": {
- "description": "The output image"
+ "description": "The output image returned as raw bytes in the payload."
  }
  }
  }
package/src/tasks/index.ts CHANGED
@@ -73,12 +73,7 @@ export type * from "./table-question-answering/inference";
  export type { TextToImageInput, TextToImageOutput, TextToImageParameters } from "./text-to-image/inference";
  export type { TextToAudioParameters, TextToSpeechInput, TextToSpeechOutput } from "./text-to-speech/inference";
  export type * from "./token-classification/inference";
- export type {
- Text2TextGenerationParameters,
- Text2TextGenerationTruncationStrategy,
- TranslationInput,
- TranslationOutput,
- } from "./translation/inference";
+ export type { TranslationInput, TranslationOutput } from "./translation/inference";
  export type {
  ClassificationOutputTransform,
  TextClassificationInput,
@@ -131,6 +126,7 @@ export const TASKS_MODEL_LIBRARIES: Record<PipelineType, ModelLibraryKey[]> = {
  "image-to-image": ["diffusers", "transformers", "transformers.js"],
  "image-to-text": ["transformers", "transformers.js"],
  "image-to-video": ["diffusers"],
+ "keypoint-detection": ["transformers"],
  "video-classification": ["transformers"],
  "mask-generation": ["transformers"],
  "multiple-choice": ["transformers"],
@@ -210,6 +206,7 @@ export const TASKS_DATA: Record<PipelineType, TaskData | undefined> = {
  "image-text-to-text": getData("image-text-to-text", imageTextToText),
  "image-to-text": getData("image-to-text", imageToText),
  "image-to-video": undefined,
+ "keypoint-detection": getData("keypoint-detection", placeholder),
  "mask-generation": getData("mask-generation", maskGeneration),
  "multiple-choice": undefined,
  "object-detection": getData("object-detection", objectDetection),
package/src/tasks/keypoint-detection/about.md ADDED
@@ -0,0 +1,59 @@
+ ## Task Variants
+
+ ### Pose Estimation
+
+ Pose estimation is the process of determining the position and orientation of an object or a camera in a 3D space. It is a fundamental task in computer vision and is widely used in various applications such as robotics, augmented reality, and 3D reconstruction.
+
+ ## Use Cases for Keypoint Detection
+
+ ### Facial Landmark Estimation
+
+ Keypoint detection models can be used to estimate the position of facial landmarks. Facial landmarks are points on the face such as the corners of the mouth, the outer corners of the eyes, and the tip of the nose. These landmarks can be used for a variety of applications, such as facial expression recognition, 3D face reconstruction, and cinematic animation.
+
+ ### Fitness Tracking
+
+ Keypoint detection models can be used to track the movement of the human body, e.g. the position of the joints in a 3D space. This can be used for a variety of applications, such as fitness tracking, sports analysis or virtual reality applications.
+
+ ## Inference Code
+
+ Below you can find an example of how to use a keypoint detection model and how to visualize the results.
+
+ ```python
+ from transformers import AutoImageProcessor, SuperPointForKeypointDetection
+ import torch
+ import matplotlib.pyplot as plt
+ from PIL import Image
+ import requests
+
+ url_image = "http://images.cocodataset.org/val2017/000000039769.jpg"
+ image = Image.open(requests.get(url_image, stream=True).raw)
+
+ # initialize the model and processor
+ processor = AutoImageProcessor.from_pretrained("magic-leap-community/superpoint")
+ model = SuperPointForKeypointDetection.from_pretrained("magic-leap-community/superpoint")
+
+ # infer
+ inputs = processor(image, return_tensors="pt").to(model.device, model.dtype)
+ outputs = model(**inputs)
+
+ # visualize the output
+ image_width, image_height = image.size
+ image_mask = outputs.mask
+ image_indices = torch.nonzero(image_mask).squeeze()
+
+ image_scores = outputs.scores.squeeze()
+ image_keypoints = outputs.keypoints.squeeze()
+ keypoints = image_keypoints.detach().numpy()
+ scores = image_scores.detach().numpy()
+
+ plt.axis('off')
+ plt.imshow(image)
+ plt.scatter(
+ keypoints[:, 0],
+ keypoints[:, 1],
+ s=scores * 100,
+ c='cyan',
+ alpha=0.4
+ )
+ plt.show()
+ ```