@huggingface/tasks 0.3.0 β†’ 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@huggingface/tasks",
   "packageManager": "pnpm@8.10.5",
-  "version": "0.3.0",
+  "version": "0.3.2",
   "description": "List of ML tasks for huggingface.co/tasks",
   "repository": "https://github.com/huggingface/huggingface.js.git",
   "publishConfig": {
@@ -1,9 +1,9 @@
 import type { WidgetExample } from "./widget-example";
-import type { PipelineType } from "./pipelines";
+import type { WidgetType } from "./pipelines";
 
 type LanguageCode = string;
 
-type PerLanguageMapping = Map<PipelineType, string[] | WidgetExample[]>;
+type PerLanguageMapping = Map<WidgetType, string[] | WidgetExample[]>;
 
 /// NOTE TO CONTRIBUTORS:
 ///
package/src/index.ts CHANGED
@@ -5,6 +5,7 @@ export * from "./tasks";
 export {
   PIPELINE_DATA,
   PIPELINE_TYPES,
+  type WidgetType,
   type PipelineType,
   type PipelineData,
   type Modality,
@@ -16,6 +17,7 @@ export {
 export { ALL_DISPLAY_MODEL_LIBRARY_KEYS, ALL_MODEL_LIBRARY_KEYS, MODEL_LIBRARIES_UI_ELEMENTS } from "./model-libraries";
 export type { LibraryUiElement, ModelLibraryKey } from "./model-libraries";
 export type { ModelData, TransformersInfo } from "./model-data";
+export type { SpecialTokensMap, TokenizerConfig } from "./tokenizer-data";
 export type {
   WidgetExample,
   WidgetExampleAttribute,
@@ -37,6 +39,7 @@ export type {
   WidgetExampleOutputText,
 } from "./widget-example";
 export { InferenceDisplayability } from "./model-data";
+export { SPECIAL_TOKENS_ATTRIBUTES } from "./tokenizer-data";
 
 import * as snippets from "./snippets";
 export { snippets };
@@ -27,7 +27,7 @@ export const LIBRARY_TASK_MAPPING_EXCLUDING_TRANSFORMERS: Partial<Record<ModelLi
   keras: ["image-classification"],
   nemo: ["automatic-speech-recognition"],
   open_clip: ["zero-shot-classification", "zero-shot-image-classification"],
-  paddlenlp: ["conversational", "fill-mask", "summarization", "zero-shot-classification"],
+  paddlenlp: ["fill-mask", "summarization", "zero-shot-classification"],
   peft: ["text-generation"],
   "pyannote-audio": ["automatic-speech-recognition"],
   "sentence-transformers": ["feature-extraction", "sentence-similarity"],
package/src/model-data.ts CHANGED
@@ -1,5 +1,6 @@
 import type { PipelineType } from "./pipelines";
 import type { WidgetExample } from "./widget-example";
+import type { TokenizerConfig } from "./tokenizer-data";
 
 export enum InferenceDisplayability {
   /**
@@ -53,6 +54,7 @@ export interface ModelData {
       base_model_name?: string;
       task_type?: string;
     };
+    tokenizer?: TokenizerConfig;
   };
   /**
    * all the model tags
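For orientation, here is a minimal sketch (not part of the diff) of how the new optional `tokenizer` field on `ModelData["config"]` could be populated; it assumes the remaining `config` members stay optional, and the token values and template are invented.

```typescript
import type { ModelData } from "@huggingface/tasks";

// Hypothetical value for the new optional `config.tokenizer` field;
// the token strings and chat template are illustrative only.
const config: ModelData["config"] = {
  tokenizer: {
    bos_token: "<s>",
    eos_token: "</s>",
    use_default_system_prompt: false,
    chat_template: "{% for message in messages %}{{ message.content }}{% endfor %}",
  },
};

console.log(config?.tokenizer?.eos_token); // "</s>"
```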
package/src/pipelines.ts CHANGED
@@ -225,20 +225,9 @@ export const PIPELINE_DATA = {
     modality: "nlp",
     color: "indigo",
   },
-  conversational: {
-    name: "Conversational",
-    subtasks: [
-      {
-        type: "dialogue-generation",
-        name: "Dialogue Generation",
-      },
-    ],
-    modality: "nlp",
-    color: "green",
-  },
   "feature-extraction": {
     name: "Feature Extraction",
-    modality: "multimodal",
+    modality: "nlp",
     color: "red",
   },
   "text-generation": {
@@ -248,6 +237,14 @@ export const PIPELINE_DATA = {
         type: "dialogue-modeling",
         name: "Dialogue Modeling",
       },
+      {
+        type: "dialogue-generation",
+        name: "Dialogue Generation",
+      },
+      {
+        type: "conversational",
+        name: "Conversational",
+      },
       {
         type: "language-modeling",
         name: "Language Modeling",
@@ -419,7 +416,7 @@ export const PIPELINE_DATA = {
   },
   "text-to-image": {
     name: "Text-to-Image",
-    modality: "multimodal",
+    modality: "cv",
     color: "yellow",
   },
   "image-to-text": {
@@ -430,7 +427,7 @@ export const PIPELINE_DATA = {
         name: "Image Captioning",
       },
     ],
-    modality: "multimodal",
+    modality: "cv",
     color: "red",
   },
   "image-to-image": {
@@ -454,7 +451,7 @@ export const PIPELINE_DATA = {
   },
   "image-to-video": {
     name: "Image-to-Video",
-    modality: "multimodal",
+    modality: "cv",
     color: "indigo",
   },
   "unconditional-image-generation": {
@@ -589,9 +586,15 @@ export const PIPELINE_DATA = {
   },
   "text-to-video": {
     name: "Text-to-Video",
-    modality: "multimodal",
+    modality: "cv",
     color: "green",
   },
+  "image-text-to-text": {
+    name: "Image + Text to Text (VLLMs)",
+    modality: "multimodal",
+    color: "red",
+    hideInDatasets: true,
+  },
   "visual-question-answering": {
     name: "Visual Question Answering",
     subtasks: [
@@ -622,7 +625,7 @@ export const PIPELINE_DATA = {
   },
   "graph-ml": {
     name: "Graph Machine Learning",
-    modality: "multimodal",
+    modality: "other",
     color: "green",
   },
   "mask-generation": {
@@ -637,14 +640,19 @@ export const PIPELINE_DATA = {
   },
   "text-to-3d": {
     name: "Text-to-3D",
-    modality: "multimodal",
+    modality: "cv",
     color: "yellow",
   },
   "image-to-3d": {
     name: "Image-to-3D",
-    modality: "multimodal",
+    modality: "cv",
     color: "green",
   },
+  "image-feature-extraction": {
+    name: "Image Feature Extraction",
+    modality: "cv",
+    color: "indigo",
+  },
   other: {
     name: "Other",
     modality: "other",
@@ -656,6 +664,8 @@ export const PIPELINE_DATA = {
 
 export type PipelineType = keyof typeof PIPELINE_DATA;
 
+export type WidgetType = PipelineType | "conversational";
+
 export const PIPELINE_TYPES = Object.keys(PIPELINE_DATA) as PipelineType[];
 
 export const SUBTASK_TYPES = Object.values(PIPELINE_DATA)
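As a rough consumer sketch (not part of the diff), the new `WidgetType` union keeps "conversational" usable for widget-level code even though it has been dropped from `PIPELINE_DATA` and is no longer a `PipelineType`; the helper function below is hypothetical.

```typescript
import { PIPELINE_TYPES, type PipelineType, type WidgetType } from "@huggingface/tasks";

// "conversational" is still a valid WidgetType in 0.3.2...
const widgetTask: WidgetType = "conversational";
// ...but no longer a PipelineType, so the line below would now be a type error:
// const pipelineTask: PipelineType = "conversational";

// Narrow a WidgetType back to a PipelineType when needed.
function isPipelineType(task: WidgetType): task is PipelineType {
  return (PIPELINE_TYPES as readonly string[]).includes(task);
}

console.log(isPipelineType(widgetTask)); // false
console.log(isPipelineType("text-generation")); // true
```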
@@ -34,7 +34,6 @@ export const curlSnippets: Partial<Record<PipelineType, (model: ModelData, acces
   "zero-shot-classification": snippetZeroShotClassification,
   translation: snippetBasic,
   summarization: snippetBasic,
-  conversational: snippetBasic,
   "feature-extraction": snippetBasic,
   "text-generation": snippetBasic,
   "text2text-generation": snippetBasic,
@@ -9,13 +9,6 @@ const inputsTranslation = () => `"МСня Π·ΠΎΠ²ΡƒΡ‚ Π’ΠΎΠ»ΡŒΡ„Π³Π°Π½Π³ ΠΈ я
 const inputsSummarization = () =>
   `"The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct."`;
 
-const inputsConversational = () =>
-  `{
-    "past_user_inputs": ["Which movie is the best ?"],
-    "generated_responses": ["It is Die Hard for sure."],
-    "text": "Can you explain why ?"
-  }`;
-
 const inputsTableQuestionAnswering = () =>
   `{
     "query": "How many stars does the transformers repository have?",
@@ -96,7 +89,6 @@ const modelInputSnippets: {
   "audio-to-audio": inputsAudioToAudio,
   "audio-classification": inputsAudioClassification,
   "automatic-speech-recognition": inputsAutomaticSpeechRecognition,
-  conversational: inputsConversational,
   "document-question-answering": inputsVisualQuestionAnswering,
   "feature-extraction": inputsFeatureExtraction,
   "fill-mask": inputsFillMask,
@@ -121,7 +121,6 @@ export const jsSnippets: Partial<Record<PipelineType, (model: ModelData, accessT
   "zero-shot-classification": snippetZeroShotClassification,
   translation: snippetBasic,
   summarization: snippetBasic,
-  conversational: snippetBasic,
   "feature-extraction": snippetBasic,
   "text-generation": snippetBasic,
   "text2text-generation": snippetBasic,
@@ -116,7 +116,6 @@ export const pythonSnippets: Partial<Record<PipelineType, (model: ModelData) =>
   "zero-shot-classification": snippetZeroShotClassification,
   translation: snippetBasic,
   summarization: snippetBasic,
-  conversational: snippetBasic,
   "feature-extraction": snippetBasic,
   "text-generation": snippetBasic,
   "text2text-generation": snippetBasic,
@@ -1,9 +1,9 @@
-import { type PipelineType, PIPELINE_DATA } from "../pipelines";
+import type { PipelineType } from "../pipelines";
+import { PIPELINE_DATA } from "../pipelines";
 
 import audioClassification from "./audio-classification/data";
 import audioToAudio from "./audio-to-audio/data";
 import automaticSpeechRecognition from "./automatic-speech-recognition/data";
-import conversational from "./conversational/data";
 import documentQuestionAnswering from "./document-question-answering/data";
 import featureExtraction from "./feature-extraction/data";
 import fillMask from "./fill-mask/data";
@@ -45,14 +45,15 @@ export const TASKS_MODEL_LIBRARIES: Record<PipelineType, ModelLibraryKey[]> = {
   "audio-classification": ["speechbrain", "transformers", "transformers.js"],
   "audio-to-audio": ["asteroid", "speechbrain"],
   "automatic-speech-recognition": ["espnet", "nemo", "speechbrain", "transformers", "transformers.js"],
-  conversational: ["transformers"],
   "depth-estimation": ["transformers", "transformers.js"],
   "document-question-answering": ["transformers", "transformers.js"],
   "feature-extraction": ["sentence-transformers", "transformers", "transformers.js"],
   "fill-mask": ["transformers", "transformers.js"],
   "graph-ml": ["transformers"],
   "image-classification": ["keras", "timm", "transformers", "transformers.js"],
+  "image-feature-extraction": ["timm", "transformers"],
   "image-segmentation": ["transformers", "transformers.js"],
+  "image-text-to-text": ["transformers"],
   "image-to-image": ["diffusers", "transformers", "transformers.js"],
   "image-to-text": ["transformers", "transformers.js"],
   "image-to-video": ["diffusers"],
@@ -122,7 +123,6 @@ export const TASKS_DATA: Record<PipelineType, TaskData | undefined> = {
   "audio-classification": getData("audio-classification", audioClassification),
   "audio-to-audio": getData("audio-to-audio", audioToAudio),
   "automatic-speech-recognition": getData("automatic-speech-recognition", automaticSpeechRecognition),
-  conversational: getData("conversational", conversational),
   "depth-estimation": getData("depth-estimation", depthEstimation),
   "document-question-answering": getData("document-question-answering", documentQuestionAnswering),
   "feature-extraction": getData("feature-extraction", featureExtraction),
@@ -130,6 +130,7 @@ export const TASKS_DATA: Record<PipelineType, TaskData | undefined> = {
   "graph-ml": undefined,
   "image-classification": getData("image-classification", imageClassification),
   "image-segmentation": getData("image-segmentation", imageSegmentation),
+  "image-text-to-text": undefined,
   "image-to-image": getData("image-to-image", imageToImage),
   "image-to-text": getData("image-to-text", imageToText),
   "image-to-video": undefined,
@@ -167,6 +168,7 @@ export const TASKS_DATA: Record<PipelineType, TaskData | undefined> = {
   "zero-shot-object-detection": getData("zero-shot-object-detection", zeroShotObjectDetection),
   "text-to-3d": getData("text-to-3d", placeholder),
   "image-to-3d": getData("image-to-3d", placeholder),
+  "image-feature-extraction": getData("image-feature-extraction", placeholder),
 } as const;
 
 export interface ExampleRepo {
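A small sketch (not part of the diff) of what the registry changes above mean for consumers: both newly added pipeline types are now keys of `TASKS_DATA`, one backed by placeholder task data, the other still `undefined`.

```typescript
import { TASKS_DATA } from "@huggingface/tasks";

// "image-feature-extraction" now resolves to placeholder task data...
console.log(Boolean(TASKS_DATA["image-feature-extraction"])); // true
// ...while "image-text-to-text" is registered but has no task page data yet.
console.log(TASKS_DATA["image-text-to-text"]); // undefined
```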
@@ -0,0 +1,24 @@
+export const SPECIAL_TOKENS_ATTRIBUTES = [
+  "bos_token",
+  "eos_token",
+  "unk_token",
+  "sep_token",
+  "pad_token",
+  "cls_token",
+  "mask_token",
+  // additional_special_tokens (TODO)
+] as const;
+
+/**
+ * Public interface for a tokenizer's special tokens mapping
+ */
+export type SpecialTokensMap = {
+  [key in (typeof SPECIAL_TOKENS_ATTRIBUTES)[number]]?: string;
+};
+/**
+ * Public interface for tokenizer config
+ */
+export interface TokenizerConfig extends SpecialTokensMap {
+  use_default_system_prompt?: boolean;
+  chat_template?: string;
+}
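To make the new tokenizer-data module concrete, here is a small hypothetical usage sketch (not part of the diff); the config values are invented, and the imports rely on the re-exports added to `index.ts` earlier in this diff.

```typescript
import { SPECIAL_TOKENS_ATTRIBUTES, type TokenizerConfig } from "@huggingface/tasks";

// Invented tokenizer_config.json-style values, for illustration only.
const tokenizerConfig: TokenizerConfig = {
  bos_token: "<s>",
  eos_token: "</s>",
  unk_token: "<unk>",
  use_default_system_prompt: true,
};

// Collect whichever special tokens this config actually defines.
const specialTokens = SPECIAL_TOKENS_ATTRIBUTES.map((attr) => tokenizerConfig[attr]).filter(
  (token): token is string => typeof token === "string"
);

console.log(specialTokens); // ["<s>", "</s>", "<unk>"]
```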
@@ -1,50 +0,0 @@
-## Use Cases
-
-### Chatbot πŸ’¬
-
-Chatbots are used to have conversations instead of providing direct contact with a live human. They are used to provide customer service, sales, and can even be used to play games (see [ELIZA](https://en.wikipedia.org/wiki/ELIZA) from 1966 for one of the earliest examples).
-
-## Voice Assistants πŸŽ™οΈ
-
-Conversational response models are used as part of voice assistants to provide appropriate responses to voice based queries.
-
-## Inference
-
-You can infer with Conversational models with the πŸ€— Transformers library using the `conversational` pipeline. This pipeline takes a conversation prompt or a list of conversations and generates responses for each prompt. The models that this pipeline can use are models that have been fine-tuned on a multi-turn conversational task (see https://huggingface.co/models?filter=conversational for a list of updated Conversational models).
-
-```python
-from transformers import pipeline, Conversation
-converse = pipeline("conversational")
-
-conversation_1 = Conversation("Going to the movies tonight - any suggestions?")
-conversation_2 = Conversation("What's the last book you have read?")
-converse([conversation_1, conversation_2])
-
-## Output:
-## Conversation 1
-## user >> Going to the movies tonight - any suggestions?
-## bot >> The Big Lebowski ,
-## Conversation 2
-## user >> What's the last book you have read?
-## bot >> The Last Question
-```
-
-You can use [huggingface.js](https://github.com/huggingface/huggingface.js) to infer with conversational models on Hugging Face Hub.
-
-```javascript
-import { HfInference } from "@huggingface/inference";
-
-const inference = new HfInference(HF_TOKEN);
-await inference.conversational({
-  model: "facebook/blenderbot-400M-distill",
-  inputs: "Going to the movies tonight - any suggestions?",
-});
-```
-
-## Useful Resources
-
-- Learn how ChatGPT and InstructGPT work in this blog: [Illustrating Reinforcement Learning from Human Feedback (RLHF)](https://huggingface.co/blog/rlhf)
-- [Reinforcement Learning from Human Feedback From Zero to ChatGPT](https://www.youtube.com/watch?v=EAd4oQtEJOM)
-- [A guide on Dialog Agents](https://huggingface.co/blog/dialog-agents)
-
-This page was made possible thanks to the efforts of [Viraat Aryabumi](https://huggingface.co/viraat).
@@ -1,66 +0,0 @@
-import type { TaskDataCustom } from "..";
-
-const taskData: TaskDataCustom = {
-  datasets: [
-    {
-      description:
-        "A dataset of 7k conversations explicitly designed to exhibit multiple conversation modes: displaying personality, having empathy, and demonstrating knowledge.",
-      id: "blended_skill_talk",
-    },
-    {
-      description:
-        "ConvAI is a dataset of human-to-bot conversations labeled for quality. This data can be used to train a metric for evaluating dialogue systems",
-      id: "conv_ai_2",
-    },
-    {
-      description: "EmpatheticDialogues, is a dataset of 25k conversations grounded in emotional situations",
-      id: "empathetic_dialogues",
-    },
-  ],
-  demo: {
-    inputs: [
-      {
-        label: "Input",
-        content: "Hey my name is Julien! How are you?",
-        type: "text",
-      },
-    ],
-    outputs: [
-      {
-        label: "Answer",
-        content: "Hi Julien! My name is Julia! I am well.",
-        type: "text",
-      },
-    ],
-  },
-  metrics: [
-    {
-      description:
-        "BLEU score is calculated by counting the number of shared single or subsequent tokens between the generated sequence and the reference. Subsequent n tokens are called β€œn-grams”. Unigram refers to a single token while bi-gram refers to token pairs and n-grams refer to n subsequent tokens. The score ranges from 0 to 1, where 1 means the translation perfectly matched and 0 did not match at all",
-      id: "bleu",
-    },
-  ],
-  models: [
-    {
-      description: "A faster and smaller model than the famous BERT model.",
-      id: "facebook/blenderbot-400M-distill",
-    },
-    {
-      description:
-        "DialoGPT is a large-scale pretrained dialogue response generation model for multiturn conversations.",
-      id: "microsoft/DialoGPT-large",
-    },
-  ],
-  spaces: [
-    {
-      description: "A chatbot based on Blender model.",
-      id: "EXFINITE/BlenderBot-UI",
-    },
-  ],
-  summary:
-    "Conversational response modelling is the task of generating conversational text that is relevant, coherent and knowledgable given a prompt. These models have applications in chatbots, and as a part of voice assistants",
-  widgetModels: ["facebook/blenderbot-400M-distill"],
-  youtubeId: "",
-};
-
-export default taskData;