@huggingface/tasks 0.0.7 → 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -276,6 +276,10 @@ declare const PIPELINE_DATA: {
     };
     "image-to-image": {
         name: string;
+        subtasks: {
+            type: string;
+            name: string;
+        }[];
         modality: "cv";
         color: "indigo";
     };
@@ -416,6 +420,16 @@ declare const PIPELINE_DATA: {
         modality: "cv";
         color: "yellow";
     };
+    "text-to-3d": {
+        name: string;
+        modality: "multimodal";
+        color: "yellow";
+    };
+    "image-to-3d": {
+        name: string;
+        modality: "multimodal";
+        color: "green";
+    };
     other: {
         name: string;
         modality: "other";
@@ -425,9 +439,9 @@ declare const PIPELINE_DATA: {
     };
 };
 type PipelineType = keyof typeof PIPELINE_DATA;
-declare const PIPELINE_TYPES: ("other" | "text-classification" | "token-classification" | "table-question-answering" | "question-answering" | "zero-shot-classification" | "translation" | "summarization" | "conversational" | "feature-extraction" | "text-generation" | "text2text-generation" | "fill-mask" | "sentence-similarity" | "text-to-speech" | "text-to-audio" | "automatic-speech-recognition" | "audio-to-audio" | "audio-classification" | "voice-activity-detection" | "depth-estimation" | "image-classification" | "object-detection" | "image-segmentation" | "text-to-image" | "image-to-text" | "image-to-image" | "image-to-video" | "unconditional-image-generation" | "video-classification" | "reinforcement-learning" | "robotics" | "tabular-classification" | "tabular-regression" | "tabular-to-text" | "table-to-text" | "multiple-choice" | "text-retrieval" | "time-series-forecasting" | "text-to-video" | "visual-question-answering" | "document-question-answering" | "zero-shot-image-classification" | "graph-ml" | "mask-generation" | "zero-shot-object-detection")[];
+declare const PIPELINE_TYPES: ("other" | "text-classification" | "token-classification" | "table-question-answering" | "question-answering" | "zero-shot-classification" | "translation" | "summarization" | "conversational" | "feature-extraction" | "text-generation" | "text2text-generation" | "fill-mask" | "sentence-similarity" | "text-to-speech" | "text-to-audio" | "automatic-speech-recognition" | "audio-to-audio" | "audio-classification" | "voice-activity-detection" | "depth-estimation" | "image-classification" | "object-detection" | "image-segmentation" | "text-to-image" | "image-to-text" | "image-to-image" | "image-to-video" | "unconditional-image-generation" | "video-classification" | "reinforcement-learning" | "robotics" | "tabular-classification" | "tabular-regression" | "tabular-to-text" | "table-to-text" | "multiple-choice" | "text-retrieval" | "time-series-forecasting" | "text-to-video" | "visual-question-answering" | "document-question-answering" | "zero-shot-image-classification" | "graph-ml" | "mask-generation" | "zero-shot-object-detection" | "text-to-3d" | "image-to-3d")[];
 declare const SUBTASK_TYPES: string[];
-declare const PIPELINE_TYPES_SET: Set<"other" | "text-classification" | "token-classification" | "table-question-answering" | "question-answering" | "zero-shot-classification" | "translation" | "summarization" | "conversational" | "feature-extraction" | "text-generation" | "text2text-generation" | "fill-mask" | "sentence-similarity" | "text-to-speech" | "text-to-audio" | "automatic-speech-recognition" | "audio-to-audio" | "audio-classification" | "voice-activity-detection" | "depth-estimation" | "image-classification" | "object-detection" | "image-segmentation" | "text-to-image" | "image-to-text" | "image-to-image" | "image-to-video" | "unconditional-image-generation" | "video-classification" | "reinforcement-learning" | "robotics" | "tabular-classification" | "tabular-regression" | "tabular-to-text" | "table-to-text" | "multiple-choice" | "text-retrieval" | "time-series-forecasting" | "text-to-video" | "visual-question-answering" | "document-question-answering" | "zero-shot-image-classification" | "graph-ml" | "mask-generation" | "zero-shot-object-detection">;
+declare const PIPELINE_TYPES_SET: Set<"other" | "text-classification" | "token-classification" | "table-question-answering" | "question-answering" | "zero-shot-classification" | "translation" | "summarization" | "conversational" | "feature-extraction" | "text-generation" | "text2text-generation" | "fill-mask" | "sentence-similarity" | "text-to-speech" | "text-to-audio" | "automatic-speech-recognition" | "audio-to-audio" | "audio-classification" | "voice-activity-detection" | "depth-estimation" | "image-classification" | "object-detection" | "image-segmentation" | "text-to-image" | "image-to-text" | "image-to-image" | "image-to-video" | "unconditional-image-generation" | "video-classification" | "reinforcement-learning" | "robotics" | "tabular-classification" | "tabular-regression" | "tabular-to-text" | "table-to-text" | "multiple-choice" | "text-retrieval" | "time-series-forecasting" | "text-to-video" | "visual-question-answering" | "document-question-answering" | "zero-shot-image-classification" | "graph-ml" | "mask-generation" | "zero-shot-object-detection" | "text-to-3d" | "image-to-3d">;
 
 /**
  * Mapping from library name (excluding Transformers) to its supported tasks.
@@ -758,10 +772,13 @@ declare namespace curl {
 }
 
 declare const snippetZeroShotClassification$1: (model: ModelData) => string;
+declare const snippetZeroShotImageClassification: (model: ModelData) => string;
 declare const snippetBasic$1: (model: ModelData) => string;
 declare const snippetFile$1: (model: ModelData) => string;
 declare const snippetTextToImage$1: (model: ModelData) => string;
+declare const snippetTabular: (model: ModelData) => string;
 declare const snippetTextToAudio$1: (model: ModelData) => string;
+declare const snippetDocumentQuestionAnswering: (model: ModelData) => string;
 declare const pythonSnippets: Partial<Record<PipelineType, (model: ModelData) => string>>;
 declare function getPythonInferenceSnippet(model: ModelData, accessToken: string): string;
 declare function hasPythonInferenceSnippet(model: ModelData): boolean;
@@ -769,16 +786,22 @@ declare function hasPythonInferenceSnippet(model: ModelData): boolean;
 declare const python_getPythonInferenceSnippet: typeof getPythonInferenceSnippet;
 declare const python_hasPythonInferenceSnippet: typeof hasPythonInferenceSnippet;
 declare const python_pythonSnippets: typeof pythonSnippets;
+declare const python_snippetDocumentQuestionAnswering: typeof snippetDocumentQuestionAnswering;
+declare const python_snippetTabular: typeof snippetTabular;
+declare const python_snippetZeroShotImageClassification: typeof snippetZeroShotImageClassification;
 declare namespace python {
     export {
         python_getPythonInferenceSnippet as getPythonInferenceSnippet,
         python_hasPythonInferenceSnippet as hasPythonInferenceSnippet,
         python_pythonSnippets as pythonSnippets,
         snippetBasic$1 as snippetBasic,
+        python_snippetDocumentQuestionAnswering as snippetDocumentQuestionAnswering,
         snippetFile$1 as snippetFile,
+        python_snippetTabular as snippetTabular,
         snippetTextToAudio$1 as snippetTextToAudio,
         snippetTextToImage$1 as snippetTextToImage,
         snippetZeroShotClassification$1 as snippetZeroShotClassification,
+        python_snippetZeroShotImageClassification as snippetZeroShotImageClassification,
     };
 }
 
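The two new task identifiers are now part of the exported type surface. As a rough sketch (assuming `PIPELINE_TYPES_SET` and the `PipelineType` union are re-exported from the package entry point exactly as declared above), a consumer could validate arbitrary tag strings against the updated union like this:

```ts
import { PIPELINE_TYPES_SET, type PipelineType } from "@huggingface/tasks";

// Narrow an arbitrary string (e.g. a model's pipeline_tag) to the PipelineType union,
// which now includes "text-to-3d" and "image-to-3d".
function asPipelineType(value: string): PipelineType | undefined {
	return PIPELINE_TYPES_SET.has(value as PipelineType) ? (value as PipelineType) : undefined;
}

console.log(asPipelineType("text-to-3d")); // "text-to-3d"
console.log(asPipelineType("not-a-task")); // undefined
```
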
package/dist/index.js CHANGED
@@ -1801,6 +1801,20 @@ var PIPELINE_DATA = {
   },
   "image-to-image": {
     name: "Image-to-Image",
+    subtasks: [
+      {
+        type: "image-inpainting",
+        name: "Image Inpainting"
+      },
+      {
+        type: "image-colorization",
+        name: "Image Colorization"
+      },
+      {
+        type: "super-resolution",
+        name: "Super Resolution"
+      }
+    ],
     modality: "cv",
     color: "indigo"
   },
@@ -1987,6 +2001,16 @@ var PIPELINE_DATA = {
     modality: "cv",
     color: "yellow"
   },
+  "text-to-3d": {
+    name: "Text-to-3D",
+    modality: "multimodal",
+    color: "yellow"
+  },
+  "image-to-3d": {
+    name: "Image-to-3D",
+    modality: "multimodal",
+    color: "green"
+  },
   other: {
     name: "Other",
     modality: "other",
@@ -4406,18 +4430,18 @@ var data_default33 = taskData33;
 
 // src/tasks/index.ts
 var TASKS_MODEL_LIBRARIES = {
-  "audio-classification": ["speechbrain", "transformers"],
+  "audio-classification": ["speechbrain", "transformers", "transformers.js"],
   "audio-to-audio": ["asteroid", "speechbrain"],
   "automatic-speech-recognition": ["espnet", "nemo", "speechbrain", "transformers", "transformers.js"],
   conversational: ["transformers"],
-  "depth-estimation": ["transformers"],
-  "document-question-answering": ["transformers"],
+  "depth-estimation": ["transformers", "transformers.js"],
+  "document-question-answering": ["transformers", "transformers.js"],
   "feature-extraction": ["sentence-transformers", "transformers", "transformers.js"],
   "fill-mask": ["transformers", "transformers.js"],
   "graph-ml": ["transformers"],
   "image-classification": ["keras", "timm", "transformers", "transformers.js"],
   "image-segmentation": ["transformers", "transformers.js"],
-  "image-to-image": ["diffusers"],
+  "image-to-image": ["diffusers", "transformers.js"],
   "image-to-text": ["transformers.js"],
   "image-to-video": ["diffusers"],
   "video-classification": ["transformers"],
@@ -4439,8 +4463,8 @@ var TASKS_MODEL_LIBRARIES = {
   "text-generation": ["transformers", "transformers.js"],
   "text-retrieval": [],
   "text-to-image": ["diffusers"],
-  "text-to-speech": ["espnet", "tensorflowtts", "transformers"],
-  "text-to-audio": ["transformers"],
+  "text-to-speech": ["espnet", "tensorflowtts", "transformers", "transformers.js"],
+  "text-to-audio": ["transformers", "transformers.js"],
   "text-to-video": ["diffusers"],
   "text2text-generation": ["transformers", "transformers.js"],
   "time-series-forecasting": [],
@@ -4455,11 +4479,13 @@ var TASKS_MODEL_LIBRARIES = {
   ],
   translation: ["transformers", "transformers.js"],
   "unconditional-image-generation": ["diffusers"],
-  "visual-question-answering": ["transformers"],
+  "visual-question-answering": ["transformers", "transformers.js"],
   "voice-activity-detection": [],
   "zero-shot-classification": ["transformers", "transformers.js"],
   "zero-shot-image-classification": ["transformers", "transformers.js"],
-  "zero-shot-object-detection": ["transformers"]
+  "zero-shot-object-detection": ["transformers", "transformers.js"],
+  "text-to-3d": [],
+  "image-to-3d": []
 };
 function getData(type, partialTaskData = data_default14) {
   return {
@@ -4515,7 +4541,9 @@ var TASKS_DATA = {
   "voice-activity-detection": void 0,
   "zero-shot-classification": getData("zero-shot-classification", data_default32),
   "zero-shot-image-classification": getData("zero-shot-image-classification", data_default33),
-  "zero-shot-object-detection": getData("zero-shot-object-detection", data_default14)
+  "zero-shot-object-detection": getData("zero-shot-object-detection", data_default14),
+  "text-to-3d": getData("text-to-3d", data_default14),
+  "image-to-3d": getData("image-to-3d", data_default14)
 };
 
 // src/model-libraries.ts
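A small usage sketch of the expanded mappings (not from the package docs; it assumes `TASKS_MODEL_LIBRARIES` and `TASKS_DATA` are reachable from the package entry point):

```ts
import { TASKS_DATA, TASKS_MODEL_LIBRARIES } from "@huggingface/tasks";

// transformers.js is now listed as a compatible library for several more tasks.
console.log(TASKS_MODEL_LIBRARIES["depth-estimation"]); // ["transformers", "transformers.js"]

// The new 3D tasks are registered with placeholder task data and no libraries yet.
console.log(TASKS_MODEL_LIBRARIES["text-to-3d"]); // []
console.log(TASKS_DATA["image-to-3d"] !== undefined); // true
```
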
@@ -4622,6 +4650,10 @@ var inputsTableQuestionAnswering = () => `{
     ]
   }
 }`;
+var inputsVisualQuestionAnswering = () => `{
+    "image": "cat.png",
+    "question": "What is in this image?"
+}`;
 var inputsQuestionAnswering = () => `{
     "question": "What is my name?",
     "context": "My name is Clara and I live in Berkeley."
@@ -4650,11 +4682,14 @@ var inputsTextToImage = () => `"Astronaut riding a horse"`;
 var inputsTextToSpeech = () => `"The answer to the universe is 42"`;
 var inputsTextToAudio = () => `"liquid drum and bass, atmospheric synths, airy sounds"`;
 var inputsAutomaticSpeechRecognition = () => `"sample1.flac"`;
+var inputsTabularPrediction = () => `'{"Height":[11.52,12.48],"Length1":[23.2,24.0],"Length2":[25.4,26.3],"Species": ["Bream","Bream"]}'`;
+var inputsZeroShotImageClassification = () => `"cats.jpg"`;
 var modelInputSnippets = {
   "audio-to-audio": inputsAudioToAudio,
   "audio-classification": inputsAudioClassification,
   "automatic-speech-recognition": inputsAutomaticSpeechRecognition,
   conversational: inputsConversational,
+  "document-question-answering": inputsVisualQuestionAnswering,
   "feature-extraction": inputsFeatureExtraction,
   "fill-mask": inputsFillMask,
   "image-classification": inputsImageClassification,
@@ -4665,6 +4700,8 @@ var modelInputSnippets = {
   "sentence-similarity": inputsSentenceSimilarity,
   summarization: inputsSummarization,
   "table-question-answering": inputsTableQuestionAnswering,
+  "tabular-regression": inputsTabularPrediction,
+  "tabular-classification": inputsTabularPrediction,
   "text-classification": inputsTextClassification,
   "text-generation": inputsTextGeneration,
   "text-to-image": inputsTextToImage,
@@ -4673,7 +4710,8 @@ var modelInputSnippets = {
   "text2text-generation": inputsText2TextGeneration,
   "token-classification": inputsTokenClassification,
   translation: inputsTranslation,
-  "zero-shot-classification": inputsZeroShotClassification
+  "zero-shot-classification": inputsZeroShotClassification,
+  "zero-shot-image-classification": inputsZeroShotImageClassification
 };
 function getModelInputSnippet(model, noWrap = false, noQuotes = false) {
   if (model.pipeline_tag) {
@@ -4761,10 +4799,13 @@ __export(python_exports, {
   hasPythonInferenceSnippet: () => hasPythonInferenceSnippet,
   pythonSnippets: () => pythonSnippets,
   snippetBasic: () => snippetBasic2,
+  snippetDocumentQuestionAnswering: () => snippetDocumentQuestionAnswering,
   snippetFile: () => snippetFile2,
+  snippetTabular: () => snippetTabular,
   snippetTextToAudio: () => snippetTextToAudio,
   snippetTextToImage: () => snippetTextToImage,
-  snippetZeroShotClassification: () => snippetZeroShotClassification2
+  snippetZeroShotClassification: () => snippetZeroShotClassification2,
+  snippetZeroShotImageClassification: () => snippetZeroShotImageClassification
 });
 var snippetZeroShotClassification2 = (model) => `def query(payload):
     response = requests.post(API_URL, headers=headers, json=payload)
@@ -4774,6 +4815,20 @@ output = query({
     "inputs": ${getModelInputSnippet(model)},
     "parameters": {"candidate_labels": ["refund", "legal", "faq"]},
 })`;
+var snippetZeroShotImageClassification = (model) => `def query(data):
+    with open(data["image_path"], "rb") as f:
+        img = f.read()
+    payload={
+        "parameters": data["parameters"],
+        "inputs": base64.b64encode(img).decode("utf-8")
+    }
+    response = requests.post(API_URL, headers=headers, json=payload)
+    return response.json()
+
+output = query({
+    "image_path": ${getModelInputSnippet(model)},
+    "parameters": {"candidate_labels": ["cat", "dog", "llama"]},
+})`;
 var snippetBasic2 = (model) => `def query(payload):
     response = requests.post(API_URL, headers=headers, json=payload)
     return response.json()
@@ -4798,6 +4853,12 @@ image_bytes = query({
 import io
 from PIL import Image
 image = Image.open(io.BytesIO(image_bytes))`;
+var snippetTabular = (model) => `def query(payload):
+    response = requests.post(API_URL, headers=headers, json=payload)
+    return response.content
+response = query({
+    "inputs": {"data": ${getModelInputSnippet(model)}},
+})`;
 var snippetTextToAudio = (model) => {
   if (model.library_name === "transformers") {
     return `def query(payload):
@@ -4823,8 +4884,18 @@ from IPython.display import Audio
 Audio(audio, rate=sampling_rate)`;
   }
 };
+var snippetDocumentQuestionAnswering = (model) => `def query(payload):
+    with open(payload["image"], "rb") as f:
+        img = f.read()
+    payload["image"] = base64.b64encode(img).decode("utf-8")
+    response = requests.post(API_URL, headers=headers, json=payload)
+    return response.json()
+
+output = query({
+    "inputs": ${getModelInputSnippet(model)},
+})`;
 var pythonSnippets = {
-  // Same order as in js/src/lib/interfaces/Types.ts
+  // Same order as in tasks/src/pipelines.ts
   "text-classification": snippetBasic2,
   "token-classification": snippetBasic2,
   "table-question-answering": snippetBasic2,
@@ -4845,9 +4916,13 @@ var pythonSnippets = {
   "audio-to-audio": snippetFile2,
   "audio-classification": snippetFile2,
   "image-classification": snippetFile2,
-  "image-to-text": snippetFile2,
+  "tabular-regression": snippetTabular,
+  "tabular-classification": snippetTabular,
   "object-detection": snippetFile2,
-  "image-segmentation": snippetFile2
+  "image-segmentation": snippetFile2,
+  "document-question-answering": snippetDocumentQuestionAnswering,
+  "image-to-text": snippetFile2,
+  "zero-shot-image-classification": snippetZeroShotImageClassification
 };
 function getPythonInferenceSnippet(model, accessToken) {
   const body = model.pipeline_tag && model.pipeline_tag in pythonSnippets ? pythonSnippets[model.pipeline_tag]?.(model) ?? "" : "";
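To exercise the new Python snippet builders end to end, something along these lines should work. Treat it as a sketch: the import path of the `python` namespace and the minimal `ModelData` shape shown here are assumptions, and the model id is only illustrative.

```ts
import { python, type ModelData } from "@huggingface/tasks";

// Illustrative model descriptor; only the fields the snippet builders read are filled in.
const model: ModelData = {
	id: "impira/layoutlm-document-qa",
	pipeline_tag: "document-question-answering",
};

// document-question-answering is now wired into pythonSnippets, so a snippet is available...
console.log(python.hasPythonInferenceSnippet(model)); // true
// ...and getPythonInferenceSnippet returns the ready-to-copy `requests` code,
// built by the new snippetDocumentQuestionAnswering generator.
console.log(python.getPythonInferenceSnippet(model, "hf_xxx"));
```
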
package/dist/index.mjs CHANGED
@@ -1763,6 +1763,20 @@ var PIPELINE_DATA = {
   },
   "image-to-image": {
     name: "Image-to-Image",
+    subtasks: [
+      {
+        type: "image-inpainting",
+        name: "Image Inpainting"
+      },
+      {
+        type: "image-colorization",
+        name: "Image Colorization"
+      },
+      {
+        type: "super-resolution",
+        name: "Super Resolution"
+      }
+    ],
     modality: "cv",
     color: "indigo"
   },
@@ -1949,6 +1963,16 @@ var PIPELINE_DATA = {
     modality: "cv",
     color: "yellow"
   },
+  "text-to-3d": {
+    name: "Text-to-3D",
+    modality: "multimodal",
+    color: "yellow"
+  },
+  "image-to-3d": {
+    name: "Image-to-3D",
+    modality: "multimodal",
+    color: "green"
+  },
   other: {
     name: "Other",
     modality: "other",
@@ -4368,18 +4392,18 @@ var data_default33 = taskData33;
 
 // src/tasks/index.ts
 var TASKS_MODEL_LIBRARIES = {
-  "audio-classification": ["speechbrain", "transformers"],
+  "audio-classification": ["speechbrain", "transformers", "transformers.js"],
   "audio-to-audio": ["asteroid", "speechbrain"],
   "automatic-speech-recognition": ["espnet", "nemo", "speechbrain", "transformers", "transformers.js"],
   conversational: ["transformers"],
-  "depth-estimation": ["transformers"],
-  "document-question-answering": ["transformers"],
+  "depth-estimation": ["transformers", "transformers.js"],
+  "document-question-answering": ["transformers", "transformers.js"],
   "feature-extraction": ["sentence-transformers", "transformers", "transformers.js"],
   "fill-mask": ["transformers", "transformers.js"],
   "graph-ml": ["transformers"],
   "image-classification": ["keras", "timm", "transformers", "transformers.js"],
   "image-segmentation": ["transformers", "transformers.js"],
-  "image-to-image": ["diffusers"],
+  "image-to-image": ["diffusers", "transformers.js"],
   "image-to-text": ["transformers.js"],
   "image-to-video": ["diffusers"],
   "video-classification": ["transformers"],
@@ -4401,8 +4425,8 @@ var TASKS_MODEL_LIBRARIES = {
   "text-generation": ["transformers", "transformers.js"],
   "text-retrieval": [],
   "text-to-image": ["diffusers"],
-  "text-to-speech": ["espnet", "tensorflowtts", "transformers"],
-  "text-to-audio": ["transformers"],
+  "text-to-speech": ["espnet", "tensorflowtts", "transformers", "transformers.js"],
+  "text-to-audio": ["transformers", "transformers.js"],
   "text-to-video": ["diffusers"],
   "text2text-generation": ["transformers", "transformers.js"],
   "time-series-forecasting": [],
@@ -4417,11 +4441,13 @@ var TASKS_MODEL_LIBRARIES = {
   ],
   translation: ["transformers", "transformers.js"],
   "unconditional-image-generation": ["diffusers"],
-  "visual-question-answering": ["transformers"],
+  "visual-question-answering": ["transformers", "transformers.js"],
   "voice-activity-detection": [],
   "zero-shot-classification": ["transformers", "transformers.js"],
   "zero-shot-image-classification": ["transformers", "transformers.js"],
-  "zero-shot-object-detection": ["transformers"]
+  "zero-shot-object-detection": ["transformers", "transformers.js"],
+  "text-to-3d": [],
+  "image-to-3d": []
 };
 function getData(type, partialTaskData = data_default14) {
   return {
@@ -4477,7 +4503,9 @@ var TASKS_DATA = {
   "voice-activity-detection": void 0,
   "zero-shot-classification": getData("zero-shot-classification", data_default32),
   "zero-shot-image-classification": getData("zero-shot-image-classification", data_default33),
-  "zero-shot-object-detection": getData("zero-shot-object-detection", data_default14)
+  "zero-shot-object-detection": getData("zero-shot-object-detection", data_default14),
+  "text-to-3d": getData("text-to-3d", data_default14),
+  "image-to-3d": getData("image-to-3d", data_default14)
 };
 
 // src/model-libraries.ts
@@ -4584,6 +4612,10 @@ var inputsTableQuestionAnswering = () => `{
     ]
   }
 }`;
+var inputsVisualQuestionAnswering = () => `{
+    "image": "cat.png",
+    "question": "What is in this image?"
+}`;
 var inputsQuestionAnswering = () => `{
     "question": "What is my name?",
     "context": "My name is Clara and I live in Berkeley."
@@ -4612,11 +4644,14 @@ var inputsTextToImage = () => `"Astronaut riding a horse"`;
 var inputsTextToSpeech = () => `"The answer to the universe is 42"`;
 var inputsTextToAudio = () => `"liquid drum and bass, atmospheric synths, airy sounds"`;
 var inputsAutomaticSpeechRecognition = () => `"sample1.flac"`;
+var inputsTabularPrediction = () => `'{"Height":[11.52,12.48],"Length1":[23.2,24.0],"Length2":[25.4,26.3],"Species": ["Bream","Bream"]}'`;
+var inputsZeroShotImageClassification = () => `"cats.jpg"`;
 var modelInputSnippets = {
   "audio-to-audio": inputsAudioToAudio,
   "audio-classification": inputsAudioClassification,
   "automatic-speech-recognition": inputsAutomaticSpeechRecognition,
   conversational: inputsConversational,
+  "document-question-answering": inputsVisualQuestionAnswering,
   "feature-extraction": inputsFeatureExtraction,
   "fill-mask": inputsFillMask,
   "image-classification": inputsImageClassification,
@@ -4627,6 +4662,8 @@ var modelInputSnippets = {
   "sentence-similarity": inputsSentenceSimilarity,
   summarization: inputsSummarization,
   "table-question-answering": inputsTableQuestionAnswering,
+  "tabular-regression": inputsTabularPrediction,
+  "tabular-classification": inputsTabularPrediction,
   "text-classification": inputsTextClassification,
   "text-generation": inputsTextGeneration,
   "text-to-image": inputsTextToImage,
@@ -4635,7 +4672,8 @@ var modelInputSnippets = {
   "text2text-generation": inputsText2TextGeneration,
   "token-classification": inputsTokenClassification,
   translation: inputsTranslation,
-  "zero-shot-classification": inputsZeroShotClassification
+  "zero-shot-classification": inputsZeroShotClassification,
+  "zero-shot-image-classification": inputsZeroShotImageClassification
 };
 function getModelInputSnippet(model, noWrap = false, noQuotes = false) {
   if (model.pipeline_tag) {
@@ -4723,10 +4761,13 @@ __export(python_exports, {
   hasPythonInferenceSnippet: () => hasPythonInferenceSnippet,
   pythonSnippets: () => pythonSnippets,
   snippetBasic: () => snippetBasic2,
+  snippetDocumentQuestionAnswering: () => snippetDocumentQuestionAnswering,
   snippetFile: () => snippetFile2,
+  snippetTabular: () => snippetTabular,
   snippetTextToAudio: () => snippetTextToAudio,
   snippetTextToImage: () => snippetTextToImage,
-  snippetZeroShotClassification: () => snippetZeroShotClassification2
+  snippetZeroShotClassification: () => snippetZeroShotClassification2,
+  snippetZeroShotImageClassification: () => snippetZeroShotImageClassification
 });
 var snippetZeroShotClassification2 = (model) => `def query(payload):
     response = requests.post(API_URL, headers=headers, json=payload)
@@ -4736,6 +4777,20 @@ output = query({
     "inputs": ${getModelInputSnippet(model)},
     "parameters": {"candidate_labels": ["refund", "legal", "faq"]},
 })`;
+var snippetZeroShotImageClassification = (model) => `def query(data):
+    with open(data["image_path"], "rb") as f:
+        img = f.read()
+    payload={
+        "parameters": data["parameters"],
+        "inputs": base64.b64encode(img).decode("utf-8")
+    }
+    response = requests.post(API_URL, headers=headers, json=payload)
+    return response.json()
+
+output = query({
+    "image_path": ${getModelInputSnippet(model)},
+    "parameters": {"candidate_labels": ["cat", "dog", "llama"]},
+})`;
 var snippetBasic2 = (model) => `def query(payload):
     response = requests.post(API_URL, headers=headers, json=payload)
     return response.json()
@@ -4760,6 +4815,12 @@ image_bytes = query({
 import io
 from PIL import Image
 image = Image.open(io.BytesIO(image_bytes))`;
+var snippetTabular = (model) => `def query(payload):
+    response = requests.post(API_URL, headers=headers, json=payload)
+    return response.content
+response = query({
+    "inputs": {"data": ${getModelInputSnippet(model)}},
+})`;
 var snippetTextToAudio = (model) => {
   if (model.library_name === "transformers") {
     return `def query(payload):
@@ -4785,8 +4846,18 @@ from IPython.display import Audio
 Audio(audio, rate=sampling_rate)`;
   }
 };
+var snippetDocumentQuestionAnswering = (model) => `def query(payload):
+    with open(payload["image"], "rb") as f:
+        img = f.read()
+    payload["image"] = base64.b64encode(img).decode("utf-8")
+    response = requests.post(API_URL, headers=headers, json=payload)
+    return response.json()
+
+output = query({
+    "inputs": ${getModelInputSnippet(model)},
+})`;
 var pythonSnippets = {
-  // Same order as in js/src/lib/interfaces/Types.ts
+  // Same order as in tasks/src/pipelines.ts
   "text-classification": snippetBasic2,
   "token-classification": snippetBasic2,
   "table-question-answering": snippetBasic2,
@@ -4807,9 +4878,13 @@ var pythonSnippets = {
   "audio-to-audio": snippetFile2,
   "audio-classification": snippetFile2,
   "image-classification": snippetFile2,
-  "image-to-text": snippetFile2,
+  "tabular-regression": snippetTabular,
+  "tabular-classification": snippetTabular,
   "object-detection": snippetFile2,
-  "image-segmentation": snippetFile2
+  "image-segmentation": snippetFile2,
+  "document-question-answering": snippetDocumentQuestionAnswering,
+  "image-to-text": snippetFile2,
+  "zero-shot-image-classification": snippetZeroShotImageClassification
 };
 function getPythonInferenceSnippet(model, accessToken) {
   const body = model.pipeline_tag && model.pipeline_tag in pythonSnippets ? pythonSnippets[model.pipeline_tag]?.(model) ?? "" : "";
package/package.json CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@huggingface/tasks",
   "packageManager": "pnpm@8.10.5",
-  "version": "0.0.7",
+  "version": "0.0.8",
   "description": "List of ML tasks for huggingface.co/tasks",
   "repository": "https://github.com/huggingface/huggingface.js.git",
   "publishConfig": {
@@ -30,9 +30,7 @@
   ],
   "author": "Hugging Face",
   "license": "MIT",
-  "devDependencies": {
-    "typescript": "^5.0.4"
-  },
+  "devDependencies": {},
   "scripts": {
     "lint": "eslint --quiet --fix --ext .cjs,.ts .",
     "lint:check": "eslint --ext .cjs,.ts .",
package/src/pipelines.ts CHANGED
@@ -435,6 +435,20 @@ export const PIPELINE_DATA = {
 	},
 	"image-to-image": {
 		name: "Image-to-Image",
+		subtasks: [
+			{
+				type: "image-inpainting",
+				name: "Image Inpainting",
+			},
+			{
+				type: "image-colorization",
+				name: "Image Colorization",
+			},
+			{
+				type: "super-resolution",
+				name: "Super Resolution",
+			},
+		],
 		modality: "cv",
 		color: "indigo",
 	},
@@ -621,6 +635,16 @@ export const PIPELINE_DATA = {
 		modality: "cv",
 		color: "yellow",
 	},
+	"text-to-3d": {
+		name: "Text-to-3D",
+		modality: "multimodal",
+		color: "yellow",
+	},
+	"image-to-3d": {
+		name: "Image-to-3D",
+		modality: "multimodal",
+		color: "green",
+	},
 	other: {
 		name: "Other",
 		modality: "other",
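For reference, a minimal sketch of reading the new subtask metadata and 3D entries from PIPELINE_DATA (assuming the constant is exported from the package root as in previous releases):

```ts
import { PIPELINE_DATA } from "@huggingface/tasks";

// "image-to-image" now exposes its subtasks.
for (const subtask of PIPELINE_DATA["image-to-image"].subtasks) {
	console.log(`${subtask.type}: ${subtask.name}`);
}
// image-inpainting: Image Inpainting
// image-colorization: Image Colorization
// super-resolution: Super Resolution

// The two new 3D tasks are plain entries.
console.log(PIPELINE_DATA["text-to-3d"].name); // "Text-to-3D"
console.log(PIPELINE_DATA["image-to-3d"].modality); // "multimodal"
```
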
@@ -31,6 +31,12 @@ const inputsTableQuestionAnswering = () =>
 	}
 }`;
 
+const inputsVisualQuestionAnswering = () =>
+	`{
+		"image": "cat.png",
+		"question": "What is in this image?"
+	}`;
+
 const inputsQuestionAnswering = () =>
 	`{
 		"question": "What is my name?",
@@ -79,6 +85,11 @@ const inputsTextToAudio = () => `"liquid drum and bass, atmospheric synths, airy
 
 const inputsAutomaticSpeechRecognition = () => `"sample1.flac"`;
 
+const inputsTabularPrediction = () =>
+	`'{"Height":[11.52,12.48],"Length1":[23.2,24.0],"Length2":[25.4,26.3],"Species": ["Bream","Bream"]}'`;
+
+const inputsZeroShotImageClassification = () => `"cats.jpg"`;
+
 const modelInputSnippets: {
 	[key in PipelineType]?: (model: ModelData) => string;
 } = {
@@ -86,6 +97,7 @@ const modelInputSnippets: {
 	"audio-classification": inputsAudioClassification,
 	"automatic-speech-recognition": inputsAutomaticSpeechRecognition,
 	conversational: inputsConversational,
+	"document-question-answering": inputsVisualQuestionAnswering,
 	"feature-extraction": inputsFeatureExtraction,
 	"fill-mask": inputsFillMask,
 	"image-classification": inputsImageClassification,
@@ -96,6 +108,8 @@ const modelInputSnippets: {
 	"sentence-similarity": inputsSentenceSimilarity,
 	summarization: inputsSummarization,
 	"table-question-answering": inputsTableQuestionAnswering,
+	"tabular-regression": inputsTabularPrediction,
+	"tabular-classification": inputsTabularPrediction,
 	"text-classification": inputsTextClassification,
 	"text-generation": inputsTextGeneration,
 	"text-to-image": inputsTextToImage,
@@ -105,6 +119,7 @@ const modelInputSnippets: {
 	"token-classification": inputsTokenClassification,
 	translation: inputsTranslation,
 	"zero-shot-classification": inputsZeroShotClassification,
+	"zero-shot-image-classification": inputsZeroShotImageClassification,
 };
 
 // Use noWrap to put the whole snippet on a single line (removing new lines and tabulations)
@@ -12,6 +12,22 @@ output = query({
 	"parameters": {"candidate_labels": ["refund", "legal", "faq"]},
 })`;
 
+export const snippetZeroShotImageClassification = (model: ModelData): string =>
+	`def query(data):
+    with open(data["image_path"], "rb") as f:
+        img = f.read()
+    payload={
+        "parameters": data["parameters"],
+        "inputs": base64.b64encode(img).decode("utf-8")
+    }
+    response = requests.post(API_URL, headers=headers, json=payload)
+    return response.json()
+
+output = query({
+    "image_path": ${getModelInputSnippet(model)},
+    "parameters": {"candidate_labels": ["cat", "dog", "llama"]},
+})`;
+
 export const snippetBasic = (model: ModelData): string =>
 	`def query(payload):
     response = requests.post(API_URL, headers=headers, json=payload)
@@ -42,6 +58,14 @@ import io
 from PIL import Image
 image = Image.open(io.BytesIO(image_bytes))`;
 
+export const snippetTabular = (model: ModelData): string =>
+	`def query(payload):
+    response = requests.post(API_URL, headers=headers, json=payload)
+    return response.content
+response = query({
+    "inputs": {"data": ${getModelInputSnippet(model)}},
+})`;
+
 export const snippetTextToAudio = (model: ModelData): string => {
 	// Transformers TTS pipeline and api-inference-community (AIC) pipeline outputs are diverged
 	// with the latest update to inference-api (IA).
@@ -70,8 +94,21 @@ from IPython.display import Audio
 Audio(audio, rate=sampling_rate)`;
 	}
 };
+
+export const snippetDocumentQuestionAnswering = (model: ModelData): string =>
+	`def query(payload):
+    with open(payload["image"], "rb") as f:
+        img = f.read()
+    payload["image"] = base64.b64encode(img).decode("utf-8")
+    response = requests.post(API_URL, headers=headers, json=payload)
+    return response.json()
+
+output = query({
+    "inputs": ${getModelInputSnippet(model)},
+})`;
+
 export const pythonSnippets: Partial<Record<PipelineType, (model: ModelData) => string>> = {
-	// Same order as in js/src/lib/interfaces/Types.ts
+	// Same order as in tasks/src/pipelines.ts
 	"text-classification": snippetBasic,
 	"token-classification": snippetBasic,
 	"table-question-answering": snippetBasic,
@@ -92,9 +129,13 @@ export const pythonSnippets: Partial<Record<PipelineType, (model: ModelData) =>
 	"audio-to-audio": snippetFile,
 	"audio-classification": snippetFile,
 	"image-classification": snippetFile,
-	"image-to-text": snippetFile,
+	"tabular-regression": snippetTabular,
+	"tabular-classification": snippetTabular,
 	"object-detection": snippetFile,
 	"image-segmentation": snippetFile,
+	"document-question-answering": snippetDocumentQuestionAnswering,
+	"image-to-text": snippetFile,
+	"zero-shot-image-classification": snippetZeroShotImageClassification,
 };
 
 export function getPythonInferenceSnippet(model: ModelData, accessToken: string): string {
@@ -40,18 +40,18 @@ import type { ModelLibraryKey } from "../model-libraries";
  * Model libraries compatible with each ML task
  */
 export const TASKS_MODEL_LIBRARIES: Record<PipelineType, ModelLibraryKey[]> = {
-	"audio-classification": ["speechbrain", "transformers"],
+	"audio-classification": ["speechbrain", "transformers", "transformers.js"],
 	"audio-to-audio": ["asteroid", "speechbrain"],
 	"automatic-speech-recognition": ["espnet", "nemo", "speechbrain", "transformers", "transformers.js"],
 	conversational: ["transformers"],
-	"depth-estimation": ["transformers"],
-	"document-question-answering": ["transformers"],
+	"depth-estimation": ["transformers", "transformers.js"],
+	"document-question-answering": ["transformers", "transformers.js"],
 	"feature-extraction": ["sentence-transformers", "transformers", "transformers.js"],
 	"fill-mask": ["transformers", "transformers.js"],
 	"graph-ml": ["transformers"],
 	"image-classification": ["keras", "timm", "transformers", "transformers.js"],
 	"image-segmentation": ["transformers", "transformers.js"],
-	"image-to-image": ["diffusers"],
+	"image-to-image": ["diffusers", "transformers.js"],
 	"image-to-text": ["transformers.js"],
 	"image-to-video": ["diffusers"],
 	"video-classification": ["transformers"],
@@ -73,8 +73,8 @@ export const TASKS_MODEL_LIBRARIES: Record<PipelineType, ModelLibraryKey[]> = {
 	"text-generation": ["transformers", "transformers.js"],
 	"text-retrieval": [],
 	"text-to-image": ["diffusers"],
-	"text-to-speech": ["espnet", "tensorflowtts", "transformers"],
-	"text-to-audio": ["transformers"],
+	"text-to-speech": ["espnet", "tensorflowtts", "transformers", "transformers.js"],
+	"text-to-audio": ["transformers", "transformers.js"],
 	"text-to-video": ["diffusers"],
 	"text2text-generation": ["transformers", "transformers.js"],
 	"time-series-forecasting": [],
@@ -89,11 +89,13 @@ export const TASKS_MODEL_LIBRARIES: Record<PipelineType, ModelLibraryKey[]> = {
 	],
 	translation: ["transformers", "transformers.js"],
 	"unconditional-image-generation": ["diffusers"],
-	"visual-question-answering": ["transformers"],
+	"visual-question-answering": ["transformers", "transformers.js"],
 	"voice-activity-detection": [],
 	"zero-shot-classification": ["transformers", "transformers.js"],
 	"zero-shot-image-classification": ["transformers", "transformers.js"],
-	"zero-shot-object-detection": ["transformers"],
+	"zero-shot-object-detection": ["transformers", "transformers.js"],
+	"text-to-3d": [],
+	"image-to-3d": [],
 };
 
 /**
@@ -161,6 +163,8 @@ export const TASKS_DATA: Record<PipelineType, TaskData | undefined> = {
 	"zero-shot-classification": getData("zero-shot-classification", zeroShotClassification),
 	"zero-shot-image-classification": getData("zero-shot-image-classification", zeroShotImageClassification),
 	"zero-shot-object-detection": getData("zero-shot-object-detection", placeholder),
+	"text-to-3d": getData("text-to-3d", placeholder),
+	"image-to-3d": getData("image-to-3d", placeholder),
 } as const;
 
 export interface ExampleRepo {
@@ -32,6 +32,16 @@ The most popular models for this task are GPT-based models, [Mistral](mistralai/
 
 These models are trained to learn the mapping between a pair of texts (e.g. translation from one language to another). The most popular variants of these models are [NLLB](facebook/nllb-200-distilled-600M), [FLAN-T5](https://huggingface.co/google/flan-t5-xxl), and [BART](https://huggingface.co/docs/transformers/model_doc/bart). Text-to-Text models are trained with multi-tasking capabilities, they can accomplish a wide range of tasks, including summarization, translation, and text classification.
 
+## Language Model Variants
+
+When it comes to text generation, the underlying language model can come in several types:
+
+- **Base models:** refers to plain language models like [Mistral 7B](mistralai/Mistral-7B-v0.1) and [Llama-2-70b](https://huggingface.co/meta-llama/Llama-2-70b-hf). These models are good for fine-tuning and few-shot prompting.
+
+- **Instruction-trained models:** these models are trained in a multi-task manner to follow a broad range of instructions like "Write me a recipe for chocolate cake". Models like [Flan-T5](https://huggingface.co/google/flan-t5-xl), [Mistral-7B-Instruct-v0.1](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1), and [falcon-40b-instruct](https://huggingface.co/tiiuae/falcon-40b-instruct) are examples of instruction-trained models. In general, instruction-trained models will produce better responses to instructions than base models.
+
+- **Human feedback models:** these models extend base and instruction-trained models by incorporating human feedback that rates the quality of the generated text according to criteria like [helpfulness, honesty, and harmlessness](https://arxiv.org/abs/2112.00861). The human feedback is then combined with an optimization technique like reinforcement learning to align the original model to be closer with human preferences. The overall methodology is often called [Reinforcement Learning from Human Feedback](https://huggingface.co/blog/rlhf), or RLHF for short. [Llama2-Chat](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf) is an open-source model aligned through human feedback.
+
 ## Inference
 
 You can use the 🤗 Transformers library `text-generation` pipeline to do inference with Text Generation models. It takes an incomplete text and returns multiple outputs with which the text can be completed.