npm - @huggingface/tasks - Versions diffs - 0.11.7 → 0.11.9 - Mend

@huggingface/tasks 0.11.7 → 0.11.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

package/dist/index.cjs +159 -31
package/dist/index.js +159 -31
package/dist/src/local-apps.d.ts +22 -1
package/dist/src/local-apps.d.ts.map +1 -1
package/dist/src/model-libraries-snippets.d.ts +2 -0
package/dist/src/model-libraries-snippets.d.ts.map +1 -1
package/dist/src/model-libraries.d.ts +24 -2
package/dist/src/model-libraries.d.ts.map +1 -1
package/dist/src/pipelines.d.ts +8 -2
package/dist/src/pipelines.d.ts.map +1 -1
package/dist/src/tasks/feature-extraction/data.d.ts.map +1 -1
package/dist/src/tasks/image-feature-extraction/data.d.ts.map +1 -1
package/dist/src/tasks/index.d.ts.map +1 -1
package/dist/src/tasks/text-generation/data.d.ts.map +1 -1
package/package.json +4 -1
package/src/local-apps.ts +45 -1
package/src/model-libraries-snippets.ts +47 -0
package/src/model-libraries.ts +22 -0
package/src/pipelines.ts +6 -0
package/src/tasks/feature-extraction/data.ts +5 -1
package/src/tasks/image-feature-extraction/data.ts +7 -3
package/src/tasks/image-segmentation/data.ts +4 -4
package/src/tasks/image-text-to-text/about.md +2 -0
package/src/tasks/image-text-to-text/data.ts +1 -1
package/src/tasks/image-to-image/data.ts +2 -2
package/src/tasks/index.ts +2 -0
package/src/tasks/mask-generation/data.ts +4 -4
package/src/tasks/text-generation/data.ts +16 -12
package/src/tasks/text-to-image/data.ts +3 -3

package/src/local-apps.ts CHANGED Viewed

@@ -99,6 +99,29 @@ const snippetLlamacpp = (model: ModelData, filepath?: string): LocalAppSnippet[]
 	];
 };
+/**
+ * Build the LocalAI snippets for a model: one using the installed binary, one using the Docker image.
+ * `filepath` selects the file inside the repo; when omitted the `{{GGUF_FILE}}` placeholder is emitted.
+ */
+const snippetLocalAI = (model: ModelData, filepath?: string): LocalAppSnippet[] => {
+	// Both snippets point at the same huggingface:// location; only the launcher differs.
+	const modelUri = `huggingface://${model.id}/${filepath ?? "{{GGUF_FILE}}"}`;
+	const runWith = (binary: string): string => `# Load and run the model:\n${binary} ${modelUri}`;
+	const fromBinary: LocalAppSnippet = {
+		title: "Install from binary",
+		setup: "curl https://localai.io/install.sh | sh",
+		content: runWith("local-ai run"),
+	};
+	const fromDocker: LocalAppSnippet = {
+		title: "Use Docker images",
+		setup: "# Pull the image:\ndocker pull localai/localai:latest-cpu",
+		content: runWith(
+			"docker run -p 8080:8080 --name localai -v $PWD/models:/build/models localai/localai:latest-cpu"
+		),
+	};
+	return [fromBinary, fromDocker];
+};
 /**
  * Add your new local app here.
  *
@@ -126,6 +149,13 @@ export const LOCAL_APPS = {
 		deeplink: (model, filepath) =>
 			new URL(`lmstudio://open_from_hf?model=${model.id}${filepath ? `&file=${filepath}` : ""}`),
 	},
+	localai: {
+		prettyLabel: "LocalAI",
+		docsUrl: "https://github.com/mudler/LocalAI",
+		mainTask: "text-generation",
+		displayOnModelPage: isGgufModel,
+		snippet: snippetLocalAI,
+	},
 	jan: {
 		prettyLabel: "Jan",
 		docsUrl: "https://jan.ai",
@@ -201,10 +231,24 @@ export const LOCAL_APPS = {
 		docsUrl: "https://diffusionbee.com",
 		mainTask: "text-to-image",
 		macOSOnly: true,
-		comingSoon: true,
 		displayOnModelPage: (model) => model.library_name === "diffusers" && model.pipeline_tag === "text-to-image",
 		deeplink: (model) => new URL(`diffusionbee://open_from_hf?model=${model.id}`),
 	},
+	joyfusion: {
+		prettyLabel: "JoyFusion",
+		docsUrl: "https://joyfusion.app",
+		mainTask: "text-to-image",
+		macOSOnly: true,
+		displayOnModelPage: (model) => model.tags.includes("coreml") && model.pipeline_tag === "text-to-image",
+		deeplink: (model) => new URL(`https://joyfusion.app/import_from_hf?repo_id=${model.id}`),
+	},
+	invoke: {
+		prettyLabel: "Invoke",
+		docsUrl: "https://github.com/invoke-ai/InvokeAI",
+		mainTask: "text-to-image",
+		displayOnModelPage: (model) => model.library_name === "diffusers" && model.pipeline_tag === "text-to-image",
+		deeplink: (model) => new URL(`https://models.invoke.ai/huggingface/${model.id}`),
+	},
 } satisfies Record<string, LocalApp>;
 export type LocalAppKey = keyof typeof LOCAL_APPS;

package/src/model-libraries-snippets.ts CHANGED Viewed

@@ -261,6 +261,24 @@ backbone = keras_nlp.models.Backbone.from_preset("hf://${model.id}")
 `,
 ];
+/**
+ * Python snippet for llama-cpp-python: calls `Llama.from_pretrained` with the model id and a
+ * `{{GGUF_FILE}}` filename placeholder, then issues a single chat completion request.
+ */
+export const llama_cpp_python = (model: ModelData): string[] => {
+	const chatCompletionSnippet = `from llama_cpp import Llama
+llm = Llama.from_pretrained(
+	repo_id="${model.id}",
+	filename="{{GGUF_FILE}}",
+)
+llm.create_chat_completion(
+		messages = [
+			{
+				"role": "user",
+				"content": "What is the capital of France?"
+			}
+		]
+)`;
+	return [chatCompletionSnippet];
+};
 export const tf_keras = (model: ModelData): string[] => [
 	`# Note: 'keras<3.x' or 'tf_keras' must be installed (legacy)
 # See https://github.com/keras-team/tf-keras for more details.
@@ -507,6 +525,35 @@ export const fastai = (model: ModelData): string[] => [
 learn = from_pretrained_fastai("${model.id}")`,
 ];
+/**
+ * Build the Python usage snippets for SAM2.
+ *
+ * @param model - model whose `id` is interpolated into the `from_pretrained(...)` calls.
+ * @returns two snippets, in order: the image predictor example, then the video predictor example.
+ */
+export const sam2 = (model: ModelData): string[] => {
+	// The id is wrapped in double quotes so the generated Python passes it as a string literal,
+	// matching how the other snippets in this file interpolate `model.id`.
+	const image_predictor = `# Use SAM2 with images
+import torch
+from sam2.sam2_image_predictor import SAM2ImagePredictor
+predictor = SAM2ImagePredictor.from_pretrained("${model.id}")
+with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16):
+    predictor.set_image(<your_image>)
+    masks, _, _ = predictor.predict(<input_prompts>)`;
+	const video_predictor = `# Use SAM2 with videos
+import torch
+from sam2.sam2_video_predictor import SAM2VideoPredictor
+predictor = SAM2VideoPredictor.from_pretrained("${model.id}")
+with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16):
+    state = predictor.init_state(<your_video>)
+    # add new prompts and instantly get the output on the same frame
+    frame_idx, object_ids, masks = predictor.add_new_points(state, <your_prompts>)
+    # propagate the prompts to get masklets throughout the video
+    for frame_idx, object_ids, masks in predictor.propagate_in_video(state):
+        ...`;
+	return [image_predictor, video_predictor];
+};
 export const sampleFactory = (model: ModelData): string[] => [
 	`python -m sample_factory.huggingface.load_from_hub -r ${model.id} -d ./train_dir`,
 ];

package/src/model-libraries.ts CHANGED Viewed

@@ -314,6 +314,12 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = {
 		filter: false,
 		countDownloads: `path:"liveportrait/landmark.onnx"`,
 	},
+	"llama-cpp-python": {
+		prettyLabel: "llama-cpp-python",
+		repoName: "llama-cpp-python",
+		repoUrl: "https://github.com/abetlen/llama-cpp-python",
+		snippets: snippets.llama_cpp_python,
+	},
 	mindspore: {
 		prettyLabel: "MindSpore",
 		repoName: "mindspore",
@@ -429,6 +435,14 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = {
 		filter: false,
 		countDownloads: `path:"tokenizer.model"`,
 	},
+	refiners: {
+		prettyLabel: "Refiners",
+		repoName: "Refiners",
+		repoUrl: "https://github.com/finegrain-ai/refiners",
+		docsUrl: "https://refine.rs/",
+		filter: false,
+		countDownloads: `path:"model.safetensors"`,
+	},
 	saelens: {
 		prettyLabel: "SAELens",
 		repoName: "SAELens",
@@ -436,6 +450,14 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = {
 		snippets: snippets.saelens,
 		filter: false,
 	},
+	sam2: {
+		prettyLabel: "sam2",
+		repoName: "sam2",
+		repoUrl: "https://github.com/facebookresearch/segment-anything-2",
+		filter: false,
+		snippets: snippets.sam2,
+		countDownloads: `path_extension:"pt"`,
+	},
 	"sample-factory": {
 		prettyLabel: "sample-factory",
 		repoName: "sample-factory",

package/src/pipelines.ts CHANGED Viewed

@@ -652,6 +652,12 @@ export const PIPELINE_DATA = {
 		modality: "cv",
 		color: "indigo",
 	},
+	"video-text-to-text": {
+		name: "Video-Text-to-Text",
+		modality: "multimodal",
+		color: "blue",
+		hideInDatasets: true,
+	},
 	other: {
 		name: "Other",
 		modality: "other",

package/src/tasks/feature-extraction/data.ts CHANGED Viewed

@@ -42,9 +42,13 @@ const taskData: TaskDataCustom = {
 	],
 	spaces: [
 		{
-			description: "A leaderboard to rank best feature extraction models..",
+			description: "A leaderboard to rank text feature extraction models based on a benchmark.",
 			id: "mteb/leaderboard",
 		},
+		{
+			description: "A leaderboard to rank best feature extraction models based on human feedback.",
+			id: "mteb/arena",
+		},
 	],
 	summary: "Feature extraction is the task of extracting features learnt in a model.",
 	widgetModels: ["facebook/bart-base"],

package/src/tasks/image-feature-extraction/data.ts CHANGED Viewed

@@ -36,16 +36,20 @@ const taskData: TaskDataCustom = {
 		},
 		{
 			description: "A strong image feature extraction model.",
-			id: "google/vit-base-patch16-224-in21k",
+			id: "nvidia/MambaVision-T-1K",
 		},
 		{
-			description: "A robust image feature extraction models.",
+			description: "A robust image feature extraction model.",
 			id: "facebook/dino-vitb16",
 		},
 		{
-			description: "Strong image-text-to-text model made for information retrieval from documents.",
+			description: "Strong image feature extraction model made for information retrieval from documents.",
 			id: "vidore/colpali",
 		},
+		{
+			description: "Strong image feature extraction model that can be used on images and documents.",
+			id: "OpenGVLab/InternViT-6B-448px-V1-2",
+		},
 	],
 	spaces: [],
 	summary: "Image feature extraction is the task of extracting features learnt in a computer vision model.",

package/src/tasks/image-segmentation/data.ts CHANGED Viewed

@@ -48,16 +48,16 @@ const taskData: TaskDataCustom = {
 			id: "facebook/detr-resnet-50-panoptic",
 		},
 		{
-			description: "Semantic segmentation model trained on ADE20k benchmark dataset.",
-			id: "microsoft/beit-large-finetuned-ade-640-640",
+			description: "Background removal model.",
+			id: "briaai/RMBG-1.4",
 		},
 		{
 			description: "Semantic segmentation model trained on ADE20k benchmark dataset with 512x512 resolution.",
 			id: "nvidia/segformer-b0-finetuned-ade-512-512",
 		},
 		{
-			description: "Semantic segmentation model trained Cityscapes dataset.",
-			id: "facebook/mask2former-swin-large-cityscapes-semantic",
+			description: "A multipurpose image segmentation model for high resolution images.",
+			id: "ZhengPeng7/BiRefNet",
 		},
 		{
 			description: "Panoptic segmentation model trained COCO (common objects) dataset.",

package/src/tasks/image-text-to-text/about.md CHANGED Viewed

@@ -72,3 +72,5 @@ print(processor.decode(output[0], skip_special_tokens=True))
 - [Vision Language Models Explained](https://huggingface.co/blog/vlms)
 - [Open-source Multimodality and How to Achieve it using Hugging Face](https://www.youtube.com/watch?v=IoGaGfU1CIg&t=601s)
 - [Introducing Idefics2: A Powerful 8B Vision-Language Model for the community](https://huggingface.co/blog/idefics2)
+- [Image-text-to-text task guide](https://huggingface.co/tasks/image-text-to-text)
+- [Preference Optimization for Vision Language Models with TRL](https://huggingface.co/blog/dpo_vlm)

package/src/tasks/image-text-to-text/data.ts CHANGED Viewed

@@ -88,7 +88,7 @@ const taskData: TaskDataCustom = {
 	summary:
 		"Image-text-to-text models take in an image and text prompt and output text. These models are also called vision-language models, or VLMs. The difference from image-to-text models is that these models take an additional text input, not restricting the model to certain use cases like image captioning, and may also be trained to accept a conversation as input.",
 	widgetModels: ["microsoft/kosmos-2-patch14-224"],
-	youtubeId: "",
+	youtubeId: "IoGaGfU1CIg",
 };
 export default taskData;

package/src/tasks/image-to-image/data.ts CHANGED Viewed

@@ -45,8 +45,8 @@ const taskData: TaskDataCustom = {
 	],
 	models: [
 		{
-			description: "A model that enhances images captured in low light conditions.",
-			id: "keras-io/low-light-image-enhancement",
+			description: "An image-to-image model to improve image resolution.",
+			id: "fal/AuraSR-v2",
 		},
 		{
 			description: "A model that increases the resolution of an image.",

package/src/tasks/index.ts CHANGED Viewed

@@ -166,6 +166,7 @@ export const TASKS_MODEL_LIBRARIES: Record<PipelineType, ModelLibraryKey[]> = {
 	],
 	translation: ["transformers", "transformers.js"],
 	"unconditional-image-generation": ["diffusers"],
+	"video-text-to-text": ["transformers"],
 	"visual-question-answering": ["transformers", "transformers.js"],
 	"voice-activity-detection": [],
 	"zero-shot-classification": ["transformers", "transformers.js"],
@@ -236,6 +237,7 @@ export const TASKS_DATA: Record<PipelineType, TaskData | undefined> = {
 	"token-classification": getData("token-classification", tokenClassification),
 	translation: getData("translation", translation),
 	"unconditional-image-generation": getData("unconditional-image-generation", unconditionalImageGeneration),
+	"video-text-to-text": getData("video-text-to-text", placeholder),
 	"visual-question-answering": getData("visual-question-answering", visualQuestionAnswering),
 	"voice-activity-detection": undefined,
 	"zero-shot-classification": getData("zero-shot-classification", zeroShotClassification),

package/src/tasks/mask-generation/data.ts CHANGED Viewed

@@ -24,14 +24,14 @@ const taskData: TaskDataCustom = {
 		},
 		{
 			description: "Very strong mask generation model.",
-			id: "facebook/sam-vit-huge",
+			id: "facebook/sam2-hiera-large",
 		},
 	],
 	spaces: [
 		{
 			description:
-				"An application that combines a mask generation model with an image embedding model for open-vocabulary image segmentation.",
-			id: "SkalskiP/SAM_and_MetaCLIP",
+				"An application that combines a mask generation model with a zero-shot object detection model for text-guided image segmentation.",
+			id: "merve/OWLSAM2",
 		},
 		{
 			description: "An application that compares the performance of a large and a small mask generation model.",
@@ -39,7 +39,7 @@ const taskData: TaskDataCustom = {
 		},
 		{
 			description: "An application based on an improved mask generation model.",
-			id: "linfanluntan/Grounded-SAM",
+			id: "SkalskiP/segment-anything-model-2",
 		},
 		{
 			description: "An application to remove objects from videos using mask generation models.",

package/src/tasks/text-generation/data.ts CHANGED Viewed

@@ -19,6 +19,10 @@ const taskData: TaskDataCustom = {
 			description: "An instruction dataset with preference ratings on responses.",
 			id: "openbmb/UltraFeedback",
 		},
+		{
+			description: "A large synthetic dataset for alignment of text generation models.",
+			id: "argilla/magpie-ultra-v0.1",
+		},
 	],
 	demo: {
 		inputs: [
@@ -51,32 +55,32 @@ const taskData: TaskDataCustom = {
 	],
 	models: [
 		{
-			description: "A large language model trained for text generation.",
-			id: "bigscience/bloom-560m",
+			description: "A text-generation model trained to follow instructions.",
+			id: "google/gemma-2-2b-it",
 		},
 		{
-			description: "A large code generation model that can generate code in 80+ languages.",
+			description: "A code generation model that can generate code in 80+ languages.",
 			id: "bigcode/starcoder",
 		},
 		{
-			description: "A very powerful text generation model.",
-			id: "mistralai/Mixtral-8x7B-Instruct-v0.1",
+			description: "Very powerful text generation model trained to follow instructions.",
+			id: "meta-llama/Meta-Llama-3.1-8B-Instruct",
 		},
 		{
 			description: "Small yet powerful text generation model.",
-			id: "microsoft/phi-2",
+			id: "microsoft/Phi-3-mini-4k-instruct",
 		},
 		{
-			description: "A very powerful model that can chat, do mathematical reasoning and write code.",
-			id: "openchat/openchat-3.5-0106",
+			description: "A very powerful model that can solve mathematical problems.",
+			id: "AI-MO/NuminaMath-7B-TIR",
 		},
 		{
-			description: "Very strong yet small assistant model.",
-			id: "HuggingFaceH4/zephyr-7b-beta",
+			description: "Strong coding assistant model.",
+			id: "HuggingFaceH4/starchat2-15b-v0.1",
 		},
 		{
 			description: "Very strong open-source large language model.",
-			id: "meta-llama/Llama-2-70b-hf",
+			id: "mistralai/Mistral-Nemo-Instruct-2407",
 		},
 	],
 	spaces: [
@@ -104,7 +108,7 @@ const taskData: TaskDataCustom = {
 	summary:
 		"Generating text is the task of generating new text given another text. These models can, for example, fill in incomplete text or paraphrase.",
 	widgetModels: ["HuggingFaceH4/zephyr-7b-beta"],
-	youtubeId: "Vpjb1lu0MDk",
+	youtubeId: "e9gNEAlsOvU",
 };
 export default taskData;

package/src/tasks/text-to-image/data.ts CHANGED Viewed

@@ -46,15 +46,15 @@ const taskData: TaskDataCustom = {
 	models: [
 		{
 			description: "One of the most powerful image generation models that can generate realistic outputs.",
-			id: "stabilityai/stable-diffusion-xl-base-1.0",
+			id: "black-forest-labs/FLUX.1-dev",
 		},
 		{
 			description: "A powerful yet fast image generation model.",
 			id: "latent-consistency/lcm-lora-sdxl",
 		},
 		{
-			description: "A very fast text-to-image model.",
-			id: "ByteDance/SDXL-Lightning",
+			description: "Text-to-image model for photorealistic generation.",
+			id: "Kwai-Kolors/Kolors",
 		},
 		{
 			description: "A powerful text-to-image model.",