npm - @huggingface/tasks - Versions diffs - 0.11.6 → 0.11.8 - Mend

@huggingface/tasks 0.11.6 → 0.11.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35)

package/dist/index.cjs +151 -51
package/dist/index.js +151 -51
package/dist/src/hardware.d.ts +4 -0
package/dist/src/hardware.d.ts.map +1 -1
package/dist/src/index.d.ts +1 -1
package/dist/src/index.d.ts.map +1 -1
package/dist/src/local-apps.d.ts +16 -3
package/dist/src/local-apps.d.ts.map +1 -1
package/dist/src/model-libraries-snippets.d.ts +3 -0
package/dist/src/model-libraries-snippets.d.ts.map +1 -1
package/dist/src/model-libraries.d.ts +29 -2
package/dist/src/model-libraries.d.ts.map +1 -1
package/dist/src/pipelines.d.ts +8 -2
package/dist/src/pipelines.d.ts.map +1 -1
package/dist/src/tasks/feature-extraction/data.d.ts.map +1 -1
package/dist/src/tasks/image-feature-extraction/data.d.ts.map +1 -1
package/dist/src/tasks/index.d.ts.map +1 -1
package/dist/src/tasks/text-generation/data.d.ts.map +1 -1
package/package.json +4 -1
package/src/hardware.ts +4 -0
package/src/index.ts +1 -1
package/src/local-apps.ts +49 -23
package/src/model-libraries-snippets.ts +42 -0
package/src/model-libraries.ts +27 -0
package/src/pipelines.ts +6 -0
package/src/tasks/feature-extraction/data.ts +5 -1
package/src/tasks/image-feature-extraction/data.ts +7 -3
package/src/tasks/image-segmentation/data.ts +4 -4
package/src/tasks/image-text-to-text/about.md +2 -0
package/src/tasks/image-text-to-text/data.ts +1 -1
package/src/tasks/image-to-image/data.ts +2 -2
package/src/tasks/index.ts +2 -0
package/src/tasks/mask-generation/data.ts +4 -4
package/src/tasks/text-generation/data.ts +16 -12
package/src/tasks/text-to-image/data.ts +3 -3

package/dist/index.cjs CHANGED

@@ -1388,6 +1388,12 @@ var PIPELINE_DATA = {
     modality: "cv",
     color: "indigo"
   },
+  "video-text-to-text": {
+    name: "Video-Text-to-Text",
+    modality: "multimodal",
+    color: "blue",
+    hideInDatasets: true
+  },
   other: {
     name: "Other",
     modality: "other",
@@ -1731,8 +1737,12 @@ var taskData5 = {
   ],
   spaces: [
     {
-      description: "A leaderboard to rank best feature extraction models..",
+      description: "A leaderboard to rank text feature extraction models based on a benchmark.",
       id: "mteb/leaderboard"
+    },
+    {
+      description: "A leaderboard to rank best feature extraction models based on human feedback.",
+      id: "mteb/arena"
     }
   ],
   summary: "Feature extraction is the task of extracting features learnt in a model.",
@@ -1937,15 +1947,19 @@ var taskData8 = {
     },
     {
       description: "A strong image feature extraction model.",
-      id: "google/vit-base-patch16-224-in21k"
+      id: "nvidia/MambaVision-T-1K"
     },
     {
-      description: "A robust image feature extraction models.",
+      description: "A robust image feature extraction model.",
       id: "facebook/dino-vitb16"
     },
     {
-      description: "Strong image-text-to-text model made for information retrieval from documents.",
+      description: "Strong image feature extraction model made for information retrieval from documents.",
       id: "vidore/colpali"
+    },
+    {
+      description: "Strong image feature extraction model that can be used on images and documents.",
+      id: "OpenGVLab/InternViT-6B-448px-V1-2"
     }
   ],
   spaces: [],
@@ -1997,8 +2011,8 @@ var taskData9 = {
   ],
   models: [
     {
-      description: "A model that enhances images captured in low light conditions.",
-      id: "keras-io/low-light-image-enhancement"
+      description: "An image-to-image model to improve image resolution.",
+      id: "fal/AuraSR-v2"
     },
     {
       description: "A model that increases the resolution of an image.",
@@ -2216,7 +2230,7 @@ var taskData11 = {
   ],
   summary: "Image-text-to-text models take in an image and text prompt and output text. These models are also called vision-language models, or VLMs. The difference from image-to-text models is that these models take an additional text input, not restricting the model to certain use cases like image captioning, and may also be trained to accept a conversation as input.",
   widgetModels: ["microsoft/kosmos-2-patch14-224"],
-  youtubeId: ""
+  youtubeId: "IoGaGfU1CIg"
 };
 var data_default11 = taskData11;
@@ -2267,16 +2281,16 @@ var taskData12 = {
       id: "facebook/detr-resnet-50-panoptic"
     },
     {
-      description: "Semantic segmentation model trained on ADE20k benchmark dataset.",
-      id: "microsoft/beit-large-finetuned-ade-640-640"
+      description: "Background removal model.",
+      id: "briaai/RMBG-1.4"
     },
     {
       description: "Semantic segmentation model trained on ADE20k benchmark dataset with 512x512 resolution.",
       id: "nvidia/segformer-b0-finetuned-ade-512-512"
     },
     {
-      description: "Semantic segmentation model trained Cityscapes dataset.",
-      id: "facebook/mask2former-swin-large-cityscapes-semantic"
+      description: "A multipurpose image segmentation model for high resolution images.",
+      id: "ZhengPeng7/BiRefNet"
     },
     {
       description: "Panoptic segmentation model trained COCO (common objects) dataset.",
@@ -2340,13 +2354,13 @@ var taskData13 = {
     },
     {
       description: "Very strong mask generation model.",
-      id: "facebook/sam-vit-huge"
+      id: "facebook/sam2-hiera-large"
     }
   ],
   spaces: [
     {
-      description: "An application that combines a mask generation model with an image embedding model for open-vocabulary image segmentation.",
-      id: "SkalskiP/SAM_and_MetaCLIP"
+      description: "An application that combines a mask generation model with a zero-shot object detection model for text-guided image segmentation.",
+      id: "merve/OWLSAM2"
     },
     {
       description: "An application that compares the performance of a large and a small mask generation model.",
@@ -2354,7 +2368,7 @@ var taskData13 = {
     },
     {
       description: "An application based on an improved mask generation model.",
-      id: "linfanluntan/Grounded-SAM"
+      id: "SkalskiP/segment-anything-model-2"
     },
     {
       description: "An application to remove objects from videos using mask generation models.",
@@ -3050,15 +3064,15 @@ var taskData24 = {
   models: [
     {
       description: "One of the most powerful image generation models that can generate realistic outputs.",
-      id: "stabilityai/stable-diffusion-xl-base-1.0"
+      id: "black-forest-labs/FLUX.1-dev"
     },
     {
       description: "A powerful yet fast image generation model.",
       id: "latent-consistency/lcm-lora-sdxl"
     },
     {
-      description: "A very fast text-to-image model.",
-      id: "ByteDance/SDXL-Lightning"
+      description: "Text-to-image model for photorealistic generation.",
+      id: "Kwai-Kolors/Kolors"
     },
     {
       description: "A powerful text-to-image model.",
@@ -3419,6 +3433,10 @@ var taskData29 = {
     {
       description: "An instruction dataset with preference ratings on responses.",
       id: "openbmb/UltraFeedback"
+    },
+    {
+      description: "A large synthetic dataset for alignment of text generation models.",
+      id: "argilla/magpie-ultra-v0.1"
     }
   ],
   demo: {
@@ -3449,32 +3467,32 @@ var taskData29 = {
   ],
   models: [
     {
-      description: "A large language model trained for text generation.",
-      id: "bigscience/bloom-560m"
+      description: "A text-generation model trained to follow instructions.",
+      id: "google/gemma-2-2b-it"
     },
     {
-      description: "A large code generation model that can generate code in 80+ languages.",
+      description: "A code generation model that can generate code in 80+ languages.",
       id: "bigcode/starcoder"
     },
     {
-      description: "A very powerful text generation model.",
-      id: "mistralai/Mixtral-8x7B-Instruct-v0.1"
+      description: "Very powerful text generation model trained to follow instructions.",
+      id: "meta-llama/Meta-Llama-3.1-8B-Instruct"
     },
     {
       description: "Small yet powerful text generation model.",
-      id: "microsoft/phi-2"
+      id: "microsoft/Phi-3-mini-4k-instruct"
     },
     {
-      description: "A very powerful model that can chat, do mathematical reasoning and write code.",
-      id: "openchat/openchat-3.5-0106"
+      description: "A very powerful model that can solve mathematical problems.",
+      id: "AI-MO/NuminaMath-7B-TIR"
     },
     {
-      description: "Very strong yet small assistant model.",
-      id: "HuggingFaceH4/zephyr-7b-beta"
+      description: "Strong coding assistant model.",
+      id: "HuggingFaceH4/starchat2-15b-v0.1"
     },
     {
       description: "Very strong open-source large language model.",
-      id: "meta-llama/Llama-2-70b-hf"
+      id: "mistralai/Mistral-Nemo-Instruct-2407"
     }
   ],
   spaces: [
@@ -3501,7 +3519,7 @@ var taskData29 = {
   ],
   summary: "Generating text is the task of generating new text given another text. These models can, for example, fill in incomplete text or paraphrase.",
   widgetModels: ["HuggingFaceH4/zephyr-7b-beta"],
-  youtubeId: "Vpjb1lu0MDk"
+  youtubeId: "e9gNEAlsOvU"
 };
 var data_default29 = taskData29;
@@ -4226,6 +4244,7 @@ var TASKS_MODEL_LIBRARIES = {
   ],
   translation: ["transformers", "transformers.js"],
   "unconditional-image-generation": ["diffusers"],
+  "video-text-to-text": ["transformers"],
   "visual-question-answering": ["transformers", "transformers.js"],
   "voice-activity-detection": [],
   "zero-shot-classification": ["transformers", "transformers.js"],
@@ -4285,6 +4304,7 @@ var TASKS_DATA = {
   "token-classification": getData("token-classification", data_default26),
   translation: getData("translation", data_default27),
   "unconditional-image-generation": getData("unconditional-image-generation", data_default31),
+  "video-text-to-text": getData("video-text-to-text", data_default16),
   "visual-question-answering": getData("visual-question-answering", data_default33),
   "voice-activity-detection": void 0,
   "zero-shot-classification": getData("zero-shot-classification", data_default34),
@@ -4522,6 +4542,23 @@ tokenizer = keras_nlp.models.Tokenizer.from_preset("hf://${model.id}")
 backbone = keras_nlp.models.Backbone.from_preset("hf://${model.id}")
 `
 ];
+var llama_cpp_python = (model) => [
+  `from llama_cpp import Llama
+llm = Llama.from_pretrained(
+	repo_id="${model.id}",
+	filename="{{GGUF_FILE}}",
+)
+llm.create_chat_completion(
+		messages = [
+			{
+				"role": "user",
+				"content": "What is the capital of France?"
+			}
+		]
+)`
+];
 var tf_keras = (model) => [
   `# Note: 'keras<3.x' or 'tf_keras' must be installed (legacy)
 # See https://github.com/keras-team/tf-keras for more details.
@@ -4648,6 +4685,15 @@ var timm = (model) => [
 model = timm.create_model("hf_hub:${model.id}", pretrained=True)`
 ];
+var saelens = () => [
+  `# pip install sae-lens
+from sae_lens import SAE
+sae, cfg_dict, sparsity = SAE.from_pretrained(
+    release = "RELEASE_ID", # e.g., "gpt2-small-res-jb". See other options in https://github.com/jbloomAus/SAELens/blob/main/sae_lens/pretrained_saes.yaml
+    sae_id = "SAE_ID", # e.g., "blocks.8.hook_resid_pre". Won't always be a hook point
+)`
+];
 var skopsPickle = (model, modelFile) => {
   return [
     `import joblib
@@ -4940,6 +4986,19 @@ wavs = chat.infer(texts, )
 torchaudio.save("output1.wav", torch.from_numpy(wavs[0]), 24000)`
 ];
+var birefnet = (model) => [
+  `# Option 1: use with transformers
+from transformers import AutoModelForImageSegmentation
+birefnet = AutoModelForImageSegmentation.from_pretrained("${model.id}", trust_remote_code=True)
+`,
+  `# Option 2: use with BiRefNet
+# Install from https://github.com/ZhengPeng7/BiRefNet
+from models.birefnet import BiRefNet
+model = BiRefNet.from_pretrained("${model.id}")`
+];
 var mlx = (model) => [
   `pip install huggingface_hub hf_transfer
@@ -5070,6 +5129,13 @@ var MODEL_LIBRARIES_UI_ELEMENTS = {
     filter: false,
     countDownloads: `path_extension:"npz"`
   },
+  birefnet: {
+    prettyLabel: "BiRefNet",
+    repoName: "BiRefNet",
+    repoUrl: "https://github.com/ZhengPeng7/BiRefNet",
+    snippets: birefnet,
+    filter: false
+  },
   bm25s: {
     prettyLabel: "BM25S",
     repoName: "bm25s",
@@ -5263,6 +5329,12 @@ var MODEL_LIBRARIES_UI_ELEMENTS = {
     filter: false,
     countDownloads: `path:"liveportrait/landmark.onnx"`
   },
+  "llama-cpp-python": {
+    prettyLabel: "llama-cpp-python",
+    repoName: "llama-cpp-python",
+    repoUrl: "https://github.com/abetlen/llama-cpp-python",
+    snippets: llama_cpp_python
+  },
   mindspore: {
     prettyLabel: "MindSpore",
     repoName: "mindspore",
@@ -5378,6 +5450,20 @@ var MODEL_LIBRARIES_UI_ELEMENTS = {
     filter: false,
     countDownloads: `path:"tokenizer.model"`
   },
+  saelens: {
+    prettyLabel: "SAELens",
+    repoName: "SAELens",
+    repoUrl: "https://github.com/jbloomAus/SAELens",
+    snippets: saelens,
+    filter: false
+  },
+  sam2: {
+    prettyLabel: "sam2",
+    repoName: "sam2",
+    repoUrl: "https://github.com/facebookresearch/segment-anything-2",
+    filter: false,
+    countDownloads: `path_extension:"pt"`
+  },
   "sample-factory": {
     prettyLabel: "sample-factory",
     repoName: "sample-factory",
@@ -6122,6 +6208,10 @@ var SKUS = {
         tflops: 12,
         memory: [16]
       },
+      "RTX A4000": {
+        tflops: 19.2,
+        memory: [16]
+      },
       A100: {
         tflops: 77.97,
         memory: [80, 40]
@@ -6481,27 +6571,38 @@ function isGgufModel(model) {
   return model.tags.includes("gguf");
 }
 var snippetLlamacpp = (model, filepath) => {
+  const command = (binary) => [
+    "# Load and run the model:",
+    `${binary} \\`,
+    `  --hf-repo "${model.id}" \\`,
+    `  --hf-file ${filepath ?? "{{GGUF_FILE}}"} \\`,
+    '  -p "You are a helpful assistant" \\',
+    "  --conversation"
+  ].join("\n");
   return [
-    `# Option 1: use llama.cpp with brew
-brew install llama.cpp
-# Load and run the model
-llama \\
-	--hf-repo "${model.id}" \\
-	--hf-file ${filepath ?? "{{GGUF_FILE}}"} \\
-	-p "I believe the meaning of life is" \\
-	-n 128`,
-    `# Option 2: build llama.cpp from source with curl support
-git clone https://github.com/ggerganov/llama.cpp.git
-cd llama.cpp
-LLAMA_CURL=1 make
-# Load and run the model
-./main \\
-	--hf-repo "${model.id}" \\
-	-m ${filepath ?? "{{GGUF_FILE}}"} \\
-	-p "I believe the meaning of life is" \\
-	-n 128`
+    {
+      title: "Install from brew",
+      setup: "brew install llama.cpp",
+      content: command("llama-cli")
+    },
+    {
+      title: "Use pre-built binary",
+      setup: [
+        // prettier-ignore
+        "# Download pre-built binary from:",
+        "# https://github.com/ggerganov/llama.cpp/releases"
+      ].join("\n"),
+      content: command("./llama-cli")
+    },
+    {
+      title: "Build from source code",
+      setup: [
+        "git clone https://github.com/ggerganov/llama.cpp.git",
+        "cd llama.cpp",
+        "LLAMA_CURL=1 make llama-cli"
+      ].join("\n"),
+      content: command("./llama-cli")
+    }
   ];
 };
 var LOCAL_APPS = {
@@ -6589,7 +6690,6 @@ var LOCAL_APPS = {
     docsUrl: "https://diffusionbee.com",
     mainTask: "text-to-image",
     macOSOnly: true,
-    comingSoon: true,
     displayOnModelPage: (model) => model.library_name === "diffusers" && model.pipeline_tag === "text-to-image",
     deeplink: (model) => new URL(`diffusionbee://open_from_hf?model=${model.id}`)
   }