@huggingface/tasks 0.11.6 → 0.11.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +151 -51
- package/dist/index.js +151 -51
- package/dist/src/hardware.d.ts +4 -0
- package/dist/src/hardware.d.ts.map +1 -1
- package/dist/src/index.d.ts +1 -1
- package/dist/src/index.d.ts.map +1 -1
- package/dist/src/local-apps.d.ts +16 -3
- package/dist/src/local-apps.d.ts.map +1 -1
- package/dist/src/model-libraries-snippets.d.ts +3 -0
- package/dist/src/model-libraries-snippets.d.ts.map +1 -1
- package/dist/src/model-libraries.d.ts +29 -2
- package/dist/src/model-libraries.d.ts.map +1 -1
- package/dist/src/pipelines.d.ts +8 -2
- package/dist/src/pipelines.d.ts.map +1 -1
- package/dist/src/tasks/feature-extraction/data.d.ts.map +1 -1
- package/dist/src/tasks/image-feature-extraction/data.d.ts.map +1 -1
- package/dist/src/tasks/index.d.ts.map +1 -1
- package/dist/src/tasks/text-generation/data.d.ts.map +1 -1
- package/package.json +4 -1
- package/src/hardware.ts +4 -0
- package/src/index.ts +1 -1
- package/src/local-apps.ts +49 -23
- package/src/model-libraries-snippets.ts +42 -0
- package/src/model-libraries.ts +27 -0
- package/src/pipelines.ts +6 -0
- package/src/tasks/feature-extraction/data.ts +5 -1
- package/src/tasks/image-feature-extraction/data.ts +7 -3
- package/src/tasks/image-segmentation/data.ts +4 -4
- package/src/tasks/image-text-to-text/about.md +2 -0
- package/src/tasks/image-text-to-text/data.ts +1 -1
- package/src/tasks/image-to-image/data.ts +2 -2
- package/src/tasks/index.ts +2 -0
- package/src/tasks/mask-generation/data.ts +4 -4
- package/src/tasks/text-generation/data.ts +16 -12
- package/src/tasks/text-to-image/data.ts +3 -3
package/dist/index.cjs
CHANGED
|
@@ -1388,6 +1388,12 @@ var PIPELINE_DATA = {
|
|
|
1388
1388
|
modality: "cv",
|
|
1389
1389
|
color: "indigo"
|
|
1390
1390
|
},
|
|
1391
|
+
"video-text-to-text": {
|
|
1392
|
+
name: "Video-Text-to-Text",
|
|
1393
|
+
modality: "multimodal",
|
|
1394
|
+
color: "blue",
|
|
1395
|
+
hideInDatasets: true
|
|
1396
|
+
},
|
|
1391
1397
|
other: {
|
|
1392
1398
|
name: "Other",
|
|
1393
1399
|
modality: "other",
|
|
@@ -1731,8 +1737,12 @@ var taskData5 = {
|
|
|
1731
1737
|
],
|
|
1732
1738
|
spaces: [
|
|
1733
1739
|
{
|
|
1734
|
-
description: "A leaderboard to rank
|
|
1740
|
+
description: "A leaderboard to rank text feature extraction models based on a benchmark.",
|
|
1735
1741
|
id: "mteb/leaderboard"
|
|
1742
|
+
},
|
|
1743
|
+
{
|
|
1744
|
+
description: "A leaderboard to rank best feature extraction models based on human feedback.",
|
|
1745
|
+
id: "mteb/arena"
|
|
1736
1746
|
}
|
|
1737
1747
|
],
|
|
1738
1748
|
summary: "Feature extraction is the task of extracting features learnt in a model.",
|
|
@@ -1937,15 +1947,19 @@ var taskData8 = {
|
|
|
1937
1947
|
},
|
|
1938
1948
|
{
|
|
1939
1949
|
description: "A strong image feature extraction model.",
|
|
1940
|
-
id: "
|
|
1950
|
+
id: "nvidia/MambaVision-T-1K"
|
|
1941
1951
|
},
|
|
1942
1952
|
{
|
|
1943
|
-
description: "A robust image feature extraction
|
|
1953
|
+
description: "A robust image feature extraction model.",
|
|
1944
1954
|
id: "facebook/dino-vitb16"
|
|
1945
1955
|
},
|
|
1946
1956
|
{
|
|
1947
|
-
description: "Strong image
|
|
1957
|
+
description: "Strong image feature extraction model made for information retrieval from documents.",
|
|
1948
1958
|
id: "vidore/colpali"
|
|
1959
|
+
},
|
|
1960
|
+
{
|
|
1961
|
+
description: "Strong image feature extraction model that can be used on images and documents.",
|
|
1962
|
+
id: "OpenGVLab/InternViT-6B-448px-V1-2"
|
|
1949
1963
|
}
|
|
1950
1964
|
],
|
|
1951
1965
|
spaces: [],
|
|
@@ -1997,8 +2011,8 @@ var taskData9 = {
|
|
|
1997
2011
|
],
|
|
1998
2012
|
models: [
|
|
1999
2013
|
{
|
|
2000
|
-
description: "
|
|
2001
|
-
id: "
|
|
2014
|
+
description: "An image-to-image model to improve image resolution.",
|
|
2015
|
+
id: "fal/AuraSR-v2"
|
|
2002
2016
|
},
|
|
2003
2017
|
{
|
|
2004
2018
|
description: "A model that increases the resolution of an image.",
|
|
@@ -2216,7 +2230,7 @@ var taskData11 = {
|
|
|
2216
2230
|
],
|
|
2217
2231
|
summary: "Image-text-to-text models take in an image and text prompt and output text. These models are also called vision-language models, or VLMs. The difference from image-to-text models is that these models take an additional text input, not restricting the model to certain use cases like image captioning, and may also be trained to accept a conversation as input.",
|
|
2218
2232
|
widgetModels: ["microsoft/kosmos-2-patch14-224"],
|
|
2219
|
-
youtubeId: ""
|
|
2233
|
+
youtubeId: "IoGaGfU1CIg"
|
|
2220
2234
|
};
|
|
2221
2235
|
var data_default11 = taskData11;
|
|
2222
2236
|
|
|
@@ -2267,16 +2281,16 @@ var taskData12 = {
|
|
|
2267
2281
|
id: "facebook/detr-resnet-50-panoptic"
|
|
2268
2282
|
},
|
|
2269
2283
|
{
|
|
2270
|
-
description: "
|
|
2271
|
-
id: "
|
|
2284
|
+
description: "Background removal model.",
|
|
2285
|
+
id: "briaai/RMBG-1.4"
|
|
2272
2286
|
},
|
|
2273
2287
|
{
|
|
2274
2288
|
description: "Semantic segmentation model trained on ADE20k benchmark dataset with 512x512 resolution.",
|
|
2275
2289
|
id: "nvidia/segformer-b0-finetuned-ade-512-512"
|
|
2276
2290
|
},
|
|
2277
2291
|
{
|
|
2278
|
-
description: "
|
|
2279
|
-
id: "
|
|
2292
|
+
description: "A multipurpose image segmentation model for high resolution images.",
|
|
2293
|
+
id: "ZhengPeng7/BiRefNet"
|
|
2280
2294
|
},
|
|
2281
2295
|
{
|
|
2282
2296
|
description: "Panoptic segmentation model trained COCO (common objects) dataset.",
|
|
@@ -2340,13 +2354,13 @@ var taskData13 = {
|
|
|
2340
2354
|
},
|
|
2341
2355
|
{
|
|
2342
2356
|
description: "Very strong mask generation model.",
|
|
2343
|
-
id: "facebook/
|
|
2357
|
+
id: "facebook/sam2-hiera-large"
|
|
2344
2358
|
}
|
|
2345
2359
|
],
|
|
2346
2360
|
spaces: [
|
|
2347
2361
|
{
|
|
2348
|
-
description: "An application that combines a mask generation model with
|
|
2349
|
-
id: "
|
|
2362
|
+
description: "An application that combines a mask generation model with a zero-shot object detection model for text-guided image segmentation.",
|
|
2363
|
+
id: "merve/OWLSAM2"
|
|
2350
2364
|
},
|
|
2351
2365
|
{
|
|
2352
2366
|
description: "An application that compares the performance of a large and a small mask generation model.",
|
|
@@ -2354,7 +2368,7 @@ var taskData13 = {
|
|
|
2354
2368
|
},
|
|
2355
2369
|
{
|
|
2356
2370
|
description: "An application based on an improved mask generation model.",
|
|
2357
|
-
id: "
|
|
2371
|
+
id: "SkalskiP/segment-anything-model-2"
|
|
2358
2372
|
},
|
|
2359
2373
|
{
|
|
2360
2374
|
description: "An application to remove objects from videos using mask generation models.",
|
|
@@ -3050,15 +3064,15 @@ var taskData24 = {
|
|
|
3050
3064
|
models: [
|
|
3051
3065
|
{
|
|
3052
3066
|
description: "One of the most powerful image generation models that can generate realistic outputs.",
|
|
3053
|
-
id: "
|
|
3067
|
+
id: "black-forest-labs/FLUX.1-dev"
|
|
3054
3068
|
},
|
|
3055
3069
|
{
|
|
3056
3070
|
description: "A powerful yet fast image generation model.",
|
|
3057
3071
|
id: "latent-consistency/lcm-lora-sdxl"
|
|
3058
3072
|
},
|
|
3059
3073
|
{
|
|
3060
|
-
description: "
|
|
3061
|
-
id: "
|
|
3074
|
+
description: "Text-to-image model for photorealistic generation.",
|
|
3075
|
+
id: "Kwai-Kolors/Kolors"
|
|
3062
3076
|
},
|
|
3063
3077
|
{
|
|
3064
3078
|
description: "A powerful text-to-image model.",
|
|
@@ -3419,6 +3433,10 @@ var taskData29 = {
|
|
|
3419
3433
|
{
|
|
3420
3434
|
description: "An instruction dataset with preference ratings on responses.",
|
|
3421
3435
|
id: "openbmb/UltraFeedback"
|
|
3436
|
+
},
|
|
3437
|
+
{
|
|
3438
|
+
description: "A large synthetic dataset for alignment of text generation models.",
|
|
3439
|
+
id: "argilla/magpie-ultra-v0.1"
|
|
3422
3440
|
}
|
|
3423
3441
|
],
|
|
3424
3442
|
demo: {
|
|
@@ -3449,32 +3467,32 @@ var taskData29 = {
|
|
|
3449
3467
|
],
|
|
3450
3468
|
models: [
|
|
3451
3469
|
{
|
|
3452
|
-
description: "A
|
|
3453
|
-
id: "
|
|
3470
|
+
description: "A text-generation model trained to follow instructions.",
|
|
3471
|
+
id: "google/gemma-2-2b-it"
|
|
3454
3472
|
},
|
|
3455
3473
|
{
|
|
3456
|
-
description: "A
|
|
3474
|
+
description: "A code generation model that can generate code in 80+ languages.",
|
|
3457
3475
|
id: "bigcode/starcoder"
|
|
3458
3476
|
},
|
|
3459
3477
|
{
|
|
3460
|
-
description: "
|
|
3461
|
-
id: "
|
|
3478
|
+
description: "Very powerful text generation model trained to follow instructions.",
|
|
3479
|
+
id: "meta-llama/Meta-Llama-3.1-8B-Instruct"
|
|
3462
3480
|
},
|
|
3463
3481
|
{
|
|
3464
3482
|
description: "Small yet powerful text generation model.",
|
|
3465
|
-
id: "microsoft/
|
|
3483
|
+
id: "microsoft/Phi-3-mini-4k-instruct"
|
|
3466
3484
|
},
|
|
3467
3485
|
{
|
|
3468
|
-
description: "A very powerful model that can
|
|
3469
|
-
id: "
|
|
3486
|
+
description: "A very powerful model that can solve mathematical problems.",
|
|
3487
|
+
id: "AI-MO/NuminaMath-7B-TIR"
|
|
3470
3488
|
},
|
|
3471
3489
|
{
|
|
3472
|
-
description: "
|
|
3473
|
-
id: "HuggingFaceH4/
|
|
3490
|
+
description: "Strong coding assistant model.",
|
|
3491
|
+
id: "HuggingFaceH4/starchat2-15b-v0.1"
|
|
3474
3492
|
},
|
|
3475
3493
|
{
|
|
3476
3494
|
description: "Very strong open-source large language model.",
|
|
3477
|
-
id: "
|
|
3495
|
+
id: "mistralai/Mistral-Nemo-Instruct-2407"
|
|
3478
3496
|
}
|
|
3479
3497
|
],
|
|
3480
3498
|
spaces: [
|
|
@@ -3501,7 +3519,7 @@ var taskData29 = {
|
|
|
3501
3519
|
],
|
|
3502
3520
|
summary: "Generating text is the task of generating new text given another text. These models can, for example, fill in incomplete text or paraphrase.",
|
|
3503
3521
|
widgetModels: ["HuggingFaceH4/zephyr-7b-beta"],
|
|
3504
|
-
youtubeId: "
|
|
3522
|
+
youtubeId: "e9gNEAlsOvU"
|
|
3505
3523
|
};
|
|
3506
3524
|
var data_default29 = taskData29;
|
|
3507
3525
|
|
|
@@ -4226,6 +4244,7 @@ var TASKS_MODEL_LIBRARIES = {
|
|
|
4226
4244
|
],
|
|
4227
4245
|
translation: ["transformers", "transformers.js"],
|
|
4228
4246
|
"unconditional-image-generation": ["diffusers"],
|
|
4247
|
+
"video-text-to-text": ["transformers"],
|
|
4229
4248
|
"visual-question-answering": ["transformers", "transformers.js"],
|
|
4230
4249
|
"voice-activity-detection": [],
|
|
4231
4250
|
"zero-shot-classification": ["transformers", "transformers.js"],
|
|
@@ -4285,6 +4304,7 @@ var TASKS_DATA = {
|
|
|
4285
4304
|
"token-classification": getData("token-classification", data_default26),
|
|
4286
4305
|
translation: getData("translation", data_default27),
|
|
4287
4306
|
"unconditional-image-generation": getData("unconditional-image-generation", data_default31),
|
|
4307
|
+
"video-text-to-text": getData("video-text-to-text", data_default16),
|
|
4288
4308
|
"visual-question-answering": getData("visual-question-answering", data_default33),
|
|
4289
4309
|
"voice-activity-detection": void 0,
|
|
4290
4310
|
"zero-shot-classification": getData("zero-shot-classification", data_default34),
|
|
@@ -4522,6 +4542,23 @@ tokenizer = keras_nlp.models.Tokenizer.from_preset("hf://${model.id}")
|
|
|
4522
4542
|
backbone = keras_nlp.models.Backbone.from_preset("hf://${model.id}")
|
|
4523
4543
|
`
|
|
4524
4544
|
];
|
|
4545
|
+
var llama_cpp_python = (model) => [
|
|
4546
|
+
`from llama_cpp import Llama
|
|
4547
|
+
|
|
4548
|
+
llm = Llama.from_pretrained(
|
|
4549
|
+
repo_id="${model.id}",
|
|
4550
|
+
filename="{{GGUF_FILE}}",
|
|
4551
|
+
)
|
|
4552
|
+
|
|
4553
|
+
llm.create_chat_completion(
|
|
4554
|
+
messages = [
|
|
4555
|
+
{
|
|
4556
|
+
"role": "user",
|
|
4557
|
+
"content": "What is the capital of France?"
|
|
4558
|
+
}
|
|
4559
|
+
]
|
|
4560
|
+
)`
|
|
4561
|
+
];
|
|
4525
4562
|
var tf_keras = (model) => [
|
|
4526
4563
|
`# Note: 'keras<3.x' or 'tf_keras' must be installed (legacy)
|
|
4527
4564
|
# See https://github.com/keras-team/tf-keras for more details.
|
|
@@ -4648,6 +4685,15 @@ var timm = (model) => [
|
|
|
4648
4685
|
|
|
4649
4686
|
model = timm.create_model("hf_hub:${model.id}", pretrained=True)`
|
|
4650
4687
|
];
|
|
4688
|
+
var saelens = () => [
|
|
4689
|
+
`# pip install sae-lens
|
|
4690
|
+
from sae_lens import SAE
|
|
4691
|
+
|
|
4692
|
+
sae, cfg_dict, sparsity = SAE.from_pretrained(
|
|
4693
|
+
release = "RELEASE_ID", # e.g., "gpt2-small-res-jb". See other options in https://github.com/jbloomAus/SAELens/blob/main/sae_lens/pretrained_saes.yaml
|
|
4694
|
+
sae_id = "SAE_ID", # e.g., "blocks.8.hook_resid_pre". Won't always be a hook point
|
|
4695
|
+
)`
|
|
4696
|
+
];
|
|
4651
4697
|
var skopsPickle = (model, modelFile) => {
|
|
4652
4698
|
return [
|
|
4653
4699
|
`import joblib
|
|
@@ -4940,6 +4986,19 @@ wavs = chat.infer(texts, )
|
|
|
4940
4986
|
|
|
4941
4987
|
torchaudio.save("output1.wav", torch.from_numpy(wavs[0]), 24000)`
|
|
4942
4988
|
];
|
|
4989
|
+
var birefnet = (model) => [
|
|
4990
|
+
`# Option 1: use with transformers
|
|
4991
|
+
|
|
4992
|
+
from transformers import AutoModelForImageSegmentation
|
|
4993
|
+
birefnet = AutoModelForImageSegmentation.from_pretrained("${model.id}", trust_remote_code=True)
|
|
4994
|
+
`,
|
|
4995
|
+
`# Option 2: use with BiRefNet
|
|
4996
|
+
|
|
4997
|
+
# Install from https://github.com/ZhengPeng7/BiRefNet
|
|
4998
|
+
|
|
4999
|
+
from models.birefnet import BiRefNet
|
|
5000
|
+
model = BiRefNet.from_pretrained("${model.id}")`
|
|
5001
|
+
];
|
|
4943
5002
|
var mlx = (model) => [
|
|
4944
5003
|
`pip install huggingface_hub hf_transfer
|
|
4945
5004
|
|
|
@@ -5070,6 +5129,13 @@ var MODEL_LIBRARIES_UI_ELEMENTS = {
|
|
|
5070
5129
|
filter: false,
|
|
5071
5130
|
countDownloads: `path_extension:"npz"`
|
|
5072
5131
|
},
|
|
5132
|
+
birefnet: {
|
|
5133
|
+
prettyLabel: "BiRefNet",
|
|
5134
|
+
repoName: "BiRefNet",
|
|
5135
|
+
repoUrl: "https://github.com/ZhengPeng7/BiRefNet",
|
|
5136
|
+
snippets: birefnet,
|
|
5137
|
+
filter: false
|
|
5138
|
+
},
|
|
5073
5139
|
bm25s: {
|
|
5074
5140
|
prettyLabel: "BM25S",
|
|
5075
5141
|
repoName: "bm25s",
|
|
@@ -5263,6 +5329,12 @@ var MODEL_LIBRARIES_UI_ELEMENTS = {
|
|
|
5263
5329
|
filter: false,
|
|
5264
5330
|
countDownloads: `path:"liveportrait/landmark.onnx"`
|
|
5265
5331
|
},
|
|
5332
|
+
"llama-cpp-python": {
|
|
5333
|
+
prettyLabel: "llama-cpp-python",
|
|
5334
|
+
repoName: "llama-cpp-python",
|
|
5335
|
+
repoUrl: "https://github.com/abetlen/llama-cpp-python",
|
|
5336
|
+
snippets: llama_cpp_python
|
|
5337
|
+
},
|
|
5266
5338
|
mindspore: {
|
|
5267
5339
|
prettyLabel: "MindSpore",
|
|
5268
5340
|
repoName: "mindspore",
|
|
@@ -5378,6 +5450,20 @@ var MODEL_LIBRARIES_UI_ELEMENTS = {
|
|
|
5378
5450
|
filter: false,
|
|
5379
5451
|
countDownloads: `path:"tokenizer.model"`
|
|
5380
5452
|
},
|
|
5453
|
+
saelens: {
|
|
5454
|
+
prettyLabel: "SAELens",
|
|
5455
|
+
repoName: "SAELens",
|
|
5456
|
+
repoUrl: "https://github.com/jbloomAus/SAELens",
|
|
5457
|
+
snippets: saelens,
|
|
5458
|
+
filter: false
|
|
5459
|
+
},
|
|
5460
|
+
sam2: {
|
|
5461
|
+
prettyLabel: "sam2",
|
|
5462
|
+
repoName: "sam2",
|
|
5463
|
+
repoUrl: "https://github.com/facebookresearch/segment-anything-2",
|
|
5464
|
+
filter: false,
|
|
5465
|
+
countDownloads: `path_extension:"pt"`
|
|
5466
|
+
},
|
|
5381
5467
|
"sample-factory": {
|
|
5382
5468
|
prettyLabel: "sample-factory",
|
|
5383
5469
|
repoName: "sample-factory",
|
|
@@ -6122,6 +6208,10 @@ var SKUS = {
|
|
|
6122
6208
|
tflops: 12,
|
|
6123
6209
|
memory: [16]
|
|
6124
6210
|
},
|
|
6211
|
+
"RTX A4000": {
|
|
6212
|
+
tflops: 19.2,
|
|
6213
|
+
memory: [16]
|
|
6214
|
+
},
|
|
6125
6215
|
A100: {
|
|
6126
6216
|
tflops: 77.97,
|
|
6127
6217
|
memory: [80, 40]
|
|
@@ -6481,27 +6571,38 @@ function isGgufModel(model) {
|
|
|
6481
6571
|
return model.tags.includes("gguf");
|
|
6482
6572
|
}
|
|
6483
6573
|
var snippetLlamacpp = (model, filepath) => {
|
|
6574
|
+
const command = (binary) => [
|
|
6575
|
+
"# Load and run the model:",
|
|
6576
|
+
`${binary} \\`,
|
|
6577
|
+
` --hf-repo "${model.id}" \\`,
|
|
6578
|
+
` --hf-file ${filepath ?? "{{GGUF_FILE}}"} \\`,
|
|
6579
|
+
' -p "You are a helpful assistant" \\',
|
|
6580
|
+
" --conversation"
|
|
6581
|
+
].join("\n");
|
|
6484
6582
|
return [
|
|
6485
|
-
|
|
6486
|
-
|
|
6487
|
-
|
|
6488
|
-
|
|
6489
|
-
|
|
6490
|
-
|
|
6491
|
-
|
|
6492
|
-
|
|
6493
|
-
|
|
6494
|
-
|
|
6495
|
-
|
|
6496
|
-
|
|
6497
|
-
|
|
6498
|
-
|
|
6499
|
-
|
|
6500
|
-
|
|
6501
|
-
|
|
6502
|
-
|
|
6503
|
-
|
|
6504
|
-
|
|
6583
|
+
{
|
|
6584
|
+
title: "Install from brew",
|
|
6585
|
+
setup: "brew install llama.cpp",
|
|
6586
|
+
content: command("llama-cli")
|
|
6587
|
+
},
|
|
6588
|
+
{
|
|
6589
|
+
title: "Use pre-built binary",
|
|
6590
|
+
setup: [
|
|
6591
|
+
// prettier-ignore
|
|
6592
|
+
"# Download pre-built binary from:",
|
|
6593
|
+
"# https://github.com/ggerganov/llama.cpp/releases"
|
|
6594
|
+
].join("\n"),
|
|
6595
|
+
content: command("./llama-cli")
|
|
6596
|
+
},
|
|
6597
|
+
{
|
|
6598
|
+
title: "Build from source code",
|
|
6599
|
+
setup: [
|
|
6600
|
+
"git clone https://github.com/ggerganov/llama.cpp.git",
|
|
6601
|
+
"cd llama.cpp",
|
|
6602
|
+
"LLAMA_CURL=1 make llama-cli"
|
|
6603
|
+
].join("\n"),
|
|
6604
|
+
content: command("./llama-cli")
|
|
6605
|
+
}
|
|
6505
6606
|
];
|
|
6506
6607
|
};
|
|
6507
6608
|
var LOCAL_APPS = {
|
|
@@ -6589,7 +6690,6 @@ var LOCAL_APPS = {
|
|
|
6589
6690
|
docsUrl: "https://diffusionbee.com",
|
|
6590
6691
|
mainTask: "text-to-image",
|
|
6591
6692
|
macOSOnly: true,
|
|
6592
|
-
comingSoon: true,
|
|
6593
6693
|
displayOnModelPage: (model) => model.library_name === "diffusers" && model.pipeline_tag === "text-to-image",
|
|
6594
6694
|
deeplink: (model) => new URL(`diffusionbee://open_from_hf?model=${model.id}`)
|
|
6595
6695
|
}
|