@huggingface/tasks 0.11.0 → 0.11.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -4,7 +4,7 @@ This package contains the definition files (written in Typescript) for the huggi
4
4
 
5
5
  - **pipeline types** (a.k.a. **task types**) - used to determine which widget to display on the model page, and which inference API to run.
6
6
  - **default widget inputs** - when they aren't provided in the model card.
7
- - definitions and UI elements for **model libraries** (and soon for **dataset libraries**).
7
+ - definitions and UI elements for **model and dataset libraries**.
8
8
 
9
9
  Please add any missing ones to these definitions by opening a PR. Thanks 🔥
10
10
 
package/dist/index.cjs CHANGED
@@ -1719,14 +1719,19 @@ var taskData5 = {
1719
1719
  models: [
1720
1720
  {
1721
1721
  description: "A powerful feature extraction model for natural language processing tasks.",
1722
- id: "facebook/bart-base"
1722
+ id: "thenlper/gte-large"
1723
1723
  },
1724
1724
  {
1725
- description: "A strong feature extraction model for coding tasks.",
1726
- id: "microsoft/codebert-base"
1725
+ description: "A strong feature extraction model for retrieval.",
1726
+ id: "Alibaba-NLP/gte-Qwen1.5-7B-instruct"
1727
+ }
1728
+ ],
1729
+ spaces: [
1730
+ {
1731
+ description: "A leaderboard to rank best feature extraction models..",
1732
+ id: "mteb/leaderboard"
1727
1733
  }
1728
1734
  ],
1729
- spaces: [],
1730
1735
  summary: "Feature extraction is the task of extracting features learnt in a model.",
1731
1736
  widgetModels: ["facebook/bart-base"]
1732
1737
  };
@@ -2355,9 +2360,12 @@ var data_default13 = taskData13;
2355
2360
  var taskData14 = {
2356
2361
  datasets: [
2357
2362
  {
2358
- // TODO write proper description
2359
- description: "Widely used benchmark dataset for multiple Vision tasks.",
2363
+ description: "Widely used benchmark dataset for multiple vision tasks.",
2360
2364
  id: "merve/coco2017"
2365
+ },
2366
+ {
2367
+ description: "Multi-task computer vision benchmark.",
2368
+ id: "merve/pascal-voc"
2361
2369
  }
2362
2370
  ],
2363
2371
  demo: {
@@ -2396,6 +2404,10 @@ var taskData14 = {
2396
2404
  {
2397
2405
  description: "Strong object detection model trained on ImageNet-21k dataset.",
2398
2406
  id: "microsoft/beit-base-patch16-224-pt22k-ft22k"
2407
+ },
2408
+ {
2409
+ description: "Fast and accurate object detection model trained on COCO dataset.",
2410
+ id: "PekingU/rtdetr_r18vd_coco_o365"
2399
2411
  }
2400
2412
  ],
2401
2413
  spaces: [
@@ -2403,10 +2415,6 @@ var taskData14 = {
2403
2415
  description: "Leaderboard to compare various object detection models across several metrics.",
2404
2416
  id: "hf-vision/object_detection_leaderboard"
2405
2417
  },
2406
- {
2407
- description: "An object detection application that can detect unseen objects out of the box.",
2408
- id: "merve/owlv2"
2409
- },
2410
2418
  {
2411
2419
  description: "An application that contains various object detection models to try from.",
2412
2420
  id: "Gradio-Blocks/Object-Detection-With-DETR-and-YOLOS"
@@ -2418,6 +2426,10 @@ var taskData14 = {
2418
2426
  {
2419
2427
  description: "An object tracking, segmentation and inpainting application.",
2420
2428
  id: "VIPLab/Track-Anything"
2429
+ },
2430
+ {
2431
+ description: "Very fast object tracking application based on object detection.",
2432
+ id: "merve/RT-DETR-tracking-coco"
2421
2433
  }
2422
2434
  ],
2423
2435
  summary: "Object Detection models allow users to identify objects of certain defined classes. Object detection models receive an image as input and output the images with bounding boxes and labels on detected objects.",
@@ -2430,8 +2442,12 @@ var data_default14 = taskData14;
2430
2442
  var taskData15 = {
2431
2443
  datasets: [
2432
2444
  {
2433
- description: "NYU Depth V2 Dataset: Video dataset containing both RGB and depth sensor data",
2445
+ description: "NYU Depth V2 Dataset: Video dataset containing both RGB and depth sensor data.",
2434
2446
  id: "sayakpaul/nyu_depth_v2"
2447
+ },
2448
+ {
2449
+ description: "Monocular depth estimation benchmark based without noise and errors.",
2450
+ id: "depth-anything/DA-2K"
2435
2451
  }
2436
2452
  ],
2437
2453
  demo: {
@@ -2451,16 +2467,16 @@ var taskData15 = {
2451
2467
  metrics: [],
2452
2468
  models: [
2453
2469
  {
2454
- description: "Strong Depth Estimation model trained on 1.4 million images.",
2455
- id: "Intel/dpt-large"
2456
- },
2457
- {
2458
- description: "Strong Depth Estimation model trained on a big compilation of datasets.",
2459
- id: "LiheYoung/depth-anything-large-hf"
2470
+ description: "Cutting-edge depth estimation model.",
2471
+ id: "depth-anything/Depth-Anything-V2-Large"
2460
2472
  },
2461
2473
  {
2462
2474
  description: "A strong monocular depth estimation model.",
2463
2475
  id: "Bingxin/Marigold"
2476
+ },
2477
+ {
2478
+ description: "A metric depth estimation model trained on NYU dataset.",
2479
+ id: "Intel/zoedepth-nyu"
2464
2480
  }
2465
2481
  ],
2466
2482
  spaces: [
@@ -2469,8 +2485,8 @@ var taskData15 = {
2469
2485
  id: "radames/dpt-depth-estimation-3d-voxels"
2470
2486
  },
2471
2487
  {
2472
- description: "An application to compare the outputs of different depth estimation models.",
2473
- id: "LiheYoung/Depth-Anything"
2488
+ description: "An application on cutting-edge depth estimation.",
2489
+ id: "depth-anything/Depth-Anything-V2"
2474
2490
  },
2475
2491
  {
2476
2492
  description: "An application to try state-of-the-art depth estimation.",
@@ -3030,18 +3046,18 @@ var taskData24 = {
3030
3046
  id: "latent-consistency/lcm-lora-sdxl"
3031
3047
  },
3032
3048
  {
3033
- description: "A text-to-image model that can generate coherent text inside image.",
3034
- id: "DeepFloyd/IF-I-XL-v1.0"
3049
+ description: "A very fast text-to-image model.",
3050
+ id: "ByteDance/SDXL-Lightning"
3035
3051
  },
3036
3052
  {
3037
3053
  description: "A powerful text-to-image model.",
3038
- id: "kakaobrain/karlo-v1-alpha"
3054
+ id: "stabilityai/stable-diffusion-3-medium-diffusers"
3039
3055
  }
3040
3056
  ],
3041
3057
  spaces: [
3042
3058
  {
3043
3059
  description: "A powerful text-to-image application.",
3044
- id: "stabilityai/stable-diffusion"
3060
+ id: "stabilityai/stable-diffusion-3-medium"
3045
3061
  },
3046
3062
  {
3047
3063
  description: "A text-to-image application to generate comics.",
@@ -3453,7 +3469,7 @@ var taskData29 = {
3453
3469
  spaces: [
3454
3470
  {
3455
3471
  description: "A leaderboard to compare different open-source text generation models based on various benchmarks.",
3456
- id: "HuggingFaceH4/open_llm_leaderboard"
3472
+ id: "open-llm-leaderboard/open_llm_leaderboard"
3457
3473
  },
3458
3474
  {
3459
3475
  description: "An text generation based application based on a very powerful LLaMA2 model.",
@@ -3931,6 +3947,10 @@ var taskData35 = {
3931
3947
  description: "Strong zero-shot image classification model.",
3932
3948
  id: "google/siglip-base-patch16-224"
3933
3949
  },
3950
+ {
3951
+ description: "Small yet powerful zero-shot image classification model that can run on edge devices.",
3952
+ id: "apple/MobileCLIP-S1-OpenCLIP"
3953
+ },
3934
3954
  {
3935
3955
  description: "Strong image classification model for biomedical domain.",
3936
3956
  id: "microsoft/BiomedCLIP-PubMedBERT_256-vit_base_patch16_224"
@@ -3990,11 +4010,11 @@ var taskData36 = {
3990
4010
  ],
3991
4011
  models: [
3992
4012
  {
3993
- description: "Solid zero-shot object detection model that uses CLIP as backbone.",
3994
- id: "google/owlvit-base-patch32"
4013
+ description: "Solid zero-shot object detection model.",
4014
+ id: "IDEA-Research/grounding-dino-base"
3995
4015
  },
3996
4016
  {
3997
- description: "The improved version of the owlvit model.",
4017
+ description: "Cutting-edge zero-shot object detection model.",
3998
4018
  id: "google/owlv2-base-patch16-ensemble"
3999
4019
  }
4000
4020
  ],
@@ -4002,6 +4022,10 @@ var taskData36 = {
4002
4022
  {
4003
4023
  description: "A demo to try the state-of-the-art zero-shot object detection model, OWLv2.",
4004
4024
  id: "merve/owlv2"
4025
+ },
4026
+ {
4027
+ description: "A demo that combines a zero-shot object detection and mask generation model for zero-shot segmentation.",
4028
+ id: "merve/OWLSAM"
4005
4029
  }
4006
4030
  ],
4007
4031
  summary: "Zero-shot object detection is a computer vision task to detect objects and their classes in images, without any prior training or knowledge of the classes. Zero-shot object detection models receive an image as input, as well as a list of candidate classes, and output the bounding boxes and labels where the objects have been detected.",
@@ -4198,6 +4222,49 @@ var bm25s = (model) => [
4198
4222
 
4199
4223
  retriever = BM25HF.load_from_hub("${model.id}")`
4200
4224
  ];
4225
+ var depth_anything_v2 = (model) => {
4226
+ let encoder;
4227
+ let features;
4228
+ let out_channels;
4229
+ encoder = "<ENCODER>";
4230
+ features = "<NUMBER_OF_FEATURES>";
4231
+ out_channels = "<OUT_CHANNELS>";
4232
+ if (model.id === "depth-anything/Depth-Anything-V2-Small") {
4233
+ encoder = "vits";
4234
+ features = "64";
4235
+ out_channels = "[48, 96, 192, 384]";
4236
+ } else if (model.id === "depth-anything/Depth-Anything-V2-Base") {
4237
+ encoder = "vitb";
4238
+ features = "128";
4239
+ out_channels = "[96, 192, 384, 768]";
4240
+ } else if (model.id === "depth-anything/Depth-Anything-V2-Large") {
4241
+ encoder = "vitl";
4242
+ features = "256";
4243
+ out_channels = "[256, 512, 1024, 1024";
4244
+ }
4245
+ return [
4246
+ `
4247
+ # Install from https://github.com/DepthAnything/Depth-Anything-V2
4248
+
4249
+ # Load the model and infer depth from an image
4250
+ import cv2
4251
+ import torch
4252
+
4253
+ from depth_anything_v2.dpt import DepthAnythingV2
+ from huggingface_hub import hf_hub_download
4254
+
4255
+ # instantiate the model
4256
+ model = DepthAnythingV2(encoder="${encoder}", features=${features}, out_channels=${out_channels})
4257
+
4258
+ # load the weights
4259
+ filepath = hf_hub_download(repo_id="${model.id}", filename="depth_anything_v2_${encoder}.pth", repo_type="model")
4260
+ state_dict = torch.load(filepath, map_location="cpu")
4261
+ model.load_state_dict(state_dict)
+ model.eval()
4262
+
4263
+ raw_img = cv2.imread("your/image/path")
4264
+ depth = model.infer_image(raw_img) # HxW raw depth map in numpy
4265
+ `
4266
+ ];
4267
+ };
4201
4268
  var diffusers_default = (model) => [
4202
4269
  `from diffusers import DiffusionPipeline
4203
4270
 
@@ -4887,6 +4954,14 @@ var MODEL_LIBRARIES_UI_ELEMENTS = {
4887
4954
  filter: false,
4888
4955
  countDownloads: `path:"adapter_config.json"`
4889
4956
  },
4957
+ "depth-anything-v2": {
4958
+ prettyLabel: "DepthAnythingV2",
4959
+ repoName: "Depth Anything V2",
4960
+ repoUrl: "https://github.com/DepthAnything/Depth-Anything-V2",
4961
+ snippets: depth_anything_v2,
4962
+ filter: false,
4963
+ countDownloads: `path_extension:"pth"`
4964
+ },
4890
4965
  diffusers: {
4891
4966
  prettyLabel: "Diffusers",
4892
4967
  repoName: "\u{1F917}/diffusers",
@@ -6385,6 +6460,24 @@ var DATASET_LIBRARIES_UI_ELEMENTS = {
6385
6460
  repoName: "dask",
6386
6461
  repoUrl: "https://github.com/dask/dask",
6387
6462
  docsUrl: "https://huggingface.co/docs/hub/datasets-dask"
6463
+ },
6464
+ distilabel: {
6465
+ prettyLabel: "Distilabel",
6466
+ repoName: "distilabel",
6467
+ repoUrl: "https://github.com/argilla-io/distilabel",
6468
+ docsUrl: "https://distilabel.argilla.io"
6469
+ },
6470
+ fiftyone: {
6471
+ prettyLabel: "FiftyOne",
6472
+ repoName: "fiftyone",
6473
+ repoUrl: "https://github.com/voxel51/fiftyone",
6474
+ docsUrl: "https://docs.voxel51.com"
6475
+ },
6476
+ argilla: {
6477
+ prettyLabel: "Argilla",
6478
+ repoName: "argilla",
6479
+ repoUrl: "https://github.com/argilla-io/argilla",
6480
+ docsUrl: "https://argilla-io.github.io/argilla"
6388
6481
  }
6389
6482
  };
6390
6483
  // Annotate the CommonJS export names for ESM import in node:
package/dist/index.js CHANGED
@@ -1681,14 +1681,19 @@ var taskData5 = {
1681
1681
  models: [
1682
1682
  {
1683
1683
  description: "A powerful feature extraction model for natural language processing tasks.",
1684
- id: "facebook/bart-base"
1684
+ id: "thenlper/gte-large"
1685
1685
  },
1686
1686
  {
1687
- description: "A strong feature extraction model for coding tasks.",
1688
- id: "microsoft/codebert-base"
1687
+ description: "A strong feature extraction model for retrieval.",
1688
+ id: "Alibaba-NLP/gte-Qwen1.5-7B-instruct"
1689
+ }
1690
+ ],
1691
+ spaces: [
1692
+ {
1693
+ description: "A leaderboard to rank best feature extraction models..",
1694
+ id: "mteb/leaderboard"
1689
1695
  }
1690
1696
  ],
1691
- spaces: [],
1692
1697
  summary: "Feature extraction is the task of extracting features learnt in a model.",
1693
1698
  widgetModels: ["facebook/bart-base"]
1694
1699
  };
@@ -2317,9 +2322,12 @@ var data_default13 = taskData13;
2317
2322
  var taskData14 = {
2318
2323
  datasets: [
2319
2324
  {
2320
- // TODO write proper description
2321
- description: "Widely used benchmark dataset for multiple Vision tasks.",
2325
+ description: "Widely used benchmark dataset for multiple vision tasks.",
2322
2326
  id: "merve/coco2017"
2327
+ },
2328
+ {
2329
+ description: "Multi-task computer vision benchmark.",
2330
+ id: "merve/pascal-voc"
2323
2331
  }
2324
2332
  ],
2325
2333
  demo: {
@@ -2358,6 +2366,10 @@ var taskData14 = {
2358
2366
  {
2359
2367
  description: "Strong object detection model trained on ImageNet-21k dataset.",
2360
2368
  id: "microsoft/beit-base-patch16-224-pt22k-ft22k"
2369
+ },
2370
+ {
2371
+ description: "Fast and accurate object detection model trained on COCO dataset.",
2372
+ id: "PekingU/rtdetr_r18vd_coco_o365"
2361
2373
  }
2362
2374
  ],
2363
2375
  spaces: [
@@ -2365,10 +2377,6 @@ var taskData14 = {
2365
2377
  description: "Leaderboard to compare various object detection models across several metrics.",
2366
2378
  id: "hf-vision/object_detection_leaderboard"
2367
2379
  },
2368
- {
2369
- description: "An object detection application that can detect unseen objects out of the box.",
2370
- id: "merve/owlv2"
2371
- },
2372
2380
  {
2373
2381
  description: "An application that contains various object detection models to try from.",
2374
2382
  id: "Gradio-Blocks/Object-Detection-With-DETR-and-YOLOS"
@@ -2380,6 +2388,10 @@ var taskData14 = {
2380
2388
  {
2381
2389
  description: "An object tracking, segmentation and inpainting application.",
2382
2390
  id: "VIPLab/Track-Anything"
2391
+ },
2392
+ {
2393
+ description: "Very fast object tracking application based on object detection.",
2394
+ id: "merve/RT-DETR-tracking-coco"
2383
2395
  }
2384
2396
  ],
2385
2397
  summary: "Object Detection models allow users to identify objects of certain defined classes. Object detection models receive an image as input and output the images with bounding boxes and labels on detected objects.",
@@ -2392,8 +2404,12 @@ var data_default14 = taskData14;
2392
2404
  var taskData15 = {
2393
2405
  datasets: [
2394
2406
  {
2395
- description: "NYU Depth V2 Dataset: Video dataset containing both RGB and depth sensor data",
2407
+ description: "NYU Depth V2 Dataset: Video dataset containing both RGB and depth sensor data.",
2396
2408
  id: "sayakpaul/nyu_depth_v2"
2409
+ },
2410
+ {
2411
+ description: "Monocular depth estimation benchmark based without noise and errors.",
2412
+ id: "depth-anything/DA-2K"
2397
2413
  }
2398
2414
  ],
2399
2415
  demo: {
@@ -2413,16 +2429,16 @@ var taskData15 = {
2413
2429
  metrics: [],
2414
2430
  models: [
2415
2431
  {
2416
- description: "Strong Depth Estimation model trained on 1.4 million images.",
2417
- id: "Intel/dpt-large"
2418
- },
2419
- {
2420
- description: "Strong Depth Estimation model trained on a big compilation of datasets.",
2421
- id: "LiheYoung/depth-anything-large-hf"
2432
+ description: "Cutting-edge depth estimation model.",
2433
+ id: "depth-anything/Depth-Anything-V2-Large"
2422
2434
  },
2423
2435
  {
2424
2436
  description: "A strong monocular depth estimation model.",
2425
2437
  id: "Bingxin/Marigold"
2438
+ },
2439
+ {
2440
+ description: "A metric depth estimation model trained on NYU dataset.",
2441
+ id: "Intel/zoedepth-nyu"
2426
2442
  }
2427
2443
  ],
2428
2444
  spaces: [
@@ -2431,8 +2447,8 @@ var taskData15 = {
2431
2447
  id: "radames/dpt-depth-estimation-3d-voxels"
2432
2448
  },
2433
2449
  {
2434
- description: "An application to compare the outputs of different depth estimation models.",
2435
- id: "LiheYoung/Depth-Anything"
2450
+ description: "An application on cutting-edge depth estimation.",
2451
+ id: "depth-anything/Depth-Anything-V2"
2436
2452
  },
2437
2453
  {
2438
2454
  description: "An application to try state-of-the-art depth estimation.",
@@ -2992,18 +3008,18 @@ var taskData24 = {
2992
3008
  id: "latent-consistency/lcm-lora-sdxl"
2993
3009
  },
2994
3010
  {
2995
- description: "A text-to-image model that can generate coherent text inside image.",
2996
- id: "DeepFloyd/IF-I-XL-v1.0"
3011
+ description: "A very fast text-to-image model.",
3012
+ id: "ByteDance/SDXL-Lightning"
2997
3013
  },
2998
3014
  {
2999
3015
  description: "A powerful text-to-image model.",
3000
- id: "kakaobrain/karlo-v1-alpha"
3016
+ id: "stabilityai/stable-diffusion-3-medium-diffusers"
3001
3017
  }
3002
3018
  ],
3003
3019
  spaces: [
3004
3020
  {
3005
3021
  description: "A powerful text-to-image application.",
3006
- id: "stabilityai/stable-diffusion"
3022
+ id: "stabilityai/stable-diffusion-3-medium"
3007
3023
  },
3008
3024
  {
3009
3025
  description: "A text-to-image application to generate comics.",
@@ -3415,7 +3431,7 @@ var taskData29 = {
3415
3431
  spaces: [
3416
3432
  {
3417
3433
  description: "A leaderboard to compare different open-source text generation models based on various benchmarks.",
3418
- id: "HuggingFaceH4/open_llm_leaderboard"
3434
+ id: "open-llm-leaderboard/open_llm_leaderboard"
3419
3435
  },
3420
3436
  {
3421
3437
  description: "An text generation based application based on a very powerful LLaMA2 model.",
@@ -3893,6 +3909,10 @@ var taskData35 = {
3893
3909
  description: "Strong zero-shot image classification model.",
3894
3910
  id: "google/siglip-base-patch16-224"
3895
3911
  },
3912
+ {
3913
+ description: "Small yet powerful zero-shot image classification model that can run on edge devices.",
3914
+ id: "apple/MobileCLIP-S1-OpenCLIP"
3915
+ },
3896
3916
  {
3897
3917
  description: "Strong image classification model for biomedical domain.",
3898
3918
  id: "microsoft/BiomedCLIP-PubMedBERT_256-vit_base_patch16_224"
@@ -3952,11 +3972,11 @@ var taskData36 = {
3952
3972
  ],
3953
3973
  models: [
3954
3974
  {
3955
- description: "Solid zero-shot object detection model that uses CLIP as backbone.",
3956
- id: "google/owlvit-base-patch32"
3975
+ description: "Solid zero-shot object detection model.",
3976
+ id: "IDEA-Research/grounding-dino-base"
3957
3977
  },
3958
3978
  {
3959
- description: "The improved version of the owlvit model.",
3979
+ description: "Cutting-edge zero-shot object detection model.",
3960
3980
  id: "google/owlv2-base-patch16-ensemble"
3961
3981
  }
3962
3982
  ],
@@ -3964,6 +3984,10 @@ var taskData36 = {
3964
3984
  {
3965
3985
  description: "A demo to try the state-of-the-art zero-shot object detection model, OWLv2.",
3966
3986
  id: "merve/owlv2"
3987
+ },
3988
+ {
3989
+ description: "A demo that combines a zero-shot object detection and mask generation model for zero-shot segmentation.",
3990
+ id: "merve/OWLSAM"
3967
3991
  }
3968
3992
  ],
3969
3993
  summary: "Zero-shot object detection is a computer vision task to detect objects and their classes in images, without any prior training or knowledge of the classes. Zero-shot object detection models receive an image as input, as well as a list of candidate classes, and output the bounding boxes and labels where the objects have been detected.",
@@ -4160,6 +4184,49 @@ var bm25s = (model) => [
4160
4184
 
4161
4185
  retriever = BM25HF.load_from_hub("${model.id}")`
4162
4186
  ];
4187
+ var depth_anything_v2 = (model) => {
4188
+ let encoder;
4189
+ let features;
4190
+ let out_channels;
4191
+ encoder = "<ENCODER>";
4192
+ features = "<NUMBER_OF_FEATURES>";
4193
+ out_channels = "<OUT_CHANNELS>";
4194
+ if (model.id === "depth-anything/Depth-Anything-V2-Small") {
4195
+ encoder = "vits";
4196
+ features = "64";
4197
+ out_channels = "[48, 96, 192, 384]";
4198
+ } else if (model.id === "depth-anything/Depth-Anything-V2-Base") {
4199
+ encoder = "vitb";
4200
+ features = "128";
4201
+ out_channels = "[96, 192, 384, 768]";
4202
+ } else if (model.id === "depth-anything/Depth-Anything-V2-Large") {
4203
+ encoder = "vitl";
4204
+ features = "256";
4205
+ out_channels = "[256, 512, 1024, 1024";
4206
+ }
4207
+ return [
4208
+ `
4209
+ # Install from https://github.com/DepthAnything/Depth-Anything-V2
4210
+
4211
+ # Load the model and infer depth from an image
4212
+ import cv2
4213
+ import torch
4214
+
4215
+ from depth_anything_v2.dpt import DepthAnythingV2
+ from huggingface_hub import hf_hub_download
4216
+
4217
+ # instantiate the model
4218
+ model = DepthAnythingV2(encoder="${encoder}", features=${features}, out_channels=${out_channels})
4219
+
4220
+ # load the weights
4221
+ filepath = hf_hub_download(repo_id="${model.id}", filename="depth_anything_v2_${encoder}.pth", repo_type="model")
4222
+ state_dict = torch.load(filepath, map_location="cpu")
4223
+ model.load_state_dict(state_dict)
+ model.eval()
4224
+
4225
+ raw_img = cv2.imread("your/image/path")
4226
+ depth = model.infer_image(raw_img) # HxW raw depth map in numpy
4227
+ `
4228
+ ];
4229
+ };
4163
4230
  var diffusers_default = (model) => [
4164
4231
  `from diffusers import DiffusionPipeline
4165
4232
 
@@ -4849,6 +4916,14 @@ var MODEL_LIBRARIES_UI_ELEMENTS = {
4849
4916
  filter: false,
4850
4917
  countDownloads: `path:"adapter_config.json"`
4851
4918
  },
4919
+ "depth-anything-v2": {
4920
+ prettyLabel: "DepthAnythingV2",
4921
+ repoName: "Depth Anything V2",
4922
+ repoUrl: "https://github.com/DepthAnything/Depth-Anything-V2",
4923
+ snippets: depth_anything_v2,
4924
+ filter: false,
4925
+ countDownloads: `path_extension:"pth"`
4926
+ },
4852
4927
  diffusers: {
4853
4928
  prettyLabel: "Diffusers",
4854
4929
  repoName: "\u{1F917}/diffusers",
@@ -6347,6 +6422,24 @@ var DATASET_LIBRARIES_UI_ELEMENTS = {
6347
6422
  repoName: "dask",
6348
6423
  repoUrl: "https://github.com/dask/dask",
6349
6424
  docsUrl: "https://huggingface.co/docs/hub/datasets-dask"
6425
+ },
6426
+ distilabel: {
6427
+ prettyLabel: "Distilabel",
6428
+ repoName: "distilabel",
6429
+ repoUrl: "https://github.com/argilla-io/distilabel",
6430
+ docsUrl: "https://distilabel.argilla.io"
6431
+ },
6432
+ fiftyone: {
6433
+ prettyLabel: "FiftyOne",
6434
+ repoName: "fiftyone",
6435
+ repoUrl: "https://github.com/voxel51/fiftyone",
6436
+ docsUrl: "https://docs.voxel51.com"
6437
+ },
6438
+ argilla: {
6439
+ prettyLabel: "Argilla",
6440
+ repoName: "argilla",
6441
+ repoUrl: "https://github.com/argilla-io/argilla",
6442
+ docsUrl: "https://argilla-io.github.io/argilla"
6350
6443
  }
6351
6444
  };
6352
6445
  export {
@@ -52,6 +52,24 @@ export declare const DATASET_LIBRARIES_UI_ELEMENTS: {
52
52
  repoUrl: string;
53
53
  docsUrl: string;
54
54
  };
55
+ distilabel: {
56
+ prettyLabel: string;
57
+ repoName: string;
58
+ repoUrl: string;
59
+ docsUrl: string;
60
+ };
61
+ fiftyone: {
62
+ prettyLabel: string;
63
+ repoName: string;
64
+ repoUrl: string;
65
+ docsUrl: string;
66
+ };
67
+ argilla: {
68
+ prettyLabel: string;
69
+ repoName: string;
70
+ repoUrl: string;
71
+ docsUrl: string;
72
+ };
55
73
  };
56
74
  export type DatasetLibraryKey = keyof typeof DATASET_LIBRARIES_UI_ELEMENTS;
57
75
  //# sourceMappingURL=dataset-libraries.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"dataset-libraries.d.ts","sourceRoot":"","sources":["../../src/dataset-libraries.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,MAAM,WAAW,uBAAuB;IACvC;;;;OAIG;IACH,WAAW,EAAE,MAAM,CAAC;IACpB;;OAEG;IACH,QAAQ,EAAE,MAAM,CAAC;IACjB;;OAEG;IACH,OAAO,EAAE,MAAM,CAAC;IAChB;;OAEG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,eAAO,MAAM,6BAA6B;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CA+BS,CAAC;AAGpD,MAAM,MAAM,iBAAiB,GAAG,MAAM,OAAO,6BAA6B,CAAC"}
1
+ {"version":3,"file":"dataset-libraries.d.ts","sourceRoot":"","sources":["../../src/dataset-libraries.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,MAAM,WAAW,uBAAuB;IACvC;;;;OAIG;IACH,WAAW,EAAE,MAAM,CAAC;IACpB;;OAEG;IACH,QAAQ,EAAE,MAAM,CAAC;IACjB;;OAEG;IACH,OAAO,EAAE,MAAM,CAAC;IAChB;;OAEG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,eAAO,MAAM,6BAA6B;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAiDS,CAAC;AAGpD,MAAM,MAAM,iBAAiB,GAAG,MAAM,OAAO,6BAA6B,CAAC"}
@@ -5,6 +5,7 @@ export declare const asteroid: (model: ModelData) => string[];
5
5
  export declare const audioseal: (model: ModelData) => string[];
6
6
  export declare const bertopic: (model: ModelData) => string[];
7
7
  export declare const bm25s: (model: ModelData) => string[];
8
+ export declare const depth_anything_v2: (model: ModelData) => string[];
8
9
  export declare const diffusers: (model: ModelData) => string[];
9
10
  export declare const edsnlp: (model: ModelData) => string[];
10
11
  export declare const espnetTTS: (model: ModelData) => string[];
@@ -1 +1 @@
1
- {"version":3,"file":"model-libraries-snippets.d.ts","sourceRoot":"","sources":["../../src/model-libraries-snippets.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AAY9C,eAAO,MAAM,QAAQ,UAAW,SAAS,KAAG,MAAM,EAKjD,CAAC;AAkBF,eAAO,MAAM,QAAQ,UAAW,SAAS,KAAG,MAAM,EAKjD,CAAC;AAEF,eAAO,MAAM,QAAQ,UAAW,SAAS,KAAG,MAAM,EAIjD,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAkBlD,CAAC;AAMF,eAAO,MAAM,QAAQ,UAAW,SAAS,KAAG,MAAM,EAIjD,CAAC;AAEF,eAAO,MAAM,KAAK,UAAW,SAAS,KAAG,MAAM,EAI9C,CAAC;AA+BF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAUlD,CAAC;AAEF,eAAO,MAAM,MAAM,UAAW,SAAS,KAAG,MAAM,EAgB/C,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAMlD,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EASlD,CAAC;AAIF,eAAO,MAAM,MAAM,UAAW,SAAS,KAAG,MAAM,EAO/C,CAAC;AAEF,eAAO,MAAM,OAAO,UAAW,SAAS,KAAG,MAAM,EAMhD,CAAC;AAEF,eAAO,MAAM,KAAK,UAAW,SAAS,KAAG,MAAM,EAI9C,CAAC;AAEF,eAAO,MAAM,MAAM,UAAW,SAAS,KAAG,MAAM,EAI/C,CAAC;AAEF,eAAO,MAAM,KAAK,UAAW,SAAS,KAAG,MAAM,EAS9C,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAUlD,CAAC;AAEF,eAAO,MAAM,QAAQ,UAAW,SAAS,KAAG,MAAM,EAOjD,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAKlD,CAAC;AAEF,eAAO,MAAM,aAAa,QAAO,MAAM,EAQtC,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAKlD,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAsBlD,CAAC;AAEF,eAAO,MAAM,uBAAuB,UAAW,SAAS,KAAG,MAAM,EAehE,CAAC;AAiBF,eAAO,MAAM,cAAc,UAAW,SAAS,KAAG,MAAM,EAKvD,CAAC;AAyBF,eAAO,MAAM,aAAa,UAAW,SAAS,KAAG,MAAM,EAOtD,CAAC;AAEF,eAAO,MAAM,IAAI,UAAW,SAAS,KAAG,MAAM,EAI7C,CAAC;AAsCF,eAAO,MAAM,OAAO,UAAW,SAAS,KAAG,MAAM,EAehD,CAAC;AAEF,eAAO,MAAM,kBAAkB,UAAW,SAAS,KAAG,MAAM,EAmC3D,CAAC;AAEF,eAAO,MAAM,MAAM,UAAW,SAAS,KAAG,MAAM,EAI/C,CAAC;AAEF,eAAO,MAAM,aAAa,UAAW,SAAS,KAAG,MAAM,EAEtD,CAAC;AAEF,eAAO,MAAM,oBAAoB,UAAW,SAAS,KAAG,MAAM,EAI7D,CAAC;AAEF,eAAO,MAAM,MAAM,UAAW,SAAS,KAAG,MAAM,EAI/C,CAAC;AAEF,eAAO,MAAM,KAAK,UAAW,SAAS,KAAG,MAAM,EAU9C,CAAC;AAEF,eAAO,MAAM,WAAW,UAAW,SAAS,KAAG,MAAM,EAIpD,CAAC;AAEF,eAAO,MAAM,MAAM,UAAW,SAAS,KAAG,MAAM,EAK/C,CAAC;AAkBF,eAAO,MAAM,WAAW,UAAW,SAAS,KAAG,MAAM,EAkBpD,CAAC;AAEF,eAAO,MAAM,YAAY,UAAW,SAAS,KAAG,MAAM,EA4CrD,CAAC;AAEF,eAAO,MAAM,cAAc,UAAW,SAAS,KAAG,MAAM,EAcvD,CAAC;AAiBF,eAAO,MAAM,IAAI,UAAW,SAAS,KAAG,MAAM,EAkB7C,CAAC;AAEF,eAAO,MAAM,QAAQ,UAAW,SAAS,KAAG,MAAM,EAKjD,CAAC;AAEF,eAAO,MAAM,gBAAgB,UAAW,SAAS,KAAG,MAAM,EAMzD,CAAC;AAgBF,eAAO,MAAM,QAAQ,UAAW,SAAS,KAAG,MAAM,EAEjD,CAAC;AAEF,eAAO,MAAM,MAAM,QAA6B,MAAM,EAMrD,CAAC;AAEF,eAAO,MAAM,UAAU,UAAW,SAAS,KAAG,MAAM,EAInD,CAAC;AAEF,eAAO,MAAM,OAAO,QAAO,MAAM,EAYhC,CAAC;AAEF,eAAO,MAAM,GAAG,UAAW,SAAS,KAAG,MAAM,EAK5C,CAAC;AAEF,eAAO,MAAM,KAAK,UAAW,SAAS,KAAG,MAAM,EAI9C,CAAC;AAEF,eAAO,MAAM,IAAI,UAAW,SAAS,KAAG,MAAM,EAQ7C,CAAC;AAEF,eAAO,MAAM,MAAM,UAAW,SAAS,KAAG,MAAM,EAI/C,CAAC;AA6BF,eAAO,MAAM,UAAU,UAAW,SAAS,KAAG,MAAM,EAUnD,CAAC;AAEF,eAAO,MAAM,UAAU,QAAO,MAAM,EAYnC,CAAC"}
1
+ {"version":3,"file":"model-libraries-snippets.d.ts","sourceRoot":"","sources":["../../src/model-libraries-snippets.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AAY9C,eAAO,MAAM,QAAQ,UAAW,SAAS,KAAG,MAAM,EAKjD,CAAC;AAkBF,eAAO,MAAM,QAAQ,UAAW,SAAS,KAAG,MAAM,EAKjD,CAAC;AAEF,eAAO,MAAM,QAAQ,UAAW,SAAS,KAAG,MAAM,EAIjD,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAkBlD,CAAC;AAMF,eAAO,MAAM,QAAQ,UAAW,SAAS,KAAG,MAAM,EAIjD,CAAC;AAEF,eAAO,MAAM,KAAK,UAAW,SAAS,KAAG,MAAM,EAI9C,CAAC;AAEF,eAAO,MAAM,iBAAiB,UAAW,SAAS,KAAG,MAAM,EA6C1D,CAAC;AA+BF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAUlD,CAAC;AAEF,eAAO,MAAM,MAAM,UAAW,SAAS,KAAG,MAAM,EAgB/C,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAMlD,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EASlD,CAAC;AAIF,eAAO,MAAM,MAAM,UAAW,SAAS,KAAG,MAAM,EAO/C,CAAC;AAEF,eAAO,MAAM,OAAO,UAAW,SAAS,KAAG,MAAM,EAMhD,CAAC;AAEF,eAAO,MAAM,KAAK,UAAW,SAAS,KAAG,MAAM,EAI9C,CAAC;AAEF,eAAO,MAAM,MAAM,UAAW,SAAS,KAAG,MAAM,EAI/C,CAAC;AAEF,eAAO,MAAM,KAAK,UAAW,SAAS,KAAG,MAAM,EAS9C,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAUlD,CAAC;AAEF,eAAO,MAAM,QAAQ,UAAW,SAAS,KAAG,MAAM,EAOjD,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAKlD,CAAC;AAEF,eAAO,MAAM,aAAa,QAAO,MAAM,EAQtC,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAKlD,CAAC;AAEF,eAAO,MAAM,SAAS,UAAW,SAAS,KAAG,MAAM,EAsBlD,CAAC;AAEF,eAAO,MAAM,uBAAuB,UAAW,SAAS,KAAG,MAAM,EAehE,CAAC;AAiBF,eAAO,MAAM,cAAc,UAAW,SAAS,KAAG,MAAM,EAKvD,CAAC;AAyBF,eAAO,MAAM,aAAa,UAAW,SAAS,KAAG,MAAM,EAOtD,CAAC;AAEF,eAAO,MAAM,IAAI,UAAW,SAAS,KAAG,MAAM,EAI7C,CAAC;AAsCF,eAAO,MAAM,OAAO,UAAW,SAAS,KAAG,MAAM,EAehD,CAAC;AAEF,eAAO,MAAM,kBAAkB,UAAW,SAAS,KAAG,MAAM,EAmC3D,CAAC;AAEF,eAAO,MAAM,MAAM,UAAW,SAAS,KAAG,MAAM,EAI/C,CAAC;AAEF,eAAO,MAAM,aAAa,UAAW,SAAS,KAAG,MAAM,EAEtD,CAAC;AAEF,eAAO,MAAM,oBAAoB,UAAW,SAAS,KAAG,MAAM,EAI7D,CAAC;AAEF,eAAO,MAAM,MAAM,UAAW,SAAS,KAAG,MAAM,EAI/C,CAAC;AAEF,eAAO,MAAM,KAAK,UAAW,SAAS,KAAG,MAAM,EAU9C,CAAC;AAEF,eAAO,MAAM,WAAW,UAAW,SAAS,KAAG,MAAM,EAIpD,CAAC;AAEF,eAAO,MAAM,MAAM,UAAW,SAAS,KAAG,MAAM,EAK/C,CAAC;AAkBF,eAAO,MAAM,WAAW,UAAW,SAAS,KAAG,MAAM,EAkBpD,CAAC;AAEF,eAAO,MAAM,YAAY,UAAW,SAAS,KAAG,MAAM,EA4CrD,CAAC;AAEF,eAAO,MAAM,cAAc,UAAW,SAAS,KAAG,MAAM,EAcvD,CAAC;AAiBF,eAAO,MAAM,IAAI,UAAW,SAAS,KAAG,MAAM,EAkB7C,CAAC;AAEF,eAAO,MAAM,QAAQ,UAAW,SAAS,KAAG,MAAM,EAKjD,CAAC;AAEF,eAAO,MAAM,gBAAgB,UAAW,SAAS,KAAG,MAAM,EAMzD,CAAC;AAgBF,eAAO,MAAM,QAAQ,UAAW,SAAS,KAAG,MAAM,EAEjD,CAAC;AAEF,eAAO,MAAM,MAAM,QAA6B,MAAM,EAMrD,CAAC;AAEF,eAAO,MAAM,UAAU,UAAW,SAAS,KAAG,MAAM,EAInD,CAAC;AAEF,eAAO,MAAM,OAAO,QAAO,MAAM,EAYhC,CAAC;AAEF,eAAO,MAAM,GAAG,UAAW,SAAS,KAAG,MAAM,EAK5C,CAAC;AAEF,eAAO,MAAM,KAAK,UAAW,SAAS,KAAG,MAAM,EAI9C,CAAC;AAEF,eAAO,MAAM,IAAI,UAAW,SAAS,KAAG,MAAM,EAQ7C,CAAC;AAEF,eAAO,MAAM,MAAM,UAAW,SAAS,KAAG,MAAM,EAI/C,CAAC;AA6BF,eAAO,MAAM,UAAU,UAAW,SAAS,KAAG,MAAM,EAUnD,CAAC;AAEF,eAAO,MAAM,UAAU,QAAO,MAAM,EAYnC,CAAC"}
@@ -139,6 +139,14 @@ export declare const MODEL_LIBRARIES_UI_ELEMENTS: {
139
139
  filter: false;
140
140
  countDownloads: string;
141
141
  };
142
+ "depth-anything-v2": {
143
+ prettyLabel: string;
144
+ repoName: string;
145
+ repoUrl: string;
146
+ snippets: (model: ModelData) => string[];
147
+ filter: false;
148
+ countDownloads: string;
149
+ };
142
150
  diffusers: {
143
151
  prettyLabel: string;
144
152
  repoName: string;
@@ -549,6 +557,6 @@ export declare const MODEL_LIBRARIES_UI_ELEMENTS: {
549
557
  };
550
558
  };
551
559
  export type ModelLibraryKey = keyof typeof MODEL_LIBRARIES_UI_ELEMENTS;
552
- export declare const ALL_MODEL_LIBRARY_KEYS: ("adapter-transformers" | "allennlp" | "asteroid" | "audiocraft" | "audioseal" | "bertopic" | "big_vision" | "bm25s" | "champ" | "chat_tts" | "colpali" | "diffusers" | "doctr" | "edsnlp" | "elm" | "espnet" | "fairseq" | "fastai" | "fasttext" | "flair" | "gemma.cpp" | "gliner" | "glyph-byt5" | "grok" | "hallo" | "hunyuan-dit" | "keras" | "tf-keras" | "keras-nlp" | "k2" | "mindspore" | "mars5-tts" | "mesh-anything" | "ml-agents" | "mlx" | "mlx-image" | "mlc-llm" | "nemo" | "open_clip" | "paddlenlp" | "peft" | "pyannote-audio" | "pythae" | "recurrentgemma" | "sample-factory" | "sentence-transformers" | "setfit" | "sklearn" | "spacy" | "span-marker" | "speechbrain" | "stable-audio-tools" | "diffusion-single-file" | "stable-baselines3" | "stanza" | "tensorflowtts" | "tic-clip" | "timesfm" | "timm" | "transformers" | "transformers.js" | "unity-sentis" | "voicecraft" | "whisperkit")[];
553
- export declare const ALL_DISPLAY_MODEL_LIBRARY_KEYS: ("adapter-transformers" | "allennlp" | "asteroid" | "audiocraft" | "audioseal" | "bertopic" | "big_vision" | "bm25s" | "champ" | "chat_tts" | "colpali" | "diffusers" | "doctr" | "edsnlp" | "elm" | "espnet" | "fairseq" | "fastai" | "fasttext" | "flair" | "gemma.cpp" | "gliner" | "glyph-byt5" | "grok" | "hallo" | "hunyuan-dit" | "keras" | "tf-keras" | "keras-nlp" | "k2" | "mindspore" | "mars5-tts" | "mesh-anything" | "ml-agents" | "mlx" | "mlx-image" | "mlc-llm" | "nemo" | "open_clip" | "paddlenlp" | "peft" | "pyannote-audio" | "pythae" | "recurrentgemma" | "sample-factory" | "sentence-transformers" | "setfit" | "sklearn" | "spacy" | "span-marker" | "speechbrain" | "stable-audio-tools" | "diffusion-single-file" | "stable-baselines3" | "stanza" | "tensorflowtts" | "tic-clip" | "timesfm" | "timm" | "transformers" | "transformers.js" | "unity-sentis" | "voicecraft" | "whisperkit")[];
560
+ export declare const ALL_MODEL_LIBRARY_KEYS: ("adapter-transformers" | "allennlp" | "asteroid" | "audiocraft" | "audioseal" | "bertopic" | "big_vision" | "bm25s" | "champ" | "chat_tts" | "colpali" | "depth-anything-v2" | "diffusers" | "doctr" | "edsnlp" | "elm" | "espnet" | "fairseq" | "fastai" | "fasttext" | "flair" | "gemma.cpp" | "gliner" | "glyph-byt5" | "grok" | "hallo" | "hunyuan-dit" | "keras" | "tf-keras" | "keras-nlp" | "k2" | "mindspore" | "mars5-tts" | "mesh-anything" | "ml-agents" | "mlx" | "mlx-image" | "mlc-llm" | "nemo" | "open_clip" | "paddlenlp" | "peft" | "pyannote-audio" | "pythae" | "recurrentgemma" | "sample-factory" | "sentence-transformers" | "setfit" | "sklearn" | "spacy" | "span-marker" | "speechbrain" | "stable-audio-tools" | "diffusion-single-file" | "stable-baselines3" | "stanza" | "tensorflowtts" | "tic-clip" | "timesfm" | "timm" | "transformers" | "transformers.js" | "unity-sentis" | "voicecraft" | "whisperkit")[];
561
+ export declare const ALL_DISPLAY_MODEL_LIBRARY_KEYS: ("adapter-transformers" | "allennlp" | "asteroid" | "audiocraft" | "audioseal" | "bertopic" | "big_vision" | "bm25s" | "champ" | "chat_tts" | "colpali" | "depth-anything-v2" | "diffusers" | "doctr" | "edsnlp" | "elm" | "espnet" | "fairseq" | "fastai" | "fasttext" | "flair" | "gemma.cpp" | "gliner" | "glyph-byt5" | "grok" | "hallo" | "hunyuan-dit" | "keras" | "tf-keras" | "keras-nlp" | "k2" | "mindspore" | "mars5-tts" | "mesh-anything" | "ml-agents" | "mlx" | "mlx-image" | "mlc-llm" | "nemo" | "open_clip" | "paddlenlp" | "peft" | "pyannote-audio" | "pythae" | "recurrentgemma" | "sample-factory" | "sentence-transformers" | "setfit" | "sklearn" | "spacy" | "span-marker" | "speechbrain" | "stable-audio-tools" | "diffusion-single-file" | "stable-baselines3" | "stanza" | "tensorflowtts" | "tic-clip" | "timesfm" | "timm" | "transformers" | "transformers.js" | "unity-sentis" | "voicecraft" | "whisperkit")[];
554
562
  //# sourceMappingURL=model-libraries.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"model-libraries.d.ts","sourceRoot":"","sources":["../../src/model-libraries.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AAC9C,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,6BAA6B,CAAC;AAEtE;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAChC;;;;OAIG;IACH,WAAW,EAAE,MAAM,CAAC;IACpB;;OAEG;IACH,QAAQ,EAAE,MAAM,CAAC;IACjB;;OAEG;IACH,OAAO,EAAE,MAAM,CAAC;IAChB;;OAEG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB;;OAEG;IACH,QAAQ,CAAC,EAAE,CAAC,KAAK,EAAE,SAAS,KAAK,MAAM,EAAE,CAAC;IAC1C;;;;;OAKG;IACH,cAAc,CAAC,EAAE,kBAAkB,CAAC;IACpC;;;OAGG;IACH,MAAM,CAAC,EAAE,OAAO,CAAC;CACjB;AAED;;;;;;;;;;;;;GAaG;AAEH,eAAO,MAAM,2BAA2B;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAgfI,CAAC;AAE7C,MAAM,MAAM,eAAe,GAAG,MAAM,OAAO,2BAA2B,CAAC;AAEvE,eAAO,MAAM,sBAAsB,43BAAgE,CAAC;AAEpG,eAAO,MAAM,8BAA8B,43BAQ1B,CAAC"}
1
+ {"version":3,"file":"model-libraries.d.ts","sourceRoot":"","sources":["../../src/model-libraries.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AAC9C,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,6BAA6B,CAAC;AAEtE;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAChC;;;;OAIG;IACH,WAAW,EAAE,MAAM,CAAC;IACpB;;OAEG;IACH,QAAQ,EAAE,MAAM,CAAC;IACjB;;OAEG;IACH,OAAO,EAAE,MAAM,CAAC;IAChB;;OAEG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB;;OAEG;IACH,QAAQ,CAAC,EAAE,CAAC,KAAK,EAAE,SAAS,KAAK,MAAM,EAAE,CAAC;IAC1C;;;;;OAKG;IACH,cAAc,CAAC,EAAE,kBAAkB,CAAC;IACpC;;;OAGG;IACH,MAAM,CAAC,EAAE,OAAO,CAAC;CACjB;AAED;;;;;;;;;;;;;GAaG;AAEH,eAAO,MAAM,2BAA2B;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAwfI,CAAC;AAE7C,MAAM,MAAM,eAAe,GAAG,MAAM,OAAO,2BAA2B,CAAC;AAEvE,eAAO,MAAM,sBAAsB,k5BAAgE,CAAC;AAEpG,eAAO,MAAM,8BAA8B,k5BAQ1B,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"data.d.ts","sourceRoot":"","sources":["../../../../src/tasks/depth-estimation/data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,IAAI,CAAC;AAEzC,QAAA,MAAM,QAAQ,EAAE,cAqDf,CAAC;AAEF,eAAe,QAAQ,CAAC"}
1
+ {"version":3,"file":"data.d.ts","sourceRoot":"","sources":["../../../../src/tasks/depth-estimation/data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,IAAI,CAAC;AAEzC,QAAA,MAAM,QAAQ,EAAE,cAyDf,CAAC;AAEF,eAAe,QAAQ,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"data.d.ts","sourceRoot":"","sources":["../../../../src/tasks/feature-extraction/data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,IAAI,CAAC;AAEzC,QAAA,MAAM,QAAQ,EAAE,cA2Cf,CAAC;AAEF,eAAe,QAAQ,CAAC"}
1
+ {"version":3,"file":"data.d.ts","sourceRoot":"","sources":["../../../../src/tasks/feature-extraction/data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,IAAI,CAAC;AAEzC,QAAA,MAAM,QAAQ,EAAE,cAgDf,CAAC;AAEF,eAAe,QAAQ,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"data.d.ts","sourceRoot":"","sources":["../../../../src/tasks/object-detection/data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,IAAI,CAAC;AAEzC,QAAA,MAAM,QAAQ,EAAE,cA0Ef,CAAC;AAEF,eAAe,QAAQ,CAAC"}
1
+ {"version":3,"file":"data.d.ts","sourceRoot":"","sources":["../../../../src/tasks/object-detection/data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,IAAI,CAAC;AAEzC,QAAA,MAAM,QAAQ,EAAE,cAiFf,CAAC;AAEF,eAAe,QAAQ,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"data.d.ts","sourceRoot":"","sources":["../../../../src/tasks/zero-shot-image-classification/data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,IAAI,CAAC;AAEzC,QAAA,MAAM,QAAQ,EAAE,cA2Ef,CAAC;AAEF,eAAe,QAAQ,CAAC"}
1
+ {"version":3,"file":"data.d.ts","sourceRoot":"","sources":["../../../../src/tasks/zero-shot-image-classification/data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,IAAI,CAAC;AAEzC,QAAA,MAAM,QAAQ,EAAE,cA+Ef,CAAC;AAEF,eAAe,QAAQ,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"data.d.ts","sourceRoot":"","sources":["../../../../src/tasks/zero-shot-object-detection/data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,IAAI,CAAC;AAEzC,QAAA,MAAM,QAAQ,EAAE,cAyDf,CAAC;AAEF,eAAe,QAAQ,CAAC"}
1
+ {"version":3,"file":"data.d.ts","sourceRoot":"","sources":["../../../../src/tasks/zero-shot-object-detection/data.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,IAAI,CAAC;AAEzC,QAAA,MAAM,QAAQ,EAAE,cA8Df,CAAC;AAEF,eAAe,QAAQ,CAAC"}
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@huggingface/tasks",
3
3
  "packageManager": "pnpm@8.10.5",
4
- "version": "0.11.0",
4
+ "version": "0.11.2",
5
5
  "description": "List of ML tasks for huggingface.co/tasks",
6
6
  "repository": "https://github.com/huggingface/huggingface.js.git",
7
7
  "publishConfig": {
@@ -53,6 +53,24 @@ export const DATASET_LIBRARIES_UI_ELEMENTS = {
53
53
  repoUrl: "https://github.com/dask/dask",
54
54
  docsUrl: "https://huggingface.co/docs/hub/datasets-dask",
55
55
  },
56
+ distilabel: {
57
+ prettyLabel: "Distilabel",
58
+ repoName: "distilabel",
59
+ repoUrl: "https://github.com/argilla-io/distilabel",
60
+ docsUrl: "https://distilabel.argilla.io",
61
+ },
62
+ fiftyone: {
63
+ prettyLabel: "FiftyOne",
64
+ repoName: "fiftyone",
65
+ repoUrl: "https://github.com/voxel51/fiftyone",
66
+ docsUrl: "https://docs.voxel51.com",
67
+ },
68
+ argilla: {
69
+ prettyLabel: "Argilla",
70
+ repoName: "argilla",
71
+ repoUrl: "https://github.com/argilla-io/argilla",
72
+ docsUrl: "https://argilla-io.github.io/argilla",
73
+ },
56
74
  } satisfies Record<string, DatasetLibraryUiElement>;
57
75
 
58
76
  /// List of the dataset libraries supported by the Hub
@@ -82,6 +82,53 @@ export const bm25s = (model: ModelData): string[] => [
82
82
  retriever = BM25HF.load_from_hub("${model.id}")`,
83
83
  ];
84
84
 
85
+ export const depth_anything_v2 = (model: ModelData): string[] => {
86
+ let encoder: string;
87
+ let features: string;
88
+ let out_channels: string;
89
+
90
+ encoder = "<ENCODER>";
91
+ features = "<NUMBER_OF_FEATURES>";
92
+ out_channels = "<OUT_CHANNELS>";
93
+
94
+ if (model.id === "depth-anything/Depth-Anything-V2-Small") {
95
+ encoder = "vits";
96
+ features = "64";
97
+ out_channels = "[48, 96, 192, 384]";
98
+ } else if (model.id === "depth-anything/Depth-Anything-V2-Base") {
99
+ encoder = "vitb";
100
+ features = "128";
101
+ out_channels = "[96, 192, 384, 768]";
102
+ } else if (model.id === "depth-anything/Depth-Anything-V2-Large") {
103
+ encoder = "vitl";
104
+ features = "256";
105
+ out_channels = "[256, 512, 1024, 1024";
106
+ }
107
+
108
+ return [
109
+ `
110
+ # Install from https://github.com/DepthAnything/Depth-Anything-V2
111
+
112
+ # Load the model and infer depth from an image
113
+ import cv2
114
+ import torch
115
+
116
+ from depth_anything_v2.dpt import DepthAnythingV2
+ from huggingface_hub import hf_hub_download
117
+
118
+ # instantiate the model
119
+ model = DepthAnythingV2(encoder="${encoder}", features=${features}, out_channels=${out_channels})
120
+
121
+ # load the weights
122
+ filepath = hf_hub_download(repo_id="${model.id}", filename="depth_anything_v2_${encoder}.pth", repo_type="model")
123
+ state_dict = torch.load(filepath, map_location="cpu")
124
+ model.load_state_dict(state_dict)
+ model.eval()
125
+
126
+ raw_img = cv2.imread("your/image/path")
127
+ depth = model.infer_image(raw_img) # HxW raw depth map in numpy
128
+ `,
129
+ ];
130
+ };
131
+
85
132
  const diffusers_default = (model: ModelData) => [
86
133
  `from diffusers import DiffusionPipeline
87
134
 
@@ -143,6 +143,14 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = {
143
143
  filter: false,
144
144
  countDownloads: `path:"adapter_config.json"`,
145
145
  },
146
+ "depth-anything-v2": {
147
+ prettyLabel: "DepthAnythingV2",
148
+ repoName: "Depth Anything V2",
149
+ repoUrl: "https://github.com/DepthAnything/Depth-Anything-V2",
150
+ snippets: snippets.depth_anything_v2,
151
+ filter: false,
152
+ countDownloads: `path_extension:"pth"`,
153
+ },
146
154
  diffusers: {
147
155
  prettyLabel: "Diffusers",
148
156
  repoName: "🤗/diffusers",
@@ -1,4 +1,5 @@
1
- ## Use Cases
1
+ ## Use Cases
2
+
2
3
  Depth estimation models can be used to estimate the depth of different objects present in an image.
3
4
 
4
5
  ### Estimation of Volumetric Information
@@ -8,6 +9,14 @@ Depth estimation models are widely used to study volumetric formation of objects
8
9
 
9
10
  Depth estimation models can also be used to develop a 3D representation from a 2D image.
10
11
 
12
+ ## Depth Estimation Subtasks
13
+
14
+ There are two depth estimation subtasks.
15
+
16
+ - **Absolute depth estimation**: Absolute (or metric) depth estimation aims to provide exact depth measurements from the camera. Absolute depth estimation models output depth maps with real-world distances in meters or feet.
17
+
18
+ - **Relative depth estimation**: Relative depth estimation aims to predict the depth order of objects or points in a scene without providing precise measurements.
19
+
11
20
  ## Inference
12
21
 
13
22
 With the `transformers` library, you can use the `depth-estimation` pipeline to infer with depth estimation models. You can initialize the pipeline with a model id from the Hub. If you do not provide a model id it will initialize with [Intel/dpt-large](https://huggingface.co/Intel/dpt-large) by default. When calling the pipeline you just need to specify a path, http link or an image loaded in PIL. Additionally, you can find a comprehensive list of various depth estimation models at [this link](https://huggingface.co/models?pipeline_tag=depth-estimation).
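For reference, a minimal sketch of the pipeline usage described in the paragraph above (the model id is the `Intel/dpt-large` default mentioned there; the sample image URL is only an illustrative placeholder):

```python
from transformers import pipeline

# initialize the depth-estimation pipeline with a model id from the Hub
pipe = pipeline(task="depth-estimation", model="Intel/dpt-large")

# the input can be a local path, an http link, or an image loaded in PIL
result = pipe("http://images.cocodataset.org/val2017/000000039769.jpg")

result["predicted_depth"]  # raw depth predictions as a torch.Tensor
result["depth"]            # depth map rescaled to a PIL image for visualization
```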
@@ -3,9 +3,13 @@ import type { TaskDataCustom } from "..";
3
3
  const taskData: TaskDataCustom = {
4
4
  datasets: [
5
5
  {
6
- description: "NYU Depth V2 Dataset: Video dataset containing both RGB and depth sensor data",
6
+ description: "NYU Depth V2 Dataset: Video dataset containing both RGB and depth sensor data.",
7
7
  id: "sayakpaul/nyu_depth_v2",
8
8
  },
9
+ {
10
+ description: "Monocular depth estimation benchmark based without noise and errors.",
11
+ id: "depth-anything/DA-2K",
12
+ },
9
13
  ],
10
14
  demo: {
11
15
  inputs: [
@@ -24,17 +28,17 @@ const taskData: TaskDataCustom = {
24
28
  metrics: [],
25
29
  models: [
26
30
  {
27
- description: "Strong Depth Estimation model trained on 1.4 million images.",
28
- id: "Intel/dpt-large",
29
- },
30
- {
31
- description: "Strong Depth Estimation model trained on a big compilation of datasets.",
32
- id: "LiheYoung/depth-anything-large-hf",
31
+ description: "Cutting-edge depth estimation model.",
32
+ id: "depth-anything/Depth-Anything-V2-Large",
33
33
  },
34
34
  {
35
35
  description: "A strong monocular depth estimation model.",
36
36
  id: "Bingxin/Marigold",
37
37
  },
38
+ {
39
+ description: "A metric depth estimation model trained on NYU dataset.",
40
+ id: "Intel/zoedepth-nyu",
41
+ },
38
42
  ],
39
43
  spaces: [
40
44
  {
@@ -42,8 +46,8 @@ const taskData: TaskDataCustom = {
42
46
  id: "radames/dpt-depth-estimation-3d-voxels",
43
47
  },
44
48
  {
45
- description: "An application to compare the outputs of different depth estimation models.",
46
- id: "LiheYoung/Depth-Anything",
49
+ description: "An application on cutting-edge depth estimation.",
50
+ id: "depth-anything/Depth-Anything-V2",
47
51
  },
48
52
  {
49
53
  description: "An application to try state-of-the-art depth estimation.",
@@ -1,9 +1,21 @@
1
1
  ## Use Cases
2
2
 
3
+ ### Transfer Learning
4
+
3
5
  Models trained on a specific dataset can learn features about the data. For instance, a model trained on an English poetry dataset learns English grammar at a very high level. This information can be transferred to a new model that is going to be trained on tweets. This process of extracting features and transferring to another model is called transfer learning. One can pass their dataset through a feature extraction pipeline and feed the result to a classifier.
4
6
 
7
+ ### Retrieval and Reranking
8
+
9
+ Retrieval is the process of obtaining relevant documents or information based on a user's search query. In the context of NLP, retrieval systems aim to find relevant text passages or documents from a large corpus of data that match the user's query. The goal is to return a set of results that are likely to be useful to the user. On the other hand, reranking is a technique used to improve the quality of retrieval results by reordering them based on their relevance to the query.
10
+
11
+ ### Retrieval Augmented Generation
12
+
13
+ Retrieval-augmented generation (RAG) is a technique in which the user input to a generative model is first used to query a knowledge base, and the most relevant retrieved information is added to the prompt to ground the model and reduce hallucinations during generation. Feature extraction models (primarily retrieval and reranking models) provide the retrieval component of RAG.
14
+
5
15
  ## Inference
6
16
 
17
+ You can run inference with feature extraction models using the `pipeline` of the `transformers` library.
18
+
7
19
  ```python
8
20
  from transformers import pipeline
9
21
  checkpoint = "facebook/bart-base"
@@ -22,6 +34,39 @@ feature_extractor(text,return_tensors = "pt")[0].numpy().mean(axis=0)
22
34
  [ 0.2520, -0.6869, -1.0582, ..., 0.5198, -2.2106, 0.4547]]])'''
23
35
  ```
24
36
 
37
+ A very popular library for training similarity and search models is called `sentence-transformers`. To get started, install the library.
38
+
39
+ ```bash
40
+ pip install -U sentence-transformers
41
+ ```
42
+
43
+ You can infer with `sentence-transformers` models as follows.
44
+
45
+ ```python
46
+ from sentence_transformers import SentenceTransformer
47
+
48
+ model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
49
+ sentences = [
50
+ "The weather is lovely today.",
51
+ "It's so sunny outside!",
52
+ "He drove to the stadium.",
53
+ ]
54
+
55
+ embeddings = model.encode(sentences)
56
+ similarities = model.similarity(embeddings, embeddings)
57
+ print(similarities)
58
+ # tensor([[1.0000, 0.6660, 0.1046],
59
+ # [0.6660, 1.0000, 0.1411],
60
+ # [0.1046, 0.1411, 1.0000]])
61
+ ```
62
+
63
+ ### Text Embedding Inference
64
+
65
+ [Text Embeddings Inference (TEI)](https://github.com/huggingface/text-embeddings-inference) is a toolkit for easily serving feature extraction models with a few lines of code.
66
+
25
67
  ## Useful resources
26
68
 
27
- - [Documentation for feature extractor of 🤗Transformers](https://huggingface.co/docs/transformers/main_classes/feature_extractor)
69
+ - [Documentation for feature extraction task in 🤗Transformers](https://huggingface.co/docs/transformers/main_classes/feature_extractor)
70
+ - [Introduction to MTEB Benchmark](https://huggingface.co/blog/mteb)
71
+ - [Cookbook: Simple RAG for GitHub issues using Hugging Face Zephyr and LangChain](https://huggingface.co/learn/cookbook/rag_zephyr_langchain)
72
+ - [sentence-transformers organization on Hugging Face Hub](https://huggingface.co/sentence-transformers)
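As a side note on the Text Embeddings Inference (TEI) toolkit mentioned in this hunk, here is a minimal client sketch against a locally running TEI server; the host, port, and input text are assumptions, while the `/embed` endpoint and `inputs` payload follow TEI's documented HTTP API:

```python
import requests

# assumes a TEI server is already running locally on port 8080
# with a feature extraction model loaded (see the TEI repository for setup)
response = requests.post(
    "http://127.0.0.1:8080/embed",
    json={"inputs": "What is retrieval augmented generation?"},
)
response.raise_for_status()

embeddings = response.json()  # one embedding (list of floats) per input
print(len(embeddings[0]))
```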
@@ -33,14 +33,19 @@ const taskData: TaskDataCustom = {
33
33
  models: [
34
34
  {
35
35
  description: "A powerful feature extraction model for natural language processing tasks.",
36
- id: "facebook/bart-base",
36
+ id: "thenlper/gte-large",
37
37
  },
38
38
  {
39
- description: "A strong feature extraction model for coding tasks.",
40
- id: "microsoft/codebert-base",
39
+ description: "A strong feature extraction model for retrieval.",
40
+ id: "Alibaba-NLP/gte-Qwen1.5-7B-instruct",
41
+ },
42
+ ],
43
+ spaces: [
44
+ {
45
+ description: "A leaderboard to rank best feature extraction models..",
46
+ id: "mteb/leaderboard",
41
47
  },
42
48
  ],
43
- spaces: [],
44
49
  summary: "Feature extraction is the task of extracting features learnt in a model.",
45
50
  widgetModels: ["facebook/bart-base"],
46
51
  };
@@ -3,10 +3,13 @@ import type { TaskDataCustom } from "..";
3
3
  const taskData: TaskDataCustom = {
4
4
  datasets: [
5
5
  {
6
- // TODO write proper description
7
- description: "Widely used benchmark dataset for multiple Vision tasks.",
6
+ description: "Widely used benchmark dataset for multiple vision tasks.",
8
7
  id: "merve/coco2017",
9
8
  },
9
+ {
10
+ description: "Multi-task computer vision benchmark.",
11
+ id: "merve/pascal-voc",
12
+ },
10
13
  ],
11
14
  demo: {
12
15
  inputs: [
@@ -47,16 +50,16 @@ const taskData: TaskDataCustom = {
47
50
  description: "Strong object detection model trained on ImageNet-21k dataset.",
48
51
  id: "microsoft/beit-base-patch16-224-pt22k-ft22k",
49
52
  },
53
+ {
54
+ description: "Fast and accurate object detection model trained on COCO dataset.",
55
+ id: "PekingU/rtdetr_r18vd_coco_o365",
56
+ },
50
57
  ],
51
58
  spaces: [
52
59
  {
53
60
  description: "Leaderboard to compare various object detection models across several metrics.",
54
61
  id: "hf-vision/object_detection_leaderboard",
55
62
  },
56
- {
57
- description: "An object detection application that can detect unseen objects out of the box.",
58
- id: "merve/owlv2",
59
- },
60
63
  {
61
64
  description: "An application that contains various object detection models to try from.",
62
65
  id: "Gradio-Blocks/Object-Detection-With-DETR-and-YOLOS",
@@ -69,6 +72,10 @@ const taskData: TaskDataCustom = {
69
72
  description: "An object tracking, segmentation and inpainting application.",
70
73
  id: "VIPLab/Track-Anything",
71
74
  },
75
+ {
76
+ description: "Very fast object tracking application based on object detection.",
77
+ id: "merve/RT-DETR-tracking-coco",
78
+ },
72
79
  ],
73
80
  summary:
74
81
  "Object Detection models allow users to identify objects of certain defined classes. Object detection models receive an image as input and output the images with bounding boxes and labels on detected objects.",
@@ -82,7 +82,7 @@ const taskData: TaskDataCustom = {
82
82
  spaces: [
83
83
  {
84
84
  description: "A leaderboard to compare different open-source text generation models based on various benchmarks.",
85
- id: "HuggingFaceH4/open_llm_leaderboard",
85
+ id: "open-llm-leaderboard/open_llm_leaderboard",
86
86
  },
87
87
  {
88
88
  description: "An text generation based application based on a very powerful LLaMA2 model.",
@@ -53,18 +53,18 @@ const taskData: TaskDataCustom = {
53
53
  id: "latent-consistency/lcm-lora-sdxl",
54
54
  },
55
55
  {
56
- description: "A text-to-image model that can generate coherent text inside image.",
57
- id: "DeepFloyd/IF-I-XL-v1.0",
56
+ description: "A very fast text-to-image model.",
57
+ id: "ByteDance/SDXL-Lightning",
58
58
  },
59
59
  {
60
60
  description: "A powerful text-to-image model.",
61
- id: "kakaobrain/karlo-v1-alpha",
61
+ id: "stabilityai/stable-diffusion-3-medium-diffusers",
62
62
  },
63
63
  ],
64
64
  spaces: [
65
65
  {
66
66
  description: "A powerful text-to-image application.",
67
- id: "stabilityai/stable-diffusion",
67
+ id: "stabilityai/stable-diffusion-3-medium",
68
68
  },
69
69
  {
70
70
  description: "A text-to-image application to generate comics.",
@@ -68,9 +68,8 @@ The highest probability is 0.995 for the label cat and dog
68
68
 
69
69
  ## Useful Resources
70
70
 
71
- You can contribute useful resources about this task [here](https://github.com/huggingface/hub-docs/blob/main/tasks/src/zero-shot-image-classification/about.md).
72
-
73
- Check out [Zero-shot image classification task guide](https://huggingface.co/docs/transformers/tasks/zero_shot_image_classification).
71
+ - [Zero-shot image classification task guide](https://huggingface.co/docs/transformers/tasks/zero_shot_image_classification)
72
+ - [Image-text Similarity Search](https://huggingface.co/learn/cookbook/faiss_with_hf_datasets_and_clip)
74
73
 
75
74
  This page was made possible thanks to the efforts of [Shamima Hossain](https://huggingface.co/Shamima), [Haider Zaidi
76
75
  ](https://huggingface.co/chefhaider) and [Paarth Bhatnagar](https://huggingface.co/Paarth).
@@ -55,6 +55,10 @@ const taskData: TaskDataCustom = {
55
55
  description: "Strong zero-shot image classification model.",
56
56
  id: "google/siglip-base-patch16-224",
57
57
  },
58
+ {
59
+ description: "Small yet powerful zero-shot image classification model that can run on edge devices.",
60
+ id: "apple/MobileCLIP-S1-OpenCLIP",
61
+ },
58
62
  {
59
63
  description: "Strong image classification model for biomedical domain.",
60
64
  id: "microsoft/BiomedCLIP-PubMedBERT_256-vit_base_patch16_224",
@@ -39,11 +39,11 @@ const taskData: TaskDataCustom = {
39
39
  ],
40
40
  models: [
41
41
  {
42
- description: "Solid zero-shot object detection model that uses CLIP as backbone.",
43
- id: "google/owlvit-base-patch32",
42
+ description: "Solid zero-shot object detection model.",
43
+ id: "IDEA-Research/grounding-dino-base",
44
44
  },
45
45
  {
46
- description: "The improved version of the owlvit model.",
46
+ description: "Cutting-edge zero-shot object detection model.",
47
47
  id: "google/owlv2-base-patch16-ensemble",
48
48
  },
49
49
  ],
@@ -52,6 +52,11 @@ const taskData: TaskDataCustom = {
52
52
  description: "A demo to try the state-of-the-art zero-shot object detection model, OWLv2.",
53
53
  id: "merve/owlv2",
54
54
  },
55
+ {
56
+ description:
57
+ "A demo that combines a zero-shot object detection and mask generation model for zero-shot segmentation.",
58
+ id: "merve/OWLSAM",
59
+ },
55
60
  ],
56
61
  summary:
57
62
  "Zero-shot object detection is a computer vision task to detect objects and their classes in images, without any prior training or knowledge of the classes. Zero-shot object detection models receive an image as input, as well as a list of candidate classes, and output the bounding boxes and labels where the objects have been detected.",