@huggingface/tasks 0.10.22 → 0.11.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/dist/index.cjs +96 -41
  2. package/dist/index.js +96 -40
  3. package/dist/scripts/inference-tei-import.d.ts +2 -0
  4. package/dist/scripts/inference-tei-import.d.ts.map +1 -0
  5. package/dist/src/index.d.ts +0 -1
  6. package/dist/src/index.d.ts.map +1 -1
  7. package/dist/src/model-data.d.ts +2 -15
  8. package/dist/src/model-data.d.ts.map +1 -1
  9. package/dist/src/model-libraries-snippets.d.ts +1 -0
  10. package/dist/src/model-libraries-snippets.d.ts.map +1 -1
  11. package/dist/src/model-libraries.d.ts +37 -2
  12. package/dist/src/model-libraries.d.ts.map +1 -1
  13. package/dist/src/tasks/depth-estimation/data.d.ts.map +1 -1
  14. package/dist/src/tasks/feature-extraction/data.d.ts.map +1 -1
  15. package/dist/src/tasks/feature-extraction/inference.d.ts +22 -7
  16. package/dist/src/tasks/feature-extraction/inference.d.ts.map +1 -1
  17. package/dist/src/tasks/object-detection/data.d.ts.map +1 -1
  18. package/dist/src/tasks/zero-shot-image-classification/data.d.ts.map +1 -1
  19. package/dist/src/tasks/zero-shot-object-detection/data.d.ts.map +1 -1
  20. package/package.json +3 -2
  21. package/src/index.ts +0 -1
  22. package/src/model-data.ts +2 -16
  23. package/src/model-libraries-snippets.ts +10 -0
  24. package/src/model-libraries.ts +35 -0
  25. package/src/tasks/depth-estimation/about.md +10 -1
  26. package/src/tasks/depth-estimation/data.ts +13 -9
  27. package/src/tasks/feature-extraction/about.md +46 -1
  28. package/src/tasks/feature-extraction/data.ts +9 -4
  29. package/src/tasks/feature-extraction/inference.ts +23 -5
  30. package/src/tasks/feature-extraction/spec/input.json +34 -13
  31. package/src/tasks/feature-extraction/spec/output.json +10 -2
  32. package/src/tasks/image-text-to-text/data.ts +1 -1
  33. package/src/tasks/object-detection/data.ts +13 -6
  34. package/src/tasks/text-generation/data.ts +1 -1
  35. package/src/tasks/text-to-image/data.ts +4 -4
  36. package/src/tasks/zero-shot-image-classification/about.md +2 -3
  37. package/src/tasks/zero-shot-image-classification/data.ts +4 -0
  38. package/src/tasks/zero-shot-object-detection/data.ts +8 -3
@@ -1,7 +1,15 @@
1
1
  {
2
2
  "$id": "/inference/schemas/feature-extraction/output.json",
3
3
  "$schema": "http://json-schema.org/draft-06/schema#",
4
- "description": "The embedding for the input text, as a nested list (tensor) of floats",
4
+ "description": "Feature Extraction Output.\n\nAuto-generated from TEI specs.\nFor more details, check out https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tei-import.ts.",
5
+ "title": "FeatureExtractionOutput",
5
6
  "type": "array",
6
- "title": "FeatureExtractionOutput"
7
+ "$defs": {},
8
+ "items": {
9
+ "type": "array",
10
+ "items": {
11
+ "type": "number",
12
+ "format": "float"
13
+ }
14
+ }
7
15
  }
@@ -22,7 +22,7 @@ const taskData: TaskDataCustom = {
22
22
  demo: {
23
23
  inputs: [
24
24
  {
25
- filename: "mask-generation-input.png",
25
+ filename: "image-text-to-text-input.png",
26
26
  type: "img",
27
27
  },
28
28
  {
@@ -3,10 +3,13 @@ import type { TaskDataCustom } from "..";
3
3
  const taskData: TaskDataCustom = {
4
4
  datasets: [
5
5
  {
6
- // TODO write proper description
7
- description: "Widely used benchmark dataset for multiple Vision tasks.",
6
+ description: "Widely used benchmark dataset for multiple vision tasks.",
8
7
  id: "merve/coco2017",
9
8
  },
9
+ {
10
+ description: "Multi-task computer vision benchmark.",
11
+ id: "merve/pascal-voc",
12
+ },
10
13
  ],
11
14
  demo: {
12
15
  inputs: [
@@ -47,16 +50,16 @@ const taskData: TaskDataCustom = {
47
50
  description: "Strong object detection model trained on ImageNet-21k dataset.",
48
51
  id: "microsoft/beit-base-patch16-224-pt22k-ft22k",
49
52
  },
53
+ {
54
+ description: "Fast and accurate object detection model trained on COCO dataset.",
55
+ id: "PekingU/rtdetr_r18vd_coco_o365",
56
+ },
50
57
  ],
51
58
  spaces: [
52
59
  {
53
60
  description: "Leaderboard to compare various object detection models across several metrics.",
54
61
  id: "hf-vision/object_detection_leaderboard",
55
62
  },
56
- {
57
- description: "An object detection application that can detect unseen objects out of the box.",
58
- id: "merve/owlv2",
59
- },
60
63
  {
61
64
  description: "An application that contains various object detection models to try from.",
62
65
  id: "Gradio-Blocks/Object-Detection-With-DETR-and-YOLOS",
@@ -69,6 +72,10 @@ const taskData: TaskDataCustom = {
69
72
  description: "An object tracking, segmentation and inpainting application.",
70
73
  id: "VIPLab/Track-Anything",
71
74
  },
75
+ {
76
+ description: "Very fast object tracking application based on object detection.",
77
+ id: "merve/RT-DETR-tracking-coco",
78
+ },
72
79
  ],
73
80
  summary:
74
81
  "Object Detection models allow users to identify objects of certain defined classes. Object detection models receive an image as input and output the images with bounding boxes and labels on detected objects.",
@@ -82,7 +82,7 @@ const taskData: TaskDataCustom = {
82
82
  spaces: [
83
83
  {
84
84
  description: "A leaderboard to compare different open-source text generation models based on various benchmarks.",
85
- id: "HuggingFaceH4/open_llm_leaderboard",
85
+ id: "open-llm-leaderboard/open_llm_leaderboard",
86
86
  },
87
87
  {
88
88
  description: "An text generation based application based on a very powerful LLaMA2 model.",
@@ -53,18 +53,18 @@ const taskData: TaskDataCustom = {
53
53
  id: "latent-consistency/lcm-lora-sdxl",
54
54
  },
55
55
  {
56
- description: "A text-to-image model that can generate coherent text inside image.",
57
- id: "DeepFloyd/IF-I-XL-v1.0",
56
+ description: "A very fast text-to-image model.",
57
+ id: "ByteDance/SDXL-Lightning",
58
58
  },
59
59
  {
60
60
  description: "A powerful text-to-image model.",
61
- id: "kakaobrain/karlo-v1-alpha",
61
+ id: "stabilityai/stable-diffusion-3-medium-diffusers",
62
62
  },
63
63
  ],
64
64
  spaces: [
65
65
  {
66
66
  description: "A powerful text-to-image application.",
67
- id: "stabilityai/stable-diffusion",
67
+ id: "stabilityai/stable-diffusion-3-medium",
68
68
  },
69
69
  {
70
70
  description: "A text-to-image application to generate comics.",
@@ -68,9 +68,8 @@ The highest probability is 0.995 for the label cat and dog
68
68
 
69
69
  ## Useful Resources
70
70
 
71
- You can contribute useful resources about this task [here](https://github.com/huggingface/hub-docs/blob/main/tasks/src/zero-shot-image-classification/about.md).
72
-
73
- Check out [Zero-shot image classification task guide](https://huggingface.co/docs/transformers/tasks/zero_shot_image_classification).
71
+ - [Zero-shot image classification task guide](https://huggingface.co/docs/transformers/tasks/zero_shot_image_classification).
72
+ - [Image-text Similarity Search](https://huggingface.co/learn/cookbook/faiss_with_hf_datasets_and_clip)
74
73
 
75
74
  This page was made possible thanks to the efforts of [Shamima Hossain](https://huggingface.co/Shamima), [Haider Zaidi
76
75
  ](https://huggingface.co/chefhaider) and [Paarth Bhatnagar](https://huggingface.co/Paarth).
@@ -55,6 +55,10 @@ const taskData: TaskDataCustom = {
55
55
  description: "Strong zero-shot image classification model.",
56
56
  id: "google/siglip-base-patch16-224",
57
57
  },
58
+ {
59
+ description: "Small yet powerful zero-shot image classification model that can run on edge devices.",
60
+ id: "apple/MobileCLIP-S1-OpenCLIP",
61
+ },
58
62
  {
59
63
  description: "Strong image classification model for biomedical domain.",
60
64
  id: "microsoft/BiomedCLIP-PubMedBERT_256-vit_base_patch16_224",
@@ -39,11 +39,11 @@ const taskData: TaskDataCustom = {
39
39
  ],
40
40
  models: [
41
41
  {
42
- description: "Solid zero-shot object detection model that uses CLIP as backbone.",
43
- id: "google/owlvit-base-patch32",
42
+ description: "Solid zero-shot object detection model.",
43
+ id: "IDEA-Research/grounding-dino-base",
44
44
  },
45
45
  {
46
- description: "The improved version of the owlvit model.",
46
+ description: "Cutting-edge zero-shot object detection model.",
47
47
  id: "google/owlv2-base-patch16-ensemble",
48
48
  },
49
49
  ],
@@ -52,6 +52,11 @@ const taskData: TaskDataCustom = {
52
52
  description: "A demo to try the state-of-the-art zero-shot object detection model, OWLv2.",
53
53
  id: "merve/owlv2",
54
54
  },
55
+ {
56
+ description:
57
+ "A demo that combines a zero-shot object detection and mask generation model for zero-shot segmentation.",
58
+ id: "merve/OWLSAM",
59
+ },
55
60
  ],
56
61
  summary:
57
62
  "Zero-shot object detection is a computer vision task to detect objects and their classes in images, without any prior training or knowledge of the classes. Zero-shot object detection models receive an image as input, as well as a list of candidate classes, and output the bounding boxes and labels where the objects have been detected.",