@huggingface/tasks 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +4 -3
- package/dist/index.js +16 -2
- package/dist/index.mjs +16 -2
- package/package.json +1 -1
- package/src/library-ui-elements.ts +15 -1
- package/src/model-libraries.ts +2 -1
- package/src/tasks/mask-generation/about.md +0 -0
- package/src/tasks/mask-generation/data.ts +18 -0
- package/src/tasks/zero-shot-object-detection/about.md +39 -0
- package/src/tasks/zero-shot-object-detection/data.ts +57 -0
package/dist/index.d.ts
CHANGED
@@ -37,12 +37,13 @@ declare enum ModelLibrary {
     "stanza" = "Stanza",
     "fasttext" = "fastText",
     "stable-baselines3" = "Stable-Baselines3",
-    "ml-agents" = "ML-Agents",
+    "ml-agents" = "Unity ML-Agents",
     "pythae" = "Pythae",
-    "mindspore" = "MindSpore"
+    "mindspore" = "MindSpore",
+    "unity-sentis" = "Unity Sentis"
 }
 type ModelLibraryKey = keyof typeof ModelLibrary;
-declare const ALL_DISPLAY_MODEL_LIBRARY_KEYS: ("speechbrain" | "fastai" | "adapter-transformers" | "allennlp" | "asteroid" | "bertopic" | "diffusers" | "doctr" | "espnet" | "fairseq" | "flair" | "keras" | "k2" | "mlx" | "nemo" | "open_clip" | "paddlenlp" | "peft" | "pyannote-audio" | "sample-factory" | "sentence-transformers" | "setfit" | "sklearn" | "spacy" | "span-marker" | "tensorflowtts" | "timm" | "transformers" | "transformers.js" | "stanza" | "fasttext" | "stable-baselines3" | "ml-agents" | "pythae" | "mindspore")[];
+declare const ALL_DISPLAY_MODEL_LIBRARY_KEYS: ("speechbrain" | "fastai" | "adapter-transformers" | "allennlp" | "asteroid" | "bertopic" | "diffusers" | "doctr" | "espnet" | "fairseq" | "flair" | "keras" | "k2" | "mlx" | "nemo" | "open_clip" | "paddlenlp" | "peft" | "pyannote-audio" | "sample-factory" | "sentence-transformers" | "setfit" | "sklearn" | "spacy" | "span-marker" | "tensorflowtts" | "timm" | "transformers" | "transformers.js" | "stanza" | "fasttext" | "stable-baselines3" | "ml-agents" | "pythae" | "mindspore" | "unity-sentis")[];
 
 declare const MODALITIES: readonly ["cv", "nlp", "audio", "tabular", "multimodal", "rl", "other"];
 type Modality = (typeof MODALITIES)[number];
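In consumer code, the `.d.ts` change surfaces as one renamed and one new display name. A minimal sketch, assuming `ModelLibrary` is exported from the package root as this declaration file suggests:

```ts
import { ModelLibrary } from "@huggingface/tasks";

// Keys contain hyphens, so bracket access is required
console.log(ModelLibrary["ml-agents"]); // "Unity ML-Agents" (was "ML-Agents" in 0.2.0)
console.log(ModelLibrary["unity-sentis"]); // "Unity Sentis" (new in 0.2.1)
```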
package/dist/index.js
CHANGED
@@ -527,6 +527,13 @@ transcriptions = asr_model.transcribe(["file.wav"])`
   }
 };
 var mlAgents = (model) => [`mlagents-load-from-hf --repo-id="${model.id}" --local-dir="./downloads"`];
+var sentis = (model) => [
+  `string modelName = "[Your model name here].sentis";
+Model model = ModelLoader.Load(Application.streamingAssetsPath + "/" + modelName);
+IWorker engine = WorkerFactory.CreateWorker(BackendType.GPUCompute, model);
+// Please see provided C# file for more details
+`
+];
 var mlx = (model) => [
   `pip install huggingface_hub hf_transfer
 
@@ -749,10 +756,16 @@ var MODEL_LIBRARIES_UI_ELEMENTS = {
   "ml-agents": {
     btnLabel: "ml-agents",
     repoName: "ml-agents",
-    repoUrl: "https://github.com/
+    repoUrl: "https://github.com/Unity-Technologies/ml-agents",
     docsUrl: "https://huggingface.co/docs/hub/ml-agents",
     snippets: mlAgents
   },
+  "unity-sentis": {
+    btnLabel: "unity-sentis",
+    repoName: "unity-sentis",
+    repoUrl: "https://github.com/Unity-Technologies/sentis-samples",
+    snippets: sentis
+  },
   pythae: {
     btnLabel: "pythae",
     repoName: "pythae",
@@ -4618,9 +4631,10 @@ var ModelLibrary = /* @__PURE__ */ ((ModelLibrary2) => {
   ModelLibrary2["stanza"] = "Stanza";
   ModelLibrary2["fasttext"] = "fastText";
   ModelLibrary2["stable-baselines3"] = "Stable-Baselines3";
-  ModelLibrary2["ml-agents"] = "ML-Agents";
+  ModelLibrary2["ml-agents"] = "Unity ML-Agents";
   ModelLibrary2["pythae"] = "Pythae";
   ModelLibrary2["mindspore"] = "MindSpore";
+  ModelLibrary2["unity-sentis"] = "Unity Sentis";
   return ModelLibrary2;
 })(ModelLibrary || {});
 var ALL_MODEL_LIBRARY_KEYS = Object.keys(ModelLibrary);
package/dist/index.mjs
CHANGED
@@ -493,6 +493,13 @@ transcriptions = asr_model.transcribe(["file.wav"])`
   }
 };
 var mlAgents = (model) => [`mlagents-load-from-hf --repo-id="${model.id}" --local-dir="./downloads"`];
+var sentis = (model) => [
+  `string modelName = "[Your model name here].sentis";
+Model model = ModelLoader.Load(Application.streamingAssetsPath + "/" + modelName);
+IWorker engine = WorkerFactory.CreateWorker(BackendType.GPUCompute, model);
+// Please see provided C# file for more details
+`
+];
 var mlx = (model) => [
   `pip install huggingface_hub hf_transfer
 
@@ -715,10 +722,16 @@ var MODEL_LIBRARIES_UI_ELEMENTS = {
   "ml-agents": {
     btnLabel: "ml-agents",
     repoName: "ml-agents",
-    repoUrl: "https://github.com/
+    repoUrl: "https://github.com/Unity-Technologies/ml-agents",
     docsUrl: "https://huggingface.co/docs/hub/ml-agents",
     snippets: mlAgents
   },
+  "unity-sentis": {
+    btnLabel: "unity-sentis",
+    repoName: "unity-sentis",
+    repoUrl: "https://github.com/Unity-Technologies/sentis-samples",
+    snippets: sentis
+  },
   pythae: {
     btnLabel: "pythae",
     repoName: "pythae",
@@ -4584,9 +4597,10 @@ var ModelLibrary = /* @__PURE__ */ ((ModelLibrary2) => {
   ModelLibrary2["stanza"] = "Stanza";
   ModelLibrary2["fasttext"] = "fastText";
   ModelLibrary2["stable-baselines3"] = "Stable-Baselines3";
-  ModelLibrary2["ml-agents"] = "ML-Agents";
+  ModelLibrary2["ml-agents"] = "Unity ML-Agents";
   ModelLibrary2["pythae"] = "Pythae";
   ModelLibrary2["mindspore"] = "MindSpore";
+  ModelLibrary2["unity-sentis"] = "Unity Sentis";
   return ModelLibrary2;
 })(ModelLibrary || {});
 var ALL_MODEL_LIBRARY_KEYS = Object.keys(ModelLibrary);
package/package.json
CHANGED
-	"version": "0.2.0",
+	"version": "0.2.1",

package/src/library-ui-elements.ts
CHANGED
@@ -541,6 +541,14 @@ transcriptions = asr_model.transcribe(["file.wav"])`,
 
 const mlAgents = (model: ModelData) => [`mlagents-load-from-hf --repo-id="${model.id}" --local-dir="./downloads"`];
 
+const sentis = (model: ModelData) => [
+	`string modelName = "[Your model name here].sentis";
+Model model = ModelLoader.Load(Application.streamingAssetsPath + "/" + modelName);
+IWorker engine = WorkerFactory.CreateWorker(BackendType.GPUCompute, model);
+// Please see provided C# file for more details
+`
+];
+
 const mlx = (model: ModelData) => [
 	`pip install huggingface_hub hf_transfer
 
@@ -770,10 +778,16 @@ export const MODEL_LIBRARIES_UI_ELEMENTS: Partial<Record<ModelLibraryKey, Librar
 	"ml-agents": {
 		btnLabel: "ml-agents",
 		repoName: "ml-agents",
-		repoUrl: "https://github.com/
+		repoUrl: "https://github.com/Unity-Technologies/ml-agents",
 		docsUrl: "https://huggingface.co/docs/hub/ml-agents",
 		snippets: mlAgents,
 	},
+	"unity-sentis": {
+		btnLabel: "unity-sentis",
+		repoName: "unity-sentis",
+		repoUrl: "https://github.com/Unity-Technologies/sentis-samples",
+		snippets: sentis,
+	},
 	pythae: {
 		btnLabel: "pythae",
 		repoName: "pythae",
package/src/model-libraries.ts
CHANGED
@@ -37,9 +37,10 @@ export enum ModelLibrary {
 	"stanza" = "Stanza",
 	"fasttext" = "fastText",
 	"stable-baselines3" = "Stable-Baselines3",
-	"ml-agents" = "ML-Agents",
+	"ml-agents" = "Unity ML-Agents",
 	"pythae" = "Pythae",
 	"mindspore" = "MindSpore",
+	"unity-sentis" = "Unity Sentis",
 }
 
 export type ModelLibraryKey = keyof typeof ModelLibrary;
package/src/tasks/mask-generation/about.md
File without changes

package/src/tasks/mask-generation/data.ts
ADDED
@@ -0,0 +1,18 @@
+import type { TaskDataCustom } from "..";
+
+const taskData: TaskDataCustom = {
+	datasets: [],
+	demo: {
+		inputs: [],
+		outputs: [],
+	},
+	metrics: [],
+	models: [],
+	spaces: [],
+	summary:
+		"Mask generation is creating a binary image that identifies a specific object or region of interest in an input image. Masks are often used in segmentation tasks, where they provide a precise way to isolate the object of interest for further processing or analysis.",
+	widgetModels: [],
+	youtubeId: "",
+};
+
+export default taskData;
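The summary above describes a binary mask. As a trivial illustration of the data structure it refers to (purely illustrative, not code from the package):

```ts
// A 4x2 binary mask as a flat array: 1 = object pixel, 0 = background
const width = 4;
const height = 2;
const mask = Uint8Array.from([0, 1, 1, 0, 0, 1, 1, 0]);

// The masked object's area in pixels
const area = mask.reduce((sum, v) => sum + v, 0);
console.log(`object covers ${area}/${width * height} px`);
```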
package/src/tasks/zero-shot-object-detection/about.md
ADDED
@@ -0,0 +1,39 @@
+## Use Cases
+
+### Object Search
+
+Zero-shot object detection models can be used in image search. Smartphones, for example, use zero-shot object detection models to detect entities (such as specific places or objects) and allow the user to search for the entity on the internet.
+
+### Object Counting
+
+Zero-shot object detection models are used to count instances of objects in a given image. This can include counting the objects in warehouses or stores or the number of visitors in a store. They are also used to manage crowds at events to prevent disasters.
+
+## Inference
+
+You can infer with zero-shot object detection models through the `zero-shot-object-detection` pipeline. When calling the pipeline, you just need to specify a path or HTTP link to an image and the candidate labels.
+
+```python
+from transformers import pipeline
+from PIL import Image
+
+image = Image.open("my-image.png").convert("RGB")
+
+detector = pipeline(model="google/owlvit-base-patch32", task="zero-shot-object-detection")
+
+predictions = detector(
+    image,
+    candidate_labels=["a photo of a cat", "a photo of a dog"],
+)
+
+# [{'score': 0.95,
+# 'label': 'a photo of a cat',
+# 'box': {'xmin': 180, 'ymin': 71, 'xmax': 271, 'ymax': 178}},
+# ...
+# ]
+```
+
+## Useful Resources
+
+- [Zero-shot object detection task guide](https://huggingface.co/docs/transformers/tasks/zero_shot_object_detection)
+
+This page was made possible thanks to the efforts of [Victor Guichard](https://huggingface.co/VictorGuichard)
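The commented output in the Python block above fixes the shape of each prediction. As a TypeScript sketch (illustrative only, not a type shipped by the package):

```ts
// Mirrors the sample output printed in the pipeline example above
interface ZeroShotDetection {
	score: number; // confidence score
	label: string; // the matched candidate label
	box: { xmin: number; ymin: number; xmax: number; ymax: number }; // pixel coordinates
}
```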
package/src/tasks/zero-shot-object-detection/data.ts
ADDED
@@ -0,0 +1,57 @@
+import type { TaskDataCustom } from "..";
+
+const taskData: TaskDataCustom = {
+	datasets: [],
+	demo: {
+		inputs: [
+			{
+				filename: "zero-shot-object-detection-input.jpg",
+				type: "img",
+			},
+			{
+				label: "Classes",
+				content: "cat, dog, bird",
+				type: "text",
+			},
+		],
+		outputs: [
+			{
+				filename: "zero-shot-object-detection-output.jpg",
+				type: "img",
+			},
+		],
+	},
+	metrics: [
+		{
+			description:
+				"The Average Precision (AP) metric is the Area Under the PR Curve (AUC-PR). It is calculated for each class separately",
+			id: "Average Precision",
+		},
+		{
+			description: "The Mean Average Precision (mAP) metric is the overall average of the AP values",
+			id: "Mean Average Precision",
+		},
+		{
+			description:
+				"The APα metric is the Average Precision at the IoU threshold of a α value, for example, AP50 and AP75",
+			id: "APα",
+		},
+	],
+	models: [
+		{
+			description: "Solid zero-shot object detection model that uses CLIP as backbone.",
+			id: "google/owlvit-base-patch32",
+		},
+		{
+			description: "The improved version of the owlvit model.",
+			id: "google/owlv2-base-patch16-ensemble",
+		},
+	],
+	spaces: [],
+	summary:
+		"Zero-shot object detection is a computer vision task to detect objects and their classes in images, without any prior training or knowledge of the classes. Zero-shot object detection models receive an image as input, as well as a list of candidate classes, and output the bounding boxes and labels where the objects have been detected.",
+	widgetModels: [],
+	youtubeId: "",
+};
+
+export default taskData;
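For reference, the AP and mAP descriptions in the metrics above correspond to the standard definitions, with C classes, p_c(r) the precision at recall r for class c, and the IoU-thresholded variant APα counting a detection as correct only when its IoU with the ground truth is at least α:

```latex
\mathrm{AP}_c = \int_0^1 p_c(r)\,\mathrm{d}r
\qquad
\mathrm{mAP} = \frac{1}{C} \sum_{c=1}^{C} \mathrm{AP}_c
```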