@huggingface/tasks 0.0.3 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +20 -0
- package/dist/index.d.ts +368 -46
- package/dist/index.js +117 -41
- package/dist/{index.cjs → index.mjs} +84 -67
- package/package.json +43 -33
- package/src/Types.ts +49 -43
- package/src/audio-classification/about.md +5 -5
- package/src/audio-classification/data.ts +11 -11
- package/src/audio-to-audio/about.md +4 -3
- package/src/audio-to-audio/data.ts +18 -15
- package/src/automatic-speech-recognition/about.md +5 -4
- package/src/automatic-speech-recognition/data.ts +18 -17
- package/src/const.ts +52 -44
- package/src/conversational/about.md +9 -9
- package/src/conversational/data.ts +22 -18
- package/src/depth-estimation/about.md +1 -3
- package/src/depth-estimation/data.ts +11 -11
- package/src/document-question-answering/about.md +1 -2
- package/src/document-question-answering/data.ts +22 -19
- package/src/feature-extraction/about.md +2 -3
- package/src/feature-extraction/data.ts +12 -15
- package/src/fill-mask/about.md +1 -1
- package/src/fill-mask/data.ts +16 -14
- package/src/image-classification/about.md +5 -3
- package/src/image-classification/data.ts +15 -15
- package/src/image-segmentation/about.md +4 -4
- package/src/image-segmentation/data.ts +26 -23
- package/src/image-to-image/about.md +10 -12
- package/src/image-to-image/data.ts +31 -27
- package/src/image-to-text/about.md +13 -6
- package/src/image-to-text/data.ts +20 -21
- package/src/index.ts +11 -0
- package/src/modelLibraries.ts +43 -0
- package/src/object-detection/about.md +2 -1
- package/src/object-detection/data.ts +20 -17
- package/src/pipelines.ts +619 -0
- package/src/placeholder/about.md +3 -3
- package/src/placeholder/data.ts +8 -8
- package/src/question-answering/about.md +1 -1
- package/src/question-answering/data.ts +21 -19
- package/src/reinforcement-learning/about.md +167 -176
- package/src/reinforcement-learning/data.ts +75 -78
- package/src/sentence-similarity/data.ts +29 -28
- package/src/summarization/about.md +6 -5
- package/src/summarization/data.ts +23 -20
- package/src/table-question-answering/about.md +5 -5
- package/src/table-question-answering/data.ts +35 -39
- package/src/tabular-classification/about.md +4 -6
- package/src/tabular-classification/data.ts +11 -12
- package/src/tabular-regression/about.md +14 -18
- package/src/tabular-regression/data.ts +10 -11
- package/src/tasksData.ts +47 -50
- package/src/text-classification/about.md +5 -4
- package/src/text-classification/data.ts +21 -20
- package/src/text-generation/about.md +7 -6
- package/src/text-generation/data.ts +36 -34
- package/src/text-to-image/about.md +19 -18
- package/src/text-to-image/data.ts +32 -26
- package/src/text-to-speech/about.md +4 -5
- package/src/text-to-speech/data.ts +16 -17
- package/src/text-to-video/about.md +41 -36
- package/src/text-to-video/data.ts +43 -38
- package/src/token-classification/about.md +1 -3
- package/src/token-classification/data.ts +26 -25
- package/src/translation/about.md +4 -4
- package/src/translation/data.ts +21 -21
- package/src/unconditional-image-generation/about.md +10 -5
- package/src/unconditional-image-generation/data.ts +26 -20
- package/src/video-classification/about.md +5 -1
- package/src/video-classification/data.ts +14 -14
- package/src/visual-question-answering/about.md +8 -3
- package/src/visual-question-answering/data.ts +22 -19
- package/src/zero-shot-classification/about.md +5 -4
- package/src/zero-shot-classification/data.ts +20 -20
- package/src/zero-shot-image-classification/about.md +17 -9
- package/src/zero-shot-image-classification/data.ts +12 -14
- package/tsconfig.json +18 -0
- package/assets/audio-classification/audio.wav +0 -0
- package/assets/audio-to-audio/input.wav +0 -0
- package/assets/audio-to-audio/label-0.wav +0 -0
- package/assets/audio-to-audio/label-1.wav +0 -0
- package/assets/automatic-speech-recognition/input.flac +0 -0
- package/assets/automatic-speech-recognition/wav2vec2.png +0 -0
- package/assets/contribution-guide/anatomy.png +0 -0
- package/assets/contribution-guide/libraries.png +0 -0
- package/assets/depth-estimation/depth-estimation-input.jpg +0 -0
- package/assets/depth-estimation/depth-estimation-output.png +0 -0
- package/assets/document-question-answering/document-question-answering-input.png +0 -0
- package/assets/image-classification/image-classification-input.jpeg +0 -0
- package/assets/image-segmentation/image-segmentation-input.jpeg +0 -0
- package/assets/image-segmentation/image-segmentation-output.png +0 -0
- package/assets/image-to-image/image-to-image-input.jpeg +0 -0
- package/assets/image-to-image/image-to-image-output.png +0 -0
- package/assets/image-to-image/pix2pix_examples.jpg +0 -0
- package/assets/image-to-text/savanna.jpg +0 -0
- package/assets/object-detection/object-detection-input.jpg +0 -0
- package/assets/object-detection/object-detection-output.jpg +0 -0
- package/assets/table-question-answering/tableQA.jpg +0 -0
- package/assets/text-to-image/image.jpeg +0 -0
- package/assets/text-to-speech/audio.wav +0 -0
- package/assets/text-to-video/text-to-video-output.gif +0 -0
- package/assets/unconditional-image-generation/unconditional-image-generation-output.jpeg +0 -0
- package/assets/video-classification/video-classification-input.gif +0 -0
- package/assets/visual-question-answering/elephant.jpeg +0 -0
- package/assets/zero-shot-image-classification/image-classification-input.jpeg +0 -0
- package/dist/index.d.cts +0 -145
|
@@ -3,60 +3,64 @@ import type { TaskDataCustom } from "../Types";
|
|
|
3
3
|
const taskData: TaskDataCustom = {
|
|
4
4
|
datasets: [
|
|
5
5
|
{
|
|
6
|
-
description:
|
|
7
|
-
|
|
6
|
+
description:
|
|
7
|
+
"A dataset of 7k conversations explicitly designed to exhibit multiple conversation modes: displaying personality, having empathy, and demonstrating knowledge.",
|
|
8
|
+
id: "blended_skill_talk",
|
|
8
9
|
},
|
|
9
10
|
{
|
|
10
|
-
description:
|
|
11
|
-
|
|
11
|
+
description:
|
|
12
|
+
"ConvAI is a dataset of human-to-bot conversations labeled for quality. This data can be used to train a metric for evaluating dialogue systems",
|
|
13
|
+
id: "conv_ai_2",
|
|
12
14
|
},
|
|
13
15
|
{
|
|
14
16
|
description: "EmpatheticDialogues, is a dataset of 25k conversations grounded in emotional situations",
|
|
15
|
-
id:
|
|
17
|
+
id: "empathetic_dialogues",
|
|
16
18
|
},
|
|
17
19
|
],
|
|
18
20
|
demo: {
|
|
19
21
|
inputs: [
|
|
20
22
|
{
|
|
21
|
-
label:
|
|
23
|
+
label: "Input",
|
|
22
24
|
content: "Hey my name is Julien! How are you?",
|
|
23
|
-
type:
|
|
25
|
+
type: "text",
|
|
24
26
|
},
|
|
25
|
-
|
|
26
27
|
],
|
|
27
28
|
outputs: [
|
|
28
29
|
{
|
|
29
|
-
label:
|
|
30
|
+
label: "Answer",
|
|
30
31
|
content: "Hi Julien! My name is Julia! I am well.",
|
|
31
|
-
type:
|
|
32
|
+
type: "text",
|
|
32
33
|
},
|
|
33
34
|
],
|
|
34
35
|
},
|
|
35
36
|
metrics: [
|
|
36
37
|
{
|
|
37
|
-
description:
|
|
38
|
-
|
|
38
|
+
description:
|
|
39
|
+
"BLEU score is calculated by counting the number of shared single or subsequent tokens between the generated sequence and the reference. Subsequent n tokens are called “n-grams”. Unigram refers to a single token while bi-gram refers to token pairs and n-grams refer to n subsequent tokens. The score ranges from 0 to 1, where 1 means the translation perfectly matched and 0 did not match at all",
|
|
40
|
+
id: "bleu",
|
|
39
41
|
},
|
|
40
42
|
],
|
|
41
43
|
models: [
|
|
42
44
|
{
|
|
43
45
|
description: "A faster and smaller model than the famous BERT model.",
|
|
44
|
-
id:
|
|
46
|
+
id: "facebook/blenderbot-400M-distill",
|
|
45
47
|
},
|
|
46
48
|
{
|
|
47
|
-
description:
|
|
48
|
-
|
|
49
|
+
description:
|
|
50
|
+
"DialoGPT is a large-scale pretrained dialogue response generation model for multiturn conversations.",
|
|
51
|
+
id: "microsoft/DialoGPT-large",
|
|
49
52
|
},
|
|
50
53
|
],
|
|
51
54
|
spaces: [
|
|
52
55
|
{
|
|
53
56
|
description: "A chatbot based on Blender model.",
|
|
54
|
-
id:
|
|
57
|
+
id: "EXFINITE/BlenderBot-UI",
|
|
55
58
|
},
|
|
56
59
|
],
|
|
57
|
-
summary:
|
|
60
|
+
summary:
|
|
61
|
+
"Conversational response modelling is the task of generating conversational text that is relevant, coherent and knowledgable given a prompt. These models have applications in chatbots, and as a part of voice assistants",
|
|
58
62
|
widgetModels: ["facebook/blenderbot-400M-distill"],
|
|
59
|
-
youtubeId:
|
|
63
|
+
youtubeId: "",
|
|
60
64
|
};
|
|
61
65
|
|
|
62
66
|
export default taskData;
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
Depth estimation models can be used to estimate the depth of different objects present in an image.
|
|
3
3
|
|
|
4
4
|
### Estimation of Volumetric Information
|
|
5
|
-
Depth estimation models are widely used to study volumetric formation of objects present inside an image. This is an important use case in the domain of computer graphics.
|
|
5
|
+
Depth estimation models are widely used to study volumetric formation of objects present inside an image. This is an important use case in the domain of computer graphics.
|
|
6
6
|
|
|
7
7
|
### 3D Representation
|
|
8
8
|
|
|
@@ -31,8 +31,6 @@ result
|
|
|
31
31
|
# You can visualize the result just by calling `result["depth"]`.
|
|
32
32
|
```
|
|
33
33
|
|
|
34
|
-
|
|
35
34
|
## Useful Resources
|
|
36
35
|
|
|
37
36
|
- [Monocular depth estimation task guide](https://huggingface.co/docs/transformers/tasks/monocular_depth_estimation)
|
|
38
|
-
|
|
@@ -7,46 +7,46 @@ const taskData: TaskDataCustom = {
|
|
|
7
7
|
id: "sayakpaul/nyu_depth_v2",
|
|
8
8
|
},
|
|
9
9
|
],
|
|
10
|
-
demo:
|
|
10
|
+
demo: {
|
|
11
11
|
inputs: [
|
|
12
12
|
{
|
|
13
13
|
filename: "depth-estimation-input.jpg",
|
|
14
|
-
type:
|
|
14
|
+
type: "img",
|
|
15
15
|
},
|
|
16
16
|
],
|
|
17
17
|
outputs: [
|
|
18
18
|
{
|
|
19
19
|
filename: "depth-estimation-output.png",
|
|
20
|
-
type:
|
|
20
|
+
type: "img",
|
|
21
21
|
},
|
|
22
22
|
],
|
|
23
23
|
},
|
|
24
24
|
metrics: [],
|
|
25
|
-
models:
|
|
25
|
+
models: [
|
|
26
26
|
{
|
|
27
27
|
// TO DO: write description
|
|
28
28
|
description: "Strong Depth Estimation model trained on 1.4 million images.",
|
|
29
|
-
id:
|
|
29
|
+
id: "Intel/dpt-large",
|
|
30
30
|
},
|
|
31
31
|
{
|
|
32
32
|
// TO DO: write description
|
|
33
33
|
description: "Strong Depth Estimation model trained on the KITTI dataset.",
|
|
34
|
-
id:
|
|
34
|
+
id: "vinvino02/glpn-kitti",
|
|
35
35
|
},
|
|
36
36
|
],
|
|
37
|
-
spaces:
|
|
37
|
+
spaces: [
|
|
38
38
|
{
|
|
39
39
|
description: "An application that predicts the depth of an image and then reconstruct the 3D model as voxels.",
|
|
40
|
-
id:
|
|
40
|
+
id: "radames/dpt-depth-estimation-3d-voxels",
|
|
41
41
|
},
|
|
42
42
|
{
|
|
43
43
|
description: "An application that can estimate the depth in a given image.",
|
|
44
|
-
id:
|
|
44
|
+
id: "keras-io/Monocular-Depth-Estimation",
|
|
45
45
|
},
|
|
46
46
|
],
|
|
47
|
-
summary:
|
|
47
|
+
summary: "Depth estimation is the task of predicting depth of the objects present in an image.",
|
|
48
48
|
widgetModels: [""],
|
|
49
|
-
youtubeId:
|
|
49
|
+
youtubeId: "",
|
|
50
50
|
};
|
|
51
51
|
|
|
52
52
|
export default taskData;
|
|
@@ -16,7 +16,6 @@ Another very popular use case is invoice information extraction. For example, yo
|
|
|
16
16
|
You can infer with Document QA models with the 🤗 Transformers library using the [`document-question-answering` pipeline](https://huggingface.co/docs/transformers/en/main_classes/pipelines#transformers.DocumentQuestionAnsweringPipeline). If no model checkpoint is given, the pipeline will be initialized with [`impira/layoutlm-document-qa`](https://huggingface.co/impira/layoutlm-document-qa). This pipeline takes question(s) and document(s) as input, and returns the answer.
|
|
17
17
|
👉 Note that the question answering task solved here is extractive: the model extracts the answer from a context (the document).
|
|
18
18
|
|
|
19
|
-
|
|
20
19
|
```python
|
|
21
20
|
from transformers import pipeline
|
|
22
21
|
from PIL import Image
|
|
@@ -51,4 +50,4 @@ Would you like to learn more about Document QA? Awesome! Here are some curated r
|
|
|
51
50
|
|
|
52
51
|
- [Document question answering task guide](https://huggingface.co/docs/transformers/tasks/document_question_answering)
|
|
53
52
|
|
|
54
|
-
The contents of this page are contributed by [Eliott Zemour](https://huggingface.co/eliolio) and reviewed by [Kwadwo Agyapon-Ntra](https://huggingface.co/KayO) and [Ankur Goyal](https://huggingface.co/ankrgyl).
|
|
53
|
+
The contents of this page are contributed by [Eliott Zemour](https://huggingface.co/eliolio) and reviewed by [Kwadwo Agyapon-Ntra](https://huggingface.co/KayO) and [Ankur Goyal](https://huggingface.co/ankrgyl).
|
|
@@ -4,64 +4,67 @@ const taskData: TaskDataCustom = {
|
|
|
4
4
|
datasets: [
|
|
5
5
|
{
|
|
6
6
|
// TODO write proper description
|
|
7
|
-
description:
|
|
8
|
-
|
|
7
|
+
description:
|
|
8
|
+
"Dataset from the 2020 DocVQA challenge. The documents are taken from the UCSF Industry Documents Library.",
|
|
9
|
+
id: "eliolio/docvqa",
|
|
9
10
|
},
|
|
10
11
|
],
|
|
11
12
|
demo: {
|
|
12
13
|
inputs: [
|
|
13
14
|
{
|
|
14
|
-
label:
|
|
15
|
+
label: "Question",
|
|
15
16
|
content: "What is the idea behind the consumer relations efficiency team?",
|
|
16
|
-
type:
|
|
17
|
+
type: "text",
|
|
17
18
|
},
|
|
18
19
|
{
|
|
19
20
|
filename: "document-question-answering-input.png",
|
|
20
|
-
type:
|
|
21
|
+
type: "img",
|
|
21
22
|
},
|
|
22
23
|
],
|
|
23
24
|
outputs: [
|
|
24
25
|
{
|
|
25
|
-
label:
|
|
26
|
+
label: "Answer",
|
|
26
27
|
content: "Balance cost efficiency with quality customer service",
|
|
27
|
-
type:
|
|
28
|
+
type: "text",
|
|
28
29
|
},
|
|
29
30
|
],
|
|
30
31
|
},
|
|
31
32
|
metrics: [
|
|
32
33
|
{
|
|
33
|
-
description:
|
|
34
|
-
|
|
34
|
+
description:
|
|
35
|
+
"The evaluation metric for the DocVQA challenge is the Average Normalized Levenshtein Similarity (ANLS). This metric is flexible to character regognition errors and compares the predicted answer with the ground truth answer.",
|
|
36
|
+
id: "anls",
|
|
35
37
|
},
|
|
36
38
|
{
|
|
37
|
-
description:
|
|
38
|
-
|
|
39
|
+
description:
|
|
40
|
+
"Exact Match is a metric based on the strict character match of the predicted answer and the right answer. For answers predicted correctly, the Exact Match will be 1. Even if only one character is different, Exact Match will be 0",
|
|
41
|
+
id: "exact-match",
|
|
39
42
|
},
|
|
40
43
|
],
|
|
41
44
|
models: [
|
|
42
45
|
{
|
|
43
46
|
description: "A LayoutLM model for the document QA task, fine-tuned on DocVQA and SQuAD2.0.",
|
|
44
|
-
id:
|
|
47
|
+
id: "impira/layoutlm-document-qa",
|
|
45
48
|
},
|
|
46
49
|
{
|
|
47
50
|
description: "A special model for OCR-free Document QA task. Donut model fine-tuned on DocVQA.",
|
|
48
|
-
id:
|
|
51
|
+
id: "naver-clova-ix/donut-base-finetuned-docvqa",
|
|
49
52
|
},
|
|
50
53
|
],
|
|
51
|
-
spaces:
|
|
54
|
+
spaces: [
|
|
52
55
|
{
|
|
53
56
|
description: "A robust document question answering application.",
|
|
54
|
-
id:
|
|
57
|
+
id: "impira/docquery",
|
|
55
58
|
},
|
|
56
59
|
{
|
|
57
60
|
description: "An application that can answer questions from invoices.",
|
|
58
|
-
id:
|
|
61
|
+
id: "impira/invoices",
|
|
59
62
|
},
|
|
60
63
|
],
|
|
61
|
-
summary:
|
|
64
|
+
summary:
|
|
65
|
+
"Document Question Answering (also known as Document Visual Question Answering) is the task of answering questions on document images. Document question answering models take a (document, question) pair as input and return an answer in natural language. Models usually rely on multi-modal features, combining text, position of words (bounding-boxes) and image.",
|
|
62
66
|
widgetModels: ["impira/layoutlm-document-qa"],
|
|
63
|
-
youtubeId:
|
|
67
|
+
youtubeId: "",
|
|
64
68
|
};
|
|
65
69
|
|
|
66
|
-
|
|
67
70
|
export default taskData;
|
|
@@ -4,6 +4,7 @@ Feature extraction is the task of building features intended to be informative f
|
|
|
4
4
|
facilitating the subsequent learning and generalization steps in various domains of machine learning.
|
|
5
5
|
|
|
6
6
|
## Use Cases
|
|
7
|
+
|
|
7
8
|
Feature extraction can be used to do transfer learning in natural language processing, computer vision and audio models.
|
|
8
9
|
|
|
9
10
|
## Inference
|
|
@@ -17,7 +18,7 @@ feature_extractor = pipeline("feature-extraction",framework="pt",model=checkpoin
|
|
|
17
18
|
text = "Transformers is an awesome library!"
|
|
18
19
|
|
|
19
20
|
#Reducing along the first dimension to get a 768 dimensional array
|
|
20
|
-
feature_extractor(text,return_tensors = "pt")[0].numpy().mean(axis=0)
|
|
21
|
+
feature_extractor(text,return_tensors = "pt")[0].numpy().mean(axis=0)
|
|
21
22
|
|
|
22
23
|
'''tensor([[[ 2.5834, 2.7571, 0.9024, ..., 1.5036, -0.0435, -0.8603],
|
|
23
24
|
[-1.2850, -1.0094, -2.0826, ..., 1.5993, -0.9017, 0.6426],
|
|
@@ -31,5 +32,3 @@ feature_extractor(text,return_tensors = "pt")[0].numpy().mean(axis=0)
|
|
|
31
32
|
## Useful resources
|
|
32
33
|
|
|
33
34
|
- [Documentation for feature extractor of 🤗Transformers](https://huggingface.co/docs/transformers/main_classes/feature_extractor)
|
|
34
|
-
|
|
35
|
-
|
|
@@ -3,29 +3,27 @@ import type { TaskDataCustom } from "../Types";
|
|
|
3
3
|
const taskData: TaskDataCustom = {
|
|
4
4
|
datasets: [
|
|
5
5
|
{
|
|
6
|
-
description:
|
|
7
|
-
|
|
6
|
+
description:
|
|
7
|
+
"Wikipedia dataset containing cleaned articles of all languages. Can be used to train `feature-extraction` models.",
|
|
8
|
+
id: "wikipedia",
|
|
8
9
|
},
|
|
9
10
|
],
|
|
10
11
|
demo: {
|
|
11
12
|
inputs: [
|
|
12
13
|
{
|
|
13
|
-
label:
|
|
14
|
-
content:
|
|
15
|
-
"India, officially the Republic of India, is a country in South Asia.",
|
|
14
|
+
label: "Input",
|
|
15
|
+
content: "India, officially the Republic of India, is a country in South Asia.",
|
|
16
16
|
type: "text",
|
|
17
17
|
},
|
|
18
|
-
|
|
19
18
|
],
|
|
20
19
|
outputs: [
|
|
21
|
-
{
|
|
20
|
+
{
|
|
22
21
|
table: [
|
|
23
22
|
["Dimension 1", "Dimension 2", "Dimension 3"],
|
|
24
23
|
["2.583383083343506", "2.757075071334839", "0.9023529887199402"],
|
|
25
24
|
["8.29393482208252", "1.1071064472198486", "2.03399395942688"],
|
|
26
25
|
["-0.7754912972450256", "-1.647324562072754", "-0.6113331913948059"],
|
|
27
26
|
["0.07087723910808563", "1.5942802429199219", "1.4610432386398315"],
|
|
28
|
-
|
|
29
27
|
],
|
|
30
28
|
type: "tabular",
|
|
31
29
|
},
|
|
@@ -34,23 +32,22 @@ const taskData: TaskDataCustom = {
|
|
|
34
32
|
metrics: [
|
|
35
33
|
{
|
|
36
34
|
description: "",
|
|
37
|
-
id:
|
|
35
|
+
id: "",
|
|
38
36
|
},
|
|
39
37
|
],
|
|
40
38
|
models: [
|
|
41
39
|
{
|
|
42
40
|
description: "A powerful feature extraction model for natural language processing tasks.",
|
|
43
|
-
id:
|
|
41
|
+
id: "facebook/bart-base",
|
|
44
42
|
},
|
|
45
43
|
{
|
|
46
44
|
description: "A strong feature extraction model for coding tasks.",
|
|
47
|
-
id:
|
|
45
|
+
id: "microsoft/codebert-base",
|
|
48
46
|
},
|
|
49
47
|
],
|
|
50
|
-
spaces:
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
summary: "Feature extraction refers to the process of transforming raw data into numerical features that can be processed while preserving the information in the original dataset.",
|
|
48
|
+
spaces: [],
|
|
49
|
+
summary:
|
|
50
|
+
"Feature extraction refers to the process of transforming raw data into numerical features that can be processed while preserving the information in the original dataset.",
|
|
54
51
|
widgetModels: ["facebook/bart-base"],
|
|
55
52
|
};
|
|
56
53
|
|
package/src/fill-mask/about.md
CHANGED
|
@@ -48,4 +48,4 @@ Would you like to learn more about the topic? Awesome! Here you can find some cu
|
|
|
48
48
|
|
|
49
49
|
### Documentation
|
|
50
50
|
|
|
51
|
-
- [Masked language modeling task guide](https://huggingface.co/docs/transformers/tasks/masked_language_modeling)
|
|
51
|
+
- [Masked language modeling task guide](https://huggingface.co/docs/transformers/tasks/masked_language_modeling)
|
package/src/fill-mask/data.ts
CHANGED
|
@@ -4,21 +4,20 @@ const taskData: TaskDataCustom = {
|
|
|
4
4
|
datasets: [
|
|
5
5
|
{
|
|
6
6
|
description: "A common dataset that is used to train models for many languages.",
|
|
7
|
-
id:
|
|
7
|
+
id: "wikipedia",
|
|
8
8
|
},
|
|
9
9
|
{
|
|
10
10
|
description: "A large English dataset with text crawled from the web.",
|
|
11
|
-
id:
|
|
11
|
+
id: "c4",
|
|
12
12
|
},
|
|
13
13
|
],
|
|
14
14
|
demo: {
|
|
15
15
|
inputs: [
|
|
16
16
|
{
|
|
17
|
-
label:
|
|
17
|
+
label: "Input",
|
|
18
18
|
content: "The <mask> barked at me",
|
|
19
|
-
type:
|
|
19
|
+
type: "text",
|
|
20
20
|
},
|
|
21
|
-
|
|
22
21
|
],
|
|
23
22
|
outputs: [
|
|
24
23
|
{
|
|
@@ -50,28 +49,31 @@ const taskData: TaskDataCustom = {
|
|
|
50
49
|
},
|
|
51
50
|
metrics: [
|
|
52
51
|
{
|
|
53
|
-
description:
|
|
54
|
-
|
|
52
|
+
description:
|
|
53
|
+
"Cross Entropy is a metric that calculates the difference between two probability distributions. Each probability distribution is the distribution of predicted words",
|
|
54
|
+
id: "cross_entropy",
|
|
55
55
|
},
|
|
56
56
|
{
|
|
57
|
-
description:
|
|
58
|
-
|
|
57
|
+
description:
|
|
58
|
+
"Perplexity is the exponential of the cross-entropy loss. It evaluates the probabilities assigned to the next word by the model. Lower perplexity indicates better performance",
|
|
59
|
+
id: "perplexity",
|
|
59
60
|
},
|
|
60
61
|
],
|
|
61
62
|
models: [
|
|
62
63
|
{
|
|
63
64
|
description: "A faster and smaller model than the famous BERT model.",
|
|
64
|
-
id:
|
|
65
|
+
id: "distilbert-base-uncased",
|
|
65
66
|
},
|
|
66
67
|
{
|
|
67
68
|
description: "A multilingual model trained on 100 languages.",
|
|
68
|
-
id:
|
|
69
|
+
id: "xlm-roberta-base",
|
|
69
70
|
},
|
|
70
71
|
],
|
|
71
|
-
spaces:
|
|
72
|
-
summary:
|
|
72
|
+
spaces: [],
|
|
73
|
+
summary:
|
|
74
|
+
"Masked language modeling is the task of masking some of the words in a sentence and predicting which words should replace those masks. These models are useful when we want to get a statistical understanding of the language in which the model is trained in.",
|
|
73
75
|
widgetModels: ["distilroberta-base"],
|
|
74
|
-
youtubeId:
|
|
76
|
+
youtubeId: "mqElG5QJWUg",
|
|
75
77
|
};
|
|
76
78
|
|
|
77
79
|
export default taskData;
|
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
## Use Cases
|
|
2
|
+
|
|
2
3
|
Image classification models can be used when we are not interested in specific instances of objects with location information or their shape.
|
|
3
4
|
|
|
4
5
|
### Keyword Classification
|
|
6
|
+
|
|
5
7
|
Image classification models are used widely in stock photography to assign each image a keyword.
|
|
6
8
|
|
|
7
9
|
### Image Search
|
|
@@ -29,9 +31,9 @@ import { HfInference } from "@huggingface/inference";
|
|
|
29
31
|
|
|
30
32
|
const inference = new HfInference(HF_ACCESS_TOKEN);
|
|
31
33
|
await inference.imageClassification({
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
})
|
|
34
|
+
data: await (await fetch("https://picsum.photos/300/300")).blob(),
|
|
35
|
+
model: "microsoft/resnet-50",
|
|
36
|
+
});
|
|
35
37
|
```
|
|
36
38
|
|
|
37
39
|
## Useful Resources
|
|
@@ -5,19 +5,19 @@ const taskData: TaskDataCustom = {
|
|
|
5
5
|
{
|
|
6
6
|
// TODO write proper description
|
|
7
7
|
description: "Benchmark dataset used for image classification with images that belong to 100 classes.",
|
|
8
|
-
id:
|
|
8
|
+
id: "cifar100",
|
|
9
9
|
},
|
|
10
10
|
{
|
|
11
11
|
// TODO write proper description
|
|
12
12
|
description: "Dataset consisting of images of garments.",
|
|
13
|
-
id:
|
|
13
|
+
id: "fashion_mnist",
|
|
14
14
|
},
|
|
15
15
|
],
|
|
16
16
|
demo: {
|
|
17
17
|
inputs: [
|
|
18
18
|
{
|
|
19
19
|
filename: "image-classification-input.jpeg",
|
|
20
|
-
type:
|
|
20
|
+
type: "img",
|
|
21
21
|
},
|
|
22
22
|
],
|
|
23
23
|
outputs: [
|
|
@@ -43,46 +43,46 @@ const taskData: TaskDataCustom = {
|
|
|
43
43
|
metrics: [
|
|
44
44
|
{
|
|
45
45
|
description: "",
|
|
46
|
-
id:
|
|
46
|
+
id: "accuracy",
|
|
47
47
|
},
|
|
48
48
|
{
|
|
49
49
|
description: "",
|
|
50
|
-
id:
|
|
50
|
+
id: "recall",
|
|
51
51
|
},
|
|
52
52
|
{
|
|
53
53
|
description: "",
|
|
54
|
-
id:
|
|
54
|
+
id: "precision",
|
|
55
55
|
},
|
|
56
56
|
{
|
|
57
57
|
description: "",
|
|
58
|
-
id:
|
|
58
|
+
id: "f1",
|
|
59
59
|
},
|
|
60
|
-
|
|
61
60
|
],
|
|
62
61
|
models: [
|
|
63
62
|
{
|
|
64
63
|
description: "A strong image classification model.",
|
|
65
|
-
id:
|
|
64
|
+
id: "google/vit-base-patch16-224",
|
|
66
65
|
},
|
|
67
66
|
{
|
|
68
67
|
description: "A robust image classification model.",
|
|
69
|
-
id:
|
|
68
|
+
id: "facebook/deit-base-distilled-patch16-224",
|
|
70
69
|
},
|
|
71
70
|
{
|
|
72
71
|
description: "A strong image classification model.",
|
|
73
|
-
id:
|
|
72
|
+
id: "facebook/convnext-large-224",
|
|
74
73
|
},
|
|
75
74
|
],
|
|
76
|
-
spaces:
|
|
75
|
+
spaces: [
|
|
77
76
|
{
|
|
78
77
|
// TO DO: write description
|
|
79
78
|
description: "An application that classifies what a given image is about.",
|
|
80
|
-
id:
|
|
79
|
+
id: "nielsr/perceiver-image-classification",
|
|
81
80
|
},
|
|
82
81
|
],
|
|
83
|
-
summary:
|
|
82
|
+
summary:
|
|
83
|
+
"Image classification is the task of assigning a label or class to an entire image. Images are expected to have only one class for each image. Image classification models take an image as input and return a prediction about which class the image belongs to.",
|
|
84
84
|
widgetModels: ["google/vit-base-patch16-224"],
|
|
85
|
-
youtubeId:
|
|
85
|
+
youtubeId: "tjAIM7BOYhw",
|
|
86
86
|
};
|
|
87
87
|
|
|
88
88
|
export default taskData;
|
|
@@ -47,9 +47,9 @@ import { HfInference } from "@huggingface/inference";
|
|
|
47
47
|
|
|
48
48
|
const inference = new HfInference(HF_ACCESS_TOKEN);
|
|
49
49
|
await inference.imageSegmentation({
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
})
|
|
50
|
+
data: await (await fetch("https://picsum.photos/300/300")).blob(),
|
|
51
|
+
model: "facebook/detr-resnet-50-panoptic",
|
|
52
|
+
});
|
|
53
53
|
```
|
|
54
54
|
|
|
55
55
|
## Useful Resources
|
|
@@ -60,4 +60,4 @@ Would you like to learn more about image segmentation? Great! Here you can find
|
|
|
60
60
|
- [Walkthrough of Computer Vision Ecosystem in Hugging Face - CV Study Group](https://www.youtube.com/watch?v=oL-xmufhZM8)
|
|
61
61
|
- [A Guide on Universal Image Segmentation with Mask2Former and OneFormer](https://huggingface.co/blog/mask2former)
|
|
62
62
|
- [Zero-shot image segmentation with CLIPSeg](https://huggingface.co/blog/clipseg-zero-shot)
|
|
63
|
-
- [Semantic segmentation task guide](https://huggingface.co/docs/transformers/tasks/semantic_segmentation)
|
|
63
|
+
- [Semantic segmentation task guide](https://huggingface.co/docs/transformers/tasks/semantic_segmentation)
|