@huggingface/tasks 0.0.2 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +20 -0
- package/dist/index.d.ts +358 -46
- package/dist/index.js +103 -41
- package/dist/{index.cjs → index.mjs} +73 -67
- package/package.json +42 -33
- package/src/Types.ts +49 -43
- package/src/audio-classification/about.md +5 -5
- package/src/audio-classification/data.ts +11 -11
- package/src/audio-to-audio/about.md +4 -3
- package/src/audio-to-audio/data.ts +18 -15
- package/src/automatic-speech-recognition/about.md +5 -4
- package/src/automatic-speech-recognition/data.ts +18 -17
- package/src/const.ts +52 -44
- package/src/conversational/about.md +9 -9
- package/src/conversational/data.ts +22 -18
- package/src/depth-estimation/about.md +1 -3
- package/src/depth-estimation/data.ts +11 -11
- package/src/document-question-answering/about.md +1 -2
- package/src/document-question-answering/data.ts +22 -19
- package/src/feature-extraction/about.md +2 -3
- package/src/feature-extraction/data.ts +12 -15
- package/src/fill-mask/about.md +1 -1
- package/src/fill-mask/data.ts +16 -14
- package/src/image-classification/about.md +5 -3
- package/src/image-classification/data.ts +15 -15
- package/src/image-segmentation/about.md +4 -4
- package/src/image-segmentation/data.ts +26 -23
- package/src/image-to-image/about.md +8 -10
- package/src/image-to-image/data.ts +31 -27
- package/src/image-to-text/about.md +13 -6
- package/src/image-to-text/data.ts +20 -21
- package/src/index.ts +3 -1
- package/src/modelLibraries.ts +43 -0
- package/src/object-detection/about.md +2 -1
- package/src/object-detection/data.ts +20 -17
- package/src/pipelines.ts +608 -0
- package/src/placeholder/about.md +3 -3
- package/src/placeholder/data.ts +8 -8
- package/src/question-answering/about.md +1 -1
- package/src/question-answering/data.ts +21 -19
- package/src/reinforcement-learning/about.md +167 -176
- package/src/reinforcement-learning/data.ts +75 -78
- package/src/sentence-similarity/data.ts +29 -28
- package/src/summarization/about.md +6 -5
- package/src/summarization/data.ts +23 -20
- package/src/table-question-answering/about.md +5 -5
- package/src/table-question-answering/data.ts +35 -39
- package/src/tabular-classification/about.md +4 -6
- package/src/tabular-classification/data.ts +11 -12
- package/src/tabular-regression/about.md +14 -18
- package/src/tabular-regression/data.ts +10 -11
- package/src/tasksData.ts +47 -50
- package/src/text-classification/about.md +5 -4
- package/src/text-classification/data.ts +21 -20
- package/src/text-generation/about.md +7 -6
- package/src/text-generation/data.ts +36 -34
- package/src/text-to-image/about.md +19 -18
- package/src/text-to-image/data.ts +32 -26
- package/src/text-to-speech/about.md +4 -5
- package/src/text-to-speech/data.ts +16 -17
- package/src/text-to-video/about.md +41 -36
- package/src/text-to-video/data.ts +43 -38
- package/src/token-classification/about.md +1 -3
- package/src/token-classification/data.ts +26 -25
- package/src/translation/about.md +4 -4
- package/src/translation/data.ts +21 -21
- package/src/unconditional-image-generation/about.md +10 -5
- package/src/unconditional-image-generation/data.ts +26 -20
- package/src/video-classification/about.md +5 -1
- package/src/video-classification/data.ts +14 -14
- package/src/visual-question-answering/about.md +8 -3
- package/src/visual-question-answering/data.ts +22 -19
- package/src/zero-shot-classification/about.md +5 -4
- package/src/zero-shot-classification/data.ts +20 -20
- package/src/zero-shot-image-classification/about.md +17 -9
- package/src/zero-shot-image-classification/data.ts +12 -14
- package/tsconfig.json +18 -0
- package/assets/audio-classification/audio.wav +0 -0
- package/assets/audio-to-audio/input.wav +0 -0
- package/assets/audio-to-audio/label-0.wav +0 -0
- package/assets/audio-to-audio/label-1.wav +0 -0
- package/assets/automatic-speech-recognition/input.flac +0 -0
- package/assets/automatic-speech-recognition/wav2vec2.png +0 -0
- package/assets/contribution-guide/anatomy.png +0 -0
- package/assets/contribution-guide/libraries.png +0 -0
- package/assets/depth-estimation/depth-estimation-input.jpg +0 -0
- package/assets/depth-estimation/depth-estimation-output.png +0 -0
- package/assets/document-question-answering/document-question-answering-input.png +0 -0
- package/assets/image-classification/image-classification-input.jpeg +0 -0
- package/assets/image-segmentation/image-segmentation-input.jpeg +0 -0
- package/assets/image-segmentation/image-segmentation-output.png +0 -0
- package/assets/image-to-image/image-to-image-input.jpeg +0 -0
- package/assets/image-to-image/image-to-image-output.png +0 -0
- package/assets/image-to-image/pix2pix_examples.jpg +0 -0
- package/assets/image-to-text/savanna.jpg +0 -0
- package/assets/object-detection/object-detection-input.jpg +0 -0
- package/assets/object-detection/object-detection-output.jpg +0 -0
- package/assets/table-question-answering/tableQA.jpg +0 -0
- package/assets/text-to-image/image.jpeg +0 -0
- package/assets/text-to-speech/audio.wav +0 -0
- package/assets/text-to-video/text-to-video-output.gif +0 -0
- package/assets/unconditional-image-generation/unconditional-image-generation-output.jpeg +0 -0
- package/assets/video-classification/video-classification-input.gif +0 -0
- package/assets/visual-question-answering/elephant.jpeg +0 -0
- package/assets/zero-shot-image-classification/image-classification-input.jpeg +0 -0
- package/dist/index.d.cts +0 -145
package/src/tasksData.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { type PipelineType, PIPELINE_DATA } from "
|
|
1
|
+
import { type PipelineType, PIPELINE_DATA } from "./pipelines";
|
|
2
2
|
import type { TaskDataCustom, TaskData } from "./Types";
|
|
3
3
|
|
|
4
4
|
import audioClassification from "./audio-classification/data";
|
|
@@ -41,64 +41,61 @@ import { TASKS_MODEL_LIBRARIES } from "./const";
|
|
|
41
41
|
// Tasks that call getData() without the second argument will
|
|
42
42
|
// have a "placeholder" page.
|
|
43
43
|
export const TASKS_DATA: Record<PipelineType, TaskData | undefined> = {
|
|
44
|
-
"audio-classification":
|
|
45
|
-
"audio-to-audio":
|
|
46
|
-
"automatic-speech-recognition":
|
|
47
|
-
|
|
48
|
-
"depth-estimation":
|
|
49
|
-
"document-question-answering":
|
|
50
|
-
"feature-extraction":
|
|
51
|
-
"fill-mask":
|
|
52
|
-
"graph-ml":
|
|
53
|
-
"image-classification":
|
|
54
|
-
"image-segmentation":
|
|
55
|
-
"image-to-image":
|
|
56
|
-
"image-to-text":
|
|
57
|
-
"multiple-choice":
|
|
58
|
-
"object-detection":
|
|
59
|
-
"video-classification":
|
|
60
|
-
|
|
61
|
-
"question-answering":
|
|
62
|
-
"reinforcement-learning":
|
|
63
|
-
|
|
64
|
-
"sentence-similarity":
|
|
65
|
-
|
|
66
|
-
"table-question-answering":
|
|
67
|
-
"table-to-text":
|
|
68
|
-
"tabular-classification":
|
|
69
|
-
"tabular-regression":
|
|
70
|
-
"tabular-to-text":
|
|
71
|
-
"text-classification":
|
|
72
|
-
"text-generation":
|
|
73
|
-
"text-retrieval":
|
|
74
|
-
"text-to-image":
|
|
75
|
-
"text-to-speech":
|
|
76
|
-
"text-to-audio":
|
|
77
|
-
"text-to-video":
|
|
78
|
-
"text2text-generation":
|
|
79
|
-
"time-series-forecasting":
|
|
80
|
-
"token-classification":
|
|
81
|
-
|
|
44
|
+
"audio-classification": getData("audio-classification", audioClassification),
|
|
45
|
+
"audio-to-audio": getData("audio-to-audio", audioToAudio),
|
|
46
|
+
"automatic-speech-recognition": getData("automatic-speech-recognition", automaticSpeechRecognition),
|
|
47
|
+
conversational: getData("conversational", conversational),
|
|
48
|
+
"depth-estimation": getData("depth-estimation", depthEstimation),
|
|
49
|
+
"document-question-answering": getData("document-question-answering", documentQuestionAnswering),
|
|
50
|
+
"feature-extraction": getData("feature-extraction", featureExtraction),
|
|
51
|
+
"fill-mask": getData("fill-mask", fillMask),
|
|
52
|
+
"graph-ml": undefined,
|
|
53
|
+
"image-classification": getData("image-classification", imageClassification),
|
|
54
|
+
"image-segmentation": getData("image-segmentation", imageSegmentation),
|
|
55
|
+
"image-to-image": getData("image-to-image", imageToImage),
|
|
56
|
+
"image-to-text": getData("image-to-text", imageToText),
|
|
57
|
+
"multiple-choice": undefined,
|
|
58
|
+
"object-detection": getData("object-detection", objectDetection),
|
|
59
|
+
"video-classification": getData("video-classification", videoClassification),
|
|
60
|
+
other: undefined,
|
|
61
|
+
"question-answering": getData("question-answering", questionAnswering),
|
|
62
|
+
"reinforcement-learning": getData("reinforcement-learning", reinforcementLearning),
|
|
63
|
+
robotics: undefined,
|
|
64
|
+
"sentence-similarity": getData("sentence-similarity", sentenceSimilarity),
|
|
65
|
+
summarization: getData("summarization", summarization),
|
|
66
|
+
"table-question-answering": getData("table-question-answering", tableQuestionAnswering),
|
|
67
|
+
"table-to-text": undefined,
|
|
68
|
+
"tabular-classification": getData("tabular-classification", tabularClassification),
|
|
69
|
+
"tabular-regression": getData("tabular-regression", tabularRegression),
|
|
70
|
+
"tabular-to-text": undefined,
|
|
71
|
+
"text-classification": getData("text-classification", textClassification),
|
|
72
|
+
"text-generation": getData("text-generation", textGeneration),
|
|
73
|
+
"text-retrieval": undefined,
|
|
74
|
+
"text-to-image": getData("text-to-image", textToImage),
|
|
75
|
+
"text-to-speech": getData("text-to-speech", textToSpeech),
|
|
76
|
+
"text-to-audio": undefined,
|
|
77
|
+
"text-to-video": getData("text-to-video", textToVideo),
|
|
78
|
+
"text2text-generation": undefined,
|
|
79
|
+
"time-series-forecasting": undefined,
|
|
80
|
+
"token-classification": getData("token-classification", tokenClassification),
|
|
81
|
+
translation: getData("translation", translation),
|
|
82
82
|
"unconditional-image-generation": getData("unconditional-image-generation", unconditionalImageGeneration),
|
|
83
|
-
"visual-question-answering":
|
|
84
|
-
"voice-activity-detection":
|
|
85
|
-
"zero-shot-classification":
|
|
83
|
+
"visual-question-answering": getData("visual-question-answering", visualQuestionAnswering),
|
|
84
|
+
"voice-activity-detection": undefined,
|
|
85
|
+
"zero-shot-classification": getData("zero-shot-classification", zeroShotClassification),
|
|
86
86
|
"zero-shot-image-classification": getData("zero-shot-image-classification", zeroShotImageClassification),
|
|
87
87
|
} as const;
|
|
88
88
|
|
|
89
|
-
|
|
90
|
-
* Return the whole TaskData object for a certain task.
|
|
89
|
+
/**
|
|
90
|
+
* Return the whole TaskData object for a certain task.
|
|
91
91
|
* If the partialTaskData argument is left undefined,
|
|
92
92
|
* the default placeholder data will be used.
|
|
93
93
|
*/
|
|
94
|
-
function getData(
|
|
95
|
-
type: PipelineType,
|
|
96
|
-
partialTaskData: TaskDataCustom = placeholder
|
|
97
|
-
): TaskData {
|
|
94
|
+
function getData(type: PipelineType, partialTaskData: TaskDataCustom = placeholder): TaskData {
|
|
98
95
|
return {
|
|
99
96
|
...partialTaskData,
|
|
100
|
-
id:
|
|
101
|
-
label:
|
|
97
|
+
id: type,
|
|
98
|
+
label: PIPELINE_DATA[type].name,
|
|
102
99
|
libraries: TASKS_MODEL_LIBRARIES[type],
|
|
103
100
|
};
|
|
104
101
|
}
|
|
@@ -119,10 +119,11 @@ import { HfInference } from "@huggingface/inference";
|
|
|
119
119
|
|
|
120
120
|
const inference = new HfInference(HF_ACCESS_TOKEN);
|
|
121
121
|
await inference.textClassification({
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
})
|
|
122
|
+
model: "distilbert-base-uncased-finetuned-sst-2-english",
|
|
123
|
+
inputs: "I love this movie!",
|
|
124
|
+
});
|
|
125
125
|
```
|
|
126
|
+
|
|
126
127
|
### Grammatical Correctness
|
|
127
128
|
|
|
128
129
|
Linguistic Acceptability is the task of assessing the grammatical acceptability of a sentence. The classes in this task are “acceptable” and “unacceptable”. The benchmark dataset used for this task is [Corpus of Linguistic Acceptability (CoLA)](https://huggingface.co/datasets/glue/viewer/cola/test). The dataset consists of texts and their labels.
|
|
@@ -168,4 +169,4 @@ Would you like to learn more about the topic? Awesome! Here you can find some cu
|
|
|
168
169
|
|
|
169
170
|
### Documentation
|
|
170
171
|
|
|
171
|
-
- [Text classification task guide](https://huggingface.co/docs/transformers/tasks/sequence_classification)
|
|
172
|
+
- [Text classification task guide](https://huggingface.co/docs/transformers/tasks/sequence_classification)
|
|
@@ -4,21 +4,20 @@ const taskData: TaskDataCustom = {
|
|
|
4
4
|
datasets: [
|
|
5
5
|
{
|
|
6
6
|
description: "A widely used dataset used to benchmark multiple variants of text classification.",
|
|
7
|
-
id:
|
|
7
|
+
id: "glue",
|
|
8
8
|
},
|
|
9
9
|
{
|
|
10
10
|
description: "A text classification dataset used to benchmark natural language inference models",
|
|
11
|
-
id:
|
|
11
|
+
id: "snli",
|
|
12
12
|
},
|
|
13
13
|
],
|
|
14
14
|
demo: {
|
|
15
15
|
inputs: [
|
|
16
16
|
{
|
|
17
|
-
label:
|
|
17
|
+
label: "Input",
|
|
18
18
|
content: "I love Hugging Face!",
|
|
19
|
-
type:
|
|
19
|
+
type: "text",
|
|
20
20
|
},
|
|
21
|
-
|
|
22
21
|
],
|
|
23
22
|
outputs: [
|
|
24
23
|
{
|
|
@@ -26,15 +25,15 @@ const taskData: TaskDataCustom = {
|
|
|
26
25
|
data: [
|
|
27
26
|
{
|
|
28
27
|
label: "POSITIVE",
|
|
29
|
-
score: 0.
|
|
28
|
+
score: 0.9,
|
|
30
29
|
},
|
|
31
30
|
{
|
|
32
31
|
label: "NEUTRAL",
|
|
33
|
-
score: 0.
|
|
32
|
+
score: 0.1,
|
|
34
33
|
},
|
|
35
34
|
{
|
|
36
35
|
label: "NEGATIVE",
|
|
37
|
-
score: 0.
|
|
36
|
+
score: 0.0,
|
|
38
37
|
},
|
|
39
38
|
],
|
|
40
39
|
},
|
|
@@ -43,48 +42,50 @@ const taskData: TaskDataCustom = {
|
|
|
43
42
|
metrics: [
|
|
44
43
|
{
|
|
45
44
|
description: "",
|
|
46
|
-
id:
|
|
45
|
+
id: "accuracy",
|
|
47
46
|
},
|
|
48
47
|
{
|
|
49
48
|
description: "",
|
|
50
|
-
id:
|
|
49
|
+
id: "recall",
|
|
51
50
|
},
|
|
52
51
|
{
|
|
53
52
|
description: "",
|
|
54
|
-
id:
|
|
53
|
+
id: "precision",
|
|
55
54
|
},
|
|
56
55
|
{
|
|
57
|
-
description:
|
|
58
|
-
|
|
56
|
+
description:
|
|
57
|
+
"The F1 metric is the harmonic mean of the precision and recall. It can be calculated as: F1 = 2 * (precision * recall) / (precision + recall)",
|
|
58
|
+
id: "f1",
|
|
59
59
|
},
|
|
60
60
|
],
|
|
61
61
|
models: [
|
|
62
62
|
{
|
|
63
63
|
description: "A robust model trained for sentiment analysis.",
|
|
64
|
-
id:
|
|
64
|
+
id: "distilbert-base-uncased-finetuned-sst-2-english",
|
|
65
65
|
},
|
|
66
66
|
{
|
|
67
67
|
description: "Multi-genre natural language inference model.",
|
|
68
|
-
id:
|
|
68
|
+
id: "roberta-large-mnli",
|
|
69
69
|
},
|
|
70
70
|
],
|
|
71
71
|
spaces: [
|
|
72
72
|
{
|
|
73
73
|
description: "An application that can classify financial sentiment.",
|
|
74
|
-
id:
|
|
74
|
+
id: "IoannisTr/Tech_Stocks_Trading_Assistant",
|
|
75
75
|
},
|
|
76
76
|
{
|
|
77
77
|
description: "A dashboard that contains various text classification tasks.",
|
|
78
|
-
id:
|
|
78
|
+
id: "miesnerjacob/Multi-task-NLP",
|
|
79
79
|
},
|
|
80
80
|
{
|
|
81
81
|
description: "An application that analyzes user reviews in healthcare.",
|
|
82
|
-
id:
|
|
82
|
+
id: "spacy/healthsea-demo",
|
|
83
83
|
},
|
|
84
84
|
],
|
|
85
|
-
summary:
|
|
85
|
+
summary:
|
|
86
|
+
"Text Classification is the task of assigning a label or class to a given text. Some use cases are sentiment analysis, natural language inference, and assessing grammatical correctness.",
|
|
86
87
|
widgetModels: ["distilbert-base-uncased-finetuned-sst-2-english"],
|
|
87
|
-
youtubeId:
|
|
88
|
+
youtubeId: "leNG9fN9FQU",
|
|
88
89
|
};
|
|
89
90
|
|
|
90
91
|
export default taskData;
|
|
@@ -26,7 +26,7 @@ A popular variant of Text Generation models predicts the next word given a bunch
|
|
|
26
26
|
- Continue a story given the first sentences.
|
|
27
27
|
- Provided a code description, generate the code.
|
|
28
28
|
|
|
29
|
-
The most popular models for this task are GPT-based models or [Llama series](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf). These models are trained on data that has no labels, so you just need plain text to train your own model. You can train text generation models to generate a wide variety of documents, from code to stories.
|
|
29
|
+
The most popular models for this task are GPT-based models or [Llama series](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf). These models are trained on data that has no labels, so you just need plain text to train your own model. You can train text generation models to generate a wide variety of documents, from code to stories.
|
|
30
30
|
|
|
31
31
|
### Text-to-Text Generation Models
|
|
32
32
|
|
|
@@ -44,7 +44,6 @@ generator("Hello, I'm a language model", max_length = 30, num_return_sequences=3
|
|
|
44
44
|
## {'generated_text': "Hello, I'm a language modeler. I write and maintain software in Python. I love to code, and that includes coding things that require writing"}, ...
|
|
45
45
|
```
|
|
46
46
|
|
|
47
|
-
|
|
48
47
|
[Text-to-Text generation models](https://huggingface.co/models?pipeline_tag=text2text-generation&sort=downloads) have a separate pipeline called `text2text-generation`. This pipeline takes an input containing the sentence including the task and returns the output of the accomplished task.
|
|
49
48
|
|
|
50
49
|
```python
|
|
@@ -65,9 +64,9 @@ import { HfInference } from "@huggingface/inference";
|
|
|
65
64
|
|
|
66
65
|
const inference = new HfInference(HF_ACCESS_TOKEN);
|
|
67
66
|
await inference.conversational({
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
})
|
|
67
|
+
model: "distilbert-base-uncased-finetuned-sst-2-english",
|
|
68
|
+
inputs: "I love this movie!",
|
|
69
|
+
});
|
|
71
70
|
```
|
|
72
71
|
|
|
73
72
|
## Text Generation Inference
|
|
@@ -80,24 +79,26 @@ Hugging Face Spaces includes templates to easily deploy your own instance of a s
|
|
|
80
79
|
|
|
81
80
|

|
|
82
81
|
|
|
83
|
-
|
|
84
82
|
## Useful Resources
|
|
85
83
|
|
|
86
84
|
Would you like to learn more about the topic? Awesome! Here you can find some curated resources that you may find helpful!
|
|
87
85
|
|
|
88
86
|
### Tools within Hugging Face Ecosystem
|
|
87
|
+
|
|
89
88
|
- You can use [PEFT](https://github.com/huggingface/peft) to adapt large language models in an efficient way.
|
|
90
89
|
- [ChatUI](https://github.com/huggingface/chat-ui) is the open-source interface to converse with Large Language Models.
|
|
91
90
|
- [text-generation-inference](https://github.com/huggingface/text-generation-inference)
|
|
92
91
|
- [HuggingChat](https://huggingface.co/chat/) is a chat interface powered by Hugging Face to chat with powerful models like Llama 2 70B.
|
|
93
92
|
|
|
94
93
|
### Documentation
|
|
94
|
+
|
|
95
95
|
- [PEFT documentation](https://huggingface.co/docs/peft/index)
|
|
96
96
|
- [ChatUI Docker Spaces](https://huggingface.co/docs/hub/spaces-sdks-docker-chatui)
|
|
97
97
|
- [Causal language modeling task guide](https://huggingface.co/docs/transformers/tasks/language_modeling)
|
|
98
98
|
- [Text generation strategies](https://huggingface.co/docs/transformers/generation_strategies)
|
|
99
99
|
|
|
100
100
|
### Course and Blogs
|
|
101
|
+
|
|
101
102
|
- [Course Chapter on Training a causal language model from scratch](https://huggingface.co/course/chapter7/6?fw=pt)
|
|
102
103
|
- [T0 Discussion with Victor Sanh](https://www.youtube.com/watch?v=Oy49SCW_Xpw&ab_channel=HuggingFace)
|
|
103
104
|
- [Hugging Face Course Workshops: Pretraining Language Models & CodeParrot](https://www.youtube.com/watch?v=ExUR7w6xe94&ab_channel=HuggingFace)
|
|
@@ -4,121 +4,123 @@ const taskData: TaskDataCustom = {
|
|
|
4
4
|
datasets: [
|
|
5
5
|
{
|
|
6
6
|
description: "A large multilingual dataset of text crawled from the web.",
|
|
7
|
-
id:
|
|
7
|
+
id: "mc4",
|
|
8
8
|
},
|
|
9
9
|
{
|
|
10
|
-
description:
|
|
11
|
-
|
|
10
|
+
description:
|
|
11
|
+
"Diverse open-source data consisting of 22 smaller high-quality datasets. It was used to train GPT-Neo.",
|
|
12
|
+
id: "the_pile",
|
|
12
13
|
},
|
|
13
14
|
{
|
|
14
15
|
description: "A crowd-sourced instruction dataset to develop an AI assistant.",
|
|
15
|
-
id:
|
|
16
|
+
id: "OpenAssistant/oasst1",
|
|
16
17
|
},
|
|
17
18
|
{
|
|
18
19
|
description: "A crowd-sourced instruction dataset created by Databricks employees.",
|
|
19
|
-
id:
|
|
20
|
+
id: "databricks/databricks-dolly-15k",
|
|
20
21
|
},
|
|
21
22
|
],
|
|
22
23
|
demo: {
|
|
23
24
|
inputs: [
|
|
24
25
|
{
|
|
25
|
-
label:
|
|
26
|
-
content:
|
|
27
|
-
"Once upon a time,",
|
|
26
|
+
label: "Input",
|
|
27
|
+
content: "Once upon a time,",
|
|
28
28
|
type: "text",
|
|
29
29
|
},
|
|
30
|
-
|
|
31
30
|
],
|
|
32
31
|
outputs: [
|
|
33
32
|
{
|
|
34
|
-
label:
|
|
33
|
+
label: "Output",
|
|
35
34
|
content:
|
|
36
|
-
|
|
35
|
+
"Once upon a time, we knew that our ancestors were on the verge of extinction. The great explorers and poets of the Old World, from Alexander the Great to Chaucer, are dead and gone. A good many of our ancient explorers and poets have",
|
|
37
36
|
type: "text",
|
|
38
37
|
},
|
|
39
38
|
],
|
|
40
39
|
},
|
|
41
40
|
metrics: [
|
|
42
41
|
{
|
|
43
|
-
description:
|
|
44
|
-
|
|
42
|
+
description:
|
|
43
|
+
"Cross Entropy is a metric that calculates the difference between two probability distributions. Each probability distribution is the distribution of predicted words",
|
|
44
|
+
id: "Cross Entropy",
|
|
45
45
|
},
|
|
46
46
|
{
|
|
47
|
-
description:
|
|
48
|
-
|
|
47
|
+
description:
|
|
48
|
+
"The Perplexity metric is the exponential of the cross-entropy loss. It evaluates the probabilities assigned to the next word by the model. Lower perplexity indicates better performance",
|
|
49
|
+
id: "Perplexity",
|
|
49
50
|
},
|
|
50
51
|
],
|
|
51
52
|
models: [
|
|
52
53
|
{
|
|
53
54
|
description: "A large language model trained for text generation.",
|
|
54
|
-
id:
|
|
55
|
+
id: "bigscience/bloom-560m",
|
|
55
56
|
},
|
|
56
57
|
{
|
|
57
58
|
description: "A large code generation model that can generate code in 80+ languages.",
|
|
58
|
-
id:
|
|
59
|
+
id: "bigcode/starcoder",
|
|
59
60
|
},
|
|
60
61
|
{
|
|
61
62
|
description: "A model trained to follow instructions, uses Pythia-12b as base model.",
|
|
62
|
-
id:
|
|
63
|
+
id: "databricks/dolly-v2-12b",
|
|
63
64
|
},
|
|
64
65
|
{
|
|
65
66
|
description: "A model trained to follow instructions curated by community, uses Pythia-12b as base model.",
|
|
66
|
-
id:
|
|
67
|
+
id: "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5",
|
|
67
68
|
},
|
|
68
69
|
{
|
|
69
70
|
description: "A large language model trained to generate text in English.",
|
|
70
|
-
id:
|
|
71
|
+
id: "stabilityai/stablelm-tuned-alpha-7b",
|
|
71
72
|
},
|
|
72
73
|
{
|
|
73
74
|
description: "A model trained to follow instructions, based on mosaicml/mpt-7b.",
|
|
74
|
-
id:
|
|
75
|
+
id: "mosaicml/mpt-7b-instruct",
|
|
75
76
|
},
|
|
76
77
|
{
|
|
77
78
|
description: "A large language model trained to generate text in English.",
|
|
78
|
-
id:
|
|
79
|
+
id: "EleutherAI/pythia-12b",
|
|
79
80
|
},
|
|
80
81
|
{
|
|
81
82
|
description: "A large text-to-text model trained to follow instructions.",
|
|
82
|
-
id:
|
|
83
|
+
id: "google/flan-ul2",
|
|
83
84
|
},
|
|
84
85
|
{
|
|
85
86
|
description: "A large and powerful text generation model.",
|
|
86
|
-
id:
|
|
87
|
+
id: "tiiuae/falcon-40b",
|
|
87
88
|
},
|
|
88
89
|
{
|
|
89
90
|
description: "State-of-the-art open-source large language model.",
|
|
90
|
-
id:
|
|
91
|
+
id: "meta-llama/Llama-2-70b-hf",
|
|
91
92
|
},
|
|
92
93
|
],
|
|
93
|
-
spaces:
|
|
94
|
+
spaces: [
|
|
94
95
|
{
|
|
95
96
|
description: "A robust text generation model that can perform various tasks through natural language prompting.",
|
|
96
|
-
id:
|
|
97
|
+
id: "huggingface/bloom_demo",
|
|
97
98
|
},
|
|
98
99
|
{
|
|
99
100
|
description: "An text generation based application that can write code for 80+ languages.",
|
|
100
|
-
id:
|
|
101
|
+
id: "bigcode/bigcode-playground",
|
|
101
102
|
},
|
|
102
103
|
{
|
|
103
104
|
description: "An text generation based application for conversations.",
|
|
104
|
-
id:
|
|
105
|
+
id: "h2oai/h2ogpt-chatbot",
|
|
105
106
|
},
|
|
106
107
|
{
|
|
107
108
|
description: "An text generation application that combines OpenAI and Hugging Face models.",
|
|
108
|
-
id:
|
|
109
|
+
id: "microsoft/HuggingGPT",
|
|
109
110
|
},
|
|
110
111
|
{
|
|
111
112
|
description: "An text generation application that uses StableLM-tuned-alpha-7b.",
|
|
112
|
-
id:
|
|
113
|
+
id: "stabilityai/stablelm-tuned-alpha-chat",
|
|
113
114
|
},
|
|
114
115
|
{
|
|
115
116
|
description: "An UI that uses StableLM-tuned-alpha-7b.",
|
|
116
|
-
id:
|
|
117
|
+
id: "togethercomputer/OpenChatKit",
|
|
117
118
|
},
|
|
118
119
|
],
|
|
119
|
-
summary:
|
|
120
|
+
summary:
|
|
121
|
+
"Generating text is the task of producing new text. These models can, for example, fill in incomplete text or paraphrase.",
|
|
120
122
|
widgetModels: ["tiiuae/falcon-7b-instruct"],
|
|
121
|
-
youtubeId:
|
|
123
|
+
youtubeId: "Vpjb1lu0MDk",
|
|
122
124
|
};
|
|
123
125
|
|
|
124
126
|
export default taskData;
|
|
@@ -1,28 +1,29 @@
|
|
|
1
1
|
## Use Cases
|
|
2
|
+
|
|
2
3
|
### Data Generation
|
|
3
|
-
|
|
4
|
-
Businesses can generate data for their their use cases by inputting text and getting image outputs.
|
|
5
|
-
|
|
4
|
+
|
|
5
|
+
Businesses can generate data for their use cases by inputting text and getting image outputs.
|
|
6
|
+
|
|
6
7
|
### Immersive Conversational Chatbots
|
|
7
|
-
|
|
8
|
+
|
|
8
9
|
Chatbots can be made more immersive if they provide contextual images based on the input provided by the user.
|
|
9
|
-
|
|
10
|
+
|
|
10
11
|
### Creative Ideas for Fashion Industry
|
|
11
|
-
|
|
12
|
+
|
|
12
13
|
Different patterns can be generated to obtain unique pieces of fashion. Text-to-image models make creations easier for designers to conceptualize their design before actually implementing it.
|
|
13
|
-
|
|
14
|
+
|
|
14
15
|
### Architecture Industry
|
|
15
|
-
|
|
16
|
+
|
|
16
17
|
Architects can utilise the models to construct an environment based on the requirements of the floor plan. This can also include the furniture that has to be placed in that environment.
|
|
17
18
|
|
|
18
19
|
## Task Variants
|
|
19
20
|
|
|
20
21
|
You can contribute variants of this task [here](https://github.com/huggingface/hub-docs/blob/main/tasks/src/text-to-image/about.md).
|
|
21
22
|
|
|
22
|
-
|
|
23
23
|
## Inference
|
|
24
24
|
|
|
25
25
|
You can use diffusers pipelines to infer with `text-to-image` models.
|
|
26
|
+
|
|
26
27
|
```python
|
|
27
28
|
from diffusers import StableDiffusionPipeline, EulerDiscreteScheduler
|
|
28
29
|
|
|
@@ -42,15 +43,16 @@ import { HfInference } from "@huggingface/inference";
|
|
|
42
43
|
|
|
43
44
|
const inference = new HfInference(HF_ACCESS_TOKEN);
|
|
44
45
|
await inference.textToImage({
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
})
|
|
46
|
+
model: "stabilityai/stable-diffusion-2",
|
|
47
|
+
inputs: "award winning high resolution photo of a giant tortoise/((ladybird)) hybrid, [trending on artstation]",
|
|
48
|
+
parameters: {
|
|
49
|
+
negative_prompt: "blurry",
|
|
50
|
+
},
|
|
51
|
+
});
|
|
51
52
|
```
|
|
52
|
-
|
|
53
|
+
|
|
53
54
|
## Useful Resources
|
|
55
|
+
|
|
54
56
|
- [Hugging Face Diffusion Models Course](https://github.com/huggingface/diffusion-models-class)
|
|
55
57
|
- [Getting Started with Diffusers](https://huggingface.co/docs/diffusers/index)
|
|
56
58
|
- [Text-to-Image Generation](https://huggingface.co/docs/diffusers/using-diffusers/conditional_image_generation)
|
|
@@ -61,5 +63,4 @@ await inference.textToImage({
|
|
|
61
63
|
- [🧨 Stable Diffusion in JAX/Flax](https://huggingface.co/blog/stable_diffusion_jax)
|
|
62
64
|
- [Running IF with 🧨 diffusers on a Free Tier Google Colab](https://huggingface.co/blog/if)
|
|
63
65
|
|
|
64
|
-
|
|
65
|
-
This page was made possible thanks to the efforts of [Ishan Dutta](https://huggingface.co/ishandutta), [Enrique Elias Ubaldo](https://huggingface.co/herrius) and [Oğuz Akif](https://huggingface.co/oguzakif).
|
|
66
|
+
This page was made possible thanks to the efforts of [Ishan Dutta](https://huggingface.co/ishandutta), [Enrique Elias Ubaldo](https://huggingface.co/herrius) and [Oğuz Akif](https://huggingface.co/oguzakif).
|
|
@@ -4,85 +4,91 @@ const taskData: TaskDataCustom = {
|
|
|
4
4
|
datasets: [
|
|
5
5
|
{
|
|
6
6
|
description: "RedCaps is a large-scale dataset of 12M image-text pairs collected from Reddit.",
|
|
7
|
-
id:
|
|
7
|
+
id: "red_caps",
|
|
8
8
|
},
|
|
9
9
|
{
|
|
10
10
|
description: "Conceptual Captions is a dataset consisting of ~3.3M images annotated with captions.",
|
|
11
|
-
id:
|
|
11
|
+
id: "conceptual_captions",
|
|
12
12
|
},
|
|
13
13
|
],
|
|
14
14
|
demo: {
|
|
15
15
|
inputs: [
|
|
16
16
|
{
|
|
17
|
-
label:
|
|
17
|
+
label: "Input",
|
|
18
18
|
content: "A city above clouds, pastel colors, Victorian style",
|
|
19
|
-
type:
|
|
19
|
+
type: "text",
|
|
20
20
|
},
|
|
21
21
|
],
|
|
22
22
|
outputs: [
|
|
23
23
|
{
|
|
24
24
|
filename: "image.jpeg",
|
|
25
|
-
type:
|
|
25
|
+
type: "img",
|
|
26
26
|
},
|
|
27
27
|
],
|
|
28
28
|
},
|
|
29
29
|
metrics: [
|
|
30
30
|
{
|
|
31
|
-
description:
|
|
31
|
+
description:
|
|
32
|
+
"The Inception Score (IS) measure assesses diversity and meaningfulness. It uses a generated image sample to predict its label. A higher score signifies more diverse and meaningful images.",
|
|
32
33
|
id: "IS",
|
|
33
34
|
},
|
|
34
35
|
{
|
|
35
|
-
description:
|
|
36
|
+
description:
|
|
37
|
+
"The Fréchet Inception Distance (FID) calculates the distance between distributions between synthetic and real samples. A lower FID score indicates better similarity between the distributions of real and generated images.",
|
|
36
38
|
id: "FID",
|
|
37
|
-
},
|
|
39
|
+
},
|
|
38
40
|
{
|
|
39
|
-
description:
|
|
41
|
+
description:
|
|
42
|
+
"R-precision assesses how the generated image aligns with the provided text description. It uses the generated images as queries to retrieve relevant text descriptions. The top 'r' relevant descriptions are selected and used to calculate R-precision as r/R, where 'R' is the number of ground truth descriptions associated with the generated images. A higher R-precision value indicates a better model.",
|
|
40
43
|
id: "R-Precision",
|
|
41
|
-
},
|
|
42
|
-
|
|
43
|
-
models:
|
|
44
|
+
},
|
|
45
|
+
],
|
|
46
|
+
models: [
|
|
44
47
|
{
|
|
45
|
-
description:
|
|
46
|
-
|
|
48
|
+
description:
|
|
49
|
+
"A latent text-to-image diffusion model capable of generating photo-realistic images given any text input.",
|
|
50
|
+
id: "CompVis/stable-diffusion-v1-4",
|
|
47
51
|
},
|
|
48
52
|
{
|
|
49
|
-
description:
|
|
50
|
-
|
|
53
|
+
description:
|
|
54
|
+
"A model that can be used to generate images based on text prompts. The DALL·E Mega model is the largest version of DALLE Mini.",
|
|
55
|
+
id: "dalle-mini/dalle-mega",
|
|
51
56
|
},
|
|
52
57
|
{
|
|
53
58
|
description: "A text-to-image model that can generate coherent text inside image.",
|
|
54
|
-
id:
|
|
59
|
+
id: "DeepFloyd/IF-I-XL-v1.0",
|
|
55
60
|
},
|
|
56
61
|
{
|
|
57
62
|
description: "A powerful text-to-image model.",
|
|
58
|
-
id:
|
|
63
|
+
id: "kakaobrain/karlo-v1-alpha",
|
|
59
64
|
},
|
|
60
65
|
],
|
|
61
|
-
spaces:
|
|
66
|
+
spaces: [
|
|
62
67
|
{
|
|
63
68
|
description: "A powerful text-to-image application.",
|
|
64
|
-
id:
|
|
69
|
+
id: "stabilityai/stable-diffusion",
|
|
65
70
|
},
|
|
66
71
|
{
|
|
67
72
|
description: "An text-to-image application that can generate coherent text inside the image.",
|
|
68
|
-
id:
|
|
73
|
+
id: "DeepFloyd/IF",
|
|
69
74
|
},
|
|
70
75
|
{
|
|
71
76
|
description: "An powerful text-to-image application that can generate images.",
|
|
72
|
-
id:
|
|
77
|
+
id: "kakaobrain/karlo",
|
|
73
78
|
},
|
|
74
79
|
{
|
|
75
80
|
description: "An powerful text-to-image application that can generates 3D representations.",
|
|
76
|
-
id:
|
|
81
|
+
id: "hysts/Shap-E",
|
|
77
82
|
},
|
|
78
83
|
{
|
|
79
84
|
description: "A strong application for `text-to-image`, `image-to-image` and image inpainting.",
|
|
80
|
-
id:
|
|
85
|
+
id: "ArtGAN/Stable-Diffusion-ControlNet-WebUI",
|
|
81
86
|
},
|
|
82
87
|
],
|
|
83
|
-
summary:
|
|
88
|
+
summary:
|
|
89
|
+
"Generates images from input text. These models can be used to generate and modify images based on text prompts.",
|
|
84
90
|
widgetModels: ["CompVis/stable-diffusion-v1-4"],
|
|
85
|
-
youtubeId:
|
|
91
|
+
youtubeId: "",
|
|
86
92
|
};
|
|
87
93
|
|
|
88
94
|
export default taskData;
|