@huggingface/tasks 0.0.2 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. package/LICENSE +21 -0
  2. package/README.md +20 -0
  3. package/dist/index.d.ts +358 -46
  4. package/dist/index.js +103 -41
  5. package/dist/{index.cjs → index.mjs} +73 -67
  6. package/package.json +42 -33
  7. package/src/Types.ts +49 -43
  8. package/src/audio-classification/about.md +5 -5
  9. package/src/audio-classification/data.ts +11 -11
  10. package/src/audio-to-audio/about.md +4 -3
  11. package/src/audio-to-audio/data.ts +18 -15
  12. package/src/automatic-speech-recognition/about.md +5 -4
  13. package/src/automatic-speech-recognition/data.ts +18 -17
  14. package/src/const.ts +52 -44
  15. package/src/conversational/about.md +9 -9
  16. package/src/conversational/data.ts +22 -18
  17. package/src/depth-estimation/about.md +1 -3
  18. package/src/depth-estimation/data.ts +11 -11
  19. package/src/document-question-answering/about.md +1 -2
  20. package/src/document-question-answering/data.ts +22 -19
  21. package/src/feature-extraction/about.md +2 -3
  22. package/src/feature-extraction/data.ts +12 -15
  23. package/src/fill-mask/about.md +1 -1
  24. package/src/fill-mask/data.ts +16 -14
  25. package/src/image-classification/about.md +5 -3
  26. package/src/image-classification/data.ts +15 -15
  27. package/src/image-segmentation/about.md +4 -4
  28. package/src/image-segmentation/data.ts +26 -23
  29. package/src/image-to-image/about.md +8 -10
  30. package/src/image-to-image/data.ts +31 -27
  31. package/src/image-to-text/about.md +13 -6
  32. package/src/image-to-text/data.ts +20 -21
  33. package/src/index.ts +3 -1
  34. package/src/modelLibraries.ts +43 -0
  35. package/src/object-detection/about.md +2 -1
  36. package/src/object-detection/data.ts +20 -17
  37. package/src/pipelines.ts +608 -0
  38. package/src/placeholder/about.md +3 -3
  39. package/src/placeholder/data.ts +8 -8
  40. package/src/question-answering/about.md +1 -1
  41. package/src/question-answering/data.ts +21 -19
  42. package/src/reinforcement-learning/about.md +167 -176
  43. package/src/reinforcement-learning/data.ts +75 -78
  44. package/src/sentence-similarity/data.ts +29 -28
  45. package/src/summarization/about.md +6 -5
  46. package/src/summarization/data.ts +23 -20
  47. package/src/table-question-answering/about.md +5 -5
  48. package/src/table-question-answering/data.ts +35 -39
  49. package/src/tabular-classification/about.md +4 -6
  50. package/src/tabular-classification/data.ts +11 -12
  51. package/src/tabular-regression/about.md +14 -18
  52. package/src/tabular-regression/data.ts +10 -11
  53. package/src/tasksData.ts +47 -50
  54. package/src/text-classification/about.md +5 -4
  55. package/src/text-classification/data.ts +21 -20
  56. package/src/text-generation/about.md +7 -6
  57. package/src/text-generation/data.ts +36 -34
  58. package/src/text-to-image/about.md +19 -18
  59. package/src/text-to-image/data.ts +32 -26
  60. package/src/text-to-speech/about.md +4 -5
  61. package/src/text-to-speech/data.ts +16 -17
  62. package/src/text-to-video/about.md +41 -36
  63. package/src/text-to-video/data.ts +43 -38
  64. package/src/token-classification/about.md +1 -3
  65. package/src/token-classification/data.ts +26 -25
  66. package/src/translation/about.md +4 -4
  67. package/src/translation/data.ts +21 -21
  68. package/src/unconditional-image-generation/about.md +10 -5
  69. package/src/unconditional-image-generation/data.ts +26 -20
  70. package/src/video-classification/about.md +5 -1
  71. package/src/video-classification/data.ts +14 -14
  72. package/src/visual-question-answering/about.md +8 -3
  73. package/src/visual-question-answering/data.ts +22 -19
  74. package/src/zero-shot-classification/about.md +5 -4
  75. package/src/zero-shot-classification/data.ts +20 -20
  76. package/src/zero-shot-image-classification/about.md +17 -9
  77. package/src/zero-shot-image-classification/data.ts +12 -14
  78. package/tsconfig.json +18 -0
  79. package/assets/audio-classification/audio.wav +0 -0
  80. package/assets/audio-to-audio/input.wav +0 -0
  81. package/assets/audio-to-audio/label-0.wav +0 -0
  82. package/assets/audio-to-audio/label-1.wav +0 -0
  83. package/assets/automatic-speech-recognition/input.flac +0 -0
  84. package/assets/automatic-speech-recognition/wav2vec2.png +0 -0
  85. package/assets/contribution-guide/anatomy.png +0 -0
  86. package/assets/contribution-guide/libraries.png +0 -0
  87. package/assets/depth-estimation/depth-estimation-input.jpg +0 -0
  88. package/assets/depth-estimation/depth-estimation-output.png +0 -0
  89. package/assets/document-question-answering/document-question-answering-input.png +0 -0
  90. package/assets/image-classification/image-classification-input.jpeg +0 -0
  91. package/assets/image-segmentation/image-segmentation-input.jpeg +0 -0
  92. package/assets/image-segmentation/image-segmentation-output.png +0 -0
  93. package/assets/image-to-image/image-to-image-input.jpeg +0 -0
  94. package/assets/image-to-image/image-to-image-output.png +0 -0
  95. package/assets/image-to-image/pix2pix_examples.jpg +0 -0
  96. package/assets/image-to-text/savanna.jpg +0 -0
  97. package/assets/object-detection/object-detection-input.jpg +0 -0
  98. package/assets/object-detection/object-detection-output.jpg +0 -0
  99. package/assets/table-question-answering/tableQA.jpg +0 -0
  100. package/assets/text-to-image/image.jpeg +0 -0
  101. package/assets/text-to-speech/audio.wav +0 -0
  102. package/assets/text-to-video/text-to-video-output.gif +0 -0
  103. package/assets/unconditional-image-generation/unconditional-image-generation-output.jpeg +0 -0
  104. package/assets/video-classification/video-classification-input.gif +0 -0
  105. package/assets/visual-question-answering/elephant.jpeg +0 -0
  106. package/assets/zero-shot-image-classification/image-classification-input.jpeg +0 -0
  107. package/dist/index.d.cts +0 -145
package/src/tasksData.ts CHANGED
@@ -1,4 +1,4 @@
- import { type PipelineType, PIPELINE_DATA } from "../../js/src/lib/interfaces/Types";
+ import { type PipelineType, PIPELINE_DATA } from "./pipelines";
  import type { TaskDataCustom, TaskData } from "./Types";

  import audioClassification from "./audio-classification/data";
@@ -41,64 +41,61 @@ import { TASKS_MODEL_LIBRARIES } from "./const";
  // Tasks that call getData() without the second argument will
  // have a "placeholder" page.
  export const TASKS_DATA: Record<PipelineType, TaskData | undefined> = {
- "audio-classification": getData("audio-classification", audioClassification),
- "audio-to-audio": getData("audio-to-audio", audioToAudio),
- "automatic-speech-recognition": getData("automatic-speech-recognition", automaticSpeechRecognition),
- "conversational": getData("conversational", conversational),
- "depth-estimation": getData("depth-estimation", depthEstimation),
- "document-question-answering": getData("document-question-answering", documentQuestionAnswering),
- "feature-extraction": getData("feature-extraction", featureExtraction),
- "fill-mask": getData("fill-mask", fillMask),
- "graph-ml": undefined,
- "image-classification": getData("image-classification", imageClassification),
- "image-segmentation": getData("image-segmentation", imageSegmentation),
- "image-to-image": getData("image-to-image", imageToImage),
- "image-to-text": getData("image-to-text", imageToText),
- "multiple-choice": undefined,
- "object-detection": getData("object-detection", objectDetection),
- "video-classification": getData("video-classification", videoClassification),
- "other": undefined,
- "question-answering": getData("question-answering", questionAnswering),
- "reinforcement-learning": getData("reinforcement-learning", reinforcementLearning),
- "robotics": undefined,
- "sentence-similarity": getData("sentence-similarity", sentenceSimilarity),
- "summarization": getData("summarization", summarization),
- "table-question-answering": getData("table-question-answering", tableQuestionAnswering),
- "table-to-text": undefined,
- "tabular-classification": getData("tabular-classification", tabularClassification),
- "tabular-regression": getData("tabular-regression", tabularRegression),
- "tabular-to-text": undefined,
- "text-classification": getData("text-classification", textClassification),
- "text-generation": getData("text-generation", textGeneration),
- "text-retrieval": undefined,
- "text-to-image": getData("text-to-image", textToImage),
- "text-to-speech": getData("text-to-speech", textToSpeech),
- "text-to-audio": undefined,
- "text-to-video": getData("text-to-video", textToVideo),
- "text2text-generation": undefined,
- "time-series-forecasting": undefined,
- "token-classification": getData("token-classification", tokenClassification),
- "translation": getData("translation", translation),
+ "audio-classification": getData("audio-classification", audioClassification),
+ "audio-to-audio": getData("audio-to-audio", audioToAudio),
+ "automatic-speech-recognition": getData("automatic-speech-recognition", automaticSpeechRecognition),
+ conversational: getData("conversational", conversational),
+ "depth-estimation": getData("depth-estimation", depthEstimation),
+ "document-question-answering": getData("document-question-answering", documentQuestionAnswering),
+ "feature-extraction": getData("feature-extraction", featureExtraction),
+ "fill-mask": getData("fill-mask", fillMask),
+ "graph-ml": undefined,
+ "image-classification": getData("image-classification", imageClassification),
+ "image-segmentation": getData("image-segmentation", imageSegmentation),
+ "image-to-image": getData("image-to-image", imageToImage),
+ "image-to-text": getData("image-to-text", imageToText),
+ "multiple-choice": undefined,
+ "object-detection": getData("object-detection", objectDetection),
+ "video-classification": getData("video-classification", videoClassification),
+ other: undefined,
+ "question-answering": getData("question-answering", questionAnswering),
+ "reinforcement-learning": getData("reinforcement-learning", reinforcementLearning),
+ robotics: undefined,
+ "sentence-similarity": getData("sentence-similarity", sentenceSimilarity),
+ summarization: getData("summarization", summarization),
+ "table-question-answering": getData("table-question-answering", tableQuestionAnswering),
+ "table-to-text": undefined,
+ "tabular-classification": getData("tabular-classification", tabularClassification),
+ "tabular-regression": getData("tabular-regression", tabularRegression),
+ "tabular-to-text": undefined,
+ "text-classification": getData("text-classification", textClassification),
+ "text-generation": getData("text-generation", textGeneration),
+ "text-retrieval": undefined,
+ "text-to-image": getData("text-to-image", textToImage),
+ "text-to-speech": getData("text-to-speech", textToSpeech),
+ "text-to-audio": undefined,
+ "text-to-video": getData("text-to-video", textToVideo),
+ "text2text-generation": undefined,
+ "time-series-forecasting": undefined,
+ "token-classification": getData("token-classification", tokenClassification),
+ translation: getData("translation", translation),
  "unconditional-image-generation": getData("unconditional-image-generation", unconditionalImageGeneration),
- "visual-question-answering": getData("visual-question-answering", visualQuestionAnswering),
- "voice-activity-detection": undefined,
- "zero-shot-classification": getData("zero-shot-classification", zeroShotClassification),
+ "visual-question-answering": getData("visual-question-answering", visualQuestionAnswering),
+ "voice-activity-detection": undefined,
+ "zero-shot-classification": getData("zero-shot-classification", zeroShotClassification),
  "zero-shot-image-classification": getData("zero-shot-image-classification", zeroShotImageClassification),
  } as const;

- /*
- * Return the whole TaskData object for a certain task.
+ /**
+ * Return the whole TaskData object for a certain task.
  * If the partialTaskData argument is left undefined,
  * the default placholder data will be used.
  */
- function getData(
- type: PipelineType,
- partialTaskData: TaskDataCustom = placeholder
- ): TaskData {
+ function getData(type: PipelineType, partialTaskData: TaskDataCustom = placeholder): TaskData {
  return {
  ...partialTaskData,
- id: type,
- label: PIPELINE_DATA[type].name,
+ id: type,
+ label: PIPELINE_DATA[type].name,
  libraries: TASKS_MODEL_LIBRARIES[type],
  };
  }
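
For orientation, here is a minimal sketch of how the refactored `TASKS_DATA` record and `getData` helper behave from a consumer's point of view. The root import path is an assumption (the diff shows `package/src/index.ts` changing, but not its contents):

```ts
// Hypothetical consumer (assumes the package root re-exports TASKS_DATA).
import { TASKS_DATA } from "@huggingface/tasks";

// Tasks wired up with getData(type, data) carry custom data; tasks built as
// getData(type) fall back to the placeholder data, and entries set to
// undefined (e.g. "graph-ml") have no task page at all.
const task = TASKS_DATA["text-classification"];
if (task !== undefined) {
	console.log(task.id); // "text-classification"
	console.log(task.label); // display name resolved from PIPELINE_DATA
	console.log(task.libraries); // filled in from TASKS_MODEL_LIBRARIES
}
```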
package/src/text-classification/about.md CHANGED
@@ -119,10 +119,11 @@ import { HfInference } from "@huggingface/inference";

  const inference = new HfInference(HF_ACCESS_TOKEN);
  await inference.conversational({
- model: 'distilbert-base-uncased-finetuned-sst-2-english',
- inputs: "I love this movie!"
- })
+ model: "distilbert-base-uncased-finetuned-sst-2-english",
+ inputs: "I love this movie!",
+ });
  ```
+
  ### Grammatical Correctness

  Linguistic Acceptability is the task of assessing the grammatical acceptability of a sentence. The classes in this task are “acceptable” and “unacceptable”. The benchmark dataset used for this task is [Corpus of Linguistic Acceptability (CoLA)](https://huggingface.co/datasets/glue/viewer/cola/test). The dataset consists of texts and their labels.
@@ -168,4 +169,4 @@ Would you like to learn more about the topic? Awesome! Here you can find some cu

  ### Documentation

- - [Text classification task guide](https://huggingface.co/docs/transformers/tasks/sequence_classification)
+ - [Text classification task guide](https://huggingface.co/docs/transformers/tasks/sequence_classification)
package/src/text-classification/data.ts CHANGED
@@ -4,21 +4,20 @@ const taskData: TaskDataCustom = {
  datasets: [
  {
  description: "A widely used dataset used to benchmark multiple variants of text classification.",
- id: "glue",
+ id: "glue",
  },
  {
  description: "A text classification dataset used to benchmark natural language inference models",
- id: "snli",
+ id: "snli",
  },
  ],
  demo: {
  inputs: [
  {
- label: "Input",
+ label: "Input",
  content: "I love Hugging Face!",
- type: "text",
+ type: "text",
  },
-
  ],
  outputs: [
  {
@@ -26,15 +25,15 @@ const taskData: TaskDataCustom = {
  data: [
  {
  label: "POSITIVE",
- score: 0.90,
+ score: 0.9,
  },
  {
  label: "NEUTRAL",
- score: 0.10,
+ score: 0.1,
  },
  {
  label: "NEGATIVE",
- score: 0.00,
+ score: 0.0,
  },
  ],
  },
@@ -43,48 +42,50 @@ const taskData: TaskDataCustom = {
  metrics: [
  {
  description: "",
- id: "accuracy",
+ id: "accuracy",
  },
  {
  description: "",
- id: "recall",
+ id: "recall",
  },
  {
  description: "",
- id: "precision",
+ id: "precision",
  },
  {
- description: "The F1 metric is the harmonic mean of the precision and recall. It can be calculated as: F1 = 2 * (precision * recall) / (precision + recall)",
- id: "f1",
+ description:
+ "The F1 metric is the harmonic mean of the precision and recall. It can be calculated as: F1 = 2 * (precision * recall) / (precision + recall)",
+ id: "f1",
  },
  ],
  models: [
  {
  description: "A robust model trained for sentiment analysis.",
- id: "distilbert-base-uncased-finetuned-sst-2-english",
+ id: "distilbert-base-uncased-finetuned-sst-2-english",
  },
  {
  description: "Multi-genre natural language inference model.",
- id: "roberta-large-mnli",
+ id: "roberta-large-mnli",
  },
  ],
  spaces: [
  {
  description: "An application that can classify financial sentiment.",
- id: "IoannisTr/Tech_Stocks_Trading_Assistant",
+ id: "IoannisTr/Tech_Stocks_Trading_Assistant",
  },
  {
  description: "A dashboard that contains various text classification tasks.",
- id: "miesnerjacob/Multi-task-NLP",
+ id: "miesnerjacob/Multi-task-NLP",
  },
  {
  description: "An application that analyzes user reviews in healthcare.",
- id: "spacy/healthsea-demo",
+ id: "spacy/healthsea-demo",
  },
  ],
- summary: "Text Classification is the task of assigning a label or class to a given text. Some use cases are sentiment analysis, natural language inference, and assessing grammatical correctness.",
+ summary:
+ "Text Classification is the task of assigning a label or class to a given text. Some use cases are sentiment analysis, natural language inference, and assessing grammatical correctness.",
  widgetModels: ["distilbert-base-uncased-finetuned-sst-2-english"],
- youtubeId: "leNG9fN9FQU",
+ youtubeId: "leNG9fN9FQU",
  };

  export default taskData;
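
The `f1` metric description above spells out the formula. As a quick worked check (a sketch, not part of the package):

```ts
// F1 is the harmonic mean of precision and recall:
// F1 = 2 * (precision * recall) / (precision + recall)
function f1(precision: number, recall: number): number {
	return (2 * precision * recall) / (precision + recall);
}

console.log(f1(0.8, 0.5)); // ≈ 0.615, pulled toward the weaker of the two scores
```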
package/src/text-generation/about.md CHANGED
@@ -26,7 +26,7 @@ A popular variant of Text Generation models predicts the next word given a bunch
  - Continue a story given the first sentences.
  - Provided a code description, generate the code.

- The most popular models for this task are GPT-based models or [Llama series](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf). These models are trained on data that has no labels, so you just need plain text to train your own model. You can train text generation models to generate a wide variety of documents, from code to stories.
+ The most popular models for this task are GPT-based models or [Llama series](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf). These models are trained on data that has no labels, so you just need plain text to train your own model. You can train text generation models to generate a wide variety of documents, from code to stories.

  ### Text-to-Text Generation Models

@@ -44,7 +44,6 @@ generator("Hello, I'm a language model", max_length = 30, num_return_sequences=3
  ## {'generated_text': "Hello, I'm a language modeler. I write and maintain software in Python. I love to code, and that includes coding things that require writing"}, ...
  ```

-
  [Text-to-Text generation models](https://huggingface.co/models?pipeline_tag=text2text-generation&sort=downloads) have a separate pipeline called `text2text-generation`. This pipeline takes an input containing the sentence including the task and returns the output of the accomplished task.

  ```python
@@ -65,9 +64,9 @@ import { HfInference } from "@huggingface/inference";

  const inference = new HfInference(HF_ACCESS_TOKEN);
  await inference.conversational({
- model: 'distilbert-base-uncased-finetuned-sst-2-english',
- inputs: "I love this movie!"
- })
+ model: "distilbert-base-uncased-finetuned-sst-2-english",
+ inputs: "I love this movie!",
+ });
  ```

  ## Text Generation Inference
@@ -80,24 +79,26 @@ Hugging Face Spaces includes templates to easily deploy your own instance of a s

  ![ChatUI](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/blog/os_llms/docker_chat.png)

-
  ## Useful Resources

  Would you like to learn more about the topic? Awesome! Here you can find some curated resources that you may find helpful!

  ### Tools within Hugging Face Ecosystem
+
  - You can use [PEFT](https://github.com/huggingface/peft) to adapt large language models in efficient way.
  - [ChatUI](https://github.com/huggingface/chat-ui) is the open-source interface to conversate with Large Language Models.
  - [text-generation-inferface](https://github.com/huggingface/text-generation-inference)
  - [HuggingChat](https://huggingface.co/chat/) is a chat interface powered by Hugging Face to chat with powerful models like Llama 2 70B.

  ### Documentation
+
  - [PEFT documentation](https://huggingface.co/docs/peft/index)
  - [ChatUI Docker Spaces](https://huggingface.co/docs/hub/spaces-sdks-docker-chatui)
  - [Causal language modeling task guide](https://huggingface.co/docs/transformers/tasks/language_modeling)
  - [Text generation strategies](https://huggingface.co/docs/transformers/generation_strategies)

  ### Course and Blogs
+
  - [Course Chapter on Training a causal language model from scratch](https://huggingface.co/course/chapter7/6?fw=pt)
  - [TO Discussion with Victor Sanh](https://www.youtube.com/watch?v=Oy49SCW_Xpw&ab_channel=HuggingFace)
  - [Hugging Face Course Workshops: Pretraining Language Models & CodeParrot](https://www.youtube.com/watch?v=ExUR7w6xe94&ab_channel=HuggingFace)
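
Note that the JavaScript snippet in this file calls `inference.conversational`. For plain text generation, `@huggingface/inference` also exposes a `textGeneration` method; a minimal sketch (the model choice here is illustrative):

```ts
import { HfInference } from "@huggingface/inference";

const inference = new HfInference(HF_ACCESS_TOKEN);
// Request a continuation of the prompt; `parameters` is optional.
const { generated_text } = await inference.textGeneration({
	model: "tiiuae/falcon-7b-instruct",
	inputs: "Once upon a time,",
	parameters: { max_new_tokens: 30 },
});
console.log(generated_text);
```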
package/src/text-generation/data.ts CHANGED
@@ -4,121 +4,123 @@ const taskData: TaskDataCustom = {
  datasets: [
  {
  description: "A large multilingual dataset of text crawled from the web.",
- id: "mc4",
+ id: "mc4",
  },
  {
- description: "Diverse open-source data consisting of 22 smaller high-quality datasets. It was used to train GPT-Neo.",
- id: "the_pile",
+ description:
+ "Diverse open-source data consisting of 22 smaller high-quality datasets. It was used to train GPT-Neo.",
+ id: "the_pile",
  },
  {
  description: "A crowd-sourced instruction dataset to develop an AI assistant.",
- id: "OpenAssistant/oasst1",
+ id: "OpenAssistant/oasst1",
  },
  {
  description: "A crowd-sourced instruction dataset created by Databricks employees.",
- id: "databricks/databricks-dolly-15k",
+ id: "databricks/databricks-dolly-15k",
  },
  ],
  demo: {
  inputs: [
  {
- label: "Input",
- content:
- "Once upon a time,",
+ label: "Input",
+ content: "Once upon a time,",
  type: "text",
  },
-
  ],
  outputs: [
  {
- label: "Output",
+ label: "Output",
  content:
- "Once upon a time, we knew that our ancestors were on the verge of extinction. The great explorers and poets of the Old World, from Alexander the Great to Chaucer, are dead and gone. A good many of our ancient explorers and poets have",
+ "Once upon a time, we knew that our ancestors were on the verge of extinction. The great explorers and poets of the Old World, from Alexander the Great to Chaucer, are dead and gone. A good many of our ancient explorers and poets have",
  type: "text",
  },
  ],
  },
  metrics: [
  {
- description: "Cross Entropy is a metric that calculates the difference between two probability distributions. Each probability distribution is the distribution of predicted words",
- id: "Cross Entropy",
+ description:
+ "Cross Entropy is a metric that calculates the difference between two probability distributions. Each probability distribution is the distribution of predicted words",
+ id: "Cross Entropy",
  },
  {
- description: "The Perplexity metric is the exponential of the cross-entropy loss. It evaluates the probabilities assigned to the next word by the model. Lower perplexity indicates better performance",
- id: "Perplexity",
+ description:
+ "The Perplexity metric is the exponential of the cross-entropy loss. It evaluates the probabilities assigned to the next word by the model. Lower perplexity indicates better performance",
+ id: "Perplexity",
  },
  ],
  models: [
  {
  description: "A large language model trained for text generation.",
- id: "bigscience/bloom-560m",
+ id: "bigscience/bloom-560m",
  },
  {
  description: "A large code generation model that can generate code in 80+ languages.",
- id: "bigcode/starcoder",
+ id: "bigcode/starcoder",
  },
  {
  description: "A model trained to follow instructions, uses Pythia-12b as base model.",
- id: "databricks/dolly-v2-12b",
+ id: "databricks/dolly-v2-12b",
  },
  {
  description: "A model trained to follow instructions curated by community, uses Pythia-12b as base model.",
- id: "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5",
+ id: "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5",
  },
  {
  description: "A large language model trained to generate text in English.",
- id: "stabilityai/stablelm-tuned-alpha-7b",
+ id: "stabilityai/stablelm-tuned-alpha-7b",
  },
  {
  description: "A model trained to follow instructions, based on mosaicml/mpt-7b.",
- id: "mosaicml/mpt-7b-instruct",
+ id: "mosaicml/mpt-7b-instruct",
  },
  {
  description: "A large language model trained to generate text in English.",
- id: "EleutherAI/pythia-12b",
+ id: "EleutherAI/pythia-12b",
  },
  {
  description: "A large text-to-text model trained to follow instructions.",
- id: "google/flan-ul2",
+ id: "google/flan-ul2",
  },
  {
  description: "A large and powerful text generation model.",
- id: "tiiuae/falcon-40b",
+ id: "tiiuae/falcon-40b",
  },
  {
  description: "State-of-the-art open-source large language model.",
- id: "meta-llama/Llama-2-70b-hf",
+ id: "meta-llama/Llama-2-70b-hf",
  },
  ],
- spaces: [
+ spaces: [
  {
  description: "A robust text generation model that can perform various tasks through natural language prompting.",
- id: "huggingface/bloom_demo",
+ id: "huggingface/bloom_demo",
  },
  {
  description: "An text generation based application that can write code for 80+ languages.",
- id: "bigcode/bigcode-playground",
+ id: "bigcode/bigcode-playground",
  },
  {
  description: "An text generation based application for conversations.",
- id: "h2oai/h2ogpt-chatbot",
+ id: "h2oai/h2ogpt-chatbot",
  },
  {
  description: "An text generation application that combines OpenAI and Hugging Face models.",
- id: "microsoft/HuggingGPT",
+ id: "microsoft/HuggingGPT",
  },
  {
  description: "An text generation application that uses StableLM-tuned-alpha-7b.",
- id: "stabilityai/stablelm-tuned-alpha-chat",
+ id: "stabilityai/stablelm-tuned-alpha-chat",
  },
  {
  description: "An UI that uses StableLM-tuned-alpha-7b.",
- id: "togethercomputer/OpenChatKit",
+ id: "togethercomputer/OpenChatKit",
  },
  ],
- summary: "Generating text is the task of producing new text. These models can, for example, fill in incomplete text or paraphrase.",
+ summary:
+ "Generating text is the task of producing new text. These models can, for example, fill in incomplete text or paraphrase.",
  widgetModels: ["tiiuae/falcon-7b-instruct"],
- youtubeId: "Vpjb1lu0MDk",
+ youtubeId: "Vpjb1lu0MDk",
  };

  export default taskData;
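
The two metric entries above are directly related: perplexity is the exponential of the cross-entropy loss. A one-line check (the values are illustrative):

```ts
// Perplexity = exp(cross-entropy), with cross-entropy in nats per token.
const crossEntropy = 2.3; // illustrative value
const perplexity = Math.exp(crossEntropy);
console.log(perplexity.toFixed(1)); // "10.0"; lower perplexity is better
```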
package/src/text-to-image/about.md CHANGED
@@ -1,28 +1,29 @@
  ## Use Cases
+
  ### Data Generation
-
- Businesses can generate data for their their use cases by inputting text and getting image outputs.
-
+
+ Businesses can generate data for their their use cases by inputting text and getting image outputs.
+
  ### Immersive Conversational Chatbots
-
+
  Chatbots can be made more immersive if they provide contextual images based on the input provided by the user.
-
+
  ### Creative Ideas for Fashion Industry
-
+
  Different patterns can be generated to obtain unique pieces of fashion. Text-to-image models make creations easier for designers to conceptualize their design before actually implementing it.
-
+
  ### Architecture Industry
-
+
  Architects can utilise the models to construct an environment based out on the requirements of the floor plan. This can also include the furniture that has to be placed in that environment.

  ## Task Variants

  You can contribute variants of this task [here](https://github.com/huggingface/hub-docs/blob/main/tasks/src/text-to-image/about.md).

-
  ## Inference

  You can use diffusers pipelines to infer with `text-to-image` models.
+
  ```python
  from diffusers import StableDiffusionPipeline, EulerDiscreteScheduler

@@ -42,15 +43,16 @@ import { HfInference } from "@huggingface/inference";

  const inference = new HfInference(HF_ACCESS_TOKEN);
  await inference.textToImage({
- model: 'stabilityai/stable-diffusion-2',
- inputs: 'award winning high resolution photo of a giant tortoise/((ladybird)) hybrid, [trending on artstation]',
- parameters: {
- negative_prompt: 'blurry',
- }
- })
+ model: "stabilityai/stable-diffusion-2",
+ inputs: "award winning high resolution photo of a giant tortoise/((ladybird)) hybrid, [trending on artstation]",
+ parameters: {
+ negative_prompt: "blurry",
+ },
+ });
  ```
-
+
  ## Useful Resources
+
  - [Hugging Face Diffusion Models Course](https://github.com/huggingface/diffusion-models-class)
  - [Getting Started with Diffusers](https://huggingface.co/docs/diffusers/index)
  - [Text-to-Image Generation](https://huggingface.co/docs/diffusers/using-diffusers/conditional_image_generation)
@@ -61,5 +63,4 @@ await inference.textToImage({
  - [🧨 Stable Diffusion in JAX/Flax](https://huggingface.co/blog/stable_diffusion_jax)
  - [Running IF with 🧨 diffusers on a Free Tier Google Colab](https://huggingface.co/blog/if)

-
- This page was made possible thanks to the efforts of [Ishan Dutta](https://huggingface.co/ishandutta), [Enrique Elias Ubaldo](https://huggingface.co/herrius) and [Oğuz Akif](https://huggingface.co/oguzakif).
+ This page was made possible thanks to the efforts of [Ishan Dutta](https://huggingface.co/ishandutta), [Enrique Elias Ubaldo](https://huggingface.co/herrius) and [Oğuz Akif](https://huggingface.co/oguzakif).
package/src/text-to-image/data.ts CHANGED
@@ -4,85 +4,91 @@ const taskData: TaskDataCustom = {
  datasets: [
  {
  description: "RedCaps is a large-scale dataset of 12M image-text pairs collected from Reddit.",
- id: "red_caps",
+ id: "red_caps",
  },
  {
  description: "Conceptual Captions is a dataset consisting of ~3.3M images annotated with captions.",
- id: "conceptual_captions",
+ id: "conceptual_captions",
  },
  ],
  demo: {
  inputs: [
  {
- label: "Input",
+ label: "Input",
  content: "A city above clouds, pastel colors, Victorian style",
- type: "text",
+ type: "text",
  },
  ],
  outputs: [
  {
  filename: "image.jpeg",
- type: "img",
+ type: "img",
  },
  ],
  },
  metrics: [
  {
- description: "The Inception Score (IS) measure assesses diversity and meaningfulness. It uses a generated image sample to predict its label. A higher score signifies more diverse and meaningful images.",
+ description:
+ "The Inception Score (IS) measure assesses diversity and meaningfulness. It uses a generated image sample to predict its label. A higher score signifies more diverse and meaningful images.",
  id: "IS",
  },
  {
- description: "The Fréchet Inception Distance (FID) calculates the distance between distributions between synthetic and real samples. A lower FID score indicates better similarity between the distributions of real and generated images.",
+ description:
+ "The Fréchet Inception Distance (FID) calculates the distance between distributions between synthetic and real samples. A lower FID score indicates better similarity between the distributions of real and generated images.",
  id: "FID",
- },
+ },
  {
- description: "R-precision assesses how the generated image aligns with the provided text description. It uses the generated images as queries to retrieve relevant text descriptions. The top 'r' relevant descriptions are selected and used to calculate R-precision as r/R, where 'R' is the number of ground truth descriptions associated with the generated images. A higher R-precision value indicates a better model.",
+ description:
+ "R-precision assesses how the generated image aligns with the provided text description. It uses the generated images as queries to retrieve relevant text descriptions. The top 'r' relevant descriptions are selected and used to calculate R-precision as r/R, where 'R' is the number of ground truth descriptions associated with the generated images. A higher R-precision value indicates a better model.",
  id: "R-Precision",
- },
- ],
- models: [
+ },
+ ],
+ models: [
  {
- description: "A latent text-to-image diffusion model capable of generating photo-realistic images given any text input.",
- id: "CompVis/stable-diffusion-v1-4",
+ description:
+ "A latent text-to-image diffusion model capable of generating photo-realistic images given any text input.",
+ id: "CompVis/stable-diffusion-v1-4",
  },
  {
- description: "A model that can be used to generate images based on text prompts. The DALL·E Mega model is the largest version of DALLE Mini.",
- id: "dalle-mini/dalle-mega",
+ description:
+ "A model that can be used to generate images based on text prompts. The DALL·E Mega model is the largest version of DALLE Mini.",
+ id: "dalle-mini/dalle-mega",
  },
  {
  description: "A text-to-image model that can generate coherent text inside image.",
- id: "DeepFloyd/IF-I-XL-v1.0",
+ id: "DeepFloyd/IF-I-XL-v1.0",
  },
  {
  description: "A powerful text-to-image model.",
- id: "kakaobrain/karlo-v1-alpha",
+ id: "kakaobrain/karlo-v1-alpha",
  },
  ],
- spaces: [
+ spaces: [
  {
  description: "A powerful text-to-image application.",
- id: "stabilityai/stable-diffusion",
+ id: "stabilityai/stable-diffusion",
  },
  {
  description: "An text-to-image application that can generate coherent text inside the image.",
- id: "DeepFloyd/IF",
+ id: "DeepFloyd/IF",
  },
  {
  description: "An powerful text-to-image application that can generate images.",
- id: "kakaobrain/karlo",
+ id: "kakaobrain/karlo",
  },
  {
  description: "An powerful text-to-image application that can generates 3D representations.",
- id: "hysts/Shap-E",
+ id: "hysts/Shap-E",
  },
  {
  description: "A strong application for `text-to-image`, `image-to-image` and image inpainting.",
- id: "ArtGAN/Stable-Diffusion-ControlNet-WebUI",
+ id: "ArtGAN/Stable-Diffusion-ControlNet-WebUI",
  },
  ],
- summary: "Generates images from input text. These models can be used to generate and modify images based on text prompts.",
+ summary:
+ "Generates images from input text. These models can be used to generate and modify images based on text prompts.",
  widgetModels: ["CompVis/stable-diffusion-v1-4"],
- youtubeId: "",
+ youtubeId: "",
  };

  export default taskData;
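
To make the R-precision description above concrete, a small hypothetical helper (the names are illustrative, not part of this package):

```ts
// R-precision per the metric description: with R ground-truth descriptions,
// the score is r/R, where r counts relevant items among the top-R retrieved.
function rPrecision(retrieved: string[], groundTruth: Set<string>): number {
	const R = groundTruth.size;
	const r = retrieved.slice(0, R).filter((d) => groundTruth.has(d)).length;
	return r / R;
}

// Example: 2 of 3 ground-truth captions appear in the top 3, so 2/3 ≈ 0.67.
console.log(rPrecision(["cap-a", "cap-x", "cap-b"], new Set(["cap-a", "cap-b", "cap-c"])));
```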