@huggingface/tasks 0.0.1
This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/assets/audio-classification/audio.wav +0 -0
- package/assets/audio-to-audio/input.wav +0 -0
- package/assets/audio-to-audio/label-0.wav +0 -0
- package/assets/audio-to-audio/label-1.wav +0 -0
- package/assets/automatic-speech-recognition/input.flac +0 -0
- package/assets/automatic-speech-recognition/wav2vec2.png +0 -0
- package/assets/contribution-guide/anatomy.png +0 -0
- package/assets/contribution-guide/libraries.png +0 -0
- package/assets/depth-estimation/depth-estimation-input.jpg +0 -0
- package/assets/depth-estimation/depth-estimation-output.png +0 -0
- package/assets/document-question-answering/document-question-answering-input.png +0 -0
- package/assets/image-classification/image-classification-input.jpeg +0 -0
- package/assets/image-segmentation/image-segmentation-input.jpeg +0 -0
- package/assets/image-segmentation/image-segmentation-output.png +0 -0
- package/assets/image-to-image/image-to-image-input.jpeg +0 -0
- package/assets/image-to-image/image-to-image-output.png +0 -0
- package/assets/image-to-image/pix2pix_examples.jpg +0 -0
- package/assets/image-to-text/savanna.jpg +0 -0
- package/assets/object-detection/object-detection-input.jpg +0 -0
- package/assets/object-detection/object-detection-output.jpg +0 -0
- package/assets/table-question-answering/tableQA.jpg +0 -0
- package/assets/text-to-image/image.jpeg +0 -0
- package/assets/text-to-speech/audio.wav +0 -0
- package/assets/text-to-video/text-to-video-output.gif +0 -0
- package/assets/unconditional-image-generation/unconditional-image-generation-output.jpeg +0 -0
- package/assets/video-classification/video-classification-input.gif +0 -0
- package/assets/visual-question-answering/elephant.jpeg +0 -0
- package/assets/zero-shot-image-classification/image-classification-input.jpeg +0 -0
- package/dist/index.cjs +3105 -0
- package/dist/index.d.cts +145 -0
- package/dist/index.d.ts +145 -0
- package/dist/index.js +3079 -0
- package/package.json +35 -0
- package/src/Types.ts +58 -0
- package/src/audio-classification/about.md +85 -0
- package/src/audio-classification/data.ts +77 -0
- package/src/audio-to-audio/about.md +55 -0
- package/src/audio-to-audio/data.ts +63 -0
- package/src/automatic-speech-recognition/about.md +86 -0
- package/src/automatic-speech-recognition/data.ts +77 -0
- package/src/const.ts +51 -0
- package/src/conversational/about.md +50 -0
- package/src/conversational/data.ts +62 -0
- package/src/depth-estimation/about.md +38 -0
- package/src/depth-estimation/data.ts +52 -0
- package/src/document-question-answering/about.md +54 -0
- package/src/document-question-answering/data.ts +67 -0
- package/src/feature-extraction/about.md +35 -0
- package/src/feature-extraction/data.ts +57 -0
- package/src/fill-mask/about.md +51 -0
- package/src/fill-mask/data.ts +77 -0
- package/src/image-classification/about.md +48 -0
- package/src/image-classification/data.ts +88 -0
- package/src/image-segmentation/about.md +63 -0
- package/src/image-segmentation/data.ts +96 -0
- package/src/image-to-image/about.md +81 -0
- package/src/image-to-image/data.ts +97 -0
- package/src/image-to-text/about.md +58 -0
- package/src/image-to-text/data.ts +87 -0
- package/src/index.ts +2 -0
- package/src/object-detection/about.md +36 -0
- package/src/object-detection/data.ts +73 -0
- package/src/placeholder/about.md +15 -0
- package/src/placeholder/data.ts +18 -0
- package/src/question-answering/about.md +56 -0
- package/src/question-answering/data.ts +69 -0
- package/src/reinforcement-learning/about.md +176 -0
- package/src/reinforcement-learning/data.ts +78 -0
- package/src/sentence-similarity/about.md +97 -0
- package/src/sentence-similarity/data.ts +100 -0
- package/src/summarization/about.md +57 -0
- package/src/summarization/data.ts +72 -0
- package/src/table-question-answering/about.md +43 -0
- package/src/table-question-answering/data.ts +63 -0
- package/src/tabular-classification/about.md +67 -0
- package/src/tabular-classification/data.ts +69 -0
- package/src/tabular-regression/about.md +91 -0
- package/src/tabular-regression/data.ts +58 -0
- package/src/tasksData.ts +104 -0
- package/src/text-classification/about.md +171 -0
- package/src/text-classification/data.ts +90 -0
- package/src/text-generation/about.md +128 -0
- package/src/text-generation/data.ts +124 -0
- package/src/text-to-image/about.md +65 -0
- package/src/text-to-image/data.ts +88 -0
- package/src/text-to-speech/about.md +63 -0
- package/src/text-to-speech/data.ts +70 -0
- package/src/text-to-video/about.md +36 -0
- package/src/text-to-video/data.ts +97 -0
- package/src/token-classification/about.md +78 -0
- package/src/token-classification/data.ts +83 -0
- package/src/translation/about.md +65 -0
- package/src/translation/data.ts +68 -0
- package/src/unconditional-image-generation/about.md +45 -0
- package/src/unconditional-image-generation/data.ts +66 -0
- package/src/video-classification/about.md +53 -0
- package/src/video-classification/data.ts +84 -0
- package/src/visual-question-answering/about.md +43 -0
- package/src/visual-question-answering/data.ts +90 -0
- package/src/zero-shot-classification/about.md +39 -0
- package/src/zero-shot-classification/data.ts +66 -0
- package/src/zero-shot-image-classification/about.md +68 -0
- package/src/zero-shot-image-classification/data.ts +79 -0
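
Note: the bundled entry point below exports a single TASKS_DATA object. As a point of reference, a minimal consumption sketch (hypothetical usage, not from the package docs, based on the CommonJS build shown in dist/index.cjs below) could look like:

    // Minimal sketch, not part of the package: load the CommonJS build and read one entry.
    // Assumes TASKS_DATA is keyed by pipeline type, as the data objects below suggest.
    const { TASKS_DATA } = require("@huggingface/tasks");
    const asr = TASKS_DATA["automatic-speech-recognition"];
    // Each task entry bundles datasets, demo inputs/outputs, metrics, models, spaces,
    // a summary, widgetModels and an optional youtubeId.
    console.log(asr.summary);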
package/dist/index.cjs
ADDED
@@ -0,0 +1,3105 @@
|
|
|
1
|
+
var __defProp = Object.defineProperty;
|
|
2
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
3
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
4
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
5
|
+
var __export = (target, all) => {
|
|
6
|
+
for (var name in all)
|
|
7
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
8
|
+
};
|
|
9
|
+
var __copyProps = (to, from, except, desc) => {
|
|
10
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
11
|
+
for (let key of __getOwnPropNames(from))
|
|
12
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
13
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
14
|
+
}
|
|
15
|
+
return to;
|
|
16
|
+
};
|
|
17
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
18
|
+
|
|
19
|
+
// src/index.ts
|
|
20
|
+
var src_exports = {};
|
|
21
|
+
__export(src_exports, {
|
|
22
|
+
TASKS_DATA: () => TASKS_DATA
|
|
23
|
+
});
|
|
24
|
+
module.exports = __toCommonJS(src_exports);
|
|
25
|
+
|
|
26
|
+
// ../js/src/lib/interfaces/Types.ts
|
|
27
|
+
function ensureRecordOfPipelines(record) {
|
|
28
|
+
return record;
|
|
29
|
+
}
|
|
30
|
+
var PIPELINE_DATA = ensureRecordOfPipelines({
|
|
31
|
+
"text-classification": {
|
|
32
|
+
name: "Text Classification",
|
|
33
|
+
subtasks: [
|
|
34
|
+
{
|
|
35
|
+
type: "acceptability-classification",
|
|
36
|
+
name: "Acceptability Classification"
|
|
37
|
+
},
|
|
38
|
+
{
|
|
39
|
+
type: "entity-linking-classification",
|
|
40
|
+
name: "Entity Linking Classification"
|
|
41
|
+
},
|
|
42
|
+
{
|
|
43
|
+
type: "fact-checking",
|
|
44
|
+
name: "Fact Checking"
|
|
45
|
+
},
|
|
46
|
+
{
|
|
47
|
+
type: "intent-classification",
|
|
48
|
+
name: "Intent Classification"
|
|
49
|
+
},
|
|
50
|
+
{
|
|
51
|
+
type: "language-identification",
|
|
52
|
+
name: "Language Identification"
|
|
53
|
+
},
|
|
54
|
+
{
|
|
55
|
+
type: "multi-class-classification",
|
|
56
|
+
name: "Multi Class Classification"
|
|
57
|
+
},
|
|
58
|
+
{
|
|
59
|
+
type: "multi-label-classification",
|
|
60
|
+
name: "Multi Label Classification"
|
|
61
|
+
},
|
|
62
|
+
{
|
|
63
|
+
type: "multi-input-text-classification",
|
|
64
|
+
name: "Multi-input Text Classification"
|
|
65
|
+
},
|
|
66
|
+
{
|
|
67
|
+
type: "natural-language-inference",
|
|
68
|
+
name: "Natural Language Inference"
|
|
69
|
+
},
|
|
70
|
+
{
|
|
71
|
+
type: "semantic-similarity-classification",
|
|
72
|
+
name: "Semantic Similarity Classification"
|
|
73
|
+
},
|
|
74
|
+
{
|
|
75
|
+
type: "sentiment-classification",
|
|
76
|
+
name: "Sentiment Classification"
|
|
77
|
+
},
|
|
78
|
+
{
|
|
79
|
+
type: "topic-classification",
|
|
80
|
+
name: "Topic Classification"
|
|
81
|
+
},
|
|
82
|
+
{
|
|
83
|
+
type: "semantic-similarity-scoring",
|
|
84
|
+
name: "Semantic Similarity Scoring"
|
|
85
|
+
},
|
|
86
|
+
{
|
|
87
|
+
type: "sentiment-scoring",
|
|
88
|
+
name: "Sentiment Scoring"
|
|
89
|
+
},
|
|
90
|
+
{
|
|
91
|
+
type: "sentiment-analysis",
|
|
92
|
+
name: "Sentiment Analysis"
|
|
93
|
+
},
|
|
94
|
+
{
|
|
95
|
+
type: "hate-speech-detection",
|
|
96
|
+
name: "Hate Speech Detection"
|
|
97
|
+
},
|
|
98
|
+
{
|
|
99
|
+
type: "text-scoring",
|
|
100
|
+
name: "Text Scoring"
|
|
101
|
+
}
|
|
102
|
+
],
|
|
103
|
+
modality: "nlp",
|
|
104
|
+
color: "orange"
|
|
105
|
+
},
|
|
106
|
+
"token-classification": {
|
|
107
|
+
name: "Token Classification",
|
|
108
|
+
subtasks: [
|
|
109
|
+
{
|
|
110
|
+
type: "named-entity-recognition",
|
|
111
|
+
name: "Named Entity Recognition"
|
|
112
|
+
},
|
|
113
|
+
{
|
|
114
|
+
type: "part-of-speech",
|
|
115
|
+
name: "Part of Speech"
|
|
116
|
+
},
|
|
117
|
+
{
|
|
118
|
+
type: "parsing",
|
|
119
|
+
name: "Parsing"
|
|
120
|
+
},
|
|
121
|
+
{
|
|
122
|
+
type: "lemmatization",
|
|
123
|
+
name: "Lemmatization"
|
|
124
|
+
},
|
|
125
|
+
{
|
|
126
|
+
type: "word-sense-disambiguation",
|
|
127
|
+
name: "Word Sense Disambiguation"
|
|
128
|
+
},
|
|
129
|
+
{
|
|
130
|
+
type: "coreference-resolution",
|
|
131
|
+
name: "Coreference-resolution"
|
|
132
|
+
}
|
|
133
|
+
],
|
|
134
|
+
modality: "nlp",
|
|
135
|
+
color: "blue"
|
|
136
|
+
},
|
|
137
|
+
"table-question-answering": {
|
|
138
|
+
name: "Table Question Answering",
|
|
139
|
+
modality: "nlp",
|
|
140
|
+
color: "green"
|
|
141
|
+
},
|
|
142
|
+
"question-answering": {
|
|
143
|
+
name: "Question Answering",
|
|
144
|
+
subtasks: [
|
|
145
|
+
{
|
|
146
|
+
type: "extractive-qa",
|
|
147
|
+
name: "Extractive QA"
|
|
148
|
+
},
|
|
149
|
+
{
|
|
150
|
+
type: "open-domain-qa",
|
|
151
|
+
name: "Open Domain QA"
|
|
152
|
+
},
|
|
153
|
+
{
|
|
154
|
+
type: "closed-domain-qa",
|
|
155
|
+
name: "Closed Domain QA"
|
|
156
|
+
}
|
|
157
|
+
],
|
|
158
|
+
modality: "nlp",
|
|
159
|
+
color: "blue"
|
|
160
|
+
},
|
|
161
|
+
"zero-shot-classification": {
|
|
162
|
+
name: "Zero-Shot Classification",
|
|
163
|
+
modality: "nlp",
|
|
164
|
+
color: "yellow"
|
|
165
|
+
},
|
|
166
|
+
"translation": {
|
|
167
|
+
name: "Translation",
|
|
168
|
+
modality: "nlp",
|
|
169
|
+
color: "green"
|
|
170
|
+
},
|
|
171
|
+
"summarization": {
|
|
172
|
+
name: "Summarization",
|
|
173
|
+
subtasks: [
|
|
174
|
+
{
|
|
175
|
+
type: "news-articles-summarization",
|
|
176
|
+
name: "News Articles Summarization"
|
|
177
|
+
},
|
|
178
|
+
{
|
|
179
|
+
type: "news-articles-headline-generation",
|
|
180
|
+
name: "News Articles Headline Generation"
|
|
181
|
+
}
|
|
182
|
+
],
|
|
183
|
+
modality: "nlp",
|
|
184
|
+
color: "indigo"
|
|
185
|
+
},
|
|
186
|
+
"conversational": {
|
|
187
|
+
name: "Conversational",
|
|
188
|
+
subtasks: [
|
|
189
|
+
{
|
|
190
|
+
type: "dialogue-generation",
|
|
191
|
+
name: "Dialogue Generation"
|
|
192
|
+
}
|
|
193
|
+
],
|
|
194
|
+
modality: "nlp",
|
|
195
|
+
color: "green"
|
|
196
|
+
},
|
|
197
|
+
"feature-extraction": {
|
|
198
|
+
name: "Feature Extraction",
|
|
199
|
+
modality: "multimodal",
|
|
200
|
+
color: "red"
|
|
201
|
+
},
|
|
202
|
+
"text-generation": {
|
|
203
|
+
name: "Text Generation",
|
|
204
|
+
subtasks: [
|
|
205
|
+
{
|
|
206
|
+
type: "dialogue-modeling",
|
|
207
|
+
name: "Dialogue Modeling"
|
|
208
|
+
},
|
|
209
|
+
{
|
|
210
|
+
type: "language-modeling",
|
|
211
|
+
name: "Language Modeling"
|
|
212
|
+
}
|
|
213
|
+
],
|
|
214
|
+
modality: "nlp",
|
|
215
|
+
color: "indigo"
|
|
216
|
+
},
|
|
217
|
+
"text2text-generation": {
|
|
218
|
+
name: "Text2Text Generation",
|
|
219
|
+
subtasks: [
|
|
220
|
+
{
|
|
221
|
+
type: "text-simplification",
|
|
222
|
+
name: "Text simplification"
|
|
223
|
+
},
|
|
224
|
+
{
|
|
225
|
+
type: "explanation-generation",
|
|
226
|
+
name: "Explanation Generation"
|
|
227
|
+
},
|
|
228
|
+
{
|
|
229
|
+
type: "abstractive-qa",
|
|
230
|
+
name: "Abstractive QA"
|
|
231
|
+
},
|
|
232
|
+
{
|
|
233
|
+
type: "open-domain-abstractive-qa",
|
|
234
|
+
name: "Open Domain Abstractive QA"
|
|
235
|
+
},
|
|
236
|
+
{
|
|
237
|
+
type: "closed-domain-qa",
|
|
238
|
+
name: "Closed Domain QA"
|
|
239
|
+
},
|
|
240
|
+
{
|
|
241
|
+
type: "open-book-qa",
|
|
242
|
+
name: "Open Book QA"
|
|
243
|
+
},
|
|
244
|
+
{
|
|
245
|
+
type: "closed-book-qa",
|
|
246
|
+
name: "Closed Book QA"
|
|
247
|
+
}
|
|
248
|
+
],
|
|
249
|
+
modality: "nlp",
|
|
250
|
+
color: "indigo"
|
|
251
|
+
},
|
|
252
|
+
"fill-mask": {
|
|
253
|
+
name: "Fill-Mask",
|
|
254
|
+
subtasks: [
|
|
255
|
+
{
|
|
256
|
+
type: "slot-filling",
|
|
257
|
+
name: "Slot Filling"
|
|
258
|
+
},
|
|
259
|
+
{
|
|
260
|
+
type: "masked-language-modeling",
|
|
261
|
+
name: "Masked Language Modeling"
|
|
262
|
+
}
|
|
263
|
+
],
|
|
264
|
+
modality: "nlp",
|
|
265
|
+
color: "red"
|
|
266
|
+
},
|
|
267
|
+
"sentence-similarity": {
|
|
268
|
+
name: "Sentence Similarity",
|
|
269
|
+
modality: "nlp",
|
|
270
|
+
color: "yellow"
|
|
271
|
+
},
|
|
272
|
+
"text-to-speech": {
|
|
273
|
+
name: "Text-to-Speech",
|
|
274
|
+
modality: "audio",
|
|
275
|
+
color: "yellow"
|
|
276
|
+
},
|
|
277
|
+
"text-to-audio": {
|
|
278
|
+
name: "Text-to-Audio",
|
|
279
|
+
modality: "audio",
|
|
280
|
+
color: "yellow"
|
|
281
|
+
},
|
|
282
|
+
"automatic-speech-recognition": {
|
|
283
|
+
name: "Automatic Speech Recognition",
|
|
284
|
+
modality: "audio",
|
|
285
|
+
color: "yellow"
|
|
286
|
+
},
|
|
287
|
+
"audio-to-audio": {
|
|
288
|
+
name: "Audio-to-Audio",
|
|
289
|
+
modality: "audio",
|
|
290
|
+
color: "blue"
|
|
291
|
+
},
|
|
292
|
+
"audio-classification": {
|
|
293
|
+
name: "Audio Classification",
|
|
294
|
+
subtasks: [
|
|
295
|
+
{
|
|
296
|
+
type: "keyword-spotting",
|
|
297
|
+
name: "Keyword Spotting"
|
|
298
|
+
},
|
|
299
|
+
{
|
|
300
|
+
type: "speaker-identification",
|
|
301
|
+
name: "Speaker Identification"
|
|
302
|
+
},
|
|
303
|
+
{
|
|
304
|
+
type: "audio-intent-classification",
|
|
305
|
+
name: "Audio Intent Classification"
|
|
306
|
+
},
|
|
307
|
+
{
|
|
308
|
+
type: "audio-emotion-recognition",
|
|
309
|
+
name: "Audio Emotion Recognition"
|
|
310
|
+
},
|
|
311
|
+
{
|
|
312
|
+
type: "audio-language-identification",
|
|
313
|
+
name: "Audio Language Identification"
|
|
314
|
+
}
|
|
315
|
+
],
|
|
316
|
+
modality: "audio",
|
|
317
|
+
color: "green"
|
|
318
|
+
},
|
|
319
|
+
"voice-activity-detection": {
|
|
320
|
+
name: "Voice Activity Detection",
|
|
321
|
+
modality: "audio",
|
|
322
|
+
color: "red"
|
|
323
|
+
},
|
|
324
|
+
"depth-estimation": {
|
|
325
|
+
name: "Depth Estimation",
|
|
326
|
+
modality: "cv",
|
|
327
|
+
color: "yellow"
|
|
328
|
+
},
|
|
329
|
+
"image-classification": {
|
|
330
|
+
name: "Image Classification",
|
|
331
|
+
subtasks: [
|
|
332
|
+
{
|
|
333
|
+
type: "multi-label-image-classification",
|
|
334
|
+
name: "Multi Label Image Classification"
|
|
335
|
+
},
|
|
336
|
+
{
|
|
337
|
+
type: "multi-class-image-classification",
|
|
338
|
+
name: "Multi Class Image Classification"
|
|
339
|
+
}
|
|
340
|
+
],
|
|
341
|
+
modality: "cv",
|
|
342
|
+
color: "blue"
|
|
343
|
+
},
|
|
344
|
+
"object-detection": {
|
|
345
|
+
name: "Object Detection",
|
|
346
|
+
subtasks: [
|
|
347
|
+
{
|
|
348
|
+
type: "face-detection",
|
|
349
|
+
name: "Face Detection"
|
|
350
|
+
},
|
|
351
|
+
{
|
|
352
|
+
type: "vehicle-detection",
|
|
353
|
+
name: "Vehicle Detection"
|
|
354
|
+
}
|
|
355
|
+
],
|
|
356
|
+
modality: "cv",
|
|
357
|
+
color: "yellow"
|
|
358
|
+
},
|
|
359
|
+
"image-segmentation": {
|
|
360
|
+
name: "Image Segmentation",
|
|
361
|
+
subtasks: [
|
|
362
|
+
{
|
|
363
|
+
type: "instance-segmentation",
|
|
364
|
+
name: "Instance Segmentation"
|
|
365
|
+
},
|
|
366
|
+
{
|
|
367
|
+
type: "semantic-segmentation",
|
|
368
|
+
name: "Semantic Segmentation"
|
|
369
|
+
},
|
|
370
|
+
{
|
|
371
|
+
type: "panoptic-segmentation",
|
|
372
|
+
name: "Panoptic Segmentation"
|
|
373
|
+
}
|
|
374
|
+
],
|
|
375
|
+
modality: "cv",
|
|
376
|
+
color: "green"
|
|
377
|
+
},
|
|
378
|
+
"text-to-image": {
|
|
379
|
+
name: "Text-to-Image",
|
|
380
|
+
modality: "multimodal",
|
|
381
|
+
color: "yellow"
|
|
382
|
+
},
|
|
383
|
+
"image-to-text": {
|
|
384
|
+
name: "Image-to-Text",
|
|
385
|
+
subtasks: [
|
|
386
|
+
{
|
|
387
|
+
type: "image-captioning",
|
|
388
|
+
name: "Image Captioning"
|
|
389
|
+
}
|
|
390
|
+
],
|
|
391
|
+
modality: "multimodal",
|
|
392
|
+
color: "red"
|
|
393
|
+
},
|
|
394
|
+
"image-to-image": {
|
|
395
|
+
name: "Image-to-Image",
|
|
396
|
+
modality: "cv",
|
|
397
|
+
color: "indigo"
|
|
398
|
+
},
|
|
399
|
+
"unconditional-image-generation": {
|
|
400
|
+
name: "Unconditional Image Generation",
|
|
401
|
+
modality: "cv",
|
|
402
|
+
color: "green"
|
|
403
|
+
},
|
|
404
|
+
"video-classification": {
|
|
405
|
+
name: "Video Classification",
|
|
406
|
+
modality: "cv",
|
|
407
|
+
color: "blue"
|
|
408
|
+
},
|
|
409
|
+
"reinforcement-learning": {
|
|
410
|
+
name: "Reinforcement Learning",
|
|
411
|
+
modality: "rl",
|
|
412
|
+
color: "red"
|
|
413
|
+
},
|
|
414
|
+
"robotics": {
|
|
415
|
+
name: "Robotics",
|
|
416
|
+
modality: "rl",
|
|
417
|
+
subtasks: [
|
|
418
|
+
{
|
|
419
|
+
type: "grasping",
|
|
420
|
+
name: "Grasping"
|
|
421
|
+
},
|
|
422
|
+
{
|
|
423
|
+
type: "task-planning",
|
|
424
|
+
name: "Task Planning"
|
|
425
|
+
}
|
|
426
|
+
],
|
|
427
|
+
color: "blue"
|
|
428
|
+
},
|
|
429
|
+
"tabular-classification": {
|
|
430
|
+
name: "Tabular Classification",
|
|
431
|
+
modality: "tabular",
|
|
432
|
+
subtasks: [
|
|
433
|
+
{
|
|
434
|
+
type: "tabular-multi-class-classification",
|
|
435
|
+
name: "Tabular Multi Class Classification"
|
|
436
|
+
},
|
|
437
|
+
{
|
|
438
|
+
type: "tabular-multi-label-classification",
|
|
439
|
+
name: "Tabular Multi Label Classification"
|
|
440
|
+
}
|
|
441
|
+
],
|
|
442
|
+
color: "blue"
|
|
443
|
+
},
|
|
444
|
+
"tabular-regression": {
|
|
445
|
+
name: "Tabular Regression",
|
|
446
|
+
modality: "tabular",
|
|
447
|
+
subtasks: [
|
|
448
|
+
{
|
|
449
|
+
type: "tabular-single-column-regression",
|
|
450
|
+
name: "Tabular Single Column Regression"
|
|
451
|
+
}
|
|
452
|
+
],
|
|
453
|
+
color: "blue"
|
|
454
|
+
},
|
|
455
|
+
"tabular-to-text": {
|
|
456
|
+
name: "Tabular to Text",
|
|
457
|
+
modality: "tabular",
|
|
458
|
+
subtasks: [
|
|
459
|
+
{
|
|
460
|
+
type: "rdf-to-text",
|
|
461
|
+
name: "RDF to text"
|
|
462
|
+
}
|
|
463
|
+
],
|
|
464
|
+
color: "blue",
|
|
465
|
+
hideInModels: true
|
|
466
|
+
},
|
|
467
|
+
"table-to-text": {
|
|
468
|
+
name: "Table to Text",
|
|
469
|
+
modality: "nlp",
|
|
470
|
+
color: "blue",
|
|
471
|
+
hideInModels: true
|
|
472
|
+
},
|
|
473
|
+
"multiple-choice": {
|
|
474
|
+
name: "Multiple Choice",
|
|
475
|
+
subtasks: [
|
|
476
|
+
{
|
|
477
|
+
type: "multiple-choice-qa",
|
|
478
|
+
name: "Multiple Choice QA"
|
|
479
|
+
},
|
|
480
|
+
{
|
|
481
|
+
type: "multiple-choice-coreference-resolution",
|
|
482
|
+
name: "Multiple Choice Coreference Resolution"
|
|
483
|
+
}
|
|
484
|
+
],
|
|
485
|
+
modality: "nlp",
|
|
486
|
+
color: "blue",
|
|
487
|
+
hideInModels: true
|
|
488
|
+
},
|
|
489
|
+
"text-retrieval": {
|
|
490
|
+
name: "Text Retrieval",
|
|
491
|
+
subtasks: [
|
|
492
|
+
{
|
|
493
|
+
type: "document-retrieval",
|
|
494
|
+
name: "Document Retrieval"
|
|
495
|
+
},
|
|
496
|
+
{
|
|
497
|
+
type: "utterance-retrieval",
|
|
498
|
+
name: "Utterance Retrieval"
|
|
499
|
+
},
|
|
500
|
+
{
|
|
501
|
+
type: "entity-linking-retrieval",
|
|
502
|
+
name: "Entity Linking Retrieval"
|
|
503
|
+
},
|
|
504
|
+
{
|
|
505
|
+
type: "fact-checking-retrieval",
|
|
506
|
+
name: "Fact Checking Retrieval"
|
|
507
|
+
}
|
|
508
|
+
],
|
|
509
|
+
modality: "nlp",
|
|
510
|
+
color: "indigo",
|
|
511
|
+
hideInModels: true
|
|
512
|
+
},
|
|
513
|
+
"time-series-forecasting": {
|
|
514
|
+
name: "Time Series Forecasting",
|
|
515
|
+
modality: "tabular",
|
|
516
|
+
subtasks: [
|
|
517
|
+
{
|
|
518
|
+
type: "univariate-time-series-forecasting",
|
|
519
|
+
name: "Univariate Time Series Forecasting"
|
|
520
|
+
},
|
|
521
|
+
{
|
|
522
|
+
type: "multivariate-time-series-forecasting",
|
|
523
|
+
name: "Multivariate Time Series Forecasting"
|
|
524
|
+
}
|
|
525
|
+
],
|
|
526
|
+
color: "blue",
|
|
527
|
+
hideInModels: true
|
|
528
|
+
},
|
|
529
|
+
"text-to-video": {
|
|
530
|
+
name: "Text-to-Video",
|
|
531
|
+
modality: "multimodal",
|
|
532
|
+
color: "green"
|
|
533
|
+
},
|
|
534
|
+
"visual-question-answering": {
|
|
535
|
+
name: "Visual Question Answering",
|
|
536
|
+
subtasks: [
|
|
537
|
+
{
|
|
538
|
+
type: "visual-question-answering",
|
|
539
|
+
name: "Visual Question Answering"
|
|
540
|
+
}
|
|
541
|
+
],
|
|
542
|
+
modality: "multimodal",
|
|
543
|
+
color: "red"
|
|
544
|
+
},
|
|
545
|
+
"document-question-answering": {
|
|
546
|
+
name: "Document Question Answering",
|
|
547
|
+
subtasks: [
|
|
548
|
+
{
|
|
549
|
+
type: "document-question-answering",
|
|
550
|
+
name: "Document Question Answering"
|
|
551
|
+
}
|
|
552
|
+
],
|
|
553
|
+
modality: "multimodal",
|
|
554
|
+
color: "blue",
|
|
555
|
+
hideInDatasets: true
|
|
556
|
+
},
|
|
557
|
+
"zero-shot-image-classification": {
|
|
558
|
+
name: "Zero-Shot Image Classification",
|
|
559
|
+
modality: "cv",
|
|
560
|
+
color: "yellow"
|
|
561
|
+
},
|
|
562
|
+
"graph-ml": {
|
|
563
|
+
name: "Graph Machine Learning",
|
|
564
|
+
modality: "multimodal",
|
|
565
|
+
color: "green"
|
|
566
|
+
},
|
|
567
|
+
"other": {
|
|
568
|
+
name: "Other",
|
|
569
|
+
modality: "other",
|
|
570
|
+
color: "blue",
|
|
571
|
+
hideInModels: true,
|
|
572
|
+
hideInDatasets: true
|
|
573
|
+
}
|
|
574
|
+
});
|
|
575
|
+
var ALL_PIPELINE_TYPES = Object.keys(PIPELINE_DATA);
|
|
576
|
+
var ALL_PIPELINE_TYPES_SET = new Set(ALL_PIPELINE_TYPES);
|
|
577
|
+
var ALL_SUBTASKS = Object.values(PIPELINE_DATA).flatMap((data) => data.subtasks ?? []);
|
|
578
|
+
var ALL_SUBTASK_TYPES = ALL_SUBTASKS.map((s) => s.type);
|
|
579
|
+
var ALL_SUBTASK_TYPES_SET = new Set(ALL_SUBTASK_TYPES);
|
|
580
|
+
|
|
581
|
+
// src/audio-classification/data.ts
|
|
582
|
+
var taskData = {
|
|
583
|
+
datasets: [
|
|
584
|
+
{
|
|
585
|
+
description: "A benchmark of 10 different audio tasks.",
|
|
586
|
+
id: "superb"
|
|
587
|
+
}
|
|
588
|
+
],
|
|
589
|
+
demo: {
|
|
590
|
+
inputs: [
|
|
591
|
+
{
|
|
592
|
+
filename: "audio.wav",
|
|
593
|
+
type: "audio"
|
|
594
|
+
}
|
|
595
|
+
],
|
|
596
|
+
outputs: [
|
|
597
|
+
{
|
|
598
|
+
data: [
|
|
599
|
+
{
|
|
600
|
+
label: "Up",
|
|
601
|
+
score: 0.2
|
|
602
|
+
},
|
|
603
|
+
{
|
|
604
|
+
label: "Down",
|
|
605
|
+
score: 0.8
|
|
606
|
+
}
|
|
607
|
+
],
|
|
608
|
+
type: "chart"
|
|
609
|
+
}
|
|
610
|
+
]
|
|
611
|
+
},
|
|
612
|
+
metrics: [
|
|
613
|
+
{
|
|
614
|
+
description: "",
|
|
615
|
+
id: "accuracy"
|
|
616
|
+
},
|
|
617
|
+
{
|
|
618
|
+
description: "",
|
|
619
|
+
id: "recall"
|
|
620
|
+
},
|
|
621
|
+
{
|
|
622
|
+
description: "",
|
|
623
|
+
id: "precision"
|
|
624
|
+
},
|
|
625
|
+
{
|
|
626
|
+
description: "",
|
|
627
|
+
id: "f1"
|
|
628
|
+
}
|
|
629
|
+
],
|
|
630
|
+
models: [
|
|
631
|
+
{
|
|
632
|
+
description: "An easy-to-use model for Command Recognition.",
|
|
633
|
+
id: "speechbrain/google_speech_command_xvector"
|
|
634
|
+
},
|
|
635
|
+
{
|
|
636
|
+
description: "An Emotion Recognition model.",
|
|
637
|
+
id: "ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition"
|
|
638
|
+
},
|
|
639
|
+
{
|
|
640
|
+
description: "A language identification model.",
|
|
641
|
+
id: "facebook/mms-lid-126"
|
|
642
|
+
}
|
|
643
|
+
],
|
|
644
|
+
spaces: [
|
|
645
|
+
{
|
|
646
|
+
description: "An application that can predict the language spoken in a given audio.",
|
|
647
|
+
id: "akhaliq/Speechbrain-audio-classification"
|
|
648
|
+
}
|
|
649
|
+
],
|
|
650
|
+
summary: "Audio classification is the task of assigning a label or class to a given audio. It can be used for recognizing which command a user is giving or the emotion of a statement, as well as identifying a speaker.",
|
|
651
|
+
widgetModels: ["facebook/mms-lid-126"],
|
|
652
|
+
youtubeId: "KWwzcmG98Ds"
|
|
653
|
+
};
|
|
654
|
+
var data_default = taskData;
|
|
655
|
+
|
|
656
|
+
// src/audio-to-audio/data.ts
|
|
657
|
+
var taskData2 = {
|
|
658
|
+
datasets: [
|
|
659
|
+
{
|
|
660
|
+
description: "512-element X-vector embeddings of speakers from CMU ARCTIC dataset.",
|
|
661
|
+
id: "Matthijs/cmu-arctic-xvectors"
|
|
662
|
+
}
|
|
663
|
+
],
|
|
664
|
+
demo: {
|
|
665
|
+
inputs: [
|
|
666
|
+
{
|
|
667
|
+
filename: "input.wav",
|
|
668
|
+
type: "audio"
|
|
669
|
+
}
|
|
670
|
+
],
|
|
671
|
+
outputs: [
|
|
672
|
+
{
|
|
673
|
+
filename: "label-0.wav",
|
|
674
|
+
type: "audio"
|
|
675
|
+
},
|
|
676
|
+
{
|
|
677
|
+
filename: "label-1.wav",
|
|
678
|
+
type: "audio"
|
|
679
|
+
}
|
|
680
|
+
]
|
|
681
|
+
},
|
|
682
|
+
metrics: [
|
|
683
|
+
{
|
|
684
|
+
description: "The Signal-to-Noise ratio is the relationship between the target signal level and the background noise level. It is calculated as the logarithm of the target signal divided by the background noise, in decibels.",
|
|
685
|
+
id: "snri"
|
|
686
|
+
},
|
|
687
|
+
{
|
|
688
|
+
description: "The Signal-to-Distortion ratio is the relationship between the target signal and the sum of noise, interference, and artifact errors",
|
|
689
|
+
id: "sdri"
|
|
690
|
+
}
|
|
691
|
+
],
|
|
692
|
+
models: [
|
|
693
|
+
{
|
|
694
|
+
description: "A solid model of audio source separation.",
|
|
695
|
+
id: "speechbrain/sepformer-wham"
|
|
696
|
+
},
|
|
697
|
+
{
|
|
698
|
+
description: "A speech enhancement model.",
|
|
699
|
+
id: "speechbrain/metricgan-plus-voicebank"
|
|
700
|
+
}
|
|
701
|
+
],
|
|
702
|
+
spaces: [
|
|
703
|
+
{
|
|
704
|
+
description: "An application for speech separation.",
|
|
705
|
+
id: "younver/speechbrain-speech-separation"
|
|
706
|
+
},
|
|
707
|
+
{
|
|
708
|
+
description: "An application for audio style transfer.",
|
|
709
|
+
id: "nakas/audio-diffusion_style_transfer"
|
|
710
|
+
}
|
|
711
|
+
],
|
|
712
|
+
summary: "Audio-to-Audio is a family of tasks in which the input is an audio and the output is one or multiple generated audios. Some example tasks are speech enhancement and source separation.",
|
|
713
|
+
widgetModels: ["speechbrain/sepformer-wham"],
|
|
714
|
+
youtubeId: "iohj7nCCYoM"
|
|
715
|
+
};
|
|
716
|
+
var data_default2 = taskData2;
|
|
717
|
+
|
|
718
|
+
// src/automatic-speech-recognition/data.ts
|
|
719
|
+
var taskData3 = {
|
|
720
|
+
datasets: [
|
|
721
|
+
{
|
|
722
|
+
description: "18,000 hours of multilingual audio-text dataset in 108 languages.",
|
|
723
|
+
id: "mozilla-foundation/common_voice_13_0"
|
|
724
|
+
},
|
|
725
|
+
{
|
|
726
|
+
description: "An English dataset with 1,000 hours of data.",
|
|
727
|
+
id: "librispeech_asr"
|
|
728
|
+
},
|
|
729
|
+
{
|
|
730
|
+
description: "High quality, multi-speaker audio data and their transcriptions in various languages.",
|
|
731
|
+
id: "openslr"
|
|
732
|
+
}
|
|
733
|
+
],
|
|
734
|
+
demo: {
|
|
735
|
+
inputs: [
|
|
736
|
+
{
|
|
737
|
+
filename: "input.flac",
|
|
738
|
+
type: "audio"
|
|
739
|
+
}
|
|
740
|
+
],
|
|
741
|
+
outputs: [
|
|
742
|
+
{
|
|
743
|
+
/// GOING ALONG SLUSHY COUNTRY ROADS AND SPEAKING TO DAMP AUDIENCES I
|
|
744
|
+
label: "Transcript",
|
|
745
|
+
content: "Going along slushy country roads and speaking to damp audiences in...",
|
|
746
|
+
type: "text"
|
|
747
|
+
}
|
|
748
|
+
]
|
|
749
|
+
},
|
|
750
|
+
metrics: [
|
|
751
|
+
{
|
|
752
|
+
description: "",
|
|
753
|
+
id: "wer"
|
|
754
|
+
},
|
|
755
|
+
{
|
|
756
|
+
description: "",
|
|
757
|
+
id: "cer"
|
|
758
|
+
}
|
|
759
|
+
],
|
|
760
|
+
models: [
|
|
761
|
+
{
|
|
762
|
+
description: "A powerful ASR model by OpenAI.",
|
|
763
|
+
id: "openai/whisper-large-v2"
|
|
764
|
+
},
|
|
765
|
+
{
|
|
766
|
+
description: "A good generic ASR model by MetaAI.",
|
|
767
|
+
id: "facebook/wav2vec2-base-960h"
|
|
768
|
+
},
|
|
769
|
+
{
|
|
770
|
+
description: "An end-to-end model that performs ASR and Speech Translation by MetaAI.",
|
|
771
|
+
id: "facebook/s2t-small-mustc-en-fr-st"
|
|
772
|
+
}
|
|
773
|
+
],
|
|
774
|
+
spaces: [
|
|
775
|
+
{
|
|
776
|
+
description: "A powerful general-purpose speech recognition application.",
|
|
777
|
+
id: "openai/whisper"
|
|
778
|
+
},
|
|
779
|
+
{
|
|
780
|
+
description: "Fastest speech recognition application.",
|
|
781
|
+
id: "sanchit-gandhi/whisper-jax"
|
|
782
|
+
},
|
|
783
|
+
{
|
|
784
|
+
description: "An application that transcribes speeches in YouTube videos.",
|
|
785
|
+
id: "jeffistyping/Youtube-Whisperer"
|
|
786
|
+
}
|
|
787
|
+
],
|
|
788
|
+
summary: "Automatic Speech Recognition (ASR), also known as Speech to Text (STT), is the task of transcribing a given audio to text. It has many applications, such as voice user interfaces.",
|
|
789
|
+
widgetModels: ["openai/whisper-large-v2"],
|
|
790
|
+
youtubeId: "TksaY_FDgnk"
|
|
791
|
+
};
|
|
792
|
+
var data_default3 = taskData3;
|
|
793
|
+
|
|
794
|
+
// src/conversational/data.ts
|
|
795
|
+
var taskData4 = {
|
|
796
|
+
datasets: [
|
|
797
|
+
{
|
|
798
|
+
description: "A dataset of 7k conversations explicitly designed to exhibit multiple conversation modes: displaying personality, having empathy, and demonstrating knowledge.",
|
|
799
|
+
id: "blended_skill_talk"
|
|
800
|
+
},
|
|
801
|
+
{
|
|
802
|
+
description: "ConvAI is a dataset of human-to-bot conversations labeled for quality. This data can be used to train a metric for evaluating dialogue systems",
|
|
803
|
+
id: "conv_ai_2"
|
|
804
|
+
},
|
|
805
|
+
{
|
|
806
|
+
description: "EmpatheticDialogues, is a dataset of 25k conversations grounded in emotional situations",
|
|
807
|
+
id: "empathetic_dialogues"
|
|
808
|
+
}
|
|
809
|
+
],
|
|
810
|
+
demo: {
|
|
811
|
+
inputs: [
|
|
812
|
+
{
|
|
813
|
+
label: "Input",
|
|
814
|
+
content: "Hey my name is Julien! How are you?",
|
|
815
|
+
type: "text"
|
|
816
|
+
}
|
|
817
|
+
],
|
|
818
|
+
outputs: [
|
|
819
|
+
{
|
|
820
|
+
label: "Answer",
|
|
821
|
+
content: "Hi Julien! My name is Julia! I am well.",
|
|
822
|
+
type: "text"
|
|
823
|
+
}
|
|
824
|
+
]
|
|
825
|
+
},
|
|
826
|
+
metrics: [
|
|
827
|
+
{
|
|
828
|
+
description: "BLEU score is calculated by counting the number of shared single or subsequent tokens between the generated sequence and the reference. Subsequent n tokens are called \u201Cn-grams\u201D. Unigram refers to a single token while bi-gram refers to token pairs and n-grams refer to n subsequent tokens. The score ranges from 0 to 1, where 1 means the translation perfectly matched and 0 did not match at all",
|
|
829
|
+
id: "bleu"
|
|
830
|
+
}
|
|
831
|
+
],
|
|
832
|
+
models: [
|
|
833
|
+
{
|
|
834
|
+
description: "A faster and smaller model than the famous BERT model.",
|
|
835
|
+
id: "facebook/blenderbot-400M-distill"
|
|
836
|
+
},
|
|
837
|
+
{
|
|
838
|
+
description: "DialoGPT is a large-scale pretrained dialogue response generation model for multiturn conversations.",
|
|
839
|
+
id: "microsoft/DialoGPT-large"
|
|
840
|
+
}
|
|
841
|
+
],
|
|
842
|
+
spaces: [
|
|
843
|
+
{
|
|
844
|
+
description: "A chatbot based on Blender model.",
|
|
845
|
+
id: "EXFINITE/BlenderBot-UI"
|
|
846
|
+
}
|
|
847
|
+
],
|
|
848
|
+
summary: "Conversational response modelling is the task of generating conversational text that is relevant, coherent and knowledgable given a prompt. These models have applications in chatbots, and as a part of voice assistants",
|
|
849
|
+
widgetModels: ["facebook/blenderbot-400M-distill"],
|
|
850
|
+
youtubeId: ""
|
|
851
|
+
};
|
|
852
|
+
var data_default4 = taskData4;
|
|
853
|
+
|
|
854
|
+
// src/document-question-answering/data.ts
|
|
855
|
+
var taskData5 = {
|
|
856
|
+
datasets: [
|
|
857
|
+
{
|
|
858
|
+
// TODO write proper description
|
|
859
|
+
description: "Dataset from the 2020 DocVQA challenge. The documents are taken from the UCSF Industry Documents Library.",
|
|
860
|
+
id: "eliolio/docvqa"
|
|
861
|
+
}
|
|
862
|
+
],
|
|
863
|
+
demo: {
|
|
864
|
+
inputs: [
|
|
865
|
+
{
|
|
866
|
+
label: "Question",
|
|
867
|
+
content: "What is the idea behind the consumer relations efficiency team?",
|
|
868
|
+
type: "text"
|
|
869
|
+
},
|
|
870
|
+
{
|
|
871
|
+
filename: "document-question-answering-input.png",
|
|
872
|
+
type: "img"
|
|
873
|
+
}
|
|
874
|
+
],
|
|
875
|
+
outputs: [
|
|
876
|
+
{
|
|
877
|
+
label: "Answer",
|
|
878
|
+
content: "Balance cost efficiency with quality customer service",
|
|
879
|
+
type: "text"
|
|
880
|
+
}
|
|
881
|
+
]
|
|
882
|
+
},
|
|
883
|
+
metrics: [
|
|
884
|
+
{
|
|
885
|
+
description: "The evaluation metric for the DocVQA challenge is the Average Normalized Levenshtein Similarity (ANLS). This metric is flexible to character regognition errors and compares the predicted answer with the ground truth answer.",
|
|
886
|
+
id: "anls"
|
|
887
|
+
},
|
|
888
|
+
{
|
|
889
|
+
description: "Exact Match is a metric based on the strict character match of the predicted answer and the right answer. For answers predicted correctly, the Exact Match will be 1. Even if only one character is different, Exact Match will be 0",
|
|
890
|
+
id: "exact-match"
|
|
891
|
+
}
|
|
892
|
+
],
|
|
893
|
+
models: [
|
|
894
|
+
{
|
|
895
|
+
description: "A LayoutLM model for the document QA task, fine-tuned on DocVQA and SQuAD2.0.",
|
|
896
|
+
id: "impira/layoutlm-document-qa"
|
|
897
|
+
},
|
|
898
|
+
{
|
|
899
|
+
description: "A special model for OCR-free Document QA task. Donut model fine-tuned on DocVQA.",
|
|
900
|
+
id: "naver-clova-ix/donut-base-finetuned-docvqa"
|
|
901
|
+
}
|
|
902
|
+
],
|
|
903
|
+
spaces: [
|
|
904
|
+
{
|
|
905
|
+
description: "A robust document question answering application.",
|
|
906
|
+
id: "impira/docquery"
|
|
907
|
+
},
|
|
908
|
+
{
|
|
909
|
+
description: "An application that can answer questions from invoices.",
|
|
910
|
+
id: "impira/invoices"
|
|
911
|
+
}
|
|
912
|
+
],
|
|
913
|
+
summary: "Document Question Answering (also known as Document Visual Question Answering) is the task of answering questions on document images. Document question answering models take a (document, question) pair as input and return an answer in natural language. Models usually rely on multi-modal features, combining text, position of words (bounding-boxes) and image.",
|
|
914
|
+
widgetModels: ["impira/layoutlm-document-qa"],
|
|
915
|
+
youtubeId: ""
|
|
916
|
+
};
|
|
917
|
+
var data_default5 = taskData5;
|
|
918
|
+
|
|
919
|
+
// src/feature-extraction/data.ts
|
|
920
|
+
var taskData6 = {
|
|
921
|
+
datasets: [
|
|
922
|
+
{
|
|
923
|
+
description: "Wikipedia dataset containing cleaned articles of all languages. Can be used to train `feature-extraction` models.",
|
|
924
|
+
id: "wikipedia"
|
|
925
|
+
}
|
|
926
|
+
],
|
|
927
|
+
demo: {
|
|
928
|
+
inputs: [
|
|
929
|
+
{
|
|
930
|
+
label: "Input",
|
|
931
|
+
content: "India, officially the Republic of India, is a country in South Asia.",
|
|
932
|
+
type: "text"
|
|
933
|
+
}
|
|
934
|
+
],
|
|
935
|
+
outputs: [
|
|
936
|
+
{
|
|
937
|
+
table: [
|
|
938
|
+
["Dimension 1", "Dimension 2", "Dimension 3"],
|
|
939
|
+
["2.583383083343506", "2.757075071334839", "0.9023529887199402"],
|
|
940
|
+
["8.29393482208252", "1.1071064472198486", "2.03399395942688"],
|
|
941
|
+
["-0.7754912972450256", "-1.647324562072754", "-0.6113331913948059"],
|
|
942
|
+
["0.07087723910808563", "1.5942802429199219", "1.4610432386398315"]
|
|
943
|
+
],
|
|
944
|
+
type: "tabular"
|
|
945
|
+
}
|
|
946
|
+
]
|
|
947
|
+
},
|
|
948
|
+
metrics: [
|
|
949
|
+
{
|
|
950
|
+
description: "",
|
|
951
|
+
id: ""
|
|
952
|
+
}
|
|
953
|
+
],
|
|
954
|
+
models: [
|
|
955
|
+
{
|
|
956
|
+
description: "A powerful feature extraction model for natural language processing tasks.",
|
|
957
|
+
id: "facebook/bart-base"
|
|
958
|
+
},
|
|
959
|
+
{
|
|
960
|
+
description: "A strong feature extraction model for coding tasks.",
|
|
961
|
+
id: "microsoft/codebert-base"
|
|
962
|
+
}
|
|
963
|
+
],
|
|
964
|
+
spaces: [],
|
|
965
|
+
summary: "Feature extraction refers to the process of transforming raw data into numerical features that can be processed while preserving the information in the original dataset.",
|
|
966
|
+
widgetModels: ["facebook/bart-base"]
|
|
967
|
+
};
|
|
968
|
+
var data_default6 = taskData6;
|
|
969
|
+
|
|
970
|
+
// src/fill-mask/data.ts
|
|
971
|
+
var taskData7 = {
|
|
972
|
+
datasets: [
|
|
973
|
+
{
|
|
974
|
+
description: "A common dataset that is used to train models for many languages.",
|
|
975
|
+
id: "wikipedia"
|
|
976
|
+
},
|
|
977
|
+
{
|
|
978
|
+
description: "A large English dataset with text crawled from the web.",
|
|
979
|
+
id: "c4"
|
|
980
|
+
}
|
|
981
|
+
],
|
|
982
|
+
demo: {
|
|
983
|
+
inputs: [
|
|
984
|
+
{
|
|
985
|
+
label: "Input",
|
|
986
|
+
content: "The <mask> barked at me",
|
|
987
|
+
type: "text"
|
|
988
|
+
}
|
|
989
|
+
],
|
|
990
|
+
outputs: [
|
|
991
|
+
{
|
|
992
|
+
type: "chart",
|
|
993
|
+
data: [
|
|
994
|
+
{
|
|
995
|
+
label: "wolf",
|
|
996
|
+
score: 0.487
|
|
997
|
+
},
|
|
998
|
+
{
|
|
999
|
+
label: "dog",
|
|
1000
|
+
score: 0.061
|
|
1001
|
+
},
|
|
1002
|
+
{
|
|
1003
|
+
label: "cat",
|
|
1004
|
+
score: 0.058
|
|
1005
|
+
},
|
|
1006
|
+
{
|
|
1007
|
+
label: "fox",
|
|
1008
|
+
score: 0.047
|
|
1009
|
+
},
|
|
1010
|
+
{
|
|
1011
|
+
label: "squirrel",
|
|
1012
|
+
score: 0.025
|
|
1013
|
+
}
|
|
1014
|
+
]
|
|
1015
|
+
}
|
|
1016
|
+
]
|
|
1017
|
+
},
|
|
1018
|
+
metrics: [
|
|
1019
|
+
{
|
|
1020
|
+
description: "Cross Entropy is a metric that calculates the difference between two probability distributions. Each probability distribution is the distribution of predicted words",
|
|
1021
|
+
id: "cross_entropy"
|
|
1022
|
+
},
|
|
1023
|
+
{
|
|
1024
|
+
description: "Perplexity is the exponential of the cross-entropy loss. It evaluates the probabilities assigned to the next word by the model. Lower perplexity indicates better performance",
|
|
1025
|
+
id: "perplexity"
|
|
1026
|
+
}
|
|
1027
|
+
],
|
|
1028
|
+
models: [
|
|
1029
|
+
{
|
|
1030
|
+
description: "A faster and smaller model than the famous BERT model.",
|
|
1031
|
+
id: "distilbert-base-uncased"
|
|
1032
|
+
},
|
|
1033
|
+
{
|
|
1034
|
+
description: "A multilingual model trained on 100 languages.",
|
|
1035
|
+
id: "xlm-roberta-base"
|
|
1036
|
+
}
|
|
1037
|
+
],
|
|
1038
|
+
spaces: [],
|
|
1039
|
+
summary: "Masked language modeling is the task of masking some of the words in a sentence and predicting which words should replace those masks. These models are useful when we want to get a statistical understanding of the language in which the model is trained in.",
|
|
1040
|
+
widgetModels: ["distilroberta-base"],
|
|
1041
|
+
youtubeId: "mqElG5QJWUg"
|
|
1042
|
+
};
|
|
1043
|
+
var data_default7 = taskData7;
|
|
1044
|
+
|
|
1045
|
+
// src/image-classification/data.ts
|
|
1046
|
+
var taskData8 = {
|
|
1047
|
+
datasets: [
|
|
1048
|
+
{
|
|
1049
|
+
// TODO write proper description
|
|
1050
|
+
description: "Benchmark dataset used for image classification with images that belong to 100 classes.",
|
|
1051
|
+
id: "cifar100"
|
|
1052
|
+
},
|
|
1053
|
+
{
|
|
1054
|
+
// TODO write proper description
|
|
1055
|
+
description: "Dataset consisting of images of garments.",
|
|
1056
|
+
id: "fashion_mnist"
|
|
1057
|
+
}
|
|
1058
|
+
],
|
|
1059
|
+
demo: {
|
|
1060
|
+
inputs: [
|
|
1061
|
+
{
|
|
1062
|
+
filename: "image-classification-input.jpeg",
|
|
1063
|
+
type: "img"
|
|
1064
|
+
}
|
|
1065
|
+
],
|
|
1066
|
+
outputs: [
|
|
1067
|
+
{
|
|
1068
|
+
type: "chart",
|
|
1069
|
+
data: [
|
|
1070
|
+
{
|
|
1071
|
+
label: "Egyptian cat",
|
|
1072
|
+
score: 0.514
|
|
1073
|
+
},
|
|
1074
|
+
{
|
|
1075
|
+
label: "Tabby cat",
|
|
1076
|
+
score: 0.193
|
|
1077
|
+
},
|
|
1078
|
+
{
|
|
1079
|
+
label: "Tiger cat",
|
|
1080
|
+
score: 0.068
|
|
1081
|
+
}
|
|
1082
|
+
]
|
|
1083
|
+
}
|
|
1084
|
+
]
|
|
1085
|
+
},
|
|
1086
|
+
metrics: [
|
|
1087
|
+
{
|
|
1088
|
+
description: "",
|
|
1089
|
+
id: "accuracy"
|
|
1090
|
+
},
|
|
1091
|
+
{
|
|
1092
|
+
description: "",
|
|
1093
|
+
id: "recall"
|
|
1094
|
+
},
|
|
1095
|
+
{
|
|
1096
|
+
description: "",
|
|
1097
|
+
id: "precision"
|
|
1098
|
+
},
|
|
1099
|
+
{
|
|
1100
|
+
description: "",
|
|
1101
|
+
id: "f1"
|
|
1102
|
+
}
|
|
1103
|
+
],
|
|
1104
|
+
models: [
|
|
1105
|
+
{
|
|
1106
|
+
description: "A strong image classification model.",
|
|
1107
|
+
id: "google/vit-base-patch16-224"
|
|
1108
|
+
},
|
|
1109
|
+
{
|
|
1110
|
+
description: "A robust image classification model.",
|
|
1111
|
+
id: "facebook/deit-base-distilled-patch16-224"
|
|
1112
|
+
},
|
|
1113
|
+
{
|
|
1114
|
+
description: "A strong image classification model.",
|
|
1115
|
+
id: "facebook/convnext-large-224"
|
|
1116
|
+
}
|
|
1117
|
+
],
|
|
1118
|
+
spaces: [
|
|
1119
|
+
{
|
|
1120
|
+
// TO DO: write description
|
|
1121
|
+
description: "An application that classifies what a given image is about.",
|
|
1122
|
+
id: "nielsr/perceiver-image-classification"
|
|
1123
|
+
}
|
|
1124
|
+
],
|
|
1125
|
+
summary: "Image classification is the task of assigning a label or class to an entire image. Images are expected to have only one class for each image. Image classification models take an image as input and return a prediction about which class the image belongs to.",
|
|
1126
|
+
widgetModels: ["google/vit-base-patch16-224"],
|
|
1127
|
+
youtubeId: "tjAIM7BOYhw"
|
|
1128
|
+
};
|
|
1129
|
+
var data_default8 = taskData8;
|
|
1130
|
+
|
|
1131
|
+
// src/image-to-image/data.ts
|
|
1132
|
+
var taskData9 = {
|
|
1133
|
+
datasets: [
|
|
1134
|
+
{
|
|
1135
|
+
description: "Synthetic dataset, for image relighting",
|
|
1136
|
+
id: "VIDIT"
|
|
1137
|
+
},
|
|
1138
|
+
{
|
|
1139
|
+
description: "Multiple images of celebrities, used for facial expression translation",
|
|
1140
|
+
id: "huggan/CelebA-faces"
|
|
1141
|
+
}
|
|
1142
|
+
],
|
|
1143
|
+
demo: {
|
|
1144
|
+
inputs: [
|
|
1145
|
+
{
|
|
1146
|
+
filename: "image-to-image-input.jpeg",
|
|
1147
|
+
type: "img"
|
|
1148
|
+
}
|
|
1149
|
+
],
|
|
1150
|
+
outputs: [
|
|
1151
|
+
{
|
|
1152
|
+
filename: "image-to-image-output.png",
|
|
1153
|
+
type: "img"
|
|
1154
|
+
}
|
|
1155
|
+
]
|
|
1156
|
+
},
|
|
1157
|
+
isPlaceholder: false,
|
|
1158
|
+
metrics: [
|
|
1159
|
+
{
|
|
1160
|
+
description: "Peak Signal to Noise Ratio (PSNR) is an approximation of the human perception, considering the ratio of the absolute intensity with respect to the variations. Measured in dB, a high value indicates a high fidelity.",
|
|
1161
|
+
id: "PSNR"
|
|
1162
|
+
},
|
|
1163
|
+
{
|
|
1164
|
+
description: "Structural Similarity Index (SSIM) is a perceptual metric which compares the luminance, contrast and structure of two images. The values of SSIM range between -1 and 1, and higher values indicate closer resemblance to the original image.",
|
|
1165
|
+
id: "SSIM"
|
|
1166
|
+
},
|
|
1167
|
+
{
|
|
1168
|
+
description: "Inception Score (IS) is an analysis of the labels predicted by an image classification model when presented with a sample of the generated images.",
|
|
1169
|
+
id: "IS"
|
|
1170
|
+
}
|
|
1171
|
+
],
|
|
1172
|
+
models: [
|
|
1173
|
+
{
|
|
1174
|
+
description: "A model that enhances images captured in low light conditions.",
|
|
1175
|
+
id: "keras-io/low-light-image-enhancement"
|
|
1176
|
+
},
|
|
1177
|
+
{
|
|
1178
|
+
description: "A model that increases the resolution of an image.",
|
|
1179
|
+
id: "keras-io/super-resolution"
|
|
1180
|
+
},
|
|
1181
|
+
{
|
|
1182
|
+
description: "A model that creates a set of variations of the input image in the style of DALL-E using Stable Diffusion.",
|
|
1183
|
+
id: "lambdalabs/sd-image-variations-diffusers"
|
|
1184
|
+
},
|
|
1185
|
+
{
|
|
1186
|
+
description: "A model that generates images based on segments in the input image and the text prompt.",
|
|
1187
|
+
id: "mfidabel/controlnet-segment-anything"
|
|
1188
|
+
},
|
|
1189
|
+
{
|
|
1190
|
+
description: "A model that takes an image and an instruction to edit the image.",
|
|
1191
|
+
id: "timbrooks/instruct-pix2pix"
|
|
1192
|
+
}
|
|
1193
|
+
],
|
|
1194
|
+
spaces: [
|
|
1195
|
+
{
|
|
1196
|
+
description: "Image enhancer application for low light.",
|
|
1197
|
+
id: "keras-io/low-light-image-enhancement"
|
|
1198
|
+
},
|
|
1199
|
+
{
|
|
1200
|
+
description: "Style transfer application.",
|
|
1201
|
+
id: "keras-io/neural-style-transfer"
|
|
1202
|
+
},
|
|
1203
|
+
{
|
|
1204
|
+
description: "An application that generates images based on segment control.",
|
|
1205
|
+
id: "mfidabel/controlnet-segment-anything"
|
|
1206
|
+
},
|
|
1207
|
+
{
|
|
1208
|
+
description: "Image generation application that takes image control and text prompt.",
|
|
1209
|
+
id: "hysts/ControlNet"
|
|
1210
|
+
},
|
|
1211
|
+
{
|
|
1212
|
+
description: "Colorize any image using this app.",
|
|
1213
|
+
id: "ioclab/brightness-controlnet"
|
|
1214
|
+
},
|
|
1215
|
+
{
|
|
1216
|
+
description: "Edit images with instructions.",
|
|
1217
|
+
id: "timbrooks/instruct-pix2pix"
|
|
1218
|
+
}
|
|
1219
|
+
],
|
|
1220
|
+
summary: "Image-to-image is the task of transforming a source image to match the characteristics of a target image or a target image domain. Any image manipulation and enhancement is possible with image to image models.",
|
|
1221
|
+
widgetModels: ["lllyasviel/sd-controlnet-canny"],
|
|
1222
|
+
youtubeId: ""
|
|
1223
|
+
};
|
|
1224
|
+
var data_default9 = taskData9;
|
|
1225
|
+
|
|
1226
|
+
// src/image-to-text/data.ts
|
|
1227
|
+
var taskData10 = {
|
|
1228
|
+
datasets: [
|
|
1229
|
+
{
|
|
1230
|
+
// TODO write proper description
|
|
1231
|
+
description: "Dataset from 12M image-text of Reddit",
|
|
1232
|
+
id: "red_caps"
|
|
1233
|
+
},
|
|
1234
|
+
{
|
|
1235
|
+
// TODO write proper description
|
|
1236
|
+
description: "Dataset from 3.3M images of Google",
|
|
1237
|
+
id: "datasets/conceptual_captions"
|
|
1238
|
+
}
|
|
1239
|
+
],
|
|
1240
|
+
demo: {
|
|
1241
|
+
inputs: [
|
|
1242
|
+
{
|
|
1243
|
+
filename: "savanna.jpg",
|
|
1244
|
+
type: "img"
|
|
1245
|
+
}
|
|
1246
|
+
],
|
|
1247
|
+
outputs: [
|
|
1248
|
+
{
|
|
1249
|
+
label: "Detailed description",
|
|
1250
|
+
content: "a herd of giraffes and zebras grazing in a field",
|
|
1251
|
+
type: "text"
|
|
1252
|
+
}
|
|
1253
|
+
]
|
|
1254
|
+
},
|
|
1255
|
+
metrics: [],
|
|
1256
|
+
models: [
|
|
1257
|
+
{
|
|
1258
|
+
description: "A robust image captioning model.",
|
|
1259
|
+
id: "Salesforce/blip-image-captioning-large"
|
|
1260
|
+
},
|
|
1261
|
+
{
|
|
1262
|
+
description: "A strong image captioning model.",
|
|
1263
|
+
id: "nlpconnect/vit-gpt2-image-captioning"
|
|
1264
|
+
},
|
|
1265
|
+
{
|
|
1266
|
+
description: "A strong optical character recognition model.",
|
|
1267
|
+
id: "microsoft/trocr-base-printed"
|
|
1268
|
+
},
|
|
1269
|
+
{
|
|
1270
|
+
description: "A strong visual question answering model for scientific diagrams.",
|
|
1271
|
+
id: "google/pix2struct-ai2d-base"
|
|
1272
|
+
},
|
|
1273
|
+
{
|
|
1274
|
+
description: "A strong captioning model for UI components.",
|
|
1275
|
+
id: "google/pix2struct-widget-captioning-base"
|
|
1276
|
+
},
|
|
1277
|
+
{
|
|
1278
|
+
description: "A captioning model for images that contain text.",
|
|
1279
|
+
id: "google/pix2struct-textcaps-base"
|
|
1280
|
+
}
|
|
1281
|
+
],
|
|
1282
|
+
spaces: [
|
|
1283
|
+
{
|
|
1284
|
+
description: "A robust image captioning application.",
|
|
1285
|
+
id: "flax-community/image-captioning"
|
|
1286
|
+
},
|
|
1287
|
+
{
|
|
1288
|
+
description: "An application that transcribes handwritings into text.",
|
|
1289
|
+
id: "nielsr/TrOCR-handwritten"
|
|
1290
|
+
},
|
|
1291
|
+
{
|
|
1292
|
+
description: "An application that can caption images and answer questions about a given image.",
|
|
1293
|
+
id: "Salesforce/BLIP"
|
|
1294
|
+
},
|
|
1295
|
+
{
|
|
1296
|
+
description: "An application that can caption images and answer questions with a conversational agent.",
|
|
1297
|
+
id: "Salesforce/BLIP2"
|
|
1298
|
+
},
|
|
1299
|
+
{
|
|
1300
|
+
description: "An image captioning application that demonstrates the effect of noise on captions.",
|
|
1301
|
+
id: "johko/capdec-image-captioning"
|
|
1302
|
+
}
|
|
1303
|
+
],
|
|
1304
|
+
summary: "Image to text models output a text from a given image. Image captioning or optical character recognition can be considered as the most common applications of image to text.",
|
|
1305
|
+
widgetModels: ["Salesforce/blip-image-captioning-base"],
|
|
1306
|
+
youtubeId: ""
|
|
1307
|
+
};
|
|
1308
|
+
var data_default10 = taskData10;
|
|
1309
|
+
|
|
1310
|
+
// src/image-segmentation/data.ts
|
|
1311
|
+
var taskData11 = {
|
|
1312
|
+
datasets: [
|
|
1313
|
+
{
|
|
1314
|
+
description: "Scene segmentation dataset.",
|
|
1315
|
+
id: "scene_parse_150"
|
|
1316
|
+
}
|
|
1317
|
+
],
|
|
1318
|
+
demo: {
|
|
1319
|
+
inputs: [
|
|
1320
|
+
{
|
|
1321
|
+
filename: "image-segmentation-input.jpeg",
|
|
1322
|
+
type: "img"
|
|
1323
|
+
}
|
|
1324
|
+
],
|
|
1325
|
+
outputs: [
|
|
1326
|
+
{
|
|
1327
|
+
filename: "image-segmentation-output.png",
|
|
1328
|
+
type: "img"
|
|
1329
|
+
}
|
|
1330
|
+
]
|
|
1331
|
+
},
|
|
1332
|
+
metrics: [
|
|
1333
|
+
{
|
|
1334
|
+
description: "Average Precision (AP) is the Area Under the PR Curve (AUC-PR). It is calculated for each semantic class separately",
|
|
1335
|
+
id: "Average Precision"
|
|
1336
|
+
},
|
|
1337
|
+
{
|
|
1338
|
+
description: "Mean Average Precision (mAP) is the overall average of the AP values",
|
|
1339
|
+
id: "Mean Average Precision"
|
|
1340
|
+
},
|
|
1341
|
+
{
|
|
1342
|
+
description: "Intersection over Union (IoU) is the overlap of segmentation masks. Mean IoU is the average of the IoU of all semantic classes",
|
|
1343
|
+
id: "Mean Intersection over Union"
|
|
1344
|
+
},
|
|
1345
|
+
{
|
|
1346
|
+
description: "AP\u03B1 is the Average Precision at the IoU threshold of a \u03B1 value, for example, AP50 and AP75",
|
|
1347
|
+
id: "AP\u03B1"
|
|
1348
|
+
}
|
|
1349
|
+
],
|
|
1350
|
+
models: [
|
|
1351
|
+
{
|
|
1352
|
+
// TO DO: write description
|
|
1353
|
+
description: "Solid panoptic segmentation model trained on the COCO 2017 benchmark dataset.",
|
|
1354
|
+
id: "facebook/detr-resnet-50-panoptic"
|
|
1355
|
+
},
|
|
1356
|
+
{
|
|
1357
|
+
description: "Semantic segmentation model trained on ADE20k benchmark dataset.",
|
|
1358
|
+
id: "microsoft/beit-large-finetuned-ade-640-640"
|
|
1359
|
+
},
|
|
1360
|
+
{
|
|
1361
|
+
description: "Semantic segmentation model trained on ADE20k benchmark dataset with 512x512 resolution.",
|
|
1362
|
+
id: "nvidia/segformer-b0-finetuned-ade-512-512"
|
|
1363
|
+
},
|
|
1364
|
+
{
|
|
1365
|
+
description: "Semantic segmentation model trained Cityscapes dataset.",
|
|
1366
|
+
id: "facebook/mask2former-swin-large-cityscapes-semantic"
|
|
1367
|
+
},
|
|
1368
|
+
{
|
|
1369
|
+
description: "Panoptic segmentation model trained COCO (common objects) dataset.",
|
|
1370
|
+
id: "facebook/mask2former-swin-large-coco-panoptic"
|
|
1371
|
+
}
|
|
1372
|
+
],
|
|
1373
|
+
spaces: [
|
|
1374
|
+
{
|
|
1375
|
+
description: "A semantic segmentation application that can predict unseen instances out of the box.",
|
|
1376
|
+
id: "facebook/ov-seg"
|
|
1377
|
+
},
|
|
1378
|
+
{
|
|
1379
|
+
description: "One of the strongest segmentation applications.",
|
|
1380
|
+
id: "jbrinkma/segment-anything"
|
|
1381
|
+
},
|
|
1382
|
+
{
|
|
1383
|
+
description: "A semantic segmentation application that predicts human silhouettes.",
|
|
1384
|
+
id: "keras-io/Human-Part-Segmentation"
|
|
1385
|
+
},
|
|
1386
|
+
{
|
|
1387
|
+
description: "An instance segmentation application to predict neuronal cell types from microscopy images.",
|
|
1388
|
+
id: "rashmi/sartorius-cell-instance-segmentation"
|
|
1389
|
+
},
|
|
1390
|
+
{
|
|
1391
|
+
description: "An application that segments videos.",
|
|
1392
|
+
id: "ArtGAN/Segment-Anything-Video"
|
|
1393
|
+
},
|
|
1394
|
+
{
|
|
1395
|
+
description: "An panoptic segmentation application built for outdoor environments.",
|
|
1396
|
+
id: "segments/panoptic-segment-anything"
|
|
1397
|
+
}
|
|
1398
|
+
],
|
|
1399
|
+
summary: "Image Segmentation divides an image into segments where each pixel in the image is mapped to an object. This task has multiple variants such as instance segmentation, panoptic segmentation and semantic segmentation.",
|
|
1400
|
+
widgetModels: ["facebook/detr-resnet-50-panoptic"],
|
|
1401
|
+
youtubeId: "dKE8SIt9C-w"
|
|
1402
|
+
};
|
|
1403
|
+
var data_default11 = taskData11;
|
|
1404
|
+
|
|
1405
|
+
// src/object-detection/data.ts
|
|
1406
|
+
var taskData12 = {
|
|
1407
|
+
datasets: [
|
|
1408
|
+
{
|
|
1409
|
+
// TODO write proper description
|
|
1410
|
+
description: "Widely used benchmark dataset for multiple Vision tasks.",
|
|
1411
|
+
id: "merve/coco2017"
|
|
1412
|
+
}
|
|
1413
|
+
],
|
|
1414
|
+
demo: {
|
|
1415
|
+
inputs: [
|
|
1416
|
+
{
|
|
1417
|
+
filename: "object-detection-input.jpg",
|
|
1418
|
+
type: "img"
|
|
1419
|
+
}
|
|
1420
|
+
],
|
|
1421
|
+
outputs: [
|
|
1422
|
+
{
|
|
1423
|
+
filename: "object-detection-output.jpg",
|
|
1424
|
+
type: "img"
|
|
1425
|
+
}
|
|
1426
|
+
]
|
|
1427
|
+
},
|
|
1428
|
+
metrics: [
|
|
1429
|
+
{
|
|
1430
|
+
description: "The Average Precision (AP) metric is the Area Under the PR Curve (AUC-PR). It is calculated for each class separately",
|
|
1431
|
+
id: "Average Precision"
|
|
1432
|
+
},
|
|
1433
|
+
{
|
|
1434
|
+
description: "The Mean Average Precision (mAP) metric is the overall average of the AP values",
|
|
1435
|
+
id: "Mean Average Precision"
|
|
1436
|
+
},
|
|
1437
|
+
{
|
|
1438
|
+
description: "The AP\u03B1 metric is the Average Precision at the IoU threshold of a \u03B1 value, for example, AP50 and AP75",
|
|
1439
|
+
id: "AP\u03B1"
|
|
1440
|
+
}
|
|
1441
|
+
],
|
|
1442
|
+
models: [
|
|
1443
|
+
{
|
|
1444
|
+
// TO DO: write description
|
|
1445
|
+
description: "Solid object detection model trained on the benchmark dataset COCO 2017.",
|
|
1446
|
+
id: "facebook/detr-resnet-50"
|
|
1447
|
+
},
|
|
1448
|
+
{
|
|
1449
|
+
description: "Strong object detection model trained on ImageNet-21k dataset.",
|
|
1450
|
+
id: "microsoft/beit-base-patch16-224-pt22k-ft22k"
|
|
1451
|
+
}
|
|
1452
|
+
],
|
|
1453
|
+
spaces: [
|
|
1454
|
+
{
|
|
1455
|
+
description: "An object detection application that can detect unseen objects out of the box.",
|
|
1456
|
+
id: "adirik/OWL-ViT"
|
|
1457
|
+
},
|
|
1458
|
+
{
|
|
1459
|
+
description: "An application that contains various object detection models to try from.",
|
|
1460
|
+
id: "Gradio-Blocks/Object-Detection-With-DETR-and-YOLOS"
|
|
1461
|
+
},
|
|
1462
|
+
{
|
|
1463
|
+
description: "An application that shows multiple cutting edge techniques for object detection and tracking.",
|
|
1464
|
+
id: "kadirnar/torchyolo"
|
|
1465
|
+
},
|
|
1466
|
+
{
|
|
1467
|
+
description: "An object tracking, segmentation and inpainting application.",
|
|
1468
|
+
id: "VIPLab/Track-Anything"
|
|
1469
|
+
}
|
|
1470
|
+
],
|
|
1471
|
+
summary: "Object Detection models allow users to identify objects of certain defined classes. Object detection models receive an image as input and output the images with bounding boxes and labels on detected objects.",
|
|
1472
|
+
widgetModels: ["facebook/detr-resnet-50"],
|
|
1473
|
+
youtubeId: "WdAeKSOpxhw"
|
|
1474
|
+
};
|
|
1475
|
+
var data_default12 = taskData12;
|
|
1476
|
+
|
|
1477
|
+
// src/depth-estimation/data.ts
|
|
1478
|
+
var taskData13 = {
|
|
1479
|
+
datasets: [
|
|
1480
|
+
{
|
|
1481
|
+
description: "NYU Depth V2 Dataset: Video dataset containing both RGB and depth sensor data",
|
|
1482
|
+
id: "sayakpaul/nyu_depth_v2"
|
|
1483
|
+
}
|
|
1484
|
+
],
|
|
1485
|
+
demo: {
|
|
1486
|
+
inputs: [
|
|
1487
|
+
{
|
|
1488
|
+
filename: "depth-estimation-input.jpg",
|
|
1489
|
+
type: "img"
|
|
1490
|
+
}
|
|
1491
|
+
],
|
|
1492
|
+
outputs: [
|
|
1493
|
+
{
|
|
1494
|
+
filename: "depth-estimation-output.png",
|
|
1495
|
+
type: "img"
|
|
1496
|
+
}
|
|
1497
|
+
]
|
|
1498
|
+
},
|
|
1499
|
+
metrics: [],
|
|
1500
|
+
models: [
|
|
1501
|
+
{
|
|
1502
|
+
// TO DO: write description
|
|
1503
|
+
description: "Strong Depth Estimation model trained on 1.4 million images.",
|
|
1504
|
+
id: "Intel/dpt-large"
|
|
1505
|
+
},
|
|
1506
|
+
{
|
|
1507
|
+
// TO DO: write description
|
|
1508
|
+
description: "Strong Depth Estimation model trained on the KITTI dataset.",
|
|
1509
|
+
id: "vinvino02/glpn-kitti"
|
|
1510
|
+
}
|
|
1511
|
+
],
|
|
1512
|
+
spaces: [
|
|
1513
|
+
{
|
|
1514
|
+
description: "An application that predicts the depth of an image and then reconstruct the 3D model as voxels.",
|
|
1515
|
+
id: "radames/dpt-depth-estimation-3d-voxels"
|
|
1516
|
+
},
|
|
1517
|
+
{
|
|
1518
|
+
description: "An application that can estimate the depth in a given image.",
|
|
1519
|
+
id: "keras-io/Monocular-Depth-Estimation"
|
|
1520
|
+
}
|
|
1521
|
+
],
|
|
1522
|
+
summary: "Depth estimation is the task of predicting depth of the objects present in an image.",
|
|
1523
|
+
widgetModels: [""],
|
|
1524
|
+
youtubeId: ""
|
|
1525
|
+
};
|
|
1526
|
+
var data_default13 = taskData13;
|
|
1527
|
+
|
|
1528
|
+
// src/placeholder/data.ts
|
|
1529
|
+
var taskData14 = {
|
|
1530
|
+
datasets: [],
|
|
1531
|
+
demo: {
|
|
1532
|
+
inputs: [],
|
|
1533
|
+
outputs: []
|
|
1534
|
+
},
|
|
1535
|
+
isPlaceholder: true,
|
|
1536
|
+
metrics: [],
|
|
1537
|
+
models: [],
|
|
1538
|
+
spaces: [],
|
|
1539
|
+
summary: "",
|
|
1540
|
+
widgetModels: [],
|
|
1541
|
+
youtubeId: void 0
|
|
1542
|
+
};
|
|
1543
|
+
var data_default14 = taskData14;
|
|
1544
|
+
|
|
1545
|
+
// src/reinforcement-learning/data.ts
|
|
1546
|
+
var taskData15 = {
|
|
1547
|
+
datasets: [
|
|
1548
|
+
{
|
|
1549
|
+
description: "A curation of widely used datasets for Data Driven Deep Reinforcement Learning (D4RL)",
|
|
1550
|
+
id: "edbeeching/decision_transformer_gym_replay"
|
|
1551
|
+
}
|
|
1552
|
+
],
|
|
1553
|
+
demo: {
|
|
1554
|
+
inputs: [
|
|
1555
|
+
{
|
|
1556
|
+
label: "State",
|
|
1557
|
+
content: "Red traffic light, pedestrians are about to pass.",
|
|
1558
|
+
type: "text"
|
|
1559
|
+
}
|
|
1560
|
+
],
|
|
1561
|
+
outputs: [
|
|
1562
|
+
{
|
|
1563
|
+
label: "Action",
|
|
1564
|
+
content: "Stop the car.",
|
|
1565
|
+
type: "text"
|
|
1566
|
+
},
|
|
1567
|
+
{
|
|
1568
|
+
label: "Next State",
|
|
1569
|
+
content: "Yellow light, pedestrians have crossed.",
|
|
1570
|
+
type: "text"
|
|
1571
|
+
}
|
|
1572
|
+
]
|
|
1573
|
+
},
|
|
1574
|
+
metrics: [
|
|
1575
|
+
{
|
|
1576
|
+
description: "Accumulated reward across all time steps discounted by a factor that ranges between 0 and 1 and determines how much the agent optimizes for future relative to immediate rewards. Measures how good is the policy ultimately found by a given algorithm considering uncertainty over the future.",
|
|
1577
|
+
id: "Discounted Total Reward"
|
|
1578
|
+
},
|
|
1579
|
+
{
|
|
1580
|
+
description: "Average return obtained after running the policy for a certain number of evaluation episodes. As opposed to total reward, mean reward considers how much reward a given algorithm receives while learning.",
|
|
1581
|
+
id: "Mean Reward"
|
|
1582
|
+
},
|
|
1583
|
+
{
|
|
1584
|
+
description: "Measures how good a given algorithm is after a predefined time. Some algorithms may be guaranteed to converge to optimal behavior across many time steps. However, an agent that reaches an acceptable level of optimality after a given time horizon may be preferable to one that ultimately reaches optimality but takes a long time.",
|
|
1585
|
+
id: "Level of Performance After Some Time"
|
|
1586
|
+
}
|
|
1587
|
+
],
|
|
1588
|
+
models: [
|
|
1589
|
+
{
|
|
1590
|
+
description: "A Reinforcement Learning model trained on expert data from the Gym Hopper environment",
|
|
1591
|
+
id: "edbeeching/decision-transformer-gym-hopper-expert"
|
|
1592
|
+
},
|
|
1593
|
+
{
|
|
1594
|
+
description: "A PPO agent playing seals/CartPole-v0 using the stable-baselines3 library and the RL Zoo.",
|
|
1595
|
+
id: "HumanCompatibleAI/ppo-seals-CartPole-v0"
|
|
1596
|
+
}
|
|
1597
|
+
],
|
|
1598
|
+
spaces: [
|
|
1599
|
+
{
|
|
1600
|
+
description: "An application for a cute puppy agent learning to catch a stick.",
|
|
1601
|
+
id: "ThomasSimonini/Huggy"
|
|
1602
|
+
},
|
|
1603
|
+
{
|
|
1604
|
+
description: "An application to play Snowball Fight with a reinforcement learning agent.",
|
|
1605
|
+
id: "ThomasSimonini/SnowballFight"
|
|
1606
|
+
}
|
|
1607
|
+
],
|
|
1608
|
+
summary: "Reinforcement learning is the computational approach of learning from action by interacting with an environment through trial and error and receiving rewards (negative or positive) as feedback",
|
|
1609
|
+
widgetModels: [],
|
|
1610
|
+
youtubeId: "q0BiUn5LiBc"
|
|
1611
|
+
};
|
|
1612
|
+
var data_default15 = taskData15;
|
|
1613
|
+
|
|
1614
|
+
// src/question-answering/data.ts
|
|
1615
|
+
var taskData16 = {
|
|
1616
|
+
datasets: [
|
|
1617
|
+
{
|
|
1618
|
+
// TODO write proper description
|
|
1619
|
+
description: "A famous question answering dataset based on English articles from Wikipedia.",
|
|
1620
|
+
id: "squad_v2"
|
|
1621
|
+
},
|
|
1622
|
+
{
|
|
1623
|
+
// TODO write proper description
|
|
1624
|
+
description: "A dataset of aggregated anonymized actual queries issued to the Google search engine.",
|
|
1625
|
+
id: "natural_questions"
|
|
1626
|
+
}
|
|
1627
|
+
],
|
|
1628
|
+
demo: {
|
|
1629
|
+
inputs: [
|
|
1630
|
+
{
|
|
1631
|
+
label: "Question",
|
|
1632
|
+
content: "Which name is also used to describe the Amazon rainforest in English?",
|
|
1633
|
+
type: "text"
|
|
1634
|
+
},
|
|
1635
|
+
{
|
|
1636
|
+
label: "Context",
|
|
1637
|
+
content: "The Amazon rainforest, also known in English as Amazonia or the Amazon Jungle",
|
|
1638
|
+
type: "text"
|
|
1639
|
+
}
|
|
1640
|
+
],
|
|
1641
|
+
outputs: [
|
|
1642
|
+
{
|
|
1643
|
+
label: "Answer",
|
|
1644
|
+
content: "Amazonia",
|
|
1645
|
+
type: "text"
|
|
1646
|
+
}
|
|
1647
|
+
]
|
|
1648
|
+
},
|
|
1649
|
+
metrics: [
|
|
1650
|
+
{
|
|
1651
|
+
description: "Exact Match is a metric based on the strict character match of the predicted answer and the right answer. For answers predicted correctly, the Exact Match will be 1. Even if only one character is different, Exact Match will be 0",
|
|
1652
|
+
id: "exact-match"
|
|
1653
|
+
},
|
|
1654
|
+
{
|
|
1655
|
+
description: " The F1-Score metric is useful if we value both false positives and false negatives equally. The F1-Score is calculated on each word in the predicted sequence against the correct answer",
|
|
1656
|
+
id: "f1"
|
|
1657
|
+
}
|
|
1658
|
+
],
|
|
1659
|
+
models: [
|
|
1660
|
+
{
|
|
1661
|
+
description: "A robust baseline model for most question answering domains.",
|
|
1662
|
+
id: "deepset/roberta-base-squad2"
|
|
1663
|
+
},
|
|
1664
|
+
{
|
|
1665
|
+
description: "A special model that can answer questions from tables!",
|
|
1666
|
+
id: "google/tapas-base-finetuned-wtq"
|
|
1667
|
+
}
|
|
1668
|
+
],
|
|
1669
|
+
spaces: [
|
|
1670
|
+
{
|
|
1671
|
+
description: "An application that can answer a long question from Wikipedia.",
|
|
1672
|
+
id: "deepset/wikipedia-assistant"
|
|
1673
|
+
}
|
|
1674
|
+
],
|
|
1675
|
+
summary: "Question Answering models can retrieve the answer to a question from a given text, which is useful for searching for an answer in a document. Some question answering models can generate answers without context!",
|
|
1676
|
+
widgetModels: ["deepset/roberta-base-squad2"],
|
|
1677
|
+
youtubeId: "ajPx5LwJD-I"
|
|
1678
|
+
};
|
|
1679
|
+
var data_default16 = taskData16;
|
|
1680
|
+
|
|
1681
|
+
// src/sentence-similarity/data.ts
|
|
1682
|
+
var taskData17 = {
|
|
1683
|
+
datasets: [
|
|
1684
|
+
{
|
|
1685
|
+
description: "Bing queries with relevant passages from various web sources.",
|
|
1686
|
+
id: "ms_marco"
|
|
1687
|
+
}
|
|
1688
|
+
],
|
|
1689
|
+
demo: {
|
|
1690
|
+
inputs: [
|
|
1691
|
+
{
|
|
1692
|
+
label: "Source sentence",
|
|
1693
|
+
content: "Machine learning is so easy.",
|
|
1694
|
+
type: "text"
|
|
1695
|
+
},
|
|
1696
|
+
{
|
|
1697
|
+
label: "Sentences to compare to",
|
|
1698
|
+
content: "Deep learning is so straightforward.",
|
|
1699
|
+
type: "text"
|
|
1700
|
+
},
|
|
1701
|
+
{
|
|
1702
|
+
label: "",
|
|
1703
|
+
content: "This is so difficult, like rocket science.",
|
|
1704
|
+
type: "text"
|
|
1705
|
+
},
|
|
1706
|
+
{
|
|
1707
|
+
label: "",
|
|
1708
|
+
content: "I can't believe how much I struggled with this.",
|
|
1709
|
+
type: "text"
|
|
1710
|
+
}
|
|
1711
|
+
],
|
|
1712
|
+
outputs: [
|
|
1713
|
+
{
|
|
1714
|
+
type: "chart",
|
|
1715
|
+
data: [
|
|
1716
|
+
{
|
|
1717
|
+
label: "Deep learning is so straightforward.",
|
|
1718
|
+
score: 0.623
|
|
1719
|
+
},
|
|
1720
|
+
{
|
|
1721
|
+
label: "This is so difficult, like rocket science.",
|
|
1722
|
+
score: 0.413
|
|
1723
|
+
},
|
|
1724
|
+
{
|
|
1725
|
+
label: "I can't believe how much I struggled with this.",
|
|
1726
|
+
score: 0.256
|
|
1727
|
+
}
|
|
1728
|
+
]
|
|
1729
|
+
}
|
|
1730
|
+
]
|
|
1731
|
+
},
|
|
1732
|
+
metrics: [
|
|
1733
|
+
{
|
|
1734
|
+
description: "Reciprocal Rank is a measure used to rank the relevancy of documents given a set of documents. Reciprocal Rank is the reciprocal of the rank of the document retrieved, meaning, if the rank is 3, the Reciprocal Rank is 0.33. If the rank is 1, the Reciprocal Rank is 1",
|
|
1735
|
+
id: "Mean Reciprocal Rank"
|
|
1736
|
+
},
|
|
1737
|
+
{
|
|
1738
|
+
description: "The similarity of the embeddings is evaluated mainly on cosine similarity. It is calculated as the cosine of the angle between two vectors. It is particularly useful when your texts are not the same length",
|
|
1739
|
+
id: "Cosine Similarity"
|
|
1740
|
+
}
|
|
1741
|
+
],
|
|
1742
|
+
models: [
|
|
1743
|
+
{
|
|
1744
|
+
description: "This model works well for sentences and paragraphs and can be used for clustering/grouping and semantic searches.",
|
|
1745
|
+
id: "sentence-transformers/all-mpnet-base-v2"
|
|
1746
|
+
},
|
|
1747
|
+
{
|
|
1748
|
+
description: "A multilingual model trained for FAQ retrieval.",
|
|
1749
|
+
id: "clips/mfaq"
|
|
1750
|
+
}
|
|
1751
|
+
],
|
|
1752
|
+
spaces: [
|
|
1753
|
+
{
|
|
1754
|
+
description: "An application that leverages sentence similarity to answer questions from YouTube videos.",
|
|
1755
|
+
id: "Gradio-Blocks/Ask_Questions_To_YouTube_Videos"
|
|
1756
|
+
},
|
|
1757
|
+
{
|
|
1758
|
+
description: "An application that retrieves relevant PubMed abstracts for a given online article which can be used as further references.",
|
|
1759
|
+
id: "Gradio-Blocks/pubmed-abstract-retriever"
|
|
1760
|
+
},
|
|
1761
|
+
{
|
|
1762
|
+
description: "An application that leverages sentence similarity to summarize text.",
|
|
1763
|
+
id: "nickmuchi/article-text-summarizer"
|
|
1764
|
+
},
|
|
1765
|
+
{
|
|
1766
|
+
description: "A guide that explains how Sentence Transformers can be used for semantic search.",
|
|
1767
|
+
id: "sentence-transformers/Sentence_Transformers_for_semantic_search"
|
|
1768
|
+
}
|
|
1769
|
+
],
|
|
1770
|
+
summary: "Sentence Similarity is the task of determining how similar two texts are. Sentence similarity models convert input texts into vectors (embeddings) that capture semantic information and calculate how close (similar) they are between them. This task is particularly useful for information retrieval and clustering/grouping.",
|
|
1771
|
+
widgetModels: ["sentence-transformers/all-MiniLM-L6-v2"],
|
|
1772
|
+
youtubeId: "VCZq5AkbNEU"
|
|
1773
|
+
};
|
|
1774
|
+
var data_default17 = taskData17;
|
|
1775
|
+
|
|
1776
|
+
// src/summarization/data.ts
|
|
1777
|
+
var taskData18 = {
|
|
1778
|
+
datasets: [
|
|
1779
|
+
{
|
|
1780
|
+
description: "News articles in five different languages along with their summaries. Widely used for benchmarking multilingual summarization models.",
|
|
1781
|
+
id: "mlsum"
|
|
1782
|
+
},
|
|
1783
|
+
{
|
|
1784
|
+
description: "English conversations and their summaries. Useful for benchmarking conversational agents.",
|
|
1785
|
+
id: "samsum"
|
|
1786
|
+
}
|
|
1787
|
+
],
|
|
1788
|
+
demo: {
|
|
1789
|
+
inputs: [
|
|
1790
|
+
{
|
|
1791
|
+
label: "Input",
|
|
1792
|
+
content: "The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. It was the first structure to reach a height of 300 metres. Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct.",
|
|
1793
|
+
type: "text"
|
|
1794
|
+
}
|
|
1795
|
+
],
|
|
1796
|
+
outputs: [
|
|
1797
|
+
{
|
|
1798
|
+
label: "Output",
|
|
1799
|
+
content: "The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building. It was the first structure to reach a height of 300 metres.",
|
|
1800
|
+
type: "text"
|
|
1801
|
+
}
|
|
1802
|
+
]
|
|
1803
|
+
},
|
|
1804
|
+
metrics: [
|
|
1805
|
+
{
|
|
1806
|
+
description: "The generated sequence is compared against its summary, and the overlap of tokens are counted. ROUGE-N refers to overlap of N subsequent tokens, ROUGE-1 refers to overlap of single tokens and ROUGE-2 is the overlap of two subsequent tokens.",
|
|
1807
|
+
id: "rouge"
|
|
1808
|
+
}
|
|
1809
|
+
],
|
|
1810
|
+
models: [
|
|
1811
|
+
{
|
|
1812
|
+
description: "A strong summarization model trained on English news articles. Excels at generating factual summaries.",
|
|
1813
|
+
id: "facebook/bart-large-cnn"
|
|
1814
|
+
},
|
|
1815
|
+
{
|
|
1816
|
+
description: "A summarization model trained on medical articles.",
|
|
1817
|
+
id: "google/bigbird-pegasus-large-pubmed"
|
|
1818
|
+
}
|
|
1819
|
+
],
|
|
1820
|
+
spaces: [
|
|
1821
|
+
{
|
|
1822
|
+
description: "An application that can summarize long paragraphs.",
|
|
1823
|
+
id: "pszemraj/summarize-long-text"
|
|
1824
|
+
},
|
|
1825
|
+
{
|
|
1826
|
+
description: "A much needed summarization application for terms and conditions.",
|
|
1827
|
+
id: "ml6team/distilbart-tos-summarizer-tosdr"
|
|
1828
|
+
},
|
|
1829
|
+
{
|
|
1830
|
+
description: "An application that summarizes long documents.",
|
|
1831
|
+
id: "pszemraj/document-summarization"
|
|
1832
|
+
},
|
|
1833
|
+
{
|
|
1834
|
+
description: "An application that can detect errors in abstractive summarization.",
|
|
1835
|
+
id: "ml6team/post-processing-summarization"
|
|
1836
|
+
}
|
|
1837
|
+
],
|
|
1838
|
+
summary: "Summarization is the task of producing a shorter version of a document while preserving its important information. Some models can extract text from the original input, while other models can generate entirely new text.",
|
|
1839
|
+
widgetModels: ["sshleifer/distilbart-cnn-12-6"],
|
|
1840
|
+
youtubeId: "yHnr5Dk2zCI"
|
|
1841
|
+
};
|
|
1842
|
+
var data_default18 = taskData18;
|
|
1843
|
+
|
|
1844
|
+
// src/table-question-answering/data.ts
|
|
1845
|
+
var taskData19 = {
|
|
1846
|
+
datasets: [
|
|
1847
|
+
{
|
|
1848
|
+
description: "The WikiTableQuestions dataset is a large-scale dataset for the task of question answering on semi-structured tables.",
|
|
1849
|
+
id: "wikitablequestions"
|
|
1850
|
+
},
|
|
1851
|
+
{
|
|
1852
|
+
description: "WikiSQL is a dataset of 80654 hand-annotated examples of questions and SQL queries distributed across 24241 tables from Wikipedia.",
|
|
1853
|
+
id: "wikisql"
|
|
1854
|
+
}
|
|
1855
|
+
],
|
|
1856
|
+
demo: {
|
|
1857
|
+
inputs: [
|
|
1858
|
+
{
|
|
1859
|
+
table: [
|
|
1860
|
+
["Rank", "Name", "No.of reigns", "Combined days"],
|
|
1861
|
+
["1", "lou Thesz", "3", "3749"],
|
|
1862
|
+
["2", "Ric Flair", "8", "3103"],
|
|
1863
|
+
["3", "Harley Race", "7", "1799"]
|
|
1864
|
+
],
|
|
1865
|
+
type: "tabular"
|
|
1866
|
+
},
|
|
1867
|
+
{
|
|
1868
|
+
label: "Question",
|
|
1869
|
+
content: "What is the number of reigns for Harley Race?",
|
|
1870
|
+
type: "text"
|
|
1871
|
+
}
|
|
1872
|
+
],
|
|
1873
|
+
outputs: [
|
|
1874
|
+
{
|
|
1875
|
+
label: "Result",
|
|
1876
|
+
content: "7",
|
|
1877
|
+
type: "text"
|
|
1878
|
+
}
|
|
1879
|
+
]
|
|
1880
|
+
},
|
|
1881
|
+
metrics: [
|
|
1882
|
+
{
|
|
1883
|
+
description: "Checks whether the predicted answer(s) is the same as the ground-truth answer(s).",
|
|
1884
|
+
id: "Denotation Accuracy"
|
|
1885
|
+
}
|
|
1886
|
+
],
|
|
1887
|
+
models: [
|
|
1888
|
+
{
|
|
1889
|
+
description: "A table question answering model that is capable of neural SQL execution, i.e., employ TAPEX to execute a SQL query on a given table.",
|
|
1890
|
+
id: "microsoft/tapex-base"
|
|
1891
|
+
},
|
|
1892
|
+
{
|
|
1893
|
+
description: "A robust table question answering model.",
|
|
1894
|
+
id: "google/tapas-base-finetuned-wtq"
|
|
1895
|
+
}
|
|
1896
|
+
],
|
|
1897
|
+
spaces: [
|
|
1898
|
+
{
|
|
1899
|
+
description: "An application that answers questions based on table CSV files.",
|
|
1900
|
+
id: "katanaml/table-query"
|
|
1901
|
+
}
|
|
1902
|
+
],
|
|
1903
|
+
summary: "Table Question Answering (Table QA) is the answering a question about an information on a given table.",
|
|
1904
|
+
widgetModels: ["google/tapas-base-finetuned-wtq"]
|
|
1905
|
+
};
|
|
1906
|
+
var data_default19 = taskData19;
|
|
1907
|
+
|
|
1908
|
+
// src/tabular-classification/data.ts
|
|
1909
|
+
var taskData20 = {
|
|
1910
|
+
datasets: [
|
|
1911
|
+
{
|
|
1912
|
+
description: "A comprehensive curation of datasets covering all benchmarks.",
|
|
1913
|
+
id: "inria-soda/tabular-benchmark"
|
|
1914
|
+
}
|
|
1915
|
+
],
|
|
1916
|
+
demo: {
|
|
1917
|
+
inputs: [
|
|
1918
|
+
{
|
|
1919
|
+
table: [
|
|
1920
|
+
["Glucose", "Blood Pressure ", "Skin Thickness", "Insulin", "BMI"],
|
|
1921
|
+
["148", "72", "35", "0", "33.6"],
|
|
1922
|
+
["150", "50", "30", "0", "35.1"],
|
|
1923
|
+
["141", "60", "29", "1", "39.2"]
|
|
1924
|
+
],
|
|
1925
|
+
type: "tabular"
|
|
1926
|
+
}
|
|
1927
|
+
],
|
|
1928
|
+
outputs: [
|
|
1929
|
+
{
|
|
1930
|
+
table: [["Diabetes"], ["1"], ["1"], ["0"]],
|
|
1931
|
+
type: "tabular"
|
|
1932
|
+
}
|
|
1933
|
+
]
|
|
1934
|
+
},
|
|
1935
|
+
metrics: [
|
|
1936
|
+
{
|
|
1937
|
+
description: "",
|
|
1938
|
+
id: "accuracy"
|
|
1939
|
+
},
|
|
1940
|
+
{
|
|
1941
|
+
description: "",
|
|
1942
|
+
id: "recall"
|
|
1943
|
+
},
|
|
1944
|
+
{
|
|
1945
|
+
description: "",
|
|
1946
|
+
id: "precision"
|
|
1947
|
+
},
|
|
1948
|
+
{
|
|
1949
|
+
description: "",
|
|
1950
|
+
id: "f1"
|
|
1951
|
+
}
|
|
1952
|
+
],
|
|
1953
|
+
models: [
|
|
1954
|
+
{
|
|
1955
|
+
description: "Breast cancer prediction model based on decision trees.",
|
|
1956
|
+
id: "scikit-learn/cancer-prediction-trees"
|
|
1957
|
+
}
|
|
1958
|
+
],
|
|
1959
|
+
spaces: [
|
|
1960
|
+
{
|
|
1961
|
+
description: "An application that can predict defective products on a production line.",
|
|
1962
|
+
id: "scikit-learn/tabular-playground"
|
|
1963
|
+
},
|
|
1964
|
+
{
|
|
1965
|
+
description: "An application that compares various tabular classification techniques on different datasets.",
|
|
1966
|
+
id: "scikit-learn/classification"
|
|
1967
|
+
}
|
|
1968
|
+
],
|
|
1969
|
+
summary: "Tabular classification is the task of classifying a target category (a group) based on set of attributes.",
|
|
1970
|
+
widgetModels: ["scikit-learn/tabular-playground"],
|
|
1971
|
+
youtubeId: ""
|
|
1972
|
+
};
|
|
1973
|
+
var data_default20 = taskData20;
|
|
1974
|
+
|
|
1975
|
+
// src/tabular-regression/data.ts
|
|
1976
|
+
var taskData21 = {
|
|
1977
|
+
datasets: [
|
|
1978
|
+
{
|
|
1979
|
+
description: "A comprehensive curation of datasets covering all benchmarks.",
|
|
1980
|
+
id: "inria-soda/tabular-benchmark"
|
|
1981
|
+
}
|
|
1982
|
+
],
|
|
1983
|
+
demo: {
|
|
1984
|
+
inputs: [
|
|
1985
|
+
{
|
|
1986
|
+
table: [
|
|
1987
|
+
["Car Name", "Horsepower", "Weight"],
|
|
1988
|
+
["ford torino", "140", "3,449"],
|
|
1989
|
+
["amc hornet", "97", "2,774"],
|
|
1990
|
+
["toyota corolla", "65", "1,773"]
|
|
1991
|
+
],
|
|
1992
|
+
type: "tabular"
|
|
1993
|
+
}
|
|
1994
|
+
],
|
|
1995
|
+
outputs: [
|
|
1996
|
+
{
|
|
1997
|
+
table: [["MPG (miles per gallon)"], ["17"], ["18"], ["31"]],
|
|
1998
|
+
type: "tabular"
|
|
1999
|
+
}
|
|
2000
|
+
]
|
|
2001
|
+
},
|
|
2002
|
+
metrics: [
|
|
2003
|
+
{
|
|
2004
|
+
description: "",
|
|
2005
|
+
id: "mse"
|
|
2006
|
+
},
|
|
2007
|
+
{
|
|
2008
|
+
description: "Coefficient of determination (or R-squared) is a measure of how well the model fits the data. Higher R-squared is considered a better fit.",
|
|
2009
|
+
id: "r-squared"
|
|
2010
|
+
}
|
|
2011
|
+
],
|
|
2012
|
+
models: [
|
|
2013
|
+
{
|
|
2014
|
+
description: "Fish weight prediction based on length measurements and species.",
|
|
2015
|
+
id: "scikit-learn/Fish-Weight"
|
|
2016
|
+
}
|
|
2017
|
+
],
|
|
2018
|
+
spaces: [
|
|
2019
|
+
{
|
|
2020
|
+
description: "An application that can predict weight of a fish based on set of attributes.",
|
|
2021
|
+
id: "scikit-learn/fish-weight-prediction"
|
|
2022
|
+
}
|
|
2023
|
+
],
|
|
2024
|
+
summary: "Tabular regression is the task of predicting a numerical value given a set of attributes.",
|
|
2025
|
+
widgetModels: ["scikit-learn/Fish-Weight"],
|
|
2026
|
+
youtubeId: ""
|
|
2027
|
+
};
|
|
2028
|
+
var data_default21 = taskData21;
|
|
2029
|
+
|
|
2030
|
+
// src/text-to-image/data.ts
|
|
2031
|
+
var taskData22 = {
|
|
2032
|
+
datasets: [
|
|
2033
|
+
{
|
|
2034
|
+
description: "RedCaps is a large-scale dataset of 12M image-text pairs collected from Reddit.",
|
|
2035
|
+
id: "red_caps"
|
|
2036
|
+
},
|
|
2037
|
+
{
|
|
2038
|
+
description: "Conceptual Captions is a dataset consisting of ~3.3M images annotated with captions.",
|
|
2039
|
+
id: "conceptual_captions"
|
|
2040
|
+
}
|
|
2041
|
+
],
|
|
2042
|
+
demo: {
|
|
2043
|
+
inputs: [
|
|
2044
|
+
{
|
|
2045
|
+
label: "Input",
|
|
2046
|
+
content: "A city above clouds, pastel colors, Victorian style",
|
|
2047
|
+
type: "text"
|
|
2048
|
+
}
|
|
2049
|
+
],
|
|
2050
|
+
outputs: [
|
|
2051
|
+
{
|
|
2052
|
+
filename: "image.jpeg",
|
|
2053
|
+
type: "img"
|
|
2054
|
+
}
|
|
2055
|
+
]
|
|
2056
|
+
},
|
|
2057
|
+
metrics: [
|
|
2058
|
+
{
|
|
2059
|
+
description: "The Inception Score (IS) measure assesses diversity and meaningfulness. It uses a generated image sample to predict its label. A higher score signifies more diverse and meaningful images.",
|
|
2060
|
+
id: "IS"
|
|
2061
|
+
},
|
|
2062
|
+
{
|
|
2063
|
+
description: "The Fr\xE9chet Inception Distance (FID) calculates the distance between distributions between synthetic and real samples. A lower FID score indicates better similarity between the distributions of real and generated images.",
|
|
2064
|
+
id: "FID"
|
|
2065
|
+
},
|
|
2066
|
+
{
|
|
2067
|
+
description: "R-precision assesses how the generated image aligns with the provided text description. It uses the generated images as queries to retrieve relevant text descriptions. The top 'r' relevant descriptions are selected and used to calculate R-precision as r/R, where 'R' is the number of ground truth descriptions associated with the generated images. A higher R-precision value indicates a better model.",
|
|
2068
|
+
id: "R-Precision"
|
|
2069
|
+
}
|
|
2070
|
+
],
|
|
2071
|
+
models: [
|
|
2072
|
+
{
|
|
2073
|
+
description: "A latent text-to-image diffusion model capable of generating photo-realistic images given any text input.",
|
|
2074
|
+
id: "CompVis/stable-diffusion-v1-4"
|
|
2075
|
+
},
|
|
2076
|
+
{
|
|
2077
|
+
description: "A model that can be used to generate images based on text prompts. The DALL\xB7E Mega model is the largest version of DALLE Mini.",
|
|
2078
|
+
id: "dalle-mini/dalle-mega"
|
|
2079
|
+
},
|
|
2080
|
+
{
|
|
2081
|
+
description: "A text-to-image model that can generate coherent text inside image.",
|
|
2082
|
+
id: "DeepFloyd/IF-I-XL-v1.0"
|
|
2083
|
+
},
|
|
2084
|
+
{
|
|
2085
|
+
description: "A powerful text-to-image model.",
|
|
2086
|
+
id: "kakaobrain/karlo-v1-alpha"
|
|
2087
|
+
}
|
|
2088
|
+
],
|
|
2089
|
+
spaces: [
|
|
2090
|
+
{
|
|
2091
|
+
description: "A powerful text-to-image application.",
|
|
2092
|
+
id: "stabilityai/stable-diffusion"
|
|
2093
|
+
},
|
|
2094
|
+
{
|
|
2095
|
+
description: "An text-to-image application that can generate coherent text inside the image.",
|
|
2096
|
+
id: "DeepFloyd/IF"
|
|
2097
|
+
},
|
|
2098
|
+
{
|
|
2099
|
+
description: "An powerful text-to-image application that can generate images.",
|
|
2100
|
+
id: "kakaobrain/karlo"
|
|
2101
|
+
},
|
|
2102
|
+
{
|
|
2103
|
+
description: "An powerful text-to-image application that can generates 3D representations.",
|
|
2104
|
+
id: "hysts/Shap-E"
|
|
2105
|
+
},
|
|
2106
|
+
{
|
|
2107
|
+
description: "A strong application for `text-to-image`, `image-to-image` and image inpainting.",
|
|
2108
|
+
id: "ArtGAN/Stable-Diffusion-ControlNet-WebUI"
|
|
2109
|
+
}
|
|
2110
|
+
],
|
|
2111
|
+
summary: "Generates images from input text. These models can be used to generate and modify images based on text prompts.",
|
|
2112
|
+
widgetModels: ["CompVis/stable-diffusion-v1-4"],
|
|
2113
|
+
youtubeId: ""
|
|
2114
|
+
};
|
|
2115
|
+
var data_default22 = taskData22;
|
|
2116
|
+
|
|
2117
|
+
// src/text-to-speech/data.ts
|
|
2118
|
+
var taskData23 = {
|
|
2119
|
+
datasets: [
|
|
2120
|
+
{
|
|
2121
|
+
description: "Thousands of short audio clips of a single speaker.",
|
|
2122
|
+
id: "lj_speech"
|
|
2123
|
+
},
|
|
2124
|
+
{
|
|
2125
|
+
description: "Multi-speaker English dataset.",
|
|
2126
|
+
id: "LibriTTS"
|
|
2127
|
+
}
|
|
2128
|
+
],
|
|
2129
|
+
demo: {
|
|
2130
|
+
inputs: [
|
|
2131
|
+
{
|
|
2132
|
+
label: "Input",
|
|
2133
|
+
content: "I love audio models on the Hub!",
|
|
2134
|
+
type: "text"
|
|
2135
|
+
}
|
|
2136
|
+
],
|
|
2137
|
+
outputs: [
|
|
2138
|
+
{
|
|
2139
|
+
filename: "audio.wav",
|
|
2140
|
+
type: "audio"
|
|
2141
|
+
}
|
|
2142
|
+
]
|
|
2143
|
+
},
|
|
2144
|
+
metrics: [
|
|
2145
|
+
{
|
|
2146
|
+
description: "The Mel Cepstral Distortion (MCD) metric is used to calculate the quality of generated speech.",
|
|
2147
|
+
id: "mel cepstral distortion"
|
|
2148
|
+
}
|
|
2149
|
+
],
|
|
2150
|
+
models: [
|
|
2151
|
+
{
|
|
2152
|
+
description: "A powerful TTS model.",
|
|
2153
|
+
id: "suno/bark"
|
|
2154
|
+
},
|
|
2155
|
+
{
|
|
2156
|
+
description: "A massively multi-lingual TTS model.",
|
|
2157
|
+
id: "facebook/mms-tts"
|
|
2158
|
+
},
|
|
2159
|
+
{
|
|
2160
|
+
description: "An end-to-end speech synthesis model.",
|
|
2161
|
+
id: "microsoft/speecht5_tts"
|
|
2162
|
+
}
|
|
2163
|
+
],
|
|
2164
|
+
spaces: [
|
|
2165
|
+
{
|
|
2166
|
+
description: "An application for generate highly realistic, multilingual speech.",
|
|
2167
|
+
id: "suno/bark"
|
|
2168
|
+
},
|
|
2169
|
+
{
|
|
2170
|
+
description: "An application that contains multiple speech synthesis models for various languages and accents.",
|
|
2171
|
+
id: "coqui/CoquiTTS"
|
|
2172
|
+
},
|
|
2173
|
+
{
|
|
2174
|
+
description: "An application that synthesizes speech for various speaker types.",
|
|
2175
|
+
id: "Matthijs/speecht5-tts-demo"
|
|
2176
|
+
}
|
|
2177
|
+
],
|
|
2178
|
+
summary: "Text-to-Speech (TTS) is the task of generating natural sounding speech given text input. TTS models can be extended to have a single model that generates speech for multiple speakers and multiple languages.",
|
|
2179
|
+
widgetModels: ["microsoft/speecht5_tts"],
|
|
2180
|
+
youtubeId: "NW62DpzJ274"
|
|
2181
|
+
};
|
|
2182
|
+
var data_default23 = taskData23;
|
|
2183
|
+
|
|
2184
|
+
// src/token-classification/data.ts
|
|
2185
|
+
var taskData24 = {
|
|
2186
|
+
datasets: [
|
|
2187
|
+
{
|
|
2188
|
+
description: "A widely used dataset useful to benchmark named entity recognition models.",
|
|
2189
|
+
id: "conll2003"
|
|
2190
|
+
},
|
|
2191
|
+
{
|
|
2192
|
+
description: "A multilingual dataset of Wikipedia articles annotated for named entity recognition in over 150 different languages.",
|
|
2193
|
+
id: "wikiann"
|
|
2194
|
+
}
|
|
2195
|
+
],
|
|
2196
|
+
demo: {
|
|
2197
|
+
inputs: [
|
|
2198
|
+
{
|
|
2199
|
+
label: "Input",
|
|
2200
|
+
content: "My name is Omar and I live in Z\xFCrich.",
|
|
2201
|
+
type: "text"
|
|
2202
|
+
}
|
|
2203
|
+
],
|
|
2204
|
+
outputs: [
|
|
2205
|
+
{
|
|
2206
|
+
text: "My name is Omar and I live in Z\xFCrich.",
|
|
2207
|
+
tokens: [
|
|
2208
|
+
{
|
|
2209
|
+
type: "PERSON",
|
|
2210
|
+
start: 11,
|
|
2211
|
+
end: 15
|
|
2212
|
+
},
|
|
2213
|
+
{
|
|
2214
|
+
type: "GPE",
|
|
2215
|
+
start: 30,
|
|
2216
|
+
end: 36
|
|
2217
|
+
}
|
|
2218
|
+
],
|
|
2219
|
+
type: "text-with-tokens"
|
|
2220
|
+
}
|
|
2221
|
+
]
|
|
2222
|
+
},
|
|
2223
|
+
metrics: [
|
|
2224
|
+
{
|
|
2225
|
+
description: "",
|
|
2226
|
+
id: "accuracy"
|
|
2227
|
+
},
|
|
2228
|
+
{
|
|
2229
|
+
description: "",
|
|
2230
|
+
id: "recall"
|
|
2231
|
+
},
|
|
2232
|
+
{
|
|
2233
|
+
description: "",
|
|
2234
|
+
id: "precision"
|
|
2235
|
+
},
|
|
2236
|
+
{
|
|
2237
|
+
description: "",
|
|
2238
|
+
id: "f1"
|
|
2239
|
+
}
|
|
2240
|
+
],
|
|
2241
|
+
models: [
|
|
2242
|
+
{
|
|
2243
|
+
description: "A robust performance model to identify people, locations, organizations and names of miscellaneous entities.",
|
|
2244
|
+
id: "dslim/bert-base-NER"
|
|
2245
|
+
},
|
|
2246
|
+
{
|
|
2247
|
+
description: "Flair models are typically the state of the art in named entity recognition tasks.",
|
|
2248
|
+
id: "flair/ner-english"
|
|
2249
|
+
}
|
|
2250
|
+
],
|
|
2251
|
+
spaces: [
|
|
2252
|
+
{
|
|
2253
|
+
description: "An application that can recognizes entities, extracts noun chunks and recognizes various linguistic features of each token.",
|
|
2254
|
+
id: "spacy/gradio_pipeline_visualizer"
|
|
2255
|
+
}
|
|
2256
|
+
],
|
|
2257
|
+
summary: "Token classification is a natural language understanding task in which a label is assigned to some tokens in a text. Some popular token classification subtasks are Named Entity Recognition (NER) and Part-of-Speech (PoS) tagging. NER models could be trained to identify specific entities in a text, such as dates, individuals and places; and PoS tagging would identify, for example, which words in a text are verbs, nouns, and punctuation marks.",
|
|
2258
|
+
widgetModels: ["dslim/bert-base-NER"],
|
|
2259
|
+
youtubeId: "wVHdVlPScxA"
|
|
2260
|
+
};
|
|
2261
|
+
var data_default24 = taskData24;
|
|
2262
|
+
|
|
2263
|
+
// src/translation/data.ts
|
|
2264
|
+
var taskData25 = {
|
|
2265
|
+
datasets: [
|
|
2266
|
+
{
|
|
2267
|
+
description: "A dataset of copyright-free books translated into 16 different languages.",
|
|
2268
|
+
id: "opus_books"
|
|
2269
|
+
},
|
|
2270
|
+
{
|
|
2271
|
+
description: "An example of translation between programming languages. This dataset consists of functions in Java and C#.",
|
|
2272
|
+
id: "code_x_glue_cc_code_to_code_trans"
|
|
2273
|
+
}
|
|
2274
|
+
],
|
|
2275
|
+
demo: {
|
|
2276
|
+
inputs: [
|
|
2277
|
+
{
|
|
2278
|
+
label: "Input",
|
|
2279
|
+
content: "My name is Omar and I live in Z\xFCrich.",
|
|
2280
|
+
type: "text"
|
|
2281
|
+
}
|
|
2282
|
+
],
|
|
2283
|
+
outputs: [
|
|
2284
|
+
{
|
|
2285
|
+
label: "Output",
|
|
2286
|
+
content: "Mein Name ist Omar und ich wohne in Z\xFCrich.",
|
|
2287
|
+
type: "text"
|
|
2288
|
+
}
|
|
2289
|
+
]
|
|
2290
|
+
},
|
|
2291
|
+
metrics: [
|
|
2292
|
+
{
|
|
2293
|
+
description: "BLEU score is calculated by counting the number of shared single or subsequent tokens between the generated sequence and the reference. Subsequent n tokens are called \u201Cn-grams\u201D. Unigram refers to a single token while bi-gram refers to token pairs and n-grams refer to n subsequent tokens. The score ranges from 0 to 1, where 1 means the translation perfectly matched and 0 did not match at all",
|
|
2294
|
+
id: "bleu"
|
|
2295
|
+
},
|
|
2296
|
+
{
|
|
2297
|
+
description: "",
|
|
2298
|
+
id: "sacrebleu"
|
|
2299
|
+
}
|
|
2300
|
+
],
|
|
2301
|
+
models: [
|
|
2302
|
+
{
|
|
2303
|
+
description: "A model that translates from English to French.",
|
|
2304
|
+
id: "Helsinki-NLP/opus-mt-en-fr"
|
|
2305
|
+
},
|
|
2306
|
+
{
|
|
2307
|
+
description: "A general-purpose Transformer that can be used to translate from English to German, French, or Romanian.",
|
|
2308
|
+
id: "t5-base"
|
|
2309
|
+
}
|
|
2310
|
+
],
|
|
2311
|
+
spaces: [
|
|
2312
|
+
{
|
|
2313
|
+
description: "An application that can translate between 100 languages.",
|
|
2314
|
+
id: "Iker/Translate-100-languages"
|
|
2315
|
+
},
|
|
2316
|
+
{
|
|
2317
|
+
description: "An application that can translate between English, Spanish and Hindi.",
|
|
2318
|
+
id: "EuroPython2022/Translate-with-Bloom"
|
|
2319
|
+
}
|
|
2320
|
+
],
|
|
2321
|
+
summary: "Translation is the task of converting text from one language to another.",
|
|
2322
|
+
widgetModels: ["t5-small"],
|
|
2323
|
+
youtubeId: "1JvfrvZgi6c"
|
|
2324
|
+
};
|
|
2325
|
+
var data_default25 = taskData25;
|
|
2326
|
+
|
|
2327
|
+
// src/text-classification/data.ts
|
|
2328
|
+
var taskData26 = {
|
|
2329
|
+
datasets: [
|
|
2330
|
+
{
|
|
2331
|
+
description: "A widely used dataset used to benchmark multiple variants of text classification.",
|
|
2332
|
+
id: "glue"
|
|
2333
|
+
},
|
|
2334
|
+
{
|
|
2335
|
+
description: "A text classification dataset used to benchmark natural language inference models",
|
|
2336
|
+
id: "snli"
|
|
2337
|
+
}
|
|
2338
|
+
],
|
|
2339
|
+
demo: {
|
|
2340
|
+
inputs: [
|
|
2341
|
+
{
|
|
2342
|
+
label: "Input",
|
|
2343
|
+
content: "I love Hugging Face!",
|
|
2344
|
+
type: "text"
|
|
2345
|
+
}
|
|
2346
|
+
],
|
|
2347
|
+
outputs: [
|
|
2348
|
+
{
|
|
2349
|
+
type: "chart",
|
|
2350
|
+
data: [
|
|
2351
|
+
{
|
|
2352
|
+
label: "POSITIVE",
|
|
2353
|
+
score: 0.9
|
|
2354
|
+
},
|
|
2355
|
+
{
|
|
2356
|
+
label: "NEUTRAL",
|
|
2357
|
+
score: 0.1
|
|
2358
|
+
},
|
|
2359
|
+
{
|
|
2360
|
+
label: "NEGATIVE",
|
|
2361
|
+
score: 0
|
|
2362
|
+
}
|
|
2363
|
+
]
|
|
2364
|
+
}
|
|
2365
|
+
]
|
|
2366
|
+
},
|
|
2367
|
+
metrics: [
|
|
2368
|
+
{
|
|
2369
|
+
description: "",
|
|
2370
|
+
id: "accuracy"
|
|
2371
|
+
},
|
|
2372
|
+
{
|
|
2373
|
+
description: "",
|
|
2374
|
+
id: "recall"
|
|
2375
|
+
},
|
|
2376
|
+
{
|
|
2377
|
+
description: "",
|
|
2378
|
+
id: "precision"
|
|
2379
|
+
},
|
|
2380
|
+
{
|
|
2381
|
+
description: "The F1 metric is the harmonic mean of the precision and recall. It can be calculated as: F1 = 2 * (precision * recall) / (precision + recall)",
|
|
2382
|
+
id: "f1"
|
|
2383
|
+
}
|
|
2384
|
+
],
|
|
2385
|
+
models: [
|
|
2386
|
+
{
|
|
2387
|
+
description: "A robust model trained for sentiment analysis.",
|
|
2388
|
+
id: "distilbert-base-uncased-finetuned-sst-2-english"
|
|
2389
|
+
},
|
|
2390
|
+
{
|
|
2391
|
+
description: "Multi-genre natural language inference model.",
|
|
2392
|
+
id: "roberta-large-mnli"
|
|
2393
|
+
}
|
|
2394
|
+
],
|
|
2395
|
+
spaces: [
|
|
2396
|
+
{
|
|
2397
|
+
description: "An application that can classify financial sentiment.",
|
|
2398
|
+
id: "IoannisTr/Tech_Stocks_Trading_Assistant"
|
|
2399
|
+
},
|
|
2400
|
+
{
|
|
2401
|
+
description: "A dashboard that contains various text classification tasks.",
|
|
2402
|
+
id: "miesnerjacob/Multi-task-NLP"
|
|
2403
|
+
},
|
|
2404
|
+
{
|
|
2405
|
+
description: "An application that analyzes user reviews in healthcare.",
|
|
2406
|
+
id: "spacy/healthsea-demo"
|
|
2407
|
+
}
|
|
2408
|
+
],
|
|
2409
|
+
summary: "Text Classification is the task of assigning a label or class to a given text. Some use cases are sentiment analysis, natural language inference, and assessing grammatical correctness.",
|
|
2410
|
+
widgetModels: ["distilbert-base-uncased-finetuned-sst-2-english"],
|
|
2411
|
+
youtubeId: "leNG9fN9FQU"
|
|
2412
|
+
};
|
|
2413
|
+
var data_default26 = taskData26;
|
|
2414
|
+
|
|
2415
|
+
// src/text-generation/data.ts
|
|
2416
|
+
var taskData27 = {
|
|
2417
|
+
datasets: [
|
|
2418
|
+
{
|
|
2419
|
+
description: "A large multilingual dataset of text crawled from the web.",
|
|
2420
|
+
id: "mc4"
|
|
2421
|
+
},
|
|
2422
|
+
{
|
|
2423
|
+
description: "Diverse open-source data consisting of 22 smaller high-quality datasets. It was used to train GPT-Neo.",
|
|
2424
|
+
id: "the_pile"
|
|
2425
|
+
},
|
|
2426
|
+
{
|
|
2427
|
+
description: "A crowd-sourced instruction dataset to develop an AI assistant.",
|
|
2428
|
+
id: "OpenAssistant/oasst1"
|
|
2429
|
+
},
|
|
2430
|
+
{
|
|
2431
|
+
description: "A crowd-sourced instruction dataset created by Databricks employees.",
|
|
2432
|
+
id: "databricks/databricks-dolly-15k"
|
|
2433
|
+
}
|
|
2434
|
+
],
|
|
2435
|
+
demo: {
|
|
2436
|
+
inputs: [
|
|
2437
|
+
{
|
|
2438
|
+
label: "Input",
|
|
2439
|
+
content: "Once upon a time,",
|
|
2440
|
+
type: "text"
|
|
2441
|
+
}
|
|
2442
|
+
],
|
|
2443
|
+
outputs: [
|
|
2444
|
+
{
|
|
2445
|
+
label: "Output",
|
|
2446
|
+
content: "Once upon a time, we knew that our ancestors were on the verge of extinction. The great explorers and poets of the Old World, from Alexander the Great to Chaucer, are dead and gone. A good many of our ancient explorers and poets have",
|
|
2447
|
+
type: "text"
|
|
2448
|
+
}
|
|
2449
|
+
]
|
|
2450
|
+
},
|
|
2451
|
+
metrics: [
|
|
2452
|
+
{
|
|
2453
|
+
description: "Cross Entropy is a metric that calculates the difference between two probability distributions. Each probability distribution is the distribution of predicted words",
|
|
2454
|
+
id: "Cross Entropy"
|
|
2455
|
+
},
|
|
2456
|
+
{
|
|
2457
|
+
description: "The Perplexity metric is the exponential of the cross-entropy loss. It evaluates the probabilities assigned to the next word by the model. Lower perplexity indicates better performance",
|
|
2458
|
+
id: "Perplexity"
|
|
2459
|
+
}
|
|
2460
|
+
],
|
|
2461
|
+
models: [
|
|
2462
|
+
{
|
|
2463
|
+
description: "A large language model trained for text generation.",
|
|
2464
|
+
id: "bigscience/bloom-560m"
|
|
2465
|
+
},
|
|
2466
|
+
{
|
|
2467
|
+
description: "A large code generation model that can generate code in 80+ languages.",
|
|
2468
|
+
id: "bigcode/starcoder"
|
|
2469
|
+
},
|
|
2470
|
+
{
|
|
2471
|
+
description: "A model trained to follow instructions, uses Pythia-12b as base model.",
|
|
2472
|
+
id: "databricks/dolly-v2-12b"
|
|
2473
|
+
},
|
|
2474
|
+
{
|
|
2475
|
+
description: "A model trained to follow instructions curated by community, uses Pythia-12b as base model.",
|
|
2476
|
+
id: "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5"
|
|
2477
|
+
},
|
|
2478
|
+
{
|
|
2479
|
+
description: "A large language model trained to generate text in English.",
|
|
2480
|
+
id: "stabilityai/stablelm-tuned-alpha-7b"
|
|
2481
|
+
},
|
|
2482
|
+
{
|
|
2483
|
+
description: "A model trained to follow instructions, based on mosaicml/mpt-7b.",
|
|
2484
|
+
id: "mosaicml/mpt-7b-instruct"
|
|
2485
|
+
},
|
|
2486
|
+
{
|
|
2487
|
+
description: "A large language model trained to generate text in English.",
|
|
2488
|
+
id: "EleutherAI/pythia-12b"
|
|
2489
|
+
},
|
|
2490
|
+
{
|
|
2491
|
+
description: "A large text-to-text model trained to follow instructions.",
|
|
2492
|
+
id: "google/flan-ul2"
|
|
2493
|
+
},
|
|
2494
|
+
{
|
|
2495
|
+
description: "A large and powerful text generation model.",
|
|
2496
|
+
id: "tiiuae/falcon-40b"
|
|
2497
|
+
},
|
|
2498
|
+
{
|
|
2499
|
+
description: "State-of-the-art open-source large language model.",
|
|
2500
|
+
id: "meta-llama/Llama-2-70b-hf"
|
|
2501
|
+
}
|
|
2502
|
+
],
|
|
2503
|
+
spaces: [
|
|
2504
|
+
{
|
|
2505
|
+
description: "A robust text generation model that can perform various tasks through natural language prompting.",
|
|
2506
|
+
id: "huggingface/bloom_demo"
|
|
2507
|
+
},
|
|
2508
|
+
{
|
|
2509
|
+
description: "An text generation based application that can write code for 80+ languages.",
|
|
2510
|
+
id: "bigcode/bigcode-playground"
|
|
2511
|
+
},
|
|
2512
|
+
{
|
|
2513
|
+
description: "An text generation based application for conversations.",
|
|
2514
|
+
id: "h2oai/h2ogpt-chatbot"
|
|
2515
|
+
},
|
|
2516
|
+
{
|
|
2517
|
+
description: "An text generation application that combines OpenAI and Hugging Face models.",
|
|
2518
|
+
id: "microsoft/HuggingGPT"
|
|
2519
|
+
},
|
|
2520
|
+
{
|
|
2521
|
+
description: "An text generation application that uses StableLM-tuned-alpha-7b.",
|
|
2522
|
+
id: "stabilityai/stablelm-tuned-alpha-chat"
|
|
2523
|
+
},
|
|
2524
|
+
{
|
|
2525
|
+
description: "An UI that uses StableLM-tuned-alpha-7b.",
|
|
2526
|
+
id: "togethercomputer/OpenChatKit"
|
|
2527
|
+
}
|
|
2528
|
+
],
|
|
2529
|
+
summary: "Generating text is the task of producing new text. These models can, for example, fill in incomplete text or paraphrase.",
|
|
2530
|
+
widgetModels: ["tiiuae/falcon-7b-instruct"],
|
|
2531
|
+
youtubeId: "Vpjb1lu0MDk"
|
|
2532
|
+
};
|
|
2533
|
+
var data_default27 = taskData27;
|
|
2534
|
+
|
|
2535
|
+
// src/text-to-video/data.ts
|
|
2536
|
+
var taskData28 = {
|
|
2537
|
+
datasets: [
|
|
2538
|
+
{
|
|
2539
|
+
description: "Microsoft Research Video to Text is a large-scale dataset for open domain video captioning",
|
|
2540
|
+
id: "iejMac/CLIP-MSR-VTT"
|
|
2541
|
+
},
|
|
2542
|
+
{
|
|
2543
|
+
description: "UCF101 Human Actions dataset consists of 13,320 video clips from YouTube, with 101 classes.",
|
|
2544
|
+
id: "quchenyuan/UCF101-ZIP"
|
|
2545
|
+
},
|
|
2546
|
+
{
|
|
2547
|
+
description: "A high-quality dataset for human action recognition in YouTube videos.",
|
|
2548
|
+
id: "nateraw/kinetics"
|
|
2549
|
+
},
|
|
2550
|
+
{
|
|
2551
|
+
description: "A dataset of video clips of humans performing pre-defined basic actions with everyday objects.",
|
|
2552
|
+
id: "HuggingFaceM4/something_something_v2"
|
|
2553
|
+
},
|
|
2554
|
+
{
|
|
2555
|
+
description: "This dataset consists of text-video pairs and contains noisy samples with irrelevant video descriptions",
|
|
2556
|
+
id: "HuggingFaceM4/webvid"
|
|
2557
|
+
},
|
|
2558
|
+
{
|
|
2559
|
+
description: "A dataset of short Flickr videos for the temporal localization of events with descriptions.",
|
|
2560
|
+
id: "iejMac/CLIP-DiDeMo"
|
|
2561
|
+
}
|
|
2562
|
+
],
|
|
2563
|
+
demo: {
|
|
2564
|
+
inputs: [
|
|
2565
|
+
{
|
|
2566
|
+
label: "Input",
|
|
2567
|
+
content: "Darth Vader is surfing on the waves.",
|
|
2568
|
+
type: "text"
|
|
2569
|
+
}
|
|
2570
|
+
],
|
|
2571
|
+
outputs: [
|
|
2572
|
+
{
|
|
2573
|
+
filename: "text-to-video-output.gif",
|
|
2574
|
+
type: "img"
|
|
2575
|
+
}
|
|
2576
|
+
]
|
|
2577
|
+
},
|
|
2578
|
+
metrics: [
|
|
2579
|
+
{
|
|
2580
|
+
description: "Inception Score uses an image classification model that predicts class labels and evaluates how distinct and diverse the images are. A higher score indicates better video generation.",
|
|
2581
|
+
id: "is"
|
|
2582
|
+
},
|
|
2583
|
+
{
|
|
2584
|
+
description: "Frechet Inception Distance uses an image classification model to obtain image embeddings. The metric compares mean and standard deviation of the embeddings of real and generated images. A smaller score indicates better video generation.",
|
|
2585
|
+
id: "fid"
|
|
2586
|
+
},
|
|
2587
|
+
{
|
|
2588
|
+
description: "Frechet Video Distance uses a model that captures coherence for changes in frames and the quality of each frame. A smaller score indicates better video generation.",
|
|
2589
|
+
id: "fvd"
|
|
2590
|
+
},
|
|
2591
|
+
{
|
|
2592
|
+
description: "CLIPSIM measures similarity between video frames and text using an image-text similarity model. A higher score indicates better video generation.",
|
|
2593
|
+
id: "clipsim"
|
|
2594
|
+
}
|
|
2595
|
+
],
|
|
2596
|
+
models: [
|
|
2597
|
+
{
|
|
2598
|
+
description: "A strong model for video generation.",
|
|
2599
|
+
id: "PAIR/text2video-zero-controlnet-canny-arcane"
|
|
2600
|
+
},
|
|
2601
|
+
{
|
|
2602
|
+
description: "A robust model for text-to-video generation.",
|
|
2603
|
+
id: "damo-vilab/text-to-video-ms-1.7b"
|
|
2604
|
+
},
|
|
2605
|
+
{
|
|
2606
|
+
description: "A text-to-video generation model with high quality and smooth outputs.",
|
|
2607
|
+
id: "cerspense/zeroscope_v2_576w"
|
|
2608
|
+
}
|
|
2609
|
+
],
|
|
2610
|
+
spaces: [
|
|
2611
|
+
{
|
|
2612
|
+
description: "An application that generates video from text.",
|
|
2613
|
+
id: "fffiloni/zeroscope"
|
|
2614
|
+
},
|
|
2615
|
+
{
|
|
2616
|
+
description: "An application that generates video from image and text.",
|
|
2617
|
+
id: "TempoFunk/makeavid-sd-jax"
|
|
2618
|
+
},
|
|
2619
|
+
{
|
|
2620
|
+
description: "An application that generates videos from text and provides multi-model support.",
|
|
2621
|
+
id: "ArtGAN/Video-Diffusion-WebUI"
|
|
2622
|
+
}
|
|
2623
|
+
],
|
|
2624
|
+
summary: "Text-to-video models can be used in any application that requires generating consistent sequence of images from text. ",
|
|
2625
|
+
widgetModels: [],
|
|
2626
|
+
youtubeId: void 0
|
|
2627
|
+
};
|
|
2628
|
+
var data_default28 = taskData28;
|
|
2629
|
+
|
|
2630
|
+
// src/unconditional-image-generation/data.ts
|
|
2631
|
+
var taskData29 = {
|
|
2632
|
+
datasets: [
|
|
2633
|
+
{
|
|
2634
|
+
description: "The CIFAR-100 dataset consists of 60000 32x32 colour images in 100 classes, with 600 images per class.",
|
|
2635
|
+
id: "cifar100"
|
|
2636
|
+
},
|
|
2637
|
+
{
|
|
2638
|
+
description: "Multiple images of celebrities, used for facial expression translation.",
|
|
2639
|
+
id: "CelebA"
|
|
2640
|
+
}
|
|
2641
|
+
],
|
|
2642
|
+
demo: {
|
|
2643
|
+
inputs: [
|
|
2644
|
+
{
|
|
2645
|
+
label: "Seed",
|
|
2646
|
+
content: "42",
|
|
2647
|
+
type: "text"
|
|
2648
|
+
},
|
|
2649
|
+
{
|
|
2650
|
+
label: "Number of images to generate:",
|
|
2651
|
+
content: "4",
|
|
2652
|
+
type: "text"
|
|
2653
|
+
}
|
|
2654
|
+
],
|
|
2655
|
+
outputs: [
|
|
2656
|
+
{
|
|
2657
|
+
filename: "unconditional-image-generation-output.jpeg",
|
|
2658
|
+
type: "img"
|
|
2659
|
+
}
|
|
2660
|
+
]
|
|
2661
|
+
},
|
|
2662
|
+
metrics: [
|
|
2663
|
+
{
|
|
2664
|
+
description: "The inception score (IS) evaluates the quality of generated images. It measures the diversity of the generated images (the model predictions are evenly distributed across all possible labels) and their 'distinction' or 'sharpness' (the model confidently predicts a single label for each image).",
|
|
2665
|
+
id: "Inception score (IS)"
|
|
2666
|
+
},
|
|
2667
|
+
{
|
|
2668
|
+
description: "The Fr\xE9chet Inception Distance (FID) evaluates the quality of images created by a generative model by calculating the distance between feature vectors for real and generated images.",
|
|
2669
|
+
id: "Fre\u0107het Inception Distance (FID)"
|
|
2670
|
+
}
|
|
2671
|
+
],
|
|
2672
|
+
models: [
|
|
2673
|
+
{
|
|
2674
|
+
description: "High-quality image generation model trained on the CIFAR-10 dataset. It synthesizes images of the ten classes presented in the dataset using diffusion probabilistic models, a class of latent variable models inspired by considerations from nonequilibrium thermodynamics.",
|
|
2675
|
+
id: "google/ddpm-cifar10-32"
|
|
2676
|
+
},
|
|
2677
|
+
{
|
|
2678
|
+
description: "High-quality image generation model trained on the 256x256 CelebA-HQ dataset. It synthesizes images of faces using diffusion probabilistic models, a class of latent variable models inspired by considerations from nonequilibrium thermodynamics.",
|
|
2679
|
+
id: "google/ddpm-celebahq-256"
|
|
2680
|
+
}
|
|
2681
|
+
],
|
|
2682
|
+
spaces: [
|
|
2683
|
+
{
|
|
2684
|
+
description: "An application that can generate realistic faces.",
|
|
2685
|
+
id: "CompVis/celeba-latent-diffusion"
|
|
2686
|
+
}
|
|
2687
|
+
],
|
|
2688
|
+
summary: "Unconditional image generation is the task of generating images with no condition in any context (like a prompt text or another image). Once trained, the model will create images that resemble its training data distribution.",
|
|
2689
|
+
widgetModels: [""],
|
|
2690
|
+
// TODO: Add related video
|
|
2691
|
+
youtubeId: ""
|
|
2692
|
+
};
|
|
2693
|
+
var data_default29 = taskData29;
|
|
2694
|
+
|
|
2695
|
+
// src/video-classification/data.ts
|
|
2696
|
+
var taskData30 = {
|
|
2697
|
+
datasets: [
|
|
2698
|
+
{
|
|
2699
|
+
// TODO write proper description
|
|
2700
|
+
description: "Benchmark dataset used for video classification with videos that belong to 400 classes.",
|
|
2701
|
+
id: "kinetics400"
|
|
2702
|
+
}
|
|
2703
|
+
],
|
|
2704
|
+
demo: {
|
|
2705
|
+
inputs: [
|
|
2706
|
+
{
|
|
2707
|
+
filename: "video-classification-input.gif",
|
|
2708
|
+
type: "img"
|
|
2709
|
+
}
|
|
2710
|
+
],
|
|
2711
|
+
outputs: [
|
|
2712
|
+
{
|
|
2713
|
+
type: "chart",
|
|
2714
|
+
data: [
|
|
2715
|
+
{
|
|
2716
|
+
label: "Playing Guitar",
|
|
2717
|
+
score: 0.514
|
|
2718
|
+
},
|
|
2719
|
+
{
|
|
2720
|
+
label: "Playing Tennis",
|
|
2721
|
+
score: 0.193
|
|
2722
|
+
},
|
|
2723
|
+
{
|
|
2724
|
+
label: "Cooking",
|
|
2725
|
+
score: 0.068
|
|
2726
|
+
}
|
|
2727
|
+
]
|
|
2728
|
+
}
|
|
2729
|
+
]
|
|
2730
|
+
},
|
|
2731
|
+
metrics: [
|
|
2732
|
+
{
|
|
2733
|
+
description: "",
|
|
2734
|
+
id: "accuracy"
|
|
2735
|
+
},
|
|
2736
|
+
{
|
|
2737
|
+
description: "",
|
|
2738
|
+
id: "recall"
|
|
2739
|
+
},
|
|
2740
|
+
{
|
|
2741
|
+
description: "",
|
|
2742
|
+
id: "precision"
|
|
2743
|
+
},
|
|
2744
|
+
{
|
|
2745
|
+
description: "",
|
|
2746
|
+
id: "f1"
|
|
2747
|
+
}
|
|
2748
|
+
],
|
|
2749
|
+
models: [
|
|
2750
|
+
{
|
|
2751
|
+
// TO DO: write description
|
|
2752
|
+
description: "Strong Video Classification model trained on the Kinects 400 dataset.",
|
|
2753
|
+
id: "MCG-NJU/videomae-base-finetuned-kinetics"
|
|
2754
|
+
},
|
|
2755
|
+
{
|
|
2756
|
+
// TO DO: write description
|
|
2757
|
+
description: "Strong Video Classification model trained on the Kinects 400 dataset.",
|
|
2758
|
+
id: "microsoft/xclip-base-patch32"
|
|
2759
|
+
}
|
|
2760
|
+
],
|
|
2761
|
+
spaces: [
|
|
2762
|
+
{
|
|
2763
|
+
description: "An application that classifies video at different timestamps.",
|
|
2764
|
+
id: "nateraw/lavila"
|
|
2765
|
+
},
|
|
2766
|
+
{
|
|
2767
|
+
description: "An application that classifies video.",
|
|
2768
|
+
id: "fcakyon/video-classification"
|
|
2769
|
+
}
|
|
2770
|
+
],
|
|
2771
|
+
summary: "Video classification is the task of assigning a label or class to an entire video. Videos are expected to have only one class for each video. Video classification models take a video as input and return a prediction about which class the video belongs to.",
|
|
2772
|
+
widgetModels: [],
|
|
2773
|
+
youtubeId: ""
|
|
2774
|
+
};
|
|
2775
|
+
var data_default30 = taskData30;
|
|
2776
|
+
|
|
2777
|
+
// src/visual-question-answering/data.ts
|
|
2778
|
+
var taskData31 = {
|
|
2779
|
+
datasets: [
|
|
2780
|
+
{
|
|
2781
|
+
description: "A widely used dataset containing questions (with answers) about images.",
|
|
2782
|
+
id: "Graphcore/vqa"
|
|
2783
|
+
},
|
|
2784
|
+
{
|
|
2785
|
+
description: "A dataset to benchmark visual reasoning based on text in images.",
|
|
2786
|
+
id: "textvqa"
|
|
2787
|
+
}
|
|
2788
|
+
],
|
|
2789
|
+
demo: {
|
|
2790
|
+
inputs: [
|
|
2791
|
+
{
|
|
2792
|
+
filename: "elephant.jpeg",
|
|
2793
|
+
type: "img"
|
|
2794
|
+
},
|
|
2795
|
+
{
|
|
2796
|
+
label: "Question",
|
|
2797
|
+
content: "What is in this image?",
|
|
2798
|
+
type: "text"
|
|
2799
|
+
}
|
|
2800
|
+
],
|
|
2801
|
+
outputs: [
|
|
2802
|
+
{
|
|
2803
|
+
type: "chart",
|
|
2804
|
+
data: [
|
|
2805
|
+
{
|
|
2806
|
+
label: "elephant",
|
|
2807
|
+
score: 0.97
|
|
2808
|
+
},
|
|
2809
|
+
{
|
|
2810
|
+
label: "elephants",
|
|
2811
|
+
score: 0.06
|
|
2812
|
+
},
|
|
2813
|
+
{
|
|
2814
|
+
label: "animal",
|
|
2815
|
+
score: 3e-3
|
|
2816
|
+
}
|
|
2817
|
+
]
|
|
2818
|
+
}
|
|
2819
|
+
]
|
|
2820
|
+
},
|
|
2821
|
+
isPlaceholder: false,
|
|
2822
|
+
metrics: [
|
|
2823
|
+
{
|
|
2824
|
+
description: "",
|
|
2825
|
+
id: "accuracy"
|
|
2826
|
+
},
|
|
2827
|
+
{
|
|
2828
|
+
description: "Measures how much a predicted answer differs from the ground truth based on the difference in their semantic meaning.",
|
|
2829
|
+
id: "wu-palmer similarity"
|
|
2830
|
+
}
|
|
2831
|
+
],
|
|
2832
|
+
models: [
|
|
2833
|
+
{
|
|
2834
|
+
description: "A visual question answering model trained to convert charts and plots to text.",
|
|
2835
|
+
id: "google/deplot"
|
|
2836
|
+
},
|
|
2837
|
+
{
|
|
2838
|
+
description: "A visual question answering model trained for mathematical reasoning and chart derendering from images.",
|
|
2839
|
+
id: "google/matcha-base "
|
|
2840
|
+
},
|
|
2841
|
+
{
|
|
2842
|
+
description: "A strong visual question answering that answers questions from book covers.",
|
|
2843
|
+
id: "google/pix2struct-ocrvqa-large"
|
|
2844
|
+
}
|
|
2845
|
+
],
|
|
2846
|
+
spaces: [
|
|
2847
|
+
{
|
|
2848
|
+
description: "An application that can answer questions based on images.",
|
|
2849
|
+
id: "nielsr/vilt-vqa"
|
|
2850
|
+
},
|
|
2851
|
+
{
|
|
2852
|
+
description: "An application that can caption images and answer questions about a given image. ",
|
|
2853
|
+
id: "Salesforce/BLIP"
|
|
2854
|
+
},
|
|
2855
|
+
{
|
|
2856
|
+
description: "An application that can caption images and answer questions about a given image. ",
|
|
2857
|
+
id: "vumichien/Img2Prompt"
|
|
2858
|
+
}
|
|
2859
|
+
],
|
|
2860
|
+
summary: "Visual Question Answering is the task of answering open-ended questions based on an image. They output natural language responses to natural language questions.",
|
|
2861
|
+
widgetModels: ["dandelin/vilt-b32-finetuned-vqa"],
|
|
2862
|
+
youtubeId: ""
|
|
2863
|
+
};
|
|
2864
|
+
var data_default31 = taskData31;
|
|
2865
|
+
|
|
2866
|
+
// src/zero-shot-classification/data.ts
|
|
2867
|
+
var taskData32 = {
|
|
2868
|
+
datasets: [
|
|
2869
|
+
{
|
|
2870
|
+
description: "A widely used dataset used to benchmark multiple variants of text classification.",
|
|
2871
|
+
id: "glue"
|
|
2872
|
+
},
|
|
2873
|
+
{
|
|
2874
|
+
description: "The Multi-Genre Natural Language Inference (MultiNLI) corpus is a crowd-sourced collection of 433k sentence pairs annotated with textual entailment information.",
|
|
2875
|
+
id: "MultiNLI"
|
|
2876
|
+
},
|
|
2877
|
+
{
|
|
2878
|
+
description: "FEVER is a publicly available dataset for fact extraction and verification against textual sources.",
|
|
2879
|
+
id: "FEVER"
|
|
2880
|
+
}
|
|
2881
|
+
],
|
|
2882
|
+
demo: {
|
|
2883
|
+
inputs: [
|
|
2884
|
+
{
|
|
2885
|
+
label: "Text Input",
|
|
2886
|
+
content: "Dune is the best movie ever.",
|
|
2887
|
+
type: "text"
|
|
2888
|
+
},
|
|
2889
|
+
{
|
|
2890
|
+
label: "Candidate Labels",
|
|
2891
|
+
content: "CINEMA, ART, MUSIC",
|
|
2892
|
+
type: "text"
|
|
2893
|
+
}
|
|
2894
|
+
],
|
|
2895
|
+
outputs: [
|
|
2896
|
+
{
|
|
2897
|
+
type: "chart",
|
|
2898
|
+
data: [
|
|
2899
|
+
{
|
|
2900
|
+
label: "CINEMA",
|
|
2901
|
+
score: 0.9
|
|
2902
|
+
},
|
|
2903
|
+
{
|
|
2904
|
+
label: "ART",
|
|
2905
|
+
score: 0.1
|
|
2906
|
+
},
|
|
2907
|
+
{
|
|
2908
|
+
label: "MUSIC",
|
|
2909
|
+
score: 0
|
|
2910
|
+
}
|
|
2911
|
+
]
|
|
2912
|
+
}
|
|
2913
|
+
]
|
|
2914
|
+
},
|
|
2915
|
+
metrics: [],
|
|
2916
|
+
models: [
|
|
2917
|
+
{
|
|
2918
|
+
description: "Powerful zero-shot text classification model",
|
|
2919
|
+
id: "facebook/bart-large-mnli"
|
|
2920
|
+
}
|
|
2921
|
+
],
|
|
2922
|
+
spaces: [],
|
|
2923
|
+
summary: "Zero-shot text classification is a task in natural language processing where a model is trained on a set of labeled examples but is then able to classify new examples from previously unseen classes.",
|
|
2924
|
+
widgetModels: ["facebook/bart-large-mnli"]
|
|
2925
|
+
};
|
|
2926
|
+
var data_default32 = taskData32;
|
|
2927
|
+
|
|
2928
|
+
// src/zero-shot-image-classification/data.ts
|
|
2929
|
+
var taskData33 = {
|
|
2930
|
+
datasets: [
|
|
2931
|
+
{
|
|
2932
|
+
// TODO write proper description
|
|
2933
|
+
description: "",
|
|
2934
|
+
id: ""
|
|
2935
|
+
}
|
|
2936
|
+
],
|
|
2937
|
+
demo: {
|
|
2938
|
+
inputs: [
|
|
2939
|
+
{
|
|
2940
|
+
filename: "image-classification-input.jpeg",
|
|
2941
|
+
type: "img"
|
|
2942
|
+
},
|
|
2943
|
+
{
|
|
2944
|
+
label: "Classes",
|
|
2945
|
+
content: "cat, dog, bird",
|
|
2946
|
+
type: "text"
|
|
2947
|
+
}
|
|
2948
|
+
],
|
|
2949
|
+
outputs: [
|
|
2950
|
+
{
|
|
2951
|
+
type: "chart",
|
|
2952
|
+
data: [
|
|
2953
|
+
{
|
|
2954
|
+
label: "Cat",
|
|
2955
|
+
score: 0.664
|
|
2956
|
+
},
|
|
2957
|
+
{
|
|
2958
|
+
label: "Dog",
|
|
2959
|
+
score: 0.329
|
|
2960
|
+
},
|
|
2961
|
+
{
|
|
2962
|
+
label: "Bird",
|
|
2963
|
+
score: 8e-3
|
|
2964
|
+
}
|
|
2965
|
+
]
|
|
2966
|
+
}
|
|
2967
|
+
]
|
|
2968
|
+
},
|
|
2969
|
+
metrics: [
|
|
2970
|
+
{
|
|
2971
|
+
description: "Computes the number of times the correct label appears in top K labels predicted",
|
|
2972
|
+
id: "top-K accuracy"
|
|
2973
|
+
}
|
|
2974
|
+
],
|
|
2975
|
+
models: [
|
|
2976
|
+
{
|
|
2977
|
+
description: "Robust image classification model trained on publicly available image-caption data.",
|
|
2978
|
+
id: "openai/clip-vit-base-patch16"
|
|
2979
|
+
},
|
|
2980
|
+
{
|
|
2981
|
+
description: "Robust image classification model trained on publicly available image-caption data trained on additional high pixel data for better performance.",
|
|
2982
|
+
id: "openai/clip-vit-large-patch14-336"
|
|
2983
|
+
},
|
|
2984
|
+
{
|
|
2985
|
+
description: "Strong image classification model for biomedical domain.",
|
|
2986
|
+
id: "microsoft/BiomedCLIP-PubMedBERT_256-vit_base_patch16_224"
|
|
2987
|
+
}
|
|
2988
|
+
],
|
|
2989
|
+
spaces: [
|
|
2990
|
+
{
|
|
2991
|
+
description: "An application that leverages zero shot image classification to find best captions to generate an image. ",
|
|
2992
|
+
id: "pharma/CLIP-Interrogator"
|
|
2993
|
+
}
|
|
2994
|
+
],
|
|
2995
|
+
summary: "Zero shot image classification is the task of classifying previously unseen classes during training of a model.",
|
|
2996
|
+
widgetModels: ["openai/clip-vit-large-patch14-336"],
|
|
2997
|
+
youtubeId: ""
|
|
2998
|
+
};
|
|
2999
|
+
var data_default33 = taskData33;
|
|
3000
|
+
|
|
3001
|
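For orientation, the per-task objects above (taskData31 through taskData33, like the earlier taskData constants) all share the same shape: datasets, demo inputs/outputs, metrics, models, spaces, summary, and widget models. The package's actual type declarations live in package/src/Types.ts, which is not reproduced here; the TypeScript sketch below is only a hand-written approximation of that shape, and the interface names and optional markers are assumptions rather than the package's own definitions.

// Rough, unofficial sketch of the task-data shape used by the objects above.
// The real types live in src/Types.ts of @huggingface/tasks; names and
// optionality here are assumptions for illustration only.
interface TaskDemoEntrySketch {
  type: string;                               // "text" | "img" | "chart" | ...
  label?: string;                             // e.g. "Candidate Labels"
  content?: string;                           // e.g. "CINEMA, ART, MUSIC"
  filename?: string;                          // e.g. "image-classification-input.jpeg"
  data?: { label: string; score: number }[];  // used by "chart" outputs
}

interface TaskDataSketch {
  datasets: { description: string; id: string }[];
  demo: { inputs: TaskDemoEntrySketch[]; outputs: TaskDemoEntrySketch[] };
  metrics: { description: string; id: string }[];
  models: { description: string; id: string }[];
  spaces: { description: string; id: string }[];
  summary: string;
  widgetModels: string[];
  youtubeId?: string;
}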
+// src/const.ts
+var TASKS_MODEL_LIBRARIES = {
+  "audio-classification": ["speechbrain", "transformers"],
+  "audio-to-audio": ["asteroid", "speechbrain"],
+  "automatic-speech-recognition": ["espnet", "nemo", "speechbrain", "transformers", "transformers.js"],
+  "conversational": ["transformers"],
+  "depth-estimation": ["transformers"],
+  "document-question-answering": ["transformers"],
+  "feature-extraction": ["sentence-transformers", "transformers", "transformers.js"],
+  "fill-mask": ["transformers", "transformers.js"],
+  "graph-ml": ["transformers"],
+  "image-classification": ["keras", "timm", "transformers", "transformers.js"],
+  "image-segmentation": ["transformers", "transformers.js"],
+  "image-to-image": [],
+  "image-to-text": ["transformers.js"],
+  "video-classification": [],
+  "multiple-choice": ["transformers"],
+  "object-detection": ["transformers", "transformers.js"],
+  "other": [],
+  "question-answering": ["adapter-transformers", "allennlp", "transformers", "transformers.js"],
+  "robotics": [],
+  "reinforcement-learning": ["transformers", "stable-baselines3", "ml-agents", "sample-factory"],
+  "sentence-similarity": ["sentence-transformers", "spacy", "transformers.js"],
+  "summarization": ["transformers", "transformers.js"],
+  "table-question-answering": ["transformers"],
+  "table-to-text": ["transformers"],
+  "tabular-classification": ["sklearn"],
+  "tabular-regression": ["sklearn"],
+  "tabular-to-text": ["transformers"],
+  "text-classification": ["adapter-transformers", "spacy", "transformers", "transformers.js"],
+  "text-generation": ["transformers", "transformers.js"],
+  "text-retrieval": [],
+  "text-to-image": [],
+  "text-to-speech": ["espnet", "tensorflowtts", "transformers"],
+  "text-to-audio": ["transformers"],
+  "text-to-video": [],
+  "text2text-generation": ["transformers", "transformers.js"],
+  "time-series-forecasting": [],
+  "token-classification": ["adapter-transformers", "flair", "spacy", "span-marker", "stanza", "transformers", "transformers.js"],
+  "translation": ["transformers", "transformers.js"],
+  "unconditional-image-generation": [],
+  "visual-question-answering": [],
+  "voice-activity-detection": [],
+  "zero-shot-classification": ["transformers", "transformers.js"],
+  "zero-shot-image-classification": ["transformers.js"]
+};
+
+// src/tasksData.ts
+var TASKS_DATA = {
+  "audio-classification": getData("audio-classification", data_default),
+  "audio-to-audio": getData("audio-to-audio", data_default2),
+  "automatic-speech-recognition": getData("automatic-speech-recognition", data_default3),
+  "conversational": getData("conversational", data_default4),
+  "depth-estimation": getData("depth-estimation", data_default13),
+  "document-question-answering": getData("document-question-answering", data_default5),
+  "feature-extraction": getData("feature-extraction", data_default6),
+  "fill-mask": getData("fill-mask", data_default7),
+  "graph-ml": void 0,
+  "image-classification": getData("image-classification", data_default8),
+  "image-segmentation": getData("image-segmentation", data_default11),
+  "image-to-image": getData("image-to-image", data_default9),
+  "image-to-text": getData("image-to-text", data_default10),
+  "multiple-choice": void 0,
+  "object-detection": getData("object-detection", data_default12),
+  "video-classification": getData("video-classification", data_default30),
+  "other": void 0,
+  "question-answering": getData("question-answering", data_default16),
+  "reinforcement-learning": getData("reinforcement-learning", data_default15),
+  "robotics": void 0,
+  "sentence-similarity": getData("sentence-similarity", data_default17),
+  "summarization": getData("summarization", data_default18),
+  "table-question-answering": getData("table-question-answering", data_default19),
+  "table-to-text": void 0,
+  "tabular-classification": getData("tabular-classification", data_default20),
+  "tabular-regression": getData("tabular-regression", data_default21),
+  "tabular-to-text": void 0,
+  "text-classification": getData("text-classification", data_default26),
+  "text-generation": getData("text-generation", data_default27),
+  "text-retrieval": void 0,
+  "text-to-image": getData("text-to-image", data_default22),
+  "text-to-speech": getData("text-to-speech", data_default23),
+  "text-to-audio": void 0,
+  "text-to-video": getData("text-to-video", data_default28),
+  "text2text-generation": void 0,
+  "time-series-forecasting": void 0,
+  "token-classification": getData("token-classification", data_default24),
+  "translation": getData("translation", data_default25),
+  "unconditional-image-generation": getData("unconditional-image-generation", data_default29),
+  "visual-question-answering": getData("visual-question-answering", data_default31),
+  "voice-activity-detection": void 0,
+  "zero-shot-classification": getData("zero-shot-classification", data_default32),
+  "zero-shot-image-classification": getData("zero-shot-image-classification", data_default33)
+};
+function getData(type, partialTaskData = data_default14) {
+  return {
+    ...partialTaskData,
+    id: type,
+    label: PIPELINE_DATA[type].name,
+    libraries: TASKS_MODEL_LIBRARIES[type]
+  };
+}
+// Annotate the CommonJS export names for ESM import in node:
+0 && (module.exports = {
+  TASKS_DATA
+});
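The bundle ends by annotating TASKS_DATA as the CommonJS export consumed via ESM interop. As a minimal consumption sketch (assuming @huggingface/tasks 0.0.1 is installed and that TASKS_DATA is the export you need, as annotated above), a consumer might read the merged task metadata like this; note that some entries are deliberately undefined (void 0), so a guard is needed:

// Minimal sketch, assuming @huggingface/tasks is installed and exports
// TASKS_DATA as shown in the bundle above. Field names (label, libraries,
// summary, widgetModels) come from the objects built by getData().
import { TASKS_DATA } from "@huggingface/tasks";

const task = TASKS_DATA["zero-shot-classification"];
if (task) {
  // getData() merged the task id, the display label from PIPELINE_DATA and
  // the supported libraries from TASKS_MODEL_LIBRARIES into the task data.
  console.log(task.label);        // human-readable task name
  console.log(task.libraries);    // e.g. ["transformers", "transformers.js"]
  console.log(task.summary);      // short task description
  console.log(task.widgetModels); // e.g. ["facebook/bart-large-mnli"]
}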