@huggingface/tasks 0.12.1 → 0.12.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +120 -57
- package/dist/index.js +120 -57
- package/dist/src/model-libraries-snippets.d.ts +1 -0
- package/dist/src/model-libraries-snippets.d.ts.map +1 -1
- package/dist/src/model-libraries.d.ts +9 -2
- package/dist/src/model-libraries.d.ts.map +1 -1
- package/dist/src/tasks/audio-classification/data.d.ts.map +1 -1
- package/dist/src/tasks/audio-to-audio/data.d.ts.map +1 -1
- package/dist/src/tasks/automatic-speech-recognition/data.d.ts.map +1 -1
- package/dist/src/tasks/document-question-answering/data.d.ts.map +1 -1
- package/dist/src/tasks/question-answering/data.d.ts.map +1 -1
- package/dist/src/tasks/text-classification/data.d.ts.map +1 -1
- package/dist/src/tasks/text-to-speech/data.d.ts.map +1 -1
- package/dist/src/tasks/token-classification/data.d.ts.map +1 -1
- package/dist/src/tasks/translation/data.d.ts.map +1 -1
- package/dist/src/tasks/zero-shot-classification/data.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/model-libraries-snippets.ts +9 -0
- package/src/model-libraries.ts +7 -0
- package/src/tasks/audio-classification/data.ts +8 -4
- package/src/tasks/audio-to-audio/data.ts +5 -1
- package/src/tasks/automatic-speech-recognition/data.ts +6 -2
- package/src/tasks/document-question-answering/data.ts +7 -3
- package/src/tasks/fill-mask/data.ts +3 -3
- package/src/tasks/image-segmentation/data.ts +1 -1
- package/src/tasks/image-to-image/data.ts +1 -1
- package/src/tasks/image-to-text/data.ts +1 -1
- package/src/tasks/question-answering/data.ts +5 -1
- package/src/tasks/sentence-similarity/data.ts +3 -3
- package/src/tasks/summarization/data.ts +2 -2
- package/src/tasks/text-classification/data.ts +18 -6
- package/src/tasks/text-generation/data.ts +3 -3
- package/src/tasks/text-to-image/data.ts +1 -1
- package/src/tasks/text-to-speech/data.ts +7 -3
- package/src/tasks/token-classification/data.ts +11 -3
- package/src/tasks/translation/data.ts +9 -8
- package/src/tasks/video-classification/data.ts +3 -3
- package/src/tasks/visual-question-answering/data.ts +2 -2
- package/src/tasks/zero-shot-classification/data.ts +8 -4
- package/src/tasks/zero-shot-image-classification/data.ts +2 -2
package/dist/index.cjs
CHANGED
|
@@ -1429,7 +1429,11 @@ var taskData = {
|
|
|
1429
1429
|
datasets: [
|
|
1430
1430
|
{
|
|
1431
1431
|
description: "A benchmark of 10 different audio tasks.",
|
|
1432
|
-
id: "superb"
|
|
1432
|
+
id: "s3prl/superb"
|
|
1433
|
+
},
|
|
1434
|
+
{
|
|
1435
|
+
description: "A dataset of YouTube clips and their sound categories.",
|
|
1436
|
+
id: "agkphysics/AudioSet"
|
|
1433
1437
|
}
|
|
1434
1438
|
],
|
|
1435
1439
|
demo: {
|
|
@@ -1475,11 +1479,11 @@ var taskData = {
|
|
|
1475
1479
|
],
|
|
1476
1480
|
models: [
|
|
1477
1481
|
{
|
|
1478
|
-
description: "An easy-to-use model for
|
|
1482
|
+
description: "An easy-to-use model for command recognition.",
|
|
1479
1483
|
id: "speechbrain/google_speech_command_xvector"
|
|
1480
1484
|
},
|
|
1481
1485
|
{
|
|
1482
|
-
description: "An
|
|
1486
|
+
description: "An emotion recognition model.",
|
|
1483
1487
|
id: "ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition"
|
|
1484
1488
|
},
|
|
1485
1489
|
{
|
|
@@ -1494,7 +1498,7 @@ var taskData = {
|
|
|
1494
1498
|
}
|
|
1495
1499
|
],
|
|
1496
1500
|
summary: "Audio classification is the task of assigning a label or class to a given audio. It can be used for recognizing which command a user is giving or the emotion of a statement, as well as identifying a speaker.",
|
|
1497
|
-
widgetModels: ["
|
|
1501
|
+
widgetModels: ["MIT/ast-finetuned-audioset-10-10-0.4593"],
|
|
1498
1502
|
youtubeId: "KWwzcmG98Ds"
|
|
1499
1503
|
};
|
|
1500
1504
|
var data_default = taskData;
|
|
@@ -1542,7 +1546,11 @@ var taskData2 = {
|
|
|
1542
1546
|
},
|
|
1543
1547
|
{
|
|
1544
1548
|
description: "A speech enhancement model.",
|
|
1545
|
-
id: "
|
|
1549
|
+
id: "ResembleAI/resemble-enhance"
|
|
1550
|
+
},
|
|
1551
|
+
{
|
|
1552
|
+
description: "A model that can change the voice in a speech recording.",
|
|
1553
|
+
id: "microsoft/speecht5_vc"
|
|
1546
1554
|
}
|
|
1547
1555
|
],
|
|
1548
1556
|
spaces: [
|
|
@@ -1569,8 +1577,8 @@ var taskData3 = {
|
|
|
1569
1577
|
id: "mozilla-foundation/common_voice_17_0"
|
|
1570
1578
|
},
|
|
1571
1579
|
{
|
|
1572
|
-
description: "
|
|
1573
|
-
id: "
|
|
1580
|
+
description: "A dataset with 44.6k hours of English speaker data and 6k hours of other language speakers.",
|
|
1581
|
+
id: "parler-tts/mls_eng"
|
|
1574
1582
|
},
|
|
1575
1583
|
{
|
|
1576
1584
|
description: "A multi-lingual audio dataset with 370K hours of audio.",
|
|
@@ -1615,6 +1623,10 @@ var taskData3 = {
|
|
|
1615
1623
|
{
|
|
1616
1624
|
description: "An end-to-end model that performs ASR and Speech Translation by MetaAI.",
|
|
1617
1625
|
id: "facebook/seamless-m4t-v2-large"
|
|
1626
|
+
},
|
|
1627
|
+
{
|
|
1628
|
+
description: "Powerful speaker diarization model.",
|
|
1629
|
+
id: "pyannote/speaker-diarization-3.1"
|
|
1618
1630
|
}
|
|
1619
1631
|
],
|
|
1620
1632
|
spaces: [
|
|
@@ -1681,11 +1693,15 @@ var taskData4 = {
|
|
|
1681
1693
|
],
|
|
1682
1694
|
models: [
|
|
1683
1695
|
{
|
|
1684
|
-
description: "A
|
|
1696
|
+
description: "A robust document question answering model.",
|
|
1685
1697
|
id: "impira/layoutlm-document-qa"
|
|
1686
1698
|
},
|
|
1687
1699
|
{
|
|
1688
|
-
description: "A
|
|
1700
|
+
description: "A document question answering model specialized in invoices.",
|
|
1701
|
+
id: "impira/layoutlm-invoices"
|
|
1702
|
+
},
|
|
1703
|
+
{
|
|
1704
|
+
description: "A special model for OCR-free document question answering.",
|
|
1689
1705
|
id: "microsoft/udop-large"
|
|
1690
1706
|
},
|
|
1691
1707
|
{
|
|
@@ -1708,7 +1724,7 @@ var taskData4 = {
|
|
|
1708
1724
|
}
|
|
1709
1725
|
],
|
|
1710
1726
|
summary: "Document Question Answering (also known as Document Visual Question Answering) is the task of answering questions on document images. Document question answering models take a (document, question) pair as input and return an answer in natural language. Models usually rely on multi-modal features, combining text, position of words (bounding-boxes) and image.",
|
|
1711
|
-
widgetModels: ["impira/layoutlm-
|
|
1727
|
+
widgetModels: ["impira/layoutlm-invoices"],
|
|
1712
1728
|
youtubeId: ""
|
|
1713
1729
|
};
|
|
1714
1730
|
var data_default4 = taskData4;
|
|
@@ -1828,12 +1844,12 @@ var taskData6 = {
|
|
|
1828
1844
|
],
|
|
1829
1845
|
models: [
|
|
1830
1846
|
{
|
|
1831
|
-
description: "
|
|
1832
|
-
id: "
|
|
1847
|
+
description: "The famous BERT model.",
|
|
1848
|
+
id: "google-bert/bert-base-uncased"
|
|
1833
1849
|
},
|
|
1834
1850
|
{
|
|
1835
1851
|
description: "A multilingual model trained on 100 languages.",
|
|
1836
|
-
id: "xlm-roberta-base"
|
|
1852
|
+
id: "FacebookAI/xlm-roberta-base"
|
|
1837
1853
|
}
|
|
1838
1854
|
],
|
|
1839
1855
|
spaces: [],
|
|
@@ -2076,7 +2092,7 @@ var taskData9 = {
|
|
|
2076
2092
|
}
|
|
2077
2093
|
],
|
|
2078
2094
|
summary: "Image-to-image is the task of transforming an input image through a variety of possible manipulations and enhancements, such as super-resolution, image inpainting, colorization, and more.",
|
|
2079
|
-
widgetModels: ["
|
|
2095
|
+
widgetModels: ["stabilityai/stable-diffusion-2-inpainting"],
|
|
2080
2096
|
youtubeId: ""
|
|
2081
2097
|
};
|
|
2082
2098
|
var data_default9 = taskData9;
|
|
@@ -2156,7 +2172,7 @@ var taskData10 = {
|
|
|
2156
2172
|
}
|
|
2157
2173
|
],
|
|
2158
2174
|
summary: "Image to text models output a text from a given image. Image captioning or optical character recognition can be considered as the most common applications of image to text.",
|
|
2159
|
-
widgetModels: ["Salesforce/blip-image-captioning-
|
|
2175
|
+
widgetModels: ["Salesforce/blip-image-captioning-large"],
|
|
2160
2176
|
youtubeId: ""
|
|
2161
2177
|
};
|
|
2162
2178
|
var data_default10 = taskData10;
|
|
@@ -2342,7 +2358,7 @@ var taskData12 = {
|
|
|
2342
2358
|
}
|
|
2343
2359
|
],
|
|
2344
2360
|
summary: "Image Segmentation divides an image into segments where each pixel in the image is mapped to an object. This task has multiple variants such as instance segmentation, panoptic segmentation and semantic segmentation.",
|
|
2345
|
-
widgetModels: ["
|
|
2361
|
+
widgetModels: ["nvidia/segformer-b0-finetuned-ade-512-512"],
|
|
2346
2362
|
youtubeId: "dKE8SIt9C-w"
|
|
2347
2363
|
};
|
|
2348
2364
|
var data_default12 = taskData12;
|
|
@@ -2682,7 +2698,11 @@ var taskData18 = {
|
|
|
2682
2698
|
id: "deepset/roberta-base-squad2"
|
|
2683
2699
|
},
|
|
2684
2700
|
{
|
|
2685
|
-
description: "
|
|
2701
|
+
description: "Small yet robust model that can answer questions.",
|
|
2702
|
+
id: "distilbert/distilbert-base-cased-distilled-squad"
|
|
2703
|
+
},
|
|
2704
|
+
{
|
|
2705
|
+
description: "A special model that can answer questions from tables.",
|
|
2686
2706
|
id: "google/tapas-base-finetuned-wtq"
|
|
2687
2707
|
}
|
|
2688
2708
|
],
|
|
@@ -2765,8 +2785,8 @@ var taskData19 = {
|
|
|
2765
2785
|
id: "sentence-transformers/all-mpnet-base-v2"
|
|
2766
2786
|
},
|
|
2767
2787
|
{
|
|
2768
|
-
description: "A multilingual
|
|
2769
|
-
id: "
|
|
2788
|
+
description: "A multilingual robust sentence similarity model.",
|
|
2789
|
+
id: "BAAI/bge-m3"
|
|
2770
2790
|
}
|
|
2771
2791
|
],
|
|
2772
2792
|
spaces: [
|
|
@@ -2788,7 +2808,7 @@ var taskData19 = {
|
|
|
2788
2808
|
}
|
|
2789
2809
|
],
|
|
2790
2810
|
summary: "Sentence Similarity is the task of determining how similar two texts are. Sentence similarity models convert input texts into vectors (embeddings) that capture semantic information and calculate how close (similar) they are between them. This task is particularly useful for information retrieval and clustering/grouping.",
|
|
2791
|
-
widgetModels: ["
|
|
2811
|
+
widgetModels: ["BAAI/bge-small-en-v1.5"],
|
|
2792
2812
|
youtubeId: "VCZq5AkbNEU"
|
|
2793
2813
|
};
|
|
2794
2814
|
var data_default19 = taskData19;
|
|
@@ -2835,7 +2855,7 @@ var taskData20 = {
|
|
|
2835
2855
|
},
|
|
2836
2856
|
{
|
|
2837
2857
|
description: "A summarization model trained on medical articles.",
|
|
2838
|
-
id: "
|
|
2858
|
+
id: "Falconsai/medical_summarization"
|
|
2839
2859
|
}
|
|
2840
2860
|
],
|
|
2841
2861
|
spaces: [
|
|
@@ -2857,7 +2877,7 @@ var taskData20 = {
|
|
|
2857
2877
|
}
|
|
2858
2878
|
],
|
|
2859
2879
|
summary: "Summarization is the task of producing a shorter version of a document while preserving its important information. Some models can extract text from the original input, while other models can generate entirely new text.",
|
|
2860
|
-
widgetModels: ["
|
|
2880
|
+
widgetModels: ["facebook/bart-large-cnn"],
|
|
2861
2881
|
youtubeId: "yHnr5Dk2zCI"
|
|
2862
2882
|
};
|
|
2863
2883
|
var data_default20 = taskData20;
|
|
@@ -3128,7 +3148,7 @@ var taskData24 = {
|
|
|
3128
3148
|
}
|
|
3129
3149
|
],
|
|
3130
3150
|
summary: "Generates images from input text. These models can be used to generate and modify images based on text prompts.",
|
|
3131
|
-
widgetModels: ["
|
|
3151
|
+
widgetModels: ["black-forest-labs/FLUX.1-dev"],
|
|
3132
3152
|
youtubeId: ""
|
|
3133
3153
|
};
|
|
3134
3154
|
var data_default24 = taskData24;
|
|
@@ -3143,7 +3163,7 @@ var taskData25 = {
|
|
|
3143
3163
|
},
|
|
3144
3164
|
{
|
|
3145
3165
|
description: "Multi-speaker English dataset.",
|
|
3146
|
-
id: "
|
|
3166
|
+
id: "mythicinfinity/libritts_r"
|
|
3147
3167
|
}
|
|
3148
3168
|
],
|
|
3149
3169
|
demo: {
|
|
@@ -3170,11 +3190,15 @@ var taskData25 = {
|
|
|
3170
3190
|
models: [
|
|
3171
3191
|
{
|
|
3172
3192
|
description: "A powerful TTS model.",
|
|
3173
|
-
id: "
|
|
3193
|
+
id: "parler-tts/parler-tts-large-v1"
|
|
3174
3194
|
},
|
|
3175
3195
|
{
|
|
3176
3196
|
description: "A massively multi-lingual TTS model.",
|
|
3177
|
-
id: "
|
|
3197
|
+
id: "coqui/XTTS-v2"
|
|
3198
|
+
},
|
|
3199
|
+
{
|
|
3200
|
+
description: "Robust TTS model.",
|
|
3201
|
+
id: "metavoiceio/metavoice-1B-v0.1"
|
|
3178
3202
|
},
|
|
3179
3203
|
{
|
|
3180
3204
|
description: "A prompt based, powerful TTS model.",
|
|
@@ -3206,11 +3230,11 @@ var taskData26 = {
|
|
|
3206
3230
|
datasets: [
|
|
3207
3231
|
{
|
|
3208
3232
|
description: "A widely used dataset useful to benchmark named entity recognition models.",
|
|
3209
|
-
id: "conll2003"
|
|
3233
|
+
id: "eriktks/conll2003"
|
|
3210
3234
|
},
|
|
3211
3235
|
{
|
|
3212
3236
|
description: "A multilingual dataset of Wikipedia articles annotated for named entity recognition in over 150 different languages.",
|
|
3213
|
-
id: "wikiann"
|
|
3237
|
+
id: "unimelb-nlp/wikiann"
|
|
3214
3238
|
}
|
|
3215
3239
|
],
|
|
3216
3240
|
demo: {
|
|
@@ -3263,6 +3287,14 @@ var taskData26 = {
|
|
|
3263
3287
|
description: "A robust performance model to identify people, locations, organizations and names of miscellaneous entities.",
|
|
3264
3288
|
id: "dslim/bert-base-NER"
|
|
3265
3289
|
},
|
|
3290
|
+
{
|
|
3291
|
+
description: "A strong model to identify people, locations, organizations and names in multiple languages.",
|
|
3292
|
+
id: "FacebookAI/xlm-roberta-large-finetuned-conll03-english"
|
|
3293
|
+
},
|
|
3294
|
+
{
|
|
3295
|
+
description: "A token classification model specialized on medical entity recognition.",
|
|
3296
|
+
id: "blaze999/Medical-NER"
|
|
3297
|
+
},
|
|
3266
3298
|
{
|
|
3267
3299
|
description: "Flair models are typically the state of the art in named entity recognition tasks.",
|
|
3268
3300
|
id: "flair/ner-english"
|
|
@@ -3275,7 +3307,7 @@ var taskData26 = {
|
|
|
3275
3307
|
}
|
|
3276
3308
|
],
|
|
3277
3309
|
summary: "Token classification is a natural language understanding task in which a label is assigned to some tokens in a text. Some popular token classification subtasks are Named Entity Recognition (NER) and Part-of-Speech (PoS) tagging. NER models could be trained to identify specific entities in a text, such as dates, individuals and places; and PoS tagging would identify, for example, which words in a text are verbs, nouns, and punctuation marks.",
|
|
3278
|
-
widgetModels: ["
|
|
3310
|
+
widgetModels: ["FacebookAI/xlm-roberta-large-finetuned-conll03-english"],
|
|
3279
3311
|
youtubeId: "wVHdVlPScxA"
|
|
3280
3312
|
};
|
|
3281
3313
|
var data_default26 = taskData26;
|
|
@@ -3286,11 +3318,11 @@ var taskData27 = {
|
|
|
3286
3318
|
datasets: [
|
|
3287
3319
|
{
|
|
3288
3320
|
description: "A dataset of copyright-free books translated into 16 different languages.",
|
|
3289
|
-
id: "opus_books"
|
|
3321
|
+
id: "Helsinki-NLP/opus_books"
|
|
3290
3322
|
},
|
|
3291
3323
|
{
|
|
3292
3324
|
description: "An example of translation between programming languages. This dataset consists of functions in Java and C#.",
|
|
3293
|
-
id: "code_x_glue_cc_code_to_code_trans"
|
|
3325
|
+
id: "google/code_x_glue_cc_code_to_code_trans"
|
|
3294
3326
|
}
|
|
3295
3327
|
],
|
|
3296
3328
|
demo: {
|
|
@@ -3321,12 +3353,12 @@ var taskData27 = {
|
|
|
3321
3353
|
],
|
|
3322
3354
|
models: [
|
|
3323
3355
|
{
|
|
3324
|
-
description: "
|
|
3325
|
-
id: "
|
|
3356
|
+
description: "Very powerful model that can translate many languages between each other, especially low-resource languages.",
|
|
3357
|
+
id: "facebook/nllb-200-1.3B"
|
|
3326
3358
|
},
|
|
3327
3359
|
{
|
|
3328
3360
|
description: "A general-purpose Transformer that can be used to translate from English to German, French, or Romanian.",
|
|
3329
|
-
id: "t5-base"
|
|
3361
|
+
id: "google-t5/t5-base"
|
|
3330
3362
|
}
|
|
3331
3363
|
],
|
|
3332
3364
|
spaces: [
|
|
@@ -3335,12 +3367,12 @@ var taskData27 = {
|
|
|
3335
3367
|
id: "Iker/Translate-100-languages"
|
|
3336
3368
|
},
|
|
3337
3369
|
{
|
|
3338
|
-
description: "An application that can translate between
|
|
3339
|
-
id: "
|
|
3370
|
+
description: "An application that can translate between many languages.",
|
|
3371
|
+
id: "Geonmo/nllb-translation-demo"
|
|
3340
3372
|
}
|
|
3341
3373
|
],
|
|
3342
3374
|
summary: "Translation is the task of converting text from one language to another.",
|
|
3343
|
-
widgetModels: ["
|
|
3375
|
+
widgetModels: ["facebook/mbart-large-50-many-to-many-mmt"],
|
|
3344
3376
|
youtubeId: "1JvfrvZgi6c"
|
|
3345
3377
|
};
|
|
3346
3378
|
var data_default27 = taskData27;
|
|
@@ -3350,11 +3382,11 @@ var taskData28 = {
|
|
|
3350
3382
|
datasets: [
|
|
3351
3383
|
{
|
|
3352
3384
|
description: "A widely used dataset used to benchmark multiple variants of text classification.",
|
|
3353
|
-
id: "glue"
|
|
3385
|
+
id: "nyu-mll/glue"
|
|
3354
3386
|
},
|
|
3355
3387
|
{
|
|
3356
3388
|
description: "A text classification dataset used to benchmark natural language inference models.",
|
|
3357
|
-
id: "snli"
|
|
3389
|
+
id: "stanfordnlp/snli"
|
|
3358
3390
|
}
|
|
3359
3391
|
],
|
|
3360
3392
|
demo: {
|
|
@@ -3406,11 +3438,23 @@ var taskData28 = {
|
|
|
3406
3438
|
models: [
|
|
3407
3439
|
{
|
|
3408
3440
|
description: "A robust model trained for sentiment analysis.",
|
|
3409
|
-
id: "distilbert-base-uncased-finetuned-sst-2-english"
|
|
3441
|
+
id: "distilbert/distilbert-base-uncased-finetuned-sst-2-english"
|
|
3442
|
+
},
|
|
3443
|
+
{
|
|
3444
|
+
description: "A sentiment analysis model specialized in financial sentiment.",
|
|
3445
|
+
id: "ProsusAI/finbert"
|
|
3446
|
+
},
|
|
3447
|
+
{
|
|
3448
|
+
description: "A sentiment analysis model specialized in analyzing tweets.",
|
|
3449
|
+
id: "cardiffnlp/twitter-roberta-base-sentiment-latest"
|
|
3450
|
+
},
|
|
3451
|
+
{
|
|
3452
|
+
description: "A model that can classify languages.",
|
|
3453
|
+
id: "papluca/xlm-roberta-base-language-detection"
|
|
3410
3454
|
},
|
|
3411
3455
|
{
|
|
3412
|
-
description: "
|
|
3413
|
-
id: "
|
|
3456
|
+
description: "A model that can classify text generation attacks.",
|
|
3457
|
+
id: "meta-llama/Prompt-Guard-86M"
|
|
3414
3458
|
}
|
|
3415
3459
|
],
|
|
3416
3460
|
spaces: [
|
|
@@ -3428,7 +3472,7 @@ var taskData28 = {
|
|
|
3428
3472
|
}
|
|
3429
3473
|
],
|
|
3430
3474
|
summary: "Text Classification is the task of assigning a label or class to a given text. Some use cases are sentiment analysis, natural language inference, and assessing grammatical correctness.",
|
|
3431
|
-
widgetModels: ["distilbert-base-uncased-finetuned-sst-2-english"],
|
|
3475
|
+
widgetModels: ["distilbert/distilbert-base-uncased-finetuned-sst-2-english"],
|
|
3432
3476
|
youtubeId: "leNG9fN9FQU"
|
|
3433
3477
|
};
|
|
3434
3478
|
var data_default28 = taskData28;
|
|
@@ -3527,8 +3571,8 @@ var taskData29 = {
|
|
|
3527
3571
|
id: "HuggingFaceH4/zephyr-chat"
|
|
3528
3572
|
},
|
|
3529
3573
|
{
|
|
3530
|
-
description: "
|
|
3531
|
-
id: "
|
|
3574
|
+
description: "A leaderboard that ranks text generation models based on blind votes from people.",
|
|
3575
|
+
id: "lmsys/chatbot-arena-leaderboard"
|
|
3532
3576
|
},
|
|
3533
3577
|
{
|
|
3534
3578
|
description: "A chatbot to converse with a very powerful text generation model.",
|
|
@@ -3536,7 +3580,7 @@ var taskData29 = {
|
|
|
3536
3580
|
}
|
|
3537
3581
|
],
|
|
3538
3582
|
summary: "Generating text is the task of generating new text given another text. These models can, for example, fill in incomplete text or paraphrase.",
|
|
3539
|
-
widgetModels: ["
|
|
3583
|
+
widgetModels: ["mistralai/Mistral-Nemo-Instruct-2407"],
|
|
3540
3584
|
youtubeId: "e9gNEAlsOvU"
|
|
3541
3585
|
};
|
|
3542
3586
|
var data_default29 = taskData29;
|
|
@@ -3758,12 +3802,12 @@ var taskData32 = {
|
|
|
3758
3802
|
models: [
|
|
3759
3803
|
{
|
|
3760
3804
|
// TO DO: write description
|
|
3761
|
-
description: "Strong Video Classification model trained on the
|
|
3762
|
-
id: "
|
|
3805
|
+
description: "Strong Video Classification model trained on the Kinetics 400 dataset.",
|
|
3806
|
+
id: "google/vivit-b-16x2-kinetics400"
|
|
3763
3807
|
},
|
|
3764
3808
|
{
|
|
3765
3809
|
// TO DO: write description
|
|
3766
|
-
description: "Strong Video Classification model trained on the
|
|
3810
|
+
description: "Strong Video Classification model trained on the Kinetics 400 dataset.",
|
|
3767
3811
|
id: "microsoft/xclip-base-patch32"
|
|
3768
3812
|
}
|
|
3769
3813
|
],
|
|
@@ -3792,7 +3836,7 @@ var taskData33 = {
|
|
|
3792
3836
|
},
|
|
3793
3837
|
{
|
|
3794
3838
|
description: "A dataset to benchmark visual reasoning based on text in images.",
|
|
3795
|
-
id: "textvqa"
|
|
3839
|
+
id: "facebook/textvqa"
|
|
3796
3840
|
}
|
|
3797
3841
|
],
|
|
3798
3842
|
demo: {
|
|
@@ -3845,7 +3889,7 @@ var taskData33 = {
|
|
|
3845
3889
|
},
|
|
3846
3890
|
{
|
|
3847
3891
|
description: "A visual question answering model trained for mathematical reasoning and chart derendering from images.",
|
|
3848
|
-
id: "google/matcha-base
|
|
3892
|
+
id: "google/matcha-base"
|
|
3849
3893
|
},
|
|
3850
3894
|
{
|
|
3851
3895
|
description: "A strong visual question answering model that answers questions from book covers.",
|
|
@@ -3881,15 +3925,15 @@ var taskData34 = {
|
|
|
3881
3925
|
datasets: [
|
|
3882
3926
|
{
|
|
3883
3927
|
description: "A widely used dataset used to benchmark multiple variants of text classification.",
|
|
3884
|
-
id: "glue"
|
|
3928
|
+
id: "nyu-mll/glue"
|
|
3885
3929
|
},
|
|
3886
3930
|
{
|
|
3887
3931
|
description: "The Multi-Genre Natural Language Inference (MultiNLI) corpus is a crowd-sourced collection of 433k sentence pairs annotated with textual entailment information.",
|
|
3888
|
-
id: "
|
|
3932
|
+
id: "nyu-mll/multi_nli"
|
|
3889
3933
|
},
|
|
3890
3934
|
{
|
|
3891
3935
|
description: "FEVER is a publicly available dataset for fact extraction and verification against textual sources.",
|
|
3892
|
-
id: "
|
|
3936
|
+
id: "fever/fever"
|
|
3893
3937
|
}
|
|
3894
3938
|
],
|
|
3895
3939
|
demo: {
|
|
@@ -3928,8 +3972,12 @@ var taskData34 = {
|
|
|
3928
3972
|
metrics: [],
|
|
3929
3973
|
models: [
|
|
3930
3974
|
{
|
|
3931
|
-
description: "Powerful zero-shot text classification model",
|
|
3975
|
+
description: "Powerful zero-shot text classification model.",
|
|
3932
3976
|
id: "facebook/bart-large-mnli"
|
|
3977
|
+
},
|
|
3978
|
+
{
|
|
3979
|
+
description: "Powerful zero-shot multilingual text classification model that can accomplish multiple tasks.",
|
|
3980
|
+
id: "MoritzLaurer/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7"
|
|
3933
3981
|
}
|
|
3934
3982
|
],
|
|
3935
3983
|
spaces: [],
|
|
@@ -3992,7 +4040,7 @@ var taskData35 = {
|
|
|
3992
4040
|
},
|
|
3993
4041
|
{
|
|
3994
4042
|
description: "Strong zero-shot image classification model.",
|
|
3995
|
-
id: "google/siglip-
|
|
4043
|
+
id: "google/siglip-so400m-patch14-224"
|
|
3996
4044
|
},
|
|
3997
4045
|
{
|
|
3998
4046
|
description: "Small yet powerful zero-shot image classification model that can run on edge devices.",
|
|
@@ -4014,7 +4062,7 @@ var taskData35 = {
|
|
|
4014
4062
|
}
|
|
4015
4063
|
],
|
|
4016
4064
|
summary: "Zero-shot image classification is the task of classifying previously unseen classes during training of a model.",
|
|
4017
|
-
widgetModels: ["
|
|
4065
|
+
widgetModels: ["google/siglip-so400m-patch14-224"],
|
|
4018
4066
|
youtubeId: ""
|
|
4019
4067
|
};
|
|
4020
4068
|
var data_default35 = taskData35;
|
|
@@ -5128,6 +5176,14 @@ wavs = chat.infer(texts, )
|
|
|
5128
5176
|
|
|
5129
5177
|
torchaudio.save("output1.wav", torch.from_numpy(wavs[0]), 24000)`
|
|
5130
5178
|
];
|
|
5179
|
+
var yolov10 = (model) => [
|
|
5180
|
+
`from ultralytics import YOLOv10
|
|
5181
|
+
|
|
5182
|
+
model = YOLOv10.from_pretrained("${model.id}")
|
|
5183
|
+
source = 'http://images.cocodataset.org/val2017/000000039769.jpg'
|
|
5184
|
+
model.predict(source=source, save=True)
|
|
5185
|
+
`
|
|
5186
|
+
];
|
|
5131
5187
|
var birefnet = (model) => [
|
|
5132
5188
|
`# Option 1: use with transformers
|
|
5133
5189
|
|
|
@@ -5814,6 +5870,13 @@ var MODEL_LIBRARIES_UI_ELEMENTS = {
|
|
|
5814
5870
|
docsUrl: "https://github.com/jasonppy/VoiceCraft",
|
|
5815
5871
|
snippets: voicecraft
|
|
5816
5872
|
},
|
|
5873
|
+
yolov10: {
|
|
5874
|
+
prettyLabel: "YOLOv10",
|
|
5875
|
+
repoName: "yolov10",
|
|
5876
|
+
repoUrl: "https://github.com/THU-MIG/yolov10",
|
|
5877
|
+
docsUrl: "https://github.com/THU-MIG/yolov10",
|
|
5878
|
+
snippets: yolov10
|
|
5879
|
+
},
|
|
5817
5880
|
whisperkit: {
|
|
5818
5881
|
prettyLabel: "WhisperKit",
|
|
5819
5882
|
repoName: "WhisperKit",
|