@huggingface/tasks 0.10.7 → 0.10.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +120 -17
- package/dist/index.js +120 -17
- package/dist/src/hardware.d.ts +94 -2
- package/dist/src/hardware.d.ts.map +1 -1
- package/dist/src/local-apps.d.ts +7 -0
- package/dist/src/local-apps.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/hardware.ts +96 -4
- package/src/local-apps.ts +7 -0
- package/src/tasks/automatic-speech-recognition/about.md +16 -16
- package/src/tasks/automatic-speech-recognition/data.ts +7 -7
- package/src/tasks/text-to-speech/about.md +1 -3
- package/src/tasks/text-to-speech/data.ts +6 -6
package/dist/index.cjs
CHANGED
|
@@ -1541,16 +1541,16 @@ var data_default2 = taskData2;
|
|
|
1541
1541
|
var taskData3 = {
|
|
1542
1542
|
datasets: [
|
|
1543
1543
|
{
|
|
1544
|
-
description: "
|
|
1545
|
-
id: "mozilla-foundation/
|
|
1544
|
+
description: "31,175 hours of multilingual audio-text dataset in 108 languages.",
|
|
1545
|
+
id: "mozilla-foundation/common_voice_17_0"
|
|
1546
1546
|
},
|
|
1547
1547
|
{
|
|
1548
1548
|
description: "An English dataset with 1,000 hours of data.",
|
|
1549
1549
|
id: "librispeech_asr"
|
|
1550
1550
|
},
|
|
1551
1551
|
{
|
|
1552
|
-
description: "
|
|
1553
|
-
id: "
|
|
1552
|
+
description: "A multi-lingual audio dataset with 370K hours of audio.",
|
|
1553
|
+
id: "espnet/yodas"
|
|
1554
1554
|
}
|
|
1555
1555
|
],
|
|
1556
1556
|
demo: {
|
|
@@ -1585,12 +1585,12 @@ var taskData3 = {
|
|
|
1585
1585
|
id: "openai/whisper-large-v3"
|
|
1586
1586
|
},
|
|
1587
1587
|
{
|
|
1588
|
-
description: "A good generic
|
|
1589
|
-
id: "facebook/
|
|
1588
|
+
description: "A good generic speech model by MetaAI for fine-tuning.",
|
|
1589
|
+
id: "facebook/w2v-bert-2.0"
|
|
1590
1590
|
},
|
|
1591
1591
|
{
|
|
1592
1592
|
description: "An end-to-end model that performs ASR and Speech Translation by MetaAI.",
|
|
1593
|
-
id: "facebook/
|
|
1593
|
+
id: "facebook/seamless-m4t-v2-large"
|
|
1594
1594
|
}
|
|
1595
1595
|
],
|
|
1596
1596
|
spaces: [
|
|
@@ -2992,8 +2992,8 @@ var taskData24 = {
|
|
|
2992
2992
|
canonicalId: "text-to-audio",
|
|
2993
2993
|
datasets: [
|
|
2994
2994
|
{
|
|
2995
|
-
description: "
|
|
2996
|
-
id: "
|
|
2995
|
+
description: "10K hours of multi-speaker English dataset.",
|
|
2996
|
+
id: "parler-tts/mls_eng_10k"
|
|
2997
2997
|
},
|
|
2998
2998
|
{
|
|
2999
2999
|
description: "Multi-speaker English dataset.",
|
|
@@ -3031,8 +3031,8 @@ var taskData24 = {
|
|
|
3031
3031
|
id: "facebook/mms-tts"
|
|
3032
3032
|
},
|
|
3033
3033
|
{
|
|
3034
|
-
description: "
|
|
3035
|
-
id: "
|
|
3034
|
+
description: "A prompt based, powerful TTS model.",
|
|
3035
|
+
id: "parler-tts/parler_tts_mini_v0.1"
|
|
3036
3036
|
}
|
|
3037
3037
|
],
|
|
3038
3038
|
spaces: [
|
|
@@ -3045,8 +3045,8 @@ var taskData24 = {
|
|
|
3045
3045
|
id: "coqui/xtts"
|
|
3046
3046
|
},
|
|
3047
3047
|
{
|
|
3048
|
-
description: "An application that synthesizes speech for
|
|
3049
|
-
id: "
|
|
3048
|
+
description: "An application that synthesizes speech for diverse speaker prompts.",
|
|
3049
|
+
id: "parler-tts/parler_tts_mini"
|
|
3050
3050
|
}
|
|
3051
3051
|
],
|
|
3052
3052
|
summary: "Text-to-Speech (TTS) is the task of generating natural sounding speech given text input. TTS models can be extended to have a single model that generates speech for multiple speakers and multiple languages.",
|
|
@@ -5476,6 +5476,30 @@ var SKUS = {
|
|
|
5476
5476
|
tflops: 91.1,
|
|
5477
5477
|
memory: [48]
|
|
5478
5478
|
},
|
|
5479
|
+
"RTX 5880 Ada": {
|
|
5480
|
+
tflops: 69.3,
|
|
5481
|
+
memory: [48]
|
|
5482
|
+
},
|
|
5483
|
+
"RTX 5000 Ada": {
|
|
5484
|
+
tflops: 65.3,
|
|
5485
|
+
memory: [32]
|
|
5486
|
+
},
|
|
5487
|
+
"RTX 4500 Ada": {
|
|
5488
|
+
tflops: 39.6,
|
|
5489
|
+
memory: [24]
|
|
5490
|
+
},
|
|
5491
|
+
"RTX 4000 Ada": {
|
|
5492
|
+
tflops: 26.7,
|
|
5493
|
+
memory: [20]
|
|
5494
|
+
},
|
|
5495
|
+
"RTX 4000 SFF Ada": {
|
|
5496
|
+
tflops: 19.2,
|
|
5497
|
+
memory: [20]
|
|
5498
|
+
},
|
|
5499
|
+
"RTX 2000 Ada": {
|
|
5500
|
+
tflops: 12,
|
|
5501
|
+
memory: [16]
|
|
5502
|
+
},
|
|
5479
5503
|
A100: {
|
|
5480
5504
|
tflops: 77.97,
|
|
5481
5505
|
memory: [80, 40]
|
|
@@ -5488,14 +5512,14 @@ var SKUS = {
|
|
|
5488
5512
|
tflops: 31.24,
|
|
5489
5513
|
memory: [24]
|
|
5490
5514
|
},
|
|
5491
|
-
T4: {
|
|
5492
|
-
tflops: 65.13,
|
|
5493
|
-
memory: [16]
|
|
5494
|
-
},
|
|
5495
5515
|
"RTX 4090": {
|
|
5496
5516
|
tflops: 82.58,
|
|
5497
5517
|
memory: [24]
|
|
5498
5518
|
},
|
|
5519
|
+
"RTX 4090D": {
|
|
5520
|
+
tflops: 79.49,
|
|
5521
|
+
memory: [24]
|
|
5522
|
+
},
|
|
5499
5523
|
"RTX 4080 SUPER": {
|
|
5500
5524
|
tflops: 52.2,
|
|
5501
5525
|
memory: [16]
|
|
@@ -5520,6 +5544,14 @@ var SKUS = {
|
|
|
5520
5544
|
tflops: 44.1,
|
|
5521
5545
|
memory: [16]
|
|
5522
5546
|
},
|
|
5547
|
+
"RTX 4060": {
|
|
5548
|
+
tflops: 15.11,
|
|
5549
|
+
memory: [8]
|
|
5550
|
+
},
|
|
5551
|
+
"RTX 4060 Ti": {
|
|
5552
|
+
tflops: 22.06,
|
|
5553
|
+
memory: [8, 16]
|
|
5554
|
+
},
|
|
5523
5555
|
"RTX 3090": {
|
|
5524
5556
|
tflops: 35.58,
|
|
5525
5557
|
memory: [24]
|
|
@@ -5548,13 +5580,61 @@ var SKUS = {
|
|
|
5548
5580
|
tflops: 16.6,
|
|
5549
5581
|
memory: [8]
|
|
5550
5582
|
},
|
|
5583
|
+
"RTX 3060 Ti": {
|
|
5584
|
+
tflops: 16.2,
|
|
5585
|
+
memory: [8]
|
|
5586
|
+
},
|
|
5587
|
+
"RTX 3060": {
|
|
5588
|
+
tflops: 12.74,
|
|
5589
|
+
memory: [12, 8]
|
|
5590
|
+
},
|
|
5591
|
+
"RTX 2070": {
|
|
5592
|
+
tflops: 14.93,
|
|
5593
|
+
memory: [8]
|
|
5594
|
+
},
|
|
5595
|
+
"RTX 3050 Mobile": {
|
|
5596
|
+
tflops: 7.639,
|
|
5597
|
+
memory: [6]
|
|
5598
|
+
},
|
|
5551
5599
|
"RTX 2060 Mobile": {
|
|
5552
5600
|
tflops: 9.22,
|
|
5553
5601
|
memory: [6]
|
|
5554
5602
|
},
|
|
5603
|
+
"GTX 1080 Ti": {
|
|
5604
|
+
tflops: 11.34,
|
|
5605
|
+
// float32 (GPU does not support native float16)
|
|
5606
|
+
memory: [11]
|
|
5607
|
+
},
|
|
5608
|
+
"GTX 1070 Ti": {
|
|
5609
|
+
tflops: 8.2,
|
|
5610
|
+
// float32 (GPU does not support native float16)
|
|
5611
|
+
memory: [8]
|
|
5612
|
+
},
|
|
5555
5613
|
"RTX Titan": {
|
|
5556
5614
|
tflops: 32.62,
|
|
5557
5615
|
memory: [24]
|
|
5616
|
+
},
|
|
5617
|
+
"GTX 1650 Mobile": {
|
|
5618
|
+
tflops: 6.39,
|
|
5619
|
+
memory: [4]
|
|
5620
|
+
},
|
|
5621
|
+
T4: {
|
|
5622
|
+
tflops: 65.13,
|
|
5623
|
+
memory: [16]
|
|
5624
|
+
},
|
|
5625
|
+
V100: {
|
|
5626
|
+
tflops: 28.26,
|
|
5627
|
+
memory: [32, 16]
|
|
5628
|
+
},
|
|
5629
|
+
"Quadro P6000": {
|
|
5630
|
+
tflops: 12.63,
|
|
5631
|
+
// float32 (GPU does not support native float16)
|
|
5632
|
+
memory: [24]
|
|
5633
|
+
},
|
|
5634
|
+
P40: {
|
|
5635
|
+
tflops: 11.76,
|
|
5636
|
+
// float32 (GPU does not support native float16)
|
|
5637
|
+
memory: [24]
|
|
5558
5638
|
}
|
|
5559
5639
|
},
|
|
5560
5640
|
AMD: {
|
|
@@ -5570,6 +5650,10 @@ var SKUS = {
|
|
|
5570
5650
|
tflops: 181,
|
|
5571
5651
|
memory: [64]
|
|
5572
5652
|
},
|
|
5653
|
+
MI100: {
|
|
5654
|
+
tflops: 184.6,
|
|
5655
|
+
memory: [32]
|
|
5656
|
+
},
|
|
5573
5657
|
"RX 7900 XTX": {
|
|
5574
5658
|
tflops: 122.8,
|
|
5575
5659
|
memory: [24]
|
|
@@ -5593,6 +5677,18 @@ var SKUS = {
|
|
|
5593
5677
|
"RX 7600 XT": {
|
|
5594
5678
|
tflops: 45.14,
|
|
5595
5679
|
memory: [16, 8]
|
|
5680
|
+
},
|
|
5681
|
+
"RX 6950 XT": {
|
|
5682
|
+
tflops: 47.31,
|
|
5683
|
+
memory: [16]
|
|
5684
|
+
},
|
|
5685
|
+
"RX 6800": {
|
|
5686
|
+
tflops: 32.33,
|
|
5687
|
+
memory: [16]
|
|
5688
|
+
},
|
|
5689
|
+
"Radeon Pro VII": {
|
|
5690
|
+
tflops: 26.11,
|
|
5691
|
+
memory: [16]
|
|
5596
5692
|
}
|
|
5597
5693
|
}
|
|
5598
5694
|
},
|
|
@@ -5803,6 +5899,13 @@ var LOCAL_APPS = {
|
|
|
5803
5899
|
displayOnModelPage: isGgufModel,
|
|
5804
5900
|
deeplink: (model) => new URL(`https://backyard.ai/hf/model/${model.id}`)
|
|
5805
5901
|
},
|
|
5902
|
+
sanctum: {
|
|
5903
|
+
prettyLabel: "Sanctum",
|
|
5904
|
+
docsUrl: "https://sanctum.ai",
|
|
5905
|
+
mainTask: "text-generation",
|
|
5906
|
+
displayOnModelPage: isGgufModel,
|
|
5907
|
+
deeplink: (model) => new URL(`sanctum://open_from_hf?model=${model.id}`)
|
|
5908
|
+
},
|
|
5806
5909
|
drawthings: {
|
|
5807
5910
|
prettyLabel: "Draw Things",
|
|
5808
5911
|
docsUrl: "https://drawthings.ai",
|
package/dist/index.js
CHANGED
|
@@ -1503,16 +1503,16 @@ var data_default2 = taskData2;
|
|
|
1503
1503
|
var taskData3 = {
|
|
1504
1504
|
datasets: [
|
|
1505
1505
|
{
|
|
1506
|
-
description: "
|
|
1507
|
-
id: "mozilla-foundation/
|
|
1506
|
+
description: "31,175 hours of multilingual audio-text dataset in 108 languages.",
|
|
1507
|
+
id: "mozilla-foundation/common_voice_17_0"
|
|
1508
1508
|
},
|
|
1509
1509
|
{
|
|
1510
1510
|
description: "An English dataset with 1,000 hours of data.",
|
|
1511
1511
|
id: "librispeech_asr"
|
|
1512
1512
|
},
|
|
1513
1513
|
{
|
|
1514
|
-
description: "
|
|
1515
|
-
id: "
|
|
1514
|
+
description: "A multi-lingual audio dataset with 370K hours of audio.",
|
|
1515
|
+
id: "espnet/yodas"
|
|
1516
1516
|
}
|
|
1517
1517
|
],
|
|
1518
1518
|
demo: {
|
|
@@ -1547,12 +1547,12 @@ var taskData3 = {
|
|
|
1547
1547
|
id: "openai/whisper-large-v3"
|
|
1548
1548
|
},
|
|
1549
1549
|
{
|
|
1550
|
-
description: "A good generic
|
|
1551
|
-
id: "facebook/
|
|
1550
|
+
description: "A good generic speech model by MetaAI for fine-tuning.",
|
|
1551
|
+
id: "facebook/w2v-bert-2.0"
|
|
1552
1552
|
},
|
|
1553
1553
|
{
|
|
1554
1554
|
description: "An end-to-end model that performs ASR and Speech Translation by MetaAI.",
|
|
1555
|
-
id: "facebook/
|
|
1555
|
+
id: "facebook/seamless-m4t-v2-large"
|
|
1556
1556
|
}
|
|
1557
1557
|
],
|
|
1558
1558
|
spaces: [
|
|
@@ -2954,8 +2954,8 @@ var taskData24 = {
|
|
|
2954
2954
|
canonicalId: "text-to-audio",
|
|
2955
2955
|
datasets: [
|
|
2956
2956
|
{
|
|
2957
|
-
description: "
|
|
2958
|
-
id: "
|
|
2957
|
+
description: "10K hours of multi-speaker English dataset.",
|
|
2958
|
+
id: "parler-tts/mls_eng_10k"
|
|
2959
2959
|
},
|
|
2960
2960
|
{
|
|
2961
2961
|
description: "Multi-speaker English dataset.",
|
|
@@ -2993,8 +2993,8 @@ var taskData24 = {
|
|
|
2993
2993
|
id: "facebook/mms-tts"
|
|
2994
2994
|
},
|
|
2995
2995
|
{
|
|
2996
|
-
description: "
|
|
2997
|
-
id: "
|
|
2996
|
+
description: "A prompt based, powerful TTS model.",
|
|
2997
|
+
id: "parler-tts/parler_tts_mini_v0.1"
|
|
2998
2998
|
}
|
|
2999
2999
|
],
|
|
3000
3000
|
spaces: [
|
|
@@ -3007,8 +3007,8 @@ var taskData24 = {
|
|
|
3007
3007
|
id: "coqui/xtts"
|
|
3008
3008
|
},
|
|
3009
3009
|
{
|
|
3010
|
-
description: "An application that synthesizes speech for
|
|
3011
|
-
id: "
|
|
3010
|
+
description: "An application that synthesizes speech for diverse speaker prompts.",
|
|
3011
|
+
id: "parler-tts/parler_tts_mini"
|
|
3012
3012
|
}
|
|
3013
3013
|
],
|
|
3014
3014
|
summary: "Text-to-Speech (TTS) is the task of generating natural sounding speech given text input. TTS models can be extended to have a single model that generates speech for multiple speakers and multiple languages.",
|
|
@@ -5438,6 +5438,30 @@ var SKUS = {
|
|
|
5438
5438
|
tflops: 91.1,
|
|
5439
5439
|
memory: [48]
|
|
5440
5440
|
},
|
|
5441
|
+
"RTX 5880 Ada": {
|
|
5442
|
+
tflops: 69.3,
|
|
5443
|
+
memory: [48]
|
|
5444
|
+
},
|
|
5445
|
+
"RTX 5000 Ada": {
|
|
5446
|
+
tflops: 65.3,
|
|
5447
|
+
memory: [32]
|
|
5448
|
+
},
|
|
5449
|
+
"RTX 4500 Ada": {
|
|
5450
|
+
tflops: 39.6,
|
|
5451
|
+
memory: [24]
|
|
5452
|
+
},
|
|
5453
|
+
"RTX 4000 Ada": {
|
|
5454
|
+
tflops: 26.7,
|
|
5455
|
+
memory: [20]
|
|
5456
|
+
},
|
|
5457
|
+
"RTX 4000 SFF Ada": {
|
|
5458
|
+
tflops: 19.2,
|
|
5459
|
+
memory: [20]
|
|
5460
|
+
},
|
|
5461
|
+
"RTX 2000 Ada": {
|
|
5462
|
+
tflops: 12,
|
|
5463
|
+
memory: [16]
|
|
5464
|
+
},
|
|
5441
5465
|
A100: {
|
|
5442
5466
|
tflops: 77.97,
|
|
5443
5467
|
memory: [80, 40]
|
|
@@ -5450,14 +5474,14 @@ var SKUS = {
|
|
|
5450
5474
|
tflops: 31.24,
|
|
5451
5475
|
memory: [24]
|
|
5452
5476
|
},
|
|
5453
|
-
T4: {
|
|
5454
|
-
tflops: 65.13,
|
|
5455
|
-
memory: [16]
|
|
5456
|
-
},
|
|
5457
5477
|
"RTX 4090": {
|
|
5458
5478
|
tflops: 82.58,
|
|
5459
5479
|
memory: [24]
|
|
5460
5480
|
},
|
|
5481
|
+
"RTX 4090D": {
|
|
5482
|
+
tflops: 79.49,
|
|
5483
|
+
memory: [24]
|
|
5484
|
+
},
|
|
5461
5485
|
"RTX 4080 SUPER": {
|
|
5462
5486
|
tflops: 52.2,
|
|
5463
5487
|
memory: [16]
|
|
@@ -5482,6 +5506,14 @@ var SKUS = {
|
|
|
5482
5506
|
tflops: 44.1,
|
|
5483
5507
|
memory: [16]
|
|
5484
5508
|
},
|
|
5509
|
+
"RTX 4060": {
|
|
5510
|
+
tflops: 15.11,
|
|
5511
|
+
memory: [8]
|
|
5512
|
+
},
|
|
5513
|
+
"RTX 4060 Ti": {
|
|
5514
|
+
tflops: 22.06,
|
|
5515
|
+
memory: [8, 16]
|
|
5516
|
+
},
|
|
5485
5517
|
"RTX 3090": {
|
|
5486
5518
|
tflops: 35.58,
|
|
5487
5519
|
memory: [24]
|
|
@@ -5510,13 +5542,61 @@ var SKUS = {
|
|
|
5510
5542
|
tflops: 16.6,
|
|
5511
5543
|
memory: [8]
|
|
5512
5544
|
},
|
|
5545
|
+
"RTX 3060 Ti": {
|
|
5546
|
+
tflops: 16.2,
|
|
5547
|
+
memory: [8]
|
|
5548
|
+
},
|
|
5549
|
+
"RTX 3060": {
|
|
5550
|
+
tflops: 12.74,
|
|
5551
|
+
memory: [12, 8]
|
|
5552
|
+
},
|
|
5553
|
+
"RTX 2070": {
|
|
5554
|
+
tflops: 14.93,
|
|
5555
|
+
memory: [8]
|
|
5556
|
+
},
|
|
5557
|
+
"RTX 3050 Mobile": {
|
|
5558
|
+
tflops: 7.639,
|
|
5559
|
+
memory: [6]
|
|
5560
|
+
},
|
|
5513
5561
|
"RTX 2060 Mobile": {
|
|
5514
5562
|
tflops: 9.22,
|
|
5515
5563
|
memory: [6]
|
|
5516
5564
|
},
|
|
5565
|
+
"GTX 1080 Ti": {
|
|
5566
|
+
tflops: 11.34,
|
|
5567
|
+
// float32 (GPU does not support native float16)
|
|
5568
|
+
memory: [11]
|
|
5569
|
+
},
|
|
5570
|
+
"GTX 1070 Ti": {
|
|
5571
|
+
tflops: 8.2,
|
|
5572
|
+
// float32 (GPU does not support native float16)
|
|
5573
|
+
memory: [8]
|
|
5574
|
+
},
|
|
5517
5575
|
"RTX Titan": {
|
|
5518
5576
|
tflops: 32.62,
|
|
5519
5577
|
memory: [24]
|
|
5578
|
+
},
|
|
5579
|
+
"GTX 1650 Mobile": {
|
|
5580
|
+
tflops: 6.39,
|
|
5581
|
+
memory: [4]
|
|
5582
|
+
},
|
|
5583
|
+
T4: {
|
|
5584
|
+
tflops: 65.13,
|
|
5585
|
+
memory: [16]
|
|
5586
|
+
},
|
|
5587
|
+
V100: {
|
|
5588
|
+
tflops: 28.26,
|
|
5589
|
+
memory: [32, 16]
|
|
5590
|
+
},
|
|
5591
|
+
"Quadro P6000": {
|
|
5592
|
+
tflops: 12.63,
|
|
5593
|
+
// float32 (GPU does not support native float16)
|
|
5594
|
+
memory: [24]
|
|
5595
|
+
},
|
|
5596
|
+
P40: {
|
|
5597
|
+
tflops: 11.76,
|
|
5598
|
+
// float32 (GPU does not support native float16)
|
|
5599
|
+
memory: [24]
|
|
5520
5600
|
}
|
|
5521
5601
|
},
|
|
5522
5602
|
AMD: {
|
|
@@ -5532,6 +5612,10 @@ var SKUS = {
|
|
|
5532
5612
|
tflops: 181,
|
|
5533
5613
|
memory: [64]
|
|
5534
5614
|
},
|
|
5615
|
+
MI100: {
|
|
5616
|
+
tflops: 184.6,
|
|
5617
|
+
memory: [32]
|
|
5618
|
+
},
|
|
5535
5619
|
"RX 7900 XTX": {
|
|
5536
5620
|
tflops: 122.8,
|
|
5537
5621
|
memory: [24]
|
|
@@ -5555,6 +5639,18 @@ var SKUS = {
|
|
|
5555
5639
|
"RX 7600 XT": {
|
|
5556
5640
|
tflops: 45.14,
|
|
5557
5641
|
memory: [16, 8]
|
|
5642
|
+
},
|
|
5643
|
+
"RX 6950 XT": {
|
|
5644
|
+
tflops: 47.31,
|
|
5645
|
+
memory: [16]
|
|
5646
|
+
},
|
|
5647
|
+
"RX 6800": {
|
|
5648
|
+
tflops: 32.33,
|
|
5649
|
+
memory: [16]
|
|
5650
|
+
},
|
|
5651
|
+
"Radeon Pro VII": {
|
|
5652
|
+
tflops: 26.11,
|
|
5653
|
+
memory: [16]
|
|
5558
5654
|
}
|
|
5559
5655
|
}
|
|
5560
5656
|
},
|
|
@@ -5765,6 +5861,13 @@ var LOCAL_APPS = {
|
|
|
5765
5861
|
displayOnModelPage: isGgufModel,
|
|
5766
5862
|
deeplink: (model) => new URL(`https://backyard.ai/hf/model/${model.id}`)
|
|
5767
5863
|
},
|
|
5864
|
+
sanctum: {
|
|
5865
|
+
prettyLabel: "Sanctum",
|
|
5866
|
+
docsUrl: "https://sanctum.ai",
|
|
5867
|
+
mainTask: "text-generation",
|
|
5868
|
+
displayOnModelPage: isGgufModel,
|
|
5869
|
+
deeplink: (model) => new URL(`sanctum://open_from_hf?model=${model.id}`)
|
|
5870
|
+
},
|
|
5768
5871
|
drawthings: {
|
|
5769
5872
|
prettyLabel: "Draw Things",
|
|
5770
5873
|
docsUrl: "https://drawthings.ai",
|
package/dist/src/hardware.d.ts
CHANGED
|
@@ -44,6 +44,30 @@ export declare const SKUS: {
|
|
|
44
44
|
tflops: number;
|
|
45
45
|
memory: number[];
|
|
46
46
|
};
|
|
47
|
+
"RTX 5880 Ada": {
|
|
48
|
+
tflops: number;
|
|
49
|
+
memory: number[];
|
|
50
|
+
};
|
|
51
|
+
"RTX 5000 Ada": {
|
|
52
|
+
tflops: number;
|
|
53
|
+
memory: number[];
|
|
54
|
+
};
|
|
55
|
+
"RTX 4500 Ada": {
|
|
56
|
+
tflops: number;
|
|
57
|
+
memory: number[];
|
|
58
|
+
};
|
|
59
|
+
"RTX 4000 Ada": {
|
|
60
|
+
tflops: number;
|
|
61
|
+
memory: number[];
|
|
62
|
+
};
|
|
63
|
+
"RTX 4000 SFF Ada": {
|
|
64
|
+
tflops: number;
|
|
65
|
+
memory: number[];
|
|
66
|
+
};
|
|
67
|
+
"RTX 2000 Ada": {
|
|
68
|
+
tflops: number;
|
|
69
|
+
memory: number[];
|
|
70
|
+
};
|
|
47
71
|
A100: {
|
|
48
72
|
tflops: number;
|
|
49
73
|
memory: number[];
|
|
@@ -56,11 +80,11 @@ export declare const SKUS: {
|
|
|
56
80
|
tflops: number;
|
|
57
81
|
memory: number[];
|
|
58
82
|
};
|
|
59
|
-
|
|
83
|
+
"RTX 4090": {
|
|
60
84
|
tflops: number;
|
|
61
85
|
memory: number[];
|
|
62
86
|
};
|
|
63
|
-
"RTX
|
|
87
|
+
"RTX 4090D": {
|
|
64
88
|
tflops: number;
|
|
65
89
|
memory: number[];
|
|
66
90
|
};
|
|
@@ -88,6 +112,14 @@ export declare const SKUS: {
|
|
|
88
112
|
tflops: number;
|
|
89
113
|
memory: number[];
|
|
90
114
|
};
|
|
115
|
+
"RTX 4060": {
|
|
116
|
+
tflops: number;
|
|
117
|
+
memory: number[];
|
|
118
|
+
};
|
|
119
|
+
"RTX 4060 Ti": {
|
|
120
|
+
tflops: number;
|
|
121
|
+
memory: number[];
|
|
122
|
+
};
|
|
91
123
|
"RTX 3090": {
|
|
92
124
|
tflops: number;
|
|
93
125
|
memory: number[];
|
|
@@ -116,14 +148,58 @@ export declare const SKUS: {
|
|
|
116
148
|
tflops: number;
|
|
117
149
|
memory: number[];
|
|
118
150
|
};
|
|
151
|
+
"RTX 3060 Ti": {
|
|
152
|
+
tflops: number;
|
|
153
|
+
memory: number[];
|
|
154
|
+
};
|
|
155
|
+
"RTX 3060": {
|
|
156
|
+
tflops: number;
|
|
157
|
+
memory: number[];
|
|
158
|
+
};
|
|
159
|
+
"RTX 2070": {
|
|
160
|
+
tflops: number;
|
|
161
|
+
memory: number[];
|
|
162
|
+
};
|
|
163
|
+
"RTX 3050 Mobile": {
|
|
164
|
+
tflops: number;
|
|
165
|
+
memory: number[];
|
|
166
|
+
};
|
|
119
167
|
"RTX 2060 Mobile": {
|
|
120
168
|
tflops: number;
|
|
121
169
|
memory: number[];
|
|
122
170
|
};
|
|
171
|
+
"GTX 1080 Ti": {
|
|
172
|
+
tflops: number;
|
|
173
|
+
memory: number[];
|
|
174
|
+
};
|
|
175
|
+
"GTX 1070 Ti": {
|
|
176
|
+
tflops: number;
|
|
177
|
+
memory: number[];
|
|
178
|
+
};
|
|
123
179
|
"RTX Titan": {
|
|
124
180
|
tflops: number;
|
|
125
181
|
memory: number[];
|
|
126
182
|
};
|
|
183
|
+
"GTX 1650 Mobile": {
|
|
184
|
+
tflops: number;
|
|
185
|
+
memory: number[];
|
|
186
|
+
};
|
|
187
|
+
T4: {
|
|
188
|
+
tflops: number;
|
|
189
|
+
memory: number[];
|
|
190
|
+
};
|
|
191
|
+
V100: {
|
|
192
|
+
tflops: number;
|
|
193
|
+
memory: number[];
|
|
194
|
+
};
|
|
195
|
+
"Quadro P6000": {
|
|
196
|
+
tflops: number;
|
|
197
|
+
memory: number[];
|
|
198
|
+
};
|
|
199
|
+
P40: {
|
|
200
|
+
tflops: number;
|
|
201
|
+
memory: number[];
|
|
202
|
+
};
|
|
127
203
|
};
|
|
128
204
|
AMD: {
|
|
129
205
|
MI300: {
|
|
@@ -138,6 +214,10 @@ export declare const SKUS: {
|
|
|
138
214
|
tflops: number;
|
|
139
215
|
memory: number[];
|
|
140
216
|
};
|
|
217
|
+
MI100: {
|
|
218
|
+
tflops: number;
|
|
219
|
+
memory: number[];
|
|
220
|
+
};
|
|
141
221
|
"RX 7900 XTX": {
|
|
142
222
|
tflops: number;
|
|
143
223
|
memory: number[];
|
|
@@ -162,6 +242,18 @@ export declare const SKUS: {
|
|
|
162
242
|
tflops: number;
|
|
163
243
|
memory: number[];
|
|
164
244
|
};
|
|
245
|
+
"RX 6950 XT": {
|
|
246
|
+
tflops: number;
|
|
247
|
+
memory: number[];
|
|
248
|
+
};
|
|
249
|
+
"RX 6800": {
|
|
250
|
+
tflops: number;
|
|
251
|
+
memory: number[];
|
|
252
|
+
};
|
|
253
|
+
"Radeon Pro VII": {
|
|
254
|
+
tflops: number;
|
|
255
|
+
memory: number[];
|
|
256
|
+
};
|
|
165
257
|
};
|
|
166
258
|
};
|
|
167
259
|
CPU: {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"hardware.d.ts","sourceRoot":"","sources":["../../src/hardware.ts"],"names":[],"mappings":"AAAA;;;GAGG;AACH,eAAO,MAAM,iDAAiD,QAAW,CAAC;AAC1E,eAAO,MAAM,yDAAyD,QAAW,CAAC;AAClF,eAAO,MAAM,oCAAoC,QAAU,CAAC;AAE5D;;;GAGG;AACH,eAAO,MAAM,+CAA+C,QAAW,CAAC;AAExE,MAAM,WAAW,YAAY;IAC5B;;;;;;;;;OASG;IACH,MAAM,EAAE,MAAM,CAAC;IACf;;;OAGG;IACH,MAAM,CAAC,EAAE,MAAM,EAAE,CAAC;CAClB;AAED,eAAO,MAAM,sBAAsB,UAAqD,CAAC;AAEzF,eAAO,MAAM,IAAI
|
|
1
|
+
{"version":3,"file":"hardware.d.ts","sourceRoot":"","sources":["../../src/hardware.ts"],"names":[],"mappings":"AAAA;;;GAGG;AACH,eAAO,MAAM,iDAAiD,QAAW,CAAC;AAC1E,eAAO,MAAM,yDAAyD,QAAW,CAAC;AAClF,eAAO,MAAM,oCAAoC,QAAU,CAAC;AAE5D;;;GAGG;AACH,eAAO,MAAM,+CAA+C,QAAW,CAAC;AAExE,MAAM,WAAW,YAAY;IAC5B;;;;;;;;;OASG;IACH,MAAM,EAAE,MAAM,CAAC;IACf;;;OAGG;IACH,MAAM,CAAC,EAAE,MAAM,EAAE,CAAC;CAClB;AAED,eAAO,MAAM,sBAAsB,UAAqD,CAAC;AAEzF,eAAO,MAAM,IAAI;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CA+XuD,CAAC;AAEzE,MAAM,MAAM,OAAO,GAAG,MAAM,OAAO,IAAI,CAAC"}
|
package/dist/src/local-apps.d.ts
CHANGED
|
@@ -77,6 +77,13 @@ export declare const LOCAL_APPS: {
|
|
|
77
77
|
displayOnModelPage: typeof isGgufModel;
|
|
78
78
|
deeplink: (model: ModelData) => URL;
|
|
79
79
|
};
|
|
80
|
+
sanctum: {
|
|
81
|
+
prettyLabel: string;
|
|
82
|
+
docsUrl: string;
|
|
83
|
+
mainTask: "text-generation";
|
|
84
|
+
displayOnModelPage: typeof isGgufModel;
|
|
85
|
+
deeplink: (model: ModelData) => URL;
|
|
86
|
+
};
|
|
80
87
|
drawthings: {
|
|
81
88
|
prettyLabel: string;
|
|
82
89
|
docsUrl: string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"local-apps.d.ts","sourceRoot":"","sources":["../../src/local-apps.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AAC9C,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAEhD;;GAEG;AACH,MAAM,MAAM,QAAQ,GAAG;IACtB;;OAEG;IACH,WAAW,EAAE,MAAM,CAAC;IACpB;;OAEG;IACH,OAAO,EAAE,MAAM,CAAC;IAChB;;OAEG;IACH,QAAQ,EAAE,YAAY,CAAC;IACvB;;OAEG;IACH,SAAS,CAAC,EAAE,OAAO,CAAC;IAEpB,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB;;OAEG;IACH,kBAAkB,EAAE,CAAC,KAAK,EAAE,SAAS,KAAK,OAAO,CAAC;CAClD,GAAG,CACD;IACA;;OAEG;IACH,QAAQ,EAAE,CAAC,KAAK,EAAE,SAAS,KAAK,GAAG,CAAC;CACnC,GACD;IACA;;OAEG;IACH,OAAO,EAAE,CAAC,KAAK,EAAE,SAAS,KAAK,MAAM,GAAG,MAAM,EAAE,CAAC;CAChD,CACH,CAAC;AAEF,iBAAS,WAAW,CAAC,KAAK,EAAE,SAAS,WAEpC;AAmBD;;;;;;;;;;GAUG;AACH,eAAO,MAAM,UAAU;;;;;;yBA5BS,SAAS,KAAG,MAAM,EAAE
|
|
1
|
+
{"version":3,"file":"local-apps.d.ts","sourceRoot":"","sources":["../../src/local-apps.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AAC9C,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAEhD;;GAEG;AACH,MAAM,MAAM,QAAQ,GAAG;IACtB;;OAEG;IACH,WAAW,EAAE,MAAM,CAAC;IACpB;;OAEG;IACH,OAAO,EAAE,MAAM,CAAC;IAChB;;OAEG;IACH,QAAQ,EAAE,YAAY,CAAC;IACvB;;OAEG;IACH,SAAS,CAAC,EAAE,OAAO,CAAC;IAEpB,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB;;OAEG;IACH,kBAAkB,EAAE,CAAC,KAAK,EAAE,SAAS,KAAK,OAAO,CAAC;CAClD,GAAG,CACD;IACA;;OAEG;IACH,QAAQ,EAAE,CAAC,KAAK,EAAE,SAAS,KAAK,GAAG,CAAC;CACnC,GACD;IACA;;OAEG;IACH,OAAO,EAAE,CAAC,KAAK,EAAE,SAAS,KAAK,MAAM,GAAG,MAAM,EAAE,CAAC;CAChD,CACH,CAAC;AAEF,iBAAS,WAAW,CAAC,KAAK,EAAE,SAAS,WAEpC;AAmBD;;;;;;;;;;GAUG;AACH,eAAO,MAAM,UAAU;;;;;;yBA5BS,SAAS,KAAG,MAAM,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAwFhB,CAAC;AAErC,MAAM,MAAM,WAAW,GAAG,MAAM,OAAO,UAAU,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@huggingface/tasks",
|
|
3
3
|
"packageManager": "pnpm@8.10.5",
|
|
4
|
-
"version": "0.10.
|
|
4
|
+
"version": "0.10.9",
|
|
5
5
|
"description": "List of ML tasks for huggingface.co/tasks",
|
|
6
6
|
"repository": "https://github.com/huggingface/huggingface.js.git",
|
|
7
7
|
"publishConfig": {
|
package/src/hardware.ts
CHANGED
|
@@ -48,6 +48,30 @@ export const SKUS = {
|
|
|
48
48
|
tflops: 91.1,
|
|
49
49
|
memory: [48],
|
|
50
50
|
},
|
|
51
|
+
"RTX 5880 Ada": {
|
|
52
|
+
tflops: 69.3,
|
|
53
|
+
memory: [48],
|
|
54
|
+
},
|
|
55
|
+
"RTX 5000 Ada": {
|
|
56
|
+
tflops: 65.3,
|
|
57
|
+
memory: [32],
|
|
58
|
+
},
|
|
59
|
+
"RTX 4500 Ada": {
|
|
60
|
+
tflops: 39.6,
|
|
61
|
+
memory: [24],
|
|
62
|
+
},
|
|
63
|
+
"RTX 4000 Ada": {
|
|
64
|
+
tflops: 26.7,
|
|
65
|
+
memory: [20],
|
|
66
|
+
},
|
|
67
|
+
"RTX 4000 SFF Ada": {
|
|
68
|
+
tflops: 19.2,
|
|
69
|
+
memory: [20],
|
|
70
|
+
},
|
|
71
|
+
"RTX 2000 Ada": {
|
|
72
|
+
tflops: 12.0,
|
|
73
|
+
memory: [16],
|
|
74
|
+
},
|
|
51
75
|
A100: {
|
|
52
76
|
tflops: 77.97,
|
|
53
77
|
memory: [80, 40],
|
|
@@ -60,14 +84,14 @@ export const SKUS = {
|
|
|
60
84
|
tflops: 31.24,
|
|
61
85
|
memory: [24],
|
|
62
86
|
},
|
|
63
|
-
T4: {
|
|
64
|
-
tflops: 65.13,
|
|
65
|
-
memory: [16],
|
|
66
|
-
},
|
|
67
87
|
"RTX 4090": {
|
|
68
88
|
tflops: 82.58,
|
|
69
89
|
memory: [24],
|
|
70
90
|
},
|
|
91
|
+
"RTX 4090D": {
|
|
92
|
+
tflops: 79.49,
|
|
93
|
+
memory: [24],
|
|
94
|
+
},
|
|
71
95
|
"RTX 4080 SUPER": {
|
|
72
96
|
tflops: 52.2,
|
|
73
97
|
memory: [16],
|
|
@@ -92,6 +116,14 @@ export const SKUS = {
|
|
|
92
116
|
tflops: 44.1,
|
|
93
117
|
memory: [16],
|
|
94
118
|
},
|
|
119
|
+
"RTX 4060": {
|
|
120
|
+
tflops: 15.11,
|
|
121
|
+
memory: [8],
|
|
122
|
+
},
|
|
123
|
+
"RTX 4060 Ti": {
|
|
124
|
+
tflops: 22.06,
|
|
125
|
+
memory: [8, 16],
|
|
126
|
+
},
|
|
95
127
|
"RTX 3090": {
|
|
96
128
|
tflops: 35.58,
|
|
97
129
|
memory: [24],
|
|
@@ -120,14 +152,58 @@ export const SKUS = {
|
|
|
120
152
|
tflops: 16.6,
|
|
121
153
|
memory: [8],
|
|
122
154
|
},
|
|
155
|
+
"RTX 3060 Ti": {
|
|
156
|
+
tflops: 16.2,
|
|
157
|
+
memory: [8],
|
|
158
|
+
},
|
|
159
|
+
"RTX 3060": {
|
|
160
|
+
tflops: 12.74,
|
|
161
|
+
memory: [12, 8],
|
|
162
|
+
},
|
|
163
|
+
"RTX 2070": {
|
|
164
|
+
tflops: 14.93,
|
|
165
|
+
memory: [8],
|
|
166
|
+
},
|
|
167
|
+
"RTX 3050 Mobile": {
|
|
168
|
+
tflops: 7.639,
|
|
169
|
+
memory: [6],
|
|
170
|
+
},
|
|
123
171
|
"RTX 2060 Mobile": {
|
|
124
172
|
tflops: 9.22,
|
|
125
173
|
memory: [6],
|
|
126
174
|
},
|
|
175
|
+
"GTX 1080 Ti": {
|
|
176
|
+
tflops: 11.34, // float32 (GPU does not support native float16)
|
|
177
|
+
memory: [11],
|
|
178
|
+
},
|
|
179
|
+
"GTX 1070 Ti": {
|
|
180
|
+
tflops: 8.2, // float32 (GPU does not support native float16)
|
|
181
|
+
memory: [8],
|
|
182
|
+
},
|
|
127
183
|
"RTX Titan": {
|
|
128
184
|
tflops: 32.62,
|
|
129
185
|
memory: [24],
|
|
130
186
|
},
|
|
187
|
+
"GTX 1650 Mobile": {
|
|
188
|
+
tflops: 6.39,
|
|
189
|
+
memory: [4],
|
|
190
|
+
},
|
|
191
|
+
T4: {
|
|
192
|
+
tflops: 65.13,
|
|
193
|
+
memory: [16],
|
|
194
|
+
},
|
|
195
|
+
V100: {
|
|
196
|
+
tflops: 28.26,
|
|
197
|
+
memory: [32, 16],
|
|
198
|
+
},
|
|
199
|
+
"Quadro P6000": {
|
|
200
|
+
tflops: 12.63, // float32 (GPU does not support native float16)
|
|
201
|
+
memory: [24],
|
|
202
|
+
},
|
|
203
|
+
P40: {
|
|
204
|
+
tflops: 11.76, // float32 (GPU does not support native float16)
|
|
205
|
+
memory: [24],
|
|
206
|
+
},
|
|
131
207
|
},
|
|
132
208
|
AMD: {
|
|
133
209
|
MI300: {
|
|
@@ -142,6 +218,10 @@ export const SKUS = {
|
|
|
142
218
|
tflops: 181.0,
|
|
143
219
|
memory: [64],
|
|
144
220
|
},
|
|
221
|
+
MI100: {
|
|
222
|
+
tflops: 184.6,
|
|
223
|
+
memory: [32],
|
|
224
|
+
},
|
|
145
225
|
"RX 7900 XTX": {
|
|
146
226
|
tflops: 122.8,
|
|
147
227
|
memory: [24],
|
|
@@ -166,6 +246,18 @@ export const SKUS = {
|
|
|
166
246
|
tflops: 45.14,
|
|
167
247
|
memory: [16, 8],
|
|
168
248
|
},
|
|
249
|
+
"RX 6950 XT": {
|
|
250
|
+
tflops: 47.31,
|
|
251
|
+
memory: [16],
|
|
252
|
+
},
|
|
253
|
+
"RX 6800": {
|
|
254
|
+
tflops: 32.33,
|
|
255
|
+
memory: [16],
|
|
256
|
+
},
|
|
257
|
+
"Radeon Pro VII": {
|
|
258
|
+
tflops: 26.11,
|
|
259
|
+
memory: [16],
|
|
260
|
+
},
|
|
169
261
|
},
|
|
170
262
|
},
|
|
171
263
|
CPU: {
|
package/src/local-apps.ts
CHANGED
|
@@ -103,6 +103,13 @@ export const LOCAL_APPS = {
|
|
|
103
103
|
displayOnModelPage: isGgufModel,
|
|
104
104
|
deeplink: (model) => new URL(`https://backyard.ai/hf/model/${model.id}`),
|
|
105
105
|
},
|
|
106
|
+
sanctum: {
|
|
107
|
+
prettyLabel: "Sanctum",
|
|
108
|
+
docsUrl: "https://sanctum.ai",
|
|
109
|
+
mainTask: "text-generation",
|
|
110
|
+
displayOnModelPage: isGgufModel,
|
|
111
|
+
deeplink: (model) => new URL(`sanctum://open_from_hf?model=${model.id}`),
|
|
112
|
+
},
|
|
106
113
|
drawthings: {
|
|
107
114
|
prettyLabel: "Draw Things",
|
|
108
115
|
docsUrl: "https://drawthings.ai",
|
|
@@ -18,7 +18,7 @@ The use of Multilingual ASR has become popular, the idea of maintaining just a s
|
|
|
18
18
|
|
|
19
19
|
## Inference
|
|
20
20
|
|
|
21
|
-
The Hub contains over [
|
|
21
|
+
The Hub contains over [17,000 ASR models](https://huggingface.co/models?pipeline_tag=automatic-speech-recognition&sort=downloads) that you can test right away in your browser using the model page widgets. You can also use any model as a service using the Serverless Inference API. We also support libraries such as [transformers](https://huggingface.co/models?library=transformers&pipeline_tag=automatic-speech-recognition&sort=downloads), [speechbrain](https://huggingface.co/models?library=speechbrain&pipeline_tag=automatic-speech-recognition&sort=downloads), [NeMo](https://huggingface.co/models?pipeline_tag=automatic-speech-recognition&library=nemo&sort=downloads) and [espnet](https://huggingface.co/models?library=espnet&pipeline_tag=automatic-speech-recognition&sort=downloads) via the Serverless Inference API. Here's a simple code snippet to run inference:
|
|
22
22
|
|
|
23
23
|
```python
|
|
24
24
|
import json
|
|
@@ -36,20 +36,7 @@ def query(filename):
|
|
|
36
36
|
data = query("sample1.flac")
|
|
37
37
|
```
|
|
38
38
|
|
|
39
|
-
You can also use
|
|
40
|
-
|
|
41
|
-
```python
|
|
42
|
-
from transformers import pipeline
|
|
43
|
-
|
|
44
|
-
with open("sample.flac", "rb") as f:
|
|
45
|
-
data = f.read()
|
|
46
|
-
|
|
47
|
-
pipe = pipeline("automatic-speech-recognition", "openai/whisper-large-v2")
|
|
48
|
-
pipe("sample.flac")
|
|
49
|
-
# {'text': "GOING ALONG SLUSHY COUNTRY ROADS AND SPEAKING TO DAMP AUDIENCES IN DRAUGHTY SCHOOL ROOMS DAY AFTER DAY FOR A FORTNIGHT HE'LL HAVE TO PUT IN AN APPEARANCE AT SOME PLACE OF WORSHIP ON SUNDAY MORNING AND HE CAN COME TO US IMMEDIATELY AFTERWARDS"}
|
|
50
|
-
```
|
|
51
|
-
|
|
52
|
-
You can use [huggingface.js](https://github.com/huggingface/huggingface.js) to transcribe text with javascript using models on Hugging Face Hub.
|
|
39
|
+
You can also use [huggingface.js](https://github.com/huggingface/huggingface.js), the JavaScript client, to transcribe audio with models served through the Inference API.
|
|
53
40
|
|
|
54
41
|
```javascript
|
|
55
42
|
import { HfInference } from "@huggingface/inference";
|
|
@@ -57,10 +44,23 @@ import { HfInference } from "@huggingface/inference";
|
|
|
57
44
|
const inference = new HfInference(HF_TOKEN);
|
|
58
45
|
await inference.automaticSpeechRecognition({
|
|
59
46
|
data: await (await fetch("sample.flac")).blob(),
|
|
60
|
-
model: "openai/whisper-large-
|
|
47
|
+
model: "openai/whisper-large-v3",
|
|
61
48
|
});
|
|
62
49
|
```
|
|
63
50
|
|
|
51
|
+
For transformers-compatible models such as Whisper, Wav2Vec2, and HuBERT, you can also run inference in Python using transformers as follows:
|
|
52
|
+
|
|
53
|
+
```python
|
|
54
|
+
# pip install --upgrade transformers
|
|
55
|
+
|
|
56
|
+
from transformers import pipeline
|
|
57
|
+
|
|
58
|
+
pipe = pipeline("automatic-speech-recognition", "openai/whisper-large-v3")
|
|
59
|
+
|
|
60
|
+
pipe("sample.flac")
|
|
61
|
+
# {'text': "GOING ALONG SLUSHY COUNTRY ROADS AND SPEAKING TO DAMP AUDIENCES IN DRAUGHTY SCHOOL ROOMS DAY AFTER DAY FOR A FORTNIGHT HE'LL HAVE TO PUT IN AN APPEARANCE AT SOME PLACE OF WORSHIP ON SUNDAY MORNING AND HE CAN COME TO US IMMEDIATELY AFTERWARDS"}
|
|
62
|
+
```
|
|
63
|
+
|
|
64
64
|
## Solving ASR for your own data
|
|
65
65
|
|
|
66
66
|
We have some great news! You can fine-tune (transfer learning) a foundational speech model on a specific language without tonnes of data. Pretrained models such as Whisper, Wav2Vec2-MMS and HuBERT exist. [OpenAI's Whisper model](https://huggingface.co/openai/whisper-large-v3) is a large multilingual model trained on 100+ languages and with 4 Million hours of speech.
|
|
@@ -3,16 +3,16 @@ import type { TaskDataCustom } from "..";
|
|
|
3
3
|
const taskData: TaskDataCustom = {
|
|
4
4
|
datasets: [
|
|
5
5
|
{
|
|
6
|
-
description: "
|
|
7
|
-
id: "mozilla-foundation/
|
|
6
|
+
description: "31,175 hours of multilingual audio-text dataset in 108 languages.",
|
|
7
|
+
id: "mozilla-foundation/common_voice_17_0",
|
|
8
8
|
},
|
|
9
9
|
{
|
|
10
10
|
description: "An English dataset with 1,000 hours of data.",
|
|
11
11
|
id: "librispeech_asr",
|
|
12
12
|
},
|
|
13
13
|
{
|
|
14
|
-
description: "
|
|
15
|
-
id: "
|
|
14
|
+
description: "A multi-lingual audio dataset with 370K hours of audio.",
|
|
15
|
+
id: "espnet/yodas",
|
|
16
16
|
},
|
|
17
17
|
],
|
|
18
18
|
demo: {
|
|
@@ -47,12 +47,12 @@ const taskData: TaskDataCustom = {
|
|
|
47
47
|
id: "openai/whisper-large-v3",
|
|
48
48
|
},
|
|
49
49
|
{
|
|
50
|
-
description: "A good generic
|
|
51
|
-
id: "facebook/
|
|
50
|
+
description: "A good generic speech model by MetaAI for fine-tuning.",
|
|
51
|
+
id: "facebook/w2v-bert-2.0",
|
|
52
52
|
},
|
|
53
53
|
{
|
|
54
54
|
description: "An end-to-end model that performs ASR and Speech Translation by MetaAI.",
|
|
55
|
-
id: "facebook/
|
|
55
|
+
id: "facebook/seamless-m4t-v2-large",
|
|
56
56
|
},
|
|
57
57
|
],
|
|
58
58
|
spaces: [
|
|
@@ -58,8 +58,6 @@ await inference.textToSpeech({
|
|
|
58
58
|
|
|
59
59
|
- [Hugging Face Audio Course](https://huggingface.co/learn/audio-course/chapter6/introduction)
|
|
60
60
|
- [ML for Audio Study Group - Text to Speech Deep Dive](https://www.youtube.com/watch?v=aLBedWj-5CQ)
|
|
61
|
-
- [An introduction to SpeechT5, a multi-purpose speech recognition and synthesis model](https://huggingface.co/blog/speecht5).
|
|
62
|
-
- [A guide on Fine-tuning Whisper For Multilingual ASR with 🤗Transformers](https://huggingface.co/blog/fine-tune-whisper)
|
|
63
61
|
- [Speech Synthesis, Recognition, and More With SpeechT5](https://huggingface.co/blog/speecht5)
|
|
64
62
|
- [Optimizing a Text-To-Speech model using 🤗 Transformers](https://huggingface.co/blog/optimizing-bark)
|
|
65
|
-
-
|
|
63
|
+
- [Train your own TTS models with Parler-TTS](https://github.com/huggingface/parler-tts)
|
|
@@ -4,8 +4,8 @@ const taskData: TaskDataCustom = {
|
|
|
4
4
|
canonicalId: "text-to-audio",
|
|
5
5
|
datasets: [
|
|
6
6
|
{
|
|
7
|
-
description: "
|
|
8
|
-
id: "
|
|
7
|
+
description: "10K hours of multi-speaker English dataset.",
|
|
8
|
+
id: "parler-tts/mls_eng_10k",
|
|
9
9
|
},
|
|
10
10
|
{
|
|
11
11
|
description: "Multi-speaker English dataset.",
|
|
@@ -43,8 +43,8 @@ const taskData: TaskDataCustom = {
|
|
|
43
43
|
id: "facebook/mms-tts",
|
|
44
44
|
},
|
|
45
45
|
{
|
|
46
|
-
description: "
|
|
47
|
-
id: "
|
|
46
|
+
description: "A prompt based, powerful TTS model.",
|
|
47
|
+
id: "parler-tts/parler_tts_mini_v0.1",
|
|
48
48
|
},
|
|
49
49
|
],
|
|
50
50
|
spaces: [
|
|
@@ -57,8 +57,8 @@ const taskData: TaskDataCustom = {
|
|
|
57
57
|
id: "coqui/xtts",
|
|
58
58
|
},
|
|
59
59
|
{
|
|
60
|
-
description: "An application that synthesizes speech for
|
|
61
|
-
id: "
|
|
60
|
+
description: "An application that synthesizes speech for diverse speaker prompts.",
|
|
61
|
+
id: "parler-tts/parler_tts_mini",
|
|
62
62
|
},
|
|
63
63
|
],
|
|
64
64
|
summary:
|