@huggingface/tasks 0.10.8 → 0.10.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +112 -17
- package/dist/index.js +112 -17
- package/dist/src/hardware.d.ts +86 -2
- package/dist/src/hardware.d.ts.map +1 -1
- package/dist/src/local-apps.d.ts +7 -0
- package/dist/src/local-apps.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/hardware.ts +88 -4
- package/src/local-apps.ts +7 -0
- package/src/tasks/automatic-speech-recognition/about.md +16 -16
- package/src/tasks/automatic-speech-recognition/data.ts +7 -7
- package/src/tasks/text-to-speech/about.md +1 -3
- package/src/tasks/text-to-speech/data.ts +6 -6
package/dist/index.cjs
CHANGED
|
@@ -1541,16 +1541,16 @@ var data_default2 = taskData2;
|
|
|
1541
1541
|
var taskData3 = {
|
|
1542
1542
|
datasets: [
|
|
1543
1543
|
{
|
|
1544
|
-
description: "
|
|
1545
|
-
id: "mozilla-foundation/
|
|
1544
|
+
description: "31,175 hours of multilingual audio-text dataset in 108 languages.",
|
|
1545
|
+
id: "mozilla-foundation/common_voice_17_0"
|
|
1546
1546
|
},
|
|
1547
1547
|
{
|
|
1548
1548
|
description: "An English dataset with 1,000 hours of data.",
|
|
1549
1549
|
id: "librispeech_asr"
|
|
1550
1550
|
},
|
|
1551
1551
|
{
|
|
1552
|
-
description: "
|
|
1553
|
-
id: "
|
|
1552
|
+
description: "A multi-lingual audio dataset with 370K hours of audio.",
|
|
1553
|
+
id: "espnet/yodas"
|
|
1554
1554
|
}
|
|
1555
1555
|
],
|
|
1556
1556
|
demo: {
|
|
@@ -1585,12 +1585,12 @@ var taskData3 = {
|
|
|
1585
1585
|
id: "openai/whisper-large-v3"
|
|
1586
1586
|
},
|
|
1587
1587
|
{
|
|
1588
|
-
description: "A good generic
|
|
1589
|
-
id: "facebook/
|
|
1588
|
+
description: "A good generic speech model by MetaAI for fine-tuning.",
|
|
1589
|
+
id: "facebook/w2v-bert-2.0"
|
|
1590
1590
|
},
|
|
1591
1591
|
{
|
|
1592
1592
|
description: "An end-to-end model that performs ASR and Speech Translation by MetaAI.",
|
|
1593
|
-
id: "facebook/
|
|
1593
|
+
id: "facebook/seamless-m4t-v2-large"
|
|
1594
1594
|
}
|
|
1595
1595
|
],
|
|
1596
1596
|
spaces: [
|
|
@@ -2992,8 +2992,8 @@ var taskData24 = {
|
|
|
2992
2992
|
canonicalId: "text-to-audio",
|
|
2993
2993
|
datasets: [
|
|
2994
2994
|
{
|
|
2995
|
-
description: "
|
|
2996
|
-
id: "
|
|
2995
|
+
description: "10K hours of multi-speaker English dataset.",
|
|
2996
|
+
id: "parler-tts/mls_eng_10k"
|
|
2997
2997
|
},
|
|
2998
2998
|
{
|
|
2999
2999
|
description: "Multi-speaker English dataset.",
|
|
@@ -3031,8 +3031,8 @@ var taskData24 = {
|
|
|
3031
3031
|
id: "facebook/mms-tts"
|
|
3032
3032
|
},
|
|
3033
3033
|
{
|
|
3034
|
-
description: "
|
|
3035
|
-
id: "
|
|
3034
|
+
description: "A prompt based, powerful TTS model.",
|
|
3035
|
+
id: "parler-tts/parler_tts_mini_v0.1"
|
|
3036
3036
|
}
|
|
3037
3037
|
],
|
|
3038
3038
|
spaces: [
|
|
@@ -3045,8 +3045,8 @@ var taskData24 = {
|
|
|
3045
3045
|
id: "coqui/xtts"
|
|
3046
3046
|
},
|
|
3047
3047
|
{
|
|
3048
|
-
description: "An application that synthesizes speech for
|
|
3049
|
-
id: "
|
|
3048
|
+
description: "An application that synthesizes speech for diverse speaker prompts.",
|
|
3049
|
+
id: "parler-tts/parler_tts_mini"
|
|
3050
3050
|
}
|
|
3051
3051
|
],
|
|
3052
3052
|
summary: "Text-to-Speech (TTS) is the task of generating natural sounding speech given text input. TTS models can be extended to have a single model that generates speech for multiple speakers and multiple languages.",
|
|
@@ -5476,6 +5476,30 @@ var SKUS = {
|
|
|
5476
5476
|
tflops: 91.1,
|
|
5477
5477
|
memory: [48]
|
|
5478
5478
|
},
|
|
5479
|
+
"RTX 5880 Ada": {
|
|
5480
|
+
tflops: 69.3,
|
|
5481
|
+
memory: [48]
|
|
5482
|
+
},
|
|
5483
|
+
"RTX 5000 Ada": {
|
|
5484
|
+
tflops: 65.3,
|
|
5485
|
+
memory: [32]
|
|
5486
|
+
},
|
|
5487
|
+
"RTX 4500 Ada": {
|
|
5488
|
+
tflops: 39.6,
|
|
5489
|
+
memory: [24]
|
|
5490
|
+
},
|
|
5491
|
+
"RTX 4000 Ada": {
|
|
5492
|
+
tflops: 26.7,
|
|
5493
|
+
memory: [20]
|
|
5494
|
+
},
|
|
5495
|
+
"RTX 4000 SFF Ada": {
|
|
5496
|
+
tflops: 19.2,
|
|
5497
|
+
memory: [20]
|
|
5498
|
+
},
|
|
5499
|
+
"RTX 2000 Ada": {
|
|
5500
|
+
tflops: 12,
|
|
5501
|
+
memory: [16]
|
|
5502
|
+
},
|
|
5479
5503
|
A100: {
|
|
5480
5504
|
tflops: 77.97,
|
|
5481
5505
|
memory: [80, 40]
|
|
@@ -5488,14 +5512,14 @@ var SKUS = {
|
|
|
5488
5512
|
tflops: 31.24,
|
|
5489
5513
|
memory: [24]
|
|
5490
5514
|
},
|
|
5491
|
-
T4: {
|
|
5492
|
-
tflops: 65.13,
|
|
5493
|
-
memory: [16]
|
|
5494
|
-
},
|
|
5495
5515
|
"RTX 4090": {
|
|
5496
5516
|
tflops: 82.58,
|
|
5497
5517
|
memory: [24]
|
|
5498
5518
|
},
|
|
5519
|
+
"RTX 4090D": {
|
|
5520
|
+
tflops: 79.49,
|
|
5521
|
+
memory: [24]
|
|
5522
|
+
},
|
|
5499
5523
|
"RTX 4080 SUPER": {
|
|
5500
5524
|
tflops: 52.2,
|
|
5501
5525
|
memory: [16]
|
|
@@ -5520,6 +5544,14 @@ var SKUS = {
|
|
|
5520
5544
|
tflops: 44.1,
|
|
5521
5545
|
memory: [16]
|
|
5522
5546
|
},
|
|
5547
|
+
"RTX 4060": {
|
|
5548
|
+
tflops: 15.11,
|
|
5549
|
+
memory: [8]
|
|
5550
|
+
},
|
|
5551
|
+
"RTX 4060 Ti": {
|
|
5552
|
+
tflops: 22.06,
|
|
5553
|
+
memory: [8, 16]
|
|
5554
|
+
},
|
|
5523
5555
|
"RTX 3090": {
|
|
5524
5556
|
tflops: 35.58,
|
|
5525
5557
|
memory: [24]
|
|
@@ -5556,13 +5588,53 @@ var SKUS = {
|
|
|
5556
5588
|
tflops: 12.74,
|
|
5557
5589
|
memory: [12, 8]
|
|
5558
5590
|
},
|
|
5591
|
+
"RTX 2070": {
|
|
5592
|
+
tflops: 14.93,
|
|
5593
|
+
memory: [8]
|
|
5594
|
+
},
|
|
5595
|
+
"RTX 3050 Mobile": {
|
|
5596
|
+
tflops: 7.639,
|
|
5597
|
+
memory: [6]
|
|
5598
|
+
},
|
|
5559
5599
|
"RTX 2060 Mobile": {
|
|
5560
5600
|
tflops: 9.22,
|
|
5561
5601
|
memory: [6]
|
|
5562
5602
|
},
|
|
5603
|
+
"GTX 1080 Ti": {
|
|
5604
|
+
tflops: 11.34,
|
|
5605
|
+
// float32 (GPU does not support native float16)
|
|
5606
|
+
memory: [11]
|
|
5607
|
+
},
|
|
5608
|
+
"GTX 1070 Ti": {
|
|
5609
|
+
tflops: 8.2,
|
|
5610
|
+
// float32 (GPU does not support native float16)
|
|
5611
|
+
memory: [8]
|
|
5612
|
+
},
|
|
5563
5613
|
"RTX Titan": {
|
|
5564
5614
|
tflops: 32.62,
|
|
5565
5615
|
memory: [24]
|
|
5616
|
+
},
|
|
5617
|
+
"GTX 1650 Mobile": {
|
|
5618
|
+
tflops: 6.39,
|
|
5619
|
+
memory: [4]
|
|
5620
|
+
},
|
|
5621
|
+
T4: {
|
|
5622
|
+
tflops: 65.13,
|
|
5623
|
+
memory: [16]
|
|
5624
|
+
},
|
|
5625
|
+
V100: {
|
|
5626
|
+
tflops: 28.26,
|
|
5627
|
+
memory: [32, 16]
|
|
5628
|
+
},
|
|
5629
|
+
"Quadro P6000": {
|
|
5630
|
+
tflops: 12.63,
|
|
5631
|
+
// float32 (GPU does not support native float16)
|
|
5632
|
+
memory: [24]
|
|
5633
|
+
},
|
|
5634
|
+
P40: {
|
|
5635
|
+
tflops: 11.76,
|
|
5636
|
+
// float32 (GPU does not support native float16)
|
|
5637
|
+
memory: [24]
|
|
5566
5638
|
}
|
|
5567
5639
|
},
|
|
5568
5640
|
AMD: {
|
|
@@ -5578,6 +5650,10 @@ var SKUS = {
|
|
|
5578
5650
|
tflops: 181,
|
|
5579
5651
|
memory: [64]
|
|
5580
5652
|
},
|
|
5653
|
+
MI100: {
|
|
5654
|
+
tflops: 184.6,
|
|
5655
|
+
memory: [32]
|
|
5656
|
+
},
|
|
5581
5657
|
"RX 7900 XTX": {
|
|
5582
5658
|
tflops: 122.8,
|
|
5583
5659
|
memory: [24]
|
|
@@ -5601,6 +5677,18 @@ var SKUS = {
|
|
|
5601
5677
|
"RX 7600 XT": {
|
|
5602
5678
|
tflops: 45.14,
|
|
5603
5679
|
memory: [16, 8]
|
|
5680
|
+
},
|
|
5681
|
+
"RX 6950 XT": {
|
|
5682
|
+
tflops: 47.31,
|
|
5683
|
+
memory: [16]
|
|
5684
|
+
},
|
|
5685
|
+
"RX 6800": {
|
|
5686
|
+
tflops: 32.33,
|
|
5687
|
+
memory: [16]
|
|
5688
|
+
},
|
|
5689
|
+
"Radeon Pro VII": {
|
|
5690
|
+
tflops: 26.11,
|
|
5691
|
+
memory: [16]
|
|
5604
5692
|
}
|
|
5605
5693
|
}
|
|
5606
5694
|
},
|
|
@@ -5811,6 +5899,13 @@ var LOCAL_APPS = {
|
|
|
5811
5899
|
displayOnModelPage: isGgufModel,
|
|
5812
5900
|
deeplink: (model) => new URL(`https://backyard.ai/hf/model/${model.id}`)
|
|
5813
5901
|
},
|
|
5902
|
+
sanctum: {
|
|
5903
|
+
prettyLabel: "Sanctum",
|
|
5904
|
+
docsUrl: "https://sanctum.ai",
|
|
5905
|
+
mainTask: "text-generation",
|
|
5906
|
+
displayOnModelPage: isGgufModel,
|
|
5907
|
+
deeplink: (model) => new URL(`sanctum://open_from_hf?model=${model.id}`)
|
|
5908
|
+
},
|
|
5814
5909
|
drawthings: {
|
|
5815
5910
|
prettyLabel: "Draw Things",
|
|
5816
5911
|
docsUrl: "https://drawthings.ai",
|
package/dist/index.js
CHANGED
|
@@ -1503,16 +1503,16 @@ var data_default2 = taskData2;
|
|
|
1503
1503
|
var taskData3 = {
|
|
1504
1504
|
datasets: [
|
|
1505
1505
|
{
|
|
1506
|
-
description: "
|
|
1507
|
-
id: "mozilla-foundation/
|
|
1506
|
+
description: "31,175 hours of multilingual audio-text dataset in 108 languages.",
|
|
1507
|
+
id: "mozilla-foundation/common_voice_17_0"
|
|
1508
1508
|
},
|
|
1509
1509
|
{
|
|
1510
1510
|
description: "An English dataset with 1,000 hours of data.",
|
|
1511
1511
|
id: "librispeech_asr"
|
|
1512
1512
|
},
|
|
1513
1513
|
{
|
|
1514
|
-
description: "
|
|
1515
|
-
id: "
|
|
1514
|
+
description: "A multi-lingual audio dataset with 370K hours of audio.",
|
|
1515
|
+
id: "espnet/yodas"
|
|
1516
1516
|
}
|
|
1517
1517
|
],
|
|
1518
1518
|
demo: {
|
|
@@ -1547,12 +1547,12 @@ var taskData3 = {
|
|
|
1547
1547
|
id: "openai/whisper-large-v3"
|
|
1548
1548
|
},
|
|
1549
1549
|
{
|
|
1550
|
-
description: "A good generic
|
|
1551
|
-
id: "facebook/
|
|
1550
|
+
description: "A good generic speech model by MetaAI for fine-tuning.",
|
|
1551
|
+
id: "facebook/w2v-bert-2.0"
|
|
1552
1552
|
},
|
|
1553
1553
|
{
|
|
1554
1554
|
description: "An end-to-end model that performs ASR and Speech Translation by MetaAI.",
|
|
1555
|
-
id: "facebook/
|
|
1555
|
+
id: "facebook/seamless-m4t-v2-large"
|
|
1556
1556
|
}
|
|
1557
1557
|
],
|
|
1558
1558
|
spaces: [
|
|
@@ -2954,8 +2954,8 @@ var taskData24 = {
|
|
|
2954
2954
|
canonicalId: "text-to-audio",
|
|
2955
2955
|
datasets: [
|
|
2956
2956
|
{
|
|
2957
|
-
description: "
|
|
2958
|
-
id: "
|
|
2957
|
+
description: "10K hours of multi-speaker English dataset.",
|
|
2958
|
+
id: "parler-tts/mls_eng_10k"
|
|
2959
2959
|
},
|
|
2960
2960
|
{
|
|
2961
2961
|
description: "Multi-speaker English dataset.",
|
|
@@ -2993,8 +2993,8 @@ var taskData24 = {
|
|
|
2993
2993
|
id: "facebook/mms-tts"
|
|
2994
2994
|
},
|
|
2995
2995
|
{
|
|
2996
|
-
description: "
|
|
2997
|
-
id: "
|
|
2996
|
+
description: "A prompt based, powerful TTS model.",
|
|
2997
|
+
id: "parler-tts/parler_tts_mini_v0.1"
|
|
2998
2998
|
}
|
|
2999
2999
|
],
|
|
3000
3000
|
spaces: [
|
|
@@ -3007,8 +3007,8 @@ var taskData24 = {
|
|
|
3007
3007
|
id: "coqui/xtts"
|
|
3008
3008
|
},
|
|
3009
3009
|
{
|
|
3010
|
-
description: "An application that synthesizes speech for
|
|
3011
|
-
id: "
|
|
3010
|
+
description: "An application that synthesizes speech for diverse speaker prompts.",
|
|
3011
|
+
id: "parler-tts/parler_tts_mini"
|
|
3012
3012
|
}
|
|
3013
3013
|
],
|
|
3014
3014
|
summary: "Text-to-Speech (TTS) is the task of generating natural sounding speech given text input. TTS models can be extended to have a single model that generates speech for multiple speakers and multiple languages.",
|
|
@@ -5438,6 +5438,30 @@ var SKUS = {
|
|
|
5438
5438
|
tflops: 91.1,
|
|
5439
5439
|
memory: [48]
|
|
5440
5440
|
},
|
|
5441
|
+
"RTX 5880 Ada": {
|
|
5442
|
+
tflops: 69.3,
|
|
5443
|
+
memory: [48]
|
|
5444
|
+
},
|
|
5445
|
+
"RTX 5000 Ada": {
|
|
5446
|
+
tflops: 65.3,
|
|
5447
|
+
memory: [32]
|
|
5448
|
+
},
|
|
5449
|
+
"RTX 4500 Ada": {
|
|
5450
|
+
tflops: 39.6,
|
|
5451
|
+
memory: [24]
|
|
5452
|
+
},
|
|
5453
|
+
"RTX 4000 Ada": {
|
|
5454
|
+
tflops: 26.7,
|
|
5455
|
+
memory: [20]
|
|
5456
|
+
},
|
|
5457
|
+
"RTX 4000 SFF Ada": {
|
|
5458
|
+
tflops: 19.2,
|
|
5459
|
+
memory: [20]
|
|
5460
|
+
},
|
|
5461
|
+
"RTX 2000 Ada": {
|
|
5462
|
+
tflops: 12,
|
|
5463
|
+
memory: [16]
|
|
5464
|
+
},
|
|
5441
5465
|
A100: {
|
|
5442
5466
|
tflops: 77.97,
|
|
5443
5467
|
memory: [80, 40]
|
|
@@ -5450,14 +5474,14 @@ var SKUS = {
|
|
|
5450
5474
|
tflops: 31.24,
|
|
5451
5475
|
memory: [24]
|
|
5452
5476
|
},
|
|
5453
|
-
T4: {
|
|
5454
|
-
tflops: 65.13,
|
|
5455
|
-
memory: [16]
|
|
5456
|
-
},
|
|
5457
5477
|
"RTX 4090": {
|
|
5458
5478
|
tflops: 82.58,
|
|
5459
5479
|
memory: [24]
|
|
5460
5480
|
},
|
|
5481
|
+
"RTX 4090D": {
|
|
5482
|
+
tflops: 79.49,
|
|
5483
|
+
memory: [24]
|
|
5484
|
+
},
|
|
5461
5485
|
"RTX 4080 SUPER": {
|
|
5462
5486
|
tflops: 52.2,
|
|
5463
5487
|
memory: [16]
|
|
@@ -5482,6 +5506,14 @@ var SKUS = {
|
|
|
5482
5506
|
tflops: 44.1,
|
|
5483
5507
|
memory: [16]
|
|
5484
5508
|
},
|
|
5509
|
+
"RTX 4060": {
|
|
5510
|
+
tflops: 15.11,
|
|
5511
|
+
memory: [8]
|
|
5512
|
+
},
|
|
5513
|
+
"RTX 4060 Ti": {
|
|
5514
|
+
tflops: 22.06,
|
|
5515
|
+
memory: [8, 16]
|
|
5516
|
+
},
|
|
5485
5517
|
"RTX 3090": {
|
|
5486
5518
|
tflops: 35.58,
|
|
5487
5519
|
memory: [24]
|
|
@@ -5518,13 +5550,53 @@ var SKUS = {
|
|
|
5518
5550
|
tflops: 12.74,
|
|
5519
5551
|
memory: [12, 8]
|
|
5520
5552
|
},
|
|
5553
|
+
"RTX 2070": {
|
|
5554
|
+
tflops: 14.93,
|
|
5555
|
+
memory: [8]
|
|
5556
|
+
},
|
|
5557
|
+
"RTX 3050 Mobile": {
|
|
5558
|
+
tflops: 7.639,
|
|
5559
|
+
memory: [6]
|
|
5560
|
+
},
|
|
5521
5561
|
"RTX 2060 Mobile": {
|
|
5522
5562
|
tflops: 9.22,
|
|
5523
5563
|
memory: [6]
|
|
5524
5564
|
},
|
|
5565
|
+
"GTX 1080 Ti": {
|
|
5566
|
+
tflops: 11.34,
|
|
5567
|
+
// float32 (GPU does not support native float16)
|
|
5568
|
+
memory: [11]
|
|
5569
|
+
},
|
|
5570
|
+
"GTX 1070 Ti": {
|
|
5571
|
+
tflops: 8.2,
|
|
5572
|
+
// float32 (GPU does not support native float16)
|
|
5573
|
+
memory: [8]
|
|
5574
|
+
},
|
|
5525
5575
|
"RTX Titan": {
|
|
5526
5576
|
tflops: 32.62,
|
|
5527
5577
|
memory: [24]
|
|
5578
|
+
},
|
|
5579
|
+
"GTX 1650 Mobile": {
|
|
5580
|
+
tflops: 6.39,
|
|
5581
|
+
memory: [4]
|
|
5582
|
+
},
|
|
5583
|
+
T4: {
|
|
5584
|
+
tflops: 65.13,
|
|
5585
|
+
memory: [16]
|
|
5586
|
+
},
|
|
5587
|
+
V100: {
|
|
5588
|
+
tflops: 28.26,
|
|
5589
|
+
memory: [32, 16]
|
|
5590
|
+
},
|
|
5591
|
+
"Quadro P6000": {
|
|
5592
|
+
tflops: 12.63,
|
|
5593
|
+
// float32 (GPU does not support native float16)
|
|
5594
|
+
memory: [24]
|
|
5595
|
+
},
|
|
5596
|
+
P40: {
|
|
5597
|
+
tflops: 11.76,
|
|
5598
|
+
// float32 (GPU does not support native float16)
|
|
5599
|
+
memory: [24]
|
|
5528
5600
|
}
|
|
5529
5601
|
},
|
|
5530
5602
|
AMD: {
|
|
@@ -5540,6 +5612,10 @@ var SKUS = {
|
|
|
5540
5612
|
tflops: 181,
|
|
5541
5613
|
memory: [64]
|
|
5542
5614
|
},
|
|
5615
|
+
MI100: {
|
|
5616
|
+
tflops: 184.6,
|
|
5617
|
+
memory: [32]
|
|
5618
|
+
},
|
|
5543
5619
|
"RX 7900 XTX": {
|
|
5544
5620
|
tflops: 122.8,
|
|
5545
5621
|
memory: [24]
|
|
@@ -5563,6 +5639,18 @@ var SKUS = {
|
|
|
5563
5639
|
"RX 7600 XT": {
|
|
5564
5640
|
tflops: 45.14,
|
|
5565
5641
|
memory: [16, 8]
|
|
5642
|
+
},
|
|
5643
|
+
"RX 6950 XT": {
|
|
5644
|
+
tflops: 47.31,
|
|
5645
|
+
memory: [16]
|
|
5646
|
+
},
|
|
5647
|
+
"RX 6800": {
|
|
5648
|
+
tflops: 32.33,
|
|
5649
|
+
memory: [16]
|
|
5650
|
+
},
|
|
5651
|
+
"Radeon Pro VII": {
|
|
5652
|
+
tflops: 26.11,
|
|
5653
|
+
memory: [16]
|
|
5566
5654
|
}
|
|
5567
5655
|
}
|
|
5568
5656
|
},
|
|
@@ -5773,6 +5861,13 @@ var LOCAL_APPS = {
|
|
|
5773
5861
|
displayOnModelPage: isGgufModel,
|
|
5774
5862
|
deeplink: (model) => new URL(`https://backyard.ai/hf/model/${model.id}`)
|
|
5775
5863
|
},
|
|
5864
|
+
sanctum: {
|
|
5865
|
+
prettyLabel: "Sanctum",
|
|
5866
|
+
docsUrl: "https://sanctum.ai",
|
|
5867
|
+
mainTask: "text-generation",
|
|
5868
|
+
displayOnModelPage: isGgufModel,
|
|
5869
|
+
deeplink: (model) => new URL(`sanctum://open_from_hf?model=${model.id}`)
|
|
5870
|
+
},
|
|
5776
5871
|
drawthings: {
|
|
5777
5872
|
prettyLabel: "Draw Things",
|
|
5778
5873
|
docsUrl: "https://drawthings.ai",
|
package/dist/src/hardware.d.ts
CHANGED
|
@@ -44,6 +44,30 @@ export declare const SKUS: {
|
|
|
44
44
|
tflops: number;
|
|
45
45
|
memory: number[];
|
|
46
46
|
};
|
|
47
|
+
"RTX 5880 Ada": {
|
|
48
|
+
tflops: number;
|
|
49
|
+
memory: number[];
|
|
50
|
+
};
|
|
51
|
+
"RTX 5000 Ada": {
|
|
52
|
+
tflops: number;
|
|
53
|
+
memory: number[];
|
|
54
|
+
};
|
|
55
|
+
"RTX 4500 Ada": {
|
|
56
|
+
tflops: number;
|
|
57
|
+
memory: number[];
|
|
58
|
+
};
|
|
59
|
+
"RTX 4000 Ada": {
|
|
60
|
+
tflops: number;
|
|
61
|
+
memory: number[];
|
|
62
|
+
};
|
|
63
|
+
"RTX 4000 SFF Ada": {
|
|
64
|
+
tflops: number;
|
|
65
|
+
memory: number[];
|
|
66
|
+
};
|
|
67
|
+
"RTX 2000 Ada": {
|
|
68
|
+
tflops: number;
|
|
69
|
+
memory: number[];
|
|
70
|
+
};
|
|
47
71
|
A100: {
|
|
48
72
|
tflops: number;
|
|
49
73
|
memory: number[];
|
|
@@ -56,11 +80,11 @@ export declare const SKUS: {
|
|
|
56
80
|
tflops: number;
|
|
57
81
|
memory: number[];
|
|
58
82
|
};
|
|
59
|
-
|
|
83
|
+
"RTX 4090": {
|
|
60
84
|
tflops: number;
|
|
61
85
|
memory: number[];
|
|
62
86
|
};
|
|
63
|
-
"RTX
|
|
87
|
+
"RTX 4090D": {
|
|
64
88
|
tflops: number;
|
|
65
89
|
memory: number[];
|
|
66
90
|
};
|
|
@@ -88,6 +112,14 @@ export declare const SKUS: {
|
|
|
88
112
|
tflops: number;
|
|
89
113
|
memory: number[];
|
|
90
114
|
};
|
|
115
|
+
"RTX 4060": {
|
|
116
|
+
tflops: number;
|
|
117
|
+
memory: number[];
|
|
118
|
+
};
|
|
119
|
+
"RTX 4060 Ti": {
|
|
120
|
+
tflops: number;
|
|
121
|
+
memory: number[];
|
|
122
|
+
};
|
|
91
123
|
"RTX 3090": {
|
|
92
124
|
tflops: number;
|
|
93
125
|
memory: number[];
|
|
@@ -124,14 +156,50 @@ export declare const SKUS: {
|
|
|
124
156
|
tflops: number;
|
|
125
157
|
memory: number[];
|
|
126
158
|
};
|
|
159
|
+
"RTX 2070": {
|
|
160
|
+
tflops: number;
|
|
161
|
+
memory: number[];
|
|
162
|
+
};
|
|
163
|
+
"RTX 3050 Mobile": {
|
|
164
|
+
tflops: number;
|
|
165
|
+
memory: number[];
|
|
166
|
+
};
|
|
127
167
|
"RTX 2060 Mobile": {
|
|
128
168
|
tflops: number;
|
|
129
169
|
memory: number[];
|
|
130
170
|
};
|
|
171
|
+
"GTX 1080 Ti": {
|
|
172
|
+
tflops: number;
|
|
173
|
+
memory: number[];
|
|
174
|
+
};
|
|
175
|
+
"GTX 1070 Ti": {
|
|
176
|
+
tflops: number;
|
|
177
|
+
memory: number[];
|
|
178
|
+
};
|
|
131
179
|
"RTX Titan": {
|
|
132
180
|
tflops: number;
|
|
133
181
|
memory: number[];
|
|
134
182
|
};
|
|
183
|
+
"GTX 1650 Mobile": {
|
|
184
|
+
tflops: number;
|
|
185
|
+
memory: number[];
|
|
186
|
+
};
|
|
187
|
+
T4: {
|
|
188
|
+
tflops: number;
|
|
189
|
+
memory: number[];
|
|
190
|
+
};
|
|
191
|
+
V100: {
|
|
192
|
+
tflops: number;
|
|
193
|
+
memory: number[];
|
|
194
|
+
};
|
|
195
|
+
"Quadro P6000": {
|
|
196
|
+
tflops: number;
|
|
197
|
+
memory: number[];
|
|
198
|
+
};
|
|
199
|
+
P40: {
|
|
200
|
+
tflops: number;
|
|
201
|
+
memory: number[];
|
|
202
|
+
};
|
|
135
203
|
};
|
|
136
204
|
AMD: {
|
|
137
205
|
MI300: {
|
|
@@ -146,6 +214,10 @@ export declare const SKUS: {
|
|
|
146
214
|
tflops: number;
|
|
147
215
|
memory: number[];
|
|
148
216
|
};
|
|
217
|
+
MI100: {
|
|
218
|
+
tflops: number;
|
|
219
|
+
memory: number[];
|
|
220
|
+
};
|
|
149
221
|
"RX 7900 XTX": {
|
|
150
222
|
tflops: number;
|
|
151
223
|
memory: number[];
|
|
@@ -170,6 +242,18 @@ export declare const SKUS: {
|
|
|
170
242
|
tflops: number;
|
|
171
243
|
memory: number[];
|
|
172
244
|
};
|
|
245
|
+
"RX 6950 XT": {
|
|
246
|
+
tflops: number;
|
|
247
|
+
memory: number[];
|
|
248
|
+
};
|
|
249
|
+
"RX 6800": {
|
|
250
|
+
tflops: number;
|
|
251
|
+
memory: number[];
|
|
252
|
+
};
|
|
253
|
+
"Radeon Pro VII": {
|
|
254
|
+
tflops: number;
|
|
255
|
+
memory: number[];
|
|
256
|
+
};
|
|
173
257
|
};
|
|
174
258
|
};
|
|
175
259
|
CPU: {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"hardware.d.ts","sourceRoot":"","sources":["../../src/hardware.ts"],"names":[],"mappings":"AAAA;;;GAGG;AACH,eAAO,MAAM,iDAAiD,QAAW,CAAC;AAC1E,eAAO,MAAM,yDAAyD,QAAW,CAAC;AAClF,eAAO,MAAM,oCAAoC,QAAU,CAAC;AAE5D;;;GAGG;AACH,eAAO,MAAM,+CAA+C,QAAW,CAAC;AAExE,MAAM,WAAW,YAAY;IAC5B;;;;;;;;;OASG;IACH,MAAM,EAAE,MAAM,CAAC;IACf;;;OAGG;IACH,MAAM,CAAC,EAAE,MAAM,EAAE,CAAC;CAClB;AAED,eAAO,MAAM,sBAAsB,UAAqD,CAAC;AAEzF,eAAO,MAAM,IAAI
|
|
1
|
+
{"version":3,"file":"hardware.d.ts","sourceRoot":"","sources":["../../src/hardware.ts"],"names":[],"mappings":"AAAA;;;GAGG;AACH,eAAO,MAAM,iDAAiD,QAAW,CAAC;AAC1E,eAAO,MAAM,yDAAyD,QAAW,CAAC;AAClF,eAAO,MAAM,oCAAoC,QAAU,CAAC;AAE5D;;;GAGG;AACH,eAAO,MAAM,+CAA+C,QAAW,CAAC;AAExE,MAAM,WAAW,YAAY;IAC5B;;;;;;;;;OASG;IACH,MAAM,EAAE,MAAM,CAAC;IACf;;;OAGG;IACH,MAAM,CAAC,EAAE,MAAM,EAAE,CAAC;CAClB;AAED,eAAO,MAAM,sBAAsB,UAAqD,CAAC;AAEzF,eAAO,MAAM,IAAI;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CA+XuD,CAAC;AAEzE,MAAM,MAAM,OAAO,GAAG,MAAM,OAAO,IAAI,CAAC"}
|
package/dist/src/local-apps.d.ts
CHANGED
|
@@ -77,6 +77,13 @@ export declare const LOCAL_APPS: {
|
|
|
77
77
|
displayOnModelPage: typeof isGgufModel;
|
|
78
78
|
deeplink: (model: ModelData) => URL;
|
|
79
79
|
};
|
|
80
|
+
sanctum: {
|
|
81
|
+
prettyLabel: string;
|
|
82
|
+
docsUrl: string;
|
|
83
|
+
mainTask: "text-generation";
|
|
84
|
+
displayOnModelPage: typeof isGgufModel;
|
|
85
|
+
deeplink: (model: ModelData) => URL;
|
|
86
|
+
};
|
|
80
87
|
drawthings: {
|
|
81
88
|
prettyLabel: string;
|
|
82
89
|
docsUrl: string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"local-apps.d.ts","sourceRoot":"","sources":["../../src/local-apps.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AAC9C,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAEhD;;GAEG;AACH,MAAM,MAAM,QAAQ,GAAG;IACtB;;OAEG;IACH,WAAW,EAAE,MAAM,CAAC;IACpB;;OAEG;IACH,OAAO,EAAE,MAAM,CAAC;IAChB;;OAEG;IACH,QAAQ,EAAE,YAAY,CAAC;IACvB;;OAEG;IACH,SAAS,CAAC,EAAE,OAAO,CAAC;IAEpB,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB;;OAEG;IACH,kBAAkB,EAAE,CAAC,KAAK,EAAE,SAAS,KAAK,OAAO,CAAC;CAClD,GAAG,CACD;IACA;;OAEG;IACH,QAAQ,EAAE,CAAC,KAAK,EAAE,SAAS,KAAK,GAAG,CAAC;CACnC,GACD;IACA;;OAEG;IACH,OAAO,EAAE,CAAC,KAAK,EAAE,SAAS,KAAK,MAAM,GAAG,MAAM,EAAE,CAAC;CAChD,CACH,CAAC;AAEF,iBAAS,WAAW,CAAC,KAAK,EAAE,SAAS,WAEpC;AAmBD;;;;;;;;;;GAUG;AACH,eAAO,MAAM,UAAU;;;;;;yBA5BS,SAAS,KAAG,MAAM,EAAE
|
|
1
|
+
{"version":3,"file":"local-apps.d.ts","sourceRoot":"","sources":["../../src/local-apps.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AAC9C,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAEhD;;GAEG;AACH,MAAM,MAAM,QAAQ,GAAG;IACtB;;OAEG;IACH,WAAW,EAAE,MAAM,CAAC;IACpB;;OAEG;IACH,OAAO,EAAE,MAAM,CAAC;IAChB;;OAEG;IACH,QAAQ,EAAE,YAAY,CAAC;IACvB;;OAEG;IACH,SAAS,CAAC,EAAE,OAAO,CAAC;IAEpB,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB;;OAEG;IACH,kBAAkB,EAAE,CAAC,KAAK,EAAE,SAAS,KAAK,OAAO,CAAC;CAClD,GAAG,CACD;IACA;;OAEG;IACH,QAAQ,EAAE,CAAC,KAAK,EAAE,SAAS,KAAK,GAAG,CAAC;CACnC,GACD;IACA;;OAEG;IACH,OAAO,EAAE,CAAC,KAAK,EAAE,SAAS,KAAK,MAAM,GAAG,MAAM,EAAE,CAAC;CAChD,CACH,CAAC;AAEF,iBAAS,WAAW,CAAC,KAAK,EAAE,SAAS,WAEpC;AAmBD;;;;;;;;;;GAUG;AACH,eAAO,MAAM,UAAU;;;;;;yBA5BS,SAAS,KAAG,MAAM,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAwFhB,CAAC;AAErC,MAAM,MAAM,WAAW,GAAG,MAAM,OAAO,UAAU,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@huggingface/tasks",
|
|
3
3
|
"packageManager": "pnpm@8.10.5",
|
|
4
|
-
"version": "0.10.
|
|
4
|
+
"version": "0.10.9",
|
|
5
5
|
"description": "List of ML tasks for huggingface.co/tasks",
|
|
6
6
|
"repository": "https://github.com/huggingface/huggingface.js.git",
|
|
7
7
|
"publishConfig": {
|
package/src/hardware.ts
CHANGED
|
@@ -48,6 +48,30 @@ export const SKUS = {
|
|
|
48
48
|
tflops: 91.1,
|
|
49
49
|
memory: [48],
|
|
50
50
|
},
|
|
51
|
+
"RTX 5880 Ada": {
|
|
52
|
+
tflops: 69.3,
|
|
53
|
+
memory: [48],
|
|
54
|
+
},
|
|
55
|
+
"RTX 5000 Ada": {
|
|
56
|
+
tflops: 65.3,
|
|
57
|
+
memory: [32],
|
|
58
|
+
},
|
|
59
|
+
"RTX 4500 Ada": {
|
|
60
|
+
tflops: 39.6,
|
|
61
|
+
memory: [24],
|
|
62
|
+
},
|
|
63
|
+
"RTX 4000 Ada": {
|
|
64
|
+
tflops: 26.7,
|
|
65
|
+
memory: [20],
|
|
66
|
+
},
|
|
67
|
+
"RTX 4000 SFF Ada": {
|
|
68
|
+
tflops: 19.2,
|
|
69
|
+
memory: [20],
|
|
70
|
+
},
|
|
71
|
+
"RTX 2000 Ada": {
|
|
72
|
+
tflops: 12.0,
|
|
73
|
+
memory: [16],
|
|
74
|
+
},
|
|
51
75
|
A100: {
|
|
52
76
|
tflops: 77.97,
|
|
53
77
|
memory: [80, 40],
|
|
@@ -60,14 +84,14 @@ export const SKUS = {
|
|
|
60
84
|
tflops: 31.24,
|
|
61
85
|
memory: [24],
|
|
62
86
|
},
|
|
63
|
-
T4: {
|
|
64
|
-
tflops: 65.13,
|
|
65
|
-
memory: [16],
|
|
66
|
-
},
|
|
67
87
|
"RTX 4090": {
|
|
68
88
|
tflops: 82.58,
|
|
69
89
|
memory: [24],
|
|
70
90
|
},
|
|
91
|
+
"RTX 4090D": {
|
|
92
|
+
tflops: 79.49,
|
|
93
|
+
memory: [24],
|
|
94
|
+
},
|
|
71
95
|
"RTX 4080 SUPER": {
|
|
72
96
|
tflops: 52.2,
|
|
73
97
|
memory: [16],
|
|
@@ -92,6 +116,14 @@ export const SKUS = {
|
|
|
92
116
|
tflops: 44.1,
|
|
93
117
|
memory: [16],
|
|
94
118
|
},
|
|
119
|
+
"RTX 4060": {
|
|
120
|
+
tflops: 15.11,
|
|
121
|
+
memory: [8],
|
|
122
|
+
},
|
|
123
|
+
"RTX 4060 Ti": {
|
|
124
|
+
tflops: 22.06,
|
|
125
|
+
memory: [8, 16],
|
|
126
|
+
},
|
|
95
127
|
"RTX 3090": {
|
|
96
128
|
tflops: 35.58,
|
|
97
129
|
memory: [24],
|
|
@@ -128,14 +160,50 @@ export const SKUS = {
|
|
|
128
160
|
tflops: 12.74,
|
|
129
161
|
memory: [12, 8],
|
|
130
162
|
},
|
|
163
|
+
"RTX 2070": {
|
|
164
|
+
tflops: 14.93,
|
|
165
|
+
memory: [8],
|
|
166
|
+
},
|
|
167
|
+
"RTX 3050 Mobile": {
|
|
168
|
+
tflops: 7.639,
|
|
169
|
+
memory: [6],
|
|
170
|
+
},
|
|
131
171
|
"RTX 2060 Mobile": {
|
|
132
172
|
tflops: 9.22,
|
|
133
173
|
memory: [6],
|
|
134
174
|
},
|
|
175
|
+
"GTX 1080 Ti": {
|
|
176
|
+
tflops: 11.34, // float32 (GPU does not support native float16)
|
|
177
|
+
memory: [11],
|
|
178
|
+
},
|
|
179
|
+
"GTX 1070 Ti": {
|
|
180
|
+
tflops: 8.2, // float32 (GPU does not support native float16)
|
|
181
|
+
memory: [8],
|
|
182
|
+
},
|
|
135
183
|
"RTX Titan": {
|
|
136
184
|
tflops: 32.62,
|
|
137
185
|
memory: [24],
|
|
138
186
|
},
|
|
187
|
+
"GTX 1650 Mobile": {
|
|
188
|
+
tflops: 6.39,
|
|
189
|
+
memory: [4],
|
|
190
|
+
},
|
|
191
|
+
T4: {
|
|
192
|
+
tflops: 65.13,
|
|
193
|
+
memory: [16],
|
|
194
|
+
},
|
|
195
|
+
V100: {
|
|
196
|
+
tflops: 28.26,
|
|
197
|
+
memory: [32, 16],
|
|
198
|
+
},
|
|
199
|
+
"Quadro P6000": {
|
|
200
|
+
tflops: 12.63, // float32 (GPU does not support native float16)
|
|
201
|
+
memory: [24],
|
|
202
|
+
},
|
|
203
|
+
P40: {
|
|
204
|
+
tflops: 11.76, // float32 (GPU does not support native float16)
|
|
205
|
+
memory: [24],
|
|
206
|
+
},
|
|
139
207
|
},
|
|
140
208
|
AMD: {
|
|
141
209
|
MI300: {
|
|
@@ -150,6 +218,10 @@ export const SKUS = {
|
|
|
150
218
|
tflops: 181.0,
|
|
151
219
|
memory: [64],
|
|
152
220
|
},
|
|
221
|
+
MI100: {
|
|
222
|
+
tflops: 184.6,
|
|
223
|
+
memory: [32],
|
|
224
|
+
},
|
|
153
225
|
"RX 7900 XTX": {
|
|
154
226
|
tflops: 122.8,
|
|
155
227
|
memory: [24],
|
|
@@ -174,6 +246,18 @@ export const SKUS = {
|
|
|
174
246
|
tflops: 45.14,
|
|
175
247
|
memory: [16, 8],
|
|
176
248
|
},
|
|
249
|
+
"RX 6950 XT": {
|
|
250
|
+
tflops: 47.31,
|
|
251
|
+
memory: [16],
|
|
252
|
+
},
|
|
253
|
+
"RX 6800": {
|
|
254
|
+
tflops: 32.33,
|
|
255
|
+
memory: [16],
|
|
256
|
+
},
|
|
257
|
+
"Radeon Pro VII": {
|
|
258
|
+
tflops: 26.11,
|
|
259
|
+
memory: [16],
|
|
260
|
+
},
|
|
177
261
|
},
|
|
178
262
|
},
|
|
179
263
|
CPU: {
|
package/src/local-apps.ts
CHANGED
|
@@ -103,6 +103,13 @@ export const LOCAL_APPS = {
|
|
|
103
103
|
displayOnModelPage: isGgufModel,
|
|
104
104
|
deeplink: (model) => new URL(`https://backyard.ai/hf/model/${model.id}`),
|
|
105
105
|
},
|
|
106
|
+
sanctum: {
|
|
107
|
+
prettyLabel: "Sanctum",
|
|
108
|
+
docsUrl: "https://sanctum.ai",
|
|
109
|
+
mainTask: "text-generation",
|
|
110
|
+
displayOnModelPage: isGgufModel,
|
|
111
|
+
deeplink: (model) => new URL(`sanctum://open_from_hf?model=${model.id}`),
|
|
112
|
+
},
|
|
106
113
|
drawthings: {
|
|
107
114
|
prettyLabel: "Draw Things",
|
|
108
115
|
docsUrl: "https://drawthings.ai",
|
|
@@ -18,7 +18,7 @@ The use of Multilingual ASR has become popular, the idea of maintaining just a s
|
|
|
18
18
|
|
|
19
19
|
## Inference
|
|
20
20
|
|
|
21
|
-
The Hub contains over [
|
|
21
|
+
The Hub contains over [17,000 ASR models](https://huggingface.co/models?pipeline_tag=automatic-speech-recognition&sort=downloads) that you can test right away in your browser using the model page widgets. You can also use any model as a service using the Serverless Inference API. We also support libraries such as [transformers](https://huggingface.co/models?library=transformers&pipeline_tag=automatic-speech-recognition&sort=downloads), [speechbrain](https://huggingface.co/models?library=speechbrain&pipeline_tag=automatic-speech-recognition&sort=downloads), [NeMo](https://huggingface.co/models?pipeline_tag=automatic-speech-recognition&library=nemo&sort=downloads) and [espnet](https://huggingface.co/models?library=espnet&pipeline_tag=automatic-speech-recognition&sort=downloads) via the Serverless Inference API. Here's a simple code snippet to run inference:
|
|
22
22
|
|
|
23
23
|
```python
|
|
24
24
|
import json
|
|
@@ -36,20 +36,7 @@ def query(filename):
|
|
|
36
36
|
data = query("sample1.flac")
|
|
37
37
|
```
|
|
38
38
|
|
|
39
|
-
You can also use
|
|
40
|
-
|
|
41
|
-
```python
|
|
42
|
-
from transformers import pipeline
|
|
43
|
-
|
|
44
|
-
with open("sample.flac", "rb") as f:
|
|
45
|
-
data = f.read()
|
|
46
|
-
|
|
47
|
-
pipe = pipeline("automatic-speech-recognition", "openai/whisper-large-v2")
|
|
48
|
-
pipe("sample.flac")
|
|
49
|
-
# {'text': "GOING ALONG SLUSHY COUNTRY ROADS AND SPEAKING TO DAMP AUDIENCES IN DRAUGHTY SCHOOL ROOMS DAY AFTER DAY FOR A FORTNIGHT HE'LL HAVE TO PUT IN AN APPEARANCE AT SOME PLACE OF WORSHIP ON SUNDAY MORNING AND HE CAN COME TO US IMMEDIATELY AFTERWARDS"}
|
|
50
|
-
```
|
|
51
|
-
|
|
52
|
-
You can use [huggingface.js](https://github.com/huggingface/huggingface.js) to transcribe text with javascript using models on Hugging Face Hub.
|
|
39
|
+
You can also use [huggingface.js](https://github.com/huggingface/huggingface.js), the JavaScript client, to transcribe audio with models served through the Inference API.
|
|
53
40
|
|
|
54
41
|
```javascript
|
|
55
42
|
import { HfInference } from "@huggingface/inference";
|
|
@@ -57,10 +44,23 @@ import { HfInference } from "@huggingface/inference";
|
|
|
57
44
|
const inference = new HfInference(HF_TOKEN);
|
|
58
45
|
await inference.automaticSpeechRecognition({
|
|
59
46
|
data: await (await fetch("sample.flac")).blob(),
|
|
60
|
-
model: "openai/whisper-large-
|
|
47
|
+
model: "openai/whisper-large-v3",
|
|
61
48
|
});
|
|
62
49
|
```
|
|
63
50
|
|
|
51
|
+
For transformers-compatible models such as Whisper, Wav2Vec2, and HuBERT, you can also run inference in Python using transformers as follows:
|
|
52
|
+
|
|
53
|
+
```python
|
|
54
|
+
# pip install --upgrade transformers
|
|
55
|
+
|
|
56
|
+
from transformers import pipeline
|
|
57
|
+
|
|
58
|
+
pipe = pipeline("automatic-speech-recognition", "openai/whisper-large-v3")
|
|
59
|
+
|
|
60
|
+
pipe("sample.flac")
|
|
61
|
+
# {'text': "GOING ALONG SLUSHY COUNTRY ROADS AND SPEAKING TO DAMP AUDIENCES IN DRAUGHTY SCHOOL ROOMS DAY AFTER DAY FOR A FORTNIGHT HE'LL HAVE TO PUT IN AN APPEARANCE AT SOME PLACE OF WORSHIP ON SUNDAY MORNING AND HE CAN COME TO US IMMEDIATELY AFTERWARDS"}
|
|
62
|
+
```
|
|
63
|
+
|
|
64
64
|
## Solving ASR for your own data
|
|
65
65
|
|
|
66
66
|
We have some great news! You can fine-tune (transfer learning) a foundational speech model on a specific language without tonnes of data. Pretrained models such as Whisper, Wav2Vec2-MMS and HuBERT exist. [OpenAI's Whisper model](https://huggingface.co/openai/whisper-large-v3) is a large multilingual model trained on 100+ languages and with 4 Million hours of speech.
|
|
@@ -3,16 +3,16 @@ import type { TaskDataCustom } from "..";
|
|
|
3
3
|
const taskData: TaskDataCustom = {
|
|
4
4
|
datasets: [
|
|
5
5
|
{
|
|
6
|
-
description: "
|
|
7
|
-
id: "mozilla-foundation/
|
|
6
|
+
description: "31,175 hours of multilingual audio-text dataset in 108 languages.",
|
|
7
|
+
id: "mozilla-foundation/common_voice_17_0",
|
|
8
8
|
},
|
|
9
9
|
{
|
|
10
10
|
description: "An English dataset with 1,000 hours of data.",
|
|
11
11
|
id: "librispeech_asr",
|
|
12
12
|
},
|
|
13
13
|
{
|
|
14
|
-
description: "
|
|
15
|
-
id: "
|
|
14
|
+
description: "A multi-lingual audio dataset with 370K hours of audio.",
|
|
15
|
+
id: "espnet/yodas",
|
|
16
16
|
},
|
|
17
17
|
],
|
|
18
18
|
demo: {
|
|
@@ -47,12 +47,12 @@ const taskData: TaskDataCustom = {
|
|
|
47
47
|
id: "openai/whisper-large-v3",
|
|
48
48
|
},
|
|
49
49
|
{
|
|
50
|
-
description: "A good generic
|
|
51
|
-
id: "facebook/
|
|
50
|
+
description: "A good generic speech model by MetaAI for fine-tuning.",
|
|
51
|
+
id: "facebook/w2v-bert-2.0",
|
|
52
52
|
},
|
|
53
53
|
{
|
|
54
54
|
description: "An end-to-end model that performs ASR and Speech Translation by MetaAI.",
|
|
55
|
-
id: "facebook/
|
|
55
|
+
id: "facebook/seamless-m4t-v2-large",
|
|
56
56
|
},
|
|
57
57
|
],
|
|
58
58
|
spaces: [
|
|
@@ -58,8 +58,6 @@ await inference.textToSpeech({
|
|
|
58
58
|
|
|
59
59
|
- [Hugging Face Audio Course](https://huggingface.co/learn/audio-course/chapter6/introduction)
|
|
60
60
|
- [ML for Audio Study Group - Text to Speech Deep Dive](https://www.youtube.com/watch?v=aLBedWj-5CQ)
|
|
61
|
-
- [An introduction to SpeechT5, a multi-purpose speech recognition and synthesis model](https://huggingface.co/blog/speecht5).
|
|
62
|
-
- [A guide on Fine-tuning Whisper For Multilingual ASR with 🤗Transformers](https://huggingface.co/blog/fine-tune-whisper)
|
|
63
61
|
- [Speech Synthesis, Recognition, and More With SpeechT5](https://huggingface.co/blog/speecht5)
|
|
64
62
|
- [Optimizing a Text-To-Speech model using 🤗 Transformers](https://huggingface.co/blog/optimizing-bark)
|
|
65
|
-
-
|
|
63
|
+
- [Train your own TTS models with Parler-TTS](https://github.com/huggingface/parler-tts)
|
|
@@ -4,8 +4,8 @@ const taskData: TaskDataCustom = {
|
|
|
4
4
|
canonicalId: "text-to-audio",
|
|
5
5
|
datasets: [
|
|
6
6
|
{
|
|
7
|
-
description: "
|
|
8
|
-
id: "
|
|
7
|
+
description: "10K hours of multi-speaker English dataset.",
|
|
8
|
+
id: "parler-tts/mls_eng_10k",
|
|
9
9
|
},
|
|
10
10
|
{
|
|
11
11
|
description: "Multi-speaker English dataset.",
|
|
@@ -43,8 +43,8 @@ const taskData: TaskDataCustom = {
|
|
|
43
43
|
id: "facebook/mms-tts",
|
|
44
44
|
},
|
|
45
45
|
{
|
|
46
|
-
description: "
|
|
47
|
-
id: "
|
|
46
|
+
description: "A prompt based, powerful TTS model.",
|
|
47
|
+
id: "parler-tts/parler_tts_mini_v0.1",
|
|
48
48
|
},
|
|
49
49
|
],
|
|
50
50
|
spaces: [
|
|
@@ -57,8 +57,8 @@ const taskData: TaskDataCustom = {
|
|
|
57
57
|
id: "coqui/xtts",
|
|
58
58
|
},
|
|
59
59
|
{
|
|
60
|
-
description: "An application that synthesizes speech for
|
|
61
|
-
id: "
|
|
60
|
+
description: "An application that synthesizes speech for diverse speaker prompts.",
|
|
61
|
+
id: "parler-tts/parler_tts_mini",
|
|
62
62
|
},
|
|
63
63
|
],
|
|
64
64
|
summary:
|