@huggingface/tasks 0.10.8 → 0.10.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -1541,16 +1541,16 @@ var data_default2 = taskData2;
  var taskData3 = {
  datasets: [
  {
- description: "18,000 hours of multilingual audio-text dataset in 108 languages.",
- id: "mozilla-foundation/common_voice_13_0"
+ description: "31,175 hours of multilingual audio-text dataset in 108 languages.",
+ id: "mozilla-foundation/common_voice_17_0"
  },
  {
  description: "An English dataset with 1,000 hours of data.",
  id: "librispeech_asr"
  },
  {
- description: "High quality, multi-speaker audio data and their transcriptions in various languages.",
- id: "openslr"
+ description: "A multi-lingual audio dataset with 370K hours of audio.",
+ id: "espnet/yodas"
  }
  ],
  demo: {
@@ -1585,12 +1585,12 @@ var taskData3 = {
  id: "openai/whisper-large-v3"
  },
  {
- description: "A good generic ASR model by MetaAI.",
- id: "facebook/wav2vec2-base-960h"
+ description: "A good generic speech model by MetaAI for fine-tuning.",
+ id: "facebook/w2v-bert-2.0"
  },
  {
  description: "An end-to-end model that performs ASR and Speech Translation by MetaAI.",
- id: "facebook/s2t-small-mustc-en-fr-st"
+ id: "facebook/seamless-m4t-v2-large"
  }
  ],
  spaces: [
@@ -2992,8 +2992,8 @@ var taskData24 = {
  canonicalId: "text-to-audio",
  datasets: [
  {
- description: "Thousands of short audio clips of a single speaker.",
- id: "lj_speech"
+ description: "10K hours of multi-speaker English dataset.",
+ id: "parler-tts/mls_eng_10k"
  },
  {
  description: "Multi-speaker English dataset.",
@@ -3031,8 +3031,8 @@ var taskData24 = {
  id: "facebook/mms-tts"
  },
  {
- description: "An end-to-end speech synthesis model.",
- id: "microsoft/speecht5_tts"
+ description: "A prompt based, powerful TTS model.",
+ id: "parler-tts/parler_tts_mini_v0.1"
  }
  ],
  spaces: [
@@ -3045,8 +3045,8 @@ var taskData24 = {
  id: "coqui/xtts"
  },
  {
- description: "An application that synthesizes speech for various speaker types.",
- id: "Matthijs/speecht5-tts-demo"
+ description: "An application that synthesizes speech for diverse speaker prompts.",
+ id: "parler-tts/parler_tts_mini"
  }
  ],
  summary: "Text-to-Speech (TTS) is the task of generating natural sounding speech given text input. TTS models can be extended to have a single model that generates speech for multiple speakers and multiple languages.",
@@ -5476,6 +5476,30 @@ var SKUS = {
  tflops: 91.1,
  memory: [48]
  },
+ "RTX 5880 Ada": {
+ tflops: 69.3,
+ memory: [48]
+ },
+ "RTX 5000 Ada": {
+ tflops: 65.3,
+ memory: [32]
+ },
+ "RTX 4500 Ada": {
+ tflops: 39.6,
+ memory: [24]
+ },
+ "RTX 4000 Ada": {
+ tflops: 26.7,
+ memory: [20]
+ },
+ "RTX 4000 SFF Ada": {
+ tflops: 19.2,
+ memory: [20]
+ },
+ "RTX 2000 Ada": {
+ tflops: 12,
+ memory: [16]
+ },
  A100: {
  tflops: 77.97,
  memory: [80, 40]
@@ -5488,14 +5512,14 @@ var SKUS = {
  tflops: 31.24,
  memory: [24]
  },
- T4: {
- tflops: 65.13,
- memory: [16]
- },
  "RTX 4090": {
  tflops: 82.58,
  memory: [24]
  },
+ "RTX 4090D": {
+ tflops: 79.49,
+ memory: [24]
+ },
  "RTX 4080 SUPER": {
  tflops: 52.2,
  memory: [16]
@@ -5520,6 +5544,14 @@ var SKUS = {
  tflops: 44.1,
  memory: [16]
  },
+ "RTX 4060": {
+ tflops: 15.11,
+ memory: [8]
+ },
+ "RTX 4060 Ti": {
+ tflops: 22.06,
+ memory: [8, 16]
+ },
  "RTX 3090": {
  tflops: 35.58,
  memory: [24]
@@ -5556,13 +5588,53 @@ var SKUS = {
  tflops: 12.74,
  memory: [12, 8]
  },
+ "RTX 2070": {
+ tflops: 14.93,
+ memory: [8]
+ },
+ "RTX 3050 Mobile": {
+ tflops: 7.639,
+ memory: [6]
+ },
  "RTX 2060 Mobile": {
  tflops: 9.22,
  memory: [6]
  },
+ "GTX 1080 Ti": {
+ tflops: 11.34,
+ // float32 (GPU does not support native float16)
+ memory: [11]
+ },
+ "GTX 1070 Ti": {
+ tflops: 8.2,
+ // float32 (GPU does not support native float16)
+ memory: [8]
+ },
  "RTX Titan": {
  tflops: 32.62,
  memory: [24]
+ },
+ "GTX 1650 Mobile": {
+ tflops: 6.39,
+ memory: [4]
+ },
+ T4: {
+ tflops: 65.13,
+ memory: [16]
+ },
+ V100: {
+ tflops: 28.26,
+ memory: [32, 16]
+ },
+ "Quadro P6000": {
+ tflops: 12.63,
+ // float32 (GPU does not support native float16)
+ memory: [24]
+ },
+ P40: {
+ tflops: 11.76,
+ // float32 (GPU does not support native float16)
+ memory: [24]
  }
  },
  AMD: {
@@ -5578,6 +5650,10 @@ var SKUS = {
  tflops: 181,
  memory: [64]
  },
+ MI100: {
+ tflops: 184.6,
+ memory: [32]
+ },
  "RX 7900 XTX": {
  tflops: 122.8,
  memory: [24]
@@ -5601,6 +5677,18 @@ var SKUS = {
  "RX 7600 XT": {
  tflops: 45.14,
  memory: [16, 8]
+ },
+ "RX 6950 XT": {
+ tflops: 47.31,
+ memory: [16]
+ },
+ "RX 6800": {
+ tflops: 32.33,
+ memory: [16]
+ },
+ "Radeon Pro VII": {
+ tflops: 26.11,
+ memory: [16]
  }
  }
  },
@@ -5811,6 +5899,13 @@ var LOCAL_APPS = {
  displayOnModelPage: isGgufModel,
  deeplink: (model) => new URL(`https://backyard.ai/hf/model/${model.id}`)
  },
+ sanctum: {
+ prettyLabel: "Sanctum",
+ docsUrl: "https://sanctum.ai",
+ mainTask: "text-generation",
+ displayOnModelPage: isGgufModel,
+ deeplink: (model) => new URL(`sanctum://open_from_hf?model=${model.id}`)
+ },
  drawthings: {
  prettyLabel: "Draw Things",
  docsUrl: "https://drawthings.ai",
package/dist/index.js CHANGED
@@ -1503,16 +1503,16 @@ var data_default2 = taskData2;
  var taskData3 = {
  datasets: [
  {
- description: "18,000 hours of multilingual audio-text dataset in 108 languages.",
- id: "mozilla-foundation/common_voice_13_0"
+ description: "31,175 hours of multilingual audio-text dataset in 108 languages.",
+ id: "mozilla-foundation/common_voice_17_0"
  },
  {
  description: "An English dataset with 1,000 hours of data.",
  id: "librispeech_asr"
  },
  {
- description: "High quality, multi-speaker audio data and their transcriptions in various languages.",
- id: "openslr"
+ description: "A multi-lingual audio dataset with 370K hours of audio.",
+ id: "espnet/yodas"
  }
  ],
  demo: {
@@ -1547,12 +1547,12 @@ var taskData3 = {
  id: "openai/whisper-large-v3"
  },
  {
- description: "A good generic ASR model by MetaAI.",
- id: "facebook/wav2vec2-base-960h"
+ description: "A good generic speech model by MetaAI for fine-tuning.",
+ id: "facebook/w2v-bert-2.0"
  },
  {
  description: "An end-to-end model that performs ASR and Speech Translation by MetaAI.",
- id: "facebook/s2t-small-mustc-en-fr-st"
+ id: "facebook/seamless-m4t-v2-large"
  }
  ],
  spaces: [
@@ -2954,8 +2954,8 @@ var taskData24 = {
  canonicalId: "text-to-audio",
  datasets: [
  {
- description: "Thousands of short audio clips of a single speaker.",
- id: "lj_speech"
+ description: "10K hours of multi-speaker English dataset.",
+ id: "parler-tts/mls_eng_10k"
  },
  {
  description: "Multi-speaker English dataset.",
@@ -2993,8 +2993,8 @@ var taskData24 = {
  id: "facebook/mms-tts"
  },
  {
- description: "An end-to-end speech synthesis model.",
- id: "microsoft/speecht5_tts"
+ description: "A prompt based, powerful TTS model.",
+ id: "parler-tts/parler_tts_mini_v0.1"
  }
  ],
  spaces: [
@@ -3007,8 +3007,8 @@ var taskData24 = {
  id: "coqui/xtts"
  },
  {
- description: "An application that synthesizes speech for various speaker types.",
- id: "Matthijs/speecht5-tts-demo"
+ description: "An application that synthesizes speech for diverse speaker prompts.",
+ id: "parler-tts/parler_tts_mini"
  }
  ],
  summary: "Text-to-Speech (TTS) is the task of generating natural sounding speech given text input. TTS models can be extended to have a single model that generates speech for multiple speakers and multiple languages.",
@@ -5438,6 +5438,30 @@ var SKUS = {
  tflops: 91.1,
  memory: [48]
  },
+ "RTX 5880 Ada": {
+ tflops: 69.3,
+ memory: [48]
+ },
+ "RTX 5000 Ada": {
+ tflops: 65.3,
+ memory: [32]
+ },
+ "RTX 4500 Ada": {
+ tflops: 39.6,
+ memory: [24]
+ },
+ "RTX 4000 Ada": {
+ tflops: 26.7,
+ memory: [20]
+ },
+ "RTX 4000 SFF Ada": {
+ tflops: 19.2,
+ memory: [20]
+ },
+ "RTX 2000 Ada": {
+ tflops: 12,
+ memory: [16]
+ },
  A100: {
  tflops: 77.97,
  memory: [80, 40]
@@ -5450,14 +5474,14 @@ var SKUS = {
  tflops: 31.24,
  memory: [24]
  },
- T4: {
- tflops: 65.13,
- memory: [16]
- },
  "RTX 4090": {
  tflops: 82.58,
  memory: [24]
  },
+ "RTX 4090D": {
+ tflops: 79.49,
+ memory: [24]
+ },
  "RTX 4080 SUPER": {
  tflops: 52.2,
  memory: [16]
@@ -5482,6 +5506,14 @@ var SKUS = {
  tflops: 44.1,
  memory: [16]
  },
+ "RTX 4060": {
+ tflops: 15.11,
+ memory: [8]
+ },
+ "RTX 4060 Ti": {
+ tflops: 22.06,
+ memory: [8, 16]
+ },
  "RTX 3090": {
  tflops: 35.58,
  memory: [24]
@@ -5518,13 +5550,53 @@ var SKUS = {
  tflops: 12.74,
  memory: [12, 8]
  },
+ "RTX 2070": {
+ tflops: 14.93,
+ memory: [8]
+ },
+ "RTX 3050 Mobile": {
+ tflops: 7.639,
+ memory: [6]
+ },
  "RTX 2060 Mobile": {
  tflops: 9.22,
  memory: [6]
  },
+ "GTX 1080 Ti": {
+ tflops: 11.34,
+ // float32 (GPU does not support native float16)
+ memory: [11]
+ },
+ "GTX 1070 Ti": {
+ tflops: 8.2,
+ // float32 (GPU does not support native float16)
+ memory: [8]
+ },
  "RTX Titan": {
  tflops: 32.62,
  memory: [24]
+ },
+ "GTX 1650 Mobile": {
+ tflops: 6.39,
+ memory: [4]
+ },
+ T4: {
+ tflops: 65.13,
+ memory: [16]
+ },
+ V100: {
+ tflops: 28.26,
+ memory: [32, 16]
+ },
+ "Quadro P6000": {
+ tflops: 12.63,
+ // float32 (GPU does not support native float16)
+ memory: [24]
+ },
+ P40: {
+ tflops: 11.76,
+ // float32 (GPU does not support native float16)
+ memory: [24]
  }
  },
  AMD: {
@@ -5540,6 +5612,10 @@ var SKUS = {
  tflops: 181,
  memory: [64]
  },
+ MI100: {
+ tflops: 184.6,
+ memory: [32]
+ },
  "RX 7900 XTX": {
  tflops: 122.8,
  memory: [24]
@@ -5563,6 +5639,18 @@ var SKUS = {
  "RX 7600 XT": {
  tflops: 45.14,
  memory: [16, 8]
+ },
+ "RX 6950 XT": {
+ tflops: 47.31,
+ memory: [16]
+ },
+ "RX 6800": {
+ tflops: 32.33,
+ memory: [16]
+ },
+ "Radeon Pro VII": {
+ tflops: 26.11,
+ memory: [16]
  }
  }
  },
@@ -5773,6 +5861,13 @@ var LOCAL_APPS = {
  displayOnModelPage: isGgufModel,
  deeplink: (model) => new URL(`https://backyard.ai/hf/model/${model.id}`)
  },
+ sanctum: {
+ prettyLabel: "Sanctum",
+ docsUrl: "https://sanctum.ai",
+ mainTask: "text-generation",
+ displayOnModelPage: isGgufModel,
+ deeplink: (model) => new URL(`sanctum://open_from_hf?model=${model.id}`)
+ },
  drawthings: {
  prettyLabel: "Draw Things",
  docsUrl: "https://drawthings.ai",
@@ -44,6 +44,30 @@ export declare const SKUS: {
  tflops: number;
  memory: number[];
  };
+ "RTX 5880 Ada": {
+ tflops: number;
+ memory: number[];
+ };
+ "RTX 5000 Ada": {
+ tflops: number;
+ memory: number[];
+ };
+ "RTX 4500 Ada": {
+ tflops: number;
+ memory: number[];
+ };
+ "RTX 4000 Ada": {
+ tflops: number;
+ memory: number[];
+ };
+ "RTX 4000 SFF Ada": {
+ tflops: number;
+ memory: number[];
+ };
+ "RTX 2000 Ada": {
+ tflops: number;
+ memory: number[];
+ };
  A100: {
  tflops: number;
  memory: number[];
@@ -56,11 +80,11 @@ export declare const SKUS: {
  tflops: number;
  memory: number[];
  };
- T4: {
+ "RTX 4090": {
  tflops: number;
  memory: number[];
  };
- "RTX 4090": {
+ "RTX 4090D": {
  tflops: number;
  memory: number[];
  };
@@ -88,6 +112,14 @@ export declare const SKUS: {
  tflops: number;
  memory: number[];
  };
+ "RTX 4060": {
+ tflops: number;
+ memory: number[];
+ };
+ "RTX 4060 Ti": {
+ tflops: number;
+ memory: number[];
+ };
  "RTX 3090": {
  tflops: number;
  memory: number[];
@@ -124,14 +156,50 @@ export declare const SKUS: {
  tflops: number;
  memory: number[];
  };
+ "RTX 2070": {
+ tflops: number;
+ memory: number[];
+ };
+ "RTX 3050 Mobile": {
+ tflops: number;
+ memory: number[];
+ };
  "RTX 2060 Mobile": {
  tflops: number;
  memory: number[];
  };
+ "GTX 1080 Ti": {
+ tflops: number;
+ memory: number[];
+ };
+ "GTX 1070 Ti": {
+ tflops: number;
+ memory: number[];
+ };
  "RTX Titan": {
  tflops: number;
  memory: number[];
  };
+ "GTX 1650 Mobile": {
+ tflops: number;
+ memory: number[];
+ };
+ T4: {
+ tflops: number;
+ memory: number[];
+ };
+ V100: {
+ tflops: number;
+ memory: number[];
+ };
+ "Quadro P6000": {
+ tflops: number;
+ memory: number[];
+ };
+ P40: {
+ tflops: number;
+ memory: number[];
+ };
  };
  AMD: {
  MI300: {
@@ -146,6 +214,10 @@ export declare const SKUS: {
  tflops: number;
  memory: number[];
  };
+ MI100: {
+ tflops: number;
+ memory: number[];
+ };
  "RX 7900 XTX": {
  tflops: number;
  memory: number[];
@@ -170,6 +242,18 @@ export declare const SKUS: {
  tflops: number;
  memory: number[];
  };
+ "RX 6950 XT": {
+ tflops: number;
+ memory: number[];
+ };
+ "RX 6800": {
+ tflops: number;
+ memory: number[];
+ };
+ "Radeon Pro VII": {
+ tflops: number;
+ memory: number[];
+ };
  };
  };
  CPU: {
@@ -1 +1 @@
- {"version":3,"file":"hardware.d.ts","sourceRoot":"","sources":["../../src/hardware.ts"],"names":[],"mappings":"AAAA;;;GAGG;AACH,eAAO,MAAM,iDAAiD,QAAW,CAAC;AAC1E,eAAO,MAAM,yDAAyD,QAAW,CAAC;AAClF,eAAO,MAAM,oCAAoC,QAAU,CAAC;AAE5D;;;GAGG;AACH,eAAO,MAAM,+CAA+C,QAAW,CAAC;AAExE,MAAM,WAAW,YAAY;IAC5B;;;;;;;;;OASG;IACH,MAAM,EAAE,MAAM,CAAC;IACf;;;OAGG;IACH,MAAM,CAAC,EAAE,MAAM,EAAE,CAAC;CAClB;AAED,eAAO,MAAM,sBAAsB,UAAqD,CAAC;AAEzF,eAAO,MAAM,IAAI;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CA2SuD,CAAC;AAEzE,MAAM,MAAM,OAAO,GAAG,MAAM,OAAO,IAAI,CAAC"}
+ {"version":3,"file":"hardware.d.ts","sourceRoot":"","sources":["../../src/hardware.ts"],"names":[],"mappings":"AAAA;;;GAGG;AACH,eAAO,MAAM,iDAAiD,QAAW,CAAC;AAC1E,eAAO,MAAM,yDAAyD,QAAW,CAAC;AAClF,eAAO,MAAM,oCAAoC,QAAU,CAAC;AAE5D;;;GAGG;AACH,eAAO,MAAM,+CAA+C,QAAW,CAAC;AAExE,MAAM,WAAW,YAAY;IAC5B;;;;;;;;;OASG;IACH,MAAM,EAAE,MAAM,CAAC;IACf;;;OAGG;IACH,MAAM,CAAC,EAAE,MAAM,EAAE,CAAC;CAClB;AAED,eAAO,MAAM,sBAAsB,UAAqD,CAAC;AAEzF,eAAO,MAAM,IAAI;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CA+XuD,CAAC;AAEzE,MAAM,MAAM,OAAO,GAAG,MAAM,OAAO,IAAI,CAAC"}
@@ -77,6 +77,13 @@ export declare const LOCAL_APPS: {
  displayOnModelPage: typeof isGgufModel;
  deeplink: (model: ModelData) => URL;
  };
+ sanctum: {
+ prettyLabel: string;
+ docsUrl: string;
+ mainTask: "text-generation";
+ displayOnModelPage: typeof isGgufModel;
+ deeplink: (model: ModelData) => URL;
+ };
  drawthings: {
  prettyLabel: string;
  docsUrl: string;
@@ -1 +1 @@
- {"version":3,"file":"local-apps.d.ts","sourceRoot":"","sources":["../../src/local-apps.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AAC9C,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAEhD;;GAEG;AACH,MAAM,MAAM,QAAQ,GAAG;IACtB;;OAEG;IACH,WAAW,EAAE,MAAM,CAAC;IACpB;;OAEG;IACH,OAAO,EAAE,MAAM,CAAC;IAChB;;OAEG;IACH,QAAQ,EAAE,YAAY,CAAC;IACvB;;OAEG;IACH,SAAS,CAAC,EAAE,OAAO,CAAC;IAEpB,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB;;OAEG;IACH,kBAAkB,EAAE,CAAC,KAAK,EAAE,SAAS,KAAK,OAAO,CAAC;CAClD,GAAG,CACD;IACA;;OAEG;IACH,QAAQ,EAAE,CAAC,KAAK,EAAE,SAAS,KAAK,GAAG,CAAC;CACnC,GACD;IACA;;OAEG;IACH,OAAO,EAAE,CAAC,KAAK,EAAE,SAAS,KAAK,MAAM,GAAG,MAAM,EAAE,CAAC;CAChD,CACH,CAAC;AAEF,iBAAS,WAAW,CAAC,KAAK,EAAE,SAAS,WAEpC;AAmBD;;;;;;;;;;GAUG;AACH,eAAO,MAAM,UAAU;;;;;;yBA5BS,SAAS,KAAG,MAAM,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAiFhB,CAAC;AAErC,MAAM,MAAM,WAAW,GAAG,MAAM,OAAO,UAAU,CAAC"}
+ {"version":3,"file":"local-apps.d.ts","sourceRoot":"","sources":["../../src/local-apps.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AAC9C,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAEhD;;GAEG;AACH,MAAM,MAAM,QAAQ,GAAG;IACtB;;OAEG;IACH,WAAW,EAAE,MAAM,CAAC;IACpB;;OAEG;IACH,OAAO,EAAE,MAAM,CAAC;IAChB;;OAEG;IACH,QAAQ,EAAE,YAAY,CAAC;IACvB;;OAEG;IACH,SAAS,CAAC,EAAE,OAAO,CAAC;IAEpB,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB;;OAEG;IACH,kBAAkB,EAAE,CAAC,KAAK,EAAE,SAAS,KAAK,OAAO,CAAC;CAClD,GAAG,CACD;IACA;;OAEG;IACH,QAAQ,EAAE,CAAC,KAAK,EAAE,SAAS,KAAK,GAAG,CAAC;CACnC,GACD;IACA;;OAEG;IACH,OAAO,EAAE,CAAC,KAAK,EAAE,SAAS,KAAK,MAAM,GAAG,MAAM,EAAE,CAAC;CAChD,CACH,CAAC;AAEF,iBAAS,WAAW,CAAC,KAAK,EAAE,SAAS,WAEpC;AAmBD;;;;;;;;;;GAUG;AACH,eAAO,MAAM,UAAU;;;;;;yBA5BS,SAAS,KAAG,MAAM,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAwFhB,CAAC;AAErC,MAAM,MAAM,WAAW,GAAG,MAAM,OAAO,UAAU,CAAC"}
package/package.json CHANGED
@@ -1,7 +1,7 @@
  {
  "name": "@huggingface/tasks",
  "packageManager": "pnpm@8.10.5",
- "version": "0.10.8",
+ "version": "0.10.9",
  "description": "List of ML tasks for huggingface.co/tasks",
  "repository": "https://github.com/huggingface/huggingface.js.git",
  "publishConfig": {
package/src/hardware.ts CHANGED
@@ -48,6 +48,30 @@ export const SKUS = {
  tflops: 91.1,
  memory: [48],
  },
+ "RTX 5880 Ada": {
+ tflops: 69.3,
+ memory: [48],
+ },
+ "RTX 5000 Ada": {
+ tflops: 65.3,
+ memory: [32],
+ },
+ "RTX 4500 Ada": {
+ tflops: 39.6,
+ memory: [24],
+ },
+ "RTX 4000 Ada": {
+ tflops: 26.7,
+ memory: [20],
+ },
+ "RTX 4000 SFF Ada": {
+ tflops: 19.2,
+ memory: [20],
+ },
+ "RTX 2000 Ada": {
+ tflops: 12.0,
+ memory: [16],
+ },
  A100: {
  tflops: 77.97,
  memory: [80, 40],
@@ -60,14 +84,14 @@ export const SKUS = {
  tflops: 31.24,
  memory: [24],
  },
- T4: {
- tflops: 65.13,
- memory: [16],
- },
  "RTX 4090": {
  tflops: 82.58,
  memory: [24],
  },
+ "RTX 4090D": {
+ tflops: 79.49,
+ memory: [24],
+ },
  "RTX 4080 SUPER": {
  tflops: 52.2,
  memory: [16],
@@ -92,6 +116,14 @@ export const SKUS = {
  tflops: 44.1,
  memory: [16],
  },
+ "RTX 4060": {
+ tflops: 15.11,
+ memory: [8],
+ },
+ "RTX 4060 Ti": {
+ tflops: 22.06,
+ memory: [8, 16],
+ },
  "RTX 3090": {
  tflops: 35.58,
  memory: [24],
@@ -128,14 +160,50 @@ export const SKUS = {
  tflops: 12.74,
  memory: [12, 8],
  },
+ "RTX 2070": {
+ tflops: 14.93,
+ memory: [8],
+ },
+ "RTX 3050 Mobile": {
+ tflops: 7.639,
+ memory: [6],
+ },
  "RTX 2060 Mobile": {
  tflops: 9.22,
  memory: [6],
  },
+ "GTX 1080 Ti": {
+ tflops: 11.34, // float32 (GPU does not support native float16)
+ memory: [11],
+ },
+ "GTX 1070 Ti": {
+ tflops: 8.2, // float32 (GPU does not support native float16)
+ memory: [8],
+ },
  "RTX Titan": {
  tflops: 32.62,
  memory: [24],
  },
+ "GTX 1650 Mobile": {
+ tflops: 6.39,
+ memory: [4],
+ },
+ T4: {
+ tflops: 65.13,
+ memory: [16],
+ },
+ V100: {
+ tflops: 28.26,
+ memory: [32, 16],
+ },
+ "Quadro P6000": {
+ tflops: 12.63, // float32 (GPU does not support native float16)
+ memory: [24],
+ },
+ P40: {
+ tflops: 11.76, // float32 (GPU does not support native float16)
+ memory: [24],
+ },
  },
  AMD: {
  MI300: {
@@ -150,6 +218,10 @@ export const SKUS = {
  tflops: 181.0,
  memory: [64],
  },
+ MI100: {
+ tflops: 184.6,
+ memory: [32],
+ },
  "RX 7900 XTX": {
  tflops: 122.8,
  memory: [24],
@@ -174,6 +246,18 @@ export const SKUS = {
  tflops: 45.14,
  memory: [16, 8],
  },
+ "RX 6950 XT": {
+ tflops: 47.31,
+ memory: [16],
+ },
+ "RX 6800": {
+ tflops: 32.33,
+ memory: [16],
+ },
+ "Radeon Pro VII": {
+ tflops: 26.11,
+ memory: [16],
+ },
  },
  },
  CPU: {
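
The `hardware.ts` hunks above only add plain data entries to the exported `SKUS` table. As a rough sketch of how a consumer of 0.10.9 might read the new cards, the snippet below looks up one of the added GPUs; the `GPU.NVIDIA` nesting and the local `SkuSpec` helper type are assumptions inferred from these hunks, not something this diff confirms.

```typescript
// Sketch only, not from the package docs. Assumes the vendor tables sit under
// SKUS.GPU.NVIDIA / SKUS.GPU.AMD; those parent keys fall outside the hunks shown above.
import { SKUS } from "@huggingface/tasks";

// Local helper type mirroring the shape of each entry in the diff.
interface SkuSpec {
	tflops: number; // FP16 TFLOPS (FP32 where the source comments note no native float16)
	memory?: number[]; // available VRAM configurations in GB
}

const nvidia = (SKUS as unknown as Record<string, Record<string, Record<string, SkuSpec>>>).GPU?.NVIDIA ?? {};

// One of the cards added in this release:
const card = nvidia["RTX 4060 Ti"];
console.log(card?.tflops); // 22.06
console.log(card?.memory); // [8, 16]

// For example, list every NVIDIA entry that ships a 16 GB (or larger) configuration:
const atLeast16Gb = Object.entries(nvidia)
	.filter(([, spec]) => (spec.memory ?? []).some((gb) => gb >= 16))
	.map(([name]) => name);
console.log(atLeast16Gb);
```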
package/src/local-apps.ts CHANGED
@@ -103,6 +103,13 @@ export const LOCAL_APPS = {
  displayOnModelPage: isGgufModel,
  deeplink: (model) => new URL(`https://backyard.ai/hf/model/${model.id}`),
  },
+ sanctum: {
+ prettyLabel: "Sanctum",
+ docsUrl: "https://sanctum.ai",
+ mainTask: "text-generation",
+ displayOnModelPage: isGgufModel,
+ deeplink: (model) => new URL(`sanctum://open_from_hf?model=${model.id}`),
+ },
  drawthings: {
  prettyLabel: "Draw Things",
  docsUrl: "https://drawthings.ai",
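
The new `sanctum` entry in `local-apps.ts` registers another GGUF-capable local app: it is only displayed for GGUF models and builds a `sanctum://` deeplink from the model id. A minimal sketch of exercising it follows; the `model` object is a hypothetical stand-in for the Hub's `ModelData` (only `id` and a `gguf` tag are assumed to matter for the check).

```typescript
// Sketch: exercising the sanctum entry added above. LOCAL_APPS is exported per the
// .d.ts hunk earlier in this diff; the model object is a minimal hypothetical stand-in.
import { LOCAL_APPS } from "@huggingface/tasks";

const model = { id: "TheBloke/Llama-2-7B-GGUF", tags: ["gguf"] };

const sanctum = LOCAL_APPS.sanctum;
if (sanctum.displayOnModelPage(model as any)) {
	const url = sanctum.deeplink(model as any);
	console.log(url.toString()); // sanctum://open_from_hf?model=TheBloke/Llama-2-7B-GGUF
}
```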
@@ -18,7 +18,7 @@ The use of Multilingual ASR has become popular, the idea of maintaining just a s

  ## Inference

- The Hub contains over [~9,000 ASR models](https://huggingface.co/models?pipeline_tag=automatic-speech-recognition&sort=downloads) that you can use right away by trying out the widgets directly in the browser or calling the models as a service using Inference Endpoints. Here is a simple code snippet to do exactly this:
+ The Hub contains over [17,000 ASR models](https://huggingface.co/models?pipeline_tag=automatic-speech-recognition&sort=downloads) that you can test right away in your browser using the model page widgets. You can also use any model as a service using the Serverless Inference API. We also support libraries such as [transformers](https://huggingface.co/models?library=transformers&pipeline_tag=automatic-speech-recognition&sort=downloads), [speechbrain](https://huggingface.co/models?library=speechbrain&pipeline_tag=automatic-speech-recognition&sort=downloads), [NeMo](https://huggingface.co/models?pipeline_tag=automatic-speech-recognition&library=nemo&sort=downloads) and [espnet](https://huggingface.co/models?library=espnet&pipeline_tag=automatic-speech-recognition&sort=downloads) via the Serverless Inference API. Here's a simple code snippet to run inference:

  ```python
  import json
@@ -36,20 +36,7 @@ def query(filename):
  data = query("sample1.flac")
  ```

- You can also use libraries such as [transformers](https://huggingface.co/models?library=transformers&pipeline_tag=automatic-speech-recognition&sort=downloads), [speechbrain](https://huggingface.co/models?library=speechbrain&pipeline_tag=automatic-speech-recognition&sort=downloads), [NeMo](https://huggingface.co/models?pipeline_tag=automatic-speech-recognition&library=nemo&sort=downloads) and [espnet](https://huggingface.co/models?library=espnet&pipeline_tag=automatic-speech-recognition&sort=downloads) if you want one-click managed Inference without any hassle.
-
- ```python
- from transformers import pipeline
-
- with open("sample.flac", "rb") as f:
- data = f.read()
-
- pipe = pipeline("automatic-speech-recognition", "openai/whisper-large-v2")
- pipe("sample.flac")
- # {'text': "GOING ALONG SLUSHY COUNTRY ROADS AND SPEAKING TO DAMP AUDIENCES IN DRAUGHTY SCHOOL ROOMS DAY AFTER DAY FOR A FORTNIGHT HE'LL HAVE TO PUT IN AN APPEARANCE AT SOME PLACE OF WORSHIP ON SUNDAY MORNING AND HE CAN COME TO US IMMEDIATELY AFTERWARDS"}
- ```
-
- You can use [huggingface.js](https://github.com/huggingface/huggingface.js) to transcribe text with javascript using models on Hugging Face Hub.
+ You can also use[huggingface.js](https://github.com/huggingface/huggingface.js), the JavaScript client, to transcribe models with the Inference API.

  ```javascript
  import { HfInference } from "@huggingface/inference";
@@ -57,10 +44,23 @@ import { HfInference } from "@huggingface/inference";
  const inference = new HfInference(HF_TOKEN);
  await inference.automaticSpeechRecognition({
  data: await (await fetch("sample.flac")).blob(),
- model: "openai/whisper-large-v2",
+ model: "openai/whisper-large-v3",
  });
  ```

+ For transformers compatible models like Whisper, Wav2Vec2, HuBERT, etc. You can also run inference in Python using transformers as follows:
+
+ ```python
+ # pip install --upgrade transformers
+
+ from transformers import pipeline
+
+ pipe = pipeline("automatic-speech-recognition", "openai/whisper-large-v3")
+
+ pipe("sample.flac")
+ # {'text': "GOING ALONG SLUSHY COUNTRY ROADS AND SPEAKING TO DAMP AUDIENCES IN DRAUGHTY SCHOOL ROOMS DAY AFTER DAY FOR A FORTNIGHT HE'LL HAVE TO PUT IN AN APPEARANCE AT SOME PLACE OF WORSHIP ON SUNDAY MORNING AND HE CAN COME TO US IMMEDIATELY AFTERWARDS"}
+ ```
+
  ## Solving ASR for your own data

  We have some great news! You can fine-tune (transfer learning) a foundational speech model on a specific language without tonnes of data. Pretrained models such as Whisper, Wav2Vec2-MMS and HuBERT exist. [OpenAI's Whisper model](https://huggingface.co/openai/whisper-large-v3) is a large multilingual model trained on 100+ languages and with 4 Million hours of speech.
@@ -3,16 +3,16 @@ import type { TaskDataCustom } from "..";
  const taskData: TaskDataCustom = {
  datasets: [
  {
- description: "18,000 hours of multilingual audio-text dataset in 108 languages.",
- id: "mozilla-foundation/common_voice_13_0",
+ description: "31,175 hours of multilingual audio-text dataset in 108 languages.",
+ id: "mozilla-foundation/common_voice_17_0",
  },
  {
  description: "An English dataset with 1,000 hours of data.",
  id: "librispeech_asr",
  },
  {
- description: "High quality, multi-speaker audio data and their transcriptions in various languages.",
- id: "openslr",
+ description: "A multi-lingual audio dataset with 370K hours of audio.",
+ id: "espnet/yodas",
  },
  ],
  demo: {
@@ -47,12 +47,12 @@ const taskData: TaskDataCustom = {
  id: "openai/whisper-large-v3",
  },
  {
- description: "A good generic ASR model by MetaAI.",
- id: "facebook/wav2vec2-base-960h",
+ description: "A good generic speech model by MetaAI for fine-tuning.",
+ id: "facebook/w2v-bert-2.0",
  },
  {
  description: "An end-to-end model that performs ASR and Speech Translation by MetaAI.",
- id: "facebook/s2t-small-mustc-en-fr-st",
+ id: "facebook/seamless-m4t-v2-large",
  },
  ],
  spaces: [
@@ -58,8 +58,6 @@ await inference.textToSpeech({

  - [Hugging Face Audio Course](https://huggingface.co/learn/audio-course/chapter6/introduction)
  - [ML for Audio Study Group - Text to Speech Deep Dive](https://www.youtube.com/watch?v=aLBedWj-5CQ)
- - [An introduction to SpeechT5, a multi-purpose speech recognition and synthesis model](https://huggingface.co/blog/speecht5).
- - [A guide on Fine-tuning Whisper For Multilingual ASR with 🤗Transformers](https://huggingface.co/blog/fine-tune-whisper)
  - [Speech Synthesis, Recognition, and More With SpeechT5](https://huggingface.co/blog/speecht5)
  - [Optimizing a Text-To-Speech model using 🤗 Transformers](https://huggingface.co/blog/optimizing-bark)
- -
+ - [Train your own TTS models with Parler-TTS](https://github.com/huggingface/parler-tts)
@@ -4,8 +4,8 @@ const taskData: TaskDataCustom = {
  canonicalId: "text-to-audio",
  datasets: [
  {
- description: "Thousands of short audio clips of a single speaker.",
- id: "lj_speech",
+ description: "10K hours of multi-speaker English dataset.",
+ id: "parler-tts/mls_eng_10k",
  },
  {
  description: "Multi-speaker English dataset.",
@@ -43,8 +43,8 @@ const taskData: TaskDataCustom = {
  id: "facebook/mms-tts",
  },
  {
- description: "An end-to-end speech synthesis model.",
- id: "microsoft/speecht5_tts",
+ description: "A prompt based, powerful TTS model.",
+ id: "parler-tts/parler_tts_mini_v0.1",
  },
  ],
  spaces: [
@@ -57,8 +57,8 @@ const taskData: TaskDataCustom = {
  id: "coqui/xtts",
  },
  {
- description: "An application that synthesizes speech for various speaker types.",
- id: "Matthijs/speecht5-tts-demo",
+ description: "An application that synthesizes speech for diverse speaker prompts.",
+ id: "parler-tts/parler_tts_mini",
  },
  ],
  summary: