@huggingface/tasks 0.10.8 → 0.10.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -1541,16 +1541,16 @@ var data_default2 = taskData2;
  var taskData3 = {
  datasets: [
  {
- description: "18,000 hours of multilingual audio-text dataset in 108 languages.",
- id: "mozilla-foundation/common_voice_13_0"
+ description: "31,175 hours of multilingual audio-text dataset in 108 languages.",
+ id: "mozilla-foundation/common_voice_17_0"
  },
  {
  description: "An English dataset with 1,000 hours of data.",
  id: "librispeech_asr"
  },
  {
- description: "High quality, multi-speaker audio data and their transcriptions in various languages.",
- id: "openslr"
+ description: "A multi-lingual audio dataset with 370K hours of audio.",
+ id: "espnet/yodas"
  }
  ],
  demo: {
@@ -1585,12 +1585,12 @@ var taskData3 = {
  id: "openai/whisper-large-v3"
  },
  {
- description: "A good generic ASR model by MetaAI.",
- id: "facebook/wav2vec2-base-960h"
+ description: "A good generic speech model by MetaAI for fine-tuning.",
+ id: "facebook/w2v-bert-2.0"
  },
  {
  description: "An end-to-end model that performs ASR and Speech Translation by MetaAI.",
- id: "facebook/s2t-small-mustc-en-fr-st"
+ id: "facebook/seamless-m4t-v2-large"
  }
  ],
  spaces: [
@@ -2992,8 +2992,8 @@ var taskData24 = {
  canonicalId: "text-to-audio",
  datasets: [
  {
- description: "Thousands of short audio clips of a single speaker.",
- id: "lj_speech"
+ description: "10K hours of multi-speaker English dataset.",
+ id: "parler-tts/mls_eng_10k"
  },
  {
  description: "Multi-speaker English dataset.",
@@ -3031,8 +3031,8 @@ var taskData24 = {
  id: "facebook/mms-tts"
  },
  {
- description: "An end-to-end speech synthesis model.",
- id: "microsoft/speecht5_tts"
+ description: "A prompt based, powerful TTS model.",
+ id: "parler-tts/parler_tts_mini_v0.1"
  }
  ],
  spaces: [
@@ -3045,8 +3045,8 @@ var taskData24 = {
  id: "coqui/xtts"
  },
  {
- description: "An application that synthesizes speech for various speaker types.",
- id: "Matthijs/speecht5-tts-demo"
+ description: "An application that synthesizes speech for diverse speaker prompts.",
+ id: "parler-tts/parler_tts_mini"
  }
  ],
  summary: "Text-to-Speech (TTS) is the task of generating natural sounding speech given text input. TTS models can be extended to have a single model that generates speech for multiple speakers and multiple languages.",
@@ -5476,6 +5476,30 @@ var SKUS = {
  tflops: 91.1,
  memory: [48]
  },
+ "RTX 5880 Ada": {
+ tflops: 69.3,
+ memory: [48]
+ },
+ "RTX 5000 Ada": {
+ tflops: 65.3,
+ memory: [32]
+ },
+ "RTX 4500 Ada": {
+ tflops: 39.6,
+ memory: [24]
+ },
+ "RTX 4000 Ada": {
+ tflops: 26.7,
+ memory: [20]
+ },
+ "RTX 4000 SFF Ada": {
+ tflops: 19.2,
+ memory: [20]
+ },
+ "RTX 2000 Ada": {
+ tflops: 12,
+ memory: [16]
+ },
  A100: {
  tflops: 77.97,
  memory: [80, 40]
@@ -5488,14 +5512,14 @@ var SKUS = {
  tflops: 31.24,
  memory: [24]
  },
- T4: {
- tflops: 65.13,
- memory: [16]
- },
  "RTX 4090": {
  tflops: 82.58,
  memory: [24]
  },
+ "RTX 4090D": {
+ tflops: 79.49,
+ memory: [24]
+ },
  "RTX 4080 SUPER": {
  tflops: 52.2,
  memory: [16]
@@ -5520,6 +5544,14 @@ var SKUS = {
  tflops: 44.1,
  memory: [16]
  },
+ "RTX 4060": {
+ tflops: 15.11,
+ memory: [8]
+ },
+ "RTX 4060 Ti": {
+ tflops: 22.06,
+ memory: [8, 16]
+ },
  "RTX 3090": {
  tflops: 35.58,
  memory: [24]
@@ -5556,13 +5588,53 @@ var SKUS = {
  tflops: 12.74,
  memory: [12, 8]
  },
+ "RTX 2070": {
+ tflops: 14.93,
+ memory: [8]
+ },
+ "RTX 3050 Mobile": {
+ tflops: 7.639,
+ memory: [6]
+ },
  "RTX 2060 Mobile": {
  tflops: 9.22,
  memory: [6]
  },
+ "GTX 1080 Ti": {
+ tflops: 11.34,
+ // float32 (GPU does not support native float16)
+ memory: [11]
+ },
+ "GTX 1070 Ti": {
+ tflops: 8.2,
+ // float32 (GPU does not support native float16)
+ memory: [8]
+ },
  "RTX Titan": {
  tflops: 32.62,
  memory: [24]
+ },
+ "GTX 1650 Mobile": {
+ tflops: 6.39,
+ memory: [4]
+ },
+ T4: {
+ tflops: 65.13,
+ memory: [16]
+ },
+ V100: {
+ tflops: 28.26,
+ memory: [32, 16]
+ },
+ "Quadro P6000": {
+ tflops: 12.63,
+ // float32 (GPU does not support native float16)
+ memory: [24]
+ },
+ P40: {
+ tflops: 11.76,
+ // float32 (GPU does not support native float16)
+ memory: [24]
  }
  },
  AMD: {
@@ -5578,6 +5650,10 @@ var SKUS = {
  tflops: 181,
  memory: [64]
  },
+ MI100: {
+ tflops: 184.6,
+ memory: [32]
+ },
  "RX 7900 XTX": {
  tflops: 122.8,
  memory: [24]
@@ -5601,6 +5677,18 @@ var SKUS = {
  "RX 7600 XT": {
  tflops: 45.14,
  memory: [16, 8]
+ },
+ "RX 6950 XT": {
+ tflops: 47.31,
+ memory: [16]
+ },
+ "RX 6800": {
+ tflops: 32.33,
+ memory: [16]
+ },
+ "Radeon Pro VII": {
+ tflops: 26.11,
+ memory: [16]
  }
  }
  },
@@ -5811,6 +5899,13 @@ var LOCAL_APPS = {
  displayOnModelPage: isGgufModel,
  deeplink: (model) => new URL(`https://backyard.ai/hf/model/${model.id}`)
  },
+ sanctum: {
+ prettyLabel: "Sanctum",
+ docsUrl: "https://sanctum.ai",
+ mainTask: "text-generation",
+ displayOnModelPage: isGgufModel,
+ deeplink: (model) => new URL(`sanctum://open_from_hf?model=${model.id}`)
+ },
  drawthings: {
  prettyLabel: "Draw Things",
  docsUrl: "https://drawthings.ai",
package/dist/index.js CHANGED
@@ -1503,16 +1503,16 @@ var data_default2 = taskData2;
  var taskData3 = {
  datasets: [
  {
- description: "18,000 hours of multilingual audio-text dataset in 108 languages.",
- id: "mozilla-foundation/common_voice_13_0"
+ description: "31,175 hours of multilingual audio-text dataset in 108 languages.",
+ id: "mozilla-foundation/common_voice_17_0"
  },
  {
  description: "An English dataset with 1,000 hours of data.",
  id: "librispeech_asr"
  },
  {
- description: "High quality, multi-speaker audio data and their transcriptions in various languages.",
- id: "openslr"
+ description: "A multi-lingual audio dataset with 370K hours of audio.",
+ id: "espnet/yodas"
  }
  ],
  demo: {
@@ -1547,12 +1547,12 @@ var taskData3 = {
  id: "openai/whisper-large-v3"
  },
  {
- description: "A good generic ASR model by MetaAI.",
- id: "facebook/wav2vec2-base-960h"
+ description: "A good generic speech model by MetaAI for fine-tuning.",
+ id: "facebook/w2v-bert-2.0"
  },
  {
  description: "An end-to-end model that performs ASR and Speech Translation by MetaAI.",
- id: "facebook/s2t-small-mustc-en-fr-st"
+ id: "facebook/seamless-m4t-v2-large"
  }
  ],
  spaces: [
@@ -2954,8 +2954,8 @@ var taskData24 = {
  canonicalId: "text-to-audio",
  datasets: [
  {
- description: "Thousands of short audio clips of a single speaker.",
- id: "lj_speech"
+ description: "10K hours of multi-speaker English dataset.",
+ id: "parler-tts/mls_eng_10k"
  },
  {
  description: "Multi-speaker English dataset.",
@@ -2993,8 +2993,8 @@ var taskData24 = {
  id: "facebook/mms-tts"
  },
  {
- description: "An end-to-end speech synthesis model.",
- id: "microsoft/speecht5_tts"
+ description: "A prompt based, powerful TTS model.",
+ id: "parler-tts/parler_tts_mini_v0.1"
  }
  ],
  spaces: [
@@ -3007,8 +3007,8 @@ var taskData24 = {
  id: "coqui/xtts"
  },
  {
- description: "An application that synthesizes speech for various speaker types.",
- id: "Matthijs/speecht5-tts-demo"
+ description: "An application that synthesizes speech for diverse speaker prompts.",
+ id: "parler-tts/parler_tts_mini"
  }
  ],
  summary: "Text-to-Speech (TTS) is the task of generating natural sounding speech given text input. TTS models can be extended to have a single model that generates speech for multiple speakers and multiple languages.",
@@ -5438,6 +5438,30 @@ var SKUS = {
  tflops: 91.1,
  memory: [48]
  },
+ "RTX 5880 Ada": {
+ tflops: 69.3,
+ memory: [48]
+ },
+ "RTX 5000 Ada": {
+ tflops: 65.3,
+ memory: [32]
+ },
+ "RTX 4500 Ada": {
+ tflops: 39.6,
+ memory: [24]
+ },
+ "RTX 4000 Ada": {
+ tflops: 26.7,
+ memory: [20]
+ },
+ "RTX 4000 SFF Ada": {
+ tflops: 19.2,
+ memory: [20]
+ },
+ "RTX 2000 Ada": {
+ tflops: 12,
+ memory: [16]
+ },
  A100: {
  tflops: 77.97,
  memory: [80, 40]
@@ -5450,14 +5474,14 @@ var SKUS = {
  tflops: 31.24,
  memory: [24]
  },
- T4: {
- tflops: 65.13,
- memory: [16]
- },
  "RTX 4090": {
  tflops: 82.58,
  memory: [24]
  },
+ "RTX 4090D": {
+ tflops: 79.49,
+ memory: [24]
+ },
  "RTX 4080 SUPER": {
  tflops: 52.2,
  memory: [16]
@@ -5482,6 +5506,14 @@ var SKUS = {
  tflops: 44.1,
  memory: [16]
  },
+ "RTX 4060": {
+ tflops: 15.11,
+ memory: [8]
+ },
+ "RTX 4060 Ti": {
+ tflops: 22.06,
+ memory: [8, 16]
+ },
  "RTX 3090": {
  tflops: 35.58,
  memory: [24]
@@ -5518,13 +5550,53 @@ var SKUS = {
  tflops: 12.74,
  memory: [12, 8]
  },
+ "RTX 2070": {
+ tflops: 14.93,
+ memory: [8]
+ },
+ "RTX 3050 Mobile": {
+ tflops: 7.639,
+ memory: [6]
+ },
  "RTX 2060 Mobile": {
  tflops: 9.22,
  memory: [6]
  },
+ "GTX 1080 Ti": {
+ tflops: 11.34,
+ // float32 (GPU does not support native float16)
+ memory: [11]
+ },
+ "GTX 1070 Ti": {
+ tflops: 8.2,
+ // float32 (GPU does not support native float16)
+ memory: [8]
+ },
  "RTX Titan": {
  tflops: 32.62,
  memory: [24]
+ },
+ "GTX 1650 Mobile": {
+ tflops: 6.39,
+ memory: [4]
+ },
+ T4: {
+ tflops: 65.13,
+ memory: [16]
+ },
+ V100: {
+ tflops: 28.26,
+ memory: [32, 16]
+ },
+ "Quadro P6000": {
+ tflops: 12.63,
+ // float32 (GPU does not support native float16)
+ memory: [24]
+ },
+ P40: {
+ tflops: 11.76,
+ // float32 (GPU does not support native float16)
+ memory: [24]
  }
  },
  AMD: {
@@ -5540,6 +5612,10 @@ var SKUS = {
  tflops: 181,
  memory: [64]
  },
+ MI100: {
+ tflops: 184.6,
+ memory: [32]
+ },
  "RX 7900 XTX": {
  tflops: 122.8,
  memory: [24]
@@ -5563,6 +5639,18 @@ var SKUS = {
  "RX 7600 XT": {
  tflops: 45.14,
  memory: [16, 8]
+ },
+ "RX 6950 XT": {
+ tflops: 47.31,
+ memory: [16]
+ },
+ "RX 6800": {
+ tflops: 32.33,
+ memory: [16]
+ },
+ "Radeon Pro VII": {
+ tflops: 26.11,
+ memory: [16]
  }
  }
  },
@@ -5773,6 +5861,13 @@ var LOCAL_APPS = {
  displayOnModelPage: isGgufModel,
  deeplink: (model) => new URL(`https://backyard.ai/hf/model/${model.id}`)
  },
+ sanctum: {
+ prettyLabel: "Sanctum",
+ docsUrl: "https://sanctum.ai",
+ mainTask: "text-generation",
+ displayOnModelPage: isGgufModel,
+ deeplink: (model) => new URL(`sanctum://open_from_hf?model=${model.id}`)
+ },
  drawthings: {
  prettyLabel: "Draw Things",
  docsUrl: "https://drawthings.ai",
@@ -44,6 +44,30 @@ export declare const SKUS: {
  tflops: number;
  memory: number[];
  };
+ "RTX 5880 Ada": {
+ tflops: number;
+ memory: number[];
+ };
+ "RTX 5000 Ada": {
+ tflops: number;
+ memory: number[];
+ };
+ "RTX 4500 Ada": {
+ tflops: number;
+ memory: number[];
+ };
+ "RTX 4000 Ada": {
+ tflops: number;
+ memory: number[];
+ };
+ "RTX 4000 SFF Ada": {
+ tflops: number;
+ memory: number[];
+ };
+ "RTX 2000 Ada": {
+ tflops: number;
+ memory: number[];
+ };
  A100: {
  tflops: number;
  memory: number[];
@@ -56,11 +80,11 @@ export declare const SKUS: {
  tflops: number;
  memory: number[];
  };
- T4: {
+ "RTX 4090": {
  tflops: number;
  memory: number[];
  };
- "RTX 4090": {
+ "RTX 4090D": {
  tflops: number;
  memory: number[];
  };
@@ -88,6 +112,14 @@ export declare const SKUS: {
  tflops: number;
  memory: number[];
  };
+ "RTX 4060": {
+ tflops: number;
+ memory: number[];
+ };
+ "RTX 4060 Ti": {
+ tflops: number;
+ memory: number[];
+ };
  "RTX 3090": {
  tflops: number;
  memory: number[];
@@ -124,14 +156,50 @@ export declare const SKUS: {
  tflops: number;
  memory: number[];
  };
+ "RTX 2070": {
+ tflops: number;
+ memory: number[];
+ };
+ "RTX 3050 Mobile": {
+ tflops: number;
+ memory: number[];
+ };
  "RTX 2060 Mobile": {
  tflops: number;
  memory: number[];
  };
+ "GTX 1080 Ti": {
+ tflops: number;
+ memory: number[];
+ };
+ "GTX 1070 Ti": {
+ tflops: number;
+ memory: number[];
+ };
  "RTX Titan": {
  tflops: number;
  memory: number[];
  };
+ "GTX 1650 Mobile": {
+ tflops: number;
+ memory: number[];
+ };
+ T4: {
+ tflops: number;
+ memory: number[];
+ };
+ V100: {
+ tflops: number;
+ memory: number[];
+ };
+ "Quadro P6000": {
+ tflops: number;
+ memory: number[];
+ };
+ P40: {
+ tflops: number;
+ memory: number[];
+ };
  };
  AMD: {
  MI300: {
@@ -146,6 +214,10 @@ export declare const SKUS: {
  tflops: number;
  memory: number[];
  };
+ MI100: {
+ tflops: number;
+ memory: number[];
+ };
  "RX 7900 XTX": {
  tflops: number;
  memory: number[];
@@ -170,6 +242,18 @@ export declare const SKUS: {
  tflops: number;
  memory: number[];
  };
+ "RX 6950 XT": {
+ tflops: number;
+ memory: number[];
+ };
+ "RX 6800": {
+ tflops: number;
+ memory: number[];
+ };
+ "Radeon Pro VII": {
+ tflops: number;
+ memory: number[];
+ };
  };
  };
  CPU: {
@@ -1 +1 @@
- {"version":3,"file":"hardware.d.ts","sourceRoot":"","sources":["../../src/hardware.ts"],"names":[],"mappings":"AAAA;;;GAGG;AACH,eAAO,MAAM,iDAAiD,QAAW,CAAC;AAC1E,eAAO,MAAM,yDAAyD,QAAW,CAAC;AAClF,eAAO,MAAM,oCAAoC,QAAU,CAAC;AAE5D;;;GAGG;AACH,eAAO,MAAM,+CAA+C,QAAW,CAAC;AAExE,MAAM,WAAW,YAAY;IAC5B;;;;;;;;;OASG;IACH,MAAM,EAAE,MAAM,CAAC;IACf;;;OAGG;IACH,MAAM,CAAC,EAAE,MAAM,EAAE,CAAC;CAClB;AAED,eAAO,MAAM,sBAAsB,UAAqD,CAAC;AAEzF,eAAO,MAAM,IAAI;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CA2SuD,CAAC;AAEzE,MAAM,MAAM,OAAO,GAAG,MAAM,OAAO,IAAI,CAAC"}
+ {"version":3,"file":"hardware.d.ts","sourceRoot":"","sources":["../../src/hardware.ts"],"names":[],"mappings":"AAAA;;;GAGG;AACH,eAAO,MAAM,iDAAiD,QAAW,CAAC;AAC1E,eAAO,MAAM,yDAAyD,QAAW,CAAC;AAClF,eAAO,MAAM,oCAAoC,QAAU,CAAC;AAE5D;;;GAGG;AACH,eAAO,MAAM,+CAA+C,QAAW,CAAC;AAExE,MAAM,WAAW,YAAY;IAC5B;;;;;;;;;OASG;IACH,MAAM,EAAE,MAAM,CAAC;IACf;;;OAGG;IACH,MAAM,CAAC,EAAE,MAAM,EAAE,CAAC;CAClB;AAED,eAAO,MAAM,sBAAsB,UAAqD,CAAC;AAEzF,eAAO,MAAM,IAAI;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CA+XuD,CAAC;AAEzE,MAAM,MAAM,OAAO,GAAG,MAAM,OAAO,IAAI,CAAC"}
@@ -77,6 +77,13 @@ export declare const LOCAL_APPS: {
  displayOnModelPage: typeof isGgufModel;
  deeplink: (model: ModelData) => URL;
  };
+ sanctum: {
+ prettyLabel: string;
+ docsUrl: string;
+ mainTask: "text-generation";
+ displayOnModelPage: typeof isGgufModel;
+ deeplink: (model: ModelData) => URL;
+ };
  drawthings: {
  prettyLabel: string;
  docsUrl: string;
@@ -1 +1 @@
- {"version":3,"file":"local-apps.d.ts","sourceRoot":"","sources":["../../src/local-apps.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AAC9C,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAEhD;;GAEG;AACH,MAAM,MAAM,QAAQ,GAAG;IACtB;;OAEG;IACH,WAAW,EAAE,MAAM,CAAC;IACpB;;OAEG;IACH,OAAO,EAAE,MAAM,CAAC;IAChB;;OAEG;IACH,QAAQ,EAAE,YAAY,CAAC;IACvB;;OAEG;IACH,SAAS,CAAC,EAAE,OAAO,CAAC;IAEpB,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB;;OAEG;IACH,kBAAkB,EAAE,CAAC,KAAK,EAAE,SAAS,KAAK,OAAO,CAAC;CAClD,GAAG,CACD;IACA;;OAEG;IACH,QAAQ,EAAE,CAAC,KAAK,EAAE,SAAS,KAAK,GAAG,CAAC;CACnC,GACD;IACA;;OAEG;IACH,OAAO,EAAE,CAAC,KAAK,EAAE,SAAS,KAAK,MAAM,GAAG,MAAM,EAAE,CAAC;CAChD,CACH,CAAC;AAEF,iBAAS,WAAW,CAAC,KAAK,EAAE,SAAS,WAEpC;AAmBD;;;;;;;;;;GAUG;AACH,eAAO,MAAM,UAAU;;;;;;yBA5BS,SAAS,KAAG,MAAM,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAiFhB,CAAC;AAErC,MAAM,MAAM,WAAW,GAAG,MAAM,OAAO,UAAU,CAAC"}
+ {"version":3,"file":"local-apps.d.ts","sourceRoot":"","sources":["../../src/local-apps.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AAC9C,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAEhD;;GAEG;AACH,MAAM,MAAM,QAAQ,GAAG;IACtB;;OAEG;IACH,WAAW,EAAE,MAAM,CAAC;IACpB;;OAEG;IACH,OAAO,EAAE,MAAM,CAAC;IAChB;;OAEG;IACH,QAAQ,EAAE,YAAY,CAAC;IACvB;;OAEG;IACH,SAAS,CAAC,EAAE,OAAO,CAAC;IAEpB,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB;;OAEG;IACH,kBAAkB,EAAE,CAAC,KAAK,EAAE,SAAS,KAAK,OAAO,CAAC;CAClD,GAAG,CACD;IACA;;OAEG;IACH,QAAQ,EAAE,CAAC,KAAK,EAAE,SAAS,KAAK,GAAG,CAAC;CACnC,GACD;IACA;;OAEG;IACH,OAAO,EAAE,CAAC,KAAK,EAAE,SAAS,KAAK,MAAM,GAAG,MAAM,EAAE,CAAC;CAChD,CACH,CAAC;AAEF,iBAAS,WAAW,CAAC,KAAK,EAAE,SAAS,WAEpC;AAmBD;;;;;;;;;;GAUG;AACH,eAAO,MAAM,UAAU;;;;;;yBA5BS,SAAS,KAAG,MAAM,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAwFhB,CAAC;AAErC,MAAM,MAAM,WAAW,GAAG,MAAM,OAAO,UAAU,CAAC"}
package/package.json CHANGED
@@ -1,7 +1,7 @@
  {
  "name": "@huggingface/tasks",
  "packageManager": "pnpm@8.10.5",
- "version": "0.10.8",
+ "version": "0.10.9",
  "description": "List of ML tasks for huggingface.co/tasks",
  "repository": "https://github.com/huggingface/huggingface.js.git",
  "publishConfig": {
package/src/hardware.ts CHANGED
@@ -48,6 +48,30 @@ export const SKUS = {
  tflops: 91.1,
  memory: [48],
  },
+ "RTX 5880 Ada": {
+ tflops: 69.3,
+ memory: [48],
+ },
+ "RTX 5000 Ada": {
+ tflops: 65.3,
+ memory: [32],
+ },
+ "RTX 4500 Ada": {
+ tflops: 39.6,
+ memory: [24],
+ },
+ "RTX 4000 Ada": {
+ tflops: 26.7,
+ memory: [20],
+ },
+ "RTX 4000 SFF Ada": {
+ tflops: 19.2,
+ memory: [20],
+ },
+ "RTX 2000 Ada": {
+ tflops: 12.0,
+ memory: [16],
+ },
  A100: {
  tflops: 77.97,
  memory: [80, 40],
@@ -60,14 +84,14 @@ export const SKUS = {
  tflops: 31.24,
  memory: [24],
  },
- T4: {
- tflops: 65.13,
- memory: [16],
- },
  "RTX 4090": {
  tflops: 82.58,
  memory: [24],
  },
+ "RTX 4090D": {
+ tflops: 79.49,
+ memory: [24],
+ },
  "RTX 4080 SUPER": {
  tflops: 52.2,
  memory: [16],
@@ -92,6 +116,14 @@ export const SKUS = {
  tflops: 44.1,
  memory: [16],
  },
+ "RTX 4060": {
+ tflops: 15.11,
+ memory: [8],
+ },
+ "RTX 4060 Ti": {
+ tflops: 22.06,
+ memory: [8, 16],
+ },
  "RTX 3090": {
  tflops: 35.58,
  memory: [24],
@@ -128,14 +160,50 @@ export const SKUS = {
  tflops: 12.74,
  memory: [12, 8],
  },
+ "RTX 2070": {
+ tflops: 14.93,
+ memory: [8],
+ },
+ "RTX 3050 Mobile": {
+ tflops: 7.639,
+ memory: [6],
+ },
  "RTX 2060 Mobile": {
  tflops: 9.22,
  memory: [6],
  },
+ "GTX 1080 Ti": {
+ tflops: 11.34, // float32 (GPU does not support native float16)
+ memory: [11],
+ },
+ "GTX 1070 Ti": {
+ tflops: 8.2, // float32 (GPU does not support native float16)
+ memory: [8],
+ },
  "RTX Titan": {
  tflops: 32.62,
  memory: [24],
  },
+ "GTX 1650 Mobile": {
+ tflops: 6.39,
+ memory: [4],
+ },
+ T4: {
+ tflops: 65.13,
+ memory: [16],
+ },
+ V100: {
+ tflops: 28.26,
+ memory: [32, 16],
+ },
+ "Quadro P6000": {
+ tflops: 12.63, // float32 (GPU does not support native float16)
+ memory: [24],
+ },
+ P40: {
+ tflops: 11.76, // float32 (GPU does not support native float16)
+ memory: [24],
+ },
  },
  AMD: {
  MI300: {
@@ -150,6 +218,10 @@ export const SKUS = {
  tflops: 181.0,
  memory: [64],
  },
+ MI100: {
+ tflops: 184.6,
+ memory: [32],
+ },
  "RX 7900 XTX": {
  tflops: 122.8,
  memory: [24],
@@ -174,6 +246,18 @@ export const SKUS = {
  tflops: 45.14,
  memory: [16, 8],
  },
+ "RX 6950 XT": {
+ tflops: 47.31,
+ memory: [16],
+ },
+ "RX 6800": {
+ tflops: 32.33,
+ memory: [16],
+ },
+ "Radeon Pro VII": {
+ tflops: 26.11,
+ memory: [16],
+ },
  },
  },
  CPU: {
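
The `hardware.ts` hunks above only add plain data entries to the exported `SKUS` table. As a rough sketch of how a consumer of 0.10.9 might read the new cards, the snippet below looks up one of the added GPUs; the `GPU.NVIDIA` nesting and the local `SkuSpec` helper type are assumptions inferred from these hunks, not something this diff confirms.

```typescript
// Sketch only, not from the package docs. Assumes the vendor tables sit under
// SKUS.GPU.NVIDIA / SKUS.GPU.AMD; those parent keys fall outside the hunks shown above.
import { SKUS } from "@huggingface/tasks";

// Local helper type mirroring the shape of each entry in the diff.
interface SkuSpec {
	tflops: number; // FP16 TFLOPS (FP32 where the source comments note no native float16)
	memory?: number[]; // available VRAM configurations in GB
}

const nvidia = (SKUS as unknown as Record<string, Record<string, Record<string, SkuSpec>>>).GPU?.NVIDIA ?? {};

// One of the cards added in this release:
const card = nvidia["RTX 4060 Ti"];
console.log(card?.tflops); // 22.06
console.log(card?.memory); // [8, 16]

// For example, list every NVIDIA entry that ships a 16 GB (or larger) configuration:
const atLeast16Gb = Object.entries(nvidia)
	.filter(([, spec]) => (spec.memory ?? []).some((gb) => gb >= 16))
	.map(([name]) => name);
console.log(atLeast16Gb);
```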
package/src/local-apps.ts CHANGED
@@ -103,6 +103,13 @@ export const LOCAL_APPS = {
  displayOnModelPage: isGgufModel,
  deeplink: (model) => new URL(`https://backyard.ai/hf/model/${model.id}`),
  },
+ sanctum: {
+ prettyLabel: "Sanctum",
+ docsUrl: "https://sanctum.ai",
+ mainTask: "text-generation",
+ displayOnModelPage: isGgufModel,
+ deeplink: (model) => new URL(`sanctum://open_from_hf?model=${model.id}`),
+ },
  drawthings: {
  prettyLabel: "Draw Things",
  docsUrl: "https://drawthings.ai",
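
The new `sanctum` entry in `local-apps.ts` registers another GGUF-capable local app: it is only displayed for GGUF models and builds a `sanctum://` deeplink from the model id. A minimal sketch of exercising it follows; the `model` object is a hypothetical stand-in for the Hub's `ModelData` (only `id` and a `gguf` tag are assumed to matter for the check).

```typescript
// Sketch: exercising the sanctum entry added above. LOCAL_APPS is exported per the
// .d.ts hunk earlier in this diff; the model object is a minimal hypothetical stand-in.
import { LOCAL_APPS } from "@huggingface/tasks";

const model = { id: "TheBloke/Llama-2-7B-GGUF", tags: ["gguf"] };

const sanctum = LOCAL_APPS.sanctum;
if (sanctum.displayOnModelPage(model as any)) {
	const url = sanctum.deeplink(model as any);
	console.log(url.toString()); // sanctum://open_from_hf?model=TheBloke/Llama-2-7B-GGUF
}
```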
@@ -18,7 +18,7 @@ The use of Multilingual ASR has become popular, the idea of maintaining just a s

  ## Inference

- The Hub contains over [~9,000 ASR models](https://huggingface.co/models?pipeline_tag=automatic-speech-recognition&sort=downloads) that you can use right away by trying out the widgets directly in the browser or calling the models as a service using Inference Endpoints. Here is a simple code snippet to do exactly this:
+ The Hub contains over [17,000 ASR models](https://huggingface.co/models?pipeline_tag=automatic-speech-recognition&sort=downloads) that you can test right away in your browser using the model page widgets. You can also use any model as a service using the Serverless Inference API. We also support libraries such as [transformers](https://huggingface.co/models?library=transformers&pipeline_tag=automatic-speech-recognition&sort=downloads), [speechbrain](https://huggingface.co/models?library=speechbrain&pipeline_tag=automatic-speech-recognition&sort=downloads), [NeMo](https://huggingface.co/models?pipeline_tag=automatic-speech-recognition&library=nemo&sort=downloads) and [espnet](https://huggingface.co/models?library=espnet&pipeline_tag=automatic-speech-recognition&sort=downloads) via the Serverless Inference API. Here's a simple code snippet to run inference:

  ```python
  import json
@@ -36,20 +36,7 @@ def query(filename):
  data = query("sample1.flac")
  ```

- You can also use libraries such as [transformers](https://huggingface.co/models?library=transformers&pipeline_tag=automatic-speech-recognition&sort=downloads), [speechbrain](https://huggingface.co/models?library=speechbrain&pipeline_tag=automatic-speech-recognition&sort=downloads), [NeMo](https://huggingface.co/models?pipeline_tag=automatic-speech-recognition&library=nemo&sort=downloads) and [espnet](https://huggingface.co/models?library=espnet&pipeline_tag=automatic-speech-recognition&sort=downloads) if you want one-click managed Inference without any hassle.
-
- ```python
- from transformers import pipeline
-
- with open("sample.flac", "rb") as f:
- data = f.read()
-
- pipe = pipeline("automatic-speech-recognition", "openai/whisper-large-v2")
- pipe("sample.flac")
- # {'text': "GOING ALONG SLUSHY COUNTRY ROADS AND SPEAKING TO DAMP AUDIENCES IN DRAUGHTY SCHOOL ROOMS DAY AFTER DAY FOR A FORTNIGHT HE'LL HAVE TO PUT IN AN APPEARANCE AT SOME PLACE OF WORSHIP ON SUNDAY MORNING AND HE CAN COME TO US IMMEDIATELY AFTERWARDS"}
- ```
-
- You can use [huggingface.js](https://github.com/huggingface/huggingface.js) to transcribe text with javascript using models on Hugging Face Hub.
+ You can also use[huggingface.js](https://github.com/huggingface/huggingface.js), the JavaScript client, to transcribe models with the Inference API.

  ```javascript
  import { HfInference } from "@huggingface/inference";
@@ -57,10 +44,23 @@ import { HfInference } from "@huggingface/inference";
  const inference = new HfInference(HF_TOKEN);
  await inference.automaticSpeechRecognition({
  data: await (await fetch("sample.flac")).blob(),
- model: "openai/whisper-large-v2",
+ model: "openai/whisper-large-v3",
  });
  ```

+ For transformers compatible models like Whisper, Wav2Vec2, HuBERT, etc. You can also run inference in Python using transformers as follows:
+
+ ```python
+ # pip install --upgrade transformers
+
+ from transformers import pipeline
+
+ pipe = pipeline("automatic-speech-recognition", "openai/whisper-large-v3")
+
+ pipe("sample.flac")
+ # {'text': "GOING ALONG SLUSHY COUNTRY ROADS AND SPEAKING TO DAMP AUDIENCES IN DRAUGHTY SCHOOL ROOMS DAY AFTER DAY FOR A FORTNIGHT HE'LL HAVE TO PUT IN AN APPEARANCE AT SOME PLACE OF WORSHIP ON SUNDAY MORNING AND HE CAN COME TO US IMMEDIATELY AFTERWARDS"}
+ ```
+
  ## Solving ASR for your own data

  We have some great news! You can fine-tune (transfer learning) a foundational speech model on a specific language without tonnes of data. Pretrained models such as Whisper, Wav2Vec2-MMS and HuBERT exist. [OpenAI's Whisper model](https://huggingface.co/openai/whisper-large-v3) is a large multilingual model trained on 100+ languages and with 4 Million hours of speech.
@@ -3,16 +3,16 @@ import type { TaskDataCustom } from "..";
  const taskData: TaskDataCustom = {
  datasets: [
  {
- description: "18,000 hours of multilingual audio-text dataset in 108 languages.",
- id: "mozilla-foundation/common_voice_13_0",
+ description: "31,175 hours of multilingual audio-text dataset in 108 languages.",
+ id: "mozilla-foundation/common_voice_17_0",
  },
  {
  description: "An English dataset with 1,000 hours of data.",
  id: "librispeech_asr",
  },
  {
- description: "High quality, multi-speaker audio data and their transcriptions in various languages.",
- id: "openslr",
+ description: "A multi-lingual audio dataset with 370K hours of audio.",
+ id: "espnet/yodas",
  },
  ],
  demo: {
@@ -47,12 +47,12 @@ const taskData: TaskDataCustom = {
  id: "openai/whisper-large-v3",
  },
  {
- description: "A good generic ASR model by MetaAI.",
- id: "facebook/wav2vec2-base-960h",
+ description: "A good generic speech model by MetaAI for fine-tuning.",
+ id: "facebook/w2v-bert-2.0",
  },
  {
  description: "An end-to-end model that performs ASR and Speech Translation by MetaAI.",
- id: "facebook/s2t-small-mustc-en-fr-st",
+ id: "facebook/seamless-m4t-v2-large",
  },
  ],
  spaces: [
@@ -58,8 +58,6 @@ await inference.textToSpeech({

  - [Hugging Face Audio Course](https://huggingface.co/learn/audio-course/chapter6/introduction)
  - [ML for Audio Study Group - Text to Speech Deep Dive](https://www.youtube.com/watch?v=aLBedWj-5CQ)
- - [An introduction to SpeechT5, a multi-purpose speech recognition and synthesis model](https://huggingface.co/blog/speecht5).
- - [A guide on Fine-tuning Whisper For Multilingual ASR with 🤗Transformers](https://huggingface.co/blog/fine-tune-whisper)
  - [Speech Synthesis, Recognition, and More With SpeechT5](https://huggingface.co/blog/speecht5)
  - [Optimizing a Text-To-Speech model using 🤗 Transformers](https://huggingface.co/blog/optimizing-bark)
- -
+ - [Train your own TTS models with Parler-TTS](https://github.com/huggingface/parler-tts)
@@ -4,8 +4,8 @@ const taskData: TaskDataCustom = {
  canonicalId: "text-to-audio",
  datasets: [
  {
- description: "Thousands of short audio clips of a single speaker.",
- id: "lj_speech",
+ description: "10K hours of multi-speaker English dataset.",
+ id: "parler-tts/mls_eng_10k",
  },
  {
  description: "Multi-speaker English dataset.",
@@ -43,8 +43,8 @@ const taskData: TaskDataCustom = {
  id: "facebook/mms-tts",
  },
  {
- description: "An end-to-end speech synthesis model.",
- id: "microsoft/speecht5_tts",
+ description: "A prompt based, powerful TTS model.",
+ id: "parler-tts/parler_tts_mini_v0.1",
  },
  ],
  spaces: [
@@ -57,8 +57,8 @@ const taskData: TaskDataCustom = {
  id: "coqui/xtts",
  },
  {
- description: "An application that synthesizes speech for various speaker types.",
- id: "Matthijs/speecht5-tts-demo",
+ description: "An application that synthesizes speech for diverse speaker prompts.",
+ id: "parler-tts/parler_tts_mini",
  },
  ],
  summary: