@huggingface/tasks 0.8.0 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -1724,7 +1724,7 @@ var taskData5 = {
  }
  ],
  spaces: [],
- summary: "Feature extraction refers to the process of transforming raw data into numerical features that can be processed while preserving the information in the original dataset.",
+ summary: "Feature extraction is the task of extracting features learnt in a model.",
  widgetModels: ["facebook/bart-base"]
  };
  var data_default5 = taskData5;
@@ -1890,8 +1890,57 @@ var taskData7 = {
  };
  var data_default7 = taskData7;

- // src/tasks/image-to-image/data.ts
+ // src/tasks/image-feature-extraction/data.ts
  var taskData8 = {
+ datasets: [
+ {
+ description: "ImageNet-1K is a image classification dataset in which images are used to train image-feature-extraction models.",
+ id: "imagenet-1k"
+ }
+ ],
+ demo: {
+ inputs: [
+ {
+ filename: "mask-generation-input.png",
+ type: "img"
+ }
+ ],
+ outputs: [
+ {
+ table: [
+ ["Dimension 1", "Dimension 2", "Dimension 3"],
+ ["0.21236686408519745", "1.0919708013534546", "0.8512550592422485"],
+ ["0.809657871723175", "-0.18544459342956543", "-0.7851548194885254"],
+ ["1.3103108406066895", "-0.2479034662246704", "-0.9107287526130676"],
+ ["1.8536205291748047", "-0.36419737339019775", "0.09717650711536407"]
+ ],
+ type: "tabular"
+ }
+ ]
+ },
+ metrics: [],
+ models: [
+ {
+ description: "A powerful image feature extraction model.",
+ id: "timm/vit_large_patch14_dinov2.lvd142m"
+ },
+ {
+ description: "A strong image feature extraction model.",
+ id: "google/vit-base-patch16-224-in21k"
+ },
+ {
+ description: "A robust image feature extraction models.",
+ id: "facebook/dino-vitb16"
+ }
+ ],
+ spaces: [],
+ summary: "Image feature extraction is the task of extracting features learnt in a computer vision model.",
+ widgetModels: []
+ };
+ var data_default8 = taskData8;
+
+ // src/tasks/image-to-image/data.ts
+ var taskData9 = {
  datasets: [
  {
  description: "Synthetic dataset, for image relighting",
@@ -1983,10 +2032,10 @@ var taskData8 = {
  widgetModels: ["lllyasviel/sd-controlnet-canny"],
  youtubeId: ""
  };
- var data_default8 = taskData8;
+ var data_default9 = taskData9;

  // src/tasks/image-to-text/data.ts
- var taskData9 = {
+ var taskData10 = {
  datasets: [
  {
  // TODO write proper description
@@ -2063,10 +2112,10 @@ var taskData9 = {
  widgetModels: ["Salesforce/blip-image-captioning-base"],
  youtubeId: ""
  };
- var data_default9 = taskData9;
+ var data_default10 = taskData10;

  // src/tasks/image-segmentation/data.ts
- var taskData10 = {
+ var taskData11 = {
  datasets: [
  {
  description: "Scene segmentation dataset.",
@@ -2158,10 +2207,10 @@ var taskData10 = {
  widgetModels: ["facebook/detr-resnet-50-panoptic"],
  youtubeId: "dKE8SIt9C-w"
  };
- var data_default10 = taskData10;
+ var data_default11 = taskData11;

  // src/tasks/mask-generation/data.ts
- var taskData11 = {
+ var taskData12 = {
  datasets: [],
  demo: {
  inputs: [
@@ -2210,10 +2259,10 @@ var taskData11 = {
  widgetModels: [],
  youtubeId: ""
  };
- var data_default11 = taskData11;
+ var data_default12 = taskData12;

  // src/tasks/object-detection/data.ts
- var taskData12 = {
+ var taskData13 = {
  datasets: [
  {
  // TODO write proper description
@@ -2285,10 +2334,10 @@ var taskData12 = {
  widgetModels: ["facebook/detr-resnet-50"],
  youtubeId: "WdAeKSOpxhw"
  };
- var data_default12 = taskData12;
+ var data_default13 = taskData13;

  // src/tasks/depth-estimation/data.ts
- var taskData13 = {
+ var taskData14 = {
  datasets: [
  {
  description: "NYU Depth V2 Dataset: Video dataset containing both RGB and depth sensor data",
@@ -2342,10 +2391,10 @@ var taskData13 = {
  widgetModels: [""],
  youtubeId: ""
  };
- var data_default13 = taskData13;
+ var data_default14 = taskData14;

  // src/tasks/placeholder/data.ts
- var taskData14 = {
+ var taskData15 = {
  datasets: [],
  demo: {
  inputs: [],
@@ -2362,10 +2411,10 @@ var taskData14 = {
  /// (eg, text2text-generation is the canonical ID of translation)
  canonicalId: void 0
  };
- var data_default14 = taskData14;
+ var data_default15 = taskData15;

  // src/tasks/reinforcement-learning/data.ts
- var taskData15 = {
+ var taskData16 = {
  datasets: [
  {
  description: "A curation of widely used datasets for Data Driven Deep Reinforcement Learning (D4RL)",
@@ -2431,10 +2480,10 @@ var taskData15 = {
  widgetModels: [],
  youtubeId: "q0BiUn5LiBc"
  };
- var data_default15 = taskData15;
+ var data_default16 = taskData16;

  // src/tasks/question-answering/data.ts
- var taskData16 = {
+ var taskData17 = {
  datasets: [
  {
  // TODO write proper description
@@ -2498,10 +2547,10 @@ var taskData16 = {
  widgetModels: ["deepset/roberta-base-squad2"],
  youtubeId: "ajPx5LwJD-I"
  };
- var data_default16 = taskData16;
+ var data_default17 = taskData17;

  // src/tasks/sentence-similarity/data.ts
- var taskData17 = {
+ var taskData18 = {
  datasets: [
  {
  description: "Bing queries with relevant passages from various web sources.",
@@ -2593,10 +2642,10 @@ var taskData17 = {
  widgetModels: ["sentence-transformers/all-MiniLM-L6-v2"],
  youtubeId: "VCZq5AkbNEU"
  };
- var data_default17 = taskData17;
+ var data_default18 = taskData18;

  // src/tasks/summarization/data.ts
- var taskData18 = {
+ var taskData19 = {
  canonicalId: "text2text-generation",
  datasets: [
  {
@@ -2662,10 +2711,10 @@ var taskData18 = {
  widgetModels: ["sshleifer/distilbart-cnn-12-6"],
  youtubeId: "yHnr5Dk2zCI"
  };
- var data_default18 = taskData18;
+ var data_default19 = taskData19;

  // src/tasks/table-question-answering/data.ts
- var taskData19 = {
+ var taskData20 = {
  datasets: [
  {
  description: "The WikiTableQuestions dataset is a large-scale dataset for the task of question answering on semi-structured tables.",
@@ -2716,10 +2765,10 @@ var taskData19 = {
  summary: "Table Question Answering (Table QA) is the answering a question about an information on a given table.",
  widgetModels: ["google/tapas-base-finetuned-wtq"]
  };
- var data_default19 = taskData19;
+ var data_default20 = taskData20;

  // src/tasks/tabular-classification/data.ts
- var taskData20 = {
+ var taskData21 = {
  datasets: [
  {
  description: "A comprehensive curation of datasets covering all benchmarks.",
@@ -2783,10 +2832,10 @@ var taskData20 = {
  widgetModels: ["scikit-learn/tabular-playground"],
  youtubeId: ""
  };
- var data_default20 = taskData20;
+ var data_default21 = taskData21;

  // src/tasks/tabular-regression/data.ts
- var taskData21 = {
+ var taskData22 = {
  datasets: [
  {
  description: "A comprehensive curation of datasets covering all benchmarks.",
@@ -2838,10 +2887,10 @@ var taskData21 = {
  widgetModels: ["scikit-learn/Fish-Weight"],
  youtubeId: ""
  };
- var data_default21 = taskData21;
+ var data_default22 = taskData22;

  // src/tasks/text-to-image/data.ts
- var taskData22 = {
+ var taskData23 = {
  datasets: [
  {
  description: "RedCaps is a large-scale dataset of 12M image-text pairs collected from Reddit.",
@@ -2933,10 +2982,10 @@ var taskData22 = {
  widgetModels: ["CompVis/stable-diffusion-v1-4"],
  youtubeId: ""
  };
- var data_default22 = taskData22;
+ var data_default23 = taskData23;

  // src/tasks/text-to-speech/data.ts
- var taskData23 = {
+ var taskData24 = {
  canonicalId: "text-to-audio",
  datasets: [
  {
@@ -3001,10 +3050,10 @@ var taskData23 = {
  widgetModels: ["suno/bark"],
  youtubeId: "NW62DpzJ274"
  };
- var data_default23 = taskData23;
+ var data_default24 = taskData24;

  // src/tasks/token-classification/data.ts
- var taskData24 = {
+ var taskData25 = {
  datasets: [
  {
  description: "A widely used dataset useful to benchmark named entity recognition models.",
@@ -3080,10 +3129,10 @@ var taskData24 = {
  widgetModels: ["dslim/bert-base-NER"],
  youtubeId: "wVHdVlPScxA"
  };
- var data_default24 = taskData24;
+ var data_default25 = taskData25;

  // src/tasks/translation/data.ts
- var taskData25 = {
+ var taskData26 = {
  canonicalId: "text2text-generation",
  datasets: [
  {
@@ -3145,10 +3194,10 @@ var taskData25 = {
  widgetModels: ["t5-small"],
  youtubeId: "1JvfrvZgi6c"
  };
- var data_default25 = taskData25;
+ var data_default26 = taskData26;

  // src/tasks/text-classification/data.ts
- var taskData26 = {
+ var taskData27 = {
  datasets: [
  {
  description: "A widely used dataset used to benchmark multiple variants of text classification.",
@@ -3233,10 +3282,10 @@ var taskData26 = {
  widgetModels: ["distilbert-base-uncased-finetuned-sst-2-english"],
  youtubeId: "leNG9fN9FQU"
  };
- var data_default26 = taskData26;
+ var data_default27 = taskData27;

  // src/tasks/text-generation/data.ts
- var taskData27 = {
+ var taskData28 = {
  datasets: [
  {
  description: "A large multilingual dataset of text crawled from the web.",
@@ -3337,10 +3386,10 @@ var taskData27 = {
  widgetModels: ["HuggingFaceH4/zephyr-7b-beta"],
  youtubeId: "Vpjb1lu0MDk"
  };
- var data_default27 = taskData27;
+ var data_default28 = taskData28;

  // src/tasks/text-to-video/data.ts
- var taskData28 = {
+ var taskData29 = {
  datasets: [
  {
  description: "Microsoft Research Video to Text is a large-scale dataset for open domain video captioning",
@@ -3432,10 +3481,10 @@ var taskData28 = {
  widgetModels: [],
  youtubeId: void 0
  };
- var data_default28 = taskData28;
+ var data_default29 = taskData29;

  // src/tasks/unconditional-image-generation/data.ts
- var taskData29 = {
+ var taskData30 = {
  datasets: [
  {
  description: "The CIFAR-100 dataset consists of 60000 32x32 colour images in 100 classes, with 600 images per class.",
@@ -3497,10 +3546,10 @@ var taskData29 = {
  // TODO: Add related video
  youtubeId: ""
  };
- var data_default29 = taskData29;
+ var data_default30 = taskData30;

  // src/tasks/video-classification/data.ts
- var taskData30 = {
+ var taskData31 = {
  datasets: [
  {
  // TODO write proper description
@@ -3579,10 +3628,10 @@ var taskData30 = {
  widgetModels: [],
  youtubeId: ""
  };
- var data_default30 = taskData30;
+ var data_default31 = taskData31;

  // src/tasks/visual-question-answering/data.ts
- var taskData31 = {
+ var taskData32 = {
  datasets: [
  {
  description: "A widely used dataset containing questions (with answers) about images.",
@@ -3672,10 +3721,10 @@ var taskData31 = {
  widgetModels: ["dandelin/vilt-b32-finetuned-vqa"],
  youtubeId: ""
  };
- var data_default31 = taskData31;
+ var data_default32 = taskData32;

  // src/tasks/zero-shot-classification/data.ts
- var taskData32 = {
+ var taskData33 = {
  datasets: [
  {
  description: "A widely used dataset used to benchmark multiple variants of text classification.",
@@ -3734,10 +3783,10 @@ var taskData32 = {
  summary: "Zero-shot text classification is a task in natural language processing where a model is trained on a set of labeled examples but is then able to classify new examples from previously unseen classes.",
  widgetModels: ["facebook/bart-large-mnli"]
  };
- var data_default32 = taskData32;
+ var data_default33 = taskData33;

  // src/tasks/zero-shot-image-classification/data.ts
- var taskData33 = {
+ var taskData34 = {
  datasets: [
  {
  // TODO write proper description
@@ -3811,10 +3860,10 @@ var taskData33 = {
  widgetModels: ["openai/clip-vit-large-patch14-336"],
  youtubeId: ""
  };
- var data_default33 = taskData33;
+ var data_default34 = taskData34;

  // src/tasks/zero-shot-object-detection/data.ts
- var taskData34 = {
+ var taskData35 = {
  datasets: [],
  demo: {
  inputs: [
@@ -3869,7 +3918,7 @@ var taskData34 = {
  widgetModels: [],
  youtubeId: ""
  };
- var data_default34 = taskData34;
+ var data_default35 = taskData35;

  // src/tasks/index.ts
  var TASKS_MODEL_LIBRARIES = {
@@ -3931,7 +3980,7 @@ var TASKS_MODEL_LIBRARIES = {
  "text-to-3d": [],
  "image-to-3d": []
  };
- function getData(type, partialTaskData = data_default14) {
+ function getData(type, partialTaskData = data_default15) {
  return {
  ...partialTaskData,
  id: type,
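
The only change to getData is the renumbered placeholder default (data_default14 → data_default15); the function still spreads the partial task data and overrides id with the task type. A short usage sketch, grounded in the wiring below:

    // Tasks without dedicated data fall back to the placeholder:
    const entry = getData("text-to-3d", data_default15);
    // entry.id === "text-to-3d"; every other field comes from data_default15.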
@@ -3943,52 +3992,52 @@ var TASKS_DATA = {
  "audio-classification": getData("audio-classification", data_default),
  "audio-to-audio": getData("audio-to-audio", data_default2),
  "automatic-speech-recognition": getData("automatic-speech-recognition", data_default3),
- "depth-estimation": getData("depth-estimation", data_default13),
+ "depth-estimation": getData("depth-estimation", data_default14),
  "document-question-answering": getData("document-question-answering", data_default4),
  "feature-extraction": getData("feature-extraction", data_default5),
  "fill-mask": getData("fill-mask", data_default6),
  "graph-ml": void 0,
  "image-classification": getData("image-classification", data_default7),
- "image-segmentation": getData("image-segmentation", data_default10),
+ "image-feature-extraction": getData("image-feature-extraction", data_default8),
+ "image-segmentation": getData("image-segmentation", data_default11),
  "image-text-to-text": void 0,
- "image-to-image": getData("image-to-image", data_default8),
- "image-to-text": getData("image-to-text", data_default9),
+ "image-to-image": getData("image-to-image", data_default9),
+ "image-to-text": getData("image-to-text", data_default10),
  "image-to-video": void 0,
- "mask-generation": getData("mask-generation", data_default11),
+ "mask-generation": getData("mask-generation", data_default12),
  "multiple-choice": void 0,
- "object-detection": getData("object-detection", data_default12),
- "video-classification": getData("video-classification", data_default30),
+ "object-detection": getData("object-detection", data_default13),
+ "video-classification": getData("video-classification", data_default31),
  other: void 0,
- "question-answering": getData("question-answering", data_default16),
- "reinforcement-learning": getData("reinforcement-learning", data_default15),
+ "question-answering": getData("question-answering", data_default17),
+ "reinforcement-learning": getData("reinforcement-learning", data_default16),
  robotics: void 0,
- "sentence-similarity": getData("sentence-similarity", data_default17),
- summarization: getData("summarization", data_default18),
- "table-question-answering": getData("table-question-answering", data_default19),
+ "sentence-similarity": getData("sentence-similarity", data_default18),
+ summarization: getData("summarization", data_default19),
+ "table-question-answering": getData("table-question-answering", data_default20),
  "table-to-text": void 0,
- "tabular-classification": getData("tabular-classification", data_default20),
- "tabular-regression": getData("tabular-regression", data_default21),
+ "tabular-classification": getData("tabular-classification", data_default21),
+ "tabular-regression": getData("tabular-regression", data_default22),
  "tabular-to-text": void 0,
- "text-classification": getData("text-classification", data_default26),
- "text-generation": getData("text-generation", data_default27),
+ "text-classification": getData("text-classification", data_default27),
+ "text-generation": getData("text-generation", data_default28),
  "text-retrieval": void 0,
- "text-to-image": getData("text-to-image", data_default22),
- "text-to-speech": getData("text-to-speech", data_default23),
+ "text-to-image": getData("text-to-image", data_default23),
+ "text-to-speech": getData("text-to-speech", data_default24),
  "text-to-audio": void 0,
- "text-to-video": getData("text-to-video", data_default28),
+ "text-to-video": getData("text-to-video", data_default29),
  "text2text-generation": void 0,
  "time-series-forecasting": void 0,
- "token-classification": getData("token-classification", data_default24),
- translation: getData("translation", data_default25),
- "unconditional-image-generation": getData("unconditional-image-generation", data_default29),
- "visual-question-answering": getData("visual-question-answering", data_default31),
+ "token-classification": getData("token-classification", data_default25),
+ translation: getData("translation", data_default26),
+ "unconditional-image-generation": getData("unconditional-image-generation", data_default30),
+ "visual-question-answering": getData("visual-question-answering", data_default32),
  "voice-activity-detection": void 0,
- "zero-shot-classification": getData("zero-shot-classification", data_default32),
- "zero-shot-image-classification": getData("zero-shot-image-classification", data_default33),
- "zero-shot-object-detection": getData("zero-shot-object-detection", data_default34),
- "text-to-3d": getData("text-to-3d", data_default14),
- "image-to-3d": getData("image-to-3d", data_default14),
- "image-feature-extraction": getData("image-feature-extraction", data_default14)
+ "zero-shot-classification": getData("zero-shot-classification", data_default33),
+ "zero-shot-image-classification": getData("zero-shot-image-classification", data_default34),
+ "zero-shot-object-detection": getData("zero-shot-object-detection", data_default35),
+ "text-to-3d": getData("text-to-3d", data_default15),
+ "image-to-3d": getData("image-to-3d", data_default15)
  };

  // src/model-libraries-snippets.ts
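
The net effect of this hunk is that "image-feature-extraction" now resolves to its own data (data_default8) rather than the placeholder it pointed to in 0.8.0, while every other entry is only renumbered. A quick check against the published build; this assumes TASKS_DATA is re-exported from the package root:

    import { TASKS_DATA } from "@huggingface/tasks";

    // 0.8.0: placeholder fields; 0.9.1: the real task summary.
    console.log(TASKS_DATA["image-feature-extraction"]?.summary);
    // -> "Image feature extraction is the task of extracting features learnt in a computer vision model."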
@@ -4121,6 +4170,13 @@ var keras = (model) => [
  model = from_pretrained_keras("${model.id}")
  `
  ];
+ var keras_nlp = (model) => [
+ `import keras_nlp
+
+ tokenizer = keras_nlp.models.Tokenizer.from_preset("hf://${model.id}")
+ backbone = keras_nlp.models.Backbone.from_preset("hf://${model.id}")
+ `
+ ];
  var open_clip = (model) => [
  `import open_clip

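The new keras_nlp snippet builder interpolates the model id into hf:// preset paths. For illustration, with a hypothetical model id (not taken from this diff), the builder returns an array holding one Python string:

    // keras_nlp({ id: "keras/bert_base_en" }) yields roughly:
    // import keras_nlp
    //
    // tokenizer = keras_nlp.models.Tokenizer.from_preset("hf://keras/bert_base_en")
    // backbone = keras_nlp.models.Backbone.from_preset("hf://keras/bert_base_en")
    const [kerasNlpSnippet] = keras_nlp({ id: "keras/bert_base_en" });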
@@ -4455,6 +4511,11 @@ IWorker engine = WorkerFactory.CreateWorker(BackendType.GPUCompute, model);
  // Please see provided C# file for more details
  `
  ];
+ var voicecraft = (model) => [
+ `from voicecraft import VoiceCraft
+
+ model = VoiceCraft.from_pretrained("${model.id}")`
+ ];
  var mlx = (model) => [
  `pip install huggingface_hub hf_transfer

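Likewise, the voicecraft builder returns a single snippet with the model id inlined; a sketch with an illustrative id (not taken from this diff):

    // voicecraft({ id: "some-org/some-voicecraft-model" }) yields roughly:
    // from voicecraft import VoiceCraft
    //
    // model = VoiceCraft.from_pretrained("some-org/some-voicecraft-model")
    const [voicecraftSnippet] = voicecraft({ id: "some-org/some-voicecraft-model" });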
@@ -4574,6 +4635,15 @@ var MODEL_LIBRARIES_UI_ELEMENTS = {
  repoName: "doctr",
  repoUrl: "https://github.com/mindee/doctr"
  },
+ elm: {
+ prettyLabel: "ELM",
+ repoName: "elm",
+ repoUrl: "https://github.com/slicex-ai/elm",
+ filter: false,
+ countDownloads: {
+ wildcard: { path: "*/slicex_elm_config.json" }
+ }
+ },
  espnet: {
  prettyLabel: "ESPnet",
  repoName: "ESPnet",
@@ -4643,6 +4713,13 @@ var MODEL_LIBRARIES_UI_ELEMENTS = {
  filter: true,
  countDownloads: { term: { path: "saved_model.pb" } }
  },
+ "keras-nlp": {
+ prettyLabel: "KerasNLP",
+ repoName: "KerasNLP",
+ repoUrl: "https://keras.io/keras_nlp/",
+ docsUrl: "https://github.com/keras-team/keras-nlp",
+ snippets: keras_nlp
+ },
  k2: {
  prettyLabel: "K2",
  repoName: "k2",
@@ -4859,6 +4936,13 @@ var MODEL_LIBRARIES_UI_ELEMENTS = {
  wildcard: { path: "*.sentis" }
  }
  },
+ voicecraft: {
+ prettyLabel: "VoiceCraft",
+ repoName: "VoiceCraft",
+ repoUrl: "https://github.com/jasonppy/VoiceCraft",
+ docsUrl: "https://github.com/jasonppy/VoiceCraft",
+ snippets: voicecraft
+ },
  whisperkit: {
  prettyLabel: "WhisperKit",
  repoName: "WhisperKit",
package/dist/index.d.ts CHANGED
@@ -799,6 +799,17 @@ declare const MODEL_LIBRARIES_UI_ELEMENTS: {
  repoName: string;
  repoUrl: string;
  };
+ elm: {
+ prettyLabel: string;
+ repoName: string;
+ repoUrl: string;
+ filter: false;
+ countDownloads: {
+ wildcard: {
+ path: string;
+ };
+ };
+ };
  espnet: {
  prettyLabel: string;
  repoName: string;
@@ -878,6 +889,13 @@ declare const MODEL_LIBRARIES_UI_ELEMENTS: {
  };
  };
  };
+ "keras-nlp": {
+ prettyLabel: string;
+ repoName: string;
+ repoUrl: string;
+ docsUrl: string;
+ snippets: (model: ModelData) => string[];
+ };
  k2: {
  prettyLabel: string;
  repoName: string;
@@ -1132,6 +1150,13 @@ declare const MODEL_LIBRARIES_UI_ELEMENTS: {
  };
  };
  };
+ voicecraft: {
+ prettyLabel: string;
+ repoName: string;
+ repoUrl: string;
+ docsUrl: string;
+ snippets: (model: ModelData) => string[];
+ };
  whisperkit: {
  prettyLabel: string;
  repoName: string;
@@ -1144,8 +1169,8 @@ declare const MODEL_LIBRARIES_UI_ELEMENTS: {
  };
  };
  type ModelLibraryKey = keyof typeof MODEL_LIBRARIES_UI_ELEMENTS;
- declare const ALL_MODEL_LIBRARY_KEYS: ("sklearn" | "adapter-transformers" | "allennlp" | "asteroid" | "audiocraft" | "bertopic" | "diffusers" | "doctr" | "espnet" | "fairseq" | "fastai" | "fasttext" | "flair" | "gliner" | "grok" | "keras" | "k2" | "mindspore" | "ml-agents" | "mlx" | "mlx-image" | "nemo" | "open_clip" | "paddlenlp" | "peft" | "pyannote-audio" | "pythae" | "sample-factory" | "sentence-transformers" | "setfit" | "spacy" | "span-marker" | "speechbrain" | "stable-baselines3" | "stanza" | "tensorflowtts" | "timm" | "transformers" | "transformers.js" | "unity-sentis" | "whisperkit")[];
- declare const ALL_DISPLAY_MODEL_LIBRARY_KEYS: ("sklearn" | "adapter-transformers" | "allennlp" | "asteroid" | "audiocraft" | "bertopic" | "diffusers" | "doctr" | "espnet" | "fairseq" | "fastai" | "fasttext" | "flair" | "gliner" | "grok" | "keras" | "k2" | "mindspore" | "ml-agents" | "mlx" | "mlx-image" | "nemo" | "open_clip" | "paddlenlp" | "peft" | "pyannote-audio" | "pythae" | "sample-factory" | "sentence-transformers" | "setfit" | "spacy" | "span-marker" | "speechbrain" | "stable-baselines3" | "stanza" | "tensorflowtts" | "timm" | "transformers" | "transformers.js" | "unity-sentis" | "whisperkit")[];
+ declare const ALL_MODEL_LIBRARY_KEYS: ("sklearn" | "adapter-transformers" | "allennlp" | "asteroid" | "audiocraft" | "bertopic" | "diffusers" | "doctr" | "elm" | "espnet" | "fairseq" | "fastai" | "fasttext" | "flair" | "gliner" | "grok" | "keras" | "keras-nlp" | "k2" | "mindspore" | "ml-agents" | "mlx" | "mlx-image" | "nemo" | "open_clip" | "paddlenlp" | "peft" | "pyannote-audio" | "pythae" | "sample-factory" | "sentence-transformers" | "setfit" | "spacy" | "span-marker" | "speechbrain" | "stable-baselines3" | "stanza" | "tensorflowtts" | "timm" | "transformers" | "transformers.js" | "unity-sentis" | "voicecraft" | "whisperkit")[];
+ declare const ALL_DISPLAY_MODEL_LIBRARY_KEYS: ("sklearn" | "adapter-transformers" | "allennlp" | "asteroid" | "audiocraft" | "bertopic" | "diffusers" | "doctr" | "elm" | "espnet" | "fairseq" | "fastai" | "fasttext" | "flair" | "gliner" | "grok" | "keras" | "keras-nlp" | "k2" | "mindspore" | "ml-agents" | "mlx" | "mlx-image" | "nemo" | "open_clip" | "paddlenlp" | "peft" | "pyannote-audio" | "pythae" | "sample-factory" | "sentence-transformers" | "setfit" | "spacy" | "span-marker" | "speechbrain" | "stable-baselines3" | "stanza" | "tensorflowtts" | "timm" | "transformers" | "transformers.js" | "unity-sentis" | "voicecraft" | "whisperkit")[];

  /**
  * Mapping from library name to its supported tasks.
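
Both key lists gain "elm", "keras-nlp", and "voicecraft", staying in sync with the new MODEL_LIBRARIES_UI_ELEMENTS entries; because ModelLibraryKey is derived via keyof typeof, the new keys type-check automatically. A small sketch, assuming the type is exported from the package root:

    import type { ModelLibraryKey } from "@huggingface/tasks";

    // Valid from 0.9.1 onwards; 0.8.0's union lacked these keys.
    const added: ModelLibraryKey[] = ["elm", "keras-nlp", "voicecraft"];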