@huggingface/tasks 0.8.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1689,7 +1689,7 @@ var taskData5 = {
1689
1689
  }
1690
1690
  ],
1691
1691
  spaces: [],
1692
- summary: "Feature extraction refers to the process of transforming raw data into numerical features that can be processed while preserving the information in the original dataset.",
1692
+ summary: "Feature extraction is the task of extracting features learnt in a model.",
1693
1693
  widgetModels: ["facebook/bart-base"]
1694
1694
  };
1695
1695
  var data_default5 = taskData5;
@@ -1855,8 +1855,57 @@ var taskData7 = {
1855
1855
  };
1856
1856
  var data_default7 = taskData7;
1857
1857
 
1858
- // src/tasks/image-to-image/data.ts
1858
+ // src/tasks/image-feature-extraction/data.ts
1859
1859
  var taskData8 = {
1860
+ datasets: [
1861
+ {
1862
+ description: "ImageNet-1K is a image classification dataset in which images are used to train image-feature-extraction models.",
1863
+ id: "imagenet-1k"
1864
+ }
1865
+ ],
1866
+ demo: {
1867
+ inputs: [
1868
+ {
1869
+ filename: "mask-generation-input.png",
1870
+ type: "img"
1871
+ }
1872
+ ],
1873
+ outputs: [
1874
+ {
1875
+ table: [
1876
+ ["Dimension 1", "Dimension 2", "Dimension 3"],
1877
+ ["0.21236686408519745", "1.0919708013534546", "0.8512550592422485"],
1878
+ ["0.809657871723175", "-0.18544459342956543", "-0.7851548194885254"],
1879
+ ["1.3103108406066895", "-0.2479034662246704", "-0.9107287526130676"],
1880
+ ["1.8536205291748047", "-0.36419737339019775", "0.09717650711536407"]
1881
+ ],
1882
+ type: "tabular"
1883
+ }
1884
+ ]
1885
+ },
1886
+ metrics: [],
1887
+ models: [
1888
+ {
1889
+ description: "A powerful image feature extraction model.",
1890
+ id: "timm/vit_large_patch14_dinov2.lvd142m"
1891
+ },
1892
+ {
1893
+ description: "A strong image feature extraction model.",
1894
+ id: "google/vit-base-patch16-224-in21k"
1895
+ },
1896
+ {
1897
+ description: "A robust image feature extraction models.",
1898
+ id: "facebook/dino-vitb16"
1899
+ }
1900
+ ],
1901
+ spaces: [],
1902
+ summary: "Image feature extraction is the task of extracting features learnt in a computer vision model.",
1903
+ widgetModels: []
1904
+ };
1905
+ var data_default8 = taskData8;
1906
+
1907
+ // src/tasks/image-to-image/data.ts
1908
+ var taskData9 = {
1860
1909
  datasets: [
1861
1910
  {
1862
1911
  description: "Synthetic dataset, for image relighting",
@@ -1948,10 +1997,10 @@ var taskData8 = {
1948
1997
  widgetModels: ["lllyasviel/sd-controlnet-canny"],
1949
1998
  youtubeId: ""
1950
1999
  };
1951
- var data_default8 = taskData8;
2000
+ var data_default9 = taskData9;
1952
2001
 
1953
2002
  // src/tasks/image-to-text/data.ts
1954
- var taskData9 = {
2003
+ var taskData10 = {
1955
2004
  datasets: [
1956
2005
  {
1957
2006
  // TODO write proper description
@@ -2028,10 +2077,10 @@ var taskData9 = {
2028
2077
  widgetModels: ["Salesforce/blip-image-captioning-base"],
2029
2078
  youtubeId: ""
2030
2079
  };
2031
- var data_default9 = taskData9;
2080
+ var data_default10 = taskData10;
2032
2081
 
2033
2082
  // src/tasks/image-segmentation/data.ts
2034
- var taskData10 = {
2083
+ var taskData11 = {
2035
2084
  datasets: [
2036
2085
  {
2037
2086
  description: "Scene segmentation dataset.",
@@ -2123,10 +2172,10 @@ var taskData10 = {
2123
2172
  widgetModels: ["facebook/detr-resnet-50-panoptic"],
2124
2173
  youtubeId: "dKE8SIt9C-w"
2125
2174
  };
2126
- var data_default10 = taskData10;
2175
+ var data_default11 = taskData11;
2127
2176
 
2128
2177
  // src/tasks/mask-generation/data.ts
2129
- var taskData11 = {
2178
+ var taskData12 = {
2130
2179
  datasets: [],
2131
2180
  demo: {
2132
2181
  inputs: [
@@ -2175,10 +2224,10 @@ var taskData11 = {
2175
2224
  widgetModels: [],
2176
2225
  youtubeId: ""
2177
2226
  };
2178
- var data_default11 = taskData11;
2227
+ var data_default12 = taskData12;
2179
2228
 
2180
2229
  // src/tasks/object-detection/data.ts
2181
- var taskData12 = {
2230
+ var taskData13 = {
2182
2231
  datasets: [
2183
2232
  {
2184
2233
  // TODO write proper description
@@ -2250,10 +2299,10 @@ var taskData12 = {
2250
2299
  widgetModels: ["facebook/detr-resnet-50"],
2251
2300
  youtubeId: "WdAeKSOpxhw"
2252
2301
  };
2253
- var data_default12 = taskData12;
2302
+ var data_default13 = taskData13;
2254
2303
 
2255
2304
  // src/tasks/depth-estimation/data.ts
2256
- var taskData13 = {
2305
+ var taskData14 = {
2257
2306
  datasets: [
2258
2307
  {
2259
2308
  description: "NYU Depth V2 Dataset: Video dataset containing both RGB and depth sensor data",
@@ -2307,10 +2356,10 @@ var taskData13 = {
2307
2356
  widgetModels: [""],
2308
2357
  youtubeId: ""
2309
2358
  };
2310
- var data_default13 = taskData13;
2359
+ var data_default14 = taskData14;
2311
2360
 
2312
2361
  // src/tasks/placeholder/data.ts
2313
- var taskData14 = {
2362
+ var taskData15 = {
2314
2363
  datasets: [],
2315
2364
  demo: {
2316
2365
  inputs: [],
@@ -2327,10 +2376,10 @@ var taskData14 = {
2327
2376
  /// (eg, text2text-generation is the canonical ID of translation)
2328
2377
  canonicalId: void 0
2329
2378
  };
2330
- var data_default14 = taskData14;
2379
+ var data_default15 = taskData15;
2331
2380
 
2332
2381
  // src/tasks/reinforcement-learning/data.ts
2333
- var taskData15 = {
2382
+ var taskData16 = {
2334
2383
  datasets: [
2335
2384
  {
2336
2385
  description: "A curation of widely used datasets for Data Driven Deep Reinforcement Learning (D4RL)",
@@ -2396,10 +2445,10 @@ var taskData15 = {
2396
2445
  widgetModels: [],
2397
2446
  youtubeId: "q0BiUn5LiBc"
2398
2447
  };
2399
- var data_default15 = taskData15;
2448
+ var data_default16 = taskData16;
2400
2449
 
2401
2450
  // src/tasks/question-answering/data.ts
2402
- var taskData16 = {
2451
+ var taskData17 = {
2403
2452
  datasets: [
2404
2453
  {
2405
2454
  // TODO write proper description
@@ -2463,10 +2512,10 @@ var taskData16 = {
2463
2512
  widgetModels: ["deepset/roberta-base-squad2"],
2464
2513
  youtubeId: "ajPx5LwJD-I"
2465
2514
  };
2466
- var data_default16 = taskData16;
2515
+ var data_default17 = taskData17;
2467
2516
 
2468
2517
  // src/tasks/sentence-similarity/data.ts
2469
- var taskData17 = {
2518
+ var taskData18 = {
2470
2519
  datasets: [
2471
2520
  {
2472
2521
  description: "Bing queries with relevant passages from various web sources.",
@@ -2558,10 +2607,10 @@ var taskData17 = {
2558
2607
  widgetModels: ["sentence-transformers/all-MiniLM-L6-v2"],
2559
2608
  youtubeId: "VCZq5AkbNEU"
2560
2609
  };
2561
- var data_default17 = taskData17;
2610
+ var data_default18 = taskData18;
2562
2611
 
2563
2612
  // src/tasks/summarization/data.ts
2564
- var taskData18 = {
2613
+ var taskData19 = {
2565
2614
  canonicalId: "text2text-generation",
2566
2615
  datasets: [
2567
2616
  {
@@ -2627,10 +2676,10 @@ var taskData18 = {
2627
2676
  widgetModels: ["sshleifer/distilbart-cnn-12-6"],
2628
2677
  youtubeId: "yHnr5Dk2zCI"
2629
2678
  };
2630
- var data_default18 = taskData18;
2679
+ var data_default19 = taskData19;
2631
2680
 
2632
2681
  // src/tasks/table-question-answering/data.ts
2633
- var taskData19 = {
2682
+ var taskData20 = {
2634
2683
  datasets: [
2635
2684
  {
2636
2685
  description: "The WikiTableQuestions dataset is a large-scale dataset for the task of question answering on semi-structured tables.",
@@ -2681,10 +2730,10 @@ var taskData19 = {
2681
2730
  summary: "Table Question Answering (Table QA) is the answering a question about an information on a given table.",
2682
2731
  widgetModels: ["google/tapas-base-finetuned-wtq"]
2683
2732
  };
2684
- var data_default19 = taskData19;
2733
+ var data_default20 = taskData20;
2685
2734
 
2686
2735
  // src/tasks/tabular-classification/data.ts
2687
- var taskData20 = {
2736
+ var taskData21 = {
2688
2737
  datasets: [
2689
2738
  {
2690
2739
  description: "A comprehensive curation of datasets covering all benchmarks.",
@@ -2748,10 +2797,10 @@ var taskData20 = {
2748
2797
  widgetModels: ["scikit-learn/tabular-playground"],
2749
2798
  youtubeId: ""
2750
2799
  };
2751
- var data_default20 = taskData20;
2800
+ var data_default21 = taskData21;
2752
2801
 
2753
2802
  // src/tasks/tabular-regression/data.ts
2754
- var taskData21 = {
2803
+ var taskData22 = {
2755
2804
  datasets: [
2756
2805
  {
2757
2806
  description: "A comprehensive curation of datasets covering all benchmarks.",
@@ -2803,10 +2852,10 @@ var taskData21 = {
2803
2852
  widgetModels: ["scikit-learn/Fish-Weight"],
2804
2853
  youtubeId: ""
2805
2854
  };
2806
- var data_default21 = taskData21;
2855
+ var data_default22 = taskData22;
2807
2856
 
2808
2857
  // src/tasks/text-to-image/data.ts
2809
- var taskData22 = {
2858
+ var taskData23 = {
2810
2859
  datasets: [
2811
2860
  {
2812
2861
  description: "RedCaps is a large-scale dataset of 12M image-text pairs collected from Reddit.",
@@ -2898,10 +2947,10 @@ var taskData22 = {
2898
2947
  widgetModels: ["CompVis/stable-diffusion-v1-4"],
2899
2948
  youtubeId: ""
2900
2949
  };
2901
- var data_default22 = taskData22;
2950
+ var data_default23 = taskData23;
2902
2951
 
2903
2952
  // src/tasks/text-to-speech/data.ts
2904
- var taskData23 = {
2953
+ var taskData24 = {
2905
2954
  canonicalId: "text-to-audio",
2906
2955
  datasets: [
2907
2956
  {
@@ -2966,10 +3015,10 @@ var taskData23 = {
2966
3015
  widgetModels: ["suno/bark"],
2967
3016
  youtubeId: "NW62DpzJ274"
2968
3017
  };
2969
- var data_default23 = taskData23;
3018
+ var data_default24 = taskData24;
2970
3019
 
2971
3020
  // src/tasks/token-classification/data.ts
2972
- var taskData24 = {
3021
+ var taskData25 = {
2973
3022
  datasets: [
2974
3023
  {
2975
3024
  description: "A widely used dataset useful to benchmark named entity recognition models.",
@@ -3045,10 +3094,10 @@ var taskData24 = {
3045
3094
  widgetModels: ["dslim/bert-base-NER"],
3046
3095
  youtubeId: "wVHdVlPScxA"
3047
3096
  };
3048
- var data_default24 = taskData24;
3097
+ var data_default25 = taskData25;
3049
3098
 
3050
3099
  // src/tasks/translation/data.ts
3051
- var taskData25 = {
3100
+ var taskData26 = {
3052
3101
  canonicalId: "text2text-generation",
3053
3102
  datasets: [
3054
3103
  {
@@ -3110,10 +3159,10 @@ var taskData25 = {
3110
3159
  widgetModels: ["t5-small"],
3111
3160
  youtubeId: "1JvfrvZgi6c"
3112
3161
  };
3113
- var data_default25 = taskData25;
3162
+ var data_default26 = taskData26;
3114
3163
 
3115
3164
  // src/tasks/text-classification/data.ts
3116
- var taskData26 = {
3165
+ var taskData27 = {
3117
3166
  datasets: [
3118
3167
  {
3119
3168
  description: "A widely used dataset used to benchmark multiple variants of text classification.",
@@ -3198,10 +3247,10 @@ var taskData26 = {
3198
3247
  widgetModels: ["distilbert-base-uncased-finetuned-sst-2-english"],
3199
3248
  youtubeId: "leNG9fN9FQU"
3200
3249
  };
3201
- var data_default26 = taskData26;
3250
+ var data_default27 = taskData27;
3202
3251
 
3203
3252
  // src/tasks/text-generation/data.ts
3204
- var taskData27 = {
3253
+ var taskData28 = {
3205
3254
  datasets: [
3206
3255
  {
3207
3256
  description: "A large multilingual dataset of text crawled from the web.",
@@ -3302,10 +3351,10 @@ var taskData27 = {
3302
3351
  widgetModels: ["HuggingFaceH4/zephyr-7b-beta"],
3303
3352
  youtubeId: "Vpjb1lu0MDk"
3304
3353
  };
3305
- var data_default27 = taskData27;
3354
+ var data_default28 = taskData28;
3306
3355
 
3307
3356
  // src/tasks/text-to-video/data.ts
3308
- var taskData28 = {
3357
+ var taskData29 = {
3309
3358
  datasets: [
3310
3359
  {
3311
3360
  description: "Microsoft Research Video to Text is a large-scale dataset for open domain video captioning",
@@ -3397,10 +3446,10 @@ var taskData28 = {
3397
3446
  widgetModels: [],
3398
3447
  youtubeId: void 0
3399
3448
  };
3400
- var data_default28 = taskData28;
3449
+ var data_default29 = taskData29;
3401
3450
 
3402
3451
  // src/tasks/unconditional-image-generation/data.ts
3403
- var taskData29 = {
3452
+ var taskData30 = {
3404
3453
  datasets: [
3405
3454
  {
3406
3455
  description: "The CIFAR-100 dataset consists of 60000 32x32 colour images in 100 classes, with 600 images per class.",
@@ -3462,10 +3511,10 @@ var taskData29 = {
3462
3511
  // TODO: Add related video
3463
3512
  youtubeId: ""
3464
3513
  };
3465
- var data_default29 = taskData29;
3514
+ var data_default30 = taskData30;
3466
3515
 
3467
3516
  // src/tasks/video-classification/data.ts
3468
- var taskData30 = {
3517
+ var taskData31 = {
3469
3518
  datasets: [
3470
3519
  {
3471
3520
  // TODO write proper description
@@ -3544,10 +3593,10 @@ var taskData30 = {
3544
3593
  widgetModels: [],
3545
3594
  youtubeId: ""
3546
3595
  };
3547
- var data_default30 = taskData30;
3596
+ var data_default31 = taskData31;
3548
3597
 
3549
3598
  // src/tasks/visual-question-answering/data.ts
3550
- var taskData31 = {
3599
+ var taskData32 = {
3551
3600
  datasets: [
3552
3601
  {
3553
3602
  description: "A widely used dataset containing questions (with answers) about images.",
@@ -3637,10 +3686,10 @@ var taskData31 = {
3637
3686
  widgetModels: ["dandelin/vilt-b32-finetuned-vqa"],
3638
3687
  youtubeId: ""
3639
3688
  };
3640
- var data_default31 = taskData31;
3689
+ var data_default32 = taskData32;
3641
3690
 
3642
3691
  // src/tasks/zero-shot-classification/data.ts
3643
- var taskData32 = {
3692
+ var taskData33 = {
3644
3693
  datasets: [
3645
3694
  {
3646
3695
  description: "A widely used dataset used to benchmark multiple variants of text classification.",
@@ -3699,10 +3748,10 @@ var taskData32 = {
3699
3748
  summary: "Zero-shot text classification is a task in natural language processing where a model is trained on a set of labeled examples but is then able to classify new examples from previously unseen classes.",
3700
3749
  widgetModels: ["facebook/bart-large-mnli"]
3701
3750
  };
3702
- var data_default32 = taskData32;
3751
+ var data_default33 = taskData33;
3703
3752
 
3704
3753
  // src/tasks/zero-shot-image-classification/data.ts
3705
- var taskData33 = {
3754
+ var taskData34 = {
3706
3755
  datasets: [
3707
3756
  {
3708
3757
  // TODO write proper description
@@ -3776,10 +3825,10 @@ var taskData33 = {
3776
3825
  widgetModels: ["openai/clip-vit-large-patch14-336"],
3777
3826
  youtubeId: ""
3778
3827
  };
3779
- var data_default33 = taskData33;
3828
+ var data_default34 = taskData34;
3780
3829
 
3781
3830
  // src/tasks/zero-shot-object-detection/data.ts
3782
- var taskData34 = {
3831
+ var taskData35 = {
3783
3832
  datasets: [],
3784
3833
  demo: {
3785
3834
  inputs: [
@@ -3834,7 +3883,7 @@ var taskData34 = {
3834
3883
  widgetModels: [],
3835
3884
  youtubeId: ""
3836
3885
  };
3837
- var data_default34 = taskData34;
3886
+ var data_default35 = taskData35;
3838
3887
 
3839
3888
  // src/tasks/index.ts
3840
3889
  var TASKS_MODEL_LIBRARIES = {
@@ -3896,7 +3945,7 @@ var TASKS_MODEL_LIBRARIES = {
3896
3945
  "text-to-3d": [],
3897
3946
  "image-to-3d": []
3898
3947
  };
3899
- function getData(type, partialTaskData = data_default14) {
3948
+ function getData(type, partialTaskData = data_default15) {
3900
3949
  return {
3901
3950
  ...partialTaskData,
3902
3951
  id: type,
@@ -3908,52 +3957,52 @@ var TASKS_DATA = {
3908
3957
  "audio-classification": getData("audio-classification", data_default),
3909
3958
  "audio-to-audio": getData("audio-to-audio", data_default2),
3910
3959
  "automatic-speech-recognition": getData("automatic-speech-recognition", data_default3),
3911
- "depth-estimation": getData("depth-estimation", data_default13),
3960
+ "depth-estimation": getData("depth-estimation", data_default14),
3912
3961
  "document-question-answering": getData("document-question-answering", data_default4),
3913
3962
  "feature-extraction": getData("feature-extraction", data_default5),
3914
3963
  "fill-mask": getData("fill-mask", data_default6),
3915
3964
  "graph-ml": void 0,
3916
3965
  "image-classification": getData("image-classification", data_default7),
3917
- "image-segmentation": getData("image-segmentation", data_default10),
3966
+ "image-feature-extraction": getData("image-feature-extraction", data_default8),
3967
+ "image-segmentation": getData("image-segmentation", data_default11),
3918
3968
  "image-text-to-text": void 0,
3919
- "image-to-image": getData("image-to-image", data_default8),
3920
- "image-to-text": getData("image-to-text", data_default9),
3969
+ "image-to-image": getData("image-to-image", data_default9),
3970
+ "image-to-text": getData("image-to-text", data_default10),
3921
3971
  "image-to-video": void 0,
3922
- "mask-generation": getData("mask-generation", data_default11),
3972
+ "mask-generation": getData("mask-generation", data_default12),
3923
3973
  "multiple-choice": void 0,
3924
- "object-detection": getData("object-detection", data_default12),
3925
- "video-classification": getData("video-classification", data_default30),
3974
+ "object-detection": getData("object-detection", data_default13),
3975
+ "video-classification": getData("video-classification", data_default31),
3926
3976
  other: void 0,
3927
- "question-answering": getData("question-answering", data_default16),
3928
- "reinforcement-learning": getData("reinforcement-learning", data_default15),
3977
+ "question-answering": getData("question-answering", data_default17),
3978
+ "reinforcement-learning": getData("reinforcement-learning", data_default16),
3929
3979
  robotics: void 0,
3930
- "sentence-similarity": getData("sentence-similarity", data_default17),
3931
- summarization: getData("summarization", data_default18),
3932
- "table-question-answering": getData("table-question-answering", data_default19),
3980
+ "sentence-similarity": getData("sentence-similarity", data_default18),
3981
+ summarization: getData("summarization", data_default19),
3982
+ "table-question-answering": getData("table-question-answering", data_default20),
3933
3983
  "table-to-text": void 0,
3934
- "tabular-classification": getData("tabular-classification", data_default20),
3935
- "tabular-regression": getData("tabular-regression", data_default21),
3984
+ "tabular-classification": getData("tabular-classification", data_default21),
3985
+ "tabular-regression": getData("tabular-regression", data_default22),
3936
3986
  "tabular-to-text": void 0,
3937
- "text-classification": getData("text-classification", data_default26),
3938
- "text-generation": getData("text-generation", data_default27),
3987
+ "text-classification": getData("text-classification", data_default27),
3988
+ "text-generation": getData("text-generation", data_default28),
3939
3989
  "text-retrieval": void 0,
3940
- "text-to-image": getData("text-to-image", data_default22),
3941
- "text-to-speech": getData("text-to-speech", data_default23),
3990
+ "text-to-image": getData("text-to-image", data_default23),
3991
+ "text-to-speech": getData("text-to-speech", data_default24),
3942
3992
  "text-to-audio": void 0,
3943
- "text-to-video": getData("text-to-video", data_default28),
3993
+ "text-to-video": getData("text-to-video", data_default29),
3944
3994
  "text2text-generation": void 0,
3945
3995
  "time-series-forecasting": void 0,
3946
- "token-classification": getData("token-classification", data_default24),
3947
- translation: getData("translation", data_default25),
3948
- "unconditional-image-generation": getData("unconditional-image-generation", data_default29),
3949
- "visual-question-answering": getData("visual-question-answering", data_default31),
3996
+ "token-classification": getData("token-classification", data_default25),
3997
+ translation: getData("translation", data_default26),
3998
+ "unconditional-image-generation": getData("unconditional-image-generation", data_default30),
3999
+ "visual-question-answering": getData("visual-question-answering", data_default32),
3950
4000
  "voice-activity-detection": void 0,
3951
- "zero-shot-classification": getData("zero-shot-classification", data_default32),
3952
- "zero-shot-image-classification": getData("zero-shot-image-classification", data_default33),
3953
- "zero-shot-object-detection": getData("zero-shot-object-detection", data_default34),
3954
- "text-to-3d": getData("text-to-3d", data_default14),
3955
- "image-to-3d": getData("image-to-3d", data_default14),
3956
- "image-feature-extraction": getData("image-feature-extraction", data_default14)
4001
+ "zero-shot-classification": getData("zero-shot-classification", data_default33),
4002
+ "zero-shot-image-classification": getData("zero-shot-image-classification", data_default34),
4003
+ "zero-shot-object-detection": getData("zero-shot-object-detection", data_default35),
4004
+ "text-to-3d": getData("text-to-3d", data_default15),
4005
+ "image-to-3d": getData("image-to-3d", data_default15)
3957
4006
  };
3958
4007
 
3959
4008
  // src/model-libraries-snippets.ts
@@ -4086,6 +4135,13 @@ var keras = (model) => [
4086
4135
  model = from_pretrained_keras("${model.id}")
4087
4136
  `
4088
4137
  ];
4138
+ var keras_nlp = (model) => [
4139
+ `import keras_nlp
4140
+
4141
+ tokenizer = keras_nlp.models.Tokenizer.from_preset("hf://${model.id}")
4142
+ backbone = keras_nlp.models.Backbone.from_preset("hf://${model.id}")
4143
+ `
4144
+ ];
4089
4145
  var open_clip = (model) => [
4090
4146
  `import open_clip
4091
4147
 
@@ -4420,6 +4476,11 @@ IWorker engine = WorkerFactory.CreateWorker(BackendType.GPUCompute, model);
4420
4476
  // Please see provided C# file for more details
4421
4477
  `
4422
4478
  ];
4479
+ var voicecraft = (model) => [
4480
+ `from voicecraft import VoiceCraft
4481
+
4482
+ model = VoiceCraft.from_pretrained("${model.id}")`
4483
+ ];
4423
4484
  var mlx = (model) => [
4424
4485
  `pip install huggingface_hub hf_transfer
4425
4486
 
@@ -4608,6 +4669,13 @@ var MODEL_LIBRARIES_UI_ELEMENTS = {
4608
4669
  filter: true,
4609
4670
  countDownloads: { term: { path: "saved_model.pb" } }
4610
4671
  },
4672
+ "keras-nlp": {
4673
+ prettyLabel: "KerasNLP",
4674
+ repoName: "KerasNLP",
4675
+ repoUrl: "https://keras.io/keras_nlp/",
4676
+ docsUrl: "https://github.com/keras-team/keras-nlp",
4677
+ snippets: keras_nlp
4678
+ },
4611
4679
  k2: {
4612
4680
  prettyLabel: "K2",
4613
4681
  repoName: "k2",
@@ -4824,6 +4892,13 @@ var MODEL_LIBRARIES_UI_ELEMENTS = {
4824
4892
  wildcard: { path: "*.sentis" }
4825
4893
  }
4826
4894
  },
4895
+ voicecraft: {
4896
+ prettyLabel: "VoiceCraft",
4897
+ repoName: "VoiceCraft",
4898
+ repoUrl: "https://github.com/jasonppy/VoiceCraft",
4899
+ docsUrl: "https://github.com/jasonppy/VoiceCraft",
4900
+ snippets: voicecraft
4901
+ },
4827
4902
  whisperkit: {
4828
4903
  prettyLabel: "WhisperKit",
4829
4904
  repoName: "WhisperKit",
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@huggingface/tasks",
3
3
  "packageManager": "pnpm@8.10.5",
4
- "version": "0.8.0",
4
+ "version": "0.9.0",
5
5
  "description": "List of ML tasks for huggingface.co/tasks",
6
6
  "repository": "https://github.com/huggingface/huggingface.js.git",
7
7
  "publishConfig": {
@@ -153,6 +153,14 @@ model = from_pretrained_keras("${model.id}")
153
153
  `,
154
154
  ];
155
155
 
156
+ export const keras_nlp = (model: ModelData): string[] => [
157
+ `import keras_nlp
158
+
159
+ tokenizer = keras_nlp.models.Tokenizer.from_preset("hf://${model.id}")
160
+ backbone = keras_nlp.models.Backbone.from_preset("hf://${model.id}")
161
+ `,
162
+ ];
163
+
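For orientation, a minimal sketch of how a loaded KerasNLP preset is typically used, assuming `keras_nlp` is installed; the built-in `gpt2_base_en` preset and the prompt are illustrative stand-ins for an `hf://` Hub preset like the one targeted by the snippet above:

```python
import keras_nlp

# Load a causal LM from a built-in KerasNLP preset; an "hf://user/repo" path
# can be substituted where the Hub integration from the snippet above is available.
causal_lm = keras_nlp.models.GPT2CausalLM.from_preset("gpt2_base_en")

# Generate a short continuation; max_length counts tokens including the prompt.
print(causal_lm.generate("The Hugging Face Hub is", max_length=30))
```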
156
164
  export const open_clip = (model: ModelData): string[] => [
157
165
  `import open_clip
158
166
 
@@ -531,6 +539,12 @@ IWorker engine = WorkerFactory.CreateWorker(BackendType.GPUCompute, model);
531
539
  `,
532
540
  ];
533
541
 
542
+ export const voicecraft = (model: ModelData): string[] => [
543
+ `from voicecraft import VoiceCraft
544
+
545
+ model = VoiceCraft.from_pretrained("${model.id}")`,
546
+ ];
547
+
534
548
  export const mlx = (model: ModelData): string[] => [
535
549
  `pip install huggingface_hub hf_transfer
536
550
 
@@ -185,6 +185,13 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = {
185
185
  filter: true,
186
186
  countDownloads: { term: { path: "saved_model.pb" } },
187
187
  },
188
+ "keras-nlp": {
189
+ prettyLabel: "KerasNLP",
190
+ repoName: "KerasNLP",
191
+ repoUrl: "https://keras.io/keras_nlp/",
192
+ docsUrl: "https://github.com/keras-team/keras-nlp",
193
+ snippets: snippets.keras_nlp,
194
+ },
188
195
  k2: {
189
196
  prettyLabel: "K2",
190
197
  repoName: "k2",
@@ -401,6 +408,13 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = {
401
408
  wildcard: { path: "*.sentis" },
402
409
  },
403
410
  },
411
+ voicecraft: {
412
+ prettyLabel: "VoiceCraft",
413
+ repoName: "VoiceCraft",
414
+ repoUrl: "https://github.com/jasonppy/VoiceCraft",
415
+ docsUrl: "https://github.com/jasonppy/VoiceCraft",
416
+ snippets: snippets.voicecraft,
417
+ },
404
418
  whisperkit: {
405
419
  prettyLabel: "WhisperKit",
406
420
  repoName: "WhisperKit",
@@ -1,20 +1,13 @@
1
- ## About the Task
2
-
3
- Feature extraction is the task of building features intended to be informative from a given dataset,
4
- facilitating the subsequent learning and generalization steps in various domains of machine learning.
5
-
6
1
  ## Use Cases
7
2
 
8
- Feature extraction can be used to do transfer learning in natural language processing, computer vision and audio models.
3
+ Models trained on a specific dataset can learn features about the data. For instance, a model trained on an English poetry dataset learns English grammar at a very high level. This information can be transferred to a new model that is going to be trained on tweets. This process of extracting features and transferring to another model is called transfer learning. One can pass their dataset through a feature extraction pipeline and feed the result to a classifier.
9
4
 
10
5
  ## Inference
11
6
 
12
- #### Feature Extraction
13
-
14
7
  ```python
15
8
  from transformers import pipeline
16
9
  checkpoint = "facebook/bart-base"
17
- feature_extractor = pipeline("feature-extraction",framework="pt",model=checkpoint)
10
+ feature_extractor = pipeline("feature-extraction", framework="pt", model=checkpoint)
18
11
  text = "Transformers is an awesome library!"
19
12
 
20
13
  #Reducing along the first dimension to get a 768 dimensional array
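The use-case paragraph above mentions feeding extracted features to a classifier; a minimal sketch of that workflow follows, assuming scikit-learn is available. The tiny labeled dataset is made up for illustration:

```python
import numpy as np
from sklearn.linear_model import LogisticRegression
from transformers import pipeline

feature_extractor = pipeline("feature-extraction", framework="pt", model="facebook/bart-base")

# Toy sentiment data, purely illustrative.
texts = ["I loved this movie", "Worst film I have seen", "Great acting", "Terribly boring"]
labels = [1, 0, 1, 0]

# Each pipeline output has shape [1, seq_len, 768]; mean-pool over tokens to get one vector per text.
features = np.array([feature_extractor(t, return_tensors="pt")[0].numpy().mean(axis=0) for t in texts])

clf = LogisticRegression().fit(features, labels)
print(clf.predict(features))
```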
@@ -41,8 +41,7 @@ const taskData: TaskDataCustom = {
41
41
  },
42
42
  ],
43
43
  spaces: [],
44
- summary:
45
- "Feature extraction refers to the process of transforming raw data into numerical features that can be processed while preserving the information in the original dataset.",
44
+ summary: "Feature extraction is the task of extracting features learnt in a model.",
46
45
  widgetModels: ["facebook/bart-base"],
47
46
  };
48
47
 
@@ -0,0 +1,23 @@
1
+ ## Use Cases
2
+
3
+ ### Transfer Learning
4
+
5
+ Models trained on a specific dataset can learn features about the data. For instance, a model trained on a car classification dataset learns to recognize low-level features such as edges and curves, and higher-level, car-specific features. This information can be transferred to a new model that is going to be trained on classifying trucks. This process of extracting features and transferring them to another model is called transfer learning.
6
+
7
+ ### Similarity
8
+
9
+ Features extracted from models contain semantically meaningful information about the world. These features can be used to detect the similarity between two images. Assume there are two images: a photo of a stray cat in a street setting and a photo of a cat at home. These images both contain cats, and the features will contain the information that there's a cat in the image. Thus, comparing the features of a stray cat photo to the features of a domestic cat photo will result in higher similarity compared to any other image that doesn't contain any cats.
10
+
11
+ ## Inference
12
+
13
+ ```python
14
+ import torch
15
+ from transformers import pipeline
16
+
17
+ pipe = pipeline(task="image-feature-extraction", model_name="google/vit-base-patch16-384", framework="pt", pool=True)
18
+ pipe("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/cats.png")
19
+
20
+ # The call above returns a pooled embedding for the image, e.g.:
21
+
22
+ '[[[0.21236686408519745, 1.0919708013534546, 0.8512550592422485, ...]]]'
23
+ ```
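As a follow-up to the Similarity use case above, a minimal sketch of comparing two images via their pooled embeddings; the image URLs are just examples, and any two reachable images work:

```python
import numpy as np
from transformers import pipeline

pipe = pipeline(task="image-feature-extraction", model="google/vit-base-patch16-384", framework="pt", pool=True)

# Pooled output is a nested list of shape [1, hidden_size]; take the single vector per image.
emb1 = np.array(pipe("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/cats.png")[0])
emb2 = np.array(pipe("http://images.cocodataset.org/val2017/000000039769.jpg")[0])

# Cosine similarity: closer to 1.0 for semantically similar images.
similarity = float(np.dot(emb1, emb2) / (np.linalg.norm(emb1) * np.linalg.norm(emb2)))
print(similarity)
```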