@huggingface/tasks 0.10.15 → 0.10.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -2118,8 +2118,95 @@ var taskData10 = {
  };
  var data_default10 = taskData10;

- // src/tasks/image-segmentation/data.ts
+ // src/tasks/image-text-to-text/data.ts
  var taskData11 = {
+ datasets: [
+ {
+ description: "Instructions composed of image and text.",
+ id: "liuhaotian/LLaVA-Instruct-150K"
+ },
+ {
+ description: "Conversation turns where questions involve image and text.",
+ id: "liuhaotian/LLaVA-Pretrain"
+ },
+ {
+ description: "A collection of datasets made for model fine-tuning.",
+ id: "HuggingFaceM4/the_cauldron"
+ },
+ {
+ description: "Screenshots of websites with their HTML/CSS codes.",
+ id: "HuggingFaceM4/WebSight"
+ }
+ ],
+ demo: {
+ inputs: [
+ {
+ filename: "mask-generation-input.png",
+ type: "img"
+ },
+ {
+ label: "Text Prompt",
+ content: "Describe the position of the bee in detail.",
+ type: "text"
+ }
+ ],
+ outputs: [
+ {
+ label: "Answer",
+ content: "The bee is sitting on a pink flower, surrounded by other flowers. The bee is positioned in the center of the flower, with its head and front legs sticking out.",
+ type: "text"
+ }
+ ]
+ },
+ metrics: [],
+ models: [
+ {
+ description: "Cutting-edge vision language model that can also localize texts in images.",
+ id: "liuhaotian/llava-v1.6-34b"
+ },
+ {
+ description: "Cutting-edge conversational vision language model that can take multiple image inputs.",
+ id: "HuggingFaceM4/idefics2-8b-chatty"
+ },
+ {
+ description: "Small yet powerful model.",
+ id: "vikhyatk/moondream2"
+ },
+ {
+ description: "Strong image-text-to-text model made to understand documents.",
+ id: "mPLUG/DocOwl1.5"
+ }
+ ],
+ spaces: [
+ {
+ description: "Leaderboard to evaluate vision language models.",
+ id: "opencompass/open_vlm_leaderboard"
+ },
+ {
+ description: "Vision language models arena, where models are ranked by votes of users.",
+ id: "WildVision/vision-arena"
+ },
+ {
+ description: "Powerful vision-language model assistant.",
+ id: "liuhaotian/LLaVA-1.6"
+ },
+ {
+ description: "An application to compare outputs of different vision language models.",
+ id: "merve/compare_VLMs"
+ },
+ {
+ description: "An application for document vision language tasks.",
+ id: "mPLUG/DocOwl"
+ }
+ ],
+ summary: "Image-text-to-text models take in an image and text prompt and output text. These models are also called vision-language models, or VLMs. The difference from image-to-text models is that these models take an additional text input, not restricting the model to certain use cases like image captioning, and may also be trained to accept a conversation as input.",
+ widgetModels: ["microsoft/kosmos-2-patch14-224"],
+ youtubeId: ""
+ };
+ var data_default11 = taskData11;
+
+ // src/tasks/image-segmentation/data.ts
+ var taskData12 = {
  datasets: [
  {
  description: "Scene segmentation dataset.",
@@ -2211,10 +2298,10 @@ var taskData11 = {
  widgetModels: ["facebook/detr-resnet-50-panoptic"],
  youtubeId: "dKE8SIt9C-w"
  };
- var data_default11 = taskData11;
+ var data_default12 = taskData12;

  // src/tasks/mask-generation/data.ts
- var taskData12 = {
+ var taskData13 = {
  datasets: [],
  demo: {
  inputs: [
@@ -2263,10 +2350,10 @@ var taskData12 = {
  widgetModels: [],
  youtubeId: ""
  };
- var data_default12 = taskData12;
+ var data_default13 = taskData13;

  // src/tasks/object-detection/data.ts
- var taskData13 = {
+ var taskData14 = {
  datasets: [
  {
  // TODO write proper description
@@ -2338,10 +2425,10 @@ var taskData13 = {
  widgetModels: ["facebook/detr-resnet-50"],
  youtubeId: "WdAeKSOpxhw"
  };
- var data_default13 = taskData13;
+ var data_default14 = taskData14;

  // src/tasks/depth-estimation/data.ts
- var taskData14 = {
+ var taskData15 = {
  datasets: [
  {
  description: "NYU Depth V2 Dataset: Video dataset containing both RGB and depth sensor data",
@@ -2395,10 +2482,10 @@ var taskData14 = {
  widgetModels: [""],
  youtubeId: ""
  };
- var data_default14 = taskData14;
+ var data_default15 = taskData15;

  // src/tasks/placeholder/data.ts
- var taskData15 = {
+ var taskData16 = {
  datasets: [],
  demo: {
  inputs: [],
@@ -2415,10 +2502,10 @@ var taskData15 = {
  /// (eg, text2text-generation is the canonical ID of translation)
  canonicalId: void 0
  };
- var data_default15 = taskData15;
+ var data_default16 = taskData16;

  // src/tasks/reinforcement-learning/data.ts
- var taskData16 = {
+ var taskData17 = {
  datasets: [
  {
  description: "A curation of widely used datasets for Data Driven Deep Reinforcement Learning (D4RL)",
@@ -2484,10 +2571,10 @@ var taskData16 = {
  widgetModels: [],
  youtubeId: "q0BiUn5LiBc"
  };
- var data_default16 = taskData16;
+ var data_default17 = taskData17;

  // src/tasks/question-answering/data.ts
- var taskData17 = {
+ var taskData18 = {
  datasets: [
  {
  // TODO write proper description
@@ -2551,10 +2638,10 @@ var taskData17 = {
  widgetModels: ["deepset/roberta-base-squad2"],
  youtubeId: "ajPx5LwJD-I"
  };
- var data_default17 = taskData17;
+ var data_default18 = taskData18;

  // src/tasks/sentence-similarity/data.ts
- var taskData18 = {
+ var taskData19 = {
  datasets: [
  {
  description: "Bing queries with relevant passages from various web sources.",
@@ -2646,10 +2733,10 @@ var taskData18 = {
  widgetModels: ["sentence-transformers/all-MiniLM-L6-v2"],
  youtubeId: "VCZq5AkbNEU"
  };
- var data_default18 = taskData18;
+ var data_default19 = taskData19;

  // src/tasks/summarization/data.ts
- var taskData19 = {
+ var taskData20 = {
  canonicalId: "text2text-generation",
  datasets: [
  {
@@ -2715,10 +2802,10 @@ var taskData19 = {
  widgetModels: ["sshleifer/distilbart-cnn-12-6"],
  youtubeId: "yHnr5Dk2zCI"
  };
- var data_default19 = taskData19;
+ var data_default20 = taskData20;

  // src/tasks/table-question-answering/data.ts
- var taskData20 = {
+ var taskData21 = {
  datasets: [
  {
  description: "The WikiTableQuestions dataset is a large-scale dataset for the task of question answering on semi-structured tables.",
@@ -2769,10 +2856,10 @@ var taskData20 = {
  summary: "Table Question Answering (Table QA) is the answering a question about an information on a given table.",
  widgetModels: ["google/tapas-base-finetuned-wtq"]
  };
- var data_default20 = taskData20;
+ var data_default21 = taskData21;

  // src/tasks/tabular-classification/data.ts
- var taskData21 = {
+ var taskData22 = {
  datasets: [
  {
  description: "A comprehensive curation of datasets covering all benchmarks.",
@@ -2836,10 +2923,10 @@ var taskData21 = {
  widgetModels: ["scikit-learn/tabular-playground"],
  youtubeId: ""
  };
- var data_default21 = taskData21;
+ var data_default22 = taskData22;

  // src/tasks/tabular-regression/data.ts
- var taskData22 = {
+ var taskData23 = {
  datasets: [
  {
  description: "A comprehensive curation of datasets covering all benchmarks.",
@@ -2891,10 +2978,10 @@ var taskData22 = {
  widgetModels: ["scikit-learn/Fish-Weight"],
  youtubeId: ""
  };
- var data_default22 = taskData22;
+ var data_default23 = taskData23;

  // src/tasks/text-to-image/data.ts
- var taskData23 = {
+ var taskData24 = {
  datasets: [
  {
  description: "RedCaps is a large-scale dataset of 12M image-text pairs collected from Reddit.",
@@ -2986,10 +3073,10 @@ var taskData23 = {
  widgetModels: ["CompVis/stable-diffusion-v1-4"],
  youtubeId: ""
  };
- var data_default23 = taskData23;
+ var data_default24 = taskData24;

  // src/tasks/text-to-speech/data.ts
- var taskData24 = {
+ var taskData25 = {
  canonicalId: "text-to-audio",
  datasets: [
  {
@@ -3054,10 +3141,10 @@ var taskData24 = {
  widgetModels: ["suno/bark"],
  youtubeId: "NW62DpzJ274"
  };
- var data_default24 = taskData24;
+ var data_default25 = taskData25;

  // src/tasks/token-classification/data.ts
- var taskData25 = {
+ var taskData26 = {
  datasets: [
  {
  description: "A widely used dataset useful to benchmark named entity recognition models.",
@@ -3133,10 +3220,10 @@ var taskData25 = {
  widgetModels: ["dslim/bert-base-NER"],
  youtubeId: "wVHdVlPScxA"
  };
- var data_default25 = taskData25;
+ var data_default26 = taskData26;

  // src/tasks/translation/data.ts
- var taskData26 = {
+ var taskData27 = {
  canonicalId: "text2text-generation",
  datasets: [
  {
@@ -3198,10 +3285,10 @@ var taskData26 = {
  widgetModels: ["t5-small"],
  youtubeId: "1JvfrvZgi6c"
  };
- var data_default26 = taskData26;
+ var data_default27 = taskData27;

  // src/tasks/text-classification/data.ts
- var taskData27 = {
+ var taskData28 = {
  datasets: [
  {
  description: "A widely used dataset used to benchmark multiple variants of text classification.",
@@ -3286,10 +3373,10 @@ var taskData27 = {
  widgetModels: ["distilbert-base-uncased-finetuned-sst-2-english"],
  youtubeId: "leNG9fN9FQU"
  };
- var data_default27 = taskData27;
+ var data_default28 = taskData28;

  // src/tasks/text-generation/data.ts
- var taskData28 = {
+ var taskData29 = {
  datasets: [
  {
  description: "A large multilingual dataset of text crawled from the web.",
@@ -3390,10 +3477,10 @@ var taskData28 = {
  widgetModels: ["HuggingFaceH4/zephyr-7b-beta"],
  youtubeId: "Vpjb1lu0MDk"
  };
- var data_default28 = taskData28;
+ var data_default29 = taskData29;

  // src/tasks/text-to-video/data.ts
- var taskData29 = {
+ var taskData30 = {
  datasets: [
  {
  description: "Microsoft Research Video to Text is a large-scale dataset for open domain video captioning",
@@ -3485,10 +3572,10 @@ var taskData29 = {
  widgetModels: [],
  youtubeId: void 0
  };
- var data_default29 = taskData29;
+ var data_default30 = taskData30;

  // src/tasks/unconditional-image-generation/data.ts
- var taskData30 = {
+ var taskData31 = {
  datasets: [
  {
  description: "The CIFAR-100 dataset consists of 60000 32x32 colour images in 100 classes, with 600 images per class.",
@@ -3550,10 +3637,10 @@ var taskData30 = {
  // TODO: Add related video
  youtubeId: ""
  };
- var data_default30 = taskData30;
+ var data_default31 = taskData31;

  // src/tasks/video-classification/data.ts
- var taskData31 = {
+ var taskData32 = {
  datasets: [
  {
  // TODO write proper description
@@ -3632,10 +3719,10 @@ var taskData31 = {
  widgetModels: [],
  youtubeId: ""
  };
- var data_default31 = taskData31;
+ var data_default32 = taskData32;

  // src/tasks/visual-question-answering/data.ts
- var taskData32 = {
+ var taskData33 = {
  datasets: [
  {
  description: "A widely used dataset containing questions (with answers) about images.",
@@ -3725,10 +3812,10 @@ var taskData32 = {
  widgetModels: ["dandelin/vilt-b32-finetuned-vqa"],
  youtubeId: ""
  };
- var data_default32 = taskData32;
+ var data_default33 = taskData33;

  // src/tasks/zero-shot-classification/data.ts
- var taskData33 = {
+ var taskData34 = {
  datasets: [
  {
  description: "A widely used dataset used to benchmark multiple variants of text classification.",
@@ -3787,10 +3874,10 @@ var taskData33 = {
  summary: "Zero-shot text classification is a task in natural language processing where a model is trained on a set of labeled examples but is then able to classify new examples from previously unseen classes.",
  widgetModels: ["facebook/bart-large-mnli"]
  };
- var data_default33 = taskData33;
+ var data_default34 = taskData34;

  // src/tasks/zero-shot-image-classification/data.ts
- var taskData34 = {
+ var taskData35 = {
  datasets: [
  {
  // TODO write proper description
@@ -3864,10 +3951,10 @@ var taskData34 = {
  widgetModels: ["openai/clip-vit-large-patch14-336"],
  youtubeId: ""
  };
- var data_default34 = taskData34;
+ var data_default35 = taskData35;

  // src/tasks/zero-shot-object-detection/data.ts
- var taskData35 = {
+ var taskData36 = {
  datasets: [],
  demo: {
  inputs: [
@@ -3922,7 +4009,7 @@ var taskData35 = {
  widgetModels: [],
  youtubeId: ""
  };
- var data_default35 = taskData35;
+ var data_default36 = taskData36;

  // src/tasks/index.ts
  var TASKS_MODEL_LIBRARIES = {
@@ -3984,7 +4071,7 @@ var TASKS_MODEL_LIBRARIES = {
  "text-to-3d": [],
  "image-to-3d": []
  };
- function getData(type, partialTaskData = data_default15) {
+ function getData(type, partialTaskData = data_default16) {
  return {
  ...partialTaskData,
  id: type,
@@ -3996,52 +4083,52 @@ var TASKS_DATA = {
  "audio-classification": getData("audio-classification", data_default),
  "audio-to-audio": getData("audio-to-audio", data_default2),
  "automatic-speech-recognition": getData("automatic-speech-recognition", data_default3),
- "depth-estimation": getData("depth-estimation", data_default14),
+ "depth-estimation": getData("depth-estimation", data_default15),
  "document-question-answering": getData("document-question-answering", data_default4),
  "feature-extraction": getData("feature-extraction", data_default5),
  "fill-mask": getData("fill-mask", data_default6),
  "graph-ml": void 0,
  "image-classification": getData("image-classification", data_default7),
  "image-feature-extraction": getData("image-feature-extraction", data_default8),
- "image-segmentation": getData("image-segmentation", data_default11),
- "image-text-to-text": void 0,
+ "image-segmentation": getData("image-segmentation", data_default12),
  "image-to-image": getData("image-to-image", data_default9),
+ "image-text-to-text": getData("image-text-to-text", data_default11),
  "image-to-text": getData("image-to-text", data_default10),
  "image-to-video": void 0,
- "mask-generation": getData("mask-generation", data_default12),
+ "mask-generation": getData("mask-generation", data_default13),
  "multiple-choice": void 0,
- "object-detection": getData("object-detection", data_default13),
- "video-classification": getData("video-classification", data_default31),
+ "object-detection": getData("object-detection", data_default14),
+ "video-classification": getData("video-classification", data_default32),
  other: void 0,
- "question-answering": getData("question-answering", data_default17),
- "reinforcement-learning": getData("reinforcement-learning", data_default16),
+ "question-answering": getData("question-answering", data_default18),
+ "reinforcement-learning": getData("reinforcement-learning", data_default17),
  robotics: void 0,
- "sentence-similarity": getData("sentence-similarity", data_default18),
- summarization: getData("summarization", data_default19),
- "table-question-answering": getData("table-question-answering", data_default20),
+ "sentence-similarity": getData("sentence-similarity", data_default19),
+ summarization: getData("summarization", data_default20),
+ "table-question-answering": getData("table-question-answering", data_default21),
  "table-to-text": void 0,
- "tabular-classification": getData("tabular-classification", data_default21),
- "tabular-regression": getData("tabular-regression", data_default22),
+ "tabular-classification": getData("tabular-classification", data_default22),
+ "tabular-regression": getData("tabular-regression", data_default23),
  "tabular-to-text": void 0,
- "text-classification": getData("text-classification", data_default27),
- "text-generation": getData("text-generation", data_default28),
+ "text-classification": getData("text-classification", data_default28),
+ "text-generation": getData("text-generation", data_default29),
  "text-retrieval": void 0,
- "text-to-image": getData("text-to-image", data_default23),
- "text-to-speech": getData("text-to-speech", data_default24),
+ "text-to-image": getData("text-to-image", data_default24),
+ "text-to-speech": getData("text-to-speech", data_default25),
  "text-to-audio": void 0,
- "text-to-video": getData("text-to-video", data_default29),
+ "text-to-video": getData("text-to-video", data_default30),
  "text2text-generation": void 0,
  "time-series-forecasting": void 0,
- "token-classification": getData("token-classification", data_default25),
- translation: getData("translation", data_default26),
- "unconditional-image-generation": getData("unconditional-image-generation", data_default30),
- "visual-question-answering": getData("visual-question-answering", data_default32),
+ "token-classification": getData("token-classification", data_default26),
+ translation: getData("translation", data_default27),
+ "unconditional-image-generation": getData("unconditional-image-generation", data_default31),
+ "visual-question-answering": getData("visual-question-answering", data_default33),
  "voice-activity-detection": void 0,
- "zero-shot-classification": getData("zero-shot-classification", data_default33),
- "zero-shot-image-classification": getData("zero-shot-image-classification", data_default34),
- "zero-shot-object-detection": getData("zero-shot-object-detection", data_default35),
- "text-to-3d": getData("text-to-3d", data_default15),
- "image-to-3d": getData("image-to-3d", data_default15)
+ "zero-shot-classification": getData("zero-shot-classification", data_default34),
+ "zero-shot-image-classification": getData("zero-shot-image-classification", data_default35),
+ "zero-shot-object-detection": getData("zero-shot-object-detection", data_default36),
+ "text-to-3d": getData("text-to-3d", data_default16),
+ "image-to-3d": getData("image-to-3d", data_default16)
  };

  // src/model-libraries-snippets.ts
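This hunk is where the new task becomes visible to consumers: "image-text-to-text" moves from void 0 to an entry built by getData, which spreads the task data and stamps the task id (see the getData hunk just above). A minimal consumer sketch, assuming the package keeps exposing the TASKS_DATA export shown in this hunk; the values echoed in the comments come from the data added earlier in this diff:

// Minimal sketch: reading the newly populated entry (assumes the TASKS_DATA export above).
const { TASKS_DATA } = require("@huggingface/tasks");
const task = TASKS_DATA["image-text-to-text"];
console.log(task.id);              // "image-text-to-text" (stamped by getData)
console.log(task.widgetModels[0]); // "microsoft/kosmos-2-patch14-224"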
@@ -4089,6 +4176,11 @@ var bertopic = (model) => [

  model = BERTopic.load("${model.id}")`
  ];
+ var bm25s = (model) => [
+ `from bm25s.hf import BM25HF
+
+ retriever = BM25HF.load_from_hub("${model.id}")`
+ ];
  var diffusers_default = (model) => [
  `from diffusers import DiffusionPipeline

@@ -4705,6 +4797,16 @@ var MODEL_LIBRARIES_UI_ELEMENTS = {
  wildcard: { path: "*.npz" }
  }
  },
+ bm25s: {
+ prettyLabel: "BM25S",
+ repoName: "bm25s",
+ repoUrl: "https://github.com/xhluca/bm25s",
+ snippets: bm25s,
+ filter: false,
+ countDownloads: {
+ term: { path: "params.index.json" }
+ }
+ },
  chat_tts: {
  prettyLabel: "ChatTTS",
  repoName: "ChatTTS",
@@ -4817,6 +4919,14 @@ var MODEL_LIBRARIES_UI_ELEMENTS = {
  terms: { path: ["ckpt/tensor00000_000", "ckpt-0/tensor00000_000"] }
  }
  },
+ "hunyuan-dit": {
+ prettyLabel: "HunyuanDiT",
+ repoName: "HunyuanDiT",
+ repoUrl: "https://github.com/Tencent/HunyuanDiT",
+ countDownloads: {
+ terms: { path: ["pytorch_model_ema.pt", "pytorch_model_distill.pt"] }
+ }
+ },
  keras: {
  prettyLabel: "Keras",
  repoName: "Keras",