@huggingface/tasks 0.10.14 → 0.10.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +238 -79
- package/dist/index.js +238 -79
- package/dist/src/model-libraries-snippets.d.ts +2 -0
- package/dist/src/model-libraries-snippets.d.ts.map +1 -1
- package/dist/src/model-libraries.d.ts +70 -2
- package/dist/src/model-libraries.d.ts.map +1 -1
- package/dist/src/tasks/image-text-to-text/data.d.ts +4 -0
- package/dist/src/tasks/image-text-to-text/data.d.ts.map +1 -0
- package/dist/src/tasks/index.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/dataset-libraries.ts +1 -1
- package/src/model-libraries-snippets.ts +24 -0
- package/src/model-libraries.ts +50 -0
- package/src/tasks/image-text-to-text/about.md +74 -0
- package/src/tasks/image-text-to-text/data.ts +90 -0
- package/src/tasks/image-to-text/about.md +2 -19
- package/src/tasks/index.ts +2 -1
- package/src/tasks/text-generation/about.md +7 -7
package/dist/index.cjs
CHANGED
@@ -2118,8 +2118,95 @@ var taskData10 = {
 };
 var data_default10 = taskData10;
 
-// src/tasks/image-segmentation/data.ts
+// src/tasks/image-text-to-text/data.ts
 var taskData11 = {
+  datasets: [
+    {
+      description: "Instructions composed of image and text.",
+      id: "liuhaotian/LLaVA-Instruct-150K"
+    },
+    {
+      description: "Conversation turns where questions involve image and text.",
+      id: "liuhaotian/LLaVA-Pretrain"
+    },
+    {
+      description: "A collection of datasets made for model fine-tuning.",
+      id: "HuggingFaceM4/the_cauldron"
+    },
+    {
+      description: "Screenshots of websites with their HTML/CSS codes.",
+      id: "HuggingFaceM4/WebSight"
+    }
+  ],
+  demo: {
+    inputs: [
+      {
+        filename: "mask-generation-input.png",
+        type: "img"
+      },
+      {
+        label: "Text Prompt",
+        content: "Describe the position of the bee in detail.",
+        type: "text"
+      }
+    ],
+    outputs: [
+      {
+        label: "Answer",
+        content: "The bee is sitting on a pink flower, surrounded by other flowers. The bee is positioned in the center of the flower, with its head and front legs sticking out.",
+        type: "text"
+      }
+    ]
+  },
+  metrics: [],
+  models: [
+    {
+      description: "Cutting-edge vision language model that can also localize texts in images.",
+      id: "liuhaotian/llava-v1.6-34b"
+    },
+    {
+      description: "Cutting-edge conversational vision language model that can take multiple image inputs.",
+      id: "HuggingFaceM4/idefics2-8b-chatty"
+    },
+    {
+      description: "Small yet powerful model.",
+      id: "vikhyatk/moondream2"
+    },
+    {
+      description: "Strong image-text-to-text model made to understand documents.",
+      id: "mPLUG/DocOwl1.5"
+    }
+  ],
+  spaces: [
+    {
+      description: "Leaderboard to evaluate vision language models.",
+      id: "opencompass/open_vlm_leaderboard"
+    },
+    {
+      description: "Vision language models arena, where models are ranked by votes of users.",
+      id: "WildVision/vision-arena"
+    },
+    {
+      description: "Powerful vision-language model assistant.",
+      id: "liuhaotian/LLaVA-1.6"
+    },
+    {
+      description: "An application to compare outputs of different vision language models.",
+      id: "merve/compare_VLMs"
+    },
+    {
+      description: "An application for document vision language tasks.",
+      id: "mPLUG/DocOwl"
+    }
+  ],
+  summary: "Image-text-to-text models take in an image and text prompt and output text. These models are also called vision-language models, or VLMs. The difference from image-to-text models is that these models take an additional text input, not restricting the model to certain use cases like image captioning, and may also be trained to accept a conversation as input.",
+  widgetModels: ["microsoft/kosmos-2-patch14-224"],
+  youtubeId: ""
+};
+var data_default11 = taskData11;
+
+// src/tasks/image-segmentation/data.ts
+var taskData12 = {
   datasets: [
     {
       description: "Scene segmentation dataset.",
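The block above is the substance of the release: a fully populated task-data entry for the new image-text-to-text task. Once wired into TASKS_DATA (see the src/tasks/index.ts hunk further down), the entry can be read like any other task. A minimal TypeScript sketch, assuming only the package's exported TASKS_DATA map and the fields visible in the data above:

import { TASKS_DATA } from "@huggingface/tasks";

// New in 0.10.16; this key resolved to `undefined` in 0.10.14.
const task = TASKS_DATA["image-text-to-text"];

if (task) {
  console.log(task.id); // "image-text-to-text"
  console.log(task.summary); // the summary string from taskData11 above
  console.log(task.models.map((m) => m.id)); // ["liuhaotian/llava-v1.6-34b", ...]
  console.log(task.widgetModels); // ["microsoft/kosmos-2-patch14-224"]
}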
@@ -2211,10 +2298,10 @@ var taskData11 = {
   widgetModels: ["facebook/detr-resnet-50-panoptic"],
   youtubeId: "dKE8SIt9C-w"
 };
-var data_default11 = taskData11;
+var data_default12 = taskData12;
 
 // src/tasks/mask-generation/data.ts
-var taskData12 = {
+var taskData13 = {
   datasets: [],
   demo: {
     inputs: [
@@ -2263,10 +2350,10 @@ var taskData12 = {
   widgetModels: [],
   youtubeId: ""
 };
-var data_default12 = taskData12;
+var data_default13 = taskData13;
 
 // src/tasks/object-detection/data.ts
-var taskData13 = {
+var taskData14 = {
   datasets: [
     {
       // TODO write proper description
@@ -2338,10 +2425,10 @@ var taskData13 = {
   widgetModels: ["facebook/detr-resnet-50"],
   youtubeId: "WdAeKSOpxhw"
 };
-var data_default13 = taskData13;
+var data_default14 = taskData14;
 
 // src/tasks/depth-estimation/data.ts
-var taskData14 = {
+var taskData15 = {
   datasets: [
     {
       description: "NYU Depth V2 Dataset: Video dataset containing both RGB and depth sensor data",
@@ -2395,10 +2482,10 @@ var taskData14 = {
   widgetModels: [""],
   youtubeId: ""
 };
-var data_default14 = taskData14;
+var data_default15 = taskData15;
 
 // src/tasks/placeholder/data.ts
-var taskData15 = {
+var taskData16 = {
   datasets: [],
   demo: {
     inputs: [],
@@ -2415,10 +2502,10 @@ var taskData15 = {
   /// (eg, text2text-generation is the canonical ID of translation)
   canonicalId: void 0
 };
-var data_default15 = taskData15;
+var data_default16 = taskData16;
 
 // src/tasks/reinforcement-learning/data.ts
-var taskData16 = {
+var taskData17 = {
   datasets: [
     {
       description: "A curation of widely used datasets for Data Driven Deep Reinforcement Learning (D4RL)",
@@ -2484,10 +2571,10 @@ var taskData16 = {
   widgetModels: [],
   youtubeId: "q0BiUn5LiBc"
 };
-var data_default16 = taskData16;
+var data_default17 = taskData17;
 
 // src/tasks/question-answering/data.ts
-var taskData17 = {
+var taskData18 = {
   datasets: [
     {
       // TODO write proper description
@@ -2551,10 +2638,10 @@ var taskData17 = {
   widgetModels: ["deepset/roberta-base-squad2"],
   youtubeId: "ajPx5LwJD-I"
 };
-var data_default17 = taskData17;
+var data_default18 = taskData18;
 
 // src/tasks/sentence-similarity/data.ts
-var taskData18 = {
+var taskData19 = {
   datasets: [
     {
       description: "Bing queries with relevant passages from various web sources.",
@@ -2646,10 +2733,10 @@ var taskData18 = {
   widgetModels: ["sentence-transformers/all-MiniLM-L6-v2"],
   youtubeId: "VCZq5AkbNEU"
 };
-var data_default18 = taskData18;
+var data_default19 = taskData19;
 
 // src/tasks/summarization/data.ts
-var taskData19 = {
+var taskData20 = {
   canonicalId: "text2text-generation",
   datasets: [
     {
@@ -2715,10 +2802,10 @@ var taskData19 = {
   widgetModels: ["sshleifer/distilbart-cnn-12-6"],
   youtubeId: "yHnr5Dk2zCI"
 };
-var data_default19 = taskData19;
+var data_default20 = taskData20;
 
 // src/tasks/table-question-answering/data.ts
-var taskData20 = {
+var taskData21 = {
   datasets: [
     {
       description: "The WikiTableQuestions dataset is a large-scale dataset for the task of question answering on semi-structured tables.",
@@ -2769,10 +2856,10 @@ var taskData20 = {
   summary: "Table Question Answering (Table QA) is the answering a question about an information on a given table.",
   widgetModels: ["google/tapas-base-finetuned-wtq"]
 };
-var data_default20 = taskData20;
+var data_default21 = taskData21;
 
 // src/tasks/tabular-classification/data.ts
-var taskData21 = {
+var taskData22 = {
   datasets: [
     {
       description: "A comprehensive curation of datasets covering all benchmarks.",
@@ -2836,10 +2923,10 @@ var taskData21 = {
   widgetModels: ["scikit-learn/tabular-playground"],
   youtubeId: ""
 };
-var data_default21 = taskData21;
+var data_default22 = taskData22;
 
 // src/tasks/tabular-regression/data.ts
-var taskData22 = {
+var taskData23 = {
   datasets: [
     {
       description: "A comprehensive curation of datasets covering all benchmarks.",
@@ -2891,10 +2978,10 @@ var taskData22 = {
   widgetModels: ["scikit-learn/Fish-Weight"],
   youtubeId: ""
 };
-var data_default22 = taskData22;
+var data_default23 = taskData23;
 
 // src/tasks/text-to-image/data.ts
-var taskData23 = {
+var taskData24 = {
   datasets: [
     {
       description: "RedCaps is a large-scale dataset of 12M image-text pairs collected from Reddit.",
@@ -2986,10 +3073,10 @@ var taskData23 = {
   widgetModels: ["CompVis/stable-diffusion-v1-4"],
   youtubeId: ""
 };
-var data_default23 = taskData23;
+var data_default24 = taskData24;
 
 // src/tasks/text-to-speech/data.ts
-var taskData24 = {
+var taskData25 = {
   canonicalId: "text-to-audio",
   datasets: [
     {
@@ -3054,10 +3141,10 @@ var taskData24 = {
   widgetModels: ["suno/bark"],
   youtubeId: "NW62DpzJ274"
 };
-var data_default24 = taskData24;
+var data_default25 = taskData25;
 
 // src/tasks/token-classification/data.ts
-var taskData25 = {
+var taskData26 = {
   datasets: [
     {
       description: "A widely used dataset useful to benchmark named entity recognition models.",
@@ -3133,10 +3220,10 @@ var taskData25 = {
   widgetModels: ["dslim/bert-base-NER"],
   youtubeId: "wVHdVlPScxA"
 };
-var data_default25 = taskData25;
+var data_default26 = taskData26;
 
 // src/tasks/translation/data.ts
-var taskData26 = {
+var taskData27 = {
   canonicalId: "text2text-generation",
   datasets: [
     {
@@ -3198,10 +3285,10 @@ var taskData26 = {
   widgetModels: ["t5-small"],
   youtubeId: "1JvfrvZgi6c"
 };
-var data_default26 = taskData26;
+var data_default27 = taskData27;
 
 // src/tasks/text-classification/data.ts
-var taskData27 = {
+var taskData28 = {
   datasets: [
     {
       description: "A widely used dataset used to benchmark multiple variants of text classification.",
@@ -3286,10 +3373,10 @@ var taskData27 = {
   widgetModels: ["distilbert-base-uncased-finetuned-sst-2-english"],
   youtubeId: "leNG9fN9FQU"
 };
-var data_default27 = taskData27;
+var data_default28 = taskData28;
 
 // src/tasks/text-generation/data.ts
-var taskData28 = {
+var taskData29 = {
   datasets: [
     {
       description: "A large multilingual dataset of text crawled from the web.",
@@ -3390,10 +3477,10 @@ var taskData28 = {
   widgetModels: ["HuggingFaceH4/zephyr-7b-beta"],
   youtubeId: "Vpjb1lu0MDk"
 };
-var data_default28 = taskData28;
+var data_default29 = taskData29;
 
 // src/tasks/text-to-video/data.ts
-var taskData29 = {
+var taskData30 = {
   datasets: [
     {
       description: "Microsoft Research Video to Text is a large-scale dataset for open domain video captioning",
@@ -3485,10 +3572,10 @@ var taskData29 = {
   widgetModels: [],
   youtubeId: void 0
 };
-var data_default29 = taskData29;
+var data_default30 = taskData30;
 
 // src/tasks/unconditional-image-generation/data.ts
-var taskData30 = {
+var taskData31 = {
   datasets: [
     {
       description: "The CIFAR-100 dataset consists of 60000 32x32 colour images in 100 classes, with 600 images per class.",
@@ -3550,10 +3637,10 @@ var taskData30 = {
   // TODO: Add related video
   youtubeId: ""
 };
-var data_default30 = taskData30;
+var data_default31 = taskData31;
 
 // src/tasks/video-classification/data.ts
-var taskData31 = {
+var taskData32 = {
   datasets: [
     {
       // TODO write proper description
@@ -3632,10 +3719,10 @@ var taskData31 = {
   widgetModels: [],
   youtubeId: ""
 };
-var data_default31 = taskData31;
+var data_default32 = taskData32;
 
 // src/tasks/visual-question-answering/data.ts
-var taskData32 = {
+var taskData33 = {
   datasets: [
     {
       description: "A widely used dataset containing questions (with answers) about images.",
@@ -3725,10 +3812,10 @@ var taskData32 = {
   widgetModels: ["dandelin/vilt-b32-finetuned-vqa"],
   youtubeId: ""
 };
-var data_default32 = taskData32;
+var data_default33 = taskData33;
 
 // src/tasks/zero-shot-classification/data.ts
-var taskData33 = {
+var taskData34 = {
   datasets: [
     {
       description: "A widely used dataset used to benchmark multiple variants of text classification.",
@@ -3787,10 +3874,10 @@ var taskData33 = {
   summary: "Zero-shot text classification is a task in natural language processing where a model is trained on a set of labeled examples but is then able to classify new examples from previously unseen classes.",
   widgetModels: ["facebook/bart-large-mnli"]
 };
-var data_default33 = taskData33;
+var data_default34 = taskData34;
 
 // src/tasks/zero-shot-image-classification/data.ts
-var taskData34 = {
+var taskData35 = {
   datasets: [
     {
       // TODO write proper description
@@ -3864,10 +3951,10 @@ var taskData34 = {
   widgetModels: ["openai/clip-vit-large-patch14-336"],
   youtubeId: ""
 };
-var data_default34 = taskData34;
+var data_default35 = taskData35;
 
 // src/tasks/zero-shot-object-detection/data.ts
-var taskData35 = {
+var taskData36 = {
   datasets: [],
   demo: {
     inputs: [
@@ -3922,7 +4009,7 @@ var taskData35 = {
   widgetModels: [],
   youtubeId: ""
 };
-var data_default35 = taskData35;
+var data_default36 = taskData36;
 
 // src/tasks/index.ts
 var TASKS_MODEL_LIBRARIES = {
@@ -3984,7 +4071,7 @@ var TASKS_MODEL_LIBRARIES = {
   "text-to-3d": [],
   "image-to-3d": []
 };
-function getData(type, partialTaskData = data_default15) {
+function getData(type, partialTaskData = data_default16) {
   return {
     ...partialTaskData,
     id: type,
@@ -3996,52 +4083,52 @@ var TASKS_DATA = {
   "audio-classification": getData("audio-classification", data_default),
   "audio-to-audio": getData("audio-to-audio", data_default2),
   "automatic-speech-recognition": getData("automatic-speech-recognition", data_default3),
-  "depth-estimation": getData("depth-estimation", data_default14),
+  "depth-estimation": getData("depth-estimation", data_default15),
   "document-question-answering": getData("document-question-answering", data_default4),
   "feature-extraction": getData("feature-extraction", data_default5),
   "fill-mask": getData("fill-mask", data_default6),
   "graph-ml": void 0,
   "image-classification": getData("image-classification", data_default7),
   "image-feature-extraction": getData("image-feature-extraction", data_default8),
-  "image-segmentation": getData("image-segmentation", data_default11),
-  "image-text-to-text": void 0,
+  "image-segmentation": getData("image-segmentation", data_default12),
   "image-to-image": getData("image-to-image", data_default9),
+  "image-text-to-text": getData("image-text-to-text", data_default11),
   "image-to-text": getData("image-to-text", data_default10),
   "image-to-video": void 0,
-  "mask-generation": getData("mask-generation", data_default12),
+  "mask-generation": getData("mask-generation", data_default13),
   "multiple-choice": void 0,
-  "object-detection": getData("object-detection", data_default13),
-  "video-classification": getData("video-classification", data_default31),
+  "object-detection": getData("object-detection", data_default14),
+  "video-classification": getData("video-classification", data_default32),
   other: void 0,
-  "question-answering": getData("question-answering", data_default17),
-  "reinforcement-learning": getData("reinforcement-learning", data_default16),
+  "question-answering": getData("question-answering", data_default18),
+  "reinforcement-learning": getData("reinforcement-learning", data_default17),
   robotics: void 0,
-  "sentence-similarity": getData("sentence-similarity", data_default18),
-  summarization: getData("summarization", data_default19),
-  "table-question-answering": getData("table-question-answering", data_default20),
+  "sentence-similarity": getData("sentence-similarity", data_default19),
+  summarization: getData("summarization", data_default20),
+  "table-question-answering": getData("table-question-answering", data_default21),
   "table-to-text": void 0,
-  "tabular-classification": getData("tabular-classification", data_default21),
-  "tabular-regression": getData("tabular-regression", data_default22),
+  "tabular-classification": getData("tabular-classification", data_default22),
+  "tabular-regression": getData("tabular-regression", data_default23),
   "tabular-to-text": void 0,
-  "text-classification": getData("text-classification", data_default27),
-  "text-generation": getData("text-generation", data_default28),
+  "text-classification": getData("text-classification", data_default28),
+  "text-generation": getData("text-generation", data_default29),
   "text-retrieval": void 0,
-  "text-to-image": getData("text-to-image", data_default23),
-  "text-to-speech": getData("text-to-speech", data_default24),
+  "text-to-image": getData("text-to-image", data_default24),
+  "text-to-speech": getData("text-to-speech", data_default25),
   "text-to-audio": void 0,
-  "text-to-video": getData("text-to-video", data_default29),
+  "text-to-video": getData("text-to-video", data_default30),
   "text2text-generation": void 0,
   "time-series-forecasting": void 0,
-  "token-classification": getData("token-classification", data_default25),
-  translation: getData("translation", data_default26),
-  "unconditional-image-generation": getData("unconditional-image-generation", data_default30),
-  "visual-question-answering": getData("visual-question-answering", data_default32),
+  "token-classification": getData("token-classification", data_default26),
+  translation: getData("translation", data_default27),
+  "unconditional-image-generation": getData("unconditional-image-generation", data_default31),
+  "visual-question-answering": getData("visual-question-answering", data_default33),
   "voice-activity-detection": void 0,
-  "zero-shot-classification": getData("zero-shot-classification", data_default33),
-  "zero-shot-image-classification": getData("zero-shot-image-classification", data_default34),
-  "zero-shot-object-detection": getData("zero-shot-object-detection", data_default35),
-  "text-to-3d": getData("text-to-3d", data_default15),
-  "image-to-3d": getData("image-to-3d", data_default15)
+  "zero-shot-classification": getData("zero-shot-classification", data_default34),
+  "zero-shot-image-classification": getData("zero-shot-image-classification", data_default35),
+  "zero-shot-object-detection": getData("zero-shot-object-detection", data_default36),
+  "text-to-3d": getData("text-to-3d", data_default16),
+  "image-to-3d": getData("image-to-3d", data_default16)
 };
 
 // src/model-libraries-snippets.ts
@@ -4089,6 +4176,11 @@ var bertopic = (model) => [
 
 model = BERTopic.load("${model.id}")`
 ];
+var bm25s = (model) => [
+  `from bm25s.hf import BM25HF
+
+retriever = BM25HF.load_from_hub("${model.id}")`
+];
 var diffusers_default = (model) => [
   `from diffusers import DiffusionPipeline
 
@@ -4125,6 +4217,23 @@ var diffusers = (model) => {
     return diffusers_default(model);
   }
 };
+var edsnlp = (model) => {
+  const packageName = nameWithoutNamespace(model.id).replaceAll("-", "_");
+  return [
+    `# Load it from the Hub directly
+import edsnlp
+nlp = edsnlp.load("${model.id}")
+`,
+    `# Or install it as a package
+!pip install git+https://huggingface.co/${model.id}
+
+# and import it as a module
+import ${packageName}
+
+nlp = ${packageName}.load() # or edsnlp.load("${packageName}")
+`
+  ];
+};
 var espnetTTS = (model) => [
   `from espnet2.bin.tts_inference import Text2Speech
 
@@ -4688,6 +4797,16 @@ var MODEL_LIBRARIES_UI_ELEMENTS = {
       wildcard: { path: "*.npz" }
     }
   },
+  bm25s: {
+    prettyLabel: "BM25S",
+    repoName: "bm25s",
+    repoUrl: "https://github.com/xhluca/bm25s",
+    snippets: bm25s,
+    filter: false,
+    countDownloads: {
+      term: { path: "params.index.json" }
+    }
+  },
   chat_tts: {
     prettyLabel: "ChatTTS",
     repoName: "ChatTTS",
@@ -4710,6 +4829,17 @@ var MODEL_LIBRARIES_UI_ELEMENTS = {
     repoName: "doctr",
     repoUrl: "https://github.com/mindee/doctr"
   },
+  edsnlp: {
+    prettyLabel: "EDS-NLP",
+    repoName: "edsnlp",
+    repoUrl: "https://github.com/aphp/edsnlp",
+    docsUrl: "https://aphp.github.io/edsnlp/latest/",
+    filter: false,
+    snippets: edsnlp,
+    countDownloads: {
+      wildcard: { path: "*/config.cfg" }
+    }
+  },
   elm: {
     prettyLabel: "ELM",
     repoName: "elm",
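Both new library entries follow the existing pattern: the optional snippets function receives the model's metadata and returns one or more ready-to-display code strings (Python, for bm25s and edsnlp). A rough consumer-side sketch in TypeScript; the model object below is an illustrative stand-in, and only its id is interpolated by the templates added above:

import { MODEL_LIBRARIES_UI_ELEMENTS } from "@huggingface/tasks";

// Stand-in for the richer ModelData object the Hub passes in;
// only `id` is used by the bm25s and edsnlp templates above.
const model = { id: "my-org/my-model" };

// Each snippets function returns an array of code strings.
console.log(MODEL_LIBRARIES_UI_ELEMENTS.bm25s.snippets?.(model as any));
console.log(MODEL_LIBRARIES_UI_ELEMENTS.edsnlp.snippets?.(model as any));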
@@ -4763,6 +4893,13 @@ var MODEL_LIBRARIES_UI_ELEMENTS = {
       term: { path: "pytorch_model.bin" }
     }
   },
+  "gemma.cpp": {
+    prettyLabel: "gemma.cpp",
+    repoName: "gemma.cpp",
+    repoUrl: "https://github.com/google/gemma.cpp",
+    filter: false,
+    countDownloads: { wildcard: { path: "*.sbs" } }
+  },
   gliner: {
     prettyLabel: "GLiNER",
     repoName: "GLiNER",
@@ -4782,6 +4919,14 @@ var MODEL_LIBRARIES_UI_ELEMENTS = {
       terms: { path: ["ckpt/tensor00000_000", "ckpt-0/tensor00000_000"] }
     }
   },
+  "hunyuan-dit": {
+    prettyLabel: "HunyuanDiT",
+    repoName: "HunyuanDiT",
+    repoUrl: "https://github.com/Tencent/HunyuanDiT",
+    countDownloads: {
+      terms: { path: ["pytorch_model_ema.pt", "pytorch_model_distill.pt"] }
+    }
+  },
   keras: {
     prettyLabel: "Keras",
     repoName: "Keras",
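A side note on the countDownloads values in these new entries: they are query fragments over repository file paths that determine which file requests are counted as downloads for that library. The three shapes used in this diff, paraphrased in a small TypeScript sketch (the type name is illustrative, not the package's own):

// Paraphrased shapes of the countDownloads queries used above.
type CountDownloadsQuery =
  | { term: { path: string } } // one exact file path, e.g. bm25s' "params.index.json"
  | { terms: { path: string[] } } // any of several exact paths, e.g. HunyuanDiT's two .pt files
  | { wildcard: { path: string } }; // glob-style pattern, e.g. gemma.cpp's "*.sbs"

const examplesFromThisDiff: Record<string, CountDownloadsQuery> = {
  bm25s: { term: { path: "params.index.json" } },
  "gemma.cpp": { wildcard: { path: "*.sbs" } },
  "hunyuan-dit": { terms: { path: ["pytorch_model_ema.pt", "pytorch_model_distill.pt"] } },
};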
@@ -4884,6 +5029,13 @@ var MODEL_LIBRARIES_UI_ELEMENTS = {
     snippets: pythae,
     filter: true
   },
+  recurrentgemma: {
+    prettyLabel: "RecurrentGemma",
+    repoName: "recurrentgemma",
+    repoUrl: "https://github.com/google-deepmind/recurrentgemma",
+    filter: false,
+    countDownloads: { term: { path: "tokenizer.model" } }
+  },
   "sample-factory": {
     prettyLabel: "sample-factory",
     repoName: "sample-factory",
@@ -4985,6 +5137,13 @@ var MODEL_LIBRARIES_UI_ELEMENTS = {
     repoUrl: "https://github.com/TensorSpeech/TensorFlowTTS",
     snippets: tensorflowtts
   },
+  "tic-clip": {
+    prettyLabel: "TiC-CLIP",
+    repoName: "TiC-CLIP",
+    repoUrl: "https://github.com/apple/ml-tic-clip",
+    filter: false,
+    countDownloads: { wildcard: { path: "checkpoints/*.pt" } }
+  },
   timesfm: {
     prettyLabel: "TimesFM",
     repoName: "timesfm",
@@ -6047,7 +6206,7 @@ var LOCAL_APPS = {
 // src/dataset-libraries.ts
 var DATASET_LIBRARIES_UI_ELEMENTS = {
   mlcroissant: {
-    prettyLabel: "mlcroissant",
+    prettyLabel: "Croissant",
     repoName: "croissant",
     repoUrl: "https://github.com/mlcommons/croissant/tree/main/python/mlcroissant",
     docsUrl: "https://github.com/mlcommons/croissant/blob/main/python/mlcroissant/README.md"