@huggingface/tasks 0.3.1 → 0.3.2

This diff shows the changes between two publicly released versions of the package, as published to its public registry. It is provided for informational purposes only.
package/dist/index.cjs CHANGED
@@ -31,6 +31,7 @@ __export(src_exports, {
  PIPELINE_DATA: () => PIPELINE_DATA,
  PIPELINE_TYPES: () => PIPELINE_TYPES,
  PIPELINE_TYPES_SET: () => PIPELINE_TYPES_SET,
+ SPECIAL_TOKENS_ATTRIBUTES: () => SPECIAL_TOKENS_ATTRIBUTES,
  SUBTASK_TYPES: () => SUBTASK_TYPES,
  TASKS_DATA: () => TASKS_DATA,
  TASKS_MODEL_LIBRARIES: () => TASKS_MODEL_LIBRARIES,
@@ -58,7 +59,7 @@ var LIBRARY_TASK_MAPPING_EXCLUDING_TRANSFORMERS = {
  keras: ["image-classification"],
  nemo: ["automatic-speech-recognition"],
  open_clip: ["zero-shot-classification", "zero-shot-image-classification"],
- paddlenlp: ["conversational", "fill-mask", "summarization", "zero-shot-classification"],
+ paddlenlp: ["fill-mask", "summarization", "zero-shot-classification"],
  peft: ["text-generation"],
  "pyannote-audio": ["automatic-speech-recognition"],
  "sentence-transformers": ["feature-extraction", "sentence-similarity"],
@@ -929,17 +930,6 @@ var PIPELINE_DATA = {
  modality: "nlp",
  color: "indigo"
  },
- conversational: {
- name: "Conversational",
- subtasks: [
- {
- type: "dialogue-generation",
- name: "Dialogue Generation"
- }
- ],
- modality: "nlp",
- color: "green"
- },
  "feature-extraction": {
  name: "Feature Extraction",
  modality: "nlp",
@@ -952,6 +942,14 @@ var PIPELINE_DATA = {
  type: "dialogue-modeling",
  name: "Dialogue Modeling"
  },
+ {
+ type: "dialogue-generation",
+ name: "Dialogue Generation"
+ },
+ {
+ type: "conversational",
+ name: "Conversational"
+ },
  {
  type: "language-modeling",
  name: "Language Modeling"
@@ -1297,7 +1295,7 @@ var PIPELINE_DATA = {
  color: "green"
  },
  "image-text-to-text": {
- name: "Image + Text to Image (VLLMs)",
+ name: "Image + Text to Text (VLLMs)",
  modality: "multimodal",
  color: "red",
  hideInDatasets: true
@@ -1355,6 +1353,11 @@ var PIPELINE_DATA = {
  modality: "cv",
  color: "green"
  },
+ "image-feature-extraction": {
+ name: "Image Feature Extraction",
+ modality: "cv",
+ color: "indigo"
+ },
  other: {
  name: "Other",
  modality: "other",
@@ -1580,68 +1583,8 @@ var taskData3 = {
  };
  var data_default3 = taskData3;

- // src/tasks/conversational/data.ts
- var taskData4 = {
- datasets: [
- {
- description: "A dataset of 7k conversations explicitly designed to exhibit multiple conversation modes: displaying personality, having empathy, and demonstrating knowledge.",
- id: "blended_skill_talk"
- },
- {
- description: "ConvAI is a dataset of human-to-bot conversations labeled for quality. This data can be used to train a metric for evaluating dialogue systems",
- id: "conv_ai_2"
- },
- {
- description: "EmpatheticDialogues, is a dataset of 25k conversations grounded in emotional situations",
- id: "empathetic_dialogues"
- }
- ],
- demo: {
- inputs: [
- {
- label: "Input",
- content: "Hey my name is Julien! How are you?",
- type: "text"
- }
- ],
- outputs: [
- {
- label: "Answer",
- content: "Hi Julien! My name is Julia! I am well.",
- type: "text"
- }
- ]
- },
- metrics: [
- {
- description: "BLEU score is calculated by counting the number of shared single or subsequent tokens between the generated sequence and the reference. Subsequent n tokens are called \u201Cn-grams\u201D. Unigram refers to a single token while bi-gram refers to token pairs and n-grams refer to n subsequent tokens. The score ranges from 0 to 1, where 1 means the translation perfectly matched and 0 did not match at all",
- id: "bleu"
- }
- ],
- models: [
- {
- description: "A faster and smaller model than the famous BERT model.",
- id: "facebook/blenderbot-400M-distill"
- },
- {
- description: "DialoGPT is a large-scale pretrained dialogue response generation model for multiturn conversations.",
- id: "microsoft/DialoGPT-large"
- }
- ],
- spaces: [
- {
- description: "A chatbot based on Blender model.",
- id: "EXFINITE/BlenderBot-UI"
- }
- ],
- summary: "Conversational response modelling is the task of generating conversational text that is relevant, coherent and knowledgable given a prompt. These models have applications in chatbots, and as a part of voice assistants",
- widgetModels: ["facebook/blenderbot-400M-distill"],
- youtubeId: ""
- };
- var data_default4 = taskData4;
-
  // src/tasks/document-question-answering/data.ts
- var taskData5 = {
+ var taskData4 = {
  datasets: [
  {
  // TODO write proper description
@@ -1711,10 +1654,10 @@ var taskData5 = {
  widgetModels: ["impira/layoutlm-document-qa"],
  youtubeId: ""
  };
- var data_default5 = taskData5;
+ var data_default4 = taskData4;

  // src/tasks/feature-extraction/data.ts
- var taskData6 = {
+ var taskData5 = {
  datasets: [
  {
  description: "Wikipedia dataset containing cleaned articles of all languages. Can be used to train `feature-extraction` models.",
@@ -1757,10 +1700,10 @@ var taskData6 = {
  summary: "Feature extraction refers to the process of transforming raw data into numerical features that can be processed while preserving the information in the original dataset.",
  widgetModels: ["facebook/bart-base"]
  };
- var data_default6 = taskData6;
+ var data_default5 = taskData5;

  // src/tasks/fill-mask/data.ts
- var taskData7 = {
+ var taskData6 = {
  datasets: [
  {
  description: "A common dataset that is used to train models for many languages.",
@@ -1832,10 +1775,10 @@ var taskData7 = {
  widgetModels: ["distilroberta-base"],
  youtubeId: "mqElG5QJWUg"
  };
- var data_default7 = taskData7;
+ var data_default6 = taskData6;

  // src/tasks/image-classification/data.ts
- var taskData8 = {
+ var taskData7 = {
  datasets: [
  {
  // TODO write proper description
@@ -1918,10 +1861,10 @@ var taskData8 = {
  widgetModels: ["google/vit-base-patch16-224"],
  youtubeId: "tjAIM7BOYhw"
  };
- var data_default8 = taskData8;
+ var data_default7 = taskData7;

  // src/tasks/image-to-image/data.ts
- var taskData9 = {
+ var taskData8 = {
  datasets: [
  {
  description: "Synthetic dataset, for image relighting",
@@ -2013,10 +1956,10 @@ var taskData9 = {
  widgetModels: ["lllyasviel/sd-controlnet-canny"],
  youtubeId: ""
  };
- var data_default9 = taskData9;
+ var data_default8 = taskData8;

  // src/tasks/image-to-text/data.ts
- var taskData10 = {
+ var taskData9 = {
  datasets: [
  {
  // TODO write proper description
@@ -2093,10 +2036,10 @@ var taskData10 = {
  widgetModels: ["Salesforce/blip-image-captioning-base"],
  youtubeId: ""
  };
- var data_default10 = taskData10;
+ var data_default9 = taskData9;

  // src/tasks/image-segmentation/data.ts
- var taskData11 = {
+ var taskData10 = {
  datasets: [
  {
  description: "Scene segmentation dataset.",
@@ -2188,10 +2131,10 @@ var taskData11 = {
  widgetModels: ["facebook/detr-resnet-50-panoptic"],
  youtubeId: "dKE8SIt9C-w"
  };
- var data_default11 = taskData11;
+ var data_default10 = taskData10;

  // src/tasks/mask-generation/data.ts
- var taskData12 = {
+ var taskData11 = {
  datasets: [],
  demo: {
  inputs: [
@@ -2240,10 +2183,10 @@ var taskData12 = {
  widgetModels: [],
  youtubeId: ""
  };
- var data_default12 = taskData12;
+ var data_default11 = taskData11;

  // src/tasks/object-detection/data.ts
- var taskData13 = {
+ var taskData12 = {
  datasets: [
  {
  // TODO write proper description
@@ -2315,10 +2258,10 @@ var taskData13 = {
  widgetModels: ["facebook/detr-resnet-50"],
  youtubeId: "WdAeKSOpxhw"
  };
- var data_default13 = taskData13;
+ var data_default12 = taskData12;

  // src/tasks/depth-estimation/data.ts
- var taskData14 = {
+ var taskData13 = {
  datasets: [
  {
  description: "NYU Depth V2 Dataset: Video dataset containing both RGB and depth sensor data",
@@ -2372,10 +2315,10 @@ var taskData14 = {
  widgetModels: [""],
  youtubeId: ""
  };
- var data_default14 = taskData14;
+ var data_default13 = taskData13;

  // src/tasks/placeholder/data.ts
- var taskData15 = {
+ var taskData14 = {
  datasets: [],
  demo: {
  inputs: [],
@@ -2392,10 +2335,10 @@ var taskData15 = {
  /// (eg, text2text-generation is the canonical ID of translation)
  canonicalId: void 0
  };
- var data_default15 = taskData15;
+ var data_default14 = taskData14;

  // src/tasks/reinforcement-learning/data.ts
- var taskData16 = {
+ var taskData15 = {
  datasets: [
  {
  description: "A curation of widely used datasets for Data Driven Deep Reinforcement Learning (D4RL)",
@@ -2461,10 +2404,10 @@ var taskData16 = {
  widgetModels: [],
  youtubeId: "q0BiUn5LiBc"
  };
- var data_default16 = taskData16;
+ var data_default15 = taskData15;

  // src/tasks/question-answering/data.ts
- var taskData17 = {
+ var taskData16 = {
  datasets: [
  {
  // TODO write proper description
@@ -2528,10 +2471,10 @@ var taskData17 = {
  widgetModels: ["deepset/roberta-base-squad2"],
  youtubeId: "ajPx5LwJD-I"
  };
- var data_default17 = taskData17;
+ var data_default16 = taskData16;

  // src/tasks/sentence-similarity/data.ts
- var taskData18 = {
+ var taskData17 = {
  datasets: [
  {
  description: "Bing queries with relevant passages from various web sources.",
@@ -2623,10 +2566,10 @@ var taskData18 = {
  widgetModels: ["sentence-transformers/all-MiniLM-L6-v2"],
  youtubeId: "VCZq5AkbNEU"
  };
- var data_default18 = taskData18;
+ var data_default17 = taskData17;

  // src/tasks/summarization/data.ts
- var taskData19 = {
+ var taskData18 = {
  canonicalId: "text2text-generation",
  datasets: [
  {
@@ -2692,10 +2635,10 @@ var taskData19 = {
  widgetModels: ["sshleifer/distilbart-cnn-12-6"],
  youtubeId: "yHnr5Dk2zCI"
  };
- var data_default19 = taskData19;
+ var data_default18 = taskData18;

  // src/tasks/table-question-answering/data.ts
- var taskData20 = {
+ var taskData19 = {
  datasets: [
  {
  description: "The WikiTableQuestions dataset is a large-scale dataset for the task of question answering on semi-structured tables.",
@@ -2746,10 +2689,10 @@ var taskData20 = {
  summary: "Table Question Answering (Table QA) is the answering a question about an information on a given table.",
  widgetModels: ["google/tapas-base-finetuned-wtq"]
  };
- var data_default20 = taskData20;
+ var data_default19 = taskData19;

  // src/tasks/tabular-classification/data.ts
- var taskData21 = {
+ var taskData20 = {
  datasets: [
  {
  description: "A comprehensive curation of datasets covering all benchmarks.",
@@ -2813,10 +2756,10 @@ var taskData21 = {
  widgetModels: ["scikit-learn/tabular-playground"],
  youtubeId: ""
  };
- var data_default21 = taskData21;
+ var data_default20 = taskData20;

  // src/tasks/tabular-regression/data.ts
- var taskData22 = {
+ var taskData21 = {
  datasets: [
  {
  description: "A comprehensive curation of datasets covering all benchmarks.",
@@ -2868,10 +2811,10 @@ var taskData22 = {
  widgetModels: ["scikit-learn/Fish-Weight"],
  youtubeId: ""
  };
- var data_default22 = taskData22;
+ var data_default21 = taskData21;

  // src/tasks/text-to-image/data.ts
- var taskData23 = {
+ var taskData22 = {
  datasets: [
  {
  description: "RedCaps is a large-scale dataset of 12M image-text pairs collected from Reddit.",
@@ -2963,10 +2906,10 @@ var taskData23 = {
  widgetModels: ["CompVis/stable-diffusion-v1-4"],
  youtubeId: ""
  };
- var data_default23 = taskData23;
+ var data_default22 = taskData22;

  // src/tasks/text-to-speech/data.ts
- var taskData24 = {
+ var taskData23 = {
  canonicalId: "text-to-audio",
  datasets: [
  {
@@ -3031,10 +2974,10 @@ var taskData24 = {
  widgetModels: ["suno/bark"],
  youtubeId: "NW62DpzJ274"
  };
- var data_default24 = taskData24;
+ var data_default23 = taskData23;

  // src/tasks/token-classification/data.ts
- var taskData25 = {
+ var taskData24 = {
  datasets: [
  {
  description: "A widely used dataset useful to benchmark named entity recognition models.",
@@ -3110,10 +3053,10 @@ var taskData25 = {
  widgetModels: ["dslim/bert-base-NER"],
  youtubeId: "wVHdVlPScxA"
  };
- var data_default25 = taskData25;
+ var data_default24 = taskData24;

  // src/tasks/translation/data.ts
- var taskData26 = {
+ var taskData25 = {
  canonicalId: "text2text-generation",
  datasets: [
  {
@@ -3175,10 +3118,10 @@ var taskData26 = {
  widgetModels: ["t5-small"],
  youtubeId: "1JvfrvZgi6c"
  };
- var data_default26 = taskData26;
+ var data_default25 = taskData25;

  // src/tasks/text-classification/data.ts
- var taskData27 = {
+ var taskData26 = {
  datasets: [
  {
  description: "A widely used dataset used to benchmark multiple variants of text classification.",
@@ -3263,10 +3206,10 @@ var taskData27 = {
  widgetModels: ["distilbert-base-uncased-finetuned-sst-2-english"],
  youtubeId: "leNG9fN9FQU"
  };
- var data_default27 = taskData27;
+ var data_default26 = taskData26;

  // src/tasks/text-generation/data.ts
- var taskData28 = {
+ var taskData27 = {
  datasets: [
  {
  description: "A large multilingual dataset of text crawled from the web.",
@@ -3367,10 +3310,10 @@ var taskData28 = {
  widgetModels: ["HuggingFaceH4/zephyr-7b-beta"],
  youtubeId: "Vpjb1lu0MDk"
  };
- var data_default28 = taskData28;
+ var data_default27 = taskData27;

  // src/tasks/text-to-video/data.ts
- var taskData29 = {
+ var taskData28 = {
  datasets: [
  {
  description: "Microsoft Research Video to Text is a large-scale dataset for open domain video captioning",
@@ -3462,10 +3405,10 @@ var taskData29 = {
  widgetModels: [],
  youtubeId: void 0
  };
- var data_default29 = taskData29;
+ var data_default28 = taskData28;

  // src/tasks/unconditional-image-generation/data.ts
- var taskData30 = {
+ var taskData29 = {
  datasets: [
  {
  description: "The CIFAR-100 dataset consists of 60000 32x32 colour images in 100 classes, with 600 images per class.",
@@ -3527,10 +3470,10 @@ var taskData30 = {
  // TODO: Add related video
  youtubeId: ""
  };
- var data_default30 = taskData30;
+ var data_default29 = taskData29;

  // src/tasks/video-classification/data.ts
- var taskData31 = {
+ var taskData30 = {
  datasets: [
  {
  // TODO write proper description
@@ -3609,10 +3552,10 @@ var taskData31 = {
  widgetModels: [],
  youtubeId: ""
  };
- var data_default31 = taskData31;
+ var data_default30 = taskData30;

  // src/tasks/visual-question-answering/data.ts
- var taskData32 = {
+ var taskData31 = {
  datasets: [
  {
  description: "A widely used dataset containing questions (with answers) about images.",
@@ -3702,10 +3645,10 @@ var taskData32 = {
  widgetModels: ["dandelin/vilt-b32-finetuned-vqa"],
  youtubeId: ""
  };
- var data_default32 = taskData32;
+ var data_default31 = taskData31;

  // src/tasks/zero-shot-classification/data.ts
- var taskData33 = {
+ var taskData32 = {
  datasets: [
  {
  description: "A widely used dataset used to benchmark multiple variants of text classification.",
@@ -3764,10 +3707,10 @@ var taskData33 = {
  summary: "Zero-shot text classification is a task in natural language processing where a model is trained on a set of labeled examples but is then able to classify new examples from previously unseen classes.",
  widgetModels: ["facebook/bart-large-mnli"]
  };
- var data_default33 = taskData33;
+ var data_default32 = taskData32;

  // src/tasks/zero-shot-image-classification/data.ts
- var taskData34 = {
+ var taskData33 = {
  datasets: [
  {
  // TODO write proper description
@@ -3841,10 +3784,10 @@ var taskData34 = {
  widgetModels: ["openai/clip-vit-large-patch14-336"],
  youtubeId: ""
  };
- var data_default34 = taskData34;
+ var data_default33 = taskData33;

  // src/tasks/zero-shot-object-detection/data.ts
- var taskData35 = {
+ var taskData34 = {
  datasets: [],
  demo: {
  inputs: [
@@ -3899,20 +3842,20 @@ var taskData35 = {
  widgetModels: [],
  youtubeId: ""
  };
- var data_default35 = taskData35;
+ var data_default34 = taskData34;

  // src/tasks/index.ts
  var TASKS_MODEL_LIBRARIES = {
  "audio-classification": ["speechbrain", "transformers", "transformers.js"],
  "audio-to-audio": ["asteroid", "speechbrain"],
  "automatic-speech-recognition": ["espnet", "nemo", "speechbrain", "transformers", "transformers.js"],
- conversational: ["transformers"],
  "depth-estimation": ["transformers", "transformers.js"],
  "document-question-answering": ["transformers", "transformers.js"],
  "feature-extraction": ["sentence-transformers", "transformers", "transformers.js"],
  "fill-mask": ["transformers", "transformers.js"],
  "graph-ml": ["transformers"],
  "image-classification": ["keras", "timm", "transformers", "transformers.js"],
+ "image-feature-extraction": ["timm", "transformers"],
  "image-segmentation": ["transformers", "transformers.js"],
  "image-text-to-text": ["transformers"],
  "image-to-image": ["diffusers", "transformers", "transformers.js"],
@@ -3961,7 +3904,7 @@ var TASKS_MODEL_LIBRARIES = {
  "text-to-3d": [],
  "image-to-3d": []
  };
- function getData(type, partialTaskData = data_default15) {
+ function getData(type, partialTaskData = data_default14) {
  return {
  ...partialTaskData,
  id: type,
@@ -3973,52 +3916,52 @@ var TASKS_DATA = {
  "audio-classification": getData("audio-classification", data_default),
  "audio-to-audio": getData("audio-to-audio", data_default2),
  "automatic-speech-recognition": getData("automatic-speech-recognition", data_default3),
- conversational: getData("conversational", data_default4),
- "depth-estimation": getData("depth-estimation", data_default14),
- "document-question-answering": getData("document-question-answering", data_default5),
- "feature-extraction": getData("feature-extraction", data_default6),
- "fill-mask": getData("fill-mask", data_default7),
+ "depth-estimation": getData("depth-estimation", data_default13),
+ "document-question-answering": getData("document-question-answering", data_default4),
+ "feature-extraction": getData("feature-extraction", data_default5),
+ "fill-mask": getData("fill-mask", data_default6),
  "graph-ml": void 0,
- "image-classification": getData("image-classification", data_default8),
- "image-segmentation": getData("image-segmentation", data_default11),
+ "image-classification": getData("image-classification", data_default7),
+ "image-segmentation": getData("image-segmentation", data_default10),
  "image-text-to-text": void 0,
- "image-to-image": getData("image-to-image", data_default9),
- "image-to-text": getData("image-to-text", data_default10),
+ "image-to-image": getData("image-to-image", data_default8),
+ "image-to-text": getData("image-to-text", data_default9),
  "image-to-video": void 0,
- "mask-generation": getData("mask-generation", data_default12),
+ "mask-generation": getData("mask-generation", data_default11),
  "multiple-choice": void 0,
- "object-detection": getData("object-detection", data_default13),
- "video-classification": getData("video-classification", data_default31),
+ "object-detection": getData("object-detection", data_default12),
+ "video-classification": getData("video-classification", data_default30),
  other: void 0,
- "question-answering": getData("question-answering", data_default17),
- "reinforcement-learning": getData("reinforcement-learning", data_default16),
+ "question-answering": getData("question-answering", data_default16),
+ "reinforcement-learning": getData("reinforcement-learning", data_default15),
  robotics: void 0,
- "sentence-similarity": getData("sentence-similarity", data_default18),
- summarization: getData("summarization", data_default19),
- "table-question-answering": getData("table-question-answering", data_default20),
+ "sentence-similarity": getData("sentence-similarity", data_default17),
+ summarization: getData("summarization", data_default18),
+ "table-question-answering": getData("table-question-answering", data_default19),
  "table-to-text": void 0,
- "tabular-classification": getData("tabular-classification", data_default21),
- "tabular-regression": getData("tabular-regression", data_default22),
+ "tabular-classification": getData("tabular-classification", data_default20),
+ "tabular-regression": getData("tabular-regression", data_default21),
  "tabular-to-text": void 0,
- "text-classification": getData("text-classification", data_default27),
- "text-generation": getData("text-generation", data_default28),
+ "text-classification": getData("text-classification", data_default26),
+ "text-generation": getData("text-generation", data_default27),
  "text-retrieval": void 0,
- "text-to-image": getData("text-to-image", data_default23),
- "text-to-speech": getData("text-to-speech", data_default24),
+ "text-to-image": getData("text-to-image", data_default22),
+ "text-to-speech": getData("text-to-speech", data_default23),
  "text-to-audio": void 0,
- "text-to-video": getData("text-to-video", data_default29),
+ "text-to-video": getData("text-to-video", data_default28),
  "text2text-generation": void 0,
  "time-series-forecasting": void 0,
- "token-classification": getData("token-classification", data_default25),
- translation: getData("translation", data_default26),
- "unconditional-image-generation": getData("unconditional-image-generation", data_default30),
- "visual-question-answering": getData("visual-question-answering", data_default32),
+ "token-classification": getData("token-classification", data_default24),
+ translation: getData("translation", data_default25),
+ "unconditional-image-generation": getData("unconditional-image-generation", data_default29),
+ "visual-question-answering": getData("visual-question-answering", data_default31),
  "voice-activity-detection": void 0,
- "zero-shot-classification": getData("zero-shot-classification", data_default33),
- "zero-shot-image-classification": getData("zero-shot-image-classification", data_default34),
- "zero-shot-object-detection": getData("zero-shot-object-detection", data_default35),
- "text-to-3d": getData("text-to-3d", data_default15),
- "image-to-3d": getData("image-to-3d", data_default15)
+ "zero-shot-classification": getData("zero-shot-classification", data_default32),
+ "zero-shot-image-classification": getData("zero-shot-image-classification", data_default33),
+ "zero-shot-object-detection": getData("zero-shot-object-detection", data_default34),
+ "text-to-3d": getData("text-to-3d", data_default14),
+ "image-to-3d": getData("image-to-3d", data_default14),
+ "image-feature-extraction": getData("image-feature-extraction", data_default14)
  };

  // src/model-libraries-snippets.ts
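
Note: after this hunk, `conversational` is gone from TASKS_DATA entirely, every `data_defaultN` reference shifts down by one (the conversational data module was removed above), and the new `image-feature-extraction` entry is seeded from the placeholder task data (`data_default14`) until it gets its own data.ts. A quick sanity check against the published bundle — illustrative only, not part of this diff:

// Illustrative only: spot-check the 0.3.2 task table.
const { TASKS_DATA } = require("@huggingface/tasks");

console.log("conversational" in TASKS_DATA);            // false in 0.3.2
console.log(TASKS_DATA["image-feature-extraction"].id); // "image-feature-extraction", filled in by getData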
@@ -4824,6 +4767,18 @@ var InferenceDisplayability = /* @__PURE__ */ ((InferenceDisplayability2) => {
  return InferenceDisplayability2;
  })(InferenceDisplayability || {});

+ // src/tokenizer-data.ts
+ var SPECIAL_TOKENS_ATTRIBUTES = [
+ "bos_token",
+ "eos_token",
+ "unk_token",
+ "sep_token",
+ "pad_token",
+ "cls_token",
+ "mask_token"
+ // additional_special_tokens (TODO)
+ ];
+
  // src/snippets/index.ts
  var snippets_exports = {};
  __export(snippets_exports, {
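
Note: `SPECIAL_TOKENS_ATTRIBUTES` is a plain list of attribute names as they appear in a model's tokenizer_config.json. A hedged sketch of how a consumer might use it to collect a model's special tokens; the config shape handled here (string values or "added token" objects with a `content` field) is an assumption about tokenizer_config.json, not something defined in this diff:

// Illustrative only. Assumes a parsed tokenizer_config.json object where each
// special-token attribute is either a string or an object with a `content` field.
const { SPECIAL_TOKENS_ATTRIBUTES } = require("@huggingface/tasks");

function collectSpecialTokens(tokenizerConfig) {
  const tokens = {};
  for (const attr of SPECIAL_TOKENS_ATTRIBUTES) {
    const value = tokenizerConfig[attr];
    if (typeof value === "string") {
      tokens[attr] = value;                 // plain string form
    } else if (value && typeof value.content === "string") {
      tokens[attr] = value.content;         // "added token" object form
    }
  }
  return tokens;
}

console.log(collectSpecialTokens({ bos_token: "<s>", eos_token: { content: "</s>" } }));
// -> { bos_token: "<s>", eos_token: "</s>" }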
@@ -4841,11 +4796,6 @@ __export(inputs_exports, {
  var inputsZeroShotClassification = () => `"Hi, I recently bought a device from your company but it is not working as advertised and I would like to get reimbursed!"`;
  var inputsTranslation = () => `"\u041C\u0435\u043D\u044F \u0437\u043E\u0432\u0443\u0442 \u0412\u043E\u043B\u044C\u0444\u0433\u0430\u043D\u0433 \u0438 \u044F \u0436\u0438\u0432\u0443 \u0432 \u0411\u0435\u0440\u043B\u0438\u043D\u0435"`;
  var inputsSummarization = () => `"The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct."`;
- var inputsConversational = () => `{
- "past_user_inputs": ["Which movie is the best ?"],
- "generated_responses": ["It is Die Hard for sure."],
- "text": "Can you explain why ?"
- }`;
  var inputsTableQuestionAnswering = () => `{
  "query": "How many stars does the transformers repository have?",
  "table": {
@@ -4897,7 +4847,6 @@ var modelInputSnippets = {
  "audio-to-audio": inputsAudioToAudio,
  "audio-classification": inputsAudioClassification,
  "automatic-speech-recognition": inputsAutomaticSpeechRecognition,
- conversational: inputsConversational,
  "document-question-answering": inputsVisualQuestionAnswering,
  "feature-extraction": inputsFeatureExtraction,
  "fill-mask": inputsFillMask,
@@ -4977,7 +4926,6 @@ var curlSnippets = {
  "zero-shot-classification": snippetZeroShotClassification,
  translation: snippetBasic,
  summarization: snippetBasic,
- conversational: snippetBasic,
  "feature-extraction": snippetBasic,
  "text-generation": snippetBasic,
  "text2text-generation": snippetBasic,
@@ -5112,7 +5060,6 @@ var pythonSnippets = {
  "zero-shot-classification": snippetZeroShotClassification2,
  translation: snippetBasic2,
  summarization: snippetBasic2,
- conversational: snippetBasic2,
  "feature-extraction": snippetBasic2,
  "text-generation": snippetBasic2,
  "text2text-generation": snippetBasic2,
@@ -5262,7 +5209,6 @@ var jsSnippets = {
  "zero-shot-classification": snippetZeroShotClassification3,
  translation: snippetBasic3,
  summarization: snippetBasic3,
- conversational: snippetBasic3,
  "feature-extraction": snippetBasic3,
  "text-generation": snippetBasic3,
  "text2text-generation": snippetBasic3,
@@ -5298,6 +5244,7 @@ function hasJsInferenceSnippet(model) {
  PIPELINE_DATA,
  PIPELINE_TYPES,
  PIPELINE_TYPES_SET,
+ SPECIAL_TOKENS_ATTRIBUTES,
  SUBTASK_TYPES,
  TASKS_DATA,
  TASKS_MODEL_LIBRARIES,
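
Note: the snippet tables above (model inputs, curl, Python, JS) all drop their `conversational` entries, and `SPECIAL_TOKENS_ATTRIBUTES` joins the CJS bundle's public exports. A minimal check that the new export is reachable — illustrative only:

// Illustrative only: the new public export in 0.3.2.
const { SPECIAL_TOKENS_ATTRIBUTES } = require("@huggingface/tasks");
console.log(SPECIAL_TOKENS_ATTRIBUTES.length); // 7 special-token attribute names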