@huggingface/tasks 0.2.1 → 0.2.2

This diff shows the contents of publicly available package versions as published to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
Files changed (120)
  1. package/dist/{index.mjs → index.cjs} +280 -133
  2. package/dist/index.d.ts +4 -3
  3. package/dist/index.js +245 -168
  4. package/package.json +13 -8
  5. package/src/library-to-tasks.ts +1 -1
  6. package/src/library-ui-elements.ts +11 -11
  7. package/src/model-data.ts +1 -1
  8. package/src/model-libraries.ts +1 -1
  9. package/src/pipelines.ts +1 -1
  10. package/src/tasks/audio-classification/about.md +1 -1
  11. package/src/tasks/audio-classification/inference.ts +51 -0
  12. package/src/tasks/audio-classification/spec/input.json +34 -0
  13. package/src/tasks/audio-classification/spec/output.json +21 -0
  14. package/src/tasks/audio-to-audio/about.md +1 -1
  15. package/src/tasks/automatic-speech-recognition/about.md +4 -2
  16. package/src/tasks/automatic-speech-recognition/inference.ts +154 -0
  17. package/src/tasks/automatic-speech-recognition/spec/input.json +34 -0
  18. package/src/tasks/automatic-speech-recognition/spec/output.json +36 -0
  19. package/src/tasks/common-definitions.json +109 -0
  20. package/src/tasks/depth-estimation/data.ts +8 -4
  21. package/src/tasks/depth-estimation/inference.ts +35 -0
  22. package/src/tasks/depth-estimation/spec/input.json +30 -0
  23. package/src/tasks/depth-estimation/spec/output.json +10 -0
  24. package/src/tasks/document-question-answering/inference.ts +102 -0
  25. package/src/tasks/document-question-answering/spec/input.json +85 -0
  26. package/src/tasks/document-question-answering/spec/output.json +36 -0
  27. package/src/tasks/feature-extraction/inference.ts +22 -0
  28. package/src/tasks/feature-extraction/spec/input.json +26 -0
  29. package/src/tasks/feature-extraction/spec/output.json +7 -0
  30. package/src/tasks/fill-mask/inference.ts +61 -0
  31. package/src/tasks/fill-mask/spec/input.json +38 -0
  32. package/src/tasks/fill-mask/spec/output.json +29 -0
  33. package/src/tasks/image-classification/inference.ts +51 -0
  34. package/src/tasks/image-classification/spec/input.json +34 -0
  35. package/src/tasks/image-classification/spec/output.json +10 -0
  36. package/src/tasks/image-segmentation/inference.ts +65 -0
  37. package/src/tasks/image-segmentation/spec/input.json +54 -0
  38. package/src/tasks/image-segmentation/spec/output.json +25 -0
  39. package/src/tasks/image-to-image/inference.ts +67 -0
  40. package/src/tasks/image-to-image/spec/input.json +52 -0
  41. package/src/tasks/image-to-image/spec/output.json +12 -0
  42. package/src/tasks/image-to-text/inference.ts +138 -0
  43. package/src/tasks/image-to-text/spec/input.json +34 -0
  44. package/src/tasks/image-to-text/spec/output.json +17 -0
  45. package/src/tasks/index.ts +5 -2
  46. package/src/tasks/mask-generation/about.md +65 -0
  47. package/src/tasks/mask-generation/data.ts +42 -5
  48. package/src/tasks/object-detection/inference.ts +62 -0
  49. package/src/tasks/object-detection/spec/input.json +30 -0
  50. package/src/tasks/object-detection/spec/output.json +46 -0
  51. package/src/tasks/placeholder/data.ts +3 -0
  52. package/src/tasks/placeholder/spec/input.json +35 -0
  53. package/src/tasks/placeholder/spec/output.json +17 -0
  54. package/src/tasks/question-answering/inference.ts +99 -0
  55. package/src/tasks/question-answering/spec/input.json +67 -0
  56. package/src/tasks/question-answering/spec/output.json +29 -0
  57. package/src/tasks/sentence-similarity/about.md +2 -2
  58. package/src/tasks/sentence-similarity/inference.ts +32 -0
  59. package/src/tasks/sentence-similarity/spec/input.json +40 -0
  60. package/src/tasks/sentence-similarity/spec/output.json +12 -0
  61. package/src/tasks/summarization/data.ts +1 -0
  62. package/src/tasks/summarization/inference.ts +58 -0
  63. package/src/tasks/summarization/spec/input.json +7 -0
  64. package/src/tasks/summarization/spec/output.json +7 -0
  65. package/src/tasks/table-question-answering/inference.ts +61 -0
  66. package/src/tasks/table-question-answering/spec/input.json +39 -0
  67. package/src/tasks/table-question-answering/spec/output.json +40 -0
  68. package/src/tasks/tabular-classification/about.md +1 -1
  69. package/src/tasks/tabular-regression/about.md +1 -1
  70. package/src/tasks/text-classification/about.md +1 -0
  71. package/src/tasks/text-classification/inference.ts +51 -0
  72. package/src/tasks/text-classification/spec/input.json +35 -0
  73. package/src/tasks/text-classification/spec/output.json +10 -0
  74. package/src/tasks/text-generation/about.md +24 -13
  75. package/src/tasks/text-generation/data.ts +22 -38
  76. package/src/tasks/text-generation/inference.ts +85 -0
  77. package/src/tasks/text-generation/spec/input.json +74 -0
  78. package/src/tasks/text-generation/spec/output.json +17 -0
  79. package/src/tasks/text-to-audio/inference.ts +138 -0
  80. package/src/tasks/text-to-audio/spec/input.json +31 -0
  81. package/src/tasks/text-to-audio/spec/output.json +20 -0
  82. package/src/tasks/text-to-image/about.md +11 -2
  83. package/src/tasks/text-to-image/data.ts +6 -2
  84. package/src/tasks/text-to-image/inference.ts +73 -0
  85. package/src/tasks/text-to-image/spec/input.json +57 -0
  86. package/src/tasks/text-to-image/spec/output.json +15 -0
  87. package/src/tasks/text-to-speech/about.md +4 -2
  88. package/src/tasks/text-to-speech/data.ts +1 -0
  89. package/src/tasks/text-to-speech/inference.ts +146 -0
  90. package/src/tasks/text-to-speech/spec/input.json +7 -0
  91. package/src/tasks/text-to-speech/spec/output.json +7 -0
  92. package/src/tasks/text2text-generation/inference.ts +53 -0
  93. package/src/tasks/text2text-generation/spec/input.json +55 -0
  94. package/src/tasks/text2text-generation/spec/output.json +17 -0
  95. package/src/tasks/token-classification/inference.ts +82 -0
  96. package/src/tasks/token-classification/spec/input.json +65 -0
  97. package/src/tasks/token-classification/spec/output.json +33 -0
  98. package/src/tasks/translation/data.ts +1 -0
  99. package/src/tasks/translation/inference.ts +58 -0
  100. package/src/tasks/translation/spec/input.json +7 -0
  101. package/src/tasks/translation/spec/output.json +7 -0
  102. package/src/tasks/video-classification/inference.ts +59 -0
  103. package/src/tasks/video-classification/spec/input.json +42 -0
  104. package/src/tasks/video-classification/spec/output.json +10 -0
  105. package/src/tasks/visual-question-answering/inference.ts +63 -0
  106. package/src/tasks/visual-question-answering/spec/input.json +41 -0
  107. package/src/tasks/visual-question-answering/spec/output.json +21 -0
  108. package/src/tasks/zero-shot-classification/inference.ts +67 -0
  109. package/src/tasks/zero-shot-classification/spec/input.json +50 -0
  110. package/src/tasks/zero-shot-classification/spec/output.json +10 -0
  111. package/src/tasks/zero-shot-image-classification/data.ts +8 -5
  112. package/src/tasks/zero-shot-image-classification/inference.ts +61 -0
  113. package/src/tasks/zero-shot-image-classification/spec/input.json +45 -0
  114. package/src/tasks/zero-shot-image-classification/spec/output.json +10 -0
  115. package/src/tasks/zero-shot-object-detection/about.md +6 -0
  116. package/src/tasks/zero-shot-object-detection/data.ts +6 -1
  117. package/src/tasks/zero-shot-object-detection/inference.ts +66 -0
  118. package/src/tasks/zero-shot-object-detection/spec/input.json +40 -0
  119. package/src/tasks/zero-shot-object-detection/spec/output.json +47 -0
  120. package/tsconfig.json +3 -3
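
Note on consuming the new build (an illustrative sketch, not from the package docs): the diff below swaps the CommonJS wrapper in dist/index.js for native ESM exports, while the CommonJS build presumably moves to dist/index.cjs (see the index.mjs → index.cjs rename above). Assuming a Node project with "type": "module", the named exports read roughly like this; the fields accessed are ones visible in the diff, but treat the snippet as a sketch rather than documented API:

import { TASKS_DATA, ModelLibrary } from "@huggingface/tasks";

// Task metadata added in 0.2.2, e.g. the new mask-generation entry:
console.log(TASKS_DATA["mask-generation"]?.summary);

// Subtasks now carry a canonicalId pointing at their most general task:
console.log(TASKS_DATA["summarization"]?.canonicalId); // "text2text-generation"

// The adapter-transformers library label was renamed:
console.log(ModelLibrary["adapter-transformers"]); // "Adapters"
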
package/dist/index.js CHANGED
@@ -1,42 +1,8 @@
- "use strict";
  var __defProp = Object.defineProperty;
- var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
- var __getOwnPropNames = Object.getOwnPropertyNames;
- var __hasOwnProp = Object.prototype.hasOwnProperty;
  var __export = (target, all) => {
  for (var name in all)
  __defProp(target, name, { get: all[name], enumerable: true });
  };
- var __copyProps = (to, from, except, desc) => {
- if (from && typeof from === "object" || typeof from === "function") {
- for (let key of __getOwnPropNames(from))
- if (!__hasOwnProp.call(to, key) && key !== except)
- __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
- }
- return to;
- };
- var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
-
- // src/index.ts
- var src_exports = {};
- __export(src_exports, {
- ALL_DISPLAY_MODEL_LIBRARY_KEYS: () => ALL_DISPLAY_MODEL_LIBRARY_KEYS,
- InferenceDisplayability: () => InferenceDisplayability,
- LIBRARY_TASK_MAPPING_EXCLUDING_TRANSFORMERS: () => LIBRARY_TASK_MAPPING_EXCLUDING_TRANSFORMERS,
- MAPPING_DEFAULT_WIDGET: () => MAPPING_DEFAULT_WIDGET,
- MODALITIES: () => MODALITIES,
- MODALITY_LABELS: () => MODALITY_LABELS,
- MODEL_LIBRARIES_UI_ELEMENTS: () => MODEL_LIBRARIES_UI_ELEMENTS,
- ModelLibrary: () => ModelLibrary,
- PIPELINE_DATA: () => PIPELINE_DATA,
- PIPELINE_TYPES: () => PIPELINE_TYPES,
- PIPELINE_TYPES_SET: () => PIPELINE_TYPES_SET,
- SUBTASK_TYPES: () => SUBTASK_TYPES,
- TASKS_DATA: () => TASKS_DATA,
- TASKS_MODEL_LIBRARIES: () => TASKS_MODEL_LIBRARIES,
- snippets: () => snippets_exports
- });
- module.exports = __toCommonJS(src_exports);

  // src/library-to-tasks.ts
  var LIBRARY_TASK_MAPPING_EXCLUDING_TRANSFORMERS = {
@@ -83,11 +49,11 @@ function nameWithoutNamespace(modelId) {
  const splitted = modelId.split("/");
  return splitted.length === 1 ? splitted[0] : splitted[1];
  }
- var adapter_transformers = (model) => [
- `from transformers import ${model.config?.adapter_transformers?.model_class}
+ var adapters = (model) => [
+ `from adapters import AutoAdapterModel

- model = ${model.config?.adapter_transformers?.model_class}.from_pretrained("${model.config?.adapter_transformers?.model_name}")
- model.load_adapter("${model.id}", source="hf")`
+ model = AutoAdapterModel.from_pretrained("${model.config?.adapter_transformers?.model_name}")
+ model.load_adapter("${model.id}", set_active=True)`
  ];
  var allennlpUnknown = (model) => [
  `import allennlp_models
@@ -527,7 +493,7 @@ transcriptions = asr_model.transcribe(["file.wav"])`
  }
  };
  var mlAgents = (model) => [`mlagents-load-from-hf --repo-id="${model.id}" --local-dir="./downloads"`];
- var sentis = (model) => [
+ var sentis = () => [
  `string modelName = "[Your model name here].sentis";
  Model model = ModelLoader.Load(Application.streamingAssetsPath + "/" + modelName);
  IWorker engine = WorkerFactory.CreateWorker(BackendType.GPUCompute, model);
@@ -554,11 +520,11 @@ model = AutoModel.load_from_hf_hub("${model.id}")`
  ];
  var MODEL_LIBRARIES_UI_ELEMENTS = {
  "adapter-transformers": {
- btnLabel: "Adapter Transformers",
- repoName: "adapter-transformers",
- repoUrl: "https://github.com/Adapter-Hub/adapter-transformers",
- docsUrl: "https://huggingface.co/docs/hub/adapter-transformers",
- snippets: adapter_transformers
+ btnLabel: "Adapters",
+ repoName: "adapters",
+ repoUrl: "https://github.com/Adapter-Hub/adapters",
+ docsUrl: "https://huggingface.co/docs/hub/adapters",
+ snippets: adapters
  },
  allennlp: {
  btnLabel: "AllenNLP",
@@ -2880,8 +2846,60 @@ var taskData11 = {
  };
  var data_default11 = taskData11;

- // src/tasks/object-detection/data.ts
+ // src/tasks/mask-generation/data.ts
  var taskData12 = {
+ datasets: [],
+ demo: {
+ inputs: [
+ {
+ filename: "mask-generation-input.png",
+ type: "img"
+ }
+ ],
+ outputs: [
+ {
+ filename: "mask-generation-output.png",
+ type: "img"
+ }
+ ]
+ },
+ metrics: [],
+ models: [
+ {
+ description: "Small yet powerful mask generation model.",
+ id: "Zigeng/SlimSAM-uniform-50"
+ },
+ {
+ description: "Very strong mask generation model.",
+ id: "facebook/sam-vit-huge"
+ }
+ ],
+ spaces: [
+ {
+ description: "An application that combines a mask generation model with an image embedding model for open-vocabulary image segmentation.",
+ id: "SkalskiP/SAM_and_MetaCLIP"
+ },
+ {
+ description: "An application that compares the performance of a large and a small mask generation model.",
+ id: "merve/slimsam"
+ },
+ {
+ description: "An application based on an improved mask generation model.",
+ id: "linfanluntan/Grounded-SAM"
+ },
+ {
+ description: "An application to remove objects from videos using mask generation models.",
+ id: "SkalskiP/SAM_and_ProPainter"
+ }
+ ],
+ summary: "Mask generation is the task of generating masks that identify a specific object or region of interest in a given image. Masks are often used in segmentation tasks, where they provide a precise way to isolate the object of interest for further processing or analysis.",
+ widgetModels: [],
+ youtubeId: ""
+ };
+ var data_default12 = taskData12;
+
+ // src/tasks/object-detection/data.ts
+ var taskData13 = {
  datasets: [
  {
  // TODO write proper description
@@ -2953,10 +2971,10 @@ var taskData12 = {
  widgetModels: ["facebook/detr-resnet-50"],
  youtubeId: "WdAeKSOpxhw"
  };
- var data_default12 = taskData12;
+ var data_default13 = taskData13;

  // src/tasks/depth-estimation/data.ts
- var taskData13 = {
+ var taskData14 = {
  datasets: [
  {
  description: "NYU Depth V2 Dataset: Video dataset containing both RGB and depth sensor data",
@@ -2984,8 +3002,8 @@ var taskData13 = {
  id: "Intel/dpt-large"
  },
  {
- description: "Strong Depth Estimation model trained on the KITTI dataset.",
- id: "facebook/dpt-dinov2-large-kitti"
+ description: "Strong Depth Estimation model trained on a big compilation of datasets.",
+ id: "LiheYoung/depth-anything-large-hf"
  },
  {
  description: "A strong monocular depth estimation model.",
@@ -2998,18 +3016,22 @@ var taskData13 = {
  id: "radames/dpt-depth-estimation-3d-voxels"
  },
  {
- description: "An application that can estimate the depth in a given image.",
- id: "keras-io/Monocular-Depth-Estimation"
+ description: "An application to compare the outputs of different depth estimation models.",
+ id: "LiheYoung/Depth-Anything"
+ },
+ {
+ description: "An application to try state-of-the-art depth estimation.",
+ id: "merve/compare_depth_models"
  }
  ],
  summary: "Depth estimation is the task of predicting depth of the objects present in an image.",
  widgetModels: [""],
  youtubeId: ""
  };
- var data_default13 = taskData13;
+ var data_default14 = taskData14;

  // src/tasks/placeholder/data.ts
- var taskData14 = {
+ var taskData15 = {
  datasets: [],
  demo: {
  inputs: [],
@@ -3021,12 +3043,15 @@ var taskData14 = {
  spaces: [],
  summary: "",
  widgetModels: [],
- youtubeId: void 0
+ youtubeId: void 0,
+ /// If this is a subtask, link to the most general task ID
+ /// (eg, text2text-generation is the canonical ID of translation)
+ canonicalId: void 0
  };
- var data_default14 = taskData14;
+ var data_default15 = taskData15;

  // src/tasks/reinforcement-learning/data.ts
- var taskData15 = {
+ var taskData16 = {
  datasets: [
  {
  description: "A curation of widely used datasets for Data Driven Deep Reinforcement Learning (D4RL)",
@@ -3092,10 +3117,10 @@ var taskData15 = {
  widgetModels: [],
  youtubeId: "q0BiUn5LiBc"
  };
- var data_default15 = taskData15;
+ var data_default16 = taskData16;

  // src/tasks/question-answering/data.ts
- var taskData16 = {
+ var taskData17 = {
  datasets: [
  {
  // TODO write proper description
@@ -3159,10 +3184,10 @@ var taskData16 = {
  widgetModels: ["deepset/roberta-base-squad2"],
  youtubeId: "ajPx5LwJD-I"
  };
- var data_default16 = taskData16;
+ var data_default17 = taskData17;

  // src/tasks/sentence-similarity/data.ts
- var taskData17 = {
+ var taskData18 = {
  datasets: [
  {
  description: "Bing queries with relevant passages from various web sources.",
@@ -3254,10 +3279,11 @@ var taskData17 = {
  widgetModels: ["sentence-transformers/all-MiniLM-L6-v2"],
  youtubeId: "VCZq5AkbNEU"
  };
- var data_default17 = taskData17;
+ var data_default18 = taskData18;

  // src/tasks/summarization/data.ts
- var taskData18 = {
+ var taskData19 = {
+ canonicalId: "text2text-generation",
  datasets: [
  {
  description: "News articles in five different languages along with their summaries. Widely used for benchmarking multilingual summarization models.",
@@ -3322,10 +3348,10 @@ var taskData18 = {
  widgetModels: ["sshleifer/distilbart-cnn-12-6"],
  youtubeId: "yHnr5Dk2zCI"
  };
- var data_default18 = taskData18;
+ var data_default19 = taskData19;

  // src/tasks/table-question-answering/data.ts
- var taskData19 = {
+ var taskData20 = {
  datasets: [
  {
  description: "The WikiTableQuestions dataset is a large-scale dataset for the task of question answering on semi-structured tables.",
@@ -3376,10 +3402,10 @@ var taskData19 = {
  summary: "Table Question Answering (Table QA) is the answering a question about an information on a given table.",
  widgetModels: ["google/tapas-base-finetuned-wtq"]
  };
- var data_default19 = taskData19;
+ var data_default20 = taskData20;

  // src/tasks/tabular-classification/data.ts
- var taskData20 = {
+ var taskData21 = {
  datasets: [
  {
  description: "A comprehensive curation of datasets covering all benchmarks.",
@@ -3443,10 +3469,10 @@ var taskData20 = {
  widgetModels: ["scikit-learn/tabular-playground"],
  youtubeId: ""
  };
- var data_default20 = taskData20;
+ var data_default21 = taskData21;

  // src/tasks/tabular-regression/data.ts
- var taskData21 = {
+ var taskData22 = {
  datasets: [
  {
  description: "A comprehensive curation of datasets covering all benchmarks.",
@@ -3498,10 +3524,10 @@ var taskData21 = {
  widgetModels: ["scikit-learn/Fish-Weight"],
  youtubeId: ""
  };
- var data_default21 = taskData21;
+ var data_default22 = taskData22;

  // src/tasks/text-to-image/data.ts
- var taskData22 = {
+ var taskData23 = {
  datasets: [
  {
  description: "RedCaps is a large-scale dataset of 12M image-text pairs collected from Reddit.",
@@ -3577,22 +3603,27 @@ var taskData22 = {
  id: "latent-consistency/lcm-lora-for-sdxl"
  },
  {
- description: "A powerful text-to-image application that can generate 3D representations.",
- id: "hysts/Shap-E"
+ description: "A gallery to explore various text-to-image models.",
+ id: "multimodalart/LoraTheExplorer"
  },
  {
  description: "An application for `text-to-image`, `image-to-image` and image inpainting.",
  id: "ArtGAN/Stable-Diffusion-ControlNet-WebUI"
+ },
+ {
+ description: "An application to generate realistic images given photos of a person and a prompt.",
+ id: "InstantX/InstantID"
  }
  ],
  summary: "Generates images from input text. These models can be used to generate and modify images based on text prompts.",
  widgetModels: ["CompVis/stable-diffusion-v1-4"],
  youtubeId: ""
  };
- var data_default22 = taskData22;
+ var data_default23 = taskData23;

  // src/tasks/text-to-speech/data.ts
- var taskData23 = {
+ var taskData24 = {
+ canonicalId: "text-to-audio",
  datasets: [
  {
  description: "Thousands of short audio clips of a single speaker.",
@@ -3656,10 +3687,10 @@ var taskData23 = {
  widgetModels: ["suno/bark"],
  youtubeId: "NW62DpzJ274"
  };
- var data_default23 = taskData23;
+ var data_default24 = taskData24;

  // src/tasks/token-classification/data.ts
- var taskData24 = {
+ var taskData25 = {
  datasets: [
  {
  description: "A widely used dataset useful to benchmark named entity recognition models.",
@@ -3735,10 +3766,11 @@ var taskData24 = {
  widgetModels: ["dslim/bert-base-NER"],
  youtubeId: "wVHdVlPScxA"
  };
- var data_default24 = taskData24;
+ var data_default25 = taskData25;

  // src/tasks/translation/data.ts
- var taskData25 = {
+ var taskData26 = {
+ canonicalId: "text2text-generation",
  datasets: [
  {
  description: "A dataset of copyright-free books translated into 16 different languages.",
@@ -3799,10 +3831,10 @@ var taskData25 = {
  widgetModels: ["t5-small"],
  youtubeId: "1JvfrvZgi6c"
  };
- var data_default25 = taskData25;
+ var data_default26 = taskData26;

  // src/tasks/text-classification/data.ts
- var taskData26 = {
+ var taskData27 = {
  datasets: [
  {
  description: "A widely used dataset used to benchmark multiple variants of text classification.",
@@ -3887,10 +3919,10 @@ var taskData26 = {
  widgetModels: ["distilbert-base-uncased-finetuned-sst-2-english"],
  youtubeId: "leNG9fN9FQU"
  };
- var data_default26 = taskData26;
+ var data_default27 = taskData27;

  // src/tasks/text-generation/data.ts
- var taskData27 = {
+ var taskData28 = {
  datasets: [
  {
  description: "A large multilingual dataset of text crawled from the web.",
@@ -3901,12 +3933,12 @@ var taskData27 = {
  id: "the_pile"
  },
  {
- description: "A crowd-sourced instruction dataset to develop an AI assistant.",
- id: "OpenAssistant/oasst1"
+ description: "Truly open-source, curated and cleaned dialogue dataset.",
+ id: "HuggingFaceH4/ultrachat_200k"
  },
  {
- description: "A crowd-sourced instruction dataset created by Databricks employees.",
- id: "databricks/databricks-dolly-15k"
+ description: "An instruction dataset with preference ratings on responses.",
+ id: "openbmb/UltraFeedback"
  }
  ],
  demo: {
@@ -3945,72 +3977,56 @@ var taskData27 = {
  id: "bigcode/starcoder"
  },
  {
- description: "A model trained to follow instructions, uses Pythia-12b as base model.",
- id: "databricks/dolly-v2-12b"
+ description: "A very powerful text generation model.",
+ id: "mistralai/Mixtral-8x7B-Instruct-v0.1"
  },
  {
- description: "A model trained to follow instructions curated by community, uses Pythia-12b as base model.",
- id: "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5"
+ description: "Small yet powerful text generation model.",
+ id: "microsoft/phi-2"
  },
  {
- description: "A large language model trained to generate text in English.",
- id: "stabilityai/stablelm-tuned-alpha-7b"
+ description: "A very powerful model that can chat, do mathematical reasoning and write code.",
+ id: "openchat/openchat-3.5-0106"
  },
  {
- description: "A model trained to follow instructions, based on mosaicml/mpt-7b.",
- id: "mosaicml/mpt-7b-instruct"
+ description: "Very strong yet small assistant model.",
+ id: "HuggingFaceH4/zephyr-7b-beta"
  },
  {
- description: "A large language model trained to generate text in English.",
- id: "EleutherAI/pythia-12b"
- },
- {
- description: "A large text-to-text model trained to follow instructions.",
- id: "google/flan-ul2"
- },
- {
- description: "A large and powerful text generation model.",
- id: "tiiuae/falcon-40b"
- },
- {
- description: "State-of-the-art open-source large language model.",
+ description: "Very strong open-source large language model.",
  id: "meta-llama/Llama-2-70b-hf"
  }
  ],
  spaces: [
  {
- description: "A robust text generation model that can perform various tasks through natural language prompting.",
- id: "huggingface/bloom_demo"
+ description: "A leaderboard to compare different open-source text generation models based on various benchmarks.",
+ id: "HuggingFaceH4/open_llm_leaderboard"
  },
  {
- description: "An text generation based application that can write code for 80+ languages.",
- id: "bigcode/bigcode-playground"
+ description: "An text generation based application based on a very powerful LLaMA2 model.",
+ id: "ysharma/Explore_llamav2_with_TGI"
  },
  {
- description: "An text generation based application for conversations.",
- id: "h2oai/h2ogpt-chatbot"
+ description: "An text generation based application to converse with Zephyr model.",
+ id: "HuggingFaceH4/zephyr-chat"
  },
  {
  description: "An text generation application that combines OpenAI and Hugging Face models.",
  id: "microsoft/HuggingGPT"
  },
  {
- description: "An text generation application that uses StableLM-tuned-alpha-7b.",
- id: "stabilityai/stablelm-tuned-alpha-chat"
- },
- {
- description: "An UI that uses StableLM-tuned-alpha-7b.",
- id: "togethercomputer/OpenChatKit"
+ description: "An chatbot to converse with a very powerful text generation model.",
+ id: "mlabonne/phixtral-chat"
  }
  ],
- summary: "Generating text is the task of producing new text. These models can, for example, fill in incomplete text or paraphrase.",
+ summary: "Generating text is the task of generating new text given another text. These models can, for example, fill in incomplete text or paraphrase.",
  widgetModels: ["HuggingFaceH4/zephyr-7b-beta"],
  youtubeId: "Vpjb1lu0MDk"
  };
- var data_default27 = taskData27;
+ var data_default28 = taskData28;

  // src/tasks/text-to-video/data.ts
- var taskData28 = {
+ var taskData29 = {
  datasets: [
  {
  description: "Microsoft Research Video to Text is a large-scale dataset for open domain video captioning",
@@ -4102,10 +4118,10 @@ var taskData28 = {
  widgetModels: [],
  youtubeId: void 0
  };
- var data_default28 = taskData28;
+ var data_default29 = taskData29;

  // src/tasks/unconditional-image-generation/data.ts
- var taskData29 = {
+ var taskData30 = {
  datasets: [
  {
  description: "The CIFAR-100 dataset consists of 60000 32x32 colour images in 100 classes, with 600 images per class.",
@@ -4167,10 +4183,10 @@ var taskData29 = {
  // TODO: Add related video
  youtubeId: ""
  };
- var data_default29 = taskData29;
+ var data_default30 = taskData30;

  // src/tasks/video-classification/data.ts
- var taskData30 = {
+ var taskData31 = {
  datasets: [
  {
  // TODO write proper description
@@ -4249,10 +4265,10 @@ var taskData30 = {
  widgetModels: [],
  youtubeId: ""
  };
- var data_default30 = taskData30;
+ var data_default31 = taskData31;

  // src/tasks/visual-question-answering/data.ts
- var taskData31 = {
+ var taskData32 = {
  datasets: [
  {
  description: "A widely used dataset containing questions (with answers) about images.",
@@ -4342,10 +4358,10 @@ var taskData31 = {
  widgetModels: ["dandelin/vilt-b32-finetuned-vqa"],
  youtubeId: ""
  };
- var data_default31 = taskData31;
+ var data_default32 = taskData32;

  // src/tasks/zero-shot-classification/data.ts
- var taskData32 = {
+ var taskData33 = {
  datasets: [
  {
  description: "A widely used dataset used to benchmark multiple variants of text classification.",
@@ -4404,10 +4420,10 @@ var taskData32 = {
  summary: "Zero-shot text classification is a task in natural language processing where a model is trained on a set of labeled examples but is then able to classify new examples from previously unseen classes.",
  widgetModels: ["facebook/bart-large-mnli"]
  };
- var data_default32 = taskData32;
+ var data_default33 = taskData33;

  // src/tasks/zero-shot-image-classification/data.ts
- var taskData33 = {
+ var taskData34 = {
  datasets: [
  {
  // TODO write proper description
@@ -4459,8 +4475,8 @@ var taskData33 = {
  id: "openai/clip-vit-base-patch16"
  },
  {
- description: "Robust image classification model trained on publicly available image-caption data trained on additional high pixel data for better performance.",
- id: "openai/clip-vit-large-patch14-336"
+ description: "Strong zero-shot image classification model.",
+ id: "google/siglip-base-patch16-224"
  },
  {
  description: "Strong image classification model for biomedical domain.",
@@ -4469,15 +4485,77 @@ var taskData33 = {
  ],
  spaces: [
  {
- description: "An application that leverages zero shot image classification to find best captions to generate an image. ",
+ description: "An application that leverages zero-shot image classification to find best captions to generate an image. ",
  id: "pharma/CLIP-Interrogator"
+ },
+ {
+ description: "An application to compare different zero-shot image classification models. ",
+ id: "merve/compare_clip_siglip"
  }
  ],
- summary: "Zero shot image classification is the task of classifying previously unseen classes during training of a model.",
+ summary: "Zero-shot image classification is the task of classifying previously unseen classes during training of a model.",
  widgetModels: ["openai/clip-vit-large-patch14-336"],
  youtubeId: ""
  };
- var data_default33 = taskData33;
+ var data_default34 = taskData34;
+
+ // src/tasks/zero-shot-object-detection/data.ts
+ var taskData35 = {
+ datasets: [],
+ demo: {
+ inputs: [
+ {
+ filename: "zero-shot-object-detection-input.jpg",
+ type: "img"
+ },
+ {
+ label: "Classes",
+ content: "cat, dog, bird",
+ type: "text"
+ }
+ ],
+ outputs: [
+ {
+ filename: "zero-shot-object-detection-output.jpg",
+ type: "img"
+ }
+ ]
+ },
+ metrics: [
+ {
+ description: "The Average Precision (AP) metric is the Area Under the PR Curve (AUC-PR). It is calculated for each class separately",
+ id: "Average Precision"
+ },
+ {
+ description: "The Mean Average Precision (mAP) metric is the overall average of the AP values",
+ id: "Mean Average Precision"
+ },
+ {
+ description: "The AP\u03B1 metric is the Average Precision at the IoU threshold of a \u03B1 value, for example, AP50 and AP75",
+ id: "AP\u03B1"
+ }
+ ],
+ models: [
+ {
+ description: "Solid zero-shot object detection model that uses CLIP as backbone.",
+ id: "google/owlvit-base-patch32"
+ },
+ {
+ description: "The improved version of the owlvit model.",
+ id: "google/owlv2-base-patch16-ensemble"
+ }
+ ],
+ spaces: [
+ {
+ description: "A demo to try the state-of-the-art zero-shot object detection model, OWLv2.",
+ id: "merve/owlv2"
+ }
+ ],
+ summary: "Zero-shot object detection is a computer vision task to detect objects and their classes in images, without any prior training or knowledge of the classes. Zero-shot object detection models receive an image as input, as well as a list of candidate classes, and output the bounding boxes and labels where the objects have been detected.",
+ widgetModels: [],
+ youtubeId: ""
+ };
+ var data_default35 = taskData35;

  // src/tasks/index.ts
  var TASKS_MODEL_LIBRARIES = {
@@ -4538,7 +4616,7 @@ var TASKS_MODEL_LIBRARIES = {
  "text-to-3d": [],
  "image-to-3d": []
  };
- function getData(type, partialTaskData = data_default14) {
+ function getData(type, partialTaskData = data_default15) {
  return {
  ...partialTaskData,
  id: type,
@@ -4551,7 +4629,7 @@ var TASKS_DATA = {
  "audio-to-audio": getData("audio-to-audio", data_default2),
  "automatic-speech-recognition": getData("automatic-speech-recognition", data_default3),
  conversational: getData("conversational", data_default4),
- "depth-estimation": getData("depth-estimation", data_default13),
+ "depth-estimation": getData("depth-estimation", data_default14),
  "document-question-answering": getData("document-question-answering", data_default5),
  "feature-extraction": getData("feature-extraction", data_default6),
  "fill-mask": getData("fill-mask", data_default7),
@@ -4561,45 +4639,45 @@ var TASKS_DATA = {
  "image-to-image": getData("image-to-image", data_default9),
  "image-to-text": getData("image-to-text", data_default10),
  "image-to-video": void 0,
- "mask-generation": getData("mask-generation", data_default14),
+ "mask-generation": getData("mask-generation", data_default12),
  "multiple-choice": void 0,
- "object-detection": getData("object-detection", data_default12),
- "video-classification": getData("video-classification", data_default30),
+ "object-detection": getData("object-detection", data_default13),
+ "video-classification": getData("video-classification", data_default31),
  other: void 0,
- "question-answering": getData("question-answering", data_default16),
- "reinforcement-learning": getData("reinforcement-learning", data_default15),
+ "question-answering": getData("question-answering", data_default17),
+ "reinforcement-learning": getData("reinforcement-learning", data_default16),
  robotics: void 0,
- "sentence-similarity": getData("sentence-similarity", data_default17),
- summarization: getData("summarization", data_default18),
- "table-question-answering": getData("table-question-answering", data_default19),
+ "sentence-similarity": getData("sentence-similarity", data_default18),
+ summarization: getData("summarization", data_default19),
+ "table-question-answering": getData("table-question-answering", data_default20),
  "table-to-text": void 0,
- "tabular-classification": getData("tabular-classification", data_default20),
- "tabular-regression": getData("tabular-regression", data_default21),
+ "tabular-classification": getData("tabular-classification", data_default21),
+ "tabular-regression": getData("tabular-regression", data_default22),
  "tabular-to-text": void 0,
- "text-classification": getData("text-classification", data_default26),
- "text-generation": getData("text-generation", data_default27),
+ "text-classification": getData("text-classification", data_default27),
+ "text-generation": getData("text-generation", data_default28),
  "text-retrieval": void 0,
- "text-to-image": getData("text-to-image", data_default22),
- "text-to-speech": getData("text-to-speech", data_default23),
+ "text-to-image": getData("text-to-image", data_default23),
+ "text-to-speech": getData("text-to-speech", data_default24),
  "text-to-audio": void 0,
- "text-to-video": getData("text-to-video", data_default28),
+ "text-to-video": getData("text-to-video", data_default29),
  "text2text-generation": void 0,
  "time-series-forecasting": void 0,
- "token-classification": getData("token-classification", data_default24),
- translation: getData("translation", data_default25),
- "unconditional-image-generation": getData("unconditional-image-generation", data_default29),
- "visual-question-answering": getData("visual-question-answering", data_default31),
+ "token-classification": getData("token-classification", data_default25),
+ translation: getData("translation", data_default26),
+ "unconditional-image-generation": getData("unconditional-image-generation", data_default30),
+ "visual-question-answering": getData("visual-question-answering", data_default32),
  "voice-activity-detection": void 0,
- "zero-shot-classification": getData("zero-shot-classification", data_default32),
- "zero-shot-image-classification": getData("zero-shot-image-classification", data_default33),
- "zero-shot-object-detection": getData("zero-shot-object-detection", data_default14),
- "text-to-3d": getData("text-to-3d", data_default14),
- "image-to-3d": getData("image-to-3d", data_default14)
+ "zero-shot-classification": getData("zero-shot-classification", data_default33),
+ "zero-shot-image-classification": getData("zero-shot-image-classification", data_default34),
+ "zero-shot-object-detection": getData("zero-shot-object-detection", data_default35),
+ "text-to-3d": getData("text-to-3d", data_default15),
+ "image-to-3d": getData("image-to-3d", data_default15)
  };

  // src/model-libraries.ts
  var ModelLibrary = /* @__PURE__ */ ((ModelLibrary2) => {
- ModelLibrary2["adapter-transformers"] = "Adapter Transformers";
+ ModelLibrary2["adapter-transformers"] = "Adapters";
  ModelLibrary2["allennlp"] = "allenNLP";
  ModelLibrary2["asteroid"] = "Asteroid";
  ModelLibrary2["bertopic"] = "BERTopic";
@@ -5114,8 +5192,7 @@ function getJsInferenceSnippet(model, accessToken) {
  function hasJsInferenceSnippet(model) {
  return !!model.pipeline_tag && model.pipeline_tag in jsSnippets;
  }
- // Annotate the CommonJS export names for ESM import in node:
- 0 && (module.exports = {
+ export {
  ALL_DISPLAY_MODEL_LIBRARY_KEYS,
  InferenceDisplayability,
  LIBRARY_TASK_MAPPING_EXCLUDING_TRANSFORMERS,
@@ -5130,5 +5207,5 @@ function hasJsInferenceSnippet(model) {
  SUBTASK_TYPES,
  TASKS_DATA,
  TASKS_MODEL_LIBRARIES,
- snippets
- });
+ snippets_exports as snippets
+ };
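
The Adapters rename also changes what the snippet generator emits (see the adapter_transformers → adapters hunk near the top of the diff). A hedged sketch of rendering that snippet through the exported MODEL_LIBRARIES_UI_ELEMENTS map — the stub model object below is hypothetical and far smaller than the real ModelData shape:

import { MODEL_LIBRARIES_UI_ELEMENTS } from "@huggingface/tasks";

const adaptersUi = MODEL_LIBRARIES_UI_ELEMENTS["adapter-transformers"];
console.log(adaptersUi.btnLabel); // "Adapters" (was "Adapter Transformers" in 0.2.1)

// Hand-written stub containing only the fields the snippet template reads:
const stubModel = {
  id: "some-namespace/some-adapter", // hypothetical repo id
  config: { adapter_transformers: { model_name: "bert-base-uncased" } },
};
console.log(adaptersUi.snippets(stubModel)[0]);
// from adapters import AutoAdapterModel
//
// model = AutoAdapterModel.from_pretrained("bert-base-uncased")
// model.load_adapter("some-namespace/some-adapter", set_active=True)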