@huggingface/tasks 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120)
  1. package/dist/{index.mjs → index.cjs} +280 -133
  2. package/dist/index.d.ts +4 -3
  3. package/dist/index.js +245 -168
  4. package/package.json +13 -8
  5. package/src/library-to-tasks.ts +1 -1
  6. package/src/library-ui-elements.ts +11 -11
  7. package/src/model-data.ts +1 -1
  8. package/src/model-libraries.ts +1 -1
  9. package/src/pipelines.ts +1 -1
  10. package/src/tasks/audio-classification/about.md +1 -1
  11. package/src/tasks/audio-classification/inference.ts +51 -0
  12. package/src/tasks/audio-classification/spec/input.json +34 -0
  13. package/src/tasks/audio-classification/spec/output.json +21 -0
  14. package/src/tasks/audio-to-audio/about.md +1 -1
  15. package/src/tasks/automatic-speech-recognition/about.md +4 -2
  16. package/src/tasks/automatic-speech-recognition/inference.ts +154 -0
  17. package/src/tasks/automatic-speech-recognition/spec/input.json +34 -0
  18. package/src/tasks/automatic-speech-recognition/spec/output.json +36 -0
  19. package/src/tasks/common-definitions.json +109 -0
  20. package/src/tasks/depth-estimation/data.ts +8 -4
  21. package/src/tasks/depth-estimation/inference.ts +35 -0
  22. package/src/tasks/depth-estimation/spec/input.json +30 -0
  23. package/src/tasks/depth-estimation/spec/output.json +10 -0
  24. package/src/tasks/document-question-answering/inference.ts +102 -0
  25. package/src/tasks/document-question-answering/spec/input.json +85 -0
  26. package/src/tasks/document-question-answering/spec/output.json +36 -0
  27. package/src/tasks/feature-extraction/inference.ts +22 -0
  28. package/src/tasks/feature-extraction/spec/input.json +26 -0
  29. package/src/tasks/feature-extraction/spec/output.json +7 -0
  30. package/src/tasks/fill-mask/inference.ts +61 -0
  31. package/src/tasks/fill-mask/spec/input.json +38 -0
  32. package/src/tasks/fill-mask/spec/output.json +29 -0
  33. package/src/tasks/image-classification/inference.ts +51 -0
  34. package/src/tasks/image-classification/spec/input.json +34 -0
  35. package/src/tasks/image-classification/spec/output.json +10 -0
  36. package/src/tasks/image-segmentation/inference.ts +65 -0
  37. package/src/tasks/image-segmentation/spec/input.json +54 -0
  38. package/src/tasks/image-segmentation/spec/output.json +25 -0
  39. package/src/tasks/image-to-image/inference.ts +67 -0
  40. package/src/tasks/image-to-image/spec/input.json +52 -0
  41. package/src/tasks/image-to-image/spec/output.json +12 -0
  42. package/src/tasks/image-to-text/inference.ts +138 -0
  43. package/src/tasks/image-to-text/spec/input.json +34 -0
  44. package/src/tasks/image-to-text/spec/output.json +17 -0
  45. package/src/tasks/index.ts +5 -2
  46. package/src/tasks/mask-generation/about.md +65 -0
  47. package/src/tasks/mask-generation/data.ts +42 -5
  48. package/src/tasks/object-detection/inference.ts +62 -0
  49. package/src/tasks/object-detection/spec/input.json +30 -0
  50. package/src/tasks/object-detection/spec/output.json +46 -0
  51. package/src/tasks/placeholder/data.ts +3 -0
  52. package/src/tasks/placeholder/spec/input.json +35 -0
  53. package/src/tasks/placeholder/spec/output.json +17 -0
  54. package/src/tasks/question-answering/inference.ts +99 -0
  55. package/src/tasks/question-answering/spec/input.json +67 -0
  56. package/src/tasks/question-answering/spec/output.json +29 -0
  57. package/src/tasks/sentence-similarity/about.md +2 -2
  58. package/src/tasks/sentence-similarity/inference.ts +32 -0
  59. package/src/tasks/sentence-similarity/spec/input.json +40 -0
  60. package/src/tasks/sentence-similarity/spec/output.json +12 -0
  61. package/src/tasks/summarization/data.ts +1 -0
  62. package/src/tasks/summarization/inference.ts +58 -0
  63. package/src/tasks/summarization/spec/input.json +7 -0
  64. package/src/tasks/summarization/spec/output.json +7 -0
  65. package/src/tasks/table-question-answering/inference.ts +61 -0
  66. package/src/tasks/table-question-answering/spec/input.json +39 -0
  67. package/src/tasks/table-question-answering/spec/output.json +40 -0
  68. package/src/tasks/tabular-classification/about.md +1 -1
  69. package/src/tasks/tabular-regression/about.md +1 -1
  70. package/src/tasks/text-classification/about.md +1 -0
  71. package/src/tasks/text-classification/inference.ts +51 -0
  72. package/src/tasks/text-classification/spec/input.json +35 -0
  73. package/src/tasks/text-classification/spec/output.json +10 -0
  74. package/src/tasks/text-generation/about.md +24 -13
  75. package/src/tasks/text-generation/data.ts +22 -38
  76. package/src/tasks/text-generation/inference.ts +85 -0
  77. package/src/tasks/text-generation/spec/input.json +74 -0
  78. package/src/tasks/text-generation/spec/output.json +17 -0
  79. package/src/tasks/text-to-audio/inference.ts +138 -0
  80. package/src/tasks/text-to-audio/spec/input.json +31 -0
  81. package/src/tasks/text-to-audio/spec/output.json +20 -0
  82. package/src/tasks/text-to-image/about.md +11 -2
  83. package/src/tasks/text-to-image/data.ts +6 -2
  84. package/src/tasks/text-to-image/inference.ts +73 -0
  85. package/src/tasks/text-to-image/spec/input.json +57 -0
  86. package/src/tasks/text-to-image/spec/output.json +15 -0
  87. package/src/tasks/text-to-speech/about.md +4 -2
  88. package/src/tasks/text-to-speech/data.ts +1 -0
  89. package/src/tasks/text-to-speech/inference.ts +146 -0
  90. package/src/tasks/text-to-speech/spec/input.json +7 -0
  91. package/src/tasks/text-to-speech/spec/output.json +7 -0
  92. package/src/tasks/text2text-generation/inference.ts +53 -0
  93. package/src/tasks/text2text-generation/spec/input.json +55 -0
  94. package/src/tasks/text2text-generation/spec/output.json +17 -0
  95. package/src/tasks/token-classification/inference.ts +82 -0
  96. package/src/tasks/token-classification/spec/input.json +65 -0
  97. package/src/tasks/token-classification/spec/output.json +33 -0
  98. package/src/tasks/translation/data.ts +1 -0
  99. package/src/tasks/translation/inference.ts +58 -0
  100. package/src/tasks/translation/spec/input.json +7 -0
  101. package/src/tasks/translation/spec/output.json +7 -0
  102. package/src/tasks/video-classification/inference.ts +59 -0
  103. package/src/tasks/video-classification/spec/input.json +42 -0
  104. package/src/tasks/video-classification/spec/output.json +10 -0
  105. package/src/tasks/visual-question-answering/inference.ts +63 -0
  106. package/src/tasks/visual-question-answering/spec/input.json +41 -0
  107. package/src/tasks/visual-question-answering/spec/output.json +21 -0
  108. package/src/tasks/zero-shot-classification/inference.ts +67 -0
  109. package/src/tasks/zero-shot-classification/spec/input.json +50 -0
  110. package/src/tasks/zero-shot-classification/spec/output.json +10 -0
  111. package/src/tasks/zero-shot-image-classification/data.ts +8 -5
  112. package/src/tasks/zero-shot-image-classification/inference.ts +61 -0
  113. package/src/tasks/zero-shot-image-classification/spec/input.json +45 -0
  114. package/src/tasks/zero-shot-image-classification/spec/output.json +10 -0
  115. package/src/tasks/zero-shot-object-detection/about.md +6 -0
  116. package/src/tasks/zero-shot-object-detection/data.ts +6 -1
  117. package/src/tasks/zero-shot-object-detection/inference.ts +66 -0
  118. package/src/tasks/zero-shot-object-detection/spec/input.json +40 -0
  119. package/src/tasks/zero-shot-object-detection/spec/output.json +47 -0
  120. package/tsconfig.json +3 -3
@@ -1,8 +1,42 @@
+ "use strict";
  var __defProp = Object.defineProperty;
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
+ var __getOwnPropNames = Object.getOwnPropertyNames;
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
  var __export = (target, all) => {
  for (var name in all)
  __defProp(target, name, { get: all[name], enumerable: true });
  };
+ var __copyProps = (to, from, except, desc) => {
+ if (from && typeof from === "object" || typeof from === "function") {
+ for (let key of __getOwnPropNames(from))
+ if (!__hasOwnProp.call(to, key) && key !== except)
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
+ }
+ return to;
+ };
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
+
+ // src/index.ts
+ var src_exports = {};
+ __export(src_exports, {
+ ALL_DISPLAY_MODEL_LIBRARY_KEYS: () => ALL_DISPLAY_MODEL_LIBRARY_KEYS,
+ InferenceDisplayability: () => InferenceDisplayability,
+ LIBRARY_TASK_MAPPING_EXCLUDING_TRANSFORMERS: () => LIBRARY_TASK_MAPPING_EXCLUDING_TRANSFORMERS,
+ MAPPING_DEFAULT_WIDGET: () => MAPPING_DEFAULT_WIDGET,
+ MODALITIES: () => MODALITIES,
+ MODALITY_LABELS: () => MODALITY_LABELS,
+ MODEL_LIBRARIES_UI_ELEMENTS: () => MODEL_LIBRARIES_UI_ELEMENTS,
+ ModelLibrary: () => ModelLibrary,
+ PIPELINE_DATA: () => PIPELINE_DATA,
+ PIPELINE_TYPES: () => PIPELINE_TYPES,
+ PIPELINE_TYPES_SET: () => PIPELINE_TYPES_SET,
+ SUBTASK_TYPES: () => SUBTASK_TYPES,
+ TASKS_DATA: () => TASKS_DATA,
+ TASKS_MODEL_LIBRARIES: () => TASKS_MODEL_LIBRARIES,
+ snippets: () => snippets_exports
+ });
+ module.exports = __toCommonJS(src_exports);

  // src/library-to-tasks.ts
  var LIBRARY_TASK_MAPPING_EXCLUDING_TRANSFORMERS = {
@@ -49,11 +83,11 @@ function nameWithoutNamespace(modelId) {
  const splitted = modelId.split("/");
  return splitted.length === 1 ? splitted[0] : splitted[1];
  }
- var adapter_transformers = (model) => [
- `from transformers import ${model.config?.adapter_transformers?.model_class}
+ var adapters = (model) => [
+ `from adapters import AutoAdapterModel

- model = ${model.config?.adapter_transformers?.model_class}.from_pretrained("${model.config?.adapter_transformers?.model_name}")
- model.load_adapter("${model.id}", source="hf")`
+ model = AutoAdapterModel.from_pretrained("${model.config?.adapter_transformers?.model_name}")
+ model.load_adapter("${model.id}", set_active=True)`
  ];
  var allennlpUnknown = (model) => [
  `import allennlp_models
@@ -493,7 +527,7 @@ transcriptions = asr_model.transcribe(["file.wav"])`
  }
  };
  var mlAgents = (model) => [`mlagents-load-from-hf --repo-id="${model.id}" --local-dir="./downloads"`];
- var sentis = (model) => [
+ var sentis = () => [
  `string modelName = "[Your model name here].sentis";
  Model model = ModelLoader.Load(Application.streamingAssetsPath + "/" + modelName);
  IWorker engine = WorkerFactory.CreateWorker(BackendType.GPUCompute, model);
@@ -520,11 +554,11 @@ model = AutoModel.load_from_hf_hub("${model.id}")`
  ];
  var MODEL_LIBRARIES_UI_ELEMENTS = {
  "adapter-transformers": {
- btnLabel: "Adapter Transformers",
- repoName: "adapter-transformers",
- repoUrl: "https://github.com/Adapter-Hub/adapter-transformers",
- docsUrl: "https://huggingface.co/docs/hub/adapter-transformers",
- snippets: adapter_transformers
+ btnLabel: "Adapters",
+ repoName: "adapters",
+ repoUrl: "https://github.com/Adapter-Hub/adapters",
+ docsUrl: "https://huggingface.co/docs/hub/adapters",
+ snippets: adapters
  },
  allennlp: {
  btnLabel: "AllenNLP",
@@ -2846,8 +2880,60 @@ var taskData11 = {
  };
  var data_default11 = taskData11;

- // src/tasks/object-detection/data.ts
+ // src/tasks/mask-generation/data.ts
  var taskData12 = {
+ datasets: [],
+ demo: {
+ inputs: [
+ {
+ filename: "mask-generation-input.png",
+ type: "img"
+ }
+ ],
+ outputs: [
+ {
+ filename: "mask-generation-output.png",
+ type: "img"
+ }
+ ]
+ },
+ metrics: [],
+ models: [
+ {
+ description: "Small yet powerful mask generation model.",
+ id: "Zigeng/SlimSAM-uniform-50"
+ },
+ {
+ description: "Very strong mask generation model.",
+ id: "facebook/sam-vit-huge"
+ }
+ ],
+ spaces: [
+ {
+ description: "An application that combines a mask generation model with an image embedding model for open-vocabulary image segmentation.",
+ id: "SkalskiP/SAM_and_MetaCLIP"
+ },
+ {
+ description: "An application that compares the performance of a large and a small mask generation model.",
+ id: "merve/slimsam"
+ },
+ {
+ description: "An application based on an improved mask generation model.",
+ id: "linfanluntan/Grounded-SAM"
+ },
+ {
+ description: "An application to remove objects from videos using mask generation models.",
+ id: "SkalskiP/SAM_and_ProPainter"
+ }
+ ],
+ summary: "Mask generation is the task of generating masks that identify a specific object or region of interest in a given image. Masks are often used in segmentation tasks, where they provide a precise way to isolate the object of interest for further processing or analysis.",
+ widgetModels: [],
+ youtubeId: ""
+ };
+ var data_default12 = taskData12;
+
+ // src/tasks/object-detection/data.ts
+ var taskData13 = {
  datasets: [
  {
  // TODO write proper description
@@ -2919,10 +3005,10 @@ var taskData12 = {
  widgetModels: ["facebook/detr-resnet-50"],
  youtubeId: "WdAeKSOpxhw"
  };
- var data_default12 = taskData12;
+ var data_default13 = taskData13;

  // src/tasks/depth-estimation/data.ts
- var taskData13 = {
+ var taskData14 = {
  datasets: [
  {
  description: "NYU Depth V2 Dataset: Video dataset containing both RGB and depth sensor data",
@@ -2950,8 +3036,8 @@ var taskData13 = {
  id: "Intel/dpt-large"
  },
  {
- description: "Strong Depth Estimation model trained on the KITTI dataset.",
- id: "facebook/dpt-dinov2-large-kitti"
+ description: "Strong Depth Estimation model trained on a big compilation of datasets.",
+ id: "LiheYoung/depth-anything-large-hf"
  },
  {
  description: "A strong monocular depth estimation model.",
@@ -2964,18 +3050,22 @@ var taskData13 = {
  id: "radames/dpt-depth-estimation-3d-voxels"
  },
  {
- description: "An application that can estimate the depth in a given image.",
- id: "keras-io/Monocular-Depth-Estimation"
+ description: "An application to compare the outputs of different depth estimation models.",
+ id: "LiheYoung/Depth-Anything"
+ },
+ {
+ description: "An application to try state-of-the-art depth estimation.",
+ id: "merve/compare_depth_models"
  }
  ],
  summary: "Depth estimation is the task of predicting depth of the objects present in an image.",
  widgetModels: [""],
  youtubeId: ""
  };
- var data_default13 = taskData13;
+ var data_default14 = taskData14;

  // src/tasks/placeholder/data.ts
- var taskData14 = {
+ var taskData15 = {
  datasets: [],
  demo: {
  inputs: [],
@@ -2987,12 +3077,15 @@ var taskData14 = {
  spaces: [],
  summary: "",
  widgetModels: [],
- youtubeId: void 0
+ youtubeId: void 0,
+ /// If this is a subtask, link to the most general task ID
+ /// (eg, text2text-generation is the canonical ID of translation)
+ canonicalId: void 0
  };
- var data_default14 = taskData14;
+ var data_default15 = taskData15;

  // src/tasks/reinforcement-learning/data.ts
- var taskData15 = {
+ var taskData16 = {
  datasets: [
  {
  description: "A curation of widely used datasets for Data Driven Deep Reinforcement Learning (D4RL)",
@@ -3058,10 +3151,10 @@ var taskData15 = {
  widgetModels: [],
  youtubeId: "q0BiUn5LiBc"
  };
- var data_default15 = taskData15;
+ var data_default16 = taskData16;

  // src/tasks/question-answering/data.ts
- var taskData16 = {
+ var taskData17 = {
  datasets: [
  {
  // TODO write proper description
@@ -3125,10 +3218,10 @@ var taskData16 = {
  widgetModels: ["deepset/roberta-base-squad2"],
  youtubeId: "ajPx5LwJD-I"
  };
- var data_default16 = taskData16;
+ var data_default17 = taskData17;

  // src/tasks/sentence-similarity/data.ts
- var taskData17 = {
+ var taskData18 = {
  datasets: [
  {
  description: "Bing queries with relevant passages from various web sources.",
@@ -3220,10 +3313,11 @@ var taskData17 = {
  widgetModels: ["sentence-transformers/all-MiniLM-L6-v2"],
  youtubeId: "VCZq5AkbNEU"
  };
- var data_default17 = taskData17;
+ var data_default18 = taskData18;

  // src/tasks/summarization/data.ts
- var taskData18 = {
+ var taskData19 = {
+ canonicalId: "text2text-generation",
  datasets: [
  {
  description: "News articles in five different languages along with their summaries. Widely used for benchmarking multilingual summarization models.",
@@ -3288,10 +3382,10 @@ var taskData18 = {
  widgetModels: ["sshleifer/distilbart-cnn-12-6"],
  youtubeId: "yHnr5Dk2zCI"
  };
- var data_default18 = taskData18;
+ var data_default19 = taskData19;

  // src/tasks/table-question-answering/data.ts
- var taskData19 = {
+ var taskData20 = {
  datasets: [
  {
  description: "The WikiTableQuestions dataset is a large-scale dataset for the task of question answering on semi-structured tables.",
@@ -3342,10 +3436,10 @@ var taskData19 = {
  summary: "Table Question Answering (Table QA) is the answering a question about an information on a given table.",
  widgetModels: ["google/tapas-base-finetuned-wtq"]
  };
- var data_default19 = taskData19;
+ var data_default20 = taskData20;

  // src/tasks/tabular-classification/data.ts
- var taskData20 = {
+ var taskData21 = {
  datasets: [
  {
  description: "A comprehensive curation of datasets covering all benchmarks.",
@@ -3409,10 +3503,10 @@ var taskData20 = {
  widgetModels: ["scikit-learn/tabular-playground"],
  youtubeId: ""
  };
- var data_default20 = taskData20;
+ var data_default21 = taskData21;

  // src/tasks/tabular-regression/data.ts
- var taskData21 = {
+ var taskData22 = {
  datasets: [
  {
  description: "A comprehensive curation of datasets covering all benchmarks.",
@@ -3464,10 +3558,10 @@ var taskData21 = {
  widgetModels: ["scikit-learn/Fish-Weight"],
  youtubeId: ""
  };
- var data_default21 = taskData21;
+ var data_default22 = taskData22;

  // src/tasks/text-to-image/data.ts
- var taskData22 = {
+ var taskData23 = {
  datasets: [
  {
  description: "RedCaps is a large-scale dataset of 12M image-text pairs collected from Reddit.",
@@ -3543,22 +3637,27 @@ var taskData22 = {
  id: "latent-consistency/lcm-lora-for-sdxl"
  },
  {
- description: "A powerful text-to-image application that can generate 3D representations.",
- id: "hysts/Shap-E"
+ description: "A gallery to explore various text-to-image models.",
+ id: "multimodalart/LoraTheExplorer"
  },
  {
  description: "An application for `text-to-image`, `image-to-image` and image inpainting.",
  id: "ArtGAN/Stable-Diffusion-ControlNet-WebUI"
+ },
+ {
+ description: "An application to generate realistic images given photos of a person and a prompt.",
+ id: "InstantX/InstantID"
  }
  ],
  summary: "Generates images from input text. These models can be used to generate and modify images based on text prompts.",
  widgetModels: ["CompVis/stable-diffusion-v1-4"],
  youtubeId: ""
  };
- var data_default22 = taskData22;
+ var data_default23 = taskData23;

  // src/tasks/text-to-speech/data.ts
- var taskData23 = {
+ var taskData24 = {
+ canonicalId: "text-to-audio",
  datasets: [
  {
  description: "Thousands of short audio clips of a single speaker.",
@@ -3622,10 +3721,10 @@ var taskData23 = {
  widgetModels: ["suno/bark"],
  youtubeId: "NW62DpzJ274"
  };
- var data_default23 = taskData23;
+ var data_default24 = taskData24;

  // src/tasks/token-classification/data.ts
- var taskData24 = {
+ var taskData25 = {
  datasets: [
  {
  description: "A widely used dataset useful to benchmark named entity recognition models.",
@@ -3701,10 +3800,11 @@ var taskData24 = {
  widgetModels: ["dslim/bert-base-NER"],
  youtubeId: "wVHdVlPScxA"
  };
- var data_default24 = taskData24;
+ var data_default25 = taskData25;

  // src/tasks/translation/data.ts
- var taskData25 = {
+ var taskData26 = {
+ canonicalId: "text2text-generation",
  datasets: [
  {
  description: "A dataset of copyright-free books translated into 16 different languages.",
@@ -3765,10 +3865,10 @@ var taskData25 = {
  widgetModels: ["t5-small"],
  youtubeId: "1JvfrvZgi6c"
  };
- var data_default25 = taskData25;
+ var data_default26 = taskData26;

  // src/tasks/text-classification/data.ts
- var taskData26 = {
+ var taskData27 = {
  datasets: [
  {
  description: "A widely used dataset used to benchmark multiple variants of text classification.",
@@ -3853,10 +3953,10 @@ var taskData26 = {
  widgetModels: ["distilbert-base-uncased-finetuned-sst-2-english"],
  youtubeId: "leNG9fN9FQU"
  };
- var data_default26 = taskData26;
+ var data_default27 = taskData27;

  // src/tasks/text-generation/data.ts
- var taskData27 = {
+ var taskData28 = {
  datasets: [
  {
  description: "A large multilingual dataset of text crawled from the web.",
@@ -3867,12 +3967,12 @@ var taskData27 = {
  id: "the_pile"
  },
  {
- description: "A crowd-sourced instruction dataset to develop an AI assistant.",
- id: "OpenAssistant/oasst1"
+ description: "Truly open-source, curated and cleaned dialogue dataset.",
+ id: "HuggingFaceH4/ultrachat_200k"
  },
  {
- description: "A crowd-sourced instruction dataset created by Databricks employees.",
- id: "databricks/databricks-dolly-15k"
+ description: "An instruction dataset with preference ratings on responses.",
+ id: "openbmb/UltraFeedback"
  }
  ],
  demo: {
@@ -3911,72 +4011,56 @@ var taskData27 = {
  id: "bigcode/starcoder"
  },
  {
- description: "A model trained to follow instructions, uses Pythia-12b as base model.",
- id: "databricks/dolly-v2-12b"
+ description: "A very powerful text generation model.",
+ id: "mistralai/Mixtral-8x7B-Instruct-v0.1"
  },
  {
- description: "A model trained to follow instructions curated by community, uses Pythia-12b as base model.",
- id: "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5"
+ description: "Small yet powerful text generation model.",
+ id: "microsoft/phi-2"
  },
  {
- description: "A large language model trained to generate text in English.",
- id: "stabilityai/stablelm-tuned-alpha-7b"
+ description: "A very powerful model that can chat, do mathematical reasoning and write code.",
+ id: "openchat/openchat-3.5-0106"
  },
  {
- description: "A model trained to follow instructions, based on mosaicml/mpt-7b.",
- id: "mosaicml/mpt-7b-instruct"
+ description: "Very strong yet small assistant model.",
+ id: "HuggingFaceH4/zephyr-7b-beta"
  },
  {
- description: "A large language model trained to generate text in English.",
- id: "EleutherAI/pythia-12b"
- },
- {
- description: "A large text-to-text model trained to follow instructions.",
- id: "google/flan-ul2"
- },
- {
- description: "A large and powerful text generation model.",
- id: "tiiuae/falcon-40b"
- },
- {
- description: "State-of-the-art open-source large language model.",
+ description: "Very strong open-source large language model.",
  id: "meta-llama/Llama-2-70b-hf"
  }
  ],
  spaces: [
  {
- description: "A robust text generation model that can perform various tasks through natural language prompting.",
- id: "huggingface/bloom_demo"
+ description: "A leaderboard to compare different open-source text generation models based on various benchmarks.",
+ id: "HuggingFaceH4/open_llm_leaderboard"
  },
  {
- description: "An text generation based application that can write code for 80+ languages.",
- id: "bigcode/bigcode-playground"
+ description: "An text generation based application based on a very powerful LLaMA2 model.",
+ id: "ysharma/Explore_llamav2_with_TGI"
  },
  {
- description: "An text generation based application for conversations.",
- id: "h2oai/h2ogpt-chatbot"
+ description: "An text generation based application to converse with Zephyr model.",
+ id: "HuggingFaceH4/zephyr-chat"
  },
  {
  description: "An text generation application that combines OpenAI and Hugging Face models.",
  id: "microsoft/HuggingGPT"
  },
  {
- description: "An text generation application that uses StableLM-tuned-alpha-7b.",
- id: "stabilityai/stablelm-tuned-alpha-chat"
- },
- {
- description: "An UI that uses StableLM-tuned-alpha-7b.",
- id: "togethercomputer/OpenChatKit"
+ description: "An chatbot to converse with a very powerful text generation model.",
+ id: "mlabonne/phixtral-chat"
  }
  ],
- summary: "Generating text is the task of producing new text. These models can, for example, fill in incomplete text or paraphrase.",
+ summary: "Generating text is the task of generating new text given another text. These models can, for example, fill in incomplete text or paraphrase.",
  widgetModels: ["HuggingFaceH4/zephyr-7b-beta"],
  youtubeId: "Vpjb1lu0MDk"
  };
- var data_default27 = taskData27;
+ var data_default28 = taskData28;

  // src/tasks/text-to-video/data.ts
- var taskData28 = {
+ var taskData29 = {
  datasets: [
  {
  description: "Microsoft Research Video to Text is a large-scale dataset for open domain video captioning",
@@ -4068,10 +4152,10 @@ var taskData28 = {
  widgetModels: [],
  youtubeId: void 0
  };
- var data_default28 = taskData28;
+ var data_default29 = taskData29;

  // src/tasks/unconditional-image-generation/data.ts
- var taskData29 = {
+ var taskData30 = {
  datasets: [
  {
  description: "The CIFAR-100 dataset consists of 60000 32x32 colour images in 100 classes, with 600 images per class.",
@@ -4133,10 +4217,10 @@ var taskData29 = {
  // TODO: Add related video
  youtubeId: ""
  };
- var data_default29 = taskData29;
+ var data_default30 = taskData30;

  // src/tasks/video-classification/data.ts
- var taskData30 = {
+ var taskData31 = {
  datasets: [
  {
  // TODO write proper description
@@ -4215,10 +4299,10 @@ var taskData30 = {
  widgetModels: [],
  youtubeId: ""
  };
- var data_default30 = taskData30;
+ var data_default31 = taskData31;

  // src/tasks/visual-question-answering/data.ts
- var taskData31 = {
+ var taskData32 = {
  datasets: [
  {
  description: "A widely used dataset containing questions (with answers) about images.",
@@ -4308,10 +4392,10 @@ var taskData31 = {
  widgetModels: ["dandelin/vilt-b32-finetuned-vqa"],
  youtubeId: ""
  };
- var data_default31 = taskData31;
+ var data_default32 = taskData32;

  // src/tasks/zero-shot-classification/data.ts
- var taskData32 = {
+ var taskData33 = {
  datasets: [
  {
  description: "A widely used dataset used to benchmark multiple variants of text classification.",
@@ -4370,10 +4454,10 @@ var taskData32 = {
  summary: "Zero-shot text classification is a task in natural language processing where a model is trained on a set of labeled examples but is then able to classify new examples from previously unseen classes.",
  widgetModels: ["facebook/bart-large-mnli"]
  };
- var data_default32 = taskData32;
+ var data_default33 = taskData33;

  // src/tasks/zero-shot-image-classification/data.ts
- var taskData33 = {
+ var taskData34 = {
  datasets: [
  {
  // TODO write proper description
@@ -4425,8 +4509,8 @@ var taskData33 = {
  id: "openai/clip-vit-base-patch16"
  },
  {
- description: "Robust image classification model trained on publicly available image-caption data trained on additional high pixel data for better performance.",
- id: "openai/clip-vit-large-patch14-336"
+ description: "Strong zero-shot image classification model.",
+ id: "google/siglip-base-patch16-224"
  },
  {
  description: "Strong image classification model for biomedical domain.",
@@ -4435,15 +4519,77 @@ var taskData33 = {
  ],
  spaces: [
  {
- description: "An application that leverages zero shot image classification to find best captions to generate an image. ",
+ description: "An application that leverages zero-shot image classification to find best captions to generate an image. ",
  id: "pharma/CLIP-Interrogator"
+ },
+ {
+ description: "An application to compare different zero-shot image classification models. ",
+ id: "merve/compare_clip_siglip"
  }
  ],
- summary: "Zero shot image classification is the task of classifying previously unseen classes during training of a model.",
+ summary: "Zero-shot image classification is the task of classifying previously unseen classes during training of a model.",
  widgetModels: ["openai/clip-vit-large-patch14-336"],
  youtubeId: ""
  };
- var data_default33 = taskData33;
+ var data_default34 = taskData34;
+
+ // src/tasks/zero-shot-object-detection/data.ts
+ var taskData35 = {
+ datasets: [],
+ demo: {
+ inputs: [
+ {
+ filename: "zero-shot-object-detection-input.jpg",
+ type: "img"
+ },
+ {
+ label: "Classes",
+ content: "cat, dog, bird",
+ type: "text"
+ }
+ ],
+ outputs: [
+ {
+ filename: "zero-shot-object-detection-output.jpg",
+ type: "img"
+ }
+ ]
+ },
+ metrics: [
+ {
+ description: "The Average Precision (AP) metric is the Area Under the PR Curve (AUC-PR). It is calculated for each class separately",
+ id: "Average Precision"
+ },
+ {
+ description: "The Mean Average Precision (mAP) metric is the overall average of the AP values",
+ id: "Mean Average Precision"
+ },
+ {
+ description: "The AP\u03B1 metric is the Average Precision at the IoU threshold of a \u03B1 value, for example, AP50 and AP75",
+ id: "AP\u03B1"
+ }
+ ],
+ models: [
+ {
+ description: "Solid zero-shot object detection model that uses CLIP as backbone.",
+ id: "google/owlvit-base-patch32"
+ },
+ {
+ description: "The improved version of the owlvit model.",
+ id: "google/owlv2-base-patch16-ensemble"
+ }
+ ],
+ spaces: [
+ {
+ description: "A demo to try the state-of-the-art zero-shot object detection model, OWLv2.",
+ id: "merve/owlv2"
+ }
+ ],
+ summary: "Zero-shot object detection is a computer vision task to detect objects and their classes in images, without any prior training or knowledge of the classes. Zero-shot object detection models receive an image as input, as well as a list of candidate classes, and output the bounding boxes and labels where the objects have been detected.",
+ widgetModels: [],
+ youtubeId: ""
+ };
+ var data_default35 = taskData35;

  // src/tasks/index.ts
  var TASKS_MODEL_LIBRARIES = {
@@ -4504,7 +4650,7 @@
  "text-to-3d": [],
  "image-to-3d": []
  };
- function getData(type, partialTaskData = data_default14) {
+ function getData(type, partialTaskData = data_default15) {
  return {
  ...partialTaskData,
  id: type,
@@ -4517,7 +4663,7 @@ var TASKS_DATA = {
  "audio-to-audio": getData("audio-to-audio", data_default2),
  "automatic-speech-recognition": getData("automatic-speech-recognition", data_default3),
  conversational: getData("conversational", data_default4),
- "depth-estimation": getData("depth-estimation", data_default13),
+ "depth-estimation": getData("depth-estimation", data_default14),
  "document-question-answering": getData("document-question-answering", data_default5),
  "feature-extraction": getData("feature-extraction", data_default6),
  "fill-mask": getData("fill-mask", data_default7),
@@ -4527,45 +4673,45 @@ var TASKS_DATA = {
  "image-to-image": getData("image-to-image", data_default9),
  "image-to-text": getData("image-to-text", data_default10),
  "image-to-video": void 0,
- "mask-generation": getData("mask-generation", data_default14),
+ "mask-generation": getData("mask-generation", data_default12),
  "multiple-choice": void 0,
- "object-detection": getData("object-detection", data_default12),
- "video-classification": getData("video-classification", data_default30),
+ "object-detection": getData("object-detection", data_default13),
+ "video-classification": getData("video-classification", data_default31),
  other: void 0,
- "question-answering": getData("question-answering", data_default16),
- "reinforcement-learning": getData("reinforcement-learning", data_default15),
+ "question-answering": getData("question-answering", data_default17),
+ "reinforcement-learning": getData("reinforcement-learning", data_default16),
  robotics: void 0,
- "sentence-similarity": getData("sentence-similarity", data_default17),
- summarization: getData("summarization", data_default18),
- "table-question-answering": getData("table-question-answering", data_default19),
+ "sentence-similarity": getData("sentence-similarity", data_default18),
+ summarization: getData("summarization", data_default19),
+ "table-question-answering": getData("table-question-answering", data_default20),
  "table-to-text": void 0,
- "tabular-classification": getData("tabular-classification", data_default20),
- "tabular-regression": getData("tabular-regression", data_default21),
+ "tabular-classification": getData("tabular-classification", data_default21),
+ "tabular-regression": getData("tabular-regression", data_default22),
  "tabular-to-text": void 0,
- "text-classification": getData("text-classification", data_default26),
- "text-generation": getData("text-generation", data_default27),
+ "text-classification": getData("text-classification", data_default27),
+ "text-generation": getData("text-generation", data_default28),
  "text-retrieval": void 0,
- "text-to-image": getData("text-to-image", data_default22),
- "text-to-speech": getData("text-to-speech", data_default23),
+ "text-to-image": getData("text-to-image", data_default23),
+ "text-to-speech": getData("text-to-speech", data_default24),
  "text-to-audio": void 0,
- "text-to-video": getData("text-to-video", data_default28),
+ "text-to-video": getData("text-to-video", data_default29),
  "text2text-generation": void 0,
  "time-series-forecasting": void 0,
- "token-classification": getData("token-classification", data_default24),
- translation: getData("translation", data_default25),
- "unconditional-image-generation": getData("unconditional-image-generation", data_default29),
- "visual-question-answering": getData("visual-question-answering", data_default31),
+ "token-classification": getData("token-classification", data_default25),
+ translation: getData("translation", data_default26),
+ "unconditional-image-generation": getData("unconditional-image-generation", data_default30),
+ "visual-question-answering": getData("visual-question-answering", data_default32),
  "voice-activity-detection": void 0,
- "zero-shot-classification": getData("zero-shot-classification", data_default32),
- "zero-shot-image-classification": getData("zero-shot-image-classification", data_default33),
- "zero-shot-object-detection": getData("zero-shot-object-detection", data_default14),
- "text-to-3d": getData("text-to-3d", data_default14),
- "image-to-3d": getData("image-to-3d", data_default14)
+ "zero-shot-classification": getData("zero-shot-classification", data_default33),
+ "zero-shot-image-classification": getData("zero-shot-image-classification", data_default34),
+ "zero-shot-object-detection": getData("zero-shot-object-detection", data_default35),
+ "text-to-3d": getData("text-to-3d", data_default15),
+ "image-to-3d": getData("image-to-3d", data_default15)
  };

  // src/model-libraries.ts
  var ModelLibrary = /* @__PURE__ */ ((ModelLibrary2) => {
- ModelLibrary2["adapter-transformers"] = "Adapter Transformers";
+ ModelLibrary2["adapter-transformers"] = "Adapters";
  ModelLibrary2["allennlp"] = "allenNLP";
  ModelLibrary2["asteroid"] = "Asteroid";
  ModelLibrary2["bertopic"] = "BERTopic";
@@ -5080,7 +5226,8 @@ function getJsInferenceSnippet(model, accessToken) {
  function hasJsInferenceSnippet(model) {
  return !!model.pipeline_tag && model.pipeline_tag in jsSnippets;
  }
- export {
+ // Annotate the CommonJS export names for ESM import in node:
+ 0 && (module.exports = {
  ALL_DISPLAY_MODEL_LIBRARY_KEYS,
  InferenceDisplayability,
  LIBRARY_TASK_MAPPING_EXCLUDING_TRANSFORMERS,
@@ -5095,5 +5242,5 @@ export {
  SUBTASK_TYPES,
  TASKS_DATA,
  TASKS_MODEL_LIBRARIES,
- snippets_exports as snippets
- };
+ snippets
+ });