@huggingface/tasks 0.2.0 → 0.2.2

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
Files changed (120)
  1. package/dist/{index.mjs → index.cjs} +295 -134
  2. package/dist/index.d.ts +8 -6
  3. package/dist/index.js +260 -169
  4. package/package.json +13 -8
  5. package/src/library-to-tasks.ts +1 -1
  6. package/src/library-ui-elements.ts +24 -10
  7. package/src/model-data.ts +1 -1
  8. package/src/model-libraries.ts +3 -2
  9. package/src/pipelines.ts +1 -1
  10. package/src/tasks/audio-classification/about.md +1 -1
  11. package/src/tasks/audio-classification/inference.ts +51 -0
  12. package/src/tasks/audio-classification/spec/input.json +34 -0
  13. package/src/tasks/audio-classification/spec/output.json +21 -0
  14. package/src/tasks/audio-to-audio/about.md +1 -1
  15. package/src/tasks/automatic-speech-recognition/about.md +4 -2
  16. package/src/tasks/automatic-speech-recognition/inference.ts +154 -0
  17. package/src/tasks/automatic-speech-recognition/spec/input.json +34 -0
  18. package/src/tasks/automatic-speech-recognition/spec/output.json +36 -0
  19. package/src/tasks/common-definitions.json +109 -0
  20. package/src/tasks/depth-estimation/data.ts +8 -4
  21. package/src/tasks/depth-estimation/inference.ts +35 -0
  22. package/src/tasks/depth-estimation/spec/input.json +30 -0
  23. package/src/tasks/depth-estimation/spec/output.json +10 -0
  24. package/src/tasks/document-question-answering/inference.ts +102 -0
  25. package/src/tasks/document-question-answering/spec/input.json +85 -0
  26. package/src/tasks/document-question-answering/spec/output.json +36 -0
  27. package/src/tasks/feature-extraction/inference.ts +22 -0
  28. package/src/tasks/feature-extraction/spec/input.json +26 -0
  29. package/src/tasks/feature-extraction/spec/output.json +7 -0
  30. package/src/tasks/fill-mask/inference.ts +61 -0
  31. package/src/tasks/fill-mask/spec/input.json +38 -0
  32. package/src/tasks/fill-mask/spec/output.json +29 -0
  33. package/src/tasks/image-classification/inference.ts +51 -0
  34. package/src/tasks/image-classification/spec/input.json +34 -0
  35. package/src/tasks/image-classification/spec/output.json +10 -0
  36. package/src/tasks/image-segmentation/inference.ts +65 -0
  37. package/src/tasks/image-segmentation/spec/input.json +54 -0
  38. package/src/tasks/image-segmentation/spec/output.json +25 -0
  39. package/src/tasks/image-to-image/inference.ts +67 -0
  40. package/src/tasks/image-to-image/spec/input.json +52 -0
  41. package/src/tasks/image-to-image/spec/output.json +12 -0
  42. package/src/tasks/image-to-text/inference.ts +138 -0
  43. package/src/tasks/image-to-text/spec/input.json +34 -0
  44. package/src/tasks/image-to-text/spec/output.json +17 -0
  45. package/src/tasks/index.ts +5 -2
  46. package/src/tasks/mask-generation/about.md +65 -0
  47. package/src/tasks/mask-generation/data.ts +55 -0
  48. package/src/tasks/object-detection/inference.ts +62 -0
  49. package/src/tasks/object-detection/spec/input.json +30 -0
  50. package/src/tasks/object-detection/spec/output.json +46 -0
  51. package/src/tasks/placeholder/data.ts +3 -0
  52. package/src/tasks/placeholder/spec/input.json +35 -0
  53. package/src/tasks/placeholder/spec/output.json +17 -0
  54. package/src/tasks/question-answering/inference.ts +99 -0
  55. package/src/tasks/question-answering/spec/input.json +67 -0
  56. package/src/tasks/question-answering/spec/output.json +29 -0
  57. package/src/tasks/sentence-similarity/about.md +2 -2
  58. package/src/tasks/sentence-similarity/inference.ts +32 -0
  59. package/src/tasks/sentence-similarity/spec/input.json +40 -0
  60. package/src/tasks/sentence-similarity/spec/output.json +12 -0
  61. package/src/tasks/summarization/data.ts +1 -0
  62. package/src/tasks/summarization/inference.ts +58 -0
  63. package/src/tasks/summarization/spec/input.json +7 -0
  64. package/src/tasks/summarization/spec/output.json +7 -0
  65. package/src/tasks/table-question-answering/inference.ts +61 -0
  66. package/src/tasks/table-question-answering/spec/input.json +39 -0
  67. package/src/tasks/table-question-answering/spec/output.json +40 -0
  68. package/src/tasks/tabular-classification/about.md +1 -1
  69. package/src/tasks/tabular-regression/about.md +1 -1
  70. package/src/tasks/text-classification/about.md +1 -0
  71. package/src/tasks/text-classification/inference.ts +51 -0
  72. package/src/tasks/text-classification/spec/input.json +35 -0
  73. package/src/tasks/text-classification/spec/output.json +10 -0
  74. package/src/tasks/text-generation/about.md +24 -13
  75. package/src/tasks/text-generation/data.ts +22 -38
  76. package/src/tasks/text-generation/inference.ts +85 -0
  77. package/src/tasks/text-generation/spec/input.json +74 -0
  78. package/src/tasks/text-generation/spec/output.json +17 -0
  79. package/src/tasks/text-to-audio/inference.ts +138 -0
  80. package/src/tasks/text-to-audio/spec/input.json +31 -0
  81. package/src/tasks/text-to-audio/spec/output.json +20 -0
  82. package/src/tasks/text-to-image/about.md +11 -2
  83. package/src/tasks/text-to-image/data.ts +6 -2
  84. package/src/tasks/text-to-image/inference.ts +73 -0
  85. package/src/tasks/text-to-image/spec/input.json +57 -0
  86. package/src/tasks/text-to-image/spec/output.json +15 -0
  87. package/src/tasks/text-to-speech/about.md +4 -2
  88. package/src/tasks/text-to-speech/data.ts +1 -0
  89. package/src/tasks/text-to-speech/inference.ts +146 -0
  90. package/src/tasks/text-to-speech/spec/input.json +7 -0
  91. package/src/tasks/text-to-speech/spec/output.json +7 -0
  92. package/src/tasks/text2text-generation/inference.ts +53 -0
  93. package/src/tasks/text2text-generation/spec/input.json +55 -0
  94. package/src/tasks/text2text-generation/spec/output.json +17 -0
  95. package/src/tasks/token-classification/inference.ts +82 -0
  96. package/src/tasks/token-classification/spec/input.json +65 -0
  97. package/src/tasks/token-classification/spec/output.json +33 -0
  98. package/src/tasks/translation/data.ts +1 -0
  99. package/src/tasks/translation/inference.ts +58 -0
  100. package/src/tasks/translation/spec/input.json +7 -0
  101. package/src/tasks/translation/spec/output.json +7 -0
  102. package/src/tasks/video-classification/inference.ts +59 -0
  103. package/src/tasks/video-classification/spec/input.json +42 -0
  104. package/src/tasks/video-classification/spec/output.json +10 -0
  105. package/src/tasks/visual-question-answering/inference.ts +63 -0
  106. package/src/tasks/visual-question-answering/spec/input.json +41 -0
  107. package/src/tasks/visual-question-answering/spec/output.json +21 -0
  108. package/src/tasks/zero-shot-classification/inference.ts +67 -0
  109. package/src/tasks/zero-shot-classification/spec/input.json +50 -0
  110. package/src/tasks/zero-shot-classification/spec/output.json +10 -0
  111. package/src/tasks/zero-shot-image-classification/data.ts +8 -5
  112. package/src/tasks/zero-shot-image-classification/inference.ts +61 -0
  113. package/src/tasks/zero-shot-image-classification/spec/input.json +45 -0
  114. package/src/tasks/zero-shot-image-classification/spec/output.json +10 -0
  115. package/src/tasks/zero-shot-object-detection/about.md +45 -0
  116. package/src/tasks/zero-shot-object-detection/data.ts +62 -0
  117. package/src/tasks/zero-shot-object-detection/inference.ts +66 -0
  118. package/src/tasks/zero-shot-object-detection/spec/input.json +40 -0
  119. package/src/tasks/zero-shot-object-detection/spec/output.json +47 -0
  120. package/tsconfig.json +3 -3
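Note on the dist changes in the list above: dist/index.js switches from a CommonJS bundle to an ES module (the __toCommonJS wrapper is removed and a top-level export { ... } block is added in the diff below), while a dist/index.cjs file appears in place of dist/index.mjs. A minimal consumption sketch, not part of the diff, assuming the updated package.json (also changed in this release) wires imports of "@huggingface/tasks" to the new ESM build and keeps the named exports shown in that export block:

// Sketch only: assumes the 0.2.2 entry points resolve to the ESM build and
// that TASKS_DATA and MODEL_LIBRARIES_UI_ELEMENTS remain named exports.
import { TASKS_DATA, MODEL_LIBRARIES_UI_ELEMENTS } from "@huggingface/tasks";

// Task metadata added in this release (see the mask-generation hunk below).
console.log(TASKS_DATA["mask-generation"]?.summary);

// UI metadata for the newly registered unity-sentis library.
console.log(MODEL_LIBRARIES_UI_ELEMENTS["unity-sentis"]?.repoUrl);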
package/dist/index.js CHANGED
@@ -1,42 +1,8 @@
- "use strict";
  var __defProp = Object.defineProperty;
- var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
- var __getOwnPropNames = Object.getOwnPropertyNames;
- var __hasOwnProp = Object.prototype.hasOwnProperty;
  var __export = (target, all) => {
  for (var name in all)
  __defProp(target, name, { get: all[name], enumerable: true });
  };
- var __copyProps = (to, from, except, desc) => {
- if (from && typeof from === "object" || typeof from === "function") {
- for (let key of __getOwnPropNames(from))
- if (!__hasOwnProp.call(to, key) && key !== except)
- __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
- }
- return to;
- };
- var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
-
- // src/index.ts
- var src_exports = {};
- __export(src_exports, {
- ALL_DISPLAY_MODEL_LIBRARY_KEYS: () => ALL_DISPLAY_MODEL_LIBRARY_KEYS,
- InferenceDisplayability: () => InferenceDisplayability,
- LIBRARY_TASK_MAPPING_EXCLUDING_TRANSFORMERS: () => LIBRARY_TASK_MAPPING_EXCLUDING_TRANSFORMERS,
- MAPPING_DEFAULT_WIDGET: () => MAPPING_DEFAULT_WIDGET,
- MODALITIES: () => MODALITIES,
- MODALITY_LABELS: () => MODALITY_LABELS,
- MODEL_LIBRARIES_UI_ELEMENTS: () => MODEL_LIBRARIES_UI_ELEMENTS,
- ModelLibrary: () => ModelLibrary,
- PIPELINE_DATA: () => PIPELINE_DATA,
- PIPELINE_TYPES: () => PIPELINE_TYPES,
- PIPELINE_TYPES_SET: () => PIPELINE_TYPES_SET,
- SUBTASK_TYPES: () => SUBTASK_TYPES,
- TASKS_DATA: () => TASKS_DATA,
- TASKS_MODEL_LIBRARIES: () => TASKS_MODEL_LIBRARIES,
- snippets: () => snippets_exports
- });
- module.exports = __toCommonJS(src_exports);

  // src/library-to-tasks.ts
  var LIBRARY_TASK_MAPPING_EXCLUDING_TRANSFORMERS = {
@@ -83,11 +49,11 @@ function nameWithoutNamespace(modelId) {
  const splitted = modelId.split("/");
  return splitted.length === 1 ? splitted[0] : splitted[1];
  }
- var adapter_transformers = (model) => [
- `from transformers import ${model.config?.adapter_transformers?.model_class}
+ var adapters = (model) => [
+ `from adapters import AutoAdapterModel

- model = ${model.config?.adapter_transformers?.model_class}.from_pretrained("${model.config?.adapter_transformers?.model_name}")
- model.load_adapter("${model.id}", source="hf")`
+ model = AutoAdapterModel.from_pretrained("${model.config?.adapter_transformers?.model_name}")
+ model.load_adapter("${model.id}", set_active=True)`
  ];
  var allennlpUnknown = (model) => [
  `import allennlp_models
@@ -527,6 +493,13 @@ transcriptions = asr_model.transcribe(["file.wav"])`
  }
  };
  var mlAgents = (model) => [`mlagents-load-from-hf --repo-id="${model.id}" --local-dir="./downloads"`];
+ var sentis = () => [
+ `string modelName = "[Your model name here].sentis";
+ Model model = ModelLoader.Load(Application.streamingAssetsPath + "/" + modelName);
+ IWorker engine = WorkerFactory.CreateWorker(BackendType.GPUCompute, model);
+ // Please see provided C# file for more details
+ `
+ ];
  var mlx = (model) => [
  `pip install huggingface_hub hf_transfer

@@ -547,11 +520,11 @@ model = AutoModel.load_from_hf_hub("${model.id}")`
  ];
  var MODEL_LIBRARIES_UI_ELEMENTS = {
  "adapter-transformers": {
- btnLabel: "Adapter Transformers",
- repoName: "adapter-transformers",
- repoUrl: "https://github.com/Adapter-Hub/adapter-transformers",
- docsUrl: "https://huggingface.co/docs/hub/adapter-transformers",
- snippets: adapter_transformers
+ btnLabel: "Adapters",
+ repoName: "adapters",
+ repoUrl: "https://github.com/Adapter-Hub/adapters",
+ docsUrl: "https://huggingface.co/docs/hub/adapters",
+ snippets: adapters
  },
  allennlp: {
  btnLabel: "AllenNLP",
@@ -749,10 +722,16 @@ var MODEL_LIBRARIES_UI_ELEMENTS = {
  "ml-agents": {
  btnLabel: "ml-agents",
  repoName: "ml-agents",
- repoUrl: "https://github.com/huggingface/ml-agents",
+ repoUrl: "https://github.com/Unity-Technologies/ml-agents",
  docsUrl: "https://huggingface.co/docs/hub/ml-agents",
  snippets: mlAgents
  },
+ "unity-sentis": {
+ btnLabel: "unity-sentis",
+ repoName: "unity-sentis",
+ repoUrl: "https://github.com/Unity-Technologies/sentis-samples",
+ snippets: sentis
+ },
  pythae: {
  btnLabel: "pythae",
  repoName: "pythae",
@@ -2867,8 +2846,60 @@ var taskData11 = {
  };
  var data_default11 = taskData11;

- // src/tasks/object-detection/data.ts
+ // src/tasks/mask-generation/data.ts
  var taskData12 = {
+ datasets: [],
+ demo: {
+ inputs: [
+ {
+ filename: "mask-generation-input.png",
+ type: "img"
+ }
+ ],
+ outputs: [
+ {
+ filename: "mask-generation-output.png",
+ type: "img"
+ }
+ ]
+ },
+ metrics: [],
+ models: [
+ {
+ description: "Small yet powerful mask generation model.",
+ id: "Zigeng/SlimSAM-uniform-50"
+ },
+ {
+ description: "Very strong mask generation model.",
+ id: "facebook/sam-vit-huge"
+ }
+ ],
+ spaces: [
+ {
+ description: "An application that combines a mask generation model with an image embedding model for open-vocabulary image segmentation.",
+ id: "SkalskiP/SAM_and_MetaCLIP"
+ },
+ {
+ description: "An application that compares the performance of a large and a small mask generation model.",
+ id: "merve/slimsam"
+ },
+ {
+ description: "An application based on an improved mask generation model.",
+ id: "linfanluntan/Grounded-SAM"
+ },
+ {
+ description: "An application to remove objects from videos using mask generation models.",
+ id: "SkalskiP/SAM_and_ProPainter"
+ }
+ ],
+ summary: "Mask generation is the task of generating masks that identify a specific object or region of interest in a given image. Masks are often used in segmentation tasks, where they provide a precise way to isolate the object of interest for further processing or analysis.",
+ widgetModels: [],
+ youtubeId: ""
+ };
+ var data_default12 = taskData12;
+
+ // src/tasks/object-detection/data.ts
+ var taskData13 = {
  datasets: [
  {
  // TODO write proper description
@@ -2940,10 +2971,10 @@ var taskData12 = {
  widgetModels: ["facebook/detr-resnet-50"],
  youtubeId: "WdAeKSOpxhw"
  };
- var data_default12 = taskData12;
+ var data_default13 = taskData13;

  // src/tasks/depth-estimation/data.ts
- var taskData13 = {
+ var taskData14 = {
  datasets: [
  {
  description: "NYU Depth V2 Dataset: Video dataset containing both RGB and depth sensor data",
@@ -2971,8 +3002,8 @@ var taskData13 = {
  id: "Intel/dpt-large"
  },
  {
- description: "Strong Depth Estimation model trained on the KITTI dataset.",
- id: "facebook/dpt-dinov2-large-kitti"
+ description: "Strong Depth Estimation model trained on a big compilation of datasets.",
+ id: "LiheYoung/depth-anything-large-hf"
  },
  {
  description: "A strong monocular depth estimation model.",
@@ -2985,18 +3016,22 @@ var taskData13 = {
  id: "radames/dpt-depth-estimation-3d-voxels"
  },
  {
- description: "An application that can estimate the depth in a given image.",
- id: "keras-io/Monocular-Depth-Estimation"
+ description: "An application to compare the outputs of different depth estimation models.",
+ id: "LiheYoung/Depth-Anything"
+ },
+ {
+ description: "An application to try state-of-the-art depth estimation.",
+ id: "merve/compare_depth_models"
  }
  ],
  summary: "Depth estimation is the task of predicting depth of the objects present in an image.",
  widgetModels: [""],
  youtubeId: ""
  };
- var data_default13 = taskData13;
+ var data_default14 = taskData14;

  // src/tasks/placeholder/data.ts
- var taskData14 = {
+ var taskData15 = {
  datasets: [],
  demo: {
  inputs: [],
@@ -3008,12 +3043,15 @@ var taskData14 = {
  spaces: [],
  summary: "",
  widgetModels: [],
- youtubeId: void 0
+ youtubeId: void 0,
+ /// If this is a subtask, link to the most general task ID
+ /// (eg, text2text-generation is the canonical ID of translation)
+ canonicalId: void 0
  };
- var data_default14 = taskData14;
+ var data_default15 = taskData15;

  // src/tasks/reinforcement-learning/data.ts
- var taskData15 = {
+ var taskData16 = {
  datasets: [
  {
  description: "A curation of widely used datasets for Data Driven Deep Reinforcement Learning (D4RL)",
@@ -3079,10 +3117,10 @@ var taskData15 = {
  widgetModels: [],
  youtubeId: "q0BiUn5LiBc"
  };
- var data_default15 = taskData15;
+ var data_default16 = taskData16;

  // src/tasks/question-answering/data.ts
- var taskData16 = {
+ var taskData17 = {
  datasets: [
  {
  // TODO write proper description
@@ -3146,10 +3184,10 @@ var taskData16 = {
  widgetModels: ["deepset/roberta-base-squad2"],
  youtubeId: "ajPx5LwJD-I"
  };
- var data_default16 = taskData16;
+ var data_default17 = taskData17;

  // src/tasks/sentence-similarity/data.ts
- var taskData17 = {
+ var taskData18 = {
  datasets: [
  {
  description: "Bing queries with relevant passages from various web sources.",
@@ -3241,10 +3279,11 @@ var taskData17 = {
  widgetModels: ["sentence-transformers/all-MiniLM-L6-v2"],
  youtubeId: "VCZq5AkbNEU"
  };
- var data_default17 = taskData17;
+ var data_default18 = taskData18;

  // src/tasks/summarization/data.ts
- var taskData18 = {
+ var taskData19 = {
+ canonicalId: "text2text-generation",
  datasets: [
  {
  description: "News articles in five different languages along with their summaries. Widely used for benchmarking multilingual summarization models.",
@@ -3309,10 +3348,10 @@ var taskData18 = {
  widgetModels: ["sshleifer/distilbart-cnn-12-6"],
  youtubeId: "yHnr5Dk2zCI"
  };
- var data_default18 = taskData18;
+ var data_default19 = taskData19;

  // src/tasks/table-question-answering/data.ts
- var taskData19 = {
+ var taskData20 = {
  datasets: [
  {
  description: "The WikiTableQuestions dataset is a large-scale dataset for the task of question answering on semi-structured tables.",
@@ -3363,10 +3402,10 @@ var taskData19 = {
  summary: "Table Question Answering (Table QA) is the answering a question about an information on a given table.",
  widgetModels: ["google/tapas-base-finetuned-wtq"]
  };
- var data_default19 = taskData19;
+ var data_default20 = taskData20;

  // src/tasks/tabular-classification/data.ts
- var taskData20 = {
+ var taskData21 = {
  datasets: [
  {
  description: "A comprehensive curation of datasets covering all benchmarks.",
@@ -3430,10 +3469,10 @@ var taskData20 = {
  widgetModels: ["scikit-learn/tabular-playground"],
  youtubeId: ""
  };
- var data_default20 = taskData20;
+ var data_default21 = taskData21;

  // src/tasks/tabular-regression/data.ts
- var taskData21 = {
+ var taskData22 = {
  datasets: [
  {
  description: "A comprehensive curation of datasets covering all benchmarks.",
@@ -3485,10 +3524,10 @@ var taskData21 = {
  widgetModels: ["scikit-learn/Fish-Weight"],
  youtubeId: ""
  };
- var data_default21 = taskData21;
+ var data_default22 = taskData22;

  // src/tasks/text-to-image/data.ts
- var taskData22 = {
+ var taskData23 = {
  datasets: [
  {
  description: "RedCaps is a large-scale dataset of 12M image-text pairs collected from Reddit.",
@@ -3564,22 +3603,27 @@ var taskData22 = {
  id: "latent-consistency/lcm-lora-for-sdxl"
  },
  {
- description: "A powerful text-to-image application that can generate 3D representations.",
- id: "hysts/Shap-E"
+ description: "A gallery to explore various text-to-image models.",
+ id: "multimodalart/LoraTheExplorer"
  },
  {
  description: "An application for `text-to-image`, `image-to-image` and image inpainting.",
  id: "ArtGAN/Stable-Diffusion-ControlNet-WebUI"
+ },
+ {
+ description: "An application to generate realistic images given photos of a person and a prompt.",
+ id: "InstantX/InstantID"
  }
  ],
  summary: "Generates images from input text. These models can be used to generate and modify images based on text prompts.",
  widgetModels: ["CompVis/stable-diffusion-v1-4"],
  youtubeId: ""
  };
- var data_default22 = taskData22;
+ var data_default23 = taskData23;

  // src/tasks/text-to-speech/data.ts
- var taskData23 = {
+ var taskData24 = {
+ canonicalId: "text-to-audio",
  datasets: [
  {
  description: "Thousands of short audio clips of a single speaker.",
@@ -3643,10 +3687,10 @@ var taskData23 = {
  widgetModels: ["suno/bark"],
  youtubeId: "NW62DpzJ274"
  };
- var data_default23 = taskData23;
+ var data_default24 = taskData24;

  // src/tasks/token-classification/data.ts
- var taskData24 = {
+ var taskData25 = {
  datasets: [
  {
  description: "A widely used dataset useful to benchmark named entity recognition models.",
@@ -3722,10 +3766,11 @@ var taskData24 = {
  widgetModels: ["dslim/bert-base-NER"],
  youtubeId: "wVHdVlPScxA"
  };
- var data_default24 = taskData24;
+ var data_default25 = taskData25;

  // src/tasks/translation/data.ts
- var taskData25 = {
+ var taskData26 = {
+ canonicalId: "text2text-generation",
  datasets: [
  {
  description: "A dataset of copyright-free books translated into 16 different languages.",
@@ -3786,10 +3831,10 @@ var taskData25 = {
  widgetModels: ["t5-small"],
  youtubeId: "1JvfrvZgi6c"
  };
- var data_default25 = taskData25;
+ var data_default26 = taskData26;

  // src/tasks/text-classification/data.ts
- var taskData26 = {
+ var taskData27 = {
  datasets: [
  {
  description: "A widely used dataset used to benchmark multiple variants of text classification.",
@@ -3874,10 +3919,10 @@ var taskData26 = {
  widgetModels: ["distilbert-base-uncased-finetuned-sst-2-english"],
  youtubeId: "leNG9fN9FQU"
  };
- var data_default26 = taskData26;
+ var data_default27 = taskData27;

  // src/tasks/text-generation/data.ts
- var taskData27 = {
+ var taskData28 = {
  datasets: [
  {
  description: "A large multilingual dataset of text crawled from the web.",
@@ -3888,12 +3933,12 @@ var taskData27 = {
  id: "the_pile"
  },
  {
- description: "A crowd-sourced instruction dataset to develop an AI assistant.",
- id: "OpenAssistant/oasst1"
+ description: "Truly open-source, curated and cleaned dialogue dataset.",
+ id: "HuggingFaceH4/ultrachat_200k"
  },
  {
- description: "A crowd-sourced instruction dataset created by Databricks employees.",
- id: "databricks/databricks-dolly-15k"
+ description: "An instruction dataset with preference ratings on responses.",
+ id: "openbmb/UltraFeedback"
  }
  ],
  demo: {
@@ -3932,72 +3977,56 @@ var taskData27 = {
  id: "bigcode/starcoder"
  },
  {
- description: "A model trained to follow instructions, uses Pythia-12b as base model.",
- id: "databricks/dolly-v2-12b"
- },
- {
- description: "A model trained to follow instructions curated by community, uses Pythia-12b as base model.",
- id: "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5"
- },
- {
- description: "A large language model trained to generate text in English.",
- id: "stabilityai/stablelm-tuned-alpha-7b"
- },
- {
- description: "A model trained to follow instructions, based on mosaicml/mpt-7b.",
- id: "mosaicml/mpt-7b-instruct"
+ description: "A very powerful text generation model.",
+ id: "mistralai/Mixtral-8x7B-Instruct-v0.1"
  },
  {
- description: "A large language model trained to generate text in English.",
- id: "EleutherAI/pythia-12b"
+ description: "Small yet powerful text generation model.",
+ id: "microsoft/phi-2"
  },
  {
- description: "A large text-to-text model trained to follow instructions.",
- id: "google/flan-ul2"
+ description: "A very powerful model that can chat, do mathematical reasoning and write code.",
+ id: "openchat/openchat-3.5-0106"
  },
  {
- description: "A large and powerful text generation model.",
- id: "tiiuae/falcon-40b"
+ description: "Very strong yet small assistant model.",
+ id: "HuggingFaceH4/zephyr-7b-beta"
  },
  {
- description: "State-of-the-art open-source large language model.",
+ description: "Very strong open-source large language model.",
  id: "meta-llama/Llama-2-70b-hf"
  }
  ],
  spaces: [
  {
- description: "A robust text generation model that can perform various tasks through natural language prompting.",
- id: "huggingface/bloom_demo"
+ description: "A leaderboard to compare different open-source text generation models based on various benchmarks.",
+ id: "HuggingFaceH4/open_llm_leaderboard"
  },
  {
- description: "An text generation based application that can write code for 80+ languages.",
- id: "bigcode/bigcode-playground"
+ description: "An text generation based application based on a very powerful LLaMA2 model.",
+ id: "ysharma/Explore_llamav2_with_TGI"
  },
  {
- description: "An text generation based application for conversations.",
- id: "h2oai/h2ogpt-chatbot"
+ description: "An text generation based application to converse with Zephyr model.",
+ id: "HuggingFaceH4/zephyr-chat"
  },
  {
  description: "An text generation application that combines OpenAI and Hugging Face models.",
  id: "microsoft/HuggingGPT"
  },
  {
- description: "An text generation application that uses StableLM-tuned-alpha-7b.",
- id: "stabilityai/stablelm-tuned-alpha-chat"
- },
- {
- description: "An UI that uses StableLM-tuned-alpha-7b.",
- id: "togethercomputer/OpenChatKit"
+ description: "An chatbot to converse with a very powerful text generation model.",
+ id: "mlabonne/phixtral-chat"
  }
  ],
- summary: "Generating text is the task of producing new text. These models can, for example, fill in incomplete text or paraphrase.",
+ summary: "Generating text is the task of generating new text given another text. These models can, for example, fill in incomplete text or paraphrase.",
  widgetModels: ["HuggingFaceH4/zephyr-7b-beta"],
  youtubeId: "Vpjb1lu0MDk"
  };
- var data_default27 = taskData27;
+ var data_default28 = taskData28;

  // src/tasks/text-to-video/data.ts
- var taskData28 = {
+ var taskData29 = {
  datasets: [
  {
  description: "Microsoft Research Video to Text is a large-scale dataset for open domain video captioning",
@@ -4089,10 +4118,10 @@ var taskData28 = {
  widgetModels: [],
  youtubeId: void 0
  };
- var data_default28 = taskData28;
+ var data_default29 = taskData29;

  // src/tasks/unconditional-image-generation/data.ts
- var taskData29 = {
+ var taskData30 = {
  datasets: [
  {
  description: "The CIFAR-100 dataset consists of 60000 32x32 colour images in 100 classes, with 600 images per class.",
@@ -4154,10 +4183,10 @@ var taskData29 = {
  // TODO: Add related video
  youtubeId: ""
  };
- var data_default29 = taskData29;
+ var data_default30 = taskData30;

  // src/tasks/video-classification/data.ts
- var taskData30 = {
+ var taskData31 = {
  datasets: [
  {
  // TODO write proper description
@@ -4236,10 +4265,10 @@ var taskData30 = {
  widgetModels: [],
  youtubeId: ""
  };
- var data_default30 = taskData30;
+ var data_default31 = taskData31;

  // src/tasks/visual-question-answering/data.ts
- var taskData31 = {
+ var taskData32 = {
  datasets: [
  {
  description: "A widely used dataset containing questions (with answers) about images.",
@@ -4329,10 +4358,10 @@ var taskData31 = {
  widgetModels: ["dandelin/vilt-b32-finetuned-vqa"],
  youtubeId: ""
  };
- var data_default31 = taskData31;
+ var data_default32 = taskData32;

  // src/tasks/zero-shot-classification/data.ts
- var taskData32 = {
+ var taskData33 = {
  datasets: [
  {
  description: "A widely used dataset used to benchmark multiple variants of text classification.",
@@ -4391,10 +4420,10 @@ var taskData32 = {
  summary: "Zero-shot text classification is a task in natural language processing where a model is trained on a set of labeled examples but is then able to classify new examples from previously unseen classes.",
  widgetModels: ["facebook/bart-large-mnli"]
  };
- var data_default32 = taskData32;
+ var data_default33 = taskData33;

  // src/tasks/zero-shot-image-classification/data.ts
- var taskData33 = {
+ var taskData34 = {
  datasets: [
  {
  // TODO write proper description
@@ -4446,8 +4475,8 @@ var taskData33 = {
  id: "openai/clip-vit-base-patch16"
  },
  {
- description: "Robust image classification model trained on publicly available image-caption data trained on additional high pixel data for better performance.",
- id: "openai/clip-vit-large-patch14-336"
+ description: "Strong zero-shot image classification model.",
+ id: "google/siglip-base-patch16-224"
  },
  {
  description: "Strong image classification model for biomedical domain.",
@@ -4456,15 +4485,77 @@ var taskData33 = {
  ],
  spaces: [
  {
- description: "An application that leverages zero shot image classification to find best captions to generate an image. ",
+ description: "An application that leverages zero-shot image classification to find best captions to generate an image. ",
  id: "pharma/CLIP-Interrogator"
+ },
+ {
+ description: "An application to compare different zero-shot image classification models. ",
+ id: "merve/compare_clip_siglip"
  }
  ],
- summary: "Zero shot image classification is the task of classifying previously unseen classes during training of a model.",
+ summary: "Zero-shot image classification is the task of classifying previously unseen classes during training of a model.",
  widgetModels: ["openai/clip-vit-large-patch14-336"],
  youtubeId: ""
  };
- var data_default33 = taskData33;
+ var data_default34 = taskData34;
+
+ // src/tasks/zero-shot-object-detection/data.ts
+ var taskData35 = {
+ datasets: [],
+ demo: {
+ inputs: [
+ {
+ filename: "zero-shot-object-detection-input.jpg",
+ type: "img"
+ },
+ {
+ label: "Classes",
+ content: "cat, dog, bird",
+ type: "text"
+ }
+ ],
+ outputs: [
+ {
+ filename: "zero-shot-object-detection-output.jpg",
+ type: "img"
+ }
+ ]
+ },
+ metrics: [
+ {
+ description: "The Average Precision (AP) metric is the Area Under the PR Curve (AUC-PR). It is calculated for each class separately",
+ id: "Average Precision"
+ },
+ {
+ description: "The Mean Average Precision (mAP) metric is the overall average of the AP values",
+ id: "Mean Average Precision"
+ },
+ {
+ description: "The AP\u03B1 metric is the Average Precision at the IoU threshold of a \u03B1 value, for example, AP50 and AP75",
+ id: "AP\u03B1"
+ }
+ ],
+ models: [
+ {
+ description: "Solid zero-shot object detection model that uses CLIP as backbone.",
+ id: "google/owlvit-base-patch32"
+ },
+ {
+ description: "The improved version of the owlvit model.",
+ id: "google/owlv2-base-patch16-ensemble"
+ }
+ ],
+ spaces: [
+ {
+ description: "A demo to try the state-of-the-art zero-shot object detection model, OWLv2.",
+ id: "merve/owlv2"
+ }
+ ],
+ summary: "Zero-shot object detection is a computer vision task to detect objects and their classes in images, without any prior training or knowledge of the classes. Zero-shot object detection models receive an image as input, as well as a list of candidate classes, and output the bounding boxes and labels where the objects have been detected.",
+ widgetModels: [],
+ youtubeId: ""
+ };
+ var data_default35 = taskData35;

  // src/tasks/index.ts
  var TASKS_MODEL_LIBRARIES = {
@@ -4525,7 +4616,7 @@ var TASKS_MODEL_LIBRARIES = {
  "text-to-3d": [],
  "image-to-3d": []
  };
- function getData(type, partialTaskData = data_default14) {
+ function getData(type, partialTaskData = data_default15) {
  return {
  ...partialTaskData,
  id: type,
@@ -4538,7 +4629,7 @@ var TASKS_DATA = {
  "audio-to-audio": getData("audio-to-audio", data_default2),
  "automatic-speech-recognition": getData("automatic-speech-recognition", data_default3),
  conversational: getData("conversational", data_default4),
- "depth-estimation": getData("depth-estimation", data_default13),
+ "depth-estimation": getData("depth-estimation", data_default14),
  "document-question-answering": getData("document-question-answering", data_default5),
  "feature-extraction": getData("feature-extraction", data_default6),
  "fill-mask": getData("fill-mask", data_default7),
@@ -4548,45 +4639,45 @@ var TASKS_DATA = {
  "image-to-image": getData("image-to-image", data_default9),
  "image-to-text": getData("image-to-text", data_default10),
  "image-to-video": void 0,
- "mask-generation": getData("mask-generation", data_default14),
+ "mask-generation": getData("mask-generation", data_default12),
  "multiple-choice": void 0,
- "object-detection": getData("object-detection", data_default12),
- "video-classification": getData("video-classification", data_default30),
+ "object-detection": getData("object-detection", data_default13),
+ "video-classification": getData("video-classification", data_default31),
  other: void 0,
- "question-answering": getData("question-answering", data_default16),
- "reinforcement-learning": getData("reinforcement-learning", data_default15),
+ "question-answering": getData("question-answering", data_default17),
+ "reinforcement-learning": getData("reinforcement-learning", data_default16),
  robotics: void 0,
- "sentence-similarity": getData("sentence-similarity", data_default17),
- summarization: getData("summarization", data_default18),
- "table-question-answering": getData("table-question-answering", data_default19),
+ "sentence-similarity": getData("sentence-similarity", data_default18),
+ summarization: getData("summarization", data_default19),
+ "table-question-answering": getData("table-question-answering", data_default20),
  "table-to-text": void 0,
- "tabular-classification": getData("tabular-classification", data_default20),
- "tabular-regression": getData("tabular-regression", data_default21),
+ "tabular-classification": getData("tabular-classification", data_default21),
+ "tabular-regression": getData("tabular-regression", data_default22),
  "tabular-to-text": void 0,
- "text-classification": getData("text-classification", data_default26),
- "text-generation": getData("text-generation", data_default27),
+ "text-classification": getData("text-classification", data_default27),
+ "text-generation": getData("text-generation", data_default28),
  "text-retrieval": void 0,
- "text-to-image": getData("text-to-image", data_default22),
- "text-to-speech": getData("text-to-speech", data_default23),
+ "text-to-image": getData("text-to-image", data_default23),
+ "text-to-speech": getData("text-to-speech", data_default24),
  "text-to-audio": void 0,
- "text-to-video": getData("text-to-video", data_default28),
+ "text-to-video": getData("text-to-video", data_default29),
  "text2text-generation": void 0,
  "time-series-forecasting": void 0,
- "token-classification": getData("token-classification", data_default24),
- translation: getData("translation", data_default25),
- "unconditional-image-generation": getData("unconditional-image-generation", data_default29),
- "visual-question-answering": getData("visual-question-answering", data_default31),
+ "token-classification": getData("token-classification", data_default25),
+ translation: getData("translation", data_default26),
+ "unconditional-image-generation": getData("unconditional-image-generation", data_default30),
+ "visual-question-answering": getData("visual-question-answering", data_default32),
  "voice-activity-detection": void 0,
- "zero-shot-classification": getData("zero-shot-classification", data_default32),
- "zero-shot-image-classification": getData("zero-shot-image-classification", data_default33),
- "zero-shot-object-detection": getData("zero-shot-object-detection", data_default14),
- "text-to-3d": getData("text-to-3d", data_default14),
- "image-to-3d": getData("image-to-3d", data_default14)
+ "zero-shot-classification": getData("zero-shot-classification", data_default33),
+ "zero-shot-image-classification": getData("zero-shot-image-classification", data_default34),
+ "zero-shot-object-detection": getData("zero-shot-object-detection", data_default35),
+ "text-to-3d": getData("text-to-3d", data_default15),
+ "image-to-3d": getData("image-to-3d", data_default15)
  };

  // src/model-libraries.ts
  var ModelLibrary = /* @__PURE__ */ ((ModelLibrary2) => {
- ModelLibrary2["adapter-transformers"] = "Adapter Transformers";
+ ModelLibrary2["adapter-transformers"] = "Adapters";
  ModelLibrary2["allennlp"] = "allenNLP";
  ModelLibrary2["asteroid"] = "Asteroid";
  ModelLibrary2["bertopic"] = "BERTopic";
@@ -4618,9 +4709,10 @@ var ModelLibrary = /* @__PURE__ */ ((ModelLibrary2) => {
  ModelLibrary2["stanza"] = "Stanza";
  ModelLibrary2["fasttext"] = "fastText";
  ModelLibrary2["stable-baselines3"] = "Stable-Baselines3";
- ModelLibrary2["ml-agents"] = "ML-Agents";
+ ModelLibrary2["ml-agents"] = "Unity ML-Agents";
  ModelLibrary2["pythae"] = "Pythae";
  ModelLibrary2["mindspore"] = "MindSpore";
+ ModelLibrary2["unity-sentis"] = "Unity Sentis";
  return ModelLibrary2;
  })(ModelLibrary || {});
  var ALL_MODEL_LIBRARY_KEYS = Object.keys(ModelLibrary);
@@ -5100,8 +5192,7 @@ function getJsInferenceSnippet(model, accessToken) {
  function hasJsInferenceSnippet(model) {
  return !!model.pipeline_tag && model.pipeline_tag in jsSnippets;
  }
- // Annotate the CommonJS export names for ESM import in node:
- 0 && (module.exports = {
+ export {
  ALL_DISPLAY_MODEL_LIBRARY_KEYS,
  InferenceDisplayability,
  LIBRARY_TASK_MAPPING_EXCLUDING_TRANSFORMERS,
@@ -5116,5 +5207,5 @@ function hasJsInferenceSnippet(model) {
  SUBTASK_TYPES,
  TASKS_DATA,
  TASKS_MODEL_LIBRARIES,
- snippets
- });
+ snippets_exports as snippets
+ };
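One other semantic addition visible in the data.ts hunks above is the new canonicalId field: summarization and translation now declare text2text-generation as their canonical task, and text-to-speech declares text-to-audio. A small reading sketch, not part of the diff, assuming the exported TypeScript types surface the field on TASKS_DATA entries:

// Hypothetical usage sketch; assumes canonicalId is exposed on the
// exported task data objects in 0.2.2.
import { TASKS_DATA } from "@huggingface/tasks";

console.log(TASKS_DATA["summarization"]?.canonicalId);  // "text2text-generation" per this diff
console.log(TASKS_DATA["text-to-speech"]?.canonicalId); // "text-to-audio" per this diff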