@huggingface/tasks 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120)
  1. package/dist/{index.mjs → index.cjs} +295 -134
  2. package/dist/index.d.ts +8 -6
  3. package/dist/index.js +260 -169
  4. package/package.json +13 -8
  5. package/src/library-to-tasks.ts +1 -1
  6. package/src/library-ui-elements.ts +24 -10
  7. package/src/model-data.ts +1 -1
  8. package/src/model-libraries.ts +3 -2
  9. package/src/pipelines.ts +1 -1
  10. package/src/tasks/audio-classification/about.md +1 -1
  11. package/src/tasks/audio-classification/inference.ts +51 -0
  12. package/src/tasks/audio-classification/spec/input.json +34 -0
  13. package/src/tasks/audio-classification/spec/output.json +21 -0
  14. package/src/tasks/audio-to-audio/about.md +1 -1
  15. package/src/tasks/automatic-speech-recognition/about.md +4 -2
  16. package/src/tasks/automatic-speech-recognition/inference.ts +154 -0
  17. package/src/tasks/automatic-speech-recognition/spec/input.json +34 -0
  18. package/src/tasks/automatic-speech-recognition/spec/output.json +36 -0
  19. package/src/tasks/common-definitions.json +109 -0
  20. package/src/tasks/depth-estimation/data.ts +8 -4
  21. package/src/tasks/depth-estimation/inference.ts +35 -0
  22. package/src/tasks/depth-estimation/spec/input.json +30 -0
  23. package/src/tasks/depth-estimation/spec/output.json +10 -0
  24. package/src/tasks/document-question-answering/inference.ts +102 -0
  25. package/src/tasks/document-question-answering/spec/input.json +85 -0
  26. package/src/tasks/document-question-answering/spec/output.json +36 -0
  27. package/src/tasks/feature-extraction/inference.ts +22 -0
  28. package/src/tasks/feature-extraction/spec/input.json +26 -0
  29. package/src/tasks/feature-extraction/spec/output.json +7 -0
  30. package/src/tasks/fill-mask/inference.ts +61 -0
  31. package/src/tasks/fill-mask/spec/input.json +38 -0
  32. package/src/tasks/fill-mask/spec/output.json +29 -0
  33. package/src/tasks/image-classification/inference.ts +51 -0
  34. package/src/tasks/image-classification/spec/input.json +34 -0
  35. package/src/tasks/image-classification/spec/output.json +10 -0
  36. package/src/tasks/image-segmentation/inference.ts +65 -0
  37. package/src/tasks/image-segmentation/spec/input.json +54 -0
  38. package/src/tasks/image-segmentation/spec/output.json +25 -0
  39. package/src/tasks/image-to-image/inference.ts +67 -0
  40. package/src/tasks/image-to-image/spec/input.json +52 -0
  41. package/src/tasks/image-to-image/spec/output.json +12 -0
  42. package/src/tasks/image-to-text/inference.ts +138 -0
  43. package/src/tasks/image-to-text/spec/input.json +34 -0
  44. package/src/tasks/image-to-text/spec/output.json +17 -0
  45. package/src/tasks/index.ts +5 -2
  46. package/src/tasks/mask-generation/about.md +65 -0
  47. package/src/tasks/mask-generation/data.ts +55 -0
  48. package/src/tasks/object-detection/inference.ts +62 -0
  49. package/src/tasks/object-detection/spec/input.json +30 -0
  50. package/src/tasks/object-detection/spec/output.json +46 -0
  51. package/src/tasks/placeholder/data.ts +3 -0
  52. package/src/tasks/placeholder/spec/input.json +35 -0
  53. package/src/tasks/placeholder/spec/output.json +17 -0
  54. package/src/tasks/question-answering/inference.ts +99 -0
  55. package/src/tasks/question-answering/spec/input.json +67 -0
  56. package/src/tasks/question-answering/spec/output.json +29 -0
  57. package/src/tasks/sentence-similarity/about.md +2 -2
  58. package/src/tasks/sentence-similarity/inference.ts +32 -0
  59. package/src/tasks/sentence-similarity/spec/input.json +40 -0
  60. package/src/tasks/sentence-similarity/spec/output.json +12 -0
  61. package/src/tasks/summarization/data.ts +1 -0
  62. package/src/tasks/summarization/inference.ts +58 -0
  63. package/src/tasks/summarization/spec/input.json +7 -0
  64. package/src/tasks/summarization/spec/output.json +7 -0
  65. package/src/tasks/table-question-answering/inference.ts +61 -0
  66. package/src/tasks/table-question-answering/spec/input.json +39 -0
  67. package/src/tasks/table-question-answering/spec/output.json +40 -0
  68. package/src/tasks/tabular-classification/about.md +1 -1
  69. package/src/tasks/tabular-regression/about.md +1 -1
  70. package/src/tasks/text-classification/about.md +1 -0
  71. package/src/tasks/text-classification/inference.ts +51 -0
  72. package/src/tasks/text-classification/spec/input.json +35 -0
  73. package/src/tasks/text-classification/spec/output.json +10 -0
  74. package/src/tasks/text-generation/about.md +24 -13
  75. package/src/tasks/text-generation/data.ts +22 -38
  76. package/src/tasks/text-generation/inference.ts +85 -0
  77. package/src/tasks/text-generation/spec/input.json +74 -0
  78. package/src/tasks/text-generation/spec/output.json +17 -0
  79. package/src/tasks/text-to-audio/inference.ts +138 -0
  80. package/src/tasks/text-to-audio/spec/input.json +31 -0
  81. package/src/tasks/text-to-audio/spec/output.json +20 -0
  82. package/src/tasks/text-to-image/about.md +11 -2
  83. package/src/tasks/text-to-image/data.ts +6 -2
  84. package/src/tasks/text-to-image/inference.ts +73 -0
  85. package/src/tasks/text-to-image/spec/input.json +57 -0
  86. package/src/tasks/text-to-image/spec/output.json +15 -0
  87. package/src/tasks/text-to-speech/about.md +4 -2
  88. package/src/tasks/text-to-speech/data.ts +1 -0
  89. package/src/tasks/text-to-speech/inference.ts +146 -0
  90. package/src/tasks/text-to-speech/spec/input.json +7 -0
  91. package/src/tasks/text-to-speech/spec/output.json +7 -0
  92. package/src/tasks/text2text-generation/inference.ts +53 -0
  93. package/src/tasks/text2text-generation/spec/input.json +55 -0
  94. package/src/tasks/text2text-generation/spec/output.json +17 -0
  95. package/src/tasks/token-classification/inference.ts +82 -0
  96. package/src/tasks/token-classification/spec/input.json +65 -0
  97. package/src/tasks/token-classification/spec/output.json +33 -0
  98. package/src/tasks/translation/data.ts +1 -0
  99. package/src/tasks/translation/inference.ts +58 -0
  100. package/src/tasks/translation/spec/input.json +7 -0
  101. package/src/tasks/translation/spec/output.json +7 -0
  102. package/src/tasks/video-classification/inference.ts +59 -0
  103. package/src/tasks/video-classification/spec/input.json +42 -0
  104. package/src/tasks/video-classification/spec/output.json +10 -0
  105. package/src/tasks/visual-question-answering/inference.ts +63 -0
  106. package/src/tasks/visual-question-answering/spec/input.json +41 -0
  107. package/src/tasks/visual-question-answering/spec/output.json +21 -0
  108. package/src/tasks/zero-shot-classification/inference.ts +67 -0
  109. package/src/tasks/zero-shot-classification/spec/input.json +50 -0
  110. package/src/tasks/zero-shot-classification/spec/output.json +10 -0
  111. package/src/tasks/zero-shot-image-classification/data.ts +8 -5
  112. package/src/tasks/zero-shot-image-classification/inference.ts +61 -0
  113. package/src/tasks/zero-shot-image-classification/spec/input.json +45 -0
  114. package/src/tasks/zero-shot-image-classification/spec/output.json +10 -0
  115. package/src/tasks/zero-shot-object-detection/about.md +45 -0
  116. package/src/tasks/zero-shot-object-detection/data.ts +62 -0
  117. package/src/tasks/zero-shot-object-detection/inference.ts +66 -0
  118. package/src/tasks/zero-shot-object-detection/spec/input.json +40 -0
  119. package/src/tasks/zero-shot-object-detection/spec/output.json +47 -0
  120. package/tsconfig.json +3 -3
@@ -1,8 +1,42 @@
+ "use strict";
  var __defProp = Object.defineProperty;
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
+ var __getOwnPropNames = Object.getOwnPropertyNames;
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
  var __export = (target, all) => {
  for (var name in all)
  __defProp(target, name, { get: all[name], enumerable: true });
  };
+ var __copyProps = (to, from, except, desc) => {
+ if (from && typeof from === "object" || typeof from === "function") {
+ for (let key of __getOwnPropNames(from))
+ if (!__hasOwnProp.call(to, key) && key !== except)
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
+ }
+ return to;
+ };
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
+
+ // src/index.ts
+ var src_exports = {};
+ __export(src_exports, {
+ ALL_DISPLAY_MODEL_LIBRARY_KEYS: () => ALL_DISPLAY_MODEL_LIBRARY_KEYS,
+ InferenceDisplayability: () => InferenceDisplayability,
+ LIBRARY_TASK_MAPPING_EXCLUDING_TRANSFORMERS: () => LIBRARY_TASK_MAPPING_EXCLUDING_TRANSFORMERS,
+ MAPPING_DEFAULT_WIDGET: () => MAPPING_DEFAULT_WIDGET,
+ MODALITIES: () => MODALITIES,
+ MODALITY_LABELS: () => MODALITY_LABELS,
+ MODEL_LIBRARIES_UI_ELEMENTS: () => MODEL_LIBRARIES_UI_ELEMENTS,
+ ModelLibrary: () => ModelLibrary,
+ PIPELINE_DATA: () => PIPELINE_DATA,
+ PIPELINE_TYPES: () => PIPELINE_TYPES,
+ PIPELINE_TYPES_SET: () => PIPELINE_TYPES_SET,
+ SUBTASK_TYPES: () => SUBTASK_TYPES,
+ TASKS_DATA: () => TASKS_DATA,
+ TASKS_MODEL_LIBRARIES: () => TASKS_MODEL_LIBRARIES,
+ snippets: () => snippets_exports
+ });
+ module.exports = __toCommonJS(src_exports);

  // src/library-to-tasks.ts
  var LIBRARY_TASK_MAPPING_EXCLUDING_TRANSFORMERS = {
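The hunk above is the esbuild CommonJS prelude: dist/index.mjs becomes dist/index.cjs, and the ESM exports are re-registered on module.exports through the __export/__toCommonJS shim. A minimal consumption sketch, assuming the published package.json (not fully shown in this diff) points its CommonJS entry at dist/index.cjs:

    // CommonJS consumers can now require() the package directly.
    const tasks = require("@huggingface/tasks");
    // The named exports registered above are all present on the returned object:
    console.log(Object.keys(tasks).includes("TASKS_DATA")); // true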
@@ -49,11 +83,11 @@ function nameWithoutNamespace(modelId) {
  const splitted = modelId.split("/");
  return splitted.length === 1 ? splitted[0] : splitted[1];
  }
- var adapter_transformers = (model) => [
- `from transformers import ${model.config?.adapter_transformers?.model_class}
+ var adapters = (model) => [
+ `from adapters import AutoAdapterModel

- model = ${model.config?.adapter_transformers?.model_class}.from_pretrained("${model.config?.adapter_transformers?.model_name}")
- model.load_adapter("${model.id}", source="hf")`
+ model = AutoAdapterModel.from_pretrained("${model.config?.adapter_transformers?.model_name}")
+ model.load_adapter("${model.id}", set_active=True)`
  ];
  var allennlpUnknown = (model) => [
  `import allennlp_models
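For illustration only, this is roughly what the rewritten snippet generator emits when routed through the exported MODEL_LIBRARIES_UI_ELEMENTS map; the model id and model_name below are placeholders, not entries taken from this diff:

    // Hypothetical model data, shaped like the ModelData object the snippet functions receive.
    const model = {
      id: "user/my-adapter",
      config: { adapter_transformers: { model_name: "bert-base-uncased" } },
    };
    const [snippet] = MODEL_LIBRARIES_UI_ELEMENTS["adapter-transformers"].snippets(model);
    console.log(snippet);
    // from adapters import AutoAdapterModel
    //
    // model = AutoAdapterModel.from_pretrained("bert-base-uncased")
    // model.load_adapter("user/my-adapter", set_active=True)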
@@ -493,6 +527,13 @@ transcriptions = asr_model.transcribe(["file.wav"])`
  }
  };
  var mlAgents = (model) => [`mlagents-load-from-hf --repo-id="${model.id}" --local-dir="./downloads"`];
+ var sentis = () => [
+ `string modelName = "[Your model name here].sentis";
+ Model model = ModelLoader.Load(Application.streamingAssetsPath + "/" + modelName);
+ IWorker engine = WorkerFactory.CreateWorker(BackendType.GPUCompute, model);
+ // Please see provided C# file for more details
+ `
+ ];
  var mlx = (model) => [
  `pip install huggingface_hub hf_transfer

@@ -513,11 +554,11 @@ model = AutoModel.load_from_hf_hub("${model.id}")`
  ];
  var MODEL_LIBRARIES_UI_ELEMENTS = {
  "adapter-transformers": {
- btnLabel: "Adapter Transformers",
- repoName: "adapter-transformers",
- repoUrl: "https://github.com/Adapter-Hub/adapter-transformers",
- docsUrl: "https://huggingface.co/docs/hub/adapter-transformers",
- snippets: adapter_transformers
+ btnLabel: "Adapters",
+ repoName: "adapters",
+ repoUrl: "https://github.com/Adapter-Hub/adapters",
+ docsUrl: "https://huggingface.co/docs/hub/adapters",
+ snippets: adapters
  },
  allennlp: {
  btnLabel: "AllenNLP",
@@ -715,10 +756,16 @@ var MODEL_LIBRARIES_UI_ELEMENTS = {
  "ml-agents": {
  btnLabel: "ml-agents",
  repoName: "ml-agents",
- repoUrl: "https://github.com/huggingface/ml-agents",
+ repoUrl: "https://github.com/Unity-Technologies/ml-agents",
  docsUrl: "https://huggingface.co/docs/hub/ml-agents",
  snippets: mlAgents
  },
+ "unity-sentis": {
+ btnLabel: "unity-sentis",
+ repoName: "unity-sentis",
+ repoUrl: "https://github.com/Unity-Technologies/sentis-samples",
+ snippets: sentis
+ },
  pythae: {
  btnLabel: "pythae",
  repoName: "pythae",
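A short sketch of how a downstream UI might pick up the new unity-sentis entry; the lookup uses the exported MODEL_LIBRARIES_UI_ELEMENTS map shown above, and the logged values simply echo the fields added in this hunk:

    const entry = MODEL_LIBRARIES_UI_ELEMENTS["unity-sentis"];
    console.log(entry.btnLabel); // "unity-sentis"
    console.log(entry.repoUrl);  // "https://github.com/Unity-Technologies/sentis-samples"
    // sentis() ignores the model argument and returns the C# loading snippet defined earlier.
    console.log(entry.snippets()[0]);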
@@ -2833,8 +2880,60 @@ var taskData11 = {
  };
  var data_default11 = taskData11;

- // src/tasks/object-detection/data.ts
+ // src/tasks/mask-generation/data.ts
  var taskData12 = {
+ datasets: [],
+ demo: {
+ inputs: [
+ {
+ filename: "mask-generation-input.png",
+ type: "img"
+ }
+ ],
+ outputs: [
+ {
+ filename: "mask-generation-output.png",
+ type: "img"
+ }
+ ]
+ },
+ metrics: [],
+ models: [
+ {
+ description: "Small yet powerful mask generation model.",
+ id: "Zigeng/SlimSAM-uniform-50"
+ },
+ {
+ description: "Very strong mask generation model.",
+ id: "facebook/sam-vit-huge"
+ }
+ ],
+ spaces: [
+ {
+ description: "An application that combines a mask generation model with an image embedding model for open-vocabulary image segmentation.",
+ id: "SkalskiP/SAM_and_MetaCLIP"
+ },
+ {
+ description: "An application that compares the performance of a large and a small mask generation model.",
+ id: "merve/slimsam"
+ },
+ {
+ description: "An application based on an improved mask generation model.",
+ id: "linfanluntan/Grounded-SAM"
+ },
+ {
+ description: "An application to remove objects from videos using mask generation models.",
+ id: "SkalskiP/SAM_and_ProPainter"
+ }
+ ],
+ summary: "Mask generation is the task of generating masks that identify a specific object or region of interest in a given image. Masks are often used in segmentation tasks, where they provide a precise way to isolate the object of interest for further processing or analysis.",
+ widgetModels: [],
+ youtubeId: ""
+ };
+ var data_default12 = taskData12;
+
+ // src/tasks/object-detection/data.ts
+ var taskData13 = {
  datasets: [
  {
  // TODO write proper description
@@ -2906,10 +3005,10 @@ var taskData12 = {
  widgetModels: ["facebook/detr-resnet-50"],
  youtubeId: "WdAeKSOpxhw"
  };
- var data_default12 = taskData12;
+ var data_default13 = taskData13;

  // src/tasks/depth-estimation/data.ts
- var taskData13 = {
+ var taskData14 = {
  datasets: [
  {
  description: "NYU Depth V2 Dataset: Video dataset containing both RGB and depth sensor data",
@@ -2937,8 +3036,8 @@ var taskData13 = {
  id: "Intel/dpt-large"
  },
  {
- description: "Strong Depth Estimation model trained on the KITTI dataset.",
- id: "facebook/dpt-dinov2-large-kitti"
+ description: "Strong Depth Estimation model trained on a big compilation of datasets.",
+ id: "LiheYoung/depth-anything-large-hf"
  },
  {
  description: "A strong monocular depth estimation model.",
@@ -2951,18 +3050,22 @@ var taskData13 = {
  id: "radames/dpt-depth-estimation-3d-voxels"
  },
  {
- description: "An application that can estimate the depth in a given image.",
- id: "keras-io/Monocular-Depth-Estimation"
+ description: "An application to compare the outputs of different depth estimation models.",
+ id: "LiheYoung/Depth-Anything"
+ },
+ {
+ description: "An application to try state-of-the-art depth estimation.",
+ id: "merve/compare_depth_models"
  }
  ],
  summary: "Depth estimation is the task of predicting depth of the objects present in an image.",
  widgetModels: [""],
  youtubeId: ""
  };
- var data_default13 = taskData13;
+ var data_default14 = taskData14;

  // src/tasks/placeholder/data.ts
- var taskData14 = {
+ var taskData15 = {
  datasets: [],
  demo: {
  inputs: [],
@@ -2974,12 +3077,15 @@ var taskData14 = {
  spaces: [],
  summary: "",
  widgetModels: [],
- youtubeId: void 0
+ youtubeId: void 0,
+ /// If this is a subtask, link to the most general task ID
+ /// (eg, text2text-generation is the canonical ID of translation)
+ canonicalId: void 0
  };
- var data_default14 = taskData14;
+ var data_default15 = taskData15;

  // src/tasks/reinforcement-learning/data.ts
- var taskData15 = {
+ var taskData16 = {
  datasets: [
  {
  description: "A curation of widely used datasets for Data Driven Deep Reinforcement Learning (D4RL)",
@@ -3045,10 +3151,10 @@ var taskData15 = {
  widgetModels: [],
  youtubeId: "q0BiUn5LiBc"
  };
- var data_default15 = taskData15;
+ var data_default16 = taskData16;

  // src/tasks/question-answering/data.ts
- var taskData16 = {
+ var taskData17 = {
  datasets: [
  {
  // TODO write proper description
@@ -3112,10 +3218,10 @@ var taskData16 = {
  widgetModels: ["deepset/roberta-base-squad2"],
  youtubeId: "ajPx5LwJD-I"
  };
- var data_default16 = taskData16;
+ var data_default17 = taskData17;

  // src/tasks/sentence-similarity/data.ts
- var taskData17 = {
+ var taskData18 = {
  datasets: [
  {
  description: "Bing queries with relevant passages from various web sources.",
@@ -3207,10 +3313,11 @@ var taskData17 = {
  widgetModels: ["sentence-transformers/all-MiniLM-L6-v2"],
  youtubeId: "VCZq5AkbNEU"
  };
- var data_default17 = taskData17;
+ var data_default18 = taskData18;

  // src/tasks/summarization/data.ts
- var taskData18 = {
+ var taskData19 = {
+ canonicalId: "text2text-generation",
  datasets: [
  {
  description: "News articles in five different languages along with their summaries. Widely used for benchmarking multilingual summarization models.",
@@ -3275,10 +3382,10 @@ var taskData18 = {
  widgetModels: ["sshleifer/distilbart-cnn-12-6"],
  youtubeId: "yHnr5Dk2zCI"
  };
- var data_default18 = taskData18;
+ var data_default19 = taskData19;

  // src/tasks/table-question-answering/data.ts
- var taskData19 = {
+ var taskData20 = {
  datasets: [
  {
  description: "The WikiTableQuestions dataset is a large-scale dataset for the task of question answering on semi-structured tables.",
@@ -3329,10 +3436,10 @@ var taskData19 = {
  summary: "Table Question Answering (Table QA) is the answering a question about an information on a given table.",
  widgetModels: ["google/tapas-base-finetuned-wtq"]
  };
- var data_default19 = taskData19;
+ var data_default20 = taskData20;

  // src/tasks/tabular-classification/data.ts
- var taskData20 = {
+ var taskData21 = {
  datasets: [
  {
  description: "A comprehensive curation of datasets covering all benchmarks.",
@@ -3396,10 +3503,10 @@ var taskData20 = {
  widgetModels: ["scikit-learn/tabular-playground"],
  youtubeId: ""
  };
- var data_default20 = taskData20;
+ var data_default21 = taskData21;

  // src/tasks/tabular-regression/data.ts
- var taskData21 = {
+ var taskData22 = {
  datasets: [
  {
  description: "A comprehensive curation of datasets covering all benchmarks.",
@@ -3451,10 +3558,10 @@ var taskData21 = {
  widgetModels: ["scikit-learn/Fish-Weight"],
  youtubeId: ""
  };
- var data_default21 = taskData21;
+ var data_default22 = taskData22;

  // src/tasks/text-to-image/data.ts
- var taskData22 = {
+ var taskData23 = {
  datasets: [
  {
  description: "RedCaps is a large-scale dataset of 12M image-text pairs collected from Reddit.",
@@ -3530,22 +3637,27 @@ var taskData22 = {
  id: "latent-consistency/lcm-lora-for-sdxl"
  },
  {
- description: "A powerful text-to-image application that can generate 3D representations.",
- id: "hysts/Shap-E"
+ description: "A gallery to explore various text-to-image models.",
+ id: "multimodalart/LoraTheExplorer"
  },
  {
  description: "An application for `text-to-image`, `image-to-image` and image inpainting.",
  id: "ArtGAN/Stable-Diffusion-ControlNet-WebUI"
+ },
+ {
+ description: "An application to generate realistic images given photos of a person and a prompt.",
+ id: "InstantX/InstantID"
  }
  ],
  summary: "Generates images from input text. These models can be used to generate and modify images based on text prompts.",
  widgetModels: ["CompVis/stable-diffusion-v1-4"],
  youtubeId: ""
  };
- var data_default22 = taskData22;
+ var data_default23 = taskData23;

  // src/tasks/text-to-speech/data.ts
- var taskData23 = {
+ var taskData24 = {
+ canonicalId: "text-to-audio",
  datasets: [
  {
  description: "Thousands of short audio clips of a single speaker.",
@@ -3609,10 +3721,10 @@ var taskData23 = {
  widgetModels: ["suno/bark"],
  youtubeId: "NW62DpzJ274"
  };
- var data_default23 = taskData23;
+ var data_default24 = taskData24;

  // src/tasks/token-classification/data.ts
- var taskData24 = {
+ var taskData25 = {
  datasets: [
  {
  description: "A widely used dataset useful to benchmark named entity recognition models.",
@@ -3688,10 +3800,11 @@ var taskData24 = {
  widgetModels: ["dslim/bert-base-NER"],
  youtubeId: "wVHdVlPScxA"
  };
- var data_default24 = taskData24;
+ var data_default25 = taskData25;

  // src/tasks/translation/data.ts
- var taskData25 = {
+ var taskData26 = {
+ canonicalId: "text2text-generation",
  datasets: [
  {
  description: "A dataset of copyright-free books translated into 16 different languages.",
@@ -3752,10 +3865,10 @@ var taskData25 = {
  widgetModels: ["t5-small"],
  youtubeId: "1JvfrvZgi6c"
  };
- var data_default25 = taskData25;
+ var data_default26 = taskData26;

  // src/tasks/text-classification/data.ts
- var taskData26 = {
+ var taskData27 = {
  datasets: [
  {
  description: "A widely used dataset used to benchmark multiple variants of text classification.",
@@ -3840,10 +3953,10 @@ var taskData26 = {
  widgetModels: ["distilbert-base-uncased-finetuned-sst-2-english"],
  youtubeId: "leNG9fN9FQU"
  };
- var data_default26 = taskData26;
+ var data_default27 = taskData27;

  // src/tasks/text-generation/data.ts
- var taskData27 = {
+ var taskData28 = {
  datasets: [
  {
  description: "A large multilingual dataset of text crawled from the web.",
@@ -3854,12 +3967,12 @@ var taskData27 = {
  id: "the_pile"
  },
  {
- description: "A crowd-sourced instruction dataset to develop an AI assistant.",
- id: "OpenAssistant/oasst1"
+ description: "Truly open-source, curated and cleaned dialogue dataset.",
+ id: "HuggingFaceH4/ultrachat_200k"
  },
  {
- description: "A crowd-sourced instruction dataset created by Databricks employees.",
- id: "databricks/databricks-dolly-15k"
+ description: "An instruction dataset with preference ratings on responses.",
+ id: "openbmb/UltraFeedback"
  }
  ],
  demo: {
@@ -3898,72 +4011,56 @@ var taskData27 = {
  id: "bigcode/starcoder"
  },
  {
- description: "A model trained to follow instructions, uses Pythia-12b as base model.",
- id: "databricks/dolly-v2-12b"
- },
- {
- description: "A model trained to follow instructions curated by community, uses Pythia-12b as base model.",
- id: "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5"
- },
- {
- description: "A large language model trained to generate text in English.",
- id: "stabilityai/stablelm-tuned-alpha-7b"
- },
- {
- description: "A model trained to follow instructions, based on mosaicml/mpt-7b.",
- id: "mosaicml/mpt-7b-instruct"
+ description: "A very powerful text generation model.",
+ id: "mistralai/Mixtral-8x7B-Instruct-v0.1"
  },
  {
- description: "A large language model trained to generate text in English.",
- id: "EleutherAI/pythia-12b"
+ description: "Small yet powerful text generation model.",
+ id: "microsoft/phi-2"
  },
  {
- description: "A large text-to-text model trained to follow instructions.",
- id: "google/flan-ul2"
+ description: "A very powerful model that can chat, do mathematical reasoning and write code.",
+ id: "openchat/openchat-3.5-0106"
  },
  {
- description: "A large and powerful text generation model.",
- id: "tiiuae/falcon-40b"
+ description: "Very strong yet small assistant model.",
+ id: "HuggingFaceH4/zephyr-7b-beta"
  },
  {
- description: "State-of-the-art open-source large language model.",
+ description: "Very strong open-source large language model.",
  id: "meta-llama/Llama-2-70b-hf"
  }
  ],
  spaces: [
  {
- description: "A robust text generation model that can perform various tasks through natural language prompting.",
- id: "huggingface/bloom_demo"
+ description: "A leaderboard to compare different open-source text generation models based on various benchmarks.",
+ id: "HuggingFaceH4/open_llm_leaderboard"
  },
  {
- description: "An text generation based application that can write code for 80+ languages.",
- id: "bigcode/bigcode-playground"
+ description: "An text generation based application based on a very powerful LLaMA2 model.",
+ id: "ysharma/Explore_llamav2_with_TGI"
  },
  {
- description: "An text generation based application for conversations.",
- id: "h2oai/h2ogpt-chatbot"
+ description: "An text generation based application to converse with Zephyr model.",
+ id: "HuggingFaceH4/zephyr-chat"
  },
  {
  description: "An text generation application that combines OpenAI and Hugging Face models.",
  id: "microsoft/HuggingGPT"
  },
  {
- description: "An text generation application that uses StableLM-tuned-alpha-7b.",
- id: "stabilityai/stablelm-tuned-alpha-chat"
- },
- {
- description: "An UI that uses StableLM-tuned-alpha-7b.",
- id: "togethercomputer/OpenChatKit"
+ description: "An chatbot to converse with a very powerful text generation model.",
+ id: "mlabonne/phixtral-chat"
  }
  ],
- summary: "Generating text is the task of producing new text. These models can, for example, fill in incomplete text or paraphrase.",
+ summary: "Generating text is the task of generating new text given another text. These models can, for example, fill in incomplete text or paraphrase.",
  widgetModels: ["HuggingFaceH4/zephyr-7b-beta"],
  youtubeId: "Vpjb1lu0MDk"
  };
- var data_default27 = taskData27;
+ var data_default28 = taskData28;

  // src/tasks/text-to-video/data.ts
- var taskData28 = {
+ var taskData29 = {
  datasets: [
  {
  description: "Microsoft Research Video to Text is a large-scale dataset for open domain video captioning",
@@ -4055,10 +4152,10 @@ var taskData28 = {
  widgetModels: [],
  youtubeId: void 0
  };
- var data_default28 = taskData28;
+ var data_default29 = taskData29;

  // src/tasks/unconditional-image-generation/data.ts
- var taskData29 = {
+ var taskData30 = {
  datasets: [
  {
  description: "The CIFAR-100 dataset consists of 60000 32x32 colour images in 100 classes, with 600 images per class.",
@@ -4120,10 +4217,10 @@ var taskData29 = {
  // TODO: Add related video
  youtubeId: ""
  };
- var data_default29 = taskData29;
+ var data_default30 = taskData30;

  // src/tasks/video-classification/data.ts
- var taskData30 = {
+ var taskData31 = {
  datasets: [
  {
  // TODO write proper description
@@ -4202,10 +4299,10 @@ var taskData30 = {
  widgetModels: [],
  youtubeId: ""
  };
- var data_default30 = taskData30;
+ var data_default31 = taskData31;

  // src/tasks/visual-question-answering/data.ts
- var taskData31 = {
+ var taskData32 = {
  datasets: [
  {
  description: "A widely used dataset containing questions (with answers) about images.",
@@ -4295,10 +4392,10 @@ var taskData31 = {
  widgetModels: ["dandelin/vilt-b32-finetuned-vqa"],
  youtubeId: ""
  };
- var data_default31 = taskData31;
+ var data_default32 = taskData32;

  // src/tasks/zero-shot-classification/data.ts
- var taskData32 = {
+ var taskData33 = {
  datasets: [
  {
  description: "A widely used dataset used to benchmark multiple variants of text classification.",
@@ -4357,10 +4454,10 @@ var taskData32 = {
  summary: "Zero-shot text classification is a task in natural language processing where a model is trained on a set of labeled examples but is then able to classify new examples from previously unseen classes.",
  widgetModels: ["facebook/bart-large-mnli"]
  };
- var data_default32 = taskData32;
+ var data_default33 = taskData33;

  // src/tasks/zero-shot-image-classification/data.ts
- var taskData33 = {
+ var taskData34 = {
  datasets: [
  {
  // TODO write proper description
@@ -4412,8 +4509,8 @@ var taskData33 = {
  id: "openai/clip-vit-base-patch16"
  },
  {
- description: "Robust image classification model trained on publicly available image-caption data trained on additional high pixel data for better performance.",
- id: "openai/clip-vit-large-patch14-336"
+ description: "Strong zero-shot image classification model.",
+ id: "google/siglip-base-patch16-224"
  },
  {
  description: "Strong image classification model for biomedical domain.",
@@ -4422,15 +4519,77 @@ var taskData33 = {
  ],
  spaces: [
  {
- description: "An application that leverages zero shot image classification to find best captions to generate an image. ",
+ description: "An application that leverages zero-shot image classification to find best captions to generate an image. ",
  id: "pharma/CLIP-Interrogator"
+ },
+ {
+ description: "An application to compare different zero-shot image classification models. ",
+ id: "merve/compare_clip_siglip"
  }
  ],
- summary: "Zero shot image classification is the task of classifying previously unseen classes during training of a model.",
+ summary: "Zero-shot image classification is the task of classifying previously unseen classes during training of a model.",
  widgetModels: ["openai/clip-vit-large-patch14-336"],
  youtubeId: ""
  };
- var data_default33 = taskData33;
+ var data_default34 = taskData34;
+
+ // src/tasks/zero-shot-object-detection/data.ts
+ var taskData35 = {
+ datasets: [],
+ demo: {
+ inputs: [
+ {
+ filename: "zero-shot-object-detection-input.jpg",
+ type: "img"
+ },
+ {
+ label: "Classes",
+ content: "cat, dog, bird",
+ type: "text"
+ }
+ ],
+ outputs: [
+ {
+ filename: "zero-shot-object-detection-output.jpg",
+ type: "img"
+ }
+ ]
+ },
+ metrics: [
+ {
+ description: "The Average Precision (AP) metric is the Area Under the PR Curve (AUC-PR). It is calculated for each class separately",
+ id: "Average Precision"
+ },
+ {
+ description: "The Mean Average Precision (mAP) metric is the overall average of the AP values",
+ id: "Mean Average Precision"
+ },
+ {
+ description: "The AP\u03B1 metric is the Average Precision at the IoU threshold of a \u03B1 value, for example, AP50 and AP75",
+ id: "AP\u03B1"
+ }
+ ],
+ models: [
+ {
+ description: "Solid zero-shot object detection model that uses CLIP as backbone.",
+ id: "google/owlvit-base-patch32"
+ },
+ {
+ description: "The improved version of the owlvit model.",
+ id: "google/owlv2-base-patch16-ensemble"
+ }
+ ],
+ spaces: [
+ {
+ description: "A demo to try the state-of-the-art zero-shot object detection model, OWLv2.",
+ id: "merve/owlv2"
+ }
+ ],
+ summary: "Zero-shot object detection is a computer vision task to detect objects and their classes in images, without any prior training or knowledge of the classes. Zero-shot object detection models receive an image as input, as well as a list of candidate classes, and output the bounding boxes and labels where the objects have been detected.",
+ widgetModels: [],
+ youtubeId: ""
+ };
+ var data_default35 = taskData35;

  // src/tasks/index.ts
  var TASKS_MODEL_LIBRARIES = {
@@ -4491,7 +4650,7 @@ var TASKS_MODEL_LIBRARIES = {
  "text-to-3d": [],
  "image-to-3d": []
  };
- function getData(type, partialTaskData = data_default14) {
+ function getData(type, partialTaskData = data_default15) {
  return {
  ...partialTaskData,
  id: type,
@@ -4504,7 +4663,7 @@ var TASKS_DATA = {
  "audio-to-audio": getData("audio-to-audio", data_default2),
  "automatic-speech-recognition": getData("automatic-speech-recognition", data_default3),
  conversational: getData("conversational", data_default4),
- "depth-estimation": getData("depth-estimation", data_default13),
+ "depth-estimation": getData("depth-estimation", data_default14),
  "document-question-answering": getData("document-question-answering", data_default5),
  "feature-extraction": getData("feature-extraction", data_default6),
  "fill-mask": getData("fill-mask", data_default7),
@@ -4514,45 +4673,45 @@ var TASKS_DATA = {
  "image-to-image": getData("image-to-image", data_default9),
  "image-to-text": getData("image-to-text", data_default10),
  "image-to-video": void 0,
- "mask-generation": getData("mask-generation", data_default14),
+ "mask-generation": getData("mask-generation", data_default12),
  "multiple-choice": void 0,
- "object-detection": getData("object-detection", data_default12),
- "video-classification": getData("video-classification", data_default30),
+ "object-detection": getData("object-detection", data_default13),
+ "video-classification": getData("video-classification", data_default31),
  other: void 0,
- "question-answering": getData("question-answering", data_default16),
- "reinforcement-learning": getData("reinforcement-learning", data_default15),
+ "question-answering": getData("question-answering", data_default17),
+ "reinforcement-learning": getData("reinforcement-learning", data_default16),
  robotics: void 0,
- "sentence-similarity": getData("sentence-similarity", data_default17),
- summarization: getData("summarization", data_default18),
- "table-question-answering": getData("table-question-answering", data_default19),
+ "sentence-similarity": getData("sentence-similarity", data_default18),
+ summarization: getData("summarization", data_default19),
+ "table-question-answering": getData("table-question-answering", data_default20),
  "table-to-text": void 0,
- "tabular-classification": getData("tabular-classification", data_default20),
- "tabular-regression": getData("tabular-regression", data_default21),
+ "tabular-classification": getData("tabular-classification", data_default21),
+ "tabular-regression": getData("tabular-regression", data_default22),
  "tabular-to-text": void 0,
- "text-classification": getData("text-classification", data_default26),
- "text-generation": getData("text-generation", data_default27),
+ "text-classification": getData("text-classification", data_default27),
+ "text-generation": getData("text-generation", data_default28),
  "text-retrieval": void 0,
- "text-to-image": getData("text-to-image", data_default22),
- "text-to-speech": getData("text-to-speech", data_default23),
+ "text-to-image": getData("text-to-image", data_default23),
+ "text-to-speech": getData("text-to-speech", data_default24),
  "text-to-audio": void 0,
- "text-to-video": getData("text-to-video", data_default28),
+ "text-to-video": getData("text-to-video", data_default29),
  "text2text-generation": void 0,
  "time-series-forecasting": void 0,
- "token-classification": getData("token-classification", data_default24),
- translation: getData("translation", data_default25),
- "unconditional-image-generation": getData("unconditional-image-generation", data_default29),
- "visual-question-answering": getData("visual-question-answering", data_default31),
+ "token-classification": getData("token-classification", data_default25),
+ translation: getData("translation", data_default26),
+ "unconditional-image-generation": getData("unconditional-image-generation", data_default30),
+ "visual-question-answering": getData("visual-question-answering", data_default32),
  "voice-activity-detection": void 0,
- "zero-shot-classification": getData("zero-shot-classification", data_default32),
- "zero-shot-image-classification": getData("zero-shot-image-classification", data_default33),
- "zero-shot-object-detection": getData("zero-shot-object-detection", data_default14),
- "text-to-3d": getData("text-to-3d", data_default14),
- "image-to-3d": getData("image-to-3d", data_default14)
+ "zero-shot-classification": getData("zero-shot-classification", data_default33),
+ "zero-shot-image-classification": getData("zero-shot-image-classification", data_default34),
+ "zero-shot-object-detection": getData("zero-shot-object-detection", data_default35),
+ "text-to-3d": getData("text-to-3d", data_default15),
+ "image-to-3d": getData("image-to-3d", data_default15)
  };

  // src/model-libraries.ts
  var ModelLibrary = /* @__PURE__ */ ((ModelLibrary2) => {
- ModelLibrary2["adapter-transformers"] = "Adapter Transformers";
+ ModelLibrary2["adapter-transformers"] = "Adapters";
  ModelLibrary2["allennlp"] = "allenNLP";
  ModelLibrary2["asteroid"] = "Asteroid";
  ModelLibrary2["bertopic"] = "BERTopic";
@@ -4584,9 +4743,10 @@ var ModelLibrary = /* @__PURE__ */ ((ModelLibrary2) => {
  ModelLibrary2["stanza"] = "Stanza";
  ModelLibrary2["fasttext"] = "fastText";
  ModelLibrary2["stable-baselines3"] = "Stable-Baselines3";
- ModelLibrary2["ml-agents"] = "ML-Agents";
+ ModelLibrary2["ml-agents"] = "Unity ML-Agents";
  ModelLibrary2["pythae"] = "Pythae";
  ModelLibrary2["mindspore"] = "MindSpore";
+ ModelLibrary2["unity-sentis"] = "Unity Sentis";
  return ModelLibrary2;
  })(ModelLibrary || {});
  var ALL_MODEL_LIBRARY_KEYS = Object.keys(ModelLibrary);
@@ -5066,7 +5226,8 @@ function getJsInferenceSnippet(model, accessToken) {
  function hasJsInferenceSnippet(model) {
  return !!model.pipeline_tag && model.pipeline_tag in jsSnippets;
  }
- export {
+ // Annotate the CommonJS export names for ESM import in node:
+ 0 && (module.exports = {
  ALL_DISPLAY_MODEL_LIBRARY_KEYS,
  InferenceDisplayability,
  LIBRARY_TASK_MAPPING_EXCLUDING_TRANSFORMERS,
@@ -5081,5 +5242,5 @@ export {
  SUBTASK_TYPES,
  TASKS_DATA,
  TASKS_MODEL_LIBRARIES,
- snippets_exports as snippets
- };
+ snippets
+ });
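Taken together, the renumbered data_defaultN modules and the rewired TASKS_DATA table mean the new tasks resolve to their own data instead of the placeholder. A minimal consumption sketch (CommonJS, matching the new dist/index.cjs build; output abridged):

    const { TASKS_DATA } = require("@huggingface/tasks");

    // In 0.2.0 this key fell back to the placeholder task data; in 0.2.2 it resolves
    // to the dedicated zero-shot-object-detection module added above.
    const zsod = TASKS_DATA["zero-shot-object-detection"];
    console.log(zsod.models.map((m) => m.id));
    // [ "google/owlvit-base-patch32", "google/owlv2-base-patch16-ensemble" ]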