@huggingface/tasks 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. package/LICENSE +21 -0
  2. package/README.md +20 -0
  3. package/dist/index.d.ts +358 -46
  4. package/dist/index.js +103 -41
  5. package/dist/{index.cjs → index.mjs} +73 -68
  6. package/package.json +43 -33
  7. package/src/Types.ts +49 -43
  8. package/src/audio-classification/about.md +5 -5
  9. package/src/audio-classification/data.ts +11 -11
  10. package/src/audio-to-audio/about.md +4 -3
  11. package/src/audio-to-audio/data.ts +18 -15
  12. package/src/automatic-speech-recognition/about.md +5 -4
  13. package/src/automatic-speech-recognition/data.ts +18 -17
  14. package/src/const.ts +52 -44
  15. package/src/conversational/about.md +9 -9
  16. package/src/conversational/data.ts +22 -18
  17. package/src/depth-estimation/about.md +1 -3
  18. package/src/depth-estimation/data.ts +11 -11
  19. package/src/document-question-answering/about.md +1 -2
  20. package/src/document-question-answering/data.ts +22 -19
  21. package/src/feature-extraction/about.md +2 -3
  22. package/src/feature-extraction/data.ts +12 -15
  23. package/src/fill-mask/about.md +1 -1
  24. package/src/fill-mask/data.ts +16 -14
  25. package/src/image-classification/about.md +5 -3
  26. package/src/image-classification/data.ts +15 -15
  27. package/src/image-segmentation/about.md +4 -4
  28. package/src/image-segmentation/data.ts +26 -23
  29. package/src/image-to-image/about.md +8 -10
  30. package/src/image-to-image/data.ts +31 -27
  31. package/src/image-to-text/about.md +13 -6
  32. package/src/image-to-text/data.ts +20 -21
  33. package/src/index.ts +2 -0
  34. package/src/modelLibraries.ts +43 -0
  35. package/src/object-detection/about.md +2 -1
  36. package/src/object-detection/data.ts +20 -17
  37. package/src/pipelines.ts +608 -0
  38. package/src/placeholder/about.md +3 -3
  39. package/src/placeholder/data.ts +8 -8
  40. package/src/question-answering/about.md +1 -1
  41. package/src/question-answering/data.ts +21 -19
  42. package/src/reinforcement-learning/about.md +167 -176
  43. package/src/reinforcement-learning/data.ts +75 -78
  44. package/src/sentence-similarity/data.ts +29 -28
  45. package/src/summarization/about.md +6 -5
  46. package/src/summarization/data.ts +23 -20
  47. package/src/table-question-answering/about.md +5 -5
  48. package/src/table-question-answering/data.ts +35 -39
  49. package/src/tabular-classification/about.md +4 -6
  50. package/src/tabular-classification/data.ts +11 -12
  51. package/src/tabular-regression/about.md +14 -18
  52. package/src/tabular-regression/data.ts +10 -11
  53. package/src/tasksData.ts +47 -50
  54. package/src/text-classification/about.md +5 -4
  55. package/src/text-classification/data.ts +21 -20
  56. package/src/text-generation/about.md +7 -6
  57. package/src/text-generation/data.ts +36 -34
  58. package/src/text-to-image/about.md +19 -18
  59. package/src/text-to-image/data.ts +32 -26
  60. package/src/text-to-speech/about.md +4 -5
  61. package/src/text-to-speech/data.ts +16 -17
  62. package/src/text-to-video/about.md +41 -36
  63. package/src/text-to-video/data.ts +43 -38
  64. package/src/token-classification/about.md +1 -3
  65. package/src/token-classification/data.ts +26 -25
  66. package/src/translation/about.md +4 -4
  67. package/src/translation/data.ts +21 -21
  68. package/src/unconditional-image-generation/about.md +10 -5
  69. package/src/unconditional-image-generation/data.ts +26 -20
  70. package/src/video-classification/about.md +5 -1
  71. package/src/video-classification/data.ts +14 -14
  72. package/src/visual-question-answering/about.md +8 -3
  73. package/src/visual-question-answering/data.ts +22 -19
  74. package/src/zero-shot-classification/about.md +5 -4
  75. package/src/zero-shot-classification/data.ts +20 -20
  76. package/src/zero-shot-image-classification/about.md +17 -9
  77. package/src/zero-shot-image-classification/data.ts +12 -14
  78. package/tsconfig.json +18 -0
  79. package/assets/audio-classification/audio.wav +0 -0
  80. package/assets/audio-to-audio/input.wav +0 -0
  81. package/assets/audio-to-audio/label-0.wav +0 -0
  82. package/assets/audio-to-audio/label-1.wav +0 -0
  83. package/assets/automatic-speech-recognition/input.flac +0 -0
  84. package/assets/automatic-speech-recognition/wav2vec2.png +0 -0
  85. package/assets/contribution-guide/anatomy.png +0 -0
  86. package/assets/contribution-guide/libraries.png +0 -0
  87. package/assets/depth-estimation/depth-estimation-input.jpg +0 -0
  88. package/assets/depth-estimation/depth-estimation-output.png +0 -0
  89. package/assets/document-question-answering/document-question-answering-input.png +0 -0
  90. package/assets/image-classification/image-classification-input.jpeg +0 -0
  91. package/assets/image-segmentation/image-segmentation-input.jpeg +0 -0
  92. package/assets/image-segmentation/image-segmentation-output.png +0 -0
  93. package/assets/image-to-image/image-to-image-input.jpeg +0 -0
  94. package/assets/image-to-image/image-to-image-output.png +0 -0
  95. package/assets/image-to-image/pix2pix_examples.jpg +0 -0
  96. package/assets/image-to-text/savanna.jpg +0 -0
  97. package/assets/object-detection/object-detection-input.jpg +0 -0
  98. package/assets/object-detection/object-detection-output.jpg +0 -0
  99. package/assets/table-question-answering/tableQA.jpg +0 -0
  100. package/assets/text-to-image/image.jpeg +0 -0
  101. package/assets/text-to-speech/audio.wav +0 -0
  102. package/assets/text-to-video/text-to-video-output.gif +0 -0
  103. package/assets/unconditional-image-generation/unconditional-image-generation-output.jpeg +0 -0
  104. package/assets/video-classification/video-classification-input.gif +0 -0
  105. package/assets/visual-question-answering/elephant.jpeg +0 -0
  106. package/assets/zero-shot-image-classification/image-classification-input.jpeg +0 -0
  107. package/dist/index.d.cts +0 -145
package/src/audio-classification/about.md CHANGED
@@ -55,9 +55,9 @@ import { HfInference } from "@huggingface/inference";
 
 const inference = new HfInference(HF_ACCESS_TOKEN);
 await inference.audioClassification({
- data: await (await fetch("sample.flac")).blob(),
- model: "facebook/mms-lid-126",
- })
+ data: await (await fetch("sample.flac")).blob(),
+ model: "facebook/mms-lid-126",
+ });
 ```
 
 ### Speaker Identification
@@ -78,8 +78,8 @@ Would you like to learn more about the topic? Awesome! Here you can find some cu
 
 ### Scripts for training
 
- - [PyTorch](https://github.com/huggingface/transformers/tree/main/examples/pytorch/audio-classification)
+ - [PyTorch](https://github.com/huggingface/transformers/tree/main/examples/pytorch/audio-classification)
 
 ### Documentation
 
- - [Audio classification task guide](https://huggingface.co/docs/transformers/tasks/audio_classification)
+ - [Audio classification task guide](https://huggingface.co/docs/transformers/tasks/audio_classification)
package/src/audio-classification/data.ts CHANGED
@@ -4,14 +4,14 @@ const taskData: TaskDataCustom = {
 datasets: [
 {
 description: "A benchmark of 10 different audio tasks.",
- id: "superb",
+ id: "superb",
 },
 ],
 demo: {
 inputs: [
 {
 filename: "audio.wav",
- type: "audio",
+ type: "audio",
 },
 ],
 outputs: [
@@ -33,45 +33,45 @@ const taskData: TaskDataCustom = {
 metrics: [
 {
 description: "",
- id: "accuracy",
+ id: "accuracy",
 },
 {
 description: "",
- id: "recall",
+ id: "recall",
 },
 {
 description: "",
- id: "precision",
+ id: "precision",
 },
 {
 description: "",
- id: "f1",
+ id: "f1",
 },
 ],
 models: [
 {
 description: "An easy-to-use model for Command Recognition.",
- id: "speechbrain/google_speech_command_xvector",
+ id: "speechbrain/google_speech_command_xvector",
 },
 {
 description: "An Emotion Recognition model.",
- id: "ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition",
+ id: "ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition",
 },
 {
 description: "A language identification model.",
- id: "facebook/mms-lid-126",
+ id: "facebook/mms-lid-126",
 },
 ],
 spaces: [
 {
 description: "An application that can predict the language spoken in a given audio.",
- id: "akhaliq/Speechbrain-audio-classification",
+ id: "akhaliq/Speechbrain-audio-classification",
 },
 ],
 summary:
 "Audio classification is the task of assigning a label or class to a given audio. It can be used for recognizing which command a user is giving or the emotion of a statement, as well as identifying a speaker.",
 widgetModels: ["facebook/mms-lid-126"],
- youtubeId: "KWwzcmG98Ds",
+ youtubeId: "KWwzcmG98Ds",
 };
 
 export default taskData;
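Note: the `data.ts` hunks above, and the similar ones for the other tasks below, are prettier reformatting of task metadata objects typed as `TaskDataCustom` (defined in `package/src/Types.ts`, which this diff does not display). As a rough sketch only, inferred from the fields visible in these hunks rather than from the real type definition, the shape being filled in looks approximately like this:

```typescript
// Approximate shape inferred from the data.ts files in this diff.
// The real TaskDataCustom type in src/Types.ts may differ in names and optionality.
interface DemoEntrySketch {
	filename?: string; // audio/image demo entries, e.g. "audio.wav"
	label?: string; // text demo entries, e.g. "Transcript"
	content?: string;
	type: "audio" | "img" | "text";
}

interface TaskDataCustomSketch {
	datasets: { description: string; id: string }[];
	demo: { inputs: DemoEntrySketch[]; outputs: DemoEntrySketch[] };
	metrics: { description: string; id: string }[];
	models: { description: string; id: string }[];
	spaces: { description: string; id: string }[];
	summary: string;
	widgetModels: string[];
	youtubeId?: string;
}

// Each task module then default-exports one literal of this shape,
// e.g. the audio-classification metadata reformatted in the hunk above.
const example: TaskDataCustomSketch = {
	datasets: [{ description: "A benchmark of 10 different audio tasks.", id: "superb" }],
	demo: { inputs: [{ filename: "audio.wav", type: "audio" }], outputs: [] },
	metrics: [{ description: "", id: "accuracy" }],
	models: [{ description: "A language identification model.", id: "facebook/mms-lid-126" }],
	spaces: [],
	summary: "Audio classification is the task of assigning a label or class to a given audio.",
	widgetModels: ["facebook/mms-lid-126"],
	youtubeId: "KWwzcmG98Ds",
};

export default example;
```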
package/src/audio-to-audio/about.md CHANGED
@@ -29,6 +29,7 @@ def query(filename):
 
 data = query("sample1.flac")
 ```
+
 You can use [huggingface.js](https://github.com/huggingface/huggingface.js) to infer with audio-to-audio models on Hugging Face Hub.
 
 ```javascript
@@ -36,9 +37,9 @@ import { HfInference } from "@huggingface/inference";
 
 const inference = new HfInference(HF_ACCESS_TOKEN);
 await inference.audioToAudio({
- data: await (await fetch("sample.flac")).blob(),
- model: "speechbrain/sepformer-wham",
- })
+ data: await (await fetch("sample.flac")).blob(),
+ model: "speechbrain/sepformer-wham",
+ });
 ```
 
 ### Audio Source Separation
package/src/audio-to-audio/data.ts CHANGED
@@ -4,60 +4,63 @@ const taskData: TaskDataCustom = {
 datasets: [
 {
 description: "512-element X-vector embeddings of speakers from CMU ARCTIC dataset.",
- id: "Matthijs/cmu-arctic-xvectors",
+ id: "Matthijs/cmu-arctic-xvectors",
 },
 ],
 demo: {
 inputs: [
 {
 filename: "input.wav",
- type: "audio",
+ type: "audio",
 },
 ],
 outputs: [
 {
 filename: "label-0.wav",
- type: "audio",
+ type: "audio",
 },
 {
 filename: "label-1.wav",
- type: "audio",
+ type: "audio",
 },
 ],
 },
 metrics: [
 {
- description: "The Signal-to-Noise ratio is the relationship between the target signal level and the background noise level. It is calculated as the logarithm of the target signal divided by the background noise, in decibels.",
- id: "snri",
+ description:
+ "The Signal-to-Noise ratio is the relationship between the target signal level and the background noise level. It is calculated as the logarithm of the target signal divided by the background noise, in decibels.",
+ id: "snri",
 },
 {
- description: "The Signal-to-Distortion ratio is the relationship between the target signal and the sum of noise, interference, and artifact errors",
- id: "sdri",
+ description:
+ "The Signal-to-Distortion ratio is the relationship between the target signal and the sum of noise, interference, and artifact errors",
+ id: "sdri",
 },
 ],
 models: [
 {
 description: "A solid model of audio source separation.",
- id: "speechbrain/sepformer-wham",
+ id: "speechbrain/sepformer-wham",
 },
 {
 description: "A speech enhancement model.",
- id: "speechbrain/metricgan-plus-voicebank",
+ id: "speechbrain/metricgan-plus-voicebank",
 },
 ],
- spaces: [
+ spaces: [
 {
 description: "An application for speech separation.",
- id: "younver/speechbrain-speech-separation",
+ id: "younver/speechbrain-speech-separation",
 },
 {
 description: "An application for audio style transfer.",
- id: "nakas/audio-diffusion_style_transfer",
+ id: "nakas/audio-diffusion_style_transfer",
 },
 ],
- summary: "Audio-to-Audio is a family of tasks in which the input is an audio and the output is one or multiple generated audios. Some example tasks are speech enhancement and source separation.",
+ summary:
+ "Audio-to-Audio is a family of tasks in which the input is an audio and the output is one or multiple generated audios. Some example tasks are speech enhancement and source separation.",
 widgetModels: ["speechbrain/sepformer-wham"],
- youtubeId: "iohj7nCCYoM",
+ youtubeId: "iohj7nCCYoM",
 };
 
 export default taskData;
package/src/automatic-speech-recognition/about.md CHANGED
@@ -14,7 +14,7 @@ A caption generation model takes audio as input from sources to generate automat
 
 Multilingual ASR models can convert audio inputs with multiple languages into transcripts. Some multilingual ASR models include [language identification](https://huggingface.co/tasks/audio-classification) blocks to improve the performance.
 
- The use of Multilingual ASR has become popular, the idea of maintaining just a single model for all language can simplify the production pipeline. Take a look at [Whisper](https://huggingface.co/openai/whisper-large-v2) to get an idea on how 100+ languages can be processed by a single model.
+ The use of Multilingual ASR has become popular, the idea of maintaining just a single model for all language can simplify the production pipeline. Take a look at [Whisper](https://huggingface.co/openai/whisper-large-v2) to get an idea on how 100+ languages can be processed by a single model.
 
 ## Inference
 
@@ -56,9 +56,9 @@ import { HfInference } from "@huggingface/inference";
 
 const inference = new HfInference(HF_ACCESS_TOKEN);
 await inference.automaticSpeechRecognition({
- data: await (await fetch("sample.flac")).blob(),
- model: "openai/whisper-large-v2",
- })
+ data: await (await fetch("sample.flac")).blob(),
+ model: "openai/whisper-large-v2",
+ });
 ```
 
 ## Solving ASR for your own data
@@ -74,6 +74,7 @@ On December 2022, over 450 participants collaborated, fine-tuned and shared 600+
 
 These events help democratize ASR for all languages, including low-resource languages. In addition to the trained models, the [event](https://github.com/huggingface/community-events/tree/main/whisper-fine-tuning-event) helps to build practical collaborative knowledge.
 
 ## Useful Resources
+
 - [Fine-tuning MetaAI's MMS Adapter Models for Multi-Lingual ASR](https://huggingface.co/blog/mms_adapters)
 - [Making automatic speech recognition work on large files with Wav2Vec2 in 🤗 Transformers](https://huggingface.co/blog/asr-chunking)
 - [Boosting Wav2Vec2 with n-grams in 🤗 Transformers](https://huggingface.co/blog/wav2vec2-with-ngram)
package/src/automatic-speech-recognition/data.ts CHANGED
@@ -4,74 +4,75 @@ const taskData: TaskDataCustom = {
 datasets: [
 {
 description: "18,000 hours of multilingual audio-text dataset in 108 languages.",
- id: "mozilla-foundation/common_voice_13_0",
+ id: "mozilla-foundation/common_voice_13_0",
 },
 {
 description: "An English dataset with 1,000 hours of data.",
- id: "librispeech_asr",
+ id: "librispeech_asr",
 },
 {
 description: "High quality, multi-speaker audio data and their transcriptions in various languages.",
- id: "openslr",
+ id: "openslr",
 },
 ],
 demo: {
 inputs: [
 {
 filename: "input.flac",
- type: "audio",
+ type: "audio",
 },
 ],
 outputs: [
 {
 /// GOING ALONG SLUSHY COUNTRY ROADS AND SPEAKING TO DAMP AUDIENCES I
- label: "Transcript",
+ label: "Transcript",
 content: "Going along slushy country roads and speaking to damp audiences in...",
- type: "text",
+ type: "text",
 },
 ],
 },
 metrics: [
 {
 description: "",
- id: "wer",
+ id: "wer",
 },
 {
 description: "",
- id: "cer",
+ id: "cer",
 },
 ],
 models: [
 {
 description: "A powerful ASR model by OpenAI.",
- id: "openai/whisper-large-v2",
+ id: "openai/whisper-large-v2",
 },
 {
 description: "A good generic ASR model by MetaAI.",
- id: "facebook/wav2vec2-base-960h",
+ id: "facebook/wav2vec2-base-960h",
 },
 {
 description: "An end-to-end model that performs ASR and Speech Translation by MetaAI.",
- id: "facebook/s2t-small-mustc-en-fr-st",
+ id: "facebook/s2t-small-mustc-en-fr-st",
 },
 ],
- spaces: [
+ spaces: [
 {
 description: "A powerful general-purpose speech recognition application.",
- id: "openai/whisper",
+ id: "openai/whisper",
 },
 {
 description: "Fastest speech recognition application.",
- id: "sanchit-gandhi/whisper-jax",
+ id: "sanchit-gandhi/whisper-jax",
 },
 {
 description: "An application that transcribes speeches in YouTube videos.",
- id: "jeffistyping/Youtube-Whisperer",
+ id: "jeffistyping/Youtube-Whisperer",
 },
 ],
- summary: "Automatic Speech Recognition (ASR), also known as Speech to Text (STT), is the task of transcribing a given audio to text. It has many applications, such as voice user interfaces.",
+ summary:
+ "Automatic Speech Recognition (ASR), also known as Speech to Text (STT), is the task of transcribing a given audio to text. It has many applications, such as voice user interfaces.",
 widgetModels: ["openai/whisper-large-v2"],
- youtubeId: "TksaY_FDgnk",
+ youtubeId: "TksaY_FDgnk",
 };
 
 export default taskData;
package/src/const.ts CHANGED
@@ -1,51 +1,59 @@
- import type { ModelLibraryKey } from "../../js/src/lib/interfaces/Libraries";
- import type { PipelineType } from "../../js/src/lib/interfaces/Types";
+ import type { ModelLibraryKey } from "./modelLibraries";
+ import type { PipelineType } from "./pipelines";
 
- /*
+ /**
 * Model libraries compatible with each ML task
 */
 export const TASKS_MODEL_LIBRARIES: Record<PipelineType, ModelLibraryKey[]> = {
- "audio-classification": ["speechbrain", "transformers"],
- "audio-to-audio": ["asteroid", "speechbrain"],
- "automatic-speech-recognition": ["espnet", "nemo", "speechbrain", "transformers", "transformers.js"],
- "conversational": ["transformers"],
- "depth-estimation": ["transformers"],
- "document-question-answering": ["transformers"],
- "feature-extraction": ["sentence-transformers", "transformers", "transformers.js"],
- "fill-mask": ["transformers", "transformers.js"],
- "graph-ml": ["transformers"],
- "image-classification": ["keras", "timm", "transformers", "transformers.js"],
- "image-segmentation": ["transformers", "transformers.js"],
- "image-to-image": [],
- "image-to-text": ["transformers.js"],
- "video-classification": [],
- "multiple-choice": ["transformers"],
- "object-detection": ["transformers", "transformers.js"],
- "other": [],
- "question-answering": ["adapter-transformers", "allennlp", "transformers", "transformers.js"],
- "robotics": [],
- "reinforcement-learning": ["transformers", "stable-baselines3", "ml-agents", "sample-factory"],
- "sentence-similarity": ["sentence-transformers", "spacy", "transformers.js"],
- "summarization": ["transformers", "transformers.js"],
- "table-question-answering": ["transformers"],
- "table-to-text": ["transformers"],
- "tabular-classification": ["sklearn"],
- "tabular-regression": ["sklearn"],
- "tabular-to-text": ["transformers"],
- "text-classification": ["adapter-transformers", "spacy", "transformers", "transformers.js"],
- "text-generation": ["transformers", "transformers.js"],
- "text-retrieval": [],
- "text-to-image": [],
- "text-to-speech": ["espnet", "tensorflowtts", "transformers"],
- "text-to-audio": ["transformers"],
- "text-to-video": [],
- "text2text-generation": ["transformers", "transformers.js"],
- "time-series-forecasting": [],
- "token-classification": ["adapter-transformers", "flair", "spacy", "span-marker", "stanza", "transformers", "transformers.js"],
- "translation": ["transformers", "transformers.js"],
+ "audio-classification": ["speechbrain", "transformers"],
+ "audio-to-audio": ["asteroid", "speechbrain"],
+ "automatic-speech-recognition": ["espnet", "nemo", "speechbrain", "transformers", "transformers.js"],
+ conversational: ["transformers"],
+ "depth-estimation": ["transformers"],
+ "document-question-answering": ["transformers"],
+ "feature-extraction": ["sentence-transformers", "transformers", "transformers.js"],
+ "fill-mask": ["transformers", "transformers.js"],
+ "graph-ml": ["transformers"],
+ "image-classification": ["keras", "timm", "transformers", "transformers.js"],
+ "image-segmentation": ["transformers", "transformers.js"],
+ "image-to-image": [],
+ "image-to-text": ["transformers.js"],
+ "video-classification": [],
+ "multiple-choice": ["transformers"],
+ "object-detection": ["transformers", "transformers.js"],
+ other: [],
+ "question-answering": ["adapter-transformers", "allennlp", "transformers", "transformers.js"],
+ robotics: [],
+ "reinforcement-learning": ["transformers", "stable-baselines3", "ml-agents", "sample-factory"],
+ "sentence-similarity": ["sentence-transformers", "spacy", "transformers.js"],
+ summarization: ["transformers", "transformers.js"],
+ "table-question-answering": ["transformers"],
+ "table-to-text": ["transformers"],
+ "tabular-classification": ["sklearn"],
+ "tabular-regression": ["sklearn"],
+ "tabular-to-text": ["transformers"],
+ "text-classification": ["adapter-transformers", "spacy", "transformers", "transformers.js"],
+ "text-generation": ["transformers", "transformers.js"],
+ "text-retrieval": [],
+ "text-to-image": [],
+ "text-to-speech": ["espnet", "tensorflowtts", "transformers"],
+ "text-to-audio": ["transformers"],
+ "text-to-video": [],
+ "text2text-generation": ["transformers", "transformers.js"],
+ "time-series-forecasting": [],
+ "token-classification": [
+ "adapter-transformers",
+ "flair",
+ "spacy",
+ "span-marker",
+ "stanza",
+ "transformers",
+ "transformers.js",
+ ],
+ translation: ["transformers", "transformers.js"],
 "unconditional-image-generation": [],
- "visual-question-answering": [],
- "voice-activity-detection": [],
- "zero-shot-classification": ["transformers", "transformers.js"],
+ "visual-question-answering": [],
+ "voice-activity-detection": [],
+ "zero-shot-classification": ["transformers", "transformers.js"],
 "zero-shot-image-classification": ["transformers.js"],
 };
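Note: beyond the prettier reformatting (unquoted keys such as `conversational` and `other` are consistent with prettier's default quote-props handling), the `const.ts` hunk shows the substantive change of this release: `ModelLibraryKey` and `PipelineType` now come from the package-local `./modelLibraries` and `./pipelines` modules (both new files in the list above) instead of reaching into the huggingface.js monorepo. A minimal usage sketch, assuming the constant and both types are re-exported from the package entry point (the two-line `src/index.ts` change suggests this, but the diff does not display it):

```typescript
// Hypothetical consumer of @huggingface/tasks 0.0.4; the export surface is an assumption.
import type { ModelLibraryKey, PipelineType } from "@huggingface/tasks";
import { TASKS_MODEL_LIBRARIES } from "@huggingface/tasks";

// Look up which model libraries are registered as compatible with a task.
function librariesForTask(task: PipelineType): ModelLibraryKey[] {
	return TASKS_MODEL_LIBRARIES[task] ?? [];
}

console.log(librariesForTask("automatic-speech-recognition"));
// -> ["espnet", "nemo", "speechbrain", "transformers", "transformers.js"]
```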
package/src/conversational/about.md CHANGED
@@ -2,15 +2,15 @@
 
 ### Chatbot 💬
 
- Chatbots are used to have conversations instead of providing direct contact with a live human. They are used to provide customer service, sales, and can even be used to play games (see [ELIZA](https://en.wikipedia.org/wiki/ELIZA) from 1966 for one of the earliest examples).
+ Chatbots are used to have conversations instead of providing direct contact with a live human. They are used to provide customer service, sales, and can even be used to play games (see [ELIZA](https://en.wikipedia.org/wiki/ELIZA) from 1966 for one of the earliest examples).
 
 ## Voice Assistants 🎙️
 
- Conversational response models are used as part of voice assistants to provide appropriate responses to voice based queries.
+ Conversational response models are used as part of voice assistants to provide appropriate responses to voice based queries.
 
 ## Inference
 
- You can infer with Conversational models with the 🤗 Transformers library using the `conversational` pipeline. This pipeline takes a conversation prompt or a list of conversations and generates responses for each prompt. The models that this pipeline can use are models that have been fine-tuned on a multi-turn conversational task (see https://huggingface.co/models?filter=conversational for a list of updated Conversational models).
+ You can infer with Conversational models with the 🤗 Transformers library using the `conversational` pipeline. This pipeline takes a conversation prompt or a list of conversations and generates responses for each prompt. The models that this pipeline can use are models that have been fine-tuned on a multi-turn conversational task (see https://huggingface.co/models?filter=conversational for a list of updated Conversational models).
 
 ```python
 from transformers import pipeline, Conversation
@@ -22,10 +22,10 @@ converse([conversation_1, conversation_2])
 
 ## Output:
 ## Conversation 1
- ## user >> Going to the movies tonight - any suggestions?
+ ## user >> Going to the movies tonight - any suggestions?
 ## bot >> The Big Lebowski ,
 ## Conversation 2
- ## user >> What's the last book you have read?
+ ## user >> What's the last book you have read?
 ## bot >> The Last Question
 ```
 
@@ -36,9 +36,9 @@ import { HfInference } from "@huggingface/inference";
 
 const inference = new HfInference(HF_ACCESS_TOKEN);
 await inference.conversational({
- model: 'facebook/blenderbot-400M-distill',
- inputs: "Going to the movies tonight - any suggestions?"
- })
+ model: "facebook/blenderbot-400M-distill",
+ inputs: "Going to the movies tonight - any suggestions?",
+ });
 ```
 
 ## Useful Resources
@@ -47,4 +47,4 @@ await inference.conversational({
 - [Reinforcement Learning from Human Feedback From Zero to ChatGPT](https://www.youtube.com/watch?v=EAd4oQtEJOM)
 - [A guide on Dialog Agents](https://huggingface.co/blog/dialog-agents)
 
- This page was made possible thanks to the efforts of [Viraat Aryabumi](https://huggingface.co/viraat).
+ This page was made possible thanks to the efforts of [Viraat Aryabumi](https://huggingface.co/viraat).
package/src/conversational/data.ts CHANGED
@@ -3,60 +3,64 @@ import type { TaskDataCustom } from "../Types";
 const taskData: TaskDataCustom = {
 datasets: [
 {
- description: "A dataset of 7k conversations explicitly designed to exhibit multiple conversation modes: displaying personality, having empathy, and demonstrating knowledge.",
- id: "blended_skill_talk",
+ description:
+ "A dataset of 7k conversations explicitly designed to exhibit multiple conversation modes: displaying personality, having empathy, and demonstrating knowledge.",
+ id: "blended_skill_talk",
 },
 {
- description: "ConvAI is a dataset of human-to-bot conversations labeled for quality. This data can be used to train a metric for evaluating dialogue systems",
- id: "conv_ai_2",
+ description:
+ "ConvAI is a dataset of human-to-bot conversations labeled for quality. This data can be used to train a metric for evaluating dialogue systems",
+ id: "conv_ai_2",
 },
 {
 description: "EmpatheticDialogues, is a dataset of 25k conversations grounded in emotional situations",
- id: "empathetic_dialogues",
+ id: "empathetic_dialogues",
 },
 ],
 demo: {
 inputs: [
 {
- label: "Input",
+ label: "Input",
 content: "Hey my name is Julien! How are you?",
- type: "text",
+ type: "text",
 },
-
 ],
 outputs: [
 {
- label: "Answer",
+ label: "Answer",
 content: "Hi Julien! My name is Julia! I am well.",
- type: "text",
+ type: "text",
 },
 ],
 },
 metrics: [
 {
- description: "BLEU score is calculated by counting the number of shared single or subsequent tokens between the generated sequence and the reference. Subsequent n tokens are called “n-grams”. Unigram refers to a single token while bi-gram refers to token pairs and n-grams refer to n subsequent tokens. The score ranges from 0 to 1, where 1 means the translation perfectly matched and 0 did not match at all",
- id: "bleu",
+ description:
+ "BLEU score is calculated by counting the number of shared single or subsequent tokens between the generated sequence and the reference. Subsequent n tokens are called “n-grams”. Unigram refers to a single token while bi-gram refers to token pairs and n-grams refer to n subsequent tokens. The score ranges from 0 to 1, where 1 means the translation perfectly matched and 0 did not match at all",
+ id: "bleu",
 },
 ],
 models: [
 {
 description: "A faster and smaller model than the famous BERT model.",
- id: "facebook/blenderbot-400M-distill",
+ id: "facebook/blenderbot-400M-distill",
 },
 {
- description: "DialoGPT is a large-scale pretrained dialogue response generation model for multiturn conversations.",
- id: "microsoft/DialoGPT-large",
+ description:
+ "DialoGPT is a large-scale pretrained dialogue response generation model for multiturn conversations.",
+ id: "microsoft/DialoGPT-large",
 },
 ],
 spaces: [
 {
 description: "A chatbot based on Blender model.",
- id: "EXFINITE/BlenderBot-UI",
+ id: "EXFINITE/BlenderBot-UI",
 },
 ],
- summary: "Conversational response modelling is the task of generating conversational text that is relevant, coherent and knowledgable given a prompt. These models have applications in chatbots, and as a part of voice assistants",
+ summary:
+ "Conversational response modelling is the task of generating conversational text that is relevant, coherent and knowledgable given a prompt. These models have applications in chatbots, and as a part of voice assistants",
 widgetModels: ["facebook/blenderbot-400M-distill"],
- youtubeId: "",
+ youtubeId: "",
 };
 
 export default taskData;
package/src/depth-estimation/about.md CHANGED
@@ -2,7 +2,7 @@
 Depth estimation models can be used to estimate the depth of different objects present in an image.
 
 ### Estimation of Volumetric Information
- Depth estimation models are widely used to study volumetric formation of objects present inside an image. This is an important use case in the domain of computer graphics.
+ Depth estimation models are widely used to study volumetric formation of objects present inside an image. This is an important use case in the domain of computer graphics.
 
 ### 3D Representation
 
@@ -31,8 +31,6 @@ result
 # You can visualize the result just by calling `result["depth"]`.
 ```
 
-
 ## Useful Resources
 
 - [Monocular depth estimation task guide](https://huggingface.co/docs/transformers/tasks/monocular_depth_estimation)
-
package/src/depth-estimation/data.ts CHANGED
@@ -7,46 +7,46 @@ const taskData: TaskDataCustom = {
 id: "sayakpaul/nyu_depth_v2",
 },
 ],
- demo: {
+ demo: {
 inputs: [
 {
 filename: "depth-estimation-input.jpg",
- type: "img",
+ type: "img",
 },
 ],
 outputs: [
 {
 filename: "depth-estimation-output.png",
- type: "img",
+ type: "img",
 },
 ],
 },
 metrics: [],
- models: [
+ models: [
 {
 // TO DO: write description
 description: "Strong Depth Estimation model trained on 1.4 million images.",
- id: "Intel/dpt-large",
+ id: "Intel/dpt-large",
 },
 {
 // TO DO: write description
 description: "Strong Depth Estimation model trained on the KITTI dataset.",
- id: "vinvino02/glpn-kitti",
+ id: "vinvino02/glpn-kitti",
 },
 ],
- spaces: [
+ spaces: [
 {
 description: "An application that predicts the depth of an image and then reconstruct the 3D model as voxels.",
- id: "radames/dpt-depth-estimation-3d-voxels",
+ id: "radames/dpt-depth-estimation-3d-voxels",
 },
 {
 description: "An application that can estimate the depth in a given image.",
- id: "keras-io/Monocular-Depth-Estimation",
+ id: "keras-io/Monocular-Depth-Estimation",
 },
 ],
- summary: "Depth estimation is the task of predicting depth of the objects present in an image.",
+ summary: "Depth estimation is the task of predicting depth of the objects present in an image.",
 widgetModels: [""],
- youtubeId: "",
+ youtubeId: "",
 };
 
 export default taskData;