@huggingface/tasks 0.0.3 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. package/LICENSE +21 -0
  2. package/README.md +20 -0
  3. package/dist/index.d.ts +368 -46
  4. package/dist/index.js +117 -41
  5. package/dist/{index.cjs → index.mjs} +84 -67
  6. package/package.json +43 -33
  7. package/src/Types.ts +49 -43
  8. package/src/audio-classification/about.md +5 -5
  9. package/src/audio-classification/data.ts +11 -11
  10. package/src/audio-to-audio/about.md +4 -3
  11. package/src/audio-to-audio/data.ts +18 -15
  12. package/src/automatic-speech-recognition/about.md +5 -4
  13. package/src/automatic-speech-recognition/data.ts +18 -17
  14. package/src/const.ts +52 -44
  15. package/src/conversational/about.md +9 -9
  16. package/src/conversational/data.ts +22 -18
  17. package/src/depth-estimation/about.md +1 -3
  18. package/src/depth-estimation/data.ts +11 -11
  19. package/src/document-question-answering/about.md +1 -2
  20. package/src/document-question-answering/data.ts +22 -19
  21. package/src/feature-extraction/about.md +2 -3
  22. package/src/feature-extraction/data.ts +12 -15
  23. package/src/fill-mask/about.md +1 -1
  24. package/src/fill-mask/data.ts +16 -14
  25. package/src/image-classification/about.md +5 -3
  26. package/src/image-classification/data.ts +15 -15
  27. package/src/image-segmentation/about.md +4 -4
  28. package/src/image-segmentation/data.ts +26 -23
  29. package/src/image-to-image/about.md +10 -12
  30. package/src/image-to-image/data.ts +31 -27
  31. package/src/image-to-text/about.md +13 -6
  32. package/src/image-to-text/data.ts +20 -21
  33. package/src/index.ts +11 -0
  34. package/src/modelLibraries.ts +43 -0
  35. package/src/object-detection/about.md +2 -1
  36. package/src/object-detection/data.ts +20 -17
  37. package/src/pipelines.ts +619 -0
  38. package/src/placeholder/about.md +3 -3
  39. package/src/placeholder/data.ts +8 -8
  40. package/src/question-answering/about.md +1 -1
  41. package/src/question-answering/data.ts +21 -19
  42. package/src/reinforcement-learning/about.md +167 -176
  43. package/src/reinforcement-learning/data.ts +75 -78
  44. package/src/sentence-similarity/data.ts +29 -28
  45. package/src/summarization/about.md +6 -5
  46. package/src/summarization/data.ts +23 -20
  47. package/src/table-question-answering/about.md +5 -5
  48. package/src/table-question-answering/data.ts +35 -39
  49. package/src/tabular-classification/about.md +4 -6
  50. package/src/tabular-classification/data.ts +11 -12
  51. package/src/tabular-regression/about.md +14 -18
  52. package/src/tabular-regression/data.ts +10 -11
  53. package/src/tasksData.ts +47 -50
  54. package/src/text-classification/about.md +5 -4
  55. package/src/text-classification/data.ts +21 -20
  56. package/src/text-generation/about.md +7 -6
  57. package/src/text-generation/data.ts +36 -34
  58. package/src/text-to-image/about.md +19 -18
  59. package/src/text-to-image/data.ts +32 -26
  60. package/src/text-to-speech/about.md +4 -5
  61. package/src/text-to-speech/data.ts +16 -17
  62. package/src/text-to-video/about.md +41 -36
  63. package/src/text-to-video/data.ts +43 -38
  64. package/src/token-classification/about.md +1 -3
  65. package/src/token-classification/data.ts +26 -25
  66. package/src/translation/about.md +4 -4
  67. package/src/translation/data.ts +21 -21
  68. package/src/unconditional-image-generation/about.md +10 -5
  69. package/src/unconditional-image-generation/data.ts +26 -20
  70. package/src/video-classification/about.md +5 -1
  71. package/src/video-classification/data.ts +14 -14
  72. package/src/visual-question-answering/about.md +8 -3
  73. package/src/visual-question-answering/data.ts +22 -19
  74. package/src/zero-shot-classification/about.md +5 -4
  75. package/src/zero-shot-classification/data.ts +20 -20
  76. package/src/zero-shot-image-classification/about.md +17 -9
  77. package/src/zero-shot-image-classification/data.ts +12 -14
  78. package/tsconfig.json +18 -0
  79. package/assets/audio-classification/audio.wav +0 -0
  80. package/assets/audio-to-audio/input.wav +0 -0
  81. package/assets/audio-to-audio/label-0.wav +0 -0
  82. package/assets/audio-to-audio/label-1.wav +0 -0
  83. package/assets/automatic-speech-recognition/input.flac +0 -0
  84. package/assets/automatic-speech-recognition/wav2vec2.png +0 -0
  85. package/assets/contribution-guide/anatomy.png +0 -0
  86. package/assets/contribution-guide/libraries.png +0 -0
  87. package/assets/depth-estimation/depth-estimation-input.jpg +0 -0
  88. package/assets/depth-estimation/depth-estimation-output.png +0 -0
  89. package/assets/document-question-answering/document-question-answering-input.png +0 -0
  90. package/assets/image-classification/image-classification-input.jpeg +0 -0
  91. package/assets/image-segmentation/image-segmentation-input.jpeg +0 -0
  92. package/assets/image-segmentation/image-segmentation-output.png +0 -0
  93. package/assets/image-to-image/image-to-image-input.jpeg +0 -0
  94. package/assets/image-to-image/image-to-image-output.png +0 -0
  95. package/assets/image-to-image/pix2pix_examples.jpg +0 -0
  96. package/assets/image-to-text/savanna.jpg +0 -0
  97. package/assets/object-detection/object-detection-input.jpg +0 -0
  98. package/assets/object-detection/object-detection-output.jpg +0 -0
  99. package/assets/table-question-answering/tableQA.jpg +0 -0
  100. package/assets/text-to-image/image.jpeg +0 -0
  101. package/assets/text-to-speech/audio.wav +0 -0
  102. package/assets/text-to-video/text-to-video-output.gif +0 -0
  103. package/assets/unconditional-image-generation/unconditional-image-generation-output.jpeg +0 -0
  104. package/assets/video-classification/video-classification-input.gif +0 -0
  105. package/assets/visual-question-answering/elephant.jpeg +0 -0
  106. package/assets/zero-shot-image-classification/image-classification-input.jpeg +0 -0
  107. package/dist/index.d.cts +0 -145
package/src/Types.ts CHANGED
@@ -1,58 +1,64 @@
- import type { ModelLibraryKey } from "../../js/src/lib/interfaces/Libraries";
- import type { PipelineType } from "../../js/src/lib/interfaces/Types";
+ import type { ModelLibraryKey } from "./modelLibraries";
+ import type { PipelineType } from "./pipelines";

  export interface ExampleRepo {
  description: string;
- id: string;
+ id: string;
  }

- export type TaskDemoEntry = {
- filename: string;
- type: "audio";
- } | {
- data: Array<{
- label: string;
- score: number;
- }>;
- type: "chart";
- } | {
- filename: string;
- type: "img";
- } | {
- table: string[][];
- type: "tabular";
- } | {
- content: string;
- label: string;
- type: "text";
- } | {
- text: string;
- tokens: Array<{
- end: number;
- start: number;
- type: string;
- }>;
- type: "text-with-tokens";
- } ;
+ export type TaskDemoEntry =
+ | {
+ filename: string;
+ type: "audio";
+ }
+ | {
+ data: Array<{
+ label: string;
+ score: number;
+ }>;
+ type: "chart";
+ }
+ | {
+ filename: string;
+ type: "img";
+ }
+ | {
+ table: string[][];
+ type: "tabular";
+ }
+ | {
+ content: string;
+ label: string;
+ type: "text";
+ }
+ | {
+ text: string;
+ tokens: Array<{
+ end: number;
+ start: number;
+ type: string;
+ }>;
+ type: "text-with-tokens";
+ };

  export interface TaskDemo {
- inputs: TaskDemoEntry[];
+ inputs: TaskDemoEntry[];
  outputs: TaskDemoEntry[];
  }

  export interface TaskData {
- datasets: ExampleRepo[];
- demo: TaskDemo;
- id: PipelineType;
+ datasets: ExampleRepo[];
+ demo: TaskDemo;
+ id: PipelineType;
  isPlaceholder?: boolean;
- label: string;
- libraries: ModelLibraryKey[];
- metrics: ExampleRepo[];
- models: ExampleRepo[];
- spaces: ExampleRepo[];
- summary: string;
- widgetModels: string[];
- youtubeId?: string;
+ label: string;
+ libraries: ModelLibraryKey[];
+ metrics: ExampleRepo[];
+ models: ExampleRepo[];
+ spaces: ExampleRepo[];
+ summary: string;
+ widgetModels: string[];
+ youtubeId?: string;
  }

  export type TaskDataCustom = Omit<TaskData, "id" | "label" | "libraries">;
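For readers skimming the diff: the reshaped `TaskDemoEntry` union and the `TaskDataCustom` alias are what the per-task `data.ts` files further down conform to. A minimal sketch of such a module, with values borrowed from the audio-classification data shown later in this diff; the relative import path is an assumption about the src layout:

```ts
// Hypothetical per-task module conforming to the updated types.
// TaskDataCustom = TaskData without "id", "label" and "libraries".
import type { TaskDataCustom } from "./Types";

const taskData: TaskDataCustom = {
  datasets: [{ description: "A benchmark of 10 different audio tasks.", id: "superb" }],
  demo: {
    // Each demo entry must match exactly one variant of the TaskDemoEntry union,
    // e.g. { filename; type: "audio" } or { data; type: "chart" }.
    inputs: [{ filename: "audio.wav", type: "audio" }],
    outputs: [{ data: [{ label: "Speech", score: 0.9 }], type: "chart" }],
  },
  metrics: [{ description: "", id: "accuracy" }],
  models: [{ description: "A language identification model.", id: "facebook/mms-lid-126" }],
  spaces: [],
  summary: "Audio classification is the task of assigning a label or class to a given audio.",
  widgetModels: ["facebook/mms-lid-126"],
};

export default taskData;
```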
package/src/audio-classification/about.md CHANGED
@@ -55,9 +55,9 @@ import { HfInference } from "@huggingface/inference";

  const inference = new HfInference(HF_ACCESS_TOKEN);
  await inference.audioClassification({
- data: await (await fetch("sample.flac")).blob(),
- model: "facebook/mms-lid-126",
- })
+ data: await (await fetch("sample.flac")).blob(),
+ model: "facebook/mms-lid-126",
+ });
  ```

  ### Speaker Identification
@@ -78,8 +78,8 @@ Would you like to learn more about the topic? Awesome! Here you can find some cu

  ### Scripts for training

- - [PyTorch](https://github.com/huggingface/transformers/tree/main/examples/pytorch/audio-classification)
+ - [PyTorch](https://github.com/huggingface/transformers/tree/main/examples/pytorch/audio-classification)

  ### Documentation

- - [Audio classification task guide](https://huggingface.co/docs/transformers/tasks/audio_classification)
+ - [Audio classification task guide](https://huggingface.co/docs/transformers/tasks/audio_classification)
package/src/audio-classification/data.ts CHANGED
@@ -4,14 +4,14 @@ const taskData: TaskDataCustom = {
  datasets: [
  {
  description: "A benchmark of 10 different audio tasks.",
- id: "superb",
+ id: "superb",
  },
  ],
  demo: {
  inputs: [
  {
  filename: "audio.wav",
- type: "audio",
+ type: "audio",
  },
  ],
  outputs: [
@@ -33,45 +33,45 @@ const taskData: TaskDataCustom = {
  metrics: [
  {
  description: "",
- id: "accuracy",
+ id: "accuracy",
  },
  {
  description: "",
- id: "recall",
+ id: "recall",
  },
  {
  description: "",
- id: "precision",
+ id: "precision",
  },
  {
  description: "",
- id: "f1",
+ id: "f1",
  },
  ],
  models: [
  {
  description: "An easy-to-use model for Command Recognition.",
- id: "speechbrain/google_speech_command_xvector",
+ id: "speechbrain/google_speech_command_xvector",
  },
  {
  description: "An Emotion Recognition model.",
- id: "ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition",
+ id: "ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition",
  },
  {
  description: "A language identification model.",
- id: "facebook/mms-lid-126",
+ id: "facebook/mms-lid-126",
  },
  ],
  spaces: [
  {
  description: "An application that can predict the language spoken in a given audio.",
- id: "akhaliq/Speechbrain-audio-classification",
+ id: "akhaliq/Speechbrain-audio-classification",
  },
  ],
  summary:
  "Audio classification is the task of assigning a label or class to a given audio. It can be used for recognizing which command a user is giving or the emotion of a statement, as well as identifying a speaker.",
  widgetModels: ["facebook/mms-lid-126"],
- youtubeId: "KWwzcmG98Ds",
+ youtubeId: "KWwzcmG98Ds",
  };

  export default taskData;
package/src/audio-to-audio/about.md CHANGED
@@ -29,6 +29,7 @@ def query(filename):

  data = query("sample1.flac")
  ```
+
  You can use [huggingface.js](https://github.com/huggingface/huggingface.js) to infer with audio-to-audio models on Hugging Face Hub.

  ```javascript
@@ -36,9 +37,9 @@ import { HfInference } from "@huggingface/inference";

  const inference = new HfInference(HF_ACCESS_TOKEN);
  await inference.audioToAudio({
- data: await (await fetch("sample.flac")).blob(),
- model: "speechbrain/sepformer-wham",
- })
+ data: await (await fetch("sample.flac")).blob(),
+ model: "speechbrain/sepformer-wham",
+ });
  ```

  ### Audio Source Separation
package/src/audio-to-audio/data.ts CHANGED
@@ -4,60 +4,63 @@ const taskData: TaskDataCustom = {
  datasets: [
  {
  description: "512-element X-vector embeddings of speakers from CMU ARCTIC dataset.",
- id: "Matthijs/cmu-arctic-xvectors",
+ id: "Matthijs/cmu-arctic-xvectors",
  },
  ],
  demo: {
  inputs: [
  {
  filename: "input.wav",
- type: "audio",
+ type: "audio",
  },
  ],
  outputs: [
  {
  filename: "label-0.wav",
- type: "audio",
+ type: "audio",
  },
  {
  filename: "label-1.wav",
- type: "audio",
+ type: "audio",
  },
  ],
  },
  metrics: [
  {
- description: "The Signal-to-Noise ratio is the relationship between the target signal level and the background noise level. It is calculated as the logarithm of the target signal divided by the background noise, in decibels.",
- id: "snri",
+ description:
+ "The Signal-to-Noise ratio is the relationship between the target signal level and the background noise level. It is calculated as the logarithm of the target signal divided by the background noise, in decibels.",
+ id: "snri",
  },
  {
- description: "The Signal-to-Distortion ratio is the relationship between the target signal and the sum of noise, interference, and artifact errors",
- id: "sdri",
+ description:
+ "The Signal-to-Distortion ratio is the relationship between the target signal and the sum of noise, interference, and artifact errors",
+ id: "sdri",
  },
  ],
  models: [
  {
  description: "A solid model of audio source separation.",
- id: "speechbrain/sepformer-wham",
+ id: "speechbrain/sepformer-wham",
  },
  {
  description: "A speech enhancement model.",
- id: "speechbrain/metricgan-plus-voicebank",
+ id: "speechbrain/metricgan-plus-voicebank",
  },
  ],
- spaces: [
+ spaces: [
  {
  description: "An application for speech separation.",
- id: "younver/speechbrain-speech-separation",
+ id: "younver/speechbrain-speech-separation",
  },
  {
  description: "An application for audio style transfer.",
- id: "nakas/audio-diffusion_style_transfer",
+ id: "nakas/audio-diffusion_style_transfer",
  },
  ],
- summary: "Audio-to-Audio is a family of tasks in which the input is an audio and the output is one or multiple generated audios. Some example tasks are speech enhancement and source separation.",
+ summary:
+ "Audio-to-Audio is a family of tasks in which the input is an audio and the output is one or multiple generated audios. Some example tasks are speech enhancement and source separation.",
  widgetModels: ["speechbrain/sepformer-wham"],
- youtubeId: "iohj7nCCYoM",
+ youtubeId: "iohj7nCCYoM",
  };

  export default taskData;
package/src/automatic-speech-recognition/about.md CHANGED
@@ -14,7 +14,7 @@ A caption generation model takes audio as input from sources to generate automat

  Multilingual ASR models can convert audio inputs with multiple languages into transcripts. Some multilingual ASR models include [language identification](https://huggingface.co/tasks/audio-classification) blocks to improve the performance.

- The use of Multilingual ASR has become popular, the idea of maintaining just a single model for all language can simplify the production pipeline. Take a look at [Whisper](https://huggingface.co/openai/whisper-large-v2) to get an idea on how 100+ languages can be processed by a single model.
+ The use of Multilingual ASR has become popular, the idea of maintaining just a single model for all language can simplify the production pipeline. Take a look at [Whisper](https://huggingface.co/openai/whisper-large-v2) to get an idea on how 100+ languages can be processed by a single model.

  ## Inference

@@ -56,9 +56,9 @@ import { HfInference } from "@huggingface/inference";

  const inference = new HfInference(HF_ACCESS_TOKEN);
  await inference.automaticSpeechRecognition({
- data: await (await fetch("sample.flac")).blob(),
- model: "openai/whisper-large-v2",
- })
+ data: await (await fetch("sample.flac")).blob(),
+ model: "openai/whisper-large-v2",
+ });
  ```

  ## Solving ASR for your own data
@@ -74,6 +74,7 @@ On December 2022, over 450 participants collaborated, fine-tuned and shared 600+
  These events help democratize ASR for all languages, including low-resource languages. In addition to the trained models, the [event](https://github.com/huggingface/community-events/tree/main/whisper-fine-tuning-event) helps to build practical collaborative knowledge.

  ## Useful Resources
+
  - [Fine-tuning MetaAI's MMS Adapter Models for Multi-Lingual ASR](https://huggingface.co/blog/mms_adapters)
  - [Making automatic speech recognition work on large files with Wav2Vec2 in 🤗 Transformers](https://huggingface.co/blog/asr-chunking)
  - [Boosting Wav2Vec2 with n-grams in 🤗 Transformers](https://huggingface.co/blog/wav2vec2-with-ngram)
package/src/automatic-speech-recognition/data.ts CHANGED
@@ -4,74 +4,75 @@ const taskData: TaskDataCustom = {
  datasets: [
  {
  description: "18,000 hours of multilingual audio-text dataset in 108 languages.",
- id: "mozilla-foundation/common_voice_13_0",
+ id: "mozilla-foundation/common_voice_13_0",
  },
  {
  description: "An English dataset with 1,000 hours of data.",
- id: "librispeech_asr",
+ id: "librispeech_asr",
  },
  {
  description: "High quality, multi-speaker audio data and their transcriptions in various languages.",
- id: "openslr",
+ id: "openslr",
  },
  ],
  demo: {
  inputs: [
  {
  filename: "input.flac",
- type: "audio",
+ type: "audio",
  },
  ],
  outputs: [
  {
  /// GOING ALONG SLUSHY COUNTRY ROADS AND SPEAKING TO DAMP AUDIENCES I
- label: "Transcript",
+ label: "Transcript",
  content: "Going along slushy country roads and speaking to damp audiences in...",
- type: "text",
+ type: "text",
  },
  ],
  },
  metrics: [
  {
  description: "",
- id: "wer",
+ id: "wer",
  },
  {
  description: "",
- id: "cer",
+ id: "cer",
  },
  ],
  models: [
  {
  description: "A powerful ASR model by OpenAI.",
- id: "openai/whisper-large-v2",
+ id: "openai/whisper-large-v2",
  },
  {
  description: "A good generic ASR model by MetaAI.",
- id: "facebook/wav2vec2-base-960h",
+ id: "facebook/wav2vec2-base-960h",
  },
  {
  description: "An end-to-end model that performs ASR and Speech Translation by MetaAI.",
- id: "facebook/s2t-small-mustc-en-fr-st",
+ id: "facebook/s2t-small-mustc-en-fr-st",
  },
  ],
- spaces: [
+ spaces: [
  {
  description: "A powerful general-purpose speech recognition application.",
- id: "openai/whisper",
+ id: "openai/whisper",
  },
  {
  description: "Fastest speech recognition application.",
- id: "sanchit-gandhi/whisper-jax",
+ id: "sanchit-gandhi/whisper-jax",
  },
  {
  description: "An application that transcribes speeches in YouTube videos.",
- id: "jeffistyping/Youtube-Whisperer",
+ id: "jeffistyping/Youtube-Whisperer",
  },
  ],
- summary: "Automatic Speech Recognition (ASR), also known as Speech to Text (STT), is the task of transcribing a given audio to text. It has many applications, such as voice user interfaces.",
+ summary:
+ "Automatic Speech Recognition (ASR), also known as Speech to Text (STT), is the task of transcribing a given audio to text. It has many applications, such as voice user interfaces.",
  widgetModels: ["openai/whisper-large-v2"],
- youtubeId: "TksaY_FDgnk",
+ youtubeId: "TksaY_FDgnk",
  };

  export default taskData;
package/src/const.ts CHANGED
@@ -1,51 +1,59 @@
- import type { ModelLibraryKey } from "../../js/src/lib/interfaces/Libraries";
- import type { PipelineType } from "../../js/src/lib/interfaces/Types";
+ import type { ModelLibraryKey } from "./modelLibraries";
+ import type { PipelineType } from "./pipelines";

- /*
+ /**
  * Model libraries compatible with each ML task
  */
  export const TASKS_MODEL_LIBRARIES: Record<PipelineType, ModelLibraryKey[]> = {
- "audio-classification": ["speechbrain", "transformers"],
- "audio-to-audio": ["asteroid", "speechbrain"],
- "automatic-speech-recognition": ["espnet", "nemo", "speechbrain", "transformers", "transformers.js"],
- "conversational": ["transformers"],
- "depth-estimation": ["transformers"],
- "document-question-answering": ["transformers"],
- "feature-extraction": ["sentence-transformers", "transformers", "transformers.js"],
- "fill-mask": ["transformers", "transformers.js"],
- "graph-ml": ["transformers"],
- "image-classification": ["keras", "timm", "transformers", "transformers.js"],
- "image-segmentation": ["transformers", "transformers.js"],
- "image-to-image": [],
- "image-to-text": ["transformers.js"],
- "video-classification": [],
- "multiple-choice": ["transformers"],
- "object-detection": ["transformers", "transformers.js"],
- "other": [],
- "question-answering": ["adapter-transformers", "allennlp", "transformers", "transformers.js"],
- "robotics": [],
- "reinforcement-learning": ["transformers", "stable-baselines3", "ml-agents", "sample-factory"],
- "sentence-similarity": ["sentence-transformers", "spacy", "transformers.js"],
- "summarization": ["transformers", "transformers.js"],
- "table-question-answering": ["transformers"],
- "table-to-text": ["transformers"],
- "tabular-classification": ["sklearn"],
- "tabular-regression": ["sklearn"],
- "tabular-to-text": ["transformers"],
- "text-classification": ["adapter-transformers", "spacy", "transformers", "transformers.js"],
- "text-generation": ["transformers", "transformers.js"],
- "text-retrieval": [],
- "text-to-image": [],
- "text-to-speech": ["espnet", "tensorflowtts", "transformers"],
- "text-to-audio": ["transformers"],
- "text-to-video": [],
- "text2text-generation": ["transformers", "transformers.js"],
- "time-series-forecasting": [],
- "token-classification": ["adapter-transformers", "flair", "spacy", "span-marker", "stanza", "transformers", "transformers.js"],
- "translation": ["transformers", "transformers.js"],
+ "audio-classification": ["speechbrain", "transformers"],
+ "audio-to-audio": ["asteroid", "speechbrain"],
+ "automatic-speech-recognition": ["espnet", "nemo", "speechbrain", "transformers", "transformers.js"],
+ conversational: ["transformers"],
+ "depth-estimation": ["transformers"],
+ "document-question-answering": ["transformers"],
+ "feature-extraction": ["sentence-transformers", "transformers", "transformers.js"],
+ "fill-mask": ["transformers", "transformers.js"],
+ "graph-ml": ["transformers"],
+ "image-classification": ["keras", "timm", "transformers", "transformers.js"],
+ "image-segmentation": ["transformers", "transformers.js"],
+ "image-to-image": [],
+ "image-to-text": ["transformers.js"],
+ "video-classification": [],
+ "multiple-choice": ["transformers"],
+ "object-detection": ["transformers", "transformers.js"],
+ other: [],
+ "question-answering": ["adapter-transformers", "allennlp", "transformers", "transformers.js"],
+ robotics: [],
+ "reinforcement-learning": ["transformers", "stable-baselines3", "ml-agents", "sample-factory"],
+ "sentence-similarity": ["sentence-transformers", "spacy", "transformers.js"],
+ summarization: ["transformers", "transformers.js"],
+ "table-question-answering": ["transformers"],
+ "table-to-text": ["transformers"],
+ "tabular-classification": ["sklearn"],
+ "tabular-regression": ["sklearn"],
+ "tabular-to-text": ["transformers"],
+ "text-classification": ["adapter-transformers", "spacy", "transformers", "transformers.js"],
+ "text-generation": ["transformers", "transformers.js"],
+ "text-retrieval": [],
+ "text-to-image": [],
+ "text-to-speech": ["espnet", "tensorflowtts", "transformers"],
+ "text-to-audio": ["transformers"],
+ "text-to-video": [],
+ "text2text-generation": ["transformers", "transformers.js"],
+ "time-series-forecasting": [],
+ "token-classification": [
+ "adapter-transformers",
+ "flair",
+ "spacy",
+ "span-marker",
+ "stanza",
+ "transformers",
+ "transformers.js",
+ ],
+ translation: ["transformers", "transformers.js"],
  "unconditional-image-generation": [],
- "visual-question-answering": [],
- "voice-activity-detection": [],
- "zero-shot-classification": ["transformers", "transformers.js"],
+ "visual-question-answering": [],
+ "voice-activity-detection": [],
+ "zero-shot-classification": ["transformers", "transformers.js"],
  "zero-shot-image-classification": ["transformers.js"],
  };
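The table above is a plain `Record<PipelineType, ModelLibraryKey[]>`, so consumers can index it directly. A usage sketch; the `@huggingface/tasks` import path assumes these symbols are re-exported from the package root, which this diff does not show:

```ts
// Hypothetical lookup helper; the root import path is an assumption.
import { TASKS_MODEL_LIBRARIES } from "@huggingface/tasks";
import type { ModelLibraryKey, PipelineType } from "@huggingface/tasks";

function librariesForTask(task: PipelineType): ModelLibraryKey[] {
  // An empty array means no library integration is registered for that task
  // in this version (e.g. "image-to-image" or "text-to-video").
  return TASKS_MODEL_LIBRARIES[task];
}

console.log(librariesForTask("automatic-speech-recognition"));
// -> ["espnet", "nemo", "speechbrain", "transformers", "transformers.js"]
```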
package/src/conversational/about.md CHANGED
@@ -2,15 +2,15 @@

  ### Chatbot 💬

- Chatbots are used to have conversations instead of providing direct contact with a live human. They are used to provide customer service, sales, and can even be used to play games (see [ELIZA](https://en.wikipedia.org/wiki/ELIZA) from 1966 for one of the earliest examples).
+ Chatbots are used to have conversations instead of providing direct contact with a live human. They are used to provide customer service, sales, and can even be used to play games (see [ELIZA](https://en.wikipedia.org/wiki/ELIZA) from 1966 for one of the earliest examples).

  ## Voice Assistants 🎙️

- Conversational response models are used as part of voice assistants to provide appropriate responses to voice based queries.
+ Conversational response models are used as part of voice assistants to provide appropriate responses to voice based queries.

  ## Inference

- You can infer with Conversational models with the 🤗 Transformers library using the `conversational` pipeline. This pipeline takes a conversation prompt or a list of conversations and generates responses for each prompt. The models that this pipeline can use are models that have been fine-tuned on a multi-turn conversational task (see https://huggingface.co/models?filter=conversational for a list of updated Conversational models).
+ You can infer with Conversational models with the 🤗 Transformers library using the `conversational` pipeline. This pipeline takes a conversation prompt or a list of conversations and generates responses for each prompt. The models that this pipeline can use are models that have been fine-tuned on a multi-turn conversational task (see https://huggingface.co/models?filter=conversational for a list of updated Conversational models).

  ```python
  from transformers import pipeline, Conversation
@@ -22,10 +22,10 @@ converse([conversation_1, conversation_2])

  ## Output:
  ## Conversation 1
- ## user >> Going to the movies tonight - any suggestions?
+ ## user >> Going to the movies tonight - any suggestions?
  ## bot >> The Big Lebowski ,
  ## Conversation 2
- ## user >> What's the last book you have read?
+ ## user >> What's the last book you have read?
  ## bot >> The Last Question
  ```

@@ -36,9 +36,9 @@ import { HfInference } from "@huggingface/inference";

  const inference = new HfInference(HF_ACCESS_TOKEN);
  await inference.conversational({
- model: 'facebook/blenderbot-400M-distill',
- inputs: "Going to the movies tonight - any suggestions?"
- })
+ model: "facebook/blenderbot-400M-distill",
+ inputs: "Going to the movies tonight - any suggestions?",
+ });
  ```

  ## Useful Resources
@@ -47,4 +47,4 @@ await inference.conversational({
  - [Reinforcement Learning from Human Feedback From Zero to ChatGPT](https://www.youtube.com/watch?v=EAd4oQtEJOM)
  - [A guide on Dialog Agents](https://huggingface.co/blog/dialog-agents)

- This page was made possible thanks to the efforts of [Viraat Aryabumi](https://huggingface.co/viraat).
+ This page was made possible thanks to the efforts of [Viraat Aryabumi](https://huggingface.co/viraat).