@huggingface/tasks 0.0.6 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -2
- package/dist/index.d.ts +381 -5
- package/dist/index.js +1986 -77
- package/dist/index.mjs +1985 -76
- package/package.json +2 -4
- package/src/default-widget-inputs.ts +718 -0
- package/src/index.ts +35 -4
- package/src/library-to-tasks.ts +47 -0
- package/src/library-ui-elements.ts +765 -0
- package/src/model-data.ts +239 -0
- package/src/pipelines.ts +39 -0
- package/src/snippets/curl.ts +63 -0
- package/src/snippets/index.ts +6 -0
- package/src/snippets/inputs.ts +144 -0
- package/src/snippets/js.ts +150 -0
- package/src/snippets/python.ts +155 -0
- package/src/{audio-classification → tasks/audio-classification}/about.md +2 -1
- package/src/{audio-classification → tasks/audio-classification}/data.ts +3 -3
- package/src/{audio-to-audio → tasks/audio-to-audio}/data.ts +1 -1
- package/src/{automatic-speech-recognition → tasks/automatic-speech-recognition}/about.md +3 -2
- package/src/{automatic-speech-recognition → tasks/automatic-speech-recognition}/data.ts +6 -6
- package/src/{conversational → tasks/conversational}/data.ts +1 -1
- package/src/{depth-estimation → tasks/depth-estimation}/data.ts +1 -1
- package/src/{document-question-answering → tasks/document-question-answering}/data.ts +1 -1
- package/src/{feature-extraction → tasks/feature-extraction}/data.ts +2 -7
- package/src/{fill-mask → tasks/fill-mask}/data.ts +1 -1
- package/src/{image-classification → tasks/image-classification}/data.ts +1 -1
- package/src/{image-segmentation → tasks/image-segmentation}/data.ts +1 -1
- package/src/{image-to-image → tasks/image-to-image}/about.md +8 -7
- package/src/{image-to-image → tasks/image-to-image}/data.ts +1 -1
- package/src/{image-to-text → tasks/image-to-text}/data.ts +1 -1
- package/src/{tasksData.ts → tasks/index.ts} +144 -15
- package/src/{object-detection → tasks/object-detection}/data.ts +1 -1
- package/src/{placeholder → tasks/placeholder}/data.ts +1 -1
- package/src/{question-answering → tasks/question-answering}/data.ts +1 -1
- package/src/{reinforcement-learning → tasks/reinforcement-learning}/data.ts +1 -1
- package/src/{sentence-similarity → tasks/sentence-similarity}/data.ts +1 -1
- package/src/{summarization → tasks/summarization}/data.ts +1 -1
- package/src/{table-question-answering → tasks/table-question-answering}/data.ts +1 -1
- package/src/{tabular-classification → tasks/tabular-classification}/data.ts +1 -1
- package/src/{tabular-regression → tasks/tabular-regression}/data.ts +1 -1
- package/src/{text-classification → tasks/text-classification}/data.ts +1 -1
- package/src/{text-generation → tasks/text-generation}/about.md +13 -3
- package/src/{text-generation → tasks/text-generation}/data.ts +2 -2
- package/src/{text-to-image → tasks/text-to-image}/data.ts +1 -1
- package/src/{text-to-speech → tasks/text-to-speech}/about.md +2 -1
- package/src/{text-to-speech → tasks/text-to-speech}/data.ts +4 -4
- package/src/{text-to-video → tasks/text-to-video}/data.ts +1 -1
- package/src/{token-classification → tasks/token-classification}/data.ts +1 -1
- package/src/{translation → tasks/translation}/data.ts +1 -1
- package/src/{unconditional-image-generation → tasks/unconditional-image-generation}/data.ts +1 -1
- package/src/{video-classification → tasks/video-classification}/about.md +8 -28
- package/src/{video-classification → tasks/video-classification}/data.ts +1 -1
- package/src/{visual-question-answering → tasks/visual-question-answering}/data.ts +1 -1
- package/src/{zero-shot-classification → tasks/zero-shot-classification}/data.ts +1 -1
- package/src/{zero-shot-image-classification → tasks/zero-shot-image-classification}/data.ts +1 -1
- package/src/Types.ts +0 -64
- package/src/const.ts +0 -59
- /package/src/{modelLibraries.ts → model-libraries.ts} +0 -0
- /package/src/{audio-to-audio → tasks/audio-to-audio}/about.md +0 -0
- /package/src/{conversational → tasks/conversational}/about.md +0 -0
- /package/src/{depth-estimation → tasks/depth-estimation}/about.md +0 -0
- /package/src/{document-question-answering → tasks/document-question-answering}/about.md +0 -0
- /package/src/{feature-extraction → tasks/feature-extraction}/about.md +0 -0
- /package/src/{fill-mask → tasks/fill-mask}/about.md +0 -0
- /package/src/{image-classification → tasks/image-classification}/about.md +0 -0
- /package/src/{image-segmentation → tasks/image-segmentation}/about.md +0 -0
- /package/src/{image-to-text → tasks/image-to-text}/about.md +0 -0
- /package/src/{object-detection → tasks/object-detection}/about.md +0 -0
- /package/src/{placeholder → tasks/placeholder}/about.md +0 -0
- /package/src/{question-answering → tasks/question-answering}/about.md +0 -0
- /package/src/{reinforcement-learning → tasks/reinforcement-learning}/about.md +0 -0
- /package/src/{sentence-similarity → tasks/sentence-similarity}/about.md +0 -0
- /package/src/{summarization → tasks/summarization}/about.md +0 -0
- /package/src/{table-question-answering → tasks/table-question-answering}/about.md +0 -0
- /package/src/{tabular-classification → tasks/tabular-classification}/about.md +0 -0
- /package/src/{tabular-regression → tasks/tabular-regression}/about.md +0 -0
- /package/src/{text-classification → tasks/text-classification}/about.md +0 -0
- /package/src/{text-to-image → tasks/text-to-image}/about.md +0 -0
- /package/src/{text-to-video → tasks/text-to-video}/about.md +0 -0
- /package/src/{token-classification → tasks/token-classification}/about.md +0 -0
- /package/src/{translation → tasks/translation}/about.md +0 -0
- /package/src/{unconditional-image-generation → tasks/unconditional-image-generation}/about.md +0 -0
- /package/src/{visual-question-answering → tasks/visual-question-answering}/about.md +0 -0
- /package/src/{zero-shot-classification → tasks/zero-shot-classification}/about.md +0 -0
- /package/src/{zero-shot-image-classification → tasks/zero-shot-image-classification}/about.md +0 -0
|
@@ -26,11 +26,21 @@ A popular variant of Text Generation models predicts the next word given a bunch
|
|
|
26
26
|
- Continue a story given the first sentences.
|
|
27
27
|
- Provided a code description, generate the code.
|
|
28
28
|
|
|
29
|
-
The most popular models for this task are GPT-based models or [Llama series](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf). These models are trained on data that has no labels, so you just need plain text to train your own model. You can train text generation models to generate a wide variety of documents, from code to stories.
|
|
29
|
+
The most popular models for this task are GPT-based models, [Mistral](https://huggingface.co/mistralai/Mistral-7B-v0.1) or [Llama series](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf). These models are trained on data that has no labels, so you just need plain text to train your own model. You can train text generation models to generate a wide variety of documents, from code to stories.
|
|
30
30
|
|
|
31
31
|
### Text-to-Text Generation Models
|
|
32
32
|
|
|
33
|
-
These models are trained to learn the mapping between a pair of texts (e.g. translation from one language to another). The most popular variants of these models are [FLAN-T5](https://huggingface.co/google/flan-t5-xxl), and [BART](https://huggingface.co/docs/transformers/model_doc/bart). Text-to-Text models are trained with multi-tasking capabilities, they can accomplish a wide range of tasks, including summarization, translation, and text classification.
|
|
33
|
+
These models are trained to learn the mapping between a pair of texts (e.g. translation from one language to another). The most popular variants of these models are [NLLB](https://huggingface.co/facebook/nllb-200-distilled-600M), [FLAN-T5](https://huggingface.co/google/flan-t5-xxl), and [BART](https://huggingface.co/docs/transformers/model_doc/bart). Text-to-Text models are trained with multi-tasking capabilities; they can accomplish a wide range of tasks, including summarization, translation, and text classification.
|
|
34
|
+
|
|
35
|
+
## Language Model Variants
|
|
36
|
+
|
|
37
|
+
When it comes to text generation, the underlying language model can come in several types:
|
|
38
|
+
|
|
39
|
+
- **Base models:** these are plain language models like [Mistral 7B](https://huggingface.co/mistralai/Mistral-7B-v0.1) and [Llama-2-70b](https://huggingface.co/meta-llama/Llama-2-70b-hf). These models are good for fine-tuning and few-shot prompting.
|
|
40
|
+
|
|
41
|
+
- **Instruction-trained models:** these models are trained in a multi-task manner to follow a broad range of instructions like "Write me a recipe for chocolate cake". Models like [Flan-T5](https://huggingface.co/google/flan-t5-xl), [Mistral-7B-Instruct-v0.1](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1), and [falcon-40b-instruct](https://huggingface.co/tiiuae/falcon-40b-instruct) are examples of instruction-trained models. In general, instruction-trained models will produce better responses to instructions than base models.
|
|
42
|
+
|
|
43
|
+
- **Human feedback models:** these models extend base and instruction-trained models by incorporating human feedback that rates the quality of the generated text according to criteria like [helpfulness, honesty, and harmlessness](https://arxiv.org/abs/2112.00861). The human feedback is then combined with an optimization technique like reinforcement learning to align the original model more closely with human preferences. The overall methodology is often called [Reinforcement Learning from Human Feedback](https://huggingface.co/blog/rlhf), or RLHF for short. [Llama2-Chat](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf) is an open-source model aligned through human feedback.
|
|
34
44
|
|
|
35
45
|
## Inference
|
|
36
46
|
|
|
@@ -38,7 +48,7 @@ You can use the 🤗 Transformers library `text-generation` pipeline to do infer
|
|
|
38
48
|
|
|
39
49
|
```python
|
|
40
50
|
from transformers import pipeline
|
|
41
|
-
generator = pipeline('text-generation', model = '
|
|
51
|
+
generator = pipeline('text-generation', model = 'HuggingFaceH4/zephyr-7b-beta')
|
|
42
52
|
generator("Hello, I'm a language model", max_length = 30, num_return_sequences=3)
|
|
43
53
|
## [{'generated_text': "Hello, I'm a language modeler. So while writing this, when I went out to meet my wife or come home she told me that my"},
|
|
44
54
|
## {'generated_text': "Hello, I'm a language modeler. I write and maintain software in Python. I love to code, and that includes coding things that require writing"}, ...
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { TaskDataCustom } from "
|
|
1
|
+
import type { TaskDataCustom } from "..";
|
|
2
2
|
|
|
3
3
|
const taskData: TaskDataCustom = {
|
|
4
4
|
datasets: [
|
|
@@ -119,7 +119,7 @@ const taskData: TaskDataCustom = {
|
|
|
119
119
|
],
|
|
120
120
|
summary:
|
|
121
121
|
"Generating text is the task of producing new text. These models can, for example, fill in incomplete text or paraphrase.",
|
|
122
|
-
widgetModels: ["
|
|
122
|
+
widgetModels: ["HuggingFaceH4/zephyr-7b-beta"],
|
|
123
123
|
youtubeId: "Vpjb1lu0MDk",
|
|
124
124
|
};
|
|
125
125
|
|
|
@@ -25,7 +25,7 @@ def query(payload):
|
|
|
25
25
|
response = requests.post(API_URL, headers=headers, json=payload)
|
|
26
26
|
return response
|
|
27
27
|
|
|
28
|
-
output = query({"text_inputs": "
|
|
28
|
+
output = query({"text_inputs": "Max is the best doggo."})
|
|
29
29
|
```
|
|
30
30
|
|
|
31
31
|
You can also use libraries such as [espnet](https://huggingface.co/models?library=espnet&pipeline_tag=text-to-speech&sort=downloads) or [transformers](https://huggingface.co/models?pipeline_tag=text-to-speech&library=transformers&sort=trending) if you want to handle the Inference directly.
|
|
@@ -56,6 +56,7 @@ await inference.textToSpeech({
|
|
|
56
56
|
|
|
57
57
|
## Useful Resources
|
|
58
58
|
|
|
59
|
+
- [Hugging Face Audio Course](https://huggingface.co/learn/audio-course/chapter6/introduction)
|
|
59
60
|
- [ML for Audio Study Group - Text to Speech Deep Dive](https://www.youtube.com/watch?v=aLBedWj-5CQ)
|
|
60
61
|
- [An introduction to SpeechT5, a multi-purpose speech recognition and synthesis model](https://huggingface.co/blog/speecht5).
|
|
61
62
|
- [A guide on Fine-tuning Whisper For Multilingual ASR with 🤗Transformers](https://huggingface.co/blog/fine-tune-whisper)
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { TaskDataCustom } from "
|
|
1
|
+
import type { TaskDataCustom } from "..";
|
|
2
2
|
|
|
3
3
|
const taskData: TaskDataCustom = {
|
|
4
4
|
datasets: [
|
|
@@ -52,8 +52,8 @@ const taskData: TaskDataCustom = {
|
|
|
52
52
|
id: "suno/bark",
|
|
53
53
|
},
|
|
54
54
|
{
|
|
55
|
-
description: "
|
|
56
|
-
id: "coqui/
|
|
55
|
+
description: "XTTS is a Voice generation model that lets you clone voices into different languages.",
|
|
56
|
+
id: "coqui/xtts",
|
|
57
57
|
},
|
|
58
58
|
{
|
|
59
59
|
description: "An application that synthesizes speech for various speaker types.",
|
|
@@ -62,7 +62,7 @@ const taskData: TaskDataCustom = {
|
|
|
62
62
|
],
|
|
63
63
|
summary:
|
|
64
64
|
"Text-to-Speech (TTS) is the task of generating natural sounding speech given text input. TTS models can be extended to have a single model that generates speech for multiple speakers and multiple languages.",
|
|
65
|
-
widgetModels: ["
|
|
65
|
+
widgetModels: ["suno/bark"],
|
|
66
66
|
youtubeId: "NW62DpzJ274",
|
|
67
67
|
};
|
|
68
68
|
|
|
@@ -15,34 +15,14 @@ Models trained in video classification can improve user experience by organizing
|
|
|
15
15
|
Below you can find code for inferring with a pre-trained video classification model.
|
|
16
16
|
|
|
17
17
|
```python
|
|
18
|
-
from transformers import
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
#
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
# Sub-sample a fixed set of frames and convert them to a NumPy array.
|
|
28
|
-
num_frames = 16
|
|
29
|
-
subsampler = UniformTemporalSubsample(num_frames)
|
|
30
|
-
subsampled_frames = subsampler(video_data)
|
|
31
|
-
video_data_np = subsampled_frames.numpy().transpose(1, 2, 3, 0)
|
|
32
|
-
|
|
33
|
-
# Preprocess the video frames.
|
|
34
|
-
inputs = feature_extractor(list(video_data_np), return_tensors="pt")
|
|
35
|
-
|
|
36
|
-
# Run inference
|
|
37
|
-
with torch.no_grad():
|
|
38
|
-
outputs = model(**inputs)
|
|
39
|
-
logits = outputs.logits
|
|
40
|
-
|
|
41
|
-
# Model predicts one of the 400 Kinetics 400 classes
|
|
42
|
-
predicted_label = logits.argmax(-1).item()
|
|
43
|
-
print(model.config.id2label[predicted_label])
|
|
44
|
-
# `eating spaghetti` (if you chose this video:
|
|
45
|
-
# https://hf.co/datasets/nielsr/video-demo/resolve/main/eating_spaghetti.mp4)
|
|
18
|
+
from transformers import pipeline
|
|
19
|
+
|
|
20
|
+
pipe = pipeline(task = "video-classification", model="nateraw/videomae-base-finetuned-ucf101-subset")
|
|
21
|
+
pipe("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/basketball.avi?download=true")
|
|
22
|
+
|
|
23
|
+
#[{'score': 0.90, 'label': 'BasketballDunk'},
|
|
24
|
+
# {'score': 0.02, 'label': 'BalanceBeam'},
|
|
25
|
+
# ... ]
|
|
46
26
|
```
|
|
47
27
|
|
|
48
28
|
## Useful Resources
|
package/src/Types.ts
DELETED
|
@@ -1,64 +0,0 @@
|
|
|
1
|
-
import type { ModelLibraryKey } from "./modelLibraries";
|
|
2
|
-
import type { PipelineType } from "./pipelines";
|
|
3
|
-
|
|
4
|
-
export interface ExampleRepo {
|
|
5
|
-
description: string;
|
|
6
|
-
id: string;
|
|
7
|
-
}
|
|
8
|
-
|
|
9
|
-
export type TaskDemoEntry =
|
|
10
|
-
| {
|
|
11
|
-
filename: string;
|
|
12
|
-
type: "audio";
|
|
13
|
-
}
|
|
14
|
-
| {
|
|
15
|
-
data: Array<{
|
|
16
|
-
label: string;
|
|
17
|
-
score: number;
|
|
18
|
-
}>;
|
|
19
|
-
type: "chart";
|
|
20
|
-
}
|
|
21
|
-
| {
|
|
22
|
-
filename: string;
|
|
23
|
-
type: "img";
|
|
24
|
-
}
|
|
25
|
-
| {
|
|
26
|
-
table: string[][];
|
|
27
|
-
type: "tabular";
|
|
28
|
-
}
|
|
29
|
-
| {
|
|
30
|
-
content: string;
|
|
31
|
-
label: string;
|
|
32
|
-
type: "text";
|
|
33
|
-
}
|
|
34
|
-
| {
|
|
35
|
-
text: string;
|
|
36
|
-
tokens: Array<{
|
|
37
|
-
end: number;
|
|
38
|
-
start: number;
|
|
39
|
-
type: string;
|
|
40
|
-
}>;
|
|
41
|
-
type: "text-with-tokens";
|
|
42
|
-
};
|
|
43
|
-
|
|
44
|
-
export interface TaskDemo {
|
|
45
|
-
inputs: TaskDemoEntry[];
|
|
46
|
-
outputs: TaskDemoEntry[];
|
|
47
|
-
}
|
|
48
|
-
|
|
49
|
-
export interface TaskData {
|
|
50
|
-
datasets: ExampleRepo[];
|
|
51
|
-
demo: TaskDemo;
|
|
52
|
-
id: PipelineType;
|
|
53
|
-
isPlaceholder?: boolean;
|
|
54
|
-
label: string;
|
|
55
|
-
libraries: ModelLibraryKey[];
|
|
56
|
-
metrics: ExampleRepo[];
|
|
57
|
-
models: ExampleRepo[];
|
|
58
|
-
spaces: ExampleRepo[];
|
|
59
|
-
summary: string;
|
|
60
|
-
widgetModels: string[];
|
|
61
|
-
youtubeId?: string;
|
|
62
|
-
}
|
|
63
|
-
|
|
64
|
-
export type TaskDataCustom = Omit<TaskData, "id" | "label" | "libraries">;
|
package/src/const.ts
DELETED
|
@@ -1,59 +0,0 @@
|
|
|
1
|
-
import type { ModelLibraryKey } from "./modelLibraries";
|
|
2
|
-
import type { PipelineType } from "./pipelines";
|
|
3
|
-
|
|
4
|
-
/**
|
|
5
|
-
* Model libraries compatible with each ML task
|
|
6
|
-
*/
|
|
7
|
-
export const TASKS_MODEL_LIBRARIES: Record<PipelineType, ModelLibraryKey[]> = {
|
|
8
|
-
"audio-classification": ["speechbrain", "transformers"],
|
|
9
|
-
"audio-to-audio": ["asteroid", "speechbrain"],
|
|
10
|
-
"automatic-speech-recognition": ["espnet", "nemo", "speechbrain", "transformers", "transformers.js"],
|
|
11
|
-
conversational: ["transformers"],
|
|
12
|
-
"depth-estimation": ["transformers"],
|
|
13
|
-
"document-question-answering": ["transformers"],
|
|
14
|
-
"feature-extraction": ["sentence-transformers", "transformers", "transformers.js"],
|
|
15
|
-
"fill-mask": ["transformers", "transformers.js"],
|
|
16
|
-
"graph-ml": ["transformers"],
|
|
17
|
-
"image-classification": ["keras", "timm", "transformers", "transformers.js"],
|
|
18
|
-
"image-segmentation": ["transformers", "transformers.js"],
|
|
19
|
-
"image-to-image": [],
|
|
20
|
-
"image-to-text": ["transformers.js"],
|
|
21
|
-
"video-classification": [],
|
|
22
|
-
"multiple-choice": ["transformers"],
|
|
23
|
-
"object-detection": ["transformers", "transformers.js"],
|
|
24
|
-
other: [],
|
|
25
|
-
"question-answering": ["adapter-transformers", "allennlp", "transformers", "transformers.js"],
|
|
26
|
-
robotics: [],
|
|
27
|
-
"reinforcement-learning": ["transformers", "stable-baselines3", "ml-agents", "sample-factory"],
|
|
28
|
-
"sentence-similarity": ["sentence-transformers", "spacy", "transformers.js"],
|
|
29
|
-
summarization: ["transformers", "transformers.js"],
|
|
30
|
-
"table-question-answering": ["transformers"],
|
|
31
|
-
"table-to-text": ["transformers"],
|
|
32
|
-
"tabular-classification": ["sklearn"],
|
|
33
|
-
"tabular-regression": ["sklearn"],
|
|
34
|
-
"tabular-to-text": ["transformers"],
|
|
35
|
-
"text-classification": ["adapter-transformers", "spacy", "transformers", "transformers.js"],
|
|
36
|
-
"text-generation": ["transformers", "transformers.js"],
|
|
37
|
-
"text-retrieval": [],
|
|
38
|
-
"text-to-image": [],
|
|
39
|
-
"text-to-speech": ["espnet", "tensorflowtts", "transformers"],
|
|
40
|
-
"text-to-audio": ["transformers"],
|
|
41
|
-
"text-to-video": [],
|
|
42
|
-
"text2text-generation": ["transformers", "transformers.js"],
|
|
43
|
-
"time-series-forecasting": [],
|
|
44
|
-
"token-classification": [
|
|
45
|
-
"adapter-transformers",
|
|
46
|
-
"flair",
|
|
47
|
-
"spacy",
|
|
48
|
-
"span-marker",
|
|
49
|
-
"stanza",
|
|
50
|
-
"transformers",
|
|
51
|
-
"transformers.js",
|
|
52
|
-
],
|
|
53
|
-
translation: ["transformers", "transformers.js"],
|
|
54
|
-
"unconditional-image-generation": [],
|
|
55
|
-
"visual-question-answering": [],
|
|
56
|
-
"voice-activity-detection": [],
|
|
57
|
-
"zero-shot-classification": ["transformers", "transformers.js"],
|
|
58
|
-
"zero-shot-image-classification": ["transformers.js"],
|
|
59
|
-
};
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
/package/src/{unconditional-image-generation → tasks/unconditional-image-generation}/about.md
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
/package/src/{zero-shot-image-classification → tasks/zero-shot-image-classification}/about.md
RENAMED
|
File without changes
|