@huggingface/tasks 0.3.1 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +126 -179
- package/dist/index.d.ts +26 -13
- package/dist/index.js +125 -179
- package/package.json +1 -1
- package/src/default-widget-inputs.ts +2 -2
- package/src/index.ts +3 -0
- package/src/library-to-tasks.ts +1 -1
- package/src/model-data.ts +2 -0
- package/src/pipelines.ts +16 -12
- package/src/snippets/curl.ts +0 -1
- package/src/snippets/inputs.ts +0 -8
- package/src/snippets/js.ts +0 -1
- package/src/snippets/python.ts +0 -1
- package/src/tasks/index.ts +4 -4
- package/src/tokenizer-data.ts +24 -0
- package/src/tasks/conversational/about.md +0 -50
- package/src/tasks/conversational/data.ts +0 -66
package/package.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import type { WidgetExample } from "./widget-example";
|
|
2
|
-
import type { PipelineType } from "./pipelines";
|
|
2
|
+
import type { WidgetType } from "./pipelines";
|
|
3
3
|
|
|
4
4
|
type LanguageCode = string;
|
|
5
5
|
|
|
6
|
-
type PerLanguageMapping = Map<PipelineType, string[] | WidgetExample[]>;
|
|
6
|
+
type PerLanguageMapping = Map<WidgetType, string[] | WidgetExample[]>;
|
|
7
7
|
|
|
8
8
|
/// NOTE TO CONTRIBUTORS:
|
|
9
9
|
///
|
package/src/index.ts
CHANGED
|
@@ -5,6 +5,7 @@ export * from "./tasks";
|
|
|
5
5
|
export {
|
|
6
6
|
PIPELINE_DATA,
|
|
7
7
|
PIPELINE_TYPES,
|
|
8
|
+
type WidgetType,
|
|
8
9
|
type PipelineType,
|
|
9
10
|
type PipelineData,
|
|
10
11
|
type Modality,
|
|
@@ -16,6 +17,7 @@ export {
|
|
|
16
17
|
export { ALL_DISPLAY_MODEL_LIBRARY_KEYS, ALL_MODEL_LIBRARY_KEYS, MODEL_LIBRARIES_UI_ELEMENTS } from "./model-libraries";
|
|
17
18
|
export type { LibraryUiElement, ModelLibraryKey } from "./model-libraries";
|
|
18
19
|
export type { ModelData, TransformersInfo } from "./model-data";
|
|
20
|
+
export type { SpecialTokensMap, TokenizerConfig } from "./tokenizer-data";
|
|
19
21
|
export type {
|
|
20
22
|
WidgetExample,
|
|
21
23
|
WidgetExampleAttribute,
|
|
@@ -37,6 +39,7 @@ export type {
|
|
|
37
39
|
WidgetExampleOutputText,
|
|
38
40
|
} from "./widget-example";
|
|
39
41
|
export { InferenceDisplayability } from "./model-data";
|
|
42
|
+
export { SPECIAL_TOKENS_ATTRIBUTES } from "./tokenizer-data";
|
|
40
43
|
|
|
41
44
|
import * as snippets from "./snippets";
|
|
42
45
|
export { snippets };
|
package/src/library-to-tasks.ts
CHANGED
|
@@ -27,7 +27,7 @@ export const LIBRARY_TASK_MAPPING_EXCLUDING_TRANSFORMERS: Partial<Record<ModelLi
|
|
|
27
27
|
keras: ["image-classification"],
|
|
28
28
|
nemo: ["automatic-speech-recognition"],
|
|
29
29
|
open_clip: ["zero-shot-classification", "zero-shot-image-classification"],
|
|
30
|
-
paddlenlp: ["conversational", "fill-mask", "summarization", "zero-shot-classification"],
|
|
30
|
+
paddlenlp: ["fill-mask", "summarization", "zero-shot-classification"],
|
|
31
31
|
peft: ["text-generation"],
|
|
32
32
|
"pyannote-audio": ["automatic-speech-recognition"],
|
|
33
33
|
"sentence-transformers": ["feature-extraction", "sentence-similarity"],
|
package/src/model-data.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import type { PipelineType } from "./pipelines";
|
|
2
2
|
import type { WidgetExample } from "./widget-example";
|
|
3
|
+
import type { TokenizerConfig } from "./tokenizer-data";
|
|
3
4
|
|
|
4
5
|
export enum InferenceDisplayability {
|
|
5
6
|
/**
|
|
@@ -53,6 +54,7 @@ export interface ModelData {
|
|
|
53
54
|
base_model_name?: string;
|
|
54
55
|
task_type?: string;
|
|
55
56
|
};
|
|
57
|
+
tokenizer_config?: TokenizerConfig;
|
|
56
58
|
};
|
|
57
59
|
/**
|
|
58
60
|
* all the model tags
|
package/src/pipelines.ts
CHANGED
|
@@ -225,17 +225,6 @@ export const PIPELINE_DATA = {
|
|
|
225
225
|
modality: "nlp",
|
|
226
226
|
color: "indigo",
|
|
227
227
|
},
|
|
228
|
-
conversational: {
|
|
229
|
-
name: "Conversational",
|
|
230
|
-
subtasks: [
|
|
231
|
-
{
|
|
232
|
-
type: "dialogue-generation",
|
|
233
|
-
name: "Dialogue Generation",
|
|
234
|
-
},
|
|
235
|
-
],
|
|
236
|
-
modality: "nlp",
|
|
237
|
-
color: "green",
|
|
238
|
-
},
|
|
239
228
|
"feature-extraction": {
|
|
240
229
|
name: "Feature Extraction",
|
|
241
230
|
modality: "nlp",
|
|
@@ -248,6 +237,14 @@ export const PIPELINE_DATA = {
|
|
|
248
237
|
type: "dialogue-modeling",
|
|
249
238
|
name: "Dialogue Modeling",
|
|
250
239
|
},
|
|
240
|
+
{
|
|
241
|
+
type: "dialogue-generation",
|
|
242
|
+
name: "Dialogue Generation",
|
|
243
|
+
},
|
|
244
|
+
{
|
|
245
|
+
type: "conversational",
|
|
246
|
+
name: "Conversational",
|
|
247
|
+
},
|
|
251
248
|
{
|
|
252
249
|
type: "language-modeling",
|
|
253
250
|
name: "Language Modeling",
|
|
@@ -593,7 +590,7 @@ export const PIPELINE_DATA = {
|
|
|
593
590
|
color: "green",
|
|
594
591
|
},
|
|
595
592
|
"image-text-to-text": {
|
|
596
|
-
name: "Image + Text to Text",
|
|
593
|
+
name: "Image + Text to Text (VLLMs)",
|
|
597
594
|
modality: "multimodal",
|
|
598
595
|
color: "red",
|
|
599
596
|
hideInDatasets: true,
|
|
@@ -651,6 +648,11 @@ export const PIPELINE_DATA = {
|
|
|
651
648
|
modality: "cv",
|
|
652
649
|
color: "green",
|
|
653
650
|
},
|
|
651
|
+
"image-feature-extraction": {
|
|
652
|
+
name: "Image Feature Extraction",
|
|
653
|
+
modality: "cv",
|
|
654
|
+
color: "indigo",
|
|
655
|
+
},
|
|
654
656
|
other: {
|
|
655
657
|
name: "Other",
|
|
656
658
|
modality: "other",
|
|
@@ -662,6 +664,8 @@ export const PIPELINE_DATA = {
|
|
|
662
664
|
|
|
663
665
|
export type PipelineType = keyof typeof PIPELINE_DATA;
|
|
664
666
|
|
|
667
|
+
export type WidgetType = PipelineType | "conversational";
|
|
668
|
+
|
|
665
669
|
export const PIPELINE_TYPES = Object.keys(PIPELINE_DATA) as PipelineType[];
|
|
666
670
|
|
|
667
671
|
export const SUBTASK_TYPES = Object.values(PIPELINE_DATA)
|
package/src/snippets/curl.ts
CHANGED
|
@@ -34,7 +34,6 @@ export const curlSnippets: Partial<Record<PipelineType, (model: ModelData, acces
|
|
|
34
34
|
"zero-shot-classification": snippetZeroShotClassification,
|
|
35
35
|
translation: snippetBasic,
|
|
36
36
|
summarization: snippetBasic,
|
|
37
|
-
conversational: snippetBasic,
|
|
38
37
|
"feature-extraction": snippetBasic,
|
|
39
38
|
"text-generation": snippetBasic,
|
|
40
39
|
"text2text-generation": snippetBasic,
|
package/src/snippets/inputs.ts
CHANGED
|
@@ -9,13 +9,6 @@ const inputsTranslation = () => `"Меня зовут Вольфганг и я
|
|
|
9
9
|
const inputsSummarization = () =>
|
|
10
10
|
`"The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct."`;
|
|
11
11
|
|
|
12
|
-
const inputsConversational = () =>
|
|
13
|
-
`{
|
|
14
|
-
"past_user_inputs": ["Which movie is the best ?"],
|
|
15
|
-
"generated_responses": ["It is Die Hard for sure."],
|
|
16
|
-
"text": "Can you explain why ?"
|
|
17
|
-
}`;
|
|
18
|
-
|
|
19
12
|
const inputsTableQuestionAnswering = () =>
|
|
20
13
|
`{
|
|
21
14
|
"query": "How many stars does the transformers repository have?",
|
|
@@ -96,7 +89,6 @@ const modelInputSnippets: {
|
|
|
96
89
|
"audio-to-audio": inputsAudioToAudio,
|
|
97
90
|
"audio-classification": inputsAudioClassification,
|
|
98
91
|
"automatic-speech-recognition": inputsAutomaticSpeechRecognition,
|
|
99
|
-
conversational: inputsConversational,
|
|
100
92
|
"document-question-answering": inputsVisualQuestionAnswering,
|
|
101
93
|
"feature-extraction": inputsFeatureExtraction,
|
|
102
94
|
"fill-mask": inputsFillMask,
|
package/src/snippets/js.ts
CHANGED
|
@@ -121,7 +121,6 @@ export const jsSnippets: Partial<Record<PipelineType, (model: ModelData, accessT
|
|
|
121
121
|
"zero-shot-classification": snippetZeroShotClassification,
|
|
122
122
|
translation: snippetBasic,
|
|
123
123
|
summarization: snippetBasic,
|
|
124
|
-
conversational: snippetBasic,
|
|
125
124
|
"feature-extraction": snippetBasic,
|
|
126
125
|
"text-generation": snippetBasic,
|
|
127
126
|
"text2text-generation": snippetBasic,
|
package/src/snippets/python.ts
CHANGED
|
@@ -116,7 +116,6 @@ export const pythonSnippets: Partial<Record<PipelineType, (model: ModelData) =>
|
|
|
116
116
|
"zero-shot-classification": snippetZeroShotClassification,
|
|
117
117
|
translation: snippetBasic,
|
|
118
118
|
summarization: snippetBasic,
|
|
119
|
-
conversational: snippetBasic,
|
|
120
119
|
"feature-extraction": snippetBasic,
|
|
121
120
|
"text-generation": snippetBasic,
|
|
122
121
|
"text2text-generation": snippetBasic,
|
package/src/tasks/index.ts
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
import { type PipelineType, PIPELINE_DATA } from "../pipelines";
|
|
1
|
+
import type { PipelineType } from "../pipelines";
|
|
2
|
+
import { PIPELINE_DATA } from "../pipelines";
|
|
2
3
|
|
|
3
4
|
import audioClassification from "./audio-classification/data";
|
|
4
5
|
import audioToAudio from "./audio-to-audio/data";
|
|
5
6
|
import automaticSpeechRecognition from "./automatic-speech-recognition/data";
|
|
6
|
-
import conversational from "./conversational/data";
|
|
7
7
|
import documentQuestionAnswering from "./document-question-answering/data";
|
|
8
8
|
import featureExtraction from "./feature-extraction/data";
|
|
9
9
|
import fillMask from "./fill-mask/data";
|
|
@@ -45,13 +45,13 @@ export const TASKS_MODEL_LIBRARIES: Record<PipelineType, ModelLibraryKey[]> = {
|
|
|
45
45
|
"audio-classification": ["speechbrain", "transformers", "transformers.js"],
|
|
46
46
|
"audio-to-audio": ["asteroid", "speechbrain"],
|
|
47
47
|
"automatic-speech-recognition": ["espnet", "nemo", "speechbrain", "transformers", "transformers.js"],
|
|
48
|
-
conversational: ["transformers"],
|
|
49
48
|
"depth-estimation": ["transformers", "transformers.js"],
|
|
50
49
|
"document-question-answering": ["transformers", "transformers.js"],
|
|
51
50
|
"feature-extraction": ["sentence-transformers", "transformers", "transformers.js"],
|
|
52
51
|
"fill-mask": ["transformers", "transformers.js"],
|
|
53
52
|
"graph-ml": ["transformers"],
|
|
54
53
|
"image-classification": ["keras", "timm", "transformers", "transformers.js"],
|
|
54
|
+
"image-feature-extraction": ["timm", "transformers"],
|
|
55
55
|
"image-segmentation": ["transformers", "transformers.js"],
|
|
56
56
|
"image-text-to-text": ["transformers"],
|
|
57
57
|
"image-to-image": ["diffusers", "transformers", "transformers.js"],
|
|
@@ -123,7 +123,6 @@ export const TASKS_DATA: Record<PipelineType, TaskData | undefined> = {
|
|
|
123
123
|
"audio-classification": getData("audio-classification", audioClassification),
|
|
124
124
|
"audio-to-audio": getData("audio-to-audio", audioToAudio),
|
|
125
125
|
"automatic-speech-recognition": getData("automatic-speech-recognition", automaticSpeechRecognition),
|
|
126
|
-
conversational: getData("conversational", conversational),
|
|
127
126
|
"depth-estimation": getData("depth-estimation", depthEstimation),
|
|
128
127
|
"document-question-answering": getData("document-question-answering", documentQuestionAnswering),
|
|
129
128
|
"feature-extraction": getData("feature-extraction", featureExtraction),
|
|
@@ -169,6 +168,7 @@ export const TASKS_DATA: Record<PipelineType, TaskData | undefined> = {
|
|
|
169
168
|
"zero-shot-object-detection": getData("zero-shot-object-detection", zeroShotObjectDetection),
|
|
170
169
|
"text-to-3d": getData("text-to-3d", placeholder),
|
|
171
170
|
"image-to-3d": getData("image-to-3d", placeholder),
|
|
171
|
+
"image-feature-extraction": getData("image-feature-extraction", placeholder),
|
|
172
172
|
} as const;
|
|
173
173
|
|
|
174
174
|
export interface ExampleRepo {
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
export const SPECIAL_TOKENS_ATTRIBUTES = [
|
|
2
|
+
"bos_token",
|
|
3
|
+
"eos_token",
|
|
4
|
+
"unk_token",
|
|
5
|
+
"sep_token",
|
|
6
|
+
"pad_token",
|
|
7
|
+
"cls_token",
|
|
8
|
+
"mask_token",
|
|
9
|
+
// additional_special_tokens (TODO)
|
|
10
|
+
] as const;
|
|
11
|
+
|
|
12
|
+
/**
|
|
13
|
+
* Public interface for a tokenizer's special tokens mapping
|
|
14
|
+
*/
|
|
15
|
+
export type SpecialTokensMap = {
|
|
16
|
+
[key in (typeof SPECIAL_TOKENS_ATTRIBUTES)[number]]?: string;
|
|
17
|
+
};
|
|
18
|
+
/**
|
|
19
|
+
* Public interface for tokenizer config
|
|
20
|
+
*/
|
|
21
|
+
export interface TokenizerConfig extends SpecialTokensMap {
|
|
22
|
+
use_default_system_prompt?: boolean;
|
|
23
|
+
chat_template?: string;
|
|
24
|
+
}
|
|
@@ -1,50 +0,0 @@
|
|
|
1
|
-
## Use Cases
|
|
2
|
-
|
|
3
|
-
### Chatbot 💬
|
|
4
|
-
|
|
5
|
-
Chatbots are used to have conversations instead of providing direct contact with a live human. They are used to provide customer service, sales, and can even be used to play games (see [ELIZA](https://en.wikipedia.org/wiki/ELIZA) from 1966 for one of the earliest examples).
|
|
6
|
-
|
|
7
|
-
## Voice Assistants 🎙️
|
|
8
|
-
|
|
9
|
-
Conversational response models are used as part of voice assistants to provide appropriate responses to voice based queries.
|
|
10
|
-
|
|
11
|
-
## Inference
|
|
12
|
-
|
|
13
|
-
You can infer with Conversational models with the 🤗 Transformers library using the `conversational` pipeline. This pipeline takes a conversation prompt or a list of conversations and generates responses for each prompt. The models that this pipeline can use are models that have been fine-tuned on a multi-turn conversational task (see https://huggingface.co/models?filter=conversational for a list of updated Conversational models).
|
|
14
|
-
|
|
15
|
-
```python
|
|
16
|
-
from transformers import pipeline, Conversation
|
|
17
|
-
converse = pipeline("conversational")
|
|
18
|
-
|
|
19
|
-
conversation_1 = Conversation("Going to the movies tonight - any suggestions?")
|
|
20
|
-
conversation_2 = Conversation("What's the last book you have read?")
|
|
21
|
-
converse([conversation_1, conversation_2])
|
|
22
|
-
|
|
23
|
-
## Output:
|
|
24
|
-
## Conversation 1
|
|
25
|
-
## user >> Going to the movies tonight - any suggestions?
|
|
26
|
-
## bot >> The Big Lebowski ,
|
|
27
|
-
## Conversation 2
|
|
28
|
-
## user >> What's the last book you have read?
|
|
29
|
-
## bot >> The Last Question
|
|
30
|
-
```
|
|
31
|
-
|
|
32
|
-
You can use [huggingface.js](https://github.com/huggingface/huggingface.js) to infer with conversational models on Hugging Face Hub.
|
|
33
|
-
|
|
34
|
-
```javascript
|
|
35
|
-
import { HfInference } from "@huggingface/inference";
|
|
36
|
-
|
|
37
|
-
const inference = new HfInference(HF_TOKEN);
|
|
38
|
-
await inference.conversational({
|
|
39
|
-
model: "facebook/blenderbot-400M-distill",
|
|
40
|
-
inputs: "Going to the movies tonight - any suggestions?",
|
|
41
|
-
});
|
|
42
|
-
```
|
|
43
|
-
|
|
44
|
-
## Useful Resources
|
|
45
|
-
|
|
46
|
-
- Learn how ChatGPT and InstructGPT work in this blog: [Illustrating Reinforcement Learning from Human Feedback (RLHF)](https://huggingface.co/blog/rlhf)
|
|
47
|
-
- [Reinforcement Learning from Human Feedback From Zero to ChatGPT](https://www.youtube.com/watch?v=EAd4oQtEJOM)
|
|
48
|
-
- [A guide on Dialog Agents](https://huggingface.co/blog/dialog-agents)
|
|
49
|
-
|
|
50
|
-
This page was made possible thanks to the efforts of [Viraat Aryabumi](https://huggingface.co/viraat).
|
|
@@ -1,66 +0,0 @@
|
|
|
1
|
-
import type { TaskDataCustom } from "..";
|
|
2
|
-
|
|
3
|
-
const taskData: TaskDataCustom = {
|
|
4
|
-
datasets: [
|
|
5
|
-
{
|
|
6
|
-
description:
|
|
7
|
-
"A dataset of 7k conversations explicitly designed to exhibit multiple conversation modes: displaying personality, having empathy, and demonstrating knowledge.",
|
|
8
|
-
id: "blended_skill_talk",
|
|
9
|
-
},
|
|
10
|
-
{
|
|
11
|
-
description:
|
|
12
|
-
"ConvAI is a dataset of human-to-bot conversations labeled for quality. This data can be used to train a metric for evaluating dialogue systems",
|
|
13
|
-
id: "conv_ai_2",
|
|
14
|
-
},
|
|
15
|
-
{
|
|
16
|
-
description: "EmpatheticDialogues, is a dataset of 25k conversations grounded in emotional situations",
|
|
17
|
-
id: "empathetic_dialogues",
|
|
18
|
-
},
|
|
19
|
-
],
|
|
20
|
-
demo: {
|
|
21
|
-
inputs: [
|
|
22
|
-
{
|
|
23
|
-
label: "Input",
|
|
24
|
-
content: "Hey my name is Julien! How are you?",
|
|
25
|
-
type: "text",
|
|
26
|
-
},
|
|
27
|
-
],
|
|
28
|
-
outputs: [
|
|
29
|
-
{
|
|
30
|
-
label: "Answer",
|
|
31
|
-
content: "Hi Julien! My name is Julia! I am well.",
|
|
32
|
-
type: "text",
|
|
33
|
-
},
|
|
34
|
-
],
|
|
35
|
-
},
|
|
36
|
-
metrics: [
|
|
37
|
-
{
|
|
38
|
-
description:
|
|
39
|
-
"BLEU score is calculated by counting the number of shared single or subsequent tokens between the generated sequence and the reference. Subsequent n tokens are called “n-grams”. Unigram refers to a single token while bi-gram refers to token pairs and n-grams refer to n subsequent tokens. The score ranges from 0 to 1, where 1 means the translation perfectly matched and 0 did not match at all",
|
|
40
|
-
id: "bleu",
|
|
41
|
-
},
|
|
42
|
-
],
|
|
43
|
-
models: [
|
|
44
|
-
{
|
|
45
|
-
description: "A faster and smaller model than the famous BERT model.",
|
|
46
|
-
id: "facebook/blenderbot-400M-distill",
|
|
47
|
-
},
|
|
48
|
-
{
|
|
49
|
-
description:
|
|
50
|
-
"DialoGPT is a large-scale pretrained dialogue response generation model for multiturn conversations.",
|
|
51
|
-
id: "microsoft/DialoGPT-large",
|
|
52
|
-
},
|
|
53
|
-
],
|
|
54
|
-
spaces: [
|
|
55
|
-
{
|
|
56
|
-
description: "A chatbot based on Blender model.",
|
|
57
|
-
id: "EXFINITE/BlenderBot-UI",
|
|
58
|
-
},
|
|
59
|
-
],
|
|
60
|
-
summary:
|
|
61
|
-
"Conversational response modelling is the task of generating conversational text that is relevant, coherent and knowledgable given a prompt. These models have applications in chatbots, and as a part of voice assistants",
|
|
62
|
-
widgetModels: ["facebook/blenderbot-400M-distill"],
|
|
63
|
-
youtubeId: "",
|
|
64
|
-
};
|
|
65
|
-
|
|
66
|
-
export default taskData;
|