@huggingface/tasks 0.13.1-test → 0.13.1-test2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +4 -2
- package/src/dataset-libraries.ts +89 -0
- package/src/default-widget-inputs.ts +718 -0
- package/src/gguf.ts +40 -0
- package/src/hardware.ts +482 -0
- package/src/index.ts +59 -0
- package/src/library-to-tasks.ts +76 -0
- package/src/local-apps.ts +412 -0
- package/src/model-data.ts +149 -0
- package/src/model-libraries-downloads.ts +18 -0
- package/src/model-libraries-snippets.ts +1128 -0
- package/src/model-libraries.ts +820 -0
- package/src/pipelines.ts +698 -0
- package/src/snippets/common.ts +39 -0
- package/src/snippets/curl.spec.ts +94 -0
- package/src/snippets/curl.ts +120 -0
- package/src/snippets/index.ts +7 -0
- package/src/snippets/inputs.ts +167 -0
- package/src/snippets/js.spec.ts +148 -0
- package/src/snippets/js.ts +305 -0
- package/src/snippets/python.spec.ts +144 -0
- package/src/snippets/python.ts +321 -0
- package/src/snippets/types.ts +16 -0
- package/src/tasks/audio-classification/about.md +86 -0
- package/src/tasks/audio-classification/data.ts +81 -0
- package/src/tasks/audio-classification/inference.ts +52 -0
- package/src/tasks/audio-classification/spec/input.json +35 -0
- package/src/tasks/audio-classification/spec/output.json +11 -0
- package/src/tasks/audio-to-audio/about.md +56 -0
- package/src/tasks/audio-to-audio/data.ts +70 -0
- package/src/tasks/automatic-speech-recognition/about.md +90 -0
- package/src/tasks/automatic-speech-recognition/data.ts +82 -0
- package/src/tasks/automatic-speech-recognition/inference.ts +160 -0
- package/src/tasks/automatic-speech-recognition/spec/input.json +35 -0
- package/src/tasks/automatic-speech-recognition/spec/output.json +38 -0
- package/src/tasks/chat-completion/inference.ts +322 -0
- package/src/tasks/chat-completion/spec/input.json +350 -0
- package/src/tasks/chat-completion/spec/output.json +206 -0
- package/src/tasks/chat-completion/spec/stream_output.json +213 -0
- package/src/tasks/common-definitions.json +100 -0
- package/src/tasks/depth-estimation/about.md +45 -0
- package/src/tasks/depth-estimation/data.ts +70 -0
- package/src/tasks/depth-estimation/inference.ts +35 -0
- package/src/tasks/depth-estimation/spec/input.json +25 -0
- package/src/tasks/depth-estimation/spec/output.json +16 -0
- package/src/tasks/document-question-answering/about.md +53 -0
- package/src/tasks/document-question-answering/data.ts +85 -0
- package/src/tasks/document-question-answering/inference.ts +110 -0
- package/src/tasks/document-question-answering/spec/input.json +85 -0
- package/src/tasks/document-question-answering/spec/output.json +36 -0
- package/src/tasks/feature-extraction/about.md +72 -0
- package/src/tasks/feature-extraction/data.ts +57 -0
- package/src/tasks/feature-extraction/inference.ts +40 -0
- package/src/tasks/feature-extraction/spec/input.json +47 -0
- package/src/tasks/feature-extraction/spec/output.json +15 -0
- package/src/tasks/fill-mask/about.md +51 -0
- package/src/tasks/fill-mask/data.ts +79 -0
- package/src/tasks/fill-mask/inference.ts +62 -0
- package/src/tasks/fill-mask/spec/input.json +38 -0
- package/src/tasks/fill-mask/spec/output.json +29 -0
- package/src/tasks/image-classification/about.md +50 -0
- package/src/tasks/image-classification/data.ts +88 -0
- package/src/tasks/image-classification/inference.ts +52 -0
- package/src/tasks/image-classification/spec/input.json +35 -0
- package/src/tasks/image-classification/spec/output.json +11 -0
- package/src/tasks/image-feature-extraction/about.md +23 -0
- package/src/tasks/image-feature-extraction/data.ts +59 -0
- package/src/tasks/image-segmentation/about.md +63 -0
- package/src/tasks/image-segmentation/data.ts +99 -0
- package/src/tasks/image-segmentation/inference.ts +69 -0
- package/src/tasks/image-segmentation/spec/input.json +45 -0
- package/src/tasks/image-segmentation/spec/output.json +26 -0
- package/src/tasks/image-text-to-text/about.md +76 -0
- package/src/tasks/image-text-to-text/data.ts +102 -0
- package/src/tasks/image-to-3d/about.md +62 -0
- package/src/tasks/image-to-3d/data.ts +75 -0
- package/src/tasks/image-to-image/about.md +129 -0
- package/src/tasks/image-to-image/data.ts +101 -0
- package/src/tasks/image-to-image/inference.ts +68 -0
- package/src/tasks/image-to-image/spec/input.json +55 -0
- package/src/tasks/image-to-image/spec/output.json +12 -0
- package/src/tasks/image-to-text/about.md +61 -0
- package/src/tasks/image-to-text/data.ts +82 -0
- package/src/tasks/image-to-text/inference.ts +143 -0
- package/src/tasks/image-to-text/spec/input.json +34 -0
- package/src/tasks/image-to-text/spec/output.json +14 -0
- package/src/tasks/index.ts +312 -0
- package/src/tasks/keypoint-detection/about.md +57 -0
- package/src/tasks/keypoint-detection/data.ts +50 -0
- package/src/tasks/mask-generation/about.md +65 -0
- package/src/tasks/mask-generation/data.ts +55 -0
- package/src/tasks/object-detection/about.md +37 -0
- package/src/tasks/object-detection/data.ts +86 -0
- package/src/tasks/object-detection/inference.ts +75 -0
- package/src/tasks/object-detection/spec/input.json +31 -0
- package/src/tasks/object-detection/spec/output.json +50 -0
- package/src/tasks/placeholder/about.md +15 -0
- package/src/tasks/placeholder/data.ts +21 -0
- package/src/tasks/placeholder/spec/input.json +35 -0
- package/src/tasks/placeholder/spec/output.json +17 -0
- package/src/tasks/question-answering/about.md +56 -0
- package/src/tasks/question-answering/data.ts +75 -0
- package/src/tasks/question-answering/inference.ts +99 -0
- package/src/tasks/question-answering/spec/input.json +67 -0
- package/src/tasks/question-answering/spec/output.json +29 -0
- package/src/tasks/reinforcement-learning/about.md +167 -0
- package/src/tasks/reinforcement-learning/data.ts +75 -0
- package/src/tasks/sentence-similarity/about.md +97 -0
- package/src/tasks/sentence-similarity/data.ts +101 -0
- package/src/tasks/sentence-similarity/inference.ts +32 -0
- package/src/tasks/sentence-similarity/spec/input.json +40 -0
- package/src/tasks/sentence-similarity/spec/output.json +12 -0
- package/src/tasks/summarization/about.md +58 -0
- package/src/tasks/summarization/data.ts +76 -0
- package/src/tasks/summarization/inference.ts +57 -0
- package/src/tasks/summarization/spec/input.json +42 -0
- package/src/tasks/summarization/spec/output.json +14 -0
- package/src/tasks/table-question-answering/about.md +43 -0
- package/src/tasks/table-question-answering/data.ts +59 -0
- package/src/tasks/table-question-answering/inference.ts +61 -0
- package/src/tasks/table-question-answering/spec/input.json +44 -0
- package/src/tasks/table-question-answering/spec/output.json +40 -0
- package/src/tasks/tabular-classification/about.md +65 -0
- package/src/tasks/tabular-classification/data.ts +68 -0
- package/src/tasks/tabular-regression/about.md +87 -0
- package/src/tasks/tabular-regression/data.ts +57 -0
- package/src/tasks/text-classification/about.md +173 -0
- package/src/tasks/text-classification/data.ts +103 -0
- package/src/tasks/text-classification/inference.ts +51 -0
- package/src/tasks/text-classification/spec/input.json +35 -0
- package/src/tasks/text-classification/spec/output.json +11 -0
- package/src/tasks/text-generation/about.md +154 -0
- package/src/tasks/text-generation/data.ts +114 -0
- package/src/tasks/text-generation/inference.ts +200 -0
- package/src/tasks/text-generation/spec/input.json +219 -0
- package/src/tasks/text-generation/spec/output.json +179 -0
- package/src/tasks/text-generation/spec/stream_output.json +103 -0
- package/src/tasks/text-to-3d/about.md +62 -0
- package/src/tasks/text-to-3d/data.ts +56 -0
- package/src/tasks/text-to-audio/inference.ts +143 -0
- package/src/tasks/text-to-audio/spec/input.json +31 -0
- package/src/tasks/text-to-audio/spec/output.json +17 -0
- package/src/tasks/text-to-image/about.md +96 -0
- package/src/tasks/text-to-image/data.ts +100 -0
- package/src/tasks/text-to-image/inference.ts +75 -0
- package/src/tasks/text-to-image/spec/input.json +63 -0
- package/src/tasks/text-to-image/spec/output.json +13 -0
- package/src/tasks/text-to-speech/about.md +63 -0
- package/src/tasks/text-to-speech/data.ts +79 -0
- package/src/tasks/text-to-speech/inference.ts +145 -0
- package/src/tasks/text-to-speech/spec/input.json +31 -0
- package/src/tasks/text-to-speech/spec/output.json +7 -0
- package/src/tasks/text-to-video/about.md +41 -0
- package/src/tasks/text-to-video/data.ts +102 -0
- package/src/tasks/text2text-generation/inference.ts +55 -0
- package/src/tasks/text2text-generation/spec/input.json +55 -0
- package/src/tasks/text2text-generation/spec/output.json +14 -0
- package/src/tasks/token-classification/about.md +76 -0
- package/src/tasks/token-classification/data.ts +92 -0
- package/src/tasks/token-classification/inference.ts +85 -0
- package/src/tasks/token-classification/spec/input.json +65 -0
- package/src/tasks/token-classification/spec/output.json +37 -0
- package/src/tasks/translation/about.md +65 -0
- package/src/tasks/translation/data.ts +70 -0
- package/src/tasks/translation/inference.ts +67 -0
- package/src/tasks/translation/spec/input.json +50 -0
- package/src/tasks/translation/spec/output.json +14 -0
- package/src/tasks/unconditional-image-generation/about.md +50 -0
- package/src/tasks/unconditional-image-generation/data.ts +72 -0
- package/src/tasks/video-classification/about.md +37 -0
- package/src/tasks/video-classification/data.ts +84 -0
- package/src/tasks/video-classification/inference.ts +59 -0
- package/src/tasks/video-classification/spec/input.json +42 -0
- package/src/tasks/video-classification/spec/output.json +10 -0
- package/src/tasks/video-text-to-text/about.md +98 -0
- package/src/tasks/video-text-to-text/data.ts +66 -0
- package/src/tasks/visual-question-answering/about.md +48 -0
- package/src/tasks/visual-question-answering/data.ts +97 -0
- package/src/tasks/visual-question-answering/inference.ts +62 -0
- package/src/tasks/visual-question-answering/spec/input.json +41 -0
- package/src/tasks/visual-question-answering/spec/output.json +21 -0
- package/src/tasks/zero-shot-classification/about.md +40 -0
- package/src/tasks/zero-shot-classification/data.ts +70 -0
- package/src/tasks/zero-shot-classification/inference.ts +67 -0
- package/src/tasks/zero-shot-classification/spec/input.json +50 -0
- package/src/tasks/zero-shot-classification/spec/output.json +11 -0
- package/src/tasks/zero-shot-image-classification/about.md +75 -0
- package/src/tasks/zero-shot-image-classification/data.ts +84 -0
- package/src/tasks/zero-shot-image-classification/inference.ts +61 -0
- package/src/tasks/zero-shot-image-classification/spec/input.json +45 -0
- package/src/tasks/zero-shot-image-classification/spec/output.json +10 -0
- package/src/tasks/zero-shot-object-detection/about.md +45 -0
- package/src/tasks/zero-shot-object-detection/data.ts +67 -0
- package/src/tasks/zero-shot-object-detection/inference.ts +66 -0
- package/src/tasks/zero-shot-object-detection/spec/input.json +40 -0
- package/src/tasks/zero-shot-object-detection/spec/output.json +47 -0
- package/src/tokenizer-data.ts +32 -0
- package/src/widget-example.ts +125 -0
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Inference code generated from the JSON schema spec in ./spec
|
|
3
|
+
*
|
|
4
|
+
* Using src/scripts/inference-codegen
|
|
5
|
+
*/
|
|
6
|
+
/**
|
|
7
|
+
* Inputs for Object Detection inference
|
|
8
|
+
*/
|
|
9
|
+
export interface ObjectDetectionInput {
|
|
10
|
+
/**
|
|
11
|
+
* The input image data as a base64-encoded string. If no `parameters` are provided, you can
|
|
12
|
+
* also provide the image data as a raw bytes payload.
|
|
13
|
+
*/
|
|
14
|
+
inputs: string;
|
|
15
|
+
/**
|
|
16
|
+
* Additional inference parameters
|
|
17
|
+
*/
|
|
18
|
+
parameters?: ObjectDetectionParameters;
|
|
19
|
+
[property: string]: unknown;
|
|
20
|
+
}
|
|
21
|
+
/**
|
|
22
|
+
* Additional inference parameters
|
|
23
|
+
*
|
|
24
|
+
* Additional inference parameters for Object Detection
|
|
25
|
+
*/
|
|
26
|
+
export interface ObjectDetectionParameters {
|
|
27
|
+
/**
|
|
28
|
+
* The probability necessary to make a prediction.
|
|
29
|
+
*/
|
|
30
|
+
threshold?: number;
|
|
31
|
+
[property: string]: unknown;
|
|
32
|
+
}
|
|
33
|
+
/**
|
|
34
|
+
* The predicted bounding box. Coordinates are relative to the top left corner of the input
|
|
35
|
+
* image.
|
|
36
|
+
*/
|
|
37
|
+
export interface BoundingBox {
|
|
38
|
+
/**
|
|
39
|
+
* The x-coordinate of the bottom-right corner of the bounding box.
|
|
40
|
+
*/
|
|
41
|
+
xmax: number;
|
|
42
|
+
/**
|
|
43
|
+
* The x-coordinate of the top-left corner of the bounding box.
|
|
44
|
+
*/
|
|
45
|
+
xmin: number;
|
|
46
|
+
/**
|
|
47
|
+
* The y-coordinate of the bottom-right corner of the bounding box.
|
|
48
|
+
*/
|
|
49
|
+
ymax: number;
|
|
50
|
+
/**
|
|
51
|
+
* The y-coordinate of the top-left corner of the bounding box.
|
|
52
|
+
*/
|
|
53
|
+
ymin: number;
|
|
54
|
+
[property: string]: unknown;
|
|
55
|
+
}
|
|
56
|
+
export type ObjectDetectionOutput = ObjectDetectionOutputElement[];
|
|
57
|
+
/**
|
|
58
|
+
* Outputs of inference for the Object Detection task
|
|
59
|
+
*/
|
|
60
|
+
export interface ObjectDetectionOutputElement {
|
|
61
|
+
/**
|
|
62
|
+
* The predicted bounding box. Coordinates are relative to the top left corner of the input
|
|
63
|
+
* image.
|
|
64
|
+
*/
|
|
65
|
+
box: BoundingBox;
|
|
66
|
+
/**
|
|
67
|
+
* The predicted label for the bounding box.
|
|
68
|
+
*/
|
|
69
|
+
label: string;
|
|
70
|
+
/**
|
|
71
|
+
* The associated score / probability.
|
|
72
|
+
*/
|
|
73
|
+
score: number;
|
|
74
|
+
[property: string]: unknown;
|
|
75
|
+
}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$id": "/inference/schemas/object-detection/input.json",
|
|
3
|
+
"$schema": "http://json-schema.org/draft-06/schema#",
|
|
4
|
+
"description": "Inputs for Object Detection inference",
|
|
5
|
+
"title": "ObjectDetectionInput",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"inputs": {
|
|
9
|
+
"type": "string",
|
|
10
|
+
"description": "The input image data as a base64-encoded string. If no `parameters` are provided, you can also provide the image data as a raw bytes payload."
|
|
11
|
+
},
|
|
12
|
+
"parameters": {
|
|
13
|
+
"description": "Additional inference parameters",
|
|
14
|
+
"$ref": "#/$defs/ObjectDetectionParameters"
|
|
15
|
+
}
|
|
16
|
+
},
|
|
17
|
+
"$defs": {
|
|
18
|
+
"ObjectDetectionParameters": {
|
|
19
|
+
"title": "ObjectDetectionParameters",
|
|
20
|
+
"description": "Additional inference parameters for Object Detection",
|
|
21
|
+
"type": "object",
|
|
22
|
+
"properties": {
|
|
23
|
+
"threshold": {
|
|
24
|
+
"type": "number",
|
|
25
|
+
"description": "The probability necessary to make a prediction."
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
},
|
|
30
|
+
"required": ["inputs"]
|
|
31
|
+
}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$id": "/inference/schemas/object-detection/output.json",
|
|
3
|
+
"$schema": "http://json-schema.org/draft-06/schema#",
|
|
4
|
+
"description": "Outputs of inference for the Object Detection task",
|
|
5
|
+
"title": "ObjectDetectionOutput",
|
|
6
|
+
"type": "array",
|
|
7
|
+
"items": {
|
|
8
|
+
"type": "object",
|
|
9
|
+
"properties": {
|
|
10
|
+
"label": {
|
|
11
|
+
"type": "string",
|
|
12
|
+
"description": "The predicted label for the bounding box."
|
|
13
|
+
},
|
|
14
|
+
"score": {
|
|
15
|
+
"type": "number",
|
|
16
|
+
"description": "The associated score / probability."
|
|
17
|
+
},
|
|
18
|
+
"box": {
|
|
19
|
+
"$ref": "#/$defs/BoundingBox",
|
|
20
|
+
"description": "The predicted bounding box. Coordinates are relative to the top left corner of the input image."
|
|
21
|
+
}
|
|
22
|
+
},
|
|
23
|
+
"required": ["box", "label", "score"]
|
|
24
|
+
},
|
|
25
|
+
"$defs": {
|
|
26
|
+
"BoundingBox": {
|
|
27
|
+
"type": "object",
|
|
28
|
+
"title": "BoundingBox",
|
|
29
|
+
"properties": {
|
|
30
|
+
"xmin": {
|
|
31
|
+
"type": "integer",
|
|
32
|
+
"description": "The x-coordinate of the top-left corner of the bounding box."
|
|
33
|
+
},
|
|
34
|
+
"xmax": {
|
|
35
|
+
"type": "integer",
|
|
36
|
+
"description": "The x-coordinate of the bottom-right corner of the bounding box."
|
|
37
|
+
},
|
|
38
|
+
"ymin": {
|
|
39
|
+
"type": "integer",
|
|
40
|
+
"description": "The y-coordinate of the top-left corner of the bounding box."
|
|
41
|
+
},
|
|
42
|
+
"ymax": {
|
|
43
|
+
"type": "integer",
|
|
44
|
+
"description": "The y-coordinate of the bottom-right corner of the bounding box."
|
|
45
|
+
}
|
|
46
|
+
},
|
|
47
|
+
"required": ["xmin", "xmax", "ymin", "ymax"]
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
## Use Cases
|
|
2
|
+
|
|
3
|
+
You can contribute to this section with common use cases of the task!
|
|
4
|
+
|
|
5
|
+
## Task Variants
|
|
6
|
+
|
|
7
|
+
This place can be filled with variants of this task if there are any.
|
|
8
|
+
|
|
9
|
+
## Inference
|
|
10
|
+
|
|
11
|
+
This section should have useful information about how to pull a model from Hugging Face Hub that is a part of a library specialized in a task and use it.
|
|
12
|
+
|
|
13
|
+
## Useful Resources
|
|
14
|
+
|
|
15
|
+
In this area, you can insert useful resources about how to train or use a model for this task.
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import type { TaskDataCustom } from "../index.js";
|
|
2
|
+
|
|
3
|
+
const taskData: TaskDataCustom = {
|
|
4
|
+
datasets: [],
|
|
5
|
+
demo: {
|
|
6
|
+
inputs: [],
|
|
7
|
+
outputs: [],
|
|
8
|
+
},
|
|
9
|
+
isPlaceholder: true,
|
|
10
|
+
metrics: [],
|
|
11
|
+
models: [],
|
|
12
|
+
spaces: [],
|
|
13
|
+
summary: "",
|
|
14
|
+
widgetModels: [],
|
|
15
|
+
youtubeId: undefined,
|
|
16
|
+
/// If this is a subtask, link to the most general task ID
|
|
17
|
+
/// (eg, text2text-generation is the canonical ID of translation)
|
|
18
|
+
canonicalId: undefined,
|
|
19
|
+
};
|
|
20
|
+
|
|
21
|
+
export default taskData;
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$id": "/inference/schemas/<TASK_ID>/input.json",
|
|
3
|
+
"$schema": "http://json-schema.org/draft-06/schema#",
|
|
4
|
+
"description": "Inputs for <TASK_ID> inference",
|
|
5
|
+
"title": "PlaceholderInput",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"inputs": {
|
|
9
|
+
"description": "TODO: describe the input here. This must be model & framework agnostic.",
|
|
10
|
+
"type": "string"
|
|
11
|
+
},
|
|
12
|
+
"parameters": {
|
|
13
|
+
"description": "Additional inference parameters",
|
|
14
|
+
"$ref": "#/$defs/<TASK_ID>Parameters"
|
|
15
|
+
}
|
|
16
|
+
},
|
|
17
|
+
"$defs": {
|
|
18
|
+
"<TASK_ID>Parameters": {
|
|
19
|
+
"title": "<TASK_ID>Parameters",
|
|
20
|
+
"description": "TODO: describe additional parameters here.",
|
|
21
|
+
"type": "object",
|
|
22
|
+
"properties": {
|
|
23
|
+
"dummy_parameter_name": {
|
|
24
|
+
"type": "boolean",
|
|
25
|
+
"description": "TODO: describe the parameter here"
|
|
26
|
+
},
|
|
27
|
+
"dummy_parameter_name2": {
|
|
28
|
+
"type": "integer",
|
|
29
|
+
"description": "TODO: describe the parameter here"
|
|
30
|
+
}
|
|
31
|
+
}
|
|
32
|
+
}
|
|
33
|
+
},
|
|
34
|
+
"required": ["inputs"]
|
|
35
|
+
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$id": "/inference/schemas/<TASK_ID>/output.json",
|
|
3
|
+
"$schema": "http://json-schema.org/draft-06/schema#",
|
|
4
|
+
"description": "Outputs for <TASK_ID> inference",
|
|
5
|
+
"title": "PlaceholderOutput",
|
|
6
|
+
"type": "array",
|
|
7
|
+
"items": {
|
|
8
|
+
"type": "object",
|
|
9
|
+
"properties": {
|
|
10
|
+
"meaningful_output_name": {
|
|
11
|
+
"type": "string",
|
|
12
|
+
"description": "TODO: Describe what is outputed by the inference here"
|
|
13
|
+
}
|
|
14
|
+
},
|
|
15
|
+
"required": ["meaningfulOutputName"]
|
|
16
|
+
}
|
|
17
|
+
}
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
## Use Cases
|
|
2
|
+
|
|
3
|
+
### Frequently Asked Questions
|
|
4
|
+
|
|
5
|
+
You can use Question Answering (QA) models to automate the response to frequently asked questions by using a knowledge base (documents) as context. Answers to customer questions can be drawn from those documents.
|
|
6
|
+
|
|
7
|
+
⚡⚡ If you’d like to save inference time, you can first use [passage ranking models](/tasks/sentence-similarity) to see which document might contain the answer to the question and iterate over that document with the QA model instead.
|
|
8
|
+
|
|
9
|
+
## Task Variants
|
|
10
|
+
There are different QA variants based on the inputs and outputs:
|
|
11
|
+
|
|
12
|
+
- **Extractive QA:** The model **extracts** the answer from a context. The context here could be a provided text, a table or even HTML! This is usually solved with BERT-like models.
|
|
13
|
+
- **Open Generative QA:** The model **generates** free text directly based on the context. You can learn more about the Text Generation task in [its page](/tasks/text-generation).
|
|
14
|
+
- **Closed Generative QA:** In this case, no context is provided. The answer is completely generated by a model.
|
|
15
|
+
|
|
16
|
+
The schema above illustrates extractive, open book QA. The model takes a context and the question and extracts the answer from the given context.
|
|
17
|
+
|
|
18
|
+
You can also differentiate QA models depending on whether they are open-domain or closed-domain. Open-domain models are not restricted to a specific domain, while closed-domain models are restricted to a specific domain (e.g. legal, medical documents).
|
|
19
|
+
|
|
20
|
+
## Inference
|
|
21
|
+
|
|
22
|
+
You can infer with QA models with the 🤗 Transformers library using the `question-answering` pipeline. If no model checkpoint is given, the pipeline will be initialized with `distilbert-base-cased-distilled-squad`. This pipeline takes a question and a context from which the answer will be extracted and returned.
|
|
23
|
+
|
|
24
|
+
```python
|
|
25
|
+
from transformers import pipeline
|
|
26
|
+
|
|
27
|
+
qa_model = pipeline("question-answering")
|
|
28
|
+
question = "Where do I live?"
|
|
29
|
+
context = "My name is Merve and I live in İstanbul."
|
|
30
|
+
qa_model(question = question, context = context)
|
|
31
|
+
## {'answer': 'İstanbul', 'end': 39, 'score': 0.953, 'start': 31}
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## Useful Resources
|
|
35
|
+
|
|
36
|
+
Would you like to learn more about QA? Awesome! Here are some curated resources that you may find helpful!
|
|
37
|
+
|
|
38
|
+
- [Course Chapter on Question Answering](https://huggingface.co/course/chapter7/7?fw=pt)
|
|
39
|
+
- [Question Answering Workshop](https://www.youtube.com/watch?v=Ihgk8kGLpIE&ab_channel=HuggingFace)
|
|
40
|
+
- [How to Build an Open-Domain Question Answering System?](https://lilianweng.github.io/lil-log/2020/10/29/open-domain-question-answering.html)
|
|
41
|
+
- [Blog Post: ELI5 A Model for Open Domain Long Form Question Answering](https://yjernite.github.io/lfqa.html)
|
|
42
|
+
|
|
43
|
+
### Notebooks
|
|
44
|
+
|
|
45
|
+
- [PyTorch](https://github.com/huggingface/notebooks/blob/master/examples/question_answering.ipynb)
|
|
46
|
+
- [TensorFlow](https://github.com/huggingface/notebooks/blob/main/examples/question_answering-tf.ipynb)
|
|
47
|
+
|
|
48
|
+
### Scripts for training
|
|
49
|
+
|
|
50
|
+
- [PyTorch](https://github.com/huggingface/transformers/tree/main/examples/pytorch/question-answering)
|
|
51
|
+
- [TensorFlow](https://github.com/huggingface/transformers/tree/main/examples/tensorflow/question-answering)
|
|
52
|
+
- [Flax](https://github.com/huggingface/transformers/tree/main/examples/flax/question-answering)
|
|
53
|
+
|
|
54
|
+
### Documentation
|
|
55
|
+
|
|
56
|
+
- [Question answering task guide](https://huggingface.co/docs/transformers/tasks/question_answering)
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
import type { TaskDataCustom } from "../index.js";
|
|
2
|
+
|
|
3
|
+
const taskData: TaskDataCustom = {
|
|
4
|
+
datasets: [
|
|
5
|
+
{
|
|
6
|
+
// TODO write proper description
|
|
7
|
+
description: "A famous question answering dataset based on English articles from Wikipedia.",
|
|
8
|
+
id: "squad_v2",
|
|
9
|
+
},
|
|
10
|
+
{
|
|
11
|
+
// TODO write proper description
|
|
12
|
+
description: "A dataset of aggregated anonymized actual queries issued to the Google search engine.",
|
|
13
|
+
id: "natural_questions",
|
|
14
|
+
},
|
|
15
|
+
],
|
|
16
|
+
demo: {
|
|
17
|
+
inputs: [
|
|
18
|
+
{
|
|
19
|
+
label: "Question",
|
|
20
|
+
content: "Which name is also used to describe the Amazon rainforest in English?",
|
|
21
|
+
type: "text",
|
|
22
|
+
},
|
|
23
|
+
{
|
|
24
|
+
label: "Context",
|
|
25
|
+
content: "The Amazon rainforest, also known in English as Amazonia or the Amazon Jungle",
|
|
26
|
+
type: "text",
|
|
27
|
+
},
|
|
28
|
+
],
|
|
29
|
+
outputs: [
|
|
30
|
+
{
|
|
31
|
+
label: "Answer",
|
|
32
|
+
content: "Amazonia",
|
|
33
|
+
type: "text",
|
|
34
|
+
},
|
|
35
|
+
],
|
|
36
|
+
},
|
|
37
|
+
metrics: [
|
|
38
|
+
{
|
|
39
|
+
description:
|
|
40
|
+
"Exact Match is a metric based on the strict character match of the predicted answer and the right answer. For answers predicted correctly, the Exact Match will be 1. Even if only one character is different, Exact Match will be 0",
|
|
41
|
+
id: "exact-match",
|
|
42
|
+
},
|
|
43
|
+
{
|
|
44
|
+
description:
|
|
45
|
+
" The F1-Score metric is useful if we value both false positives and false negatives equally. The F1-Score is calculated on each word in the predicted sequence against the correct answer",
|
|
46
|
+
id: "f1",
|
|
47
|
+
},
|
|
48
|
+
],
|
|
49
|
+
models: [
|
|
50
|
+
{
|
|
51
|
+
description: "A robust baseline model for most question answering domains.",
|
|
52
|
+
id: "deepset/roberta-base-squad2",
|
|
53
|
+
},
|
|
54
|
+
{
|
|
55
|
+
description: "Small yet robust model that can answer questions.",
|
|
56
|
+
id: "distilbert/distilbert-base-cased-distilled-squad",
|
|
57
|
+
},
|
|
58
|
+
{
|
|
59
|
+
description: "A special model that can answer questions from tables.",
|
|
60
|
+
id: "google/tapas-base-finetuned-wtq",
|
|
61
|
+
},
|
|
62
|
+
],
|
|
63
|
+
spaces: [
|
|
64
|
+
{
|
|
65
|
+
description: "An application that can answer a long question from Wikipedia.",
|
|
66
|
+
id: "deepset/wikipedia-assistant",
|
|
67
|
+
},
|
|
68
|
+
],
|
|
69
|
+
summary:
|
|
70
|
+
"Question Answering models can retrieve the answer to a question from a given text, which is useful for searching for an answer in a document. Some question answering models can generate answers without context!",
|
|
71
|
+
widgetModels: ["deepset/roberta-base-squad2"],
|
|
72
|
+
youtubeId: "ajPx5LwJD-I",
|
|
73
|
+
};
|
|
74
|
+
|
|
75
|
+
export default taskData;
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Inference code generated from the JSON schema spec in ./spec
|
|
3
|
+
*
|
|
4
|
+
* Using src/scripts/inference-codegen
|
|
5
|
+
*/
|
|
6
|
+
/**
|
|
7
|
+
* Inputs for Question Answering inference
|
|
8
|
+
*/
|
|
9
|
+
export interface QuestionAnsweringInput {
|
|
10
|
+
/**
|
|
11
|
+
* One (context, question) pair to answer
|
|
12
|
+
*/
|
|
13
|
+
inputs: QuestionAnsweringInputData;
|
|
14
|
+
/**
|
|
15
|
+
* Additional inference parameters
|
|
16
|
+
*/
|
|
17
|
+
parameters?: QuestionAnsweringParameters;
|
|
18
|
+
[property: string]: unknown;
|
|
19
|
+
}
|
|
20
|
+
/**
|
|
21
|
+
* One (context, question) pair to answer
|
|
22
|
+
*/
|
|
23
|
+
export interface QuestionAnsweringInputData {
|
|
24
|
+
/**
|
|
25
|
+
* The context to be used for answering the question
|
|
26
|
+
*/
|
|
27
|
+
context: string;
|
|
28
|
+
/**
|
|
29
|
+
* The question to be answered
|
|
30
|
+
*/
|
|
31
|
+
question: string;
|
|
32
|
+
[property: string]: unknown;
|
|
33
|
+
}
|
|
34
|
+
/**
|
|
35
|
+
* Additional inference parameters
|
|
36
|
+
*
|
|
37
|
+
* Additional inference parameters for Question Answering
|
|
38
|
+
*/
|
|
39
|
+
export interface QuestionAnsweringParameters {
|
|
40
|
+
/**
|
|
41
|
+
* Attempts to align the answer to real words. Improves quality on space separated
|
|
42
|
+
* languages. Might hurt on non-space-separated languages (like Japanese or Chinese)
|
|
43
|
+
*/
|
|
44
|
+
align_to_words?: boolean;
|
|
45
|
+
/**
|
|
46
|
+
* If the context is too long to fit with the question for the model, it will be split in
|
|
47
|
+
* several chunks with some overlap. This argument controls the size of that overlap.
|
|
48
|
+
*/
|
|
49
|
+
doc_stride?: number;
|
|
50
|
+
/**
|
|
51
|
+
* Whether to accept impossible as an answer.
|
|
52
|
+
*/
|
|
53
|
+
handle_impossible_answer?: boolean;
|
|
54
|
+
/**
|
|
55
|
+
* The maximum length of predicted answers (e.g., only answers with a shorter length are
|
|
56
|
+
* considered).
|
|
57
|
+
*/
|
|
58
|
+
max_answer_len?: number;
|
|
59
|
+
/**
|
|
60
|
+
* The maximum length of the question after tokenization. It will be truncated if needed.
|
|
61
|
+
*/
|
|
62
|
+
max_question_len?: number;
|
|
63
|
+
/**
|
|
64
|
+
* The maximum length of the total sentence (context + question) in tokens of each chunk
|
|
65
|
+
* passed to the model. The context will be split in several chunks (using docStride as
|
|
66
|
+
* overlap) if needed.
|
|
67
|
+
*/
|
|
68
|
+
max_seq_len?: number;
|
|
69
|
+
/**
|
|
70
|
+
* The number of answers to return (will be chosen by order of likelihood). Note that we
|
|
71
|
+
* return less than topk answers if there are not enough options available within the
|
|
72
|
+
* context.
|
|
73
|
+
*/
|
|
74
|
+
top_k?: number;
|
|
75
|
+
[property: string]: unknown;
|
|
76
|
+
}
|
|
77
|
+
export type QuestionAnsweringOutput = QuestionAnsweringOutputElement[];
|
|
78
|
+
/**
|
|
79
|
+
* Outputs of inference for the Question Answering task
|
|
80
|
+
*/
|
|
81
|
+
export interface QuestionAnsweringOutputElement {
|
|
82
|
+
/**
|
|
83
|
+
* The answer to the question.
|
|
84
|
+
*/
|
|
85
|
+
answer: string;
|
|
86
|
+
/**
|
|
87
|
+
* The character position in the input where the answer ends.
|
|
88
|
+
*/
|
|
89
|
+
end: number;
|
|
90
|
+
/**
|
|
91
|
+
* The probability associated to the answer.
|
|
92
|
+
*/
|
|
93
|
+
score: number;
|
|
94
|
+
/**
|
|
95
|
+
* The character position in the input where the answer begins.
|
|
96
|
+
*/
|
|
97
|
+
start: number;
|
|
98
|
+
[property: string]: unknown;
|
|
99
|
+
}
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$id": "/inference/schemas/question-answering/input.json",
|
|
3
|
+
"$schema": "http://json-schema.org/draft-06/schema#",
|
|
4
|
+
"description": "Inputs for Question Answering inference",
|
|
5
|
+
"title": "QuestionAnsweringInput",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"inputs": {
|
|
9
|
+
"title": "QuestionAnsweringInputData",
|
|
10
|
+
"description": "One (context, question) pair to answer",
|
|
11
|
+
"type": "object",
|
|
12
|
+
"properties": {
|
|
13
|
+
"context": {
|
|
14
|
+
"type": "string",
|
|
15
|
+
"description": "The context to be used for answering the question"
|
|
16
|
+
},
|
|
17
|
+
"question": {
|
|
18
|
+
"type": "string",
|
|
19
|
+
"description": "The question to be answered"
|
|
20
|
+
}
|
|
21
|
+
},
|
|
22
|
+
"required": ["question", "context"]
|
|
23
|
+
},
|
|
24
|
+
"parameters": {
|
|
25
|
+
"description": "Additional inference parameters",
|
|
26
|
+
"$ref": "#/$defs/QuestionAnsweringParameters"
|
|
27
|
+
}
|
|
28
|
+
},
|
|
29
|
+
"$defs": {
|
|
30
|
+
"QuestionAnsweringParameters": {
|
|
31
|
+
"title": "QuestionAnsweringParameters",
|
|
32
|
+
"description": "Additional inference parameters for Question Answering",
|
|
33
|
+
"type": "object",
|
|
34
|
+
"properties": {
|
|
35
|
+
"top_k": {
|
|
36
|
+
"type": "integer",
|
|
37
|
+
"description": "The number of answers to return (will be chosen by order of likelihood). Note that we return less than topk answers if there are not enough options available within the context."
|
|
38
|
+
},
|
|
39
|
+
"doc_stride": {
|
|
40
|
+
"type": "integer",
|
|
41
|
+
"description": "If the context is too long to fit with the question for the model, it will be split in several chunks with some overlap. This argument controls the size of that overlap."
|
|
42
|
+
},
|
|
43
|
+
"max_answer_len": {
|
|
44
|
+
"type": "integer",
|
|
45
|
+
"description": "The maximum length of predicted answers (e.g., only answers with a shorter length are considered)."
|
|
46
|
+
},
|
|
47
|
+
"max_seq_len": {
|
|
48
|
+
"type": "integer",
|
|
49
|
+
"description": "The maximum length of the total sentence (context + question) in tokens of each chunk passed to the model. The context will be split in several chunks (using docStride as overlap) if needed."
|
|
50
|
+
},
|
|
51
|
+
"max_question_len": {
|
|
52
|
+
"type": "integer",
|
|
53
|
+
"description": "The maximum length of the question after tokenization. It will be truncated if needed."
|
|
54
|
+
},
|
|
55
|
+
"handle_impossible_answer": {
|
|
56
|
+
"type": "boolean",
|
|
57
|
+
"description": "Whether to accept impossible as an answer."
|
|
58
|
+
},
|
|
59
|
+
"align_to_words": {
|
|
60
|
+
"type": "boolean",
|
|
61
|
+
"description": "Attempts to align the answer to real words. Improves quality on space separated languages. Might hurt on non-space-separated languages (like Japanese or Chinese)"
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
},
|
|
66
|
+
"required": ["inputs"]
|
|
67
|
+
}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$id": "/inference/schemas/question-answering/output.json",
|
|
3
|
+
"$schema": "http://json-schema.org/draft-06/schema#",
|
|
4
|
+
"title": "QuestionAnsweringOutput",
|
|
5
|
+
"description": "Outputs of inference for the Question Answering task",
|
|
6
|
+
"type": "array",
|
|
7
|
+
"items": {
|
|
8
|
+
"type": "object",
|
|
9
|
+
"properties": {
|
|
10
|
+
"answer": {
|
|
11
|
+
"type": "string",
|
|
12
|
+
"description": "The answer to the question."
|
|
13
|
+
},
|
|
14
|
+
"score": {
|
|
15
|
+
"type": "number",
|
|
16
|
+
"description": "The probability associated to the answer."
|
|
17
|
+
},
|
|
18
|
+
"start": {
|
|
19
|
+
"type": "integer",
|
|
20
|
+
"description": "The character position in the input where the answer begins."
|
|
21
|
+
},
|
|
22
|
+
"end": {
|
|
23
|
+
"type": "integer",
|
|
24
|
+
"description": "The character position in the input where the answer ends."
|
|
25
|
+
}
|
|
26
|
+
},
|
|
27
|
+
"required": ["answer", "score", "start", "end"]
|
|
28
|
+
}
|
|
29
|
+
}
|