@huggingface/tasks 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{index.mjs → index.cjs} +295 -134
- package/dist/index.d.ts +8 -6
- package/dist/index.js +260 -169
- package/package.json +13 -8
- package/src/library-to-tasks.ts +1 -1
- package/src/library-ui-elements.ts +24 -10
- package/src/model-data.ts +1 -1
- package/src/model-libraries.ts +3 -2
- package/src/pipelines.ts +1 -1
- package/src/tasks/audio-classification/about.md +1 -1
- package/src/tasks/audio-classification/inference.ts +51 -0
- package/src/tasks/audio-classification/spec/input.json +34 -0
- package/src/tasks/audio-classification/spec/output.json +21 -0
- package/src/tasks/audio-to-audio/about.md +1 -1
- package/src/tasks/automatic-speech-recognition/about.md +4 -2
- package/src/tasks/automatic-speech-recognition/inference.ts +154 -0
- package/src/tasks/automatic-speech-recognition/spec/input.json +34 -0
- package/src/tasks/automatic-speech-recognition/spec/output.json +36 -0
- package/src/tasks/common-definitions.json +109 -0
- package/src/tasks/depth-estimation/data.ts +8 -4
- package/src/tasks/depth-estimation/inference.ts +35 -0
- package/src/tasks/depth-estimation/spec/input.json +30 -0
- package/src/tasks/depth-estimation/spec/output.json +10 -0
- package/src/tasks/document-question-answering/inference.ts +102 -0
- package/src/tasks/document-question-answering/spec/input.json +85 -0
- package/src/tasks/document-question-answering/spec/output.json +36 -0
- package/src/tasks/feature-extraction/inference.ts +22 -0
- package/src/tasks/feature-extraction/spec/input.json +26 -0
- package/src/tasks/feature-extraction/spec/output.json +7 -0
- package/src/tasks/fill-mask/inference.ts +61 -0
- package/src/tasks/fill-mask/spec/input.json +38 -0
- package/src/tasks/fill-mask/spec/output.json +29 -0
- package/src/tasks/image-classification/inference.ts +51 -0
- package/src/tasks/image-classification/spec/input.json +34 -0
- package/src/tasks/image-classification/spec/output.json +10 -0
- package/src/tasks/image-segmentation/inference.ts +65 -0
- package/src/tasks/image-segmentation/spec/input.json +54 -0
- package/src/tasks/image-segmentation/spec/output.json +25 -0
- package/src/tasks/image-to-image/inference.ts +67 -0
- package/src/tasks/image-to-image/spec/input.json +52 -0
- package/src/tasks/image-to-image/spec/output.json +12 -0
- package/src/tasks/image-to-text/inference.ts +138 -0
- package/src/tasks/image-to-text/spec/input.json +34 -0
- package/src/tasks/image-to-text/spec/output.json +17 -0
- package/src/tasks/index.ts +5 -2
- package/src/tasks/mask-generation/about.md +65 -0
- package/src/tasks/mask-generation/data.ts +55 -0
- package/src/tasks/object-detection/inference.ts +62 -0
- package/src/tasks/object-detection/spec/input.json +30 -0
- package/src/tasks/object-detection/spec/output.json +46 -0
- package/src/tasks/placeholder/data.ts +3 -0
- package/src/tasks/placeholder/spec/input.json +35 -0
- package/src/tasks/placeholder/spec/output.json +17 -0
- package/src/tasks/question-answering/inference.ts +99 -0
- package/src/tasks/question-answering/spec/input.json +67 -0
- package/src/tasks/question-answering/spec/output.json +29 -0
- package/src/tasks/sentence-similarity/about.md +2 -2
- package/src/tasks/sentence-similarity/inference.ts +32 -0
- package/src/tasks/sentence-similarity/spec/input.json +40 -0
- package/src/tasks/sentence-similarity/spec/output.json +12 -0
- package/src/tasks/summarization/data.ts +1 -0
- package/src/tasks/summarization/inference.ts +58 -0
- package/src/tasks/summarization/spec/input.json +7 -0
- package/src/tasks/summarization/spec/output.json +7 -0
- package/src/tasks/table-question-answering/inference.ts +61 -0
- package/src/tasks/table-question-answering/spec/input.json +39 -0
- package/src/tasks/table-question-answering/spec/output.json +40 -0
- package/src/tasks/tabular-classification/about.md +1 -1
- package/src/tasks/tabular-regression/about.md +1 -1
- package/src/tasks/text-classification/about.md +1 -0
- package/src/tasks/text-classification/inference.ts +51 -0
- package/src/tasks/text-classification/spec/input.json +35 -0
- package/src/tasks/text-classification/spec/output.json +10 -0
- package/src/tasks/text-generation/about.md +24 -13
- package/src/tasks/text-generation/data.ts +22 -38
- package/src/tasks/text-generation/inference.ts +85 -0
- package/src/tasks/text-generation/spec/input.json +74 -0
- package/src/tasks/text-generation/spec/output.json +17 -0
- package/src/tasks/text-to-audio/inference.ts +138 -0
- package/src/tasks/text-to-audio/spec/input.json +31 -0
- package/src/tasks/text-to-audio/spec/output.json +20 -0
- package/src/tasks/text-to-image/about.md +11 -2
- package/src/tasks/text-to-image/data.ts +6 -2
- package/src/tasks/text-to-image/inference.ts +73 -0
- package/src/tasks/text-to-image/spec/input.json +57 -0
- package/src/tasks/text-to-image/spec/output.json +15 -0
- package/src/tasks/text-to-speech/about.md +4 -2
- package/src/tasks/text-to-speech/data.ts +1 -0
- package/src/tasks/text-to-speech/inference.ts +146 -0
- package/src/tasks/text-to-speech/spec/input.json +7 -0
- package/src/tasks/text-to-speech/spec/output.json +7 -0
- package/src/tasks/text2text-generation/inference.ts +53 -0
- package/src/tasks/text2text-generation/spec/input.json +55 -0
- package/src/tasks/text2text-generation/spec/output.json +17 -0
- package/src/tasks/token-classification/inference.ts +82 -0
- package/src/tasks/token-classification/spec/input.json +65 -0
- package/src/tasks/token-classification/spec/output.json +33 -0
- package/src/tasks/translation/data.ts +1 -0
- package/src/tasks/translation/inference.ts +58 -0
- package/src/tasks/translation/spec/input.json +7 -0
- package/src/tasks/translation/spec/output.json +7 -0
- package/src/tasks/video-classification/inference.ts +59 -0
- package/src/tasks/video-classification/spec/input.json +42 -0
- package/src/tasks/video-classification/spec/output.json +10 -0
- package/src/tasks/visual-question-answering/inference.ts +63 -0
- package/src/tasks/visual-question-answering/spec/input.json +41 -0
- package/src/tasks/visual-question-answering/spec/output.json +21 -0
- package/src/tasks/zero-shot-classification/inference.ts +67 -0
- package/src/tasks/zero-shot-classification/spec/input.json +50 -0
- package/src/tasks/zero-shot-classification/spec/output.json +10 -0
- package/src/tasks/zero-shot-image-classification/data.ts +8 -5
- package/src/tasks/zero-shot-image-classification/inference.ts +61 -0
- package/src/tasks/zero-shot-image-classification/spec/input.json +45 -0
- package/src/tasks/zero-shot-image-classification/spec/output.json +10 -0
- package/src/tasks/zero-shot-object-detection/about.md +45 -0
- package/src/tasks/zero-shot-object-detection/data.ts +62 -0
- package/src/tasks/zero-shot-object-detection/inference.ts +66 -0
- package/src/tasks/zero-shot-object-detection/spec/input.json +40 -0
- package/src/tasks/zero-shot-object-detection/spec/output.json +47 -0
- package/tsconfig.json +3 -3
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import type { TaskDataCustom } from "..";
|
|
2
|
+
|
|
3
|
+
const taskData: TaskDataCustom = {
|
|
4
|
+
datasets: [],
|
|
5
|
+
demo: {
|
|
6
|
+
inputs: [
|
|
7
|
+
{
|
|
8
|
+
filename: "zero-shot-object-detection-input.jpg",
|
|
9
|
+
type: "img",
|
|
10
|
+
},
|
|
11
|
+
{
|
|
12
|
+
label: "Classes",
|
|
13
|
+
content: "cat, dog, bird",
|
|
14
|
+
type: "text",
|
|
15
|
+
},
|
|
16
|
+
],
|
|
17
|
+
outputs: [
|
|
18
|
+
{
|
|
19
|
+
filename: "zero-shot-object-detection-output.jpg",
|
|
20
|
+
type: "img",
|
|
21
|
+
},
|
|
22
|
+
],
|
|
23
|
+
},
|
|
24
|
+
metrics: [
|
|
25
|
+
{
|
|
26
|
+
description:
|
|
27
|
+
"The Average Precision (AP) metric is the Area Under the PR Curve (AUC-PR). It is calculated for each class separately",
|
|
28
|
+
id: "Average Precision",
|
|
29
|
+
},
|
|
30
|
+
{
|
|
31
|
+
description: "The Mean Average Precision (mAP) metric is the overall average of the AP values",
|
|
32
|
+
id: "Mean Average Precision",
|
|
33
|
+
},
|
|
34
|
+
{
|
|
35
|
+
description:
|
|
36
|
+
"The APα metric is the Average Precision at the IoU threshold of a α value, for example, AP50 and AP75",
|
|
37
|
+
id: "APα",
|
|
38
|
+
},
|
|
39
|
+
],
|
|
40
|
+
models: [
|
|
41
|
+
{
|
|
42
|
+
description: "Solid zero-shot object detection model that uses CLIP as backbone.",
|
|
43
|
+
id: "google/owlvit-base-patch32",
|
|
44
|
+
},
|
|
45
|
+
{
|
|
46
|
+
description: "The improved version of the owlvit model.",
|
|
47
|
+
id: "google/owlv2-base-patch16-ensemble",
|
|
48
|
+
},
|
|
49
|
+
],
|
|
50
|
+
spaces: [
|
|
51
|
+
{
|
|
52
|
+
description: "A demo to try the state-of-the-art zero-shot object detection model, OWLv2.",
|
|
53
|
+
id: "merve/owlv2",
|
|
54
|
+
},
|
|
55
|
+
],
|
|
56
|
+
summary:
|
|
57
|
+
"Zero-shot object detection is a computer vision task to detect objects and their classes in images, without any prior training or knowledge of the classes. Zero-shot object detection models receive an image as input, as well as a list of candidate classes, and output the bounding boxes and labels where the objects have been detected.",
|
|
58
|
+
widgetModels: [],
|
|
59
|
+
youtubeId: "",
|
|
60
|
+
};
|
|
61
|
+
|
|
62
|
+
export default taskData;
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Inference code generated from the JSON schema spec in ./spec
|
|
3
|
+
*
|
|
4
|
+
* Using src/scripts/inference-codegen
|
|
5
|
+
*/
|
|
6
|
+
/**
|
|
7
|
+
* Inputs for Zero Shot Object Detection inference
|
|
8
|
+
*/
|
|
9
|
+
export interface ZeroShotObjectDetectionInput {
|
|
10
|
+
/**
|
|
11
|
+
* The input image data, with candidate labels
|
|
12
|
+
*/
|
|
13
|
+
data: ZeroShotObjectDetectionInputData;
|
|
14
|
+
/**
|
|
15
|
+
* Additional inference parameters
|
|
16
|
+
*/
|
|
17
|
+
parameters?: {
|
|
18
|
+
[key: string]: unknown;
|
|
19
|
+
};
|
|
20
|
+
[property: string]: unknown;
|
|
21
|
+
}
|
|
22
|
+
/**
|
|
23
|
+
* The input image data, with candidate labels
|
|
24
|
+
*/
|
|
25
|
+
export interface ZeroShotObjectDetectionInputData {
|
|
26
|
+
/**
|
|
27
|
+
* The candidate labels for this image
|
|
28
|
+
*/
|
|
29
|
+
candidateLabels: string[];
|
|
30
|
+
/**
|
|
31
|
+
* The image data to generate bounding boxes from
|
|
32
|
+
*/
|
|
33
|
+
image: unknown;
|
|
34
|
+
[property: string]: unknown;
|
|
35
|
+
}
|
|
36
|
+
/**
|
|
37
|
+
* The predicted bounding box. Coordinates are relative to the top left corner of the input
|
|
38
|
+
* image.
|
|
39
|
+
*/
|
|
40
|
+
export interface BoundingBox {
|
|
41
|
+
xmax: number;
|
|
42
|
+
xmin: number;
|
|
43
|
+
ymax: number;
|
|
44
|
+
ymin: number;
|
|
45
|
+
[property: string]: unknown;
|
|
46
|
+
}
|
|
47
|
+
export type ZeroShotObjectDetectionOutput = ZeroShotObjectDetectionOutputElement[];
|
|
48
|
+
/**
|
|
49
|
+
* Outputs of inference for the Zero Shot Object Detection task
|
|
50
|
+
*/
|
|
51
|
+
export interface ZeroShotObjectDetectionOutputElement {
|
|
52
|
+
/**
|
|
53
|
+
* The predicted bounding box. Coordinates are relative to the top left corner of the input
|
|
54
|
+
* image.
|
|
55
|
+
*/
|
|
56
|
+
box: BoundingBox;
|
|
57
|
+
/**
|
|
58
|
+
* A candidate label
|
|
59
|
+
*/
|
|
60
|
+
label: string;
|
|
61
|
+
/**
|
|
62
|
+
* The associated score / probability
|
|
63
|
+
*/
|
|
64
|
+
score: number;
|
|
65
|
+
[property: string]: unknown;
|
|
66
|
+
}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$id": "/inference/schemas/zero-shot-object-detection/input.json",
|
|
3
|
+
"$schema": "http://json-schema.org/draft-06/schema#",
|
|
4
|
+
"description": "Inputs for Zero Shot Object Detection inference",
|
|
5
|
+
"title": "ZeroShotObjectDetectionInput",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"data": {
|
|
9
|
+
"description": "The input image data, with candidate labels",
|
|
10
|
+
"type": "object",
|
|
11
|
+
"title": "ZeroShotObjectDetectionInputData",
|
|
12
|
+
"properties": {
|
|
13
|
+
"image": {
|
|
14
|
+
"description": "The image data to generate bounding boxes from"
|
|
15
|
+
},
|
|
16
|
+
"candidateLabels": {
|
|
17
|
+
"description": "The candidate labels for this image",
|
|
18
|
+
"type": "array",
|
|
19
|
+
"items": {
|
|
20
|
+
"type": "string"
|
|
21
|
+
}
|
|
22
|
+
}
|
|
23
|
+
},
|
|
24
|
+
"required": ["image", "candidateLabels"]
|
|
25
|
+
},
|
|
26
|
+
"parameters": {
|
|
27
|
+
"description": "Additional inference parameters",
|
|
28
|
+
"$ref": "#/$defs/ZeroShotObjectDetectionParameters"
|
|
29
|
+
}
|
|
30
|
+
},
|
|
31
|
+
"$defs": {
|
|
32
|
+
"ZeroShotObjectDetectionParameters": {
|
|
33
|
+
"title": "ZeroShotObjectDetectionParameters",
|
|
34
|
+
"description": "Additional inference parameters for Zero Shot Object Detection",
|
|
35
|
+
"type": "object",
|
|
36
|
+
"properties": {}
|
|
37
|
+
}
|
|
38
|
+
},
|
|
39
|
+
"required": ["data"]
|
|
40
|
+
}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$id": "/inference/schemas/zero-shot-object-detection/output.json",
|
|
3
|
+
"$schema": "http://json-schema.org/draft-06/schema#",
|
|
4
|
+
"description": "Outputs of inference for the Zero Shot Object Detection task",
|
|
5
|
+
"title": "ZeroShotObjectDetectionOutput",
|
|
6
|
+
"type": "array",
|
|
7
|
+
"items": {
|
|
8
|
+
"type": "object",
|
|
9
|
+
"title": "ZeroShotObjectDetectionOutputElement",
|
|
10
|
+
"properties": {
|
|
11
|
+
"label": {
|
|
12
|
+
"type": "string",
|
|
13
|
+
"description": "A candidate label"
|
|
14
|
+
},
|
|
15
|
+
"score": {
|
|
16
|
+
"type": "number",
|
|
17
|
+
"description": "The associated score / probability"
|
|
18
|
+
},
|
|
19
|
+
"box": {
|
|
20
|
+
"$ref": "#/$defs/BoundingBox",
|
|
21
|
+
"description": "The predicted bounding box. Coordinates are relative to the top left corner of the input image."
|
|
22
|
+
}
|
|
23
|
+
},
|
|
24
|
+
"required": ["box", "label", "score"]
|
|
25
|
+
},
|
|
26
|
+
"$defs": {
|
|
27
|
+
"BoundingBox": {
|
|
28
|
+
"title": "BoundingBox",
|
|
29
|
+
"type": "object",
|
|
30
|
+
"properties": {
|
|
31
|
+
"xmin": {
|
|
32
|
+
"type": "integer"
|
|
33
|
+
},
|
|
34
|
+
"xmax": {
|
|
35
|
+
"type": "integer"
|
|
36
|
+
},
|
|
37
|
+
"ymin": {
|
|
38
|
+
"type": "integer"
|
|
39
|
+
},
|
|
40
|
+
"ymax": {
|
|
41
|
+
"type": "integer"
|
|
42
|
+
}
|
|
43
|
+
},
|
|
44
|
+
"required": ["xmin", "xmax", "ymin", "ymax"]
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
}
|
package/tsconfig.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
|
2
2
|
"compilerOptions": {
|
|
3
3
|
"allowSyntheticDefaultImports": true,
|
|
4
4
|
"lib": ["ES2022", "DOM"],
|
|
5
|
-
"module": "
|
|
5
|
+
"module": "ESNext",
|
|
6
|
+
"target": "ESNext",
|
|
6
7
|
"moduleResolution": "node",
|
|
7
|
-
"target": "ES2022",
|
|
8
8
|
"forceConsistentCasingInFileNames": true,
|
|
9
9
|
"strict": true,
|
|
10
10
|
"noImplicitAny": true,
|
|
@@ -13,6 +13,6 @@
|
|
|
13
13
|
"noImplicitOverride": true,
|
|
14
14
|
"outDir": "./dist"
|
|
15
15
|
},
|
|
16
|
-
"include": ["src"],
|
|
16
|
+
"include": ["src", "scripts"],
|
|
17
17
|
"exclude": ["dist"]
|
|
18
18
|
}
|