@huggingface/tasks 0.12.22 → 0.12.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +503 -132
- package/dist/index.js +503 -132
- package/dist/src/hardware.d.ts +20 -0
- package/dist/src/hardware.d.ts.map +1 -1
- package/dist/src/model-libraries-snippets.d.ts +1 -0
- package/dist/src/model-libraries-snippets.d.ts.map +1 -1
- package/dist/src/model-libraries.d.ts +9 -2
- package/dist/src/model-libraries.d.ts.map +1 -1
- package/dist/src/snippets/common.d.ts +20 -0
- package/dist/src/snippets/common.d.ts.map +1 -0
- package/dist/src/snippets/curl.d.ts +15 -8
- package/dist/src/snippets/curl.d.ts.map +1 -1
- package/dist/src/snippets/js.d.ts +17 -10
- package/dist/src/snippets/js.d.ts.map +1 -1
- package/dist/src/snippets/python.d.ts +20 -13
- package/dist/src/snippets/python.d.ts.map +1 -1
- package/dist/src/snippets/types.d.ts +4 -0
- package/dist/src/snippets/types.d.ts.map +1 -1
- package/dist/src/tasks/depth-estimation/data.d.ts.map +1 -1
- package/dist/src/tasks/image-segmentation/data.d.ts.map +1 -1
- package/dist/src/tasks/image-text-to-text/data.d.ts.map +1 -1
- package/dist/src/tasks/object-detection/data.d.ts.map +1 -1
- package/dist/src/tasks/text-to-speech/data.d.ts.map +1 -1
- package/dist/src/tasks/token-classification/inference.d.ts +8 -5
- package/dist/src/tasks/token-classification/inference.d.ts.map +1 -1
- package/dist/src/tasks/video-text-to-text/data.d.ts.map +1 -1
- package/dist/src/tasks/visual-question-answering/inference.d.ts +0 -1
- package/dist/src/tasks/visual-question-answering/inference.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/hardware.ts +20 -0
- package/src/model-libraries-snippets.ts +28 -3
- package/src/model-libraries.ts +8 -1
- package/src/snippets/common.ts +63 -0
- package/src/snippets/curl.ts +71 -26
- package/src/snippets/js.ts +165 -40
- package/src/snippets/python.ts +186 -48
- package/src/snippets/types.ts +5 -0
- package/src/tasks/depth-estimation/data.ts +15 -7
- package/src/tasks/image-segmentation/about.md +1 -1
- package/src/tasks/image-segmentation/data.ts +10 -9
- package/src/tasks/image-text-to-text/data.ts +17 -9
- package/src/tasks/keypoint-detection/data.ts +1 -1
- package/src/tasks/object-detection/data.ts +5 -4
- package/src/tasks/text-generation/data.ts +7 -7
- package/src/tasks/text-to-image/data.ts +2 -2
- package/src/tasks/text-to-speech/data.ts +5 -1
- package/src/tasks/text-to-video/data.ts +10 -10
- package/src/tasks/token-classification/inference.ts +8 -5
- package/src/tasks/token-classification/spec/output.json +6 -2
- package/src/tasks/video-text-to-text/data.ts +8 -0
- package/src/tasks/visual-question-answering/inference.ts +0 -1
- package/src/tasks/visual-question-answering/spec/output.json +1 -1
|
@@ -9,7 +9,11 @@
|
|
|
9
9
|
"properties": {
|
|
10
10
|
"entity_group": {
|
|
11
11
|
"type": "string",
|
|
12
|
-
"description": "The predicted label for
|
|
12
|
+
"description": "The predicted label for a group of one or more tokens"
|
|
13
|
+
},
|
|
14
|
+
"entity": {
|
|
15
|
+
"type": "string",
|
|
16
|
+
"description": "The predicted label for a single token"
|
|
13
17
|
},
|
|
14
18
|
"score": {
|
|
15
19
|
"type": "number",
|
|
@@ -28,6 +32,6 @@
|
|
|
28
32
|
"description": "The character position in the input where this group ends."
|
|
29
33
|
}
|
|
30
34
|
},
|
|
31
|
-
"required": ["
|
|
35
|
+
"required": ["score", "word", "start", "end"]
|
|
32
36
|
}
|
|
33
37
|
}
|
|
@@ -10,6 +10,10 @@ const taskData: TaskDataCustom = {
|
|
|
10
10
|
description: "A dataset of instructions and question-answer pairs about videos.",
|
|
11
11
|
id: "lmms-lab/VideoChatGPT",
|
|
12
12
|
},
|
|
13
|
+
{
|
|
14
|
+
description: "Large video understanding dataset.",
|
|
15
|
+
id: "HuggingFaceFV/finevideo",
|
|
16
|
+
},
|
|
13
17
|
],
|
|
14
18
|
demo: {
|
|
15
19
|
inputs: [
|
|
@@ -48,6 +52,10 @@ const taskData: TaskDataCustom = {
|
|
|
48
52
|
description: "An application to chat with a video-text-to-text model.",
|
|
49
53
|
id: "llava-hf/video-llava",
|
|
50
54
|
},
|
|
55
|
+
{
|
|
56
|
+
description: "A leaderboard for various video-text-to-text models.",
|
|
57
|
+
id: "opencompass/openvlm_video_leaderboard",
|
|
58
|
+
},
|
|
51
59
|
],
|
|
52
60
|
summary:
|
|
53
61
|
"Video-text-to-text models take in a video and a text prompt and output text. These models are also called video-language models.",
|