@huggingface/tasks 0.13.1-test → 0.13.1-test2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +4 -2
- package/src/dataset-libraries.ts +89 -0
- package/src/default-widget-inputs.ts +718 -0
- package/src/gguf.ts +40 -0
- package/src/hardware.ts +482 -0
- package/src/index.ts +59 -0
- package/src/library-to-tasks.ts +76 -0
- package/src/local-apps.ts +412 -0
- package/src/model-data.ts +149 -0
- package/src/model-libraries-downloads.ts +18 -0
- package/src/model-libraries-snippets.ts +1128 -0
- package/src/model-libraries.ts +820 -0
- package/src/pipelines.ts +698 -0
- package/src/snippets/common.ts +39 -0
- package/src/snippets/curl.spec.ts +94 -0
- package/src/snippets/curl.ts +120 -0
- package/src/snippets/index.ts +7 -0
- package/src/snippets/inputs.ts +167 -0
- package/src/snippets/js.spec.ts +148 -0
- package/src/snippets/js.ts +305 -0
- package/src/snippets/python.spec.ts +144 -0
- package/src/snippets/python.ts +321 -0
- package/src/snippets/types.ts +16 -0
- package/src/tasks/audio-classification/about.md +86 -0
- package/src/tasks/audio-classification/data.ts +81 -0
- package/src/tasks/audio-classification/inference.ts +52 -0
- package/src/tasks/audio-classification/spec/input.json +35 -0
- package/src/tasks/audio-classification/spec/output.json +11 -0
- package/src/tasks/audio-to-audio/about.md +56 -0
- package/src/tasks/audio-to-audio/data.ts +70 -0
- package/src/tasks/automatic-speech-recognition/about.md +90 -0
- package/src/tasks/automatic-speech-recognition/data.ts +82 -0
- package/src/tasks/automatic-speech-recognition/inference.ts +160 -0
- package/src/tasks/automatic-speech-recognition/spec/input.json +35 -0
- package/src/tasks/automatic-speech-recognition/spec/output.json +38 -0
- package/src/tasks/chat-completion/inference.ts +322 -0
- package/src/tasks/chat-completion/spec/input.json +350 -0
- package/src/tasks/chat-completion/spec/output.json +206 -0
- package/src/tasks/chat-completion/spec/stream_output.json +213 -0
- package/src/tasks/common-definitions.json +100 -0
- package/src/tasks/depth-estimation/about.md +45 -0
- package/src/tasks/depth-estimation/data.ts +70 -0
- package/src/tasks/depth-estimation/inference.ts +35 -0
- package/src/tasks/depth-estimation/spec/input.json +25 -0
- package/src/tasks/depth-estimation/spec/output.json +16 -0
- package/src/tasks/document-question-answering/about.md +53 -0
- package/src/tasks/document-question-answering/data.ts +85 -0
- package/src/tasks/document-question-answering/inference.ts +110 -0
- package/src/tasks/document-question-answering/spec/input.json +85 -0
- package/src/tasks/document-question-answering/spec/output.json +36 -0
- package/src/tasks/feature-extraction/about.md +72 -0
- package/src/tasks/feature-extraction/data.ts +57 -0
- package/src/tasks/feature-extraction/inference.ts +40 -0
- package/src/tasks/feature-extraction/spec/input.json +47 -0
- package/src/tasks/feature-extraction/spec/output.json +15 -0
- package/src/tasks/fill-mask/about.md +51 -0
- package/src/tasks/fill-mask/data.ts +79 -0
- package/src/tasks/fill-mask/inference.ts +62 -0
- package/src/tasks/fill-mask/spec/input.json +38 -0
- package/src/tasks/fill-mask/spec/output.json +29 -0
- package/src/tasks/image-classification/about.md +50 -0
- package/src/tasks/image-classification/data.ts +88 -0
- package/src/tasks/image-classification/inference.ts +52 -0
- package/src/tasks/image-classification/spec/input.json +35 -0
- package/src/tasks/image-classification/spec/output.json +11 -0
- package/src/tasks/image-feature-extraction/about.md +23 -0
- package/src/tasks/image-feature-extraction/data.ts +59 -0
- package/src/tasks/image-segmentation/about.md +63 -0
- package/src/tasks/image-segmentation/data.ts +99 -0
- package/src/tasks/image-segmentation/inference.ts +69 -0
- package/src/tasks/image-segmentation/spec/input.json +45 -0
- package/src/tasks/image-segmentation/spec/output.json +26 -0
- package/src/tasks/image-text-to-text/about.md +76 -0
- package/src/tasks/image-text-to-text/data.ts +102 -0
- package/src/tasks/image-to-3d/about.md +62 -0
- package/src/tasks/image-to-3d/data.ts +75 -0
- package/src/tasks/image-to-image/about.md +129 -0
- package/src/tasks/image-to-image/data.ts +101 -0
- package/src/tasks/image-to-image/inference.ts +68 -0
- package/src/tasks/image-to-image/spec/input.json +55 -0
- package/src/tasks/image-to-image/spec/output.json +12 -0
- package/src/tasks/image-to-text/about.md +61 -0
- package/src/tasks/image-to-text/data.ts +82 -0
- package/src/tasks/image-to-text/inference.ts +143 -0
- package/src/tasks/image-to-text/spec/input.json +34 -0
- package/src/tasks/image-to-text/spec/output.json +14 -0
- package/src/tasks/index.ts +312 -0
- package/src/tasks/keypoint-detection/about.md +57 -0
- package/src/tasks/keypoint-detection/data.ts +50 -0
- package/src/tasks/mask-generation/about.md +65 -0
- package/src/tasks/mask-generation/data.ts +55 -0
- package/src/tasks/object-detection/about.md +37 -0
- package/src/tasks/object-detection/data.ts +86 -0
- package/src/tasks/object-detection/inference.ts +75 -0
- package/src/tasks/object-detection/spec/input.json +31 -0
- package/src/tasks/object-detection/spec/output.json +50 -0
- package/src/tasks/placeholder/about.md +15 -0
- package/src/tasks/placeholder/data.ts +21 -0
- package/src/tasks/placeholder/spec/input.json +35 -0
- package/src/tasks/placeholder/spec/output.json +17 -0
- package/src/tasks/question-answering/about.md +56 -0
- package/src/tasks/question-answering/data.ts +75 -0
- package/src/tasks/question-answering/inference.ts +99 -0
- package/src/tasks/question-answering/spec/input.json +67 -0
- package/src/tasks/question-answering/spec/output.json +29 -0
- package/src/tasks/reinforcement-learning/about.md +167 -0
- package/src/tasks/reinforcement-learning/data.ts +75 -0
- package/src/tasks/sentence-similarity/about.md +97 -0
- package/src/tasks/sentence-similarity/data.ts +101 -0
- package/src/tasks/sentence-similarity/inference.ts +32 -0
- package/src/tasks/sentence-similarity/spec/input.json +40 -0
- package/src/tasks/sentence-similarity/spec/output.json +12 -0
- package/src/tasks/summarization/about.md +58 -0
- package/src/tasks/summarization/data.ts +76 -0
- package/src/tasks/summarization/inference.ts +57 -0
- package/src/tasks/summarization/spec/input.json +42 -0
- package/src/tasks/summarization/spec/output.json +14 -0
- package/src/tasks/table-question-answering/about.md +43 -0
- package/src/tasks/table-question-answering/data.ts +59 -0
- package/src/tasks/table-question-answering/inference.ts +61 -0
- package/src/tasks/table-question-answering/spec/input.json +44 -0
- package/src/tasks/table-question-answering/spec/output.json +40 -0
- package/src/tasks/tabular-classification/about.md +65 -0
- package/src/tasks/tabular-classification/data.ts +68 -0
- package/src/tasks/tabular-regression/about.md +87 -0
- package/src/tasks/tabular-regression/data.ts +57 -0
- package/src/tasks/text-classification/about.md +173 -0
- package/src/tasks/text-classification/data.ts +103 -0
- package/src/tasks/text-classification/inference.ts +51 -0
- package/src/tasks/text-classification/spec/input.json +35 -0
- package/src/tasks/text-classification/spec/output.json +11 -0
- package/src/tasks/text-generation/about.md +154 -0
- package/src/tasks/text-generation/data.ts +114 -0
- package/src/tasks/text-generation/inference.ts +200 -0
- package/src/tasks/text-generation/spec/input.json +219 -0
- package/src/tasks/text-generation/spec/output.json +179 -0
- package/src/tasks/text-generation/spec/stream_output.json +103 -0
- package/src/tasks/text-to-3d/about.md +62 -0
- package/src/tasks/text-to-3d/data.ts +56 -0
- package/src/tasks/text-to-audio/inference.ts +143 -0
- package/src/tasks/text-to-audio/spec/input.json +31 -0
- package/src/tasks/text-to-audio/spec/output.json +17 -0
- package/src/tasks/text-to-image/about.md +96 -0
- package/src/tasks/text-to-image/data.ts +100 -0
- package/src/tasks/text-to-image/inference.ts +75 -0
- package/src/tasks/text-to-image/spec/input.json +63 -0
- package/src/tasks/text-to-image/spec/output.json +13 -0
- package/src/tasks/text-to-speech/about.md +63 -0
- package/src/tasks/text-to-speech/data.ts +79 -0
- package/src/tasks/text-to-speech/inference.ts +145 -0
- package/src/tasks/text-to-speech/spec/input.json +31 -0
- package/src/tasks/text-to-speech/spec/output.json +7 -0
- package/src/tasks/text-to-video/about.md +41 -0
- package/src/tasks/text-to-video/data.ts +102 -0
- package/src/tasks/text2text-generation/inference.ts +55 -0
- package/src/tasks/text2text-generation/spec/input.json +55 -0
- package/src/tasks/text2text-generation/spec/output.json +14 -0
- package/src/tasks/token-classification/about.md +76 -0
- package/src/tasks/token-classification/data.ts +92 -0
- package/src/tasks/token-classification/inference.ts +85 -0
- package/src/tasks/token-classification/spec/input.json +65 -0
- package/src/tasks/token-classification/spec/output.json +37 -0
- package/src/tasks/translation/about.md +65 -0
- package/src/tasks/translation/data.ts +70 -0
- package/src/tasks/translation/inference.ts +67 -0
- package/src/tasks/translation/spec/input.json +50 -0
- package/src/tasks/translation/spec/output.json +14 -0
- package/src/tasks/unconditional-image-generation/about.md +50 -0
- package/src/tasks/unconditional-image-generation/data.ts +72 -0
- package/src/tasks/video-classification/about.md +37 -0
- package/src/tasks/video-classification/data.ts +84 -0
- package/src/tasks/video-classification/inference.ts +59 -0
- package/src/tasks/video-classification/spec/input.json +42 -0
- package/src/tasks/video-classification/spec/output.json +10 -0
- package/src/tasks/video-text-to-text/about.md +98 -0
- package/src/tasks/video-text-to-text/data.ts +66 -0
- package/src/tasks/visual-question-answering/about.md +48 -0
- package/src/tasks/visual-question-answering/data.ts +97 -0
- package/src/tasks/visual-question-answering/inference.ts +62 -0
- package/src/tasks/visual-question-answering/spec/input.json +41 -0
- package/src/tasks/visual-question-answering/spec/output.json +21 -0
- package/src/tasks/zero-shot-classification/about.md +40 -0
- package/src/tasks/zero-shot-classification/data.ts +70 -0
- package/src/tasks/zero-shot-classification/inference.ts +67 -0
- package/src/tasks/zero-shot-classification/spec/input.json +50 -0
- package/src/tasks/zero-shot-classification/spec/output.json +11 -0
- package/src/tasks/zero-shot-image-classification/about.md +75 -0
- package/src/tasks/zero-shot-image-classification/data.ts +84 -0
- package/src/tasks/zero-shot-image-classification/inference.ts +61 -0
- package/src/tasks/zero-shot-image-classification/spec/input.json +45 -0
- package/src/tasks/zero-shot-image-classification/spec/output.json +10 -0
- package/src/tasks/zero-shot-object-detection/about.md +45 -0
- package/src/tasks/zero-shot-object-detection/data.ts +67 -0
- package/src/tasks/zero-shot-object-detection/inference.ts +66 -0
- package/src/tasks/zero-shot-object-detection/spec/input.json +40 -0
- package/src/tasks/zero-shot-object-detection/spec/output.json +47 -0
- package/src/tokenizer-data.ts +32 -0
- package/src/widget-example.ts +125 -0
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$id": "/inference/schemas/chat-completion/output.json",
|
|
3
|
+
"$schema": "http://json-schema.org/draft-06/schema#",
|
|
4
|
+
"description": "Chat Completion Output.\n\nAuto-generated from TGI specs.\nFor more details, check out https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.",
|
|
5
|
+
"title": "ChatCompletionOutput",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"required": ["id", "created", "model", "system_fingerprint", "choices", "usage"],
|
|
8
|
+
"properties": {
|
|
9
|
+
"choices": {
|
|
10
|
+
"type": "array",
|
|
11
|
+
"items": {
|
|
12
|
+
"$ref": "#/$defs/ChatCompletionOutputComplete"
|
|
13
|
+
}
|
|
14
|
+
},
|
|
15
|
+
"created": {
|
|
16
|
+
"type": "integer",
|
|
17
|
+
"format": "int64",
|
|
18
|
+
"example": "1706270835",
|
|
19
|
+
"minimum": 0
|
|
20
|
+
},
|
|
21
|
+
"id": {
|
|
22
|
+
"type": "string"
|
|
23
|
+
},
|
|
24
|
+
"model": {
|
|
25
|
+
"type": "string",
|
|
26
|
+
"example": "mistralai/Mistral-7B-Instruct-v0.2"
|
|
27
|
+
},
|
|
28
|
+
"system_fingerprint": {
|
|
29
|
+
"type": "string"
|
|
30
|
+
},
|
|
31
|
+
"usage": {
|
|
32
|
+
"$ref": "#/$defs/ChatCompletionOutputUsage"
|
|
33
|
+
}
|
|
34
|
+
},
|
|
35
|
+
"$defs": {
|
|
36
|
+
"ChatCompletionOutputComplete": {
|
|
37
|
+
"type": "object",
|
|
38
|
+
"required": ["index", "message", "finish_reason"],
|
|
39
|
+
"properties": {
|
|
40
|
+
"finish_reason": {
|
|
41
|
+
"type": "string"
|
|
42
|
+
},
|
|
43
|
+
"index": {
|
|
44
|
+
"type": "integer",
|
|
45
|
+
"format": "int32",
|
|
46
|
+
"minimum": 0
|
|
47
|
+
},
|
|
48
|
+
"logprobs": {
|
|
49
|
+
"allOf": [
|
|
50
|
+
{
|
|
51
|
+
"$ref": "#/$defs/ChatCompletionOutputLogprobs"
|
|
52
|
+
}
|
|
53
|
+
],
|
|
54
|
+
"nullable": true
|
|
55
|
+
},
|
|
56
|
+
"message": {
|
|
57
|
+
"$ref": "#/$defs/ChatCompletionOutputMessage"
|
|
58
|
+
}
|
|
59
|
+
},
|
|
60
|
+
"title": "ChatCompletionOutputComplete"
|
|
61
|
+
},
|
|
62
|
+
"ChatCompletionOutputLogprobs": {
|
|
63
|
+
"type": "object",
|
|
64
|
+
"required": ["content"],
|
|
65
|
+
"properties": {
|
|
66
|
+
"content": {
|
|
67
|
+
"type": "array",
|
|
68
|
+
"items": {
|
|
69
|
+
"$ref": "#/$defs/ChatCompletionOutputLogprob"
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
},
|
|
73
|
+
"title": "ChatCompletionOutputLogprobs"
|
|
74
|
+
},
|
|
75
|
+
"ChatCompletionOutputLogprob": {
|
|
76
|
+
"type": "object",
|
|
77
|
+
"required": ["token", "logprob", "top_logprobs"],
|
|
78
|
+
"properties": {
|
|
79
|
+
"logprob": {
|
|
80
|
+
"type": "number",
|
|
81
|
+
"format": "float"
|
|
82
|
+
},
|
|
83
|
+
"token": {
|
|
84
|
+
"type": "string"
|
|
85
|
+
},
|
|
86
|
+
"top_logprobs": {
|
|
87
|
+
"type": "array",
|
|
88
|
+
"items": {
|
|
89
|
+
"$ref": "#/$defs/ChatCompletionOutputTopLogprob"
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
},
|
|
93
|
+
"title": "ChatCompletionOutputLogprob"
|
|
94
|
+
},
|
|
95
|
+
"ChatCompletionOutputTopLogprob": {
|
|
96
|
+
"type": "object",
|
|
97
|
+
"required": ["token", "logprob"],
|
|
98
|
+
"properties": {
|
|
99
|
+
"logprob": {
|
|
100
|
+
"type": "number",
|
|
101
|
+
"format": "float"
|
|
102
|
+
},
|
|
103
|
+
"token": {
|
|
104
|
+
"type": "string"
|
|
105
|
+
}
|
|
106
|
+
},
|
|
107
|
+
"title": "ChatCompletionOutputTopLogprob"
|
|
108
|
+
},
|
|
109
|
+
"ChatCompletionOutputMessage": {
|
|
110
|
+
"oneOf": [
|
|
111
|
+
{
|
|
112
|
+
"$ref": "#/$defs/ChatCompletionOutputTextMessage"
|
|
113
|
+
},
|
|
114
|
+
{
|
|
115
|
+
"$ref": "#/$defs/ChatCompletionOutputToolCallMessage"
|
|
116
|
+
}
|
|
117
|
+
],
|
|
118
|
+
"title": "ChatCompletionOutputMessage"
|
|
119
|
+
},
|
|
120
|
+
"ChatCompletionOutputTextMessage": {
|
|
121
|
+
"type": "object",
|
|
122
|
+
"required": ["role", "content"],
|
|
123
|
+
"properties": {
|
|
124
|
+
"content": {
|
|
125
|
+
"type": "string",
|
|
126
|
+
"example": "My name is David and I"
|
|
127
|
+
},
|
|
128
|
+
"role": {
|
|
129
|
+
"type": "string",
|
|
130
|
+
"example": "user"
|
|
131
|
+
}
|
|
132
|
+
},
|
|
133
|
+
"title": "ChatCompletionOutputTextMessage"
|
|
134
|
+
},
|
|
135
|
+
"ChatCompletionOutputToolCallMessage": {
|
|
136
|
+
"type": "object",
|
|
137
|
+
"required": ["role", "tool_calls"],
|
|
138
|
+
"properties": {
|
|
139
|
+
"role": {
|
|
140
|
+
"type": "string",
|
|
141
|
+
"example": "assistant"
|
|
142
|
+
},
|
|
143
|
+
"tool_calls": {
|
|
144
|
+
"type": "array",
|
|
145
|
+
"items": {
|
|
146
|
+
"$ref": "#/$defs/ChatCompletionOutputToolCall"
|
|
147
|
+
}
|
|
148
|
+
}
|
|
149
|
+
},
|
|
150
|
+
"title": "ChatCompletionOutputToolCallMessage"
|
|
151
|
+
},
|
|
152
|
+
"ChatCompletionOutputToolCall": {
|
|
153
|
+
"type": "object",
|
|
154
|
+
"required": ["id", "type", "function"],
|
|
155
|
+
"properties": {
|
|
156
|
+
"function": {
|
|
157
|
+
"$ref": "#/$defs/ChatCompletionOutputFunctionDefinition"
|
|
158
|
+
},
|
|
159
|
+
"id": {
|
|
160
|
+
"type": "string"
|
|
161
|
+
},
|
|
162
|
+
"type": {
|
|
163
|
+
"type": "string"
|
|
164
|
+
}
|
|
165
|
+
},
|
|
166
|
+
"title": "ChatCompletionOutputToolCall"
|
|
167
|
+
},
|
|
168
|
+
"ChatCompletionOutputFunctionDefinition": {
|
|
169
|
+
"type": "object",
|
|
170
|
+
"required": ["name", "arguments"],
|
|
171
|
+
"properties": {
|
|
172
|
+
"arguments": {},
|
|
173
|
+
"description": {
|
|
174
|
+
"type": "string",
|
|
175
|
+
"nullable": true
|
|
176
|
+
},
|
|
177
|
+
"name": {
|
|
178
|
+
"type": "string"
|
|
179
|
+
}
|
|
180
|
+
},
|
|
181
|
+
"title": "ChatCompletionOutputFunctionDefinition"
|
|
182
|
+
},
|
|
183
|
+
"ChatCompletionOutputUsage": {
|
|
184
|
+
"type": "object",
|
|
185
|
+
"required": ["prompt_tokens", "completion_tokens", "total_tokens"],
|
|
186
|
+
"properties": {
|
|
187
|
+
"completion_tokens": {
|
|
188
|
+
"type": "integer",
|
|
189
|
+
"format": "int32",
|
|
190
|
+
"minimum": 0
|
|
191
|
+
},
|
|
192
|
+
"prompt_tokens": {
|
|
193
|
+
"type": "integer",
|
|
194
|
+
"format": "int32",
|
|
195
|
+
"minimum": 0
|
|
196
|
+
},
|
|
197
|
+
"total_tokens": {
|
|
198
|
+
"type": "integer",
|
|
199
|
+
"format": "int32",
|
|
200
|
+
"minimum": 0
|
|
201
|
+
}
|
|
202
|
+
},
|
|
203
|
+
"title": "ChatCompletionOutputUsage"
|
|
204
|
+
}
|
|
205
|
+
}
|
|
206
|
+
}
|
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$id": "/inference/schemas/chat-completion/stream_output.json",
|
|
3
|
+
"$schema": "http://json-schema.org/draft-06/schema#",
|
|
4
|
+
"description": "Chat Completion Stream Output.\n\nAuto-generated from TGI specs.\nFor more details, check out https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.",
|
|
5
|
+
"title": "ChatCompletionStreamOutput",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"required": ["id", "created", "model", "system_fingerprint", "choices"],
|
|
8
|
+
"properties": {
|
|
9
|
+
"choices": {
|
|
10
|
+
"type": "array",
|
|
11
|
+
"items": {
|
|
12
|
+
"$ref": "#/$defs/ChatCompletionStreamOutputChoice"
|
|
13
|
+
}
|
|
14
|
+
},
|
|
15
|
+
"created": {
|
|
16
|
+
"type": "integer",
|
|
17
|
+
"format": "int64",
|
|
18
|
+
"example": "1706270978",
|
|
19
|
+
"minimum": 0
|
|
20
|
+
},
|
|
21
|
+
"id": {
|
|
22
|
+
"type": "string"
|
|
23
|
+
},
|
|
24
|
+
"model": {
|
|
25
|
+
"type": "string",
|
|
26
|
+
"example": "mistralai/Mistral-7B-Instruct-v0.2"
|
|
27
|
+
},
|
|
28
|
+
"system_fingerprint": {
|
|
29
|
+
"type": "string"
|
|
30
|
+
},
|
|
31
|
+
"usage": {
|
|
32
|
+
"allOf": [
|
|
33
|
+
{
|
|
34
|
+
"$ref": "#/$defs/ChatCompletionStreamOutputUsage"
|
|
35
|
+
}
|
|
36
|
+
],
|
|
37
|
+
"nullable": true
|
|
38
|
+
}
|
|
39
|
+
},
|
|
40
|
+
"$defs": {
|
|
41
|
+
"ChatCompletionStreamOutputChoice": {
|
|
42
|
+
"type": "object",
|
|
43
|
+
"required": ["index", "delta"],
|
|
44
|
+
"properties": {
|
|
45
|
+
"delta": {
|
|
46
|
+
"$ref": "#/$defs/ChatCompletionStreamOutputDelta"
|
|
47
|
+
},
|
|
48
|
+
"finish_reason": {
|
|
49
|
+
"type": "string",
|
|
50
|
+
"nullable": true
|
|
51
|
+
},
|
|
52
|
+
"index": {
|
|
53
|
+
"type": "integer",
|
|
54
|
+
"format": "int32",
|
|
55
|
+
"minimum": 0
|
|
56
|
+
},
|
|
57
|
+
"logprobs": {
|
|
58
|
+
"allOf": [
|
|
59
|
+
{
|
|
60
|
+
"$ref": "#/$defs/ChatCompletionStreamOutputLogprobs"
|
|
61
|
+
}
|
|
62
|
+
],
|
|
63
|
+
"nullable": true
|
|
64
|
+
}
|
|
65
|
+
},
|
|
66
|
+
"title": "ChatCompletionStreamOutputChoice"
|
|
67
|
+
},
|
|
68
|
+
"ChatCompletionStreamOutputDelta": {
|
|
69
|
+
"oneOf": [
|
|
70
|
+
{
|
|
71
|
+
"$ref": "#/$defs/ChatCompletionStreamOutputTextMessage"
|
|
72
|
+
},
|
|
73
|
+
{
|
|
74
|
+
"$ref": "#/$defs/ChatCompletionStreamOutputToolCallDelta"
|
|
75
|
+
}
|
|
76
|
+
],
|
|
77
|
+
"title": "ChatCompletionStreamOutputDelta"
|
|
78
|
+
},
|
|
79
|
+
"ChatCompletionStreamOutputTextMessage": {
|
|
80
|
+
"type": "object",
|
|
81
|
+
"required": ["role", "content"],
|
|
82
|
+
"properties": {
|
|
83
|
+
"content": {
|
|
84
|
+
"type": "string",
|
|
85
|
+
"example": "My name is David and I"
|
|
86
|
+
},
|
|
87
|
+
"role": {
|
|
88
|
+
"type": "string",
|
|
89
|
+
"example": "user"
|
|
90
|
+
}
|
|
91
|
+
},
|
|
92
|
+
"title": "ChatCompletionStreamOutputTextMessage"
|
|
93
|
+
},
|
|
94
|
+
"ChatCompletionStreamOutputToolCallDelta": {
|
|
95
|
+
"type": "object",
|
|
96
|
+
"required": ["role", "tool_calls"],
|
|
97
|
+
"properties": {
|
|
98
|
+
"role": {
|
|
99
|
+
"type": "string",
|
|
100
|
+
"example": "assistant"
|
|
101
|
+
},
|
|
102
|
+
"tool_calls": {
|
|
103
|
+
"$ref": "#/$defs/ChatCompletionStreamOutputDeltaToolCall"
|
|
104
|
+
}
|
|
105
|
+
},
|
|
106
|
+
"title": "ChatCompletionStreamOutputToolCallDelta"
|
|
107
|
+
},
|
|
108
|
+
"ChatCompletionStreamOutputDeltaToolCall": {
|
|
109
|
+
"type": "object",
|
|
110
|
+
"required": ["index", "id", "type", "function"],
|
|
111
|
+
"properties": {
|
|
112
|
+
"function": {
|
|
113
|
+
"$ref": "#/$defs/ChatCompletionStreamOutputFunction"
|
|
114
|
+
},
|
|
115
|
+
"id": {
|
|
116
|
+
"type": "string"
|
|
117
|
+
},
|
|
118
|
+
"index": {
|
|
119
|
+
"type": "integer",
|
|
120
|
+
"format": "int32",
|
|
121
|
+
"minimum": 0
|
|
122
|
+
},
|
|
123
|
+
"type": {
|
|
124
|
+
"type": "string"
|
|
125
|
+
}
|
|
126
|
+
},
|
|
127
|
+
"title": "ChatCompletionStreamOutputDeltaToolCall"
|
|
128
|
+
},
|
|
129
|
+
"ChatCompletionStreamOutputFunction": {
|
|
130
|
+
"type": "object",
|
|
131
|
+
"required": ["arguments"],
|
|
132
|
+
"properties": {
|
|
133
|
+
"arguments": {
|
|
134
|
+
"type": "string"
|
|
135
|
+
},
|
|
136
|
+
"name": {
|
|
137
|
+
"type": "string",
|
|
138
|
+
"nullable": true
|
|
139
|
+
}
|
|
140
|
+
},
|
|
141
|
+
"title": "ChatCompletionStreamOutputFunction"
|
|
142
|
+
},
|
|
143
|
+
"ChatCompletionStreamOutputLogprobs": {
|
|
144
|
+
"type": "object",
|
|
145
|
+
"required": ["content"],
|
|
146
|
+
"properties": {
|
|
147
|
+
"content": {
|
|
148
|
+
"type": "array",
|
|
149
|
+
"items": {
|
|
150
|
+
"$ref": "#/$defs/ChatCompletionStreamOutputLogprob"
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
},
|
|
154
|
+
"title": "ChatCompletionStreamOutputLogprobs"
|
|
155
|
+
},
|
|
156
|
+
"ChatCompletionStreamOutputLogprob": {
|
|
157
|
+
"type": "object",
|
|
158
|
+
"required": ["token", "logprob", "top_logprobs"],
|
|
159
|
+
"properties": {
|
|
160
|
+
"logprob": {
|
|
161
|
+
"type": "number",
|
|
162
|
+
"format": "float"
|
|
163
|
+
},
|
|
164
|
+
"token": {
|
|
165
|
+
"type": "string"
|
|
166
|
+
},
|
|
167
|
+
"top_logprobs": {
|
|
168
|
+
"type": "array",
|
|
169
|
+
"items": {
|
|
170
|
+
"$ref": "#/$defs/ChatCompletionStreamOutputTopLogprob"
|
|
171
|
+
}
|
|
172
|
+
}
|
|
173
|
+
},
|
|
174
|
+
"title": "ChatCompletionStreamOutputLogprob"
|
|
175
|
+
},
|
|
176
|
+
"ChatCompletionStreamOutputTopLogprob": {
|
|
177
|
+
"type": "object",
|
|
178
|
+
"required": ["token", "logprob"],
|
|
179
|
+
"properties": {
|
|
180
|
+
"logprob": {
|
|
181
|
+
"type": "number",
|
|
182
|
+
"format": "float"
|
|
183
|
+
},
|
|
184
|
+
"token": {
|
|
185
|
+
"type": "string"
|
|
186
|
+
}
|
|
187
|
+
},
|
|
188
|
+
"title": "ChatCompletionStreamOutputTopLogprob"
|
|
189
|
+
},
|
|
190
|
+
"ChatCompletionStreamOutputUsage": {
|
|
191
|
+
"type": "object",
|
|
192
|
+
"required": ["prompt_tokens", "completion_tokens", "total_tokens"],
|
|
193
|
+
"properties": {
|
|
194
|
+
"completion_tokens": {
|
|
195
|
+
"type": "integer",
|
|
196
|
+
"format": "int32",
|
|
197
|
+
"minimum": 0
|
|
198
|
+
},
|
|
199
|
+
"prompt_tokens": {
|
|
200
|
+
"type": "integer",
|
|
201
|
+
"format": "int32",
|
|
202
|
+
"minimum": 0
|
|
203
|
+
},
|
|
204
|
+
"total_tokens": {
|
|
205
|
+
"type": "integer",
|
|
206
|
+
"format": "int32",
|
|
207
|
+
"minimum": 0
|
|
208
|
+
}
|
|
209
|
+
},
|
|
210
|
+
"title": "ChatCompletionStreamOutputUsage"
|
|
211
|
+
}
|
|
212
|
+
}
|
|
213
|
+
}
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$id": "/inference/schemas/common-definitions.json",
|
|
3
|
+
"$schema": "http://json-schema.org/draft-06/schema#",
|
|
4
|
+
"description": "(Incomplete!) Common type definitions shared by several tasks",
|
|
5
|
+
"definitions": {
|
|
6
|
+
"ClassificationOutputTransform": {
|
|
7
|
+
"title": "ClassificationOutputTransform",
|
|
8
|
+
"type": "string",
|
|
9
|
+
"description": "The function to apply to the model outputs in order to retrieve the scores.",
|
|
10
|
+
"enum": ["sigmoid", "softmax", "none"]
|
|
11
|
+
},
|
|
12
|
+
"ClassificationOutput": {
|
|
13
|
+
"title": "ClassificationOutput",
|
|
14
|
+
"type": "object",
|
|
15
|
+
"properties": {
|
|
16
|
+
"label": {
|
|
17
|
+
"type": "string",
|
|
18
|
+
"description": "The predicted class label."
|
|
19
|
+
},
|
|
20
|
+
"score": {
|
|
21
|
+
"type": "number",
|
|
22
|
+
"description": "The corresponding probability."
|
|
23
|
+
}
|
|
24
|
+
},
|
|
25
|
+
"required": ["label", "score"]
|
|
26
|
+
},
|
|
27
|
+
"GenerationParameters": {
|
|
28
|
+
"title": "GenerationParameters",
|
|
29
|
+
"description": "Ad-hoc parametrization of the text generation process",
|
|
30
|
+
"type": "object",
|
|
31
|
+
"properties": {
|
|
32
|
+
"temperature": {
|
|
33
|
+
"type": "number",
|
|
34
|
+
"description": "The value used to modulate the next token probabilities."
|
|
35
|
+
},
|
|
36
|
+
"top_k": {
|
|
37
|
+
"type": "integer",
|
|
38
|
+
"description": "The number of highest probability vocabulary tokens to keep for top-k-filtering."
|
|
39
|
+
},
|
|
40
|
+
"top_p": {
|
|
41
|
+
"type": "number",
|
|
42
|
+
"description": "If set to float < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation."
|
|
43
|
+
},
|
|
44
|
+
"typical_p": {
|
|
45
|
+
"type": "number",
|
|
46
|
+
"description": " Local typicality measures how similar the conditional probability of predicting a target token next is to the expected conditional probability of predicting a random token next, given the partial text already generated. If set to float < 1, the smallest set of the most locally typical tokens with probabilities that add up to typical_p or higher are kept for generation. See [this paper](https://hf.co/papers/2202.00666) for more details."
|
|
47
|
+
},
|
|
48
|
+
"epsilon_cutoff": {
|
|
49
|
+
"type": "number",
|
|
50
|
+
"description": "If set to float strictly between 0 and 1, only tokens with a conditional probability greater than epsilon_cutoff will be sampled. In the paper, suggested values range from 3e-4 to 9e-4, depending on the size of the model. See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191) for more details."
|
|
51
|
+
},
|
|
52
|
+
"eta_cutoff": {
|
|
53
|
+
"type": "number",
|
|
54
|
+
"description": "Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to float strictly between 0 and 1, a token is only considered if it is greater than either eta_cutoff or sqrt(eta_cutoff) * exp(-entropy(softmax(next_token_logits))). The latter term is intuitively the expected next token probability, scaled by sqrt(eta_cutoff). In the paper, suggested values range from 3e-4 to 2e-3, depending on the size of the model. See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191) for more details."
|
|
55
|
+
},
|
|
56
|
+
"max_length": {
|
|
57
|
+
"type": "integer",
|
|
58
|
+
"description": "The maximum length (in tokens) of the generated text, including the input."
|
|
59
|
+
},
|
|
60
|
+
"max_new_tokens": {
|
|
61
|
+
"type": "integer",
|
|
62
|
+
"description": "The maximum number of tokens to generate. Takes precedence over max_length."
|
|
63
|
+
},
|
|
64
|
+
"min_length": {
|
|
65
|
+
"type": "integer",
|
|
66
|
+
"description": "The minimum length (in tokens) of the generated text, including the input."
|
|
67
|
+
},
|
|
68
|
+
"min_new_tokens": {
|
|
69
|
+
"type": "integer",
|
|
70
|
+
"description": "The minimum number of tokens to generate. Takes precedence over min_length."
|
|
71
|
+
},
|
|
72
|
+
"do_sample": {
|
|
73
|
+
"type": "boolean",
|
|
74
|
+
"description": "Whether to use sampling instead of greedy decoding when generating new tokens."
|
|
75
|
+
},
|
|
76
|
+
"early_stopping": {
|
|
77
|
+
"type": ["boolean", "string"],
|
|
78
|
+
"description": "Controls the stopping condition for beam-based methods.",
|
|
79
|
+
"enum": ["never", true, false]
|
|
80
|
+
},
|
|
81
|
+
"num_beams": {
|
|
82
|
+
"type": "integer",
|
|
83
|
+
"description": "Number of beams to use for beam search."
|
|
84
|
+
},
|
|
85
|
+
"num_beam_groups": {
|
|
86
|
+
"type": "integer",
|
|
87
|
+
"description": "Number of groups to divide num_beams into in order to ensure diversity among different groups of beams. See [this paper](https://hf.co/papers/1610.02424) for more details."
|
|
88
|
+
},
|
|
89
|
+
"penalty_alpha": {
|
|
90
|
+
"type": "number",
|
|
91
|
+
"description": "The value balances the model confidence and the degeneration penalty in contrastive search decoding."
|
|
92
|
+
},
|
|
93
|
+
"use_cache": {
|
|
94
|
+
"type": "boolean",
|
|
95
|
+
"description": "Whether the model should use the past last key/values attentions to speed up decoding"
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
}
|
|
100
|
+
}
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
## Use Cases
|
|
2
|
+
|
|
3
|
+
Depth estimation models can be used to estimate the depth of different objects present in an image.
|
|
4
|
+
|
|
5
|
+
### Estimation of Volumetric Information
|
|
6
|
+
Depth estimation models are widely used to study volumetric formation of objects present inside an image. This is an important use case in the domain of computer graphics.
|
|
7
|
+
|
|
8
|
+
### 3D Representation
|
|
9
|
+
|
|
10
|
+
Depth estimation models can also be used to develop a 3D representation from a 2D image.
|
|
11
|
+
|
|
12
|
+
## Depth Estimation Subtasks
|
|
13
|
+
|
|
14
|
+
There are two depth estimation subtasks.
|
|
15
|
+
|
|
16
|
+
- **Absolute depth estimation**: Absolute (or metric) depth estimation aims to provide exact depth measurements from the camera. Absolute depth estimation models output depth maps with real-world distances in meter or feet.
|
|
17
|
+
|
|
18
|
+
- **Relative depth estimation**: Relative depth estimation aims to predict the depth order of objects or points in a scene without providing the precise measurements.
|
|
19
|
+
|
|
20
|
+
## Inference
|
|
21
|
+
|
|
22
|
+
With the `transformers` library, you can use the `depth-estimation` pipeline to infer with image classification models. You can initialize the pipeline with a model id from the Hub. If you do not provide a model id it will initialize with [Intel/dpt-large](https://huggingface.co/Intel/dpt-large) by default. When calling the pipeline you just need to specify a path, http link or an image loaded in PIL. Additionally, you can find a comprehensive list of various depth estimation models at [this link](https://huggingface.co/models?pipeline_tag=depth-estimation).
|
|
23
|
+
|
|
24
|
+
```python
|
|
25
|
+
from transformers import pipeline
|
|
26
|
+
|
|
27
|
+
estimator = pipeline(task="depth-estimation", model="Intel/dpt-large")
|
|
28
|
+
result = estimator(images="http://images.cocodataset.org/val2017/000000039769.jpg")
|
|
29
|
+
result
|
|
30
|
+
|
|
31
|
+
# {'predicted_depth': tensor([[[ 6.3199, 6.3629, 6.4148, ..., 10.4104, 10.5109, 10.3847],
|
|
32
|
+
# [ 6.3850, 6.3615, 6.4166, ..., 10.4540, 10.4384, 10.4554],
|
|
33
|
+
# [ 6.3519, 6.3176, 6.3575, ..., 10.4247, 10.4618, 10.4257],
|
|
34
|
+
# ...,
|
|
35
|
+
# [22.3772, 22.4624, 22.4227, ..., 22.5207, 22.5593, 22.5293],
|
|
36
|
+
# [22.5073, 22.5148, 22.5114, ..., 22.6604, 22.6344, 22.5871],
|
|
37
|
+
# [22.5176, 22.5275, 22.5218, ..., 22.6282, 22.6216, 22.6108]]]),
|
|
38
|
+
# 'depth': <PIL.Image.Image image mode=L size=640x480 at 0x7F1A8BFE5D90>}
|
|
39
|
+
|
|
40
|
+
# You can visualize the result just by calling `result["depth"]`.
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
## Useful Resources
|
|
44
|
+
|
|
45
|
+
- [Monocular depth estimation task guide](https://huggingface.co/docs/transformers/tasks/monocular_depth_estimation)
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
import type { TaskDataCustom } from "../index.js";
|
|
2
|
+
|
|
3
|
+
const taskData: TaskDataCustom = {
|
|
4
|
+
datasets: [
|
|
5
|
+
{
|
|
6
|
+
description: "NYU Depth V2 Dataset: Video dataset containing both RGB and depth sensor data.",
|
|
7
|
+
id: "sayakpaul/nyu_depth_v2",
|
|
8
|
+
},
|
|
9
|
+
{
|
|
10
|
+
description: "Monocular depth estimation benchmark based without noise and errors.",
|
|
11
|
+
id: "depth-anything/DA-2K",
|
|
12
|
+
},
|
|
13
|
+
],
|
|
14
|
+
demo: {
|
|
15
|
+
inputs: [
|
|
16
|
+
{
|
|
17
|
+
filename: "depth-estimation-input.jpg",
|
|
18
|
+
type: "img",
|
|
19
|
+
},
|
|
20
|
+
],
|
|
21
|
+
outputs: [
|
|
22
|
+
{
|
|
23
|
+
filename: "depth-estimation-output.png",
|
|
24
|
+
type: "img",
|
|
25
|
+
},
|
|
26
|
+
],
|
|
27
|
+
},
|
|
28
|
+
metrics: [],
|
|
29
|
+
models: [
|
|
30
|
+
{
|
|
31
|
+
description: "Cutting-edge depth estimation model.",
|
|
32
|
+
id: "depth-anything/Depth-Anything-V2-Large",
|
|
33
|
+
},
|
|
34
|
+
{
|
|
35
|
+
description: "A strong monocular depth estimation model.",
|
|
36
|
+
id: "jingheya/lotus-depth-g-v1-0",
|
|
37
|
+
},
|
|
38
|
+
{
|
|
39
|
+
description: "A depth estimation model that predicts depth in videos.",
|
|
40
|
+
id: "tencent/DepthCrafter",
|
|
41
|
+
},
|
|
42
|
+
{
|
|
43
|
+
description: "A robust depth estimation model.",
|
|
44
|
+
id: "apple/DepthPro",
|
|
45
|
+
},
|
|
46
|
+
],
|
|
47
|
+
spaces: [
|
|
48
|
+
{
|
|
49
|
+
description: "An application that predicts the depth of an image and then reconstruct the 3D model as voxels.",
|
|
50
|
+
id: "radames/dpt-depth-estimation-3d-voxels",
|
|
51
|
+
},
|
|
52
|
+
{
|
|
53
|
+
description: "An application for bleeding-edge depth estimation.",
|
|
54
|
+
id: "akhaliq/depth-pro",
|
|
55
|
+
},
|
|
56
|
+
{
|
|
57
|
+
description: "An application on cutting-edge depth estimation in videos.",
|
|
58
|
+
id: "tencent/DepthCrafter",
|
|
59
|
+
},
|
|
60
|
+
{
|
|
61
|
+
description: "A human-centric depth estimation application.",
|
|
62
|
+
id: "facebook/sapiens-depth",
|
|
63
|
+
},
|
|
64
|
+
],
|
|
65
|
+
summary: "Depth estimation is the task of predicting depth of the objects present in an image.",
|
|
66
|
+
widgetModels: [""],
|
|
67
|
+
youtubeId: "",
|
|
68
|
+
};
|
|
69
|
+
|
|
70
|
+
export default taskData;
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Inference code generated from the JSON schema spec in ./spec
|
|
3
|
+
*
|
|
4
|
+
* Using src/scripts/inference-codegen
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
/**
 * Inputs for Depth Estimation inference.
 *
 * Generated from the JSON schema spec in ./spec/input.json; `inputs` is typed
 * `unknown` because the schema does not constrain the image encoding —
 * presumably a URL, file path, or raw/base64 image bytes; verify against the
 * consumer before relying on a specific format.
 */
export interface DepthEstimationInput {
	/**
	 * The input image data (required by the spec; encoding unconstrained).
	 */
	inputs: unknown;
	/**
	 * Additional inference parameters. The spec currently declares an empty
	 * parameter object for this task, so any key/value pairs are accepted.
	 */
	parameters?: { [key: string]: unknown };
	// Index signature: the generated type permits additional properties
	// beyond those named above.
	[property: string]: unknown;
}
|
|
21
|
+
|
|
22
|
+
/**
 * Outputs of inference for the Depth Estimation task.
 *
 * Both fields are optional and typed `unknown` because the generated schema
 * does not pin their concrete runtime representation (e.g. with the
 * `transformers` pipeline, `depth` is a PIL image and `predicted_depth` a
 * tensor — TODO confirm for other backends).
 */
export interface DepthEstimationOutput {
	/**
	 * The predicted depth as an image.
	 */
	depth?: unknown;
	/**
	 * The predicted depth as a tensor.
	 */
	predicted_depth?: unknown;
	// Index signature: additional properties beyond the two named fields
	// are permitted by the generated type.
	[property: string]: unknown;
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$id": "/inference/schemas/depth-estimation/input.json",
|
|
3
|
+
"$schema": "http://json-schema.org/draft-06/schema#",
|
|
4
|
+
"description": "Inputs for Depth Estimation inference",
|
|
5
|
+
"title": "DepthEstimationInput",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"properties": {
|
|
8
|
+
"inputs": {
|
|
9
|
+
"description": "The input image data"
|
|
10
|
+
},
|
|
11
|
+
"parameters": {
|
|
12
|
+
"description": "Additional inference parameters",
|
|
13
|
+
"$ref": "#/$defs/DepthEstimationParameters"
|
|
14
|
+
}
|
|
15
|
+
},
|
|
16
|
+
"$defs": {
|
|
17
|
+
"DepthEstimationParameters": {
|
|
18
|
+
"title": "DepthEstimationParameters",
|
|
19
|
+
"description": "Additional inference parameters for Depth Estimation",
|
|
20
|
+
"type": "object",
|
|
21
|
+
"properties": {}
|
|
22
|
+
}
|
|
23
|
+
},
|
|
24
|
+
"required": ["inputs"]
|
|
25
|
+
}
|