@huggingface/tasks 0.12.22 → 0.12.24
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +503 -132
- package/dist/index.js +503 -132
- package/dist/src/hardware.d.ts +20 -0
- package/dist/src/hardware.d.ts.map +1 -1
- package/dist/src/model-libraries-snippets.d.ts +1 -0
- package/dist/src/model-libraries-snippets.d.ts.map +1 -1
- package/dist/src/model-libraries.d.ts +9 -2
- package/dist/src/model-libraries.d.ts.map +1 -1
- package/dist/src/snippets/common.d.ts +20 -0
- package/dist/src/snippets/common.d.ts.map +1 -0
- package/dist/src/snippets/curl.d.ts +15 -8
- package/dist/src/snippets/curl.d.ts.map +1 -1
- package/dist/src/snippets/js.d.ts +17 -10
- package/dist/src/snippets/js.d.ts.map +1 -1
- package/dist/src/snippets/python.d.ts +20 -13
- package/dist/src/snippets/python.d.ts.map +1 -1
- package/dist/src/snippets/types.d.ts +4 -0
- package/dist/src/snippets/types.d.ts.map +1 -1
- package/dist/src/tasks/depth-estimation/data.d.ts.map +1 -1
- package/dist/src/tasks/image-segmentation/data.d.ts.map +1 -1
- package/dist/src/tasks/image-text-to-text/data.d.ts.map +1 -1
- package/dist/src/tasks/object-detection/data.d.ts.map +1 -1
- package/dist/src/tasks/text-to-speech/data.d.ts.map +1 -1
- package/dist/src/tasks/token-classification/inference.d.ts +8 -5
- package/dist/src/tasks/token-classification/inference.d.ts.map +1 -1
- package/dist/src/tasks/video-text-to-text/data.d.ts.map +1 -1
- package/dist/src/tasks/visual-question-answering/inference.d.ts +0 -1
- package/dist/src/tasks/visual-question-answering/inference.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/hardware.ts +20 -0
- package/src/model-libraries-snippets.ts +28 -3
- package/src/model-libraries.ts +8 -1
- package/src/snippets/common.ts +63 -0
- package/src/snippets/curl.ts +71 -26
- package/src/snippets/js.ts +165 -40
- package/src/snippets/python.ts +186 -48
- package/src/snippets/types.ts +5 -0
- package/src/tasks/depth-estimation/data.ts +15 -7
- package/src/tasks/image-segmentation/about.md +1 -1
- package/src/tasks/image-segmentation/data.ts +10 -9
- package/src/tasks/image-text-to-text/data.ts +17 -9
- package/src/tasks/keypoint-detection/data.ts +1 -1
- package/src/tasks/object-detection/data.ts +5 -4
- package/src/tasks/text-generation/data.ts +7 -7
- package/src/tasks/text-to-image/data.ts +2 -2
- package/src/tasks/text-to-speech/data.ts +5 -1
- package/src/tasks/text-to-video/data.ts +10 -10
- package/src/tasks/token-classification/inference.ts +8 -5
- package/src/tasks/token-classification/spec/output.json +6 -2
- package/src/tasks/video-text-to-text/data.ts +8 -0
- package/src/tasks/visual-question-answering/inference.ts +0 -1
- package/src/tasks/visual-question-answering/spec/output.json +1 -1
package/src/snippets/python.ts
CHANGED
|
@@ -1,22 +1,128 @@
|
|
|
1
1
|
import type { PipelineType } from "../pipelines.js";
|
|
2
|
+
import type { ChatCompletionInputMessage, GenerationParameters } from "../tasks/index.js";
|
|
3
|
+
import { stringifyGenerationConfig, stringifyMessages } from "./common.js";
|
|
2
4
|
import { getModelInputSnippet } from "./inputs.js";
|
|
3
|
-
import type { ModelDataMinimal } from "./types.js";
|
|
5
|
+
import type { InferenceSnippet, ModelDataMinimal } from "./types.js";
|
|
4
6
|
|
|
5
|
-
export const snippetConversational = (
|
|
6
|
-
|
|
7
|
+
export const snippetConversational = (
|
|
8
|
+
model: ModelDataMinimal,
|
|
9
|
+
accessToken: string,
|
|
10
|
+
opts?: {
|
|
11
|
+
streaming?: boolean;
|
|
12
|
+
messages?: ChatCompletionInputMessage[];
|
|
13
|
+
temperature?: GenerationParameters["temperature"];
|
|
14
|
+
max_tokens?: GenerationParameters["max_tokens"];
|
|
15
|
+
top_p?: GenerationParameters["top_p"];
|
|
16
|
+
}
|
|
17
|
+
): InferenceSnippet[] => {
|
|
18
|
+
const streaming = opts?.streaming ?? true;
|
|
19
|
+
const messages: ChatCompletionInputMessage[] = opts?.messages ?? [
|
|
20
|
+
{ role: "user", content: "What is the capital of France?" },
|
|
21
|
+
];
|
|
22
|
+
const messagesStr = stringifyMessages(messages, {
|
|
23
|
+
sep: ",\n\t",
|
|
24
|
+
start: `[\n\t`,
|
|
25
|
+
end: `\n]`,
|
|
26
|
+
attributeKeyQuotes: true,
|
|
27
|
+
});
|
|
28
|
+
|
|
29
|
+
const config = {
|
|
30
|
+
...(opts?.temperature ? { temperature: opts.temperature } : undefined),
|
|
31
|
+
max_tokens: opts?.max_tokens ?? 500,
|
|
32
|
+
...(opts?.top_p ? { top_p: opts.top_p } : undefined),
|
|
33
|
+
};
|
|
34
|
+
const configStr = stringifyGenerationConfig(config, {
|
|
35
|
+
sep: ",\n\t",
|
|
36
|
+
start: "",
|
|
37
|
+
end: "",
|
|
38
|
+
attributeValueConnector: "=",
|
|
39
|
+
});
|
|
40
|
+
|
|
41
|
+
if (streaming) {
|
|
42
|
+
return [
|
|
43
|
+
{
|
|
44
|
+
client: "huggingface_hub",
|
|
45
|
+
content: `from huggingface_hub import InferenceClient
|
|
7
46
|
|
|
8
47
|
client = InferenceClient(api_key="${accessToken || "{API_TOKEN}"}")
|
|
9
48
|
|
|
10
|
-
|
|
11
|
-
model="${model.id}",
|
|
12
|
-
messages=[{"role": "user", "content": "What is the capital of France?"}],
|
|
13
|
-
max_tokens=500,
|
|
14
|
-
stream=True,
|
|
15
|
-
):
|
|
16
|
-
print(message.choices[0].delta.content, end="")`;
|
|
49
|
+
messages = ${messagesStr}
|
|
17
50
|
|
|
18
|
-
|
|
19
|
-
|
|
51
|
+
stream = client.chat.completions.create(
|
|
52
|
+
model="${model.id}",
|
|
53
|
+
messages=messages,
|
|
54
|
+
${configStr},
|
|
55
|
+
stream=True
|
|
56
|
+
)
|
|
57
|
+
|
|
58
|
+
for chunk in stream:
|
|
59
|
+
print(chunk.choices[0].delta.content)`,
|
|
60
|
+
},
|
|
61
|
+
{
|
|
62
|
+
client: "openai",
|
|
63
|
+
content: `from openai import OpenAI
|
|
64
|
+
|
|
65
|
+
client = OpenAI(
|
|
66
|
+
base_url="https://api-inference.huggingface.co/v1/",
|
|
67
|
+
api_key="${accessToken || "{API_TOKEN}"}"
|
|
68
|
+
)
|
|
69
|
+
|
|
70
|
+
messages = ${messagesStr}
|
|
71
|
+
|
|
72
|
+
stream = client.chat.completions.create(
|
|
73
|
+
model="${model.id}",
|
|
74
|
+
messages=messages,
|
|
75
|
+
${configStr},
|
|
76
|
+
stream=True
|
|
77
|
+
)
|
|
78
|
+
|
|
79
|
+
for chunk in stream:
|
|
80
|
+
print(chunk.choices[0].delta.content)`,
|
|
81
|
+
},
|
|
82
|
+
];
|
|
83
|
+
} else {
|
|
84
|
+
return [
|
|
85
|
+
{
|
|
86
|
+
client: "huggingface_hub",
|
|
87
|
+
content: `from huggingface_hub import InferenceClient
|
|
88
|
+
|
|
89
|
+
client = InferenceClient(api_key="${accessToken || "{API_TOKEN}"}")
|
|
90
|
+
|
|
91
|
+
messages = ${messagesStr}
|
|
92
|
+
|
|
93
|
+
completion = client.chat.completions.create(
|
|
94
|
+
model="${model.id}",
|
|
95
|
+
messages=messages,
|
|
96
|
+
${configStr}
|
|
97
|
+
)
|
|
98
|
+
|
|
99
|
+
print(completion.choices[0].message)`,
|
|
100
|
+
},
|
|
101
|
+
{
|
|
102
|
+
client: "openai",
|
|
103
|
+
content: `from openai import OpenAI
|
|
104
|
+
|
|
105
|
+
client = OpenAI(
|
|
106
|
+
base_url="https://api-inference.huggingface.co/v1/",
|
|
107
|
+
api_key="${accessToken || "{API_TOKEN}"}"
|
|
108
|
+
)
|
|
109
|
+
|
|
110
|
+
messages = ${messagesStr}
|
|
111
|
+
|
|
112
|
+
completion = client.chat.completions.create(
|
|
113
|
+
model="${model.id}",
|
|
114
|
+
messages=messages,
|
|
115
|
+
${configStr}
|
|
116
|
+
)
|
|
117
|
+
|
|
118
|
+
print(completion.choices[0].message)`,
|
|
119
|
+
},
|
|
120
|
+
];
|
|
121
|
+
}
|
|
122
|
+
};
|
|
123
|
+
|
|
124
|
+
export const snippetConversationalWithImage = (model: ModelDataMinimal, accessToken: string): InferenceSnippet => ({
|
|
125
|
+
content: `from huggingface_hub import InferenceClient
|
|
20
126
|
|
|
21
127
|
client = InferenceClient(api_key="${accessToken || "{API_TOKEN}"}")
|
|
22
128
|
|
|
@@ -36,20 +142,22 @@ for message in client.chat_completion(
|
|
|
36
142
|
max_tokens=500,
|
|
37
143
|
stream=True,
|
|
38
144
|
):
|
|
39
|
-
print(message.choices[0].delta.content, end="")
|
|
145
|
+
print(message.choices[0].delta.content, end="")`,
|
|
146
|
+
});
|
|
40
147
|
|
|
41
|
-
export const snippetZeroShotClassification = (model: ModelDataMinimal):
|
|
42
|
-
`def query(payload):
|
|
148
|
+
export const snippetZeroShotClassification = (model: ModelDataMinimal): InferenceSnippet => ({
|
|
149
|
+
content: `def query(payload):
|
|
43
150
|
response = requests.post(API_URL, headers=headers, json=payload)
|
|
44
151
|
return response.json()
|
|
45
152
|
|
|
46
153
|
output = query({
|
|
47
154
|
"inputs": ${getModelInputSnippet(model)},
|
|
48
155
|
"parameters": {"candidate_labels": ["refund", "legal", "faq"]},
|
|
49
|
-
})
|
|
156
|
+
})`,
|
|
157
|
+
});
|
|
50
158
|
|
|
51
|
-
export const snippetZeroShotImageClassification = (model: ModelDataMinimal):
|
|
52
|
-
`def query(data):
|
|
159
|
+
export const snippetZeroShotImageClassification = (model: ModelDataMinimal): InferenceSnippet => ({
|
|
160
|
+
content: `def query(data):
|
|
53
161
|
with open(data["image_path"], "rb") as f:
|
|
54
162
|
img = f.read()
|
|
55
163
|
payload={
|
|
@@ -62,28 +170,31 @@ export const snippetZeroShotImageClassification = (model: ModelDataMinimal): str
|
|
|
62
170
|
output = query({
|
|
63
171
|
"image_path": ${getModelInputSnippet(model)},
|
|
64
172
|
"parameters": {"candidate_labels": ["cat", "dog", "llama"]},
|
|
65
|
-
})
|
|
173
|
+
})`,
|
|
174
|
+
});
|
|
66
175
|
|
|
67
|
-
export const snippetBasic = (model: ModelDataMinimal):
|
|
68
|
-
`def query(payload):
|
|
176
|
+
export const snippetBasic = (model: ModelDataMinimal): InferenceSnippet => ({
|
|
177
|
+
content: `def query(payload):
|
|
69
178
|
response = requests.post(API_URL, headers=headers, json=payload)
|
|
70
179
|
return response.json()
|
|
71
180
|
|
|
72
181
|
output = query({
|
|
73
182
|
"inputs": ${getModelInputSnippet(model)},
|
|
74
|
-
})
|
|
183
|
+
})`,
|
|
184
|
+
});
|
|
75
185
|
|
|
76
|
-
export const snippetFile = (model: ModelDataMinimal):
|
|
77
|
-
`def query(filename):
|
|
186
|
+
export const snippetFile = (model: ModelDataMinimal): InferenceSnippet => ({
|
|
187
|
+
content: `def query(filename):
|
|
78
188
|
with open(filename, "rb") as f:
|
|
79
189
|
data = f.read()
|
|
80
190
|
response = requests.post(API_URL, headers=headers, data=data)
|
|
81
191
|
return response.json()
|
|
82
192
|
|
|
83
|
-
output = query(${getModelInputSnippet(model)})
|
|
193
|
+
output = query(${getModelInputSnippet(model)})`,
|
|
194
|
+
});
|
|
84
195
|
|
|
85
|
-
export const snippetTextToImage = (model: ModelDataMinimal):
|
|
86
|
-
`def query(payload):
|
|
196
|
+
export const snippetTextToImage = (model: ModelDataMinimal): InferenceSnippet => ({
|
|
197
|
+
content: `def query(payload):
|
|
87
198
|
response = requests.post(API_URL, headers=headers, json=payload)
|
|
88
199
|
return response.content
|
|
89
200
|
image_bytes = query({
|
|
@@ -92,22 +203,25 @@ image_bytes = query({
|
|
|
92
203
|
# You can access the image with PIL.Image for example
|
|
93
204
|
import io
|
|
94
205
|
from PIL import Image
|
|
95
|
-
image = Image.open(io.BytesIO(image_bytes))
|
|
206
|
+
image = Image.open(io.BytesIO(image_bytes))`,
|
|
207
|
+
});
|
|
96
208
|
|
|
97
|
-
export const snippetTabular = (model: ModelDataMinimal):
|
|
98
|
-
`def query(payload):
|
|
209
|
+
export const snippetTabular = (model: ModelDataMinimal): InferenceSnippet => ({
|
|
210
|
+
content: `def query(payload):
|
|
99
211
|
response = requests.post(API_URL, headers=headers, json=payload)
|
|
100
212
|
return response.content
|
|
101
213
|
response = query({
|
|
102
214
|
"inputs": {"data": ${getModelInputSnippet(model)}},
|
|
103
|
-
})
|
|
215
|
+
})`,
|
|
216
|
+
});
|
|
104
217
|
|
|
105
|
-
export const snippetTextToAudio = (model: ModelDataMinimal):
|
|
218
|
+
export const snippetTextToAudio = (model: ModelDataMinimal): InferenceSnippet => {
|
|
106
219
|
// Transformers TTS pipeline and api-inference-community (AIC) pipeline outputs are diverged
|
|
107
220
|
// with the latest update to inference-api (IA).
|
|
108
221
|
// Transformers IA returns a byte object (wav file), whereas AIC returns wav and sampling_rate.
|
|
109
222
|
if (model.library_name === "transformers") {
|
|
110
|
-
return
|
|
223
|
+
return {
|
|
224
|
+
content: `def query(payload):
|
|
111
225
|
response = requests.post(API_URL, headers=headers, json=payload)
|
|
112
226
|
return response.content
|
|
113
227
|
|
|
@@ -116,9 +230,11 @@ audio_bytes = query({
|
|
|
116
230
|
})
|
|
117
231
|
# You can access the audio with IPython.display for example
|
|
118
232
|
from IPython.display import Audio
|
|
119
|
-
Audio(audio_bytes)
|
|
233
|
+
Audio(audio_bytes)`,
|
|
234
|
+
};
|
|
120
235
|
} else {
|
|
121
|
-
return
|
|
236
|
+
return {
|
|
237
|
+
content: `def query(payload):
|
|
122
238
|
response = requests.post(API_URL, headers=headers, json=payload)
|
|
123
239
|
return response.json()
|
|
124
240
|
|
|
@@ -127,12 +243,13 @@ audio, sampling_rate = query({
|
|
|
127
243
|
})
|
|
128
244
|
# You can access the audio with IPython.display for example
|
|
129
245
|
from IPython.display import Audio
|
|
130
|
-
Audio(audio, rate=sampling_rate)
|
|
246
|
+
Audio(audio, rate=sampling_rate)`,
|
|
247
|
+
};
|
|
131
248
|
}
|
|
132
249
|
};
|
|
133
250
|
|
|
134
|
-
export const snippetDocumentQuestionAnswering = (model: ModelDataMinimal):
|
|
135
|
-
`def query(payload):
|
|
251
|
+
export const snippetDocumentQuestionAnswering = (model: ModelDataMinimal): InferenceSnippet => ({
|
|
252
|
+
content: `def query(payload):
|
|
136
253
|
with open(payload["image"], "rb") as f:
|
|
137
254
|
img = f.read()
|
|
138
255
|
payload["image"] = base64.b64encode(img).decode("utf-8")
|
|
@@ -141,9 +258,19 @@ export const snippetDocumentQuestionAnswering = (model: ModelDataMinimal): strin
|
|
|
141
258
|
|
|
142
259
|
output = query({
|
|
143
260
|
"inputs": ${getModelInputSnippet(model)},
|
|
144
|
-
})
|
|
261
|
+
})`,
|
|
262
|
+
});
|
|
145
263
|
|
|
146
|
-
export const pythonSnippets: Partial<
|
|
264
|
+
export const pythonSnippets: Partial<
|
|
265
|
+
Record<
|
|
266
|
+
PipelineType,
|
|
267
|
+
(
|
|
268
|
+
model: ModelDataMinimal,
|
|
269
|
+
accessToken: string,
|
|
270
|
+
opts?: Record<string, unknown>
|
|
271
|
+
) => InferenceSnippet | InferenceSnippet[]
|
|
272
|
+
>
|
|
273
|
+
> = {
|
|
147
274
|
// Same order as in tasks/src/pipelines.ts
|
|
148
275
|
"text-classification": snippetBasic,
|
|
149
276
|
"token-classification": snippetBasic,
|
|
@@ -174,25 +301,36 @@ export const pythonSnippets: Partial<Record<PipelineType, (model: ModelDataMinim
|
|
|
174
301
|
"zero-shot-image-classification": snippetZeroShotImageClassification,
|
|
175
302
|
};
|
|
176
303
|
|
|
177
|
-
export function getPythonInferenceSnippet(
|
|
304
|
+
export function getPythonInferenceSnippet(
|
|
305
|
+
model: ModelDataMinimal,
|
|
306
|
+
accessToken: string,
|
|
307
|
+
opts?: Record<string, unknown>
|
|
308
|
+
): InferenceSnippet | InferenceSnippet[] {
|
|
178
309
|
if (model.pipeline_tag === "text-generation" && model.tags.includes("conversational")) {
|
|
179
310
|
// Conversational model detected, so we display a code snippet that features the Messages API
|
|
180
|
-
return snippetConversational(model, accessToken);
|
|
311
|
+
return snippetConversational(model, accessToken, opts);
|
|
181
312
|
} else if (model.pipeline_tag === "image-text-to-text" && model.tags.includes("conversational")) {
|
|
182
313
|
// Example sending an image to the Message API
|
|
183
314
|
return snippetConversationalWithImage(model, accessToken);
|
|
184
315
|
} else {
|
|
185
|
-
|
|
316
|
+
let snippets =
|
|
186
317
|
model.pipeline_tag && model.pipeline_tag in pythonSnippets
|
|
187
|
-
? pythonSnippets[model.pipeline_tag]?.(model, accessToken) ?? ""
|
|
188
|
-
: "";
|
|
318
|
+
? pythonSnippets[model.pipeline_tag]?.(model, accessToken) ?? { content: "" }
|
|
319
|
+
: { content: "" };
|
|
189
320
|
|
|
190
|
-
|
|
321
|
+
snippets = Array.isArray(snippets) ? snippets : [snippets];
|
|
191
322
|
|
|
323
|
+
return snippets.map((snippet) => {
|
|
324
|
+
return {
|
|
325
|
+
...snippet,
|
|
326
|
+
content: `import requests
|
|
327
|
+
|
|
192
328
|
API_URL = "https://api-inference.huggingface.co/models/${model.id}"
|
|
193
329
|
headers = {"Authorization": ${accessToken ? `"Bearer ${accessToken}"` : `f"Bearer {API_TOKEN}"`}}
|
|
194
|
-
|
|
195
|
-
${
|
|
330
|
+
|
|
331
|
+
${snippet.content}`,
|
|
332
|
+
};
|
|
333
|
+
});
|
|
196
334
|
}
|
|
197
335
|
}
|
|
198
336
|
|
package/src/snippets/types.ts
CHANGED
|
@@ -9,3 +9,8 @@ export type ModelDataMinimal = Pick<
|
|
|
9
9
|
ModelData,
|
|
10
10
|
"id" | "pipeline_tag" | "mask_token" | "library_name" | "config" | "tags" | "inference"
|
|
11
11
|
>;
|
|
12
|
+
|
|
13
|
+
export interface InferenceSnippet {
|
|
14
|
+
content: string;
|
|
15
|
+
client?: string; // for instance: `client` could be `huggingface_hub` or `openai` client for Python snippets
|
|
16
|
+
}
|
|
@@ -33,11 +33,15 @@ const taskData: TaskDataCustom = {
|
|
|
33
33
|
},
|
|
34
34
|
{
|
|
35
35
|
description: "A strong monocular depth estimation model.",
|
|
36
|
-
id: "
|
|
36
|
+
id: "jingheya/lotus-depth-g-v1-0",
|
|
37
37
|
},
|
|
38
38
|
{
|
|
39
|
-
description: "A
|
|
40
|
-
id: "
|
|
39
|
+
description: "A depth estimation model that predicts depth in videos.",
|
|
40
|
+
id: "tencent/DepthCrafter",
|
|
41
|
+
},
|
|
42
|
+
{
|
|
43
|
+
description: "A robust depth estimation model.",
|
|
44
|
+
id: "apple/DepthPro",
|
|
41
45
|
},
|
|
42
46
|
],
|
|
43
47
|
spaces: [
|
|
@@ -46,12 +50,16 @@ const taskData: TaskDataCustom = {
|
|
|
46
50
|
id: "radames/dpt-depth-estimation-3d-voxels",
|
|
47
51
|
},
|
|
48
52
|
{
|
|
49
|
-
description: "An application
|
|
50
|
-
id: "depth-
|
|
53
|
+
description: "An application for bleeding-edge depth estimation.",
|
|
54
|
+
id: "akhaliq/depth-pro",
|
|
55
|
+
},
|
|
56
|
+
{
|
|
57
|
+
description: "An application on cutting-edge depth estimation in videos.",
|
|
58
|
+
id: "tencent/DepthCrafter",
|
|
51
59
|
},
|
|
52
60
|
{
|
|
53
|
-
description: "
|
|
54
|
-
id: "
|
|
61
|
+
description: "A human-centric depth estimation application.",
|
|
62
|
+
id: "facebook/sapiens-depth",
|
|
55
63
|
},
|
|
56
64
|
],
|
|
57
65
|
summary: "Depth estimation is the task of predicting depth of the objects present in an image.",
|
|
@@ -48,7 +48,7 @@ import { HfInference } from "@huggingface/inference";
|
|
|
48
48
|
const inference = new HfInference(HF_TOKEN);
|
|
49
49
|
await inference.imageSegmentation({
|
|
50
50
|
data: await (await fetch("https://picsum.photos/300/300")).blob(),
|
|
51
|
-
model: "facebook/
|
|
51
|
+
model: "facebook/mask2former-swin-base-coco-panoptic",
|
|
52
52
|
});
|
|
53
53
|
```
|
|
54
54
|
|
|
@@ -44,23 +44,24 @@ const taskData: TaskDataCustom = {
|
|
|
44
44
|
models: [
|
|
45
45
|
{
|
|
46
46
|
// TO DO: write description
|
|
47
|
-
description:
|
|
48
|
-
|
|
47
|
+
description:
|
|
48
|
+
"Solid semantic segmentation model trained on ADE20k.",
|
|
49
|
+
id: "openmmlab/upernet-convnext-small",
|
|
49
50
|
},
|
|
50
51
|
{
|
|
51
52
|
description: "Background removal model.",
|
|
52
53
|
id: "briaai/RMBG-1.4",
|
|
53
54
|
},
|
|
54
|
-
{
|
|
55
|
-
description: "Semantic segmentation model trained on ADE20k benchmark dataset with 512x512 resolution.",
|
|
56
|
-
id: "nvidia/segformer-b0-finetuned-ade-512-512",
|
|
57
|
-
},
|
|
58
55
|
{
|
|
59
56
|
description: "A multipurpose image segmentation model for high resolution images.",
|
|
60
57
|
id: "ZhengPeng7/BiRefNet",
|
|
61
58
|
},
|
|
62
59
|
{
|
|
63
|
-
description: "
|
|
60
|
+
description: "Powerful human-centric image segmentation model.",
|
|
61
|
+
id: "facebook/sapiens-seg-1b",
|
|
62
|
+
},
|
|
63
|
+
{
|
|
64
|
+
description: "Panoptic segmentation model trained on the COCO (common objects) dataset.",
|
|
64
65
|
id: "facebook/mask2former-swin-large-coco-panoptic",
|
|
65
66
|
},
|
|
66
67
|
],
|
|
@@ -74,8 +75,8 @@ const taskData: TaskDataCustom = {
|
|
|
74
75
|
id: "jbrinkma/segment-anything",
|
|
75
76
|
},
|
|
76
77
|
{
|
|
77
|
-
description: "A
|
|
78
|
-
id: "
|
|
78
|
+
description: "A human-centric segmentation model.",
|
|
79
|
+
id: "facebook/sapiens-pose",
|
|
79
80
|
},
|
|
80
81
|
{
|
|
81
82
|
description: "An instance segmentation application to predict neuronal cell types from microscopy images.",
|
|
@@ -47,20 +47,24 @@ const taskData: TaskDataCustom = {
|
|
|
47
47
|
id: "meta-llama/Llama-3.2-11B-Vision-Instruct",
|
|
48
48
|
},
|
|
49
49
|
{
|
|
50
|
-
description: "Cutting-edge
|
|
51
|
-
id: "
|
|
50
|
+
description: "Cutting-edge vision language models.",
|
|
51
|
+
id: "allenai/Molmo-7B-D-0924",
|
|
52
52
|
},
|
|
53
53
|
{
|
|
54
54
|
description: "Small yet powerful model.",
|
|
55
55
|
id: "vikhyatk/moondream2",
|
|
56
56
|
},
|
|
57
57
|
{
|
|
58
|
-
description: "Strong image-text-to-text model
|
|
59
|
-
id: "
|
|
58
|
+
description: "Strong image-text-to-text model.",
|
|
59
|
+
id: "Qwen/Qwen2-VL-7B-Instruct",
|
|
60
60
|
},
|
|
61
61
|
{
|
|
62
62
|
description: "Strong image-text-to-text model.",
|
|
63
|
-
id: "
|
|
63
|
+
id: "mistralai/Pixtral-12B-2409",
|
|
64
|
+
},
|
|
65
|
+
{
|
|
66
|
+
description: "Strong image-text-to-text model focused on documents.",
|
|
67
|
+
id: "stepfun-ai/GOT-OCR2_0",
|
|
64
68
|
},
|
|
65
69
|
],
|
|
66
70
|
spaces: [
|
|
@@ -74,20 +78,24 @@ const taskData: TaskDataCustom = {
|
|
|
74
78
|
},
|
|
75
79
|
{
|
|
76
80
|
description: "Powerful vision-language model assistant.",
|
|
77
|
-
id: "
|
|
81
|
+
id: "akhaliq/Molmo-7B-D-0924",
|
|
82
|
+
},
|
|
83
|
+
{
|
|
84
|
+
description: "An image-text-to-text application focused on documents.",
|
|
85
|
+
id: "stepfun-ai/GOT_official_online_demo",
|
|
78
86
|
},
|
|
79
87
|
{
|
|
80
88
|
description: "An application to compare outputs of different vision language models.",
|
|
81
89
|
id: "merve/compare_VLMs",
|
|
82
90
|
},
|
|
83
91
|
{
|
|
84
|
-
description: "An application for
|
|
85
|
-
id: "
|
|
92
|
+
description: "An application for chatting with an image-text-to-text model.",
|
|
93
|
+
id: "GanymedeNil/Qwen2-VL-7B",
|
|
86
94
|
},
|
|
87
95
|
],
|
|
88
96
|
summary:
|
|
89
97
|
"Image-text-to-text models take in an image and text prompt and output text. These models are also called vision-language models, or VLMs. The difference from image-to-text models is that these models take an additional text input, not restricting the model to certain use cases like image captioning, and may also be trained to accept a conversation as input.",
|
|
90
|
-
widgetModels: ["
|
|
98
|
+
widgetModels: ["meta-llama/Llama-3.2-11B-Vision-Instruct"],
|
|
91
99
|
youtubeId: "IoGaGfU1CIg",
|
|
92
100
|
};
|
|
93
101
|
|
|
@@ -43,15 +43,16 @@ const taskData: TaskDataCustom = {
|
|
|
43
43
|
],
|
|
44
44
|
models: [
|
|
45
45
|
{
|
|
46
|
-
description: "Solid object detection model trained on the
|
|
46
|
+
description: "Solid object detection model pre-trained on the COCO 2017 dataset.",
|
|
47
47
|
id: "facebook/detr-resnet-50",
|
|
48
48
|
},
|
|
49
49
|
{
|
|
50
|
-
description: "
|
|
51
|
-
id: "
|
|
50
|
+
description: "Real-time and accurate object detection model.",
|
|
51
|
+
id: "jameslahm/yolov10x",
|
|
52
52
|
},
|
|
53
53
|
{
|
|
54
|
-
description:
|
|
54
|
+
description:
|
|
55
|
+
"Fast and accurate object detection model trained on COCO and Object365 datasets.",
|
|
55
56
|
id: "PekingU/rtdetr_r18vd_coco_o365",
|
|
56
57
|
},
|
|
57
58
|
],
|
|
@@ -58,10 +58,6 @@ const taskData: TaskDataCustom = {
|
|
|
58
58
|
description: "A text-generation model trained to follow instructions.",
|
|
59
59
|
id: "google/gemma-2-2b-it",
|
|
60
60
|
},
|
|
61
|
-
{
|
|
62
|
-
description: "A code generation model that can generate code in 80+ languages.",
|
|
63
|
-
id: "bigcode/starcoder",
|
|
64
|
-
},
|
|
65
61
|
{
|
|
66
62
|
description: "Very powerful text generation model trained to follow instructions.",
|
|
67
63
|
id: "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
|
@@ -75,12 +71,12 @@ const taskData: TaskDataCustom = {
|
|
|
75
71
|
id: "AI-MO/NuminaMath-7B-TIR",
|
|
76
72
|
},
|
|
77
73
|
{
|
|
78
|
-
description: "Strong
|
|
79
|
-
id: "
|
|
74
|
+
description: "Strong text generation model to follow instructions.",
|
|
75
|
+
id: "Qwen/Qwen2.5-7B-Instruct",
|
|
80
76
|
},
|
|
81
77
|
{
|
|
82
78
|
description: "Very strong open-source large language model.",
|
|
83
|
-
id: "
|
|
79
|
+
id: "nvidia/Llama-3.1-Nemotron-70B-Instruct",
|
|
84
80
|
},
|
|
85
81
|
],
|
|
86
82
|
spaces: [
|
|
@@ -88,6 +84,10 @@ const taskData: TaskDataCustom = {
|
|
|
88
84
|
description: "A leaderboard to compare different open-source text generation models based on various benchmarks.",
|
|
89
85
|
id: "open-llm-leaderboard/open_llm_leaderboard",
|
|
90
86
|
},
|
|
87
|
+
{
|
|
88
|
+
description: "A leaderboard for comparing chain-of-thought performance of models.",
|
|
89
|
+
id: "logikon/open_cot_leaderboard",
|
|
90
|
+
},
|
|
91
91
|
{
|
|
92
92
|
description: "An text generation based application based on a very powerful LLaMA2 model.",
|
|
93
93
|
id: "ysharma/Explore_llamav2_with_TGI",
|
|
@@ -71,8 +71,8 @@ const taskData: TaskDataCustom = {
|
|
|
71
71
|
id: "jbilcke-hf/ai-comic-factory",
|
|
72
72
|
},
|
|
73
73
|
{
|
|
74
|
-
description: "
|
|
75
|
-
id: "
|
|
74
|
+
description: "An application to match multiple custom image generation models.",
|
|
75
|
+
id: "multimodalart/flux-lora-lab",
|
|
76
76
|
},
|
|
77
77
|
{
|
|
78
78
|
description: "A powerful yet very fast image generation application.",
|
|
@@ -57,9 +57,13 @@ const taskData: TaskDataCustom = {
|
|
|
57
57
|
id: "suno/bark",
|
|
58
58
|
},
|
|
59
59
|
{
|
|
60
|
-
description: "XTTS
|
|
60
|
+
description: "An application on XTTS, a voice generation model that lets you clone voices into different languages.",
|
|
61
61
|
id: "coqui/xtts",
|
|
62
62
|
},
|
|
63
|
+
{
|
|
64
|
+
description: "An application that generates speech in different styles in English and Chinese.",
|
|
65
|
+
id: "mrfakename/E2-F5-TTS",
|
|
66
|
+
},
|
|
63
67
|
{
|
|
64
68
|
description: "An application that synthesizes speech for diverse speaker prompts.",
|
|
65
69
|
id: "parler-tts/parler_tts_mini",
|
|
@@ -67,30 +67,30 @@ const taskData: TaskDataCustom = {
|
|
|
67
67
|
],
|
|
68
68
|
models: [
|
|
69
69
|
{
|
|
70
|
-
description: "A strong model for video generation.",
|
|
71
|
-
id: "
|
|
70
|
+
description: "A strong model for consistent video generation.",
|
|
71
|
+
id: "rain1011/pyramid-flow-sd3",
|
|
72
72
|
},
|
|
73
73
|
{
|
|
74
74
|
description: "A robust model for text-to-video generation.",
|
|
75
|
-
id: "
|
|
75
|
+
id: "VideoCrafter/VideoCrafter2",
|
|
76
76
|
},
|
|
77
77
|
{
|
|
78
|
-
description: "A text-to-video generation model
|
|
79
|
-
id: "
|
|
78
|
+
description: "A cutting-edge text-to-video generation model.",
|
|
79
|
+
id: "TIGER-Lab/T2V-Turbo-V2",
|
|
80
80
|
},
|
|
81
81
|
],
|
|
82
82
|
spaces: [
|
|
83
83
|
{
|
|
84
84
|
description: "An application that generates video from text.",
|
|
85
|
-
id: "
|
|
85
|
+
id: "VideoCrafter/VideoCrafter",
|
|
86
86
|
},
|
|
87
87
|
{
|
|
88
|
-
description: "
|
|
89
|
-
id: "
|
|
88
|
+
description: "Consistent video generation application.",
|
|
89
|
+
id: "TIGER-Lab/T2V-Turbo-V2",
|
|
90
90
|
},
|
|
91
91
|
{
|
|
92
|
-
description: "
|
|
93
|
-
id: "
|
|
92
|
+
description: "A cutting edge video generation application.",
|
|
93
|
+
id: "Pyramid-Flow/pyramid-flow",
|
|
94
94
|
},
|
|
95
95
|
],
|
|
96
96
|
summary:
|
|
@@ -60,12 +60,15 @@ export interface TokenClassificationOutputElement {
|
|
|
60
60
|
/**
|
|
61
61
|
* The character position in the input where this group ends.
|
|
62
62
|
*/
|
|
63
|
-
end
|
|
63
|
+
end: number;
|
|
64
64
|
/**
|
|
65
|
-
* The predicted label for
|
|
65
|
+
* The predicted label for a single token
|
|
66
|
+
*/
|
|
67
|
+
entity?: string;
|
|
68
|
+
/**
|
|
69
|
+
* The predicted label for a group of one or more tokens
|
|
66
70
|
*/
|
|
67
71
|
entity_group?: string;
|
|
68
|
-
label: unknown;
|
|
69
72
|
/**
|
|
70
73
|
* The associated score / probability
|
|
71
74
|
*/
|
|
@@ -73,10 +76,10 @@ export interface TokenClassificationOutputElement {
|
|
|
73
76
|
/**
|
|
74
77
|
* The character position in the input where this group begins.
|
|
75
78
|
*/
|
|
76
|
-
start
|
|
79
|
+
start: number;
|
|
77
80
|
/**
|
|
78
81
|
* The corresponding text
|
|
79
82
|
*/
|
|
80
|
-
word
|
|
83
|
+
word: string;
|
|
81
84
|
[property: string]: unknown;
|
|
82
85
|
}
|