@huggingface/tasks 0.12.22 → 0.12.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. package/dist/index.cjs +503 -132
  2. package/dist/index.js +503 -132
  3. package/dist/src/hardware.d.ts +20 -0
  4. package/dist/src/hardware.d.ts.map +1 -1
  5. package/dist/src/model-libraries-snippets.d.ts +1 -0
  6. package/dist/src/model-libraries-snippets.d.ts.map +1 -1
  7. package/dist/src/model-libraries.d.ts +9 -2
  8. package/dist/src/model-libraries.d.ts.map +1 -1
  9. package/dist/src/snippets/common.d.ts +20 -0
  10. package/dist/src/snippets/common.d.ts.map +1 -0
  11. package/dist/src/snippets/curl.d.ts +15 -8
  12. package/dist/src/snippets/curl.d.ts.map +1 -1
  13. package/dist/src/snippets/js.d.ts +17 -10
  14. package/dist/src/snippets/js.d.ts.map +1 -1
  15. package/dist/src/snippets/python.d.ts +20 -13
  16. package/dist/src/snippets/python.d.ts.map +1 -1
  17. package/dist/src/snippets/types.d.ts +4 -0
  18. package/dist/src/snippets/types.d.ts.map +1 -1
  19. package/dist/src/tasks/depth-estimation/data.d.ts.map +1 -1
  20. package/dist/src/tasks/image-segmentation/data.d.ts.map +1 -1
  21. package/dist/src/tasks/image-text-to-text/data.d.ts.map +1 -1
  22. package/dist/src/tasks/object-detection/data.d.ts.map +1 -1
  23. package/dist/src/tasks/text-to-speech/data.d.ts.map +1 -1
  24. package/dist/src/tasks/token-classification/inference.d.ts +8 -5
  25. package/dist/src/tasks/token-classification/inference.d.ts.map +1 -1
  26. package/dist/src/tasks/video-text-to-text/data.d.ts.map +1 -1
  27. package/dist/src/tasks/visual-question-answering/inference.d.ts +0 -1
  28. package/dist/src/tasks/visual-question-answering/inference.d.ts.map +1 -1
  29. package/package.json +1 -1
  30. package/src/hardware.ts +20 -0
  31. package/src/model-libraries-snippets.ts +28 -3
  32. package/src/model-libraries.ts +8 -1
  33. package/src/snippets/common.ts +63 -0
  34. package/src/snippets/curl.ts +71 -26
  35. package/src/snippets/js.ts +165 -40
  36. package/src/snippets/python.ts +186 -48
  37. package/src/snippets/types.ts +5 -0
  38. package/src/tasks/depth-estimation/data.ts +15 -7
  39. package/src/tasks/image-segmentation/about.md +1 -1
  40. package/src/tasks/image-segmentation/data.ts +10 -9
  41. package/src/tasks/image-text-to-text/data.ts +17 -9
  42. package/src/tasks/keypoint-detection/data.ts +1 -1
  43. package/src/tasks/object-detection/data.ts +5 -4
  44. package/src/tasks/text-generation/data.ts +7 -7
  45. package/src/tasks/text-to-image/data.ts +2 -2
  46. package/src/tasks/text-to-speech/data.ts +5 -1
  47. package/src/tasks/text-to-video/data.ts +10 -10
  48. package/src/tasks/token-classification/inference.ts +8 -5
  49. package/src/tasks/token-classification/spec/output.json +6 -2
  50. package/src/tasks/video-text-to-text/data.ts +8 -0
  51. package/src/tasks/visual-question-answering/inference.ts +0 -1
  52. package/src/tasks/visual-question-answering/spec/output.json +1 -1
@@ -1,22 +1,128 @@
1
1
  import type { PipelineType } from "../pipelines.js";
2
+ import type { ChatCompletionInputMessage, GenerationParameters } from "../tasks/index.js";
3
+ import { stringifyGenerationConfig, stringifyMessages } from "./common.js";
2
4
  import { getModelInputSnippet } from "./inputs.js";
3
- import type { ModelDataMinimal } from "./types.js";
5
+ import type { InferenceSnippet, ModelDataMinimal } from "./types.js";
4
6
 
5
- export const snippetConversational = (model: ModelDataMinimal, accessToken: string): string =>
6
- `from huggingface_hub import InferenceClient
7
+ export const snippetConversational = (
8
+ model: ModelDataMinimal,
9
+ accessToken: string,
10
+ opts?: {
11
+ streaming?: boolean;
12
+ messages?: ChatCompletionInputMessage[];
13
+ temperature?: GenerationParameters["temperature"];
14
+ max_tokens?: GenerationParameters["max_tokens"];
15
+ top_p?: GenerationParameters["top_p"];
16
+ }
17
+ ): InferenceSnippet[] => {
18
+ const streaming = opts?.streaming ?? true;
19
+ const messages: ChatCompletionInputMessage[] = opts?.messages ?? [
20
+ { role: "user", content: "What is the capital of France?" },
21
+ ];
22
+ const messagesStr = stringifyMessages(messages, {
23
+ sep: ",\n\t",
24
+ start: `[\n\t`,
25
+ end: `\n]`,
26
+ attributeKeyQuotes: true,
27
+ });
28
+
29
+ const config = {
30
+ ...(opts?.temperature ? { temperature: opts.temperature } : undefined),
31
+ max_tokens: opts?.max_tokens ?? 500,
32
+ ...(opts?.top_p ? { top_p: opts.top_p } : undefined),
33
+ };
34
+ const configStr = stringifyGenerationConfig(config, {
35
+ sep: ",\n\t",
36
+ start: "",
37
+ end: "",
38
+ attributeValueConnector: "=",
39
+ });
40
+
41
+ if (streaming) {
42
+ return [
43
+ {
44
+ client: "huggingface_hub",
45
+ content: `from huggingface_hub import InferenceClient
7
46
 
8
47
  client = InferenceClient(api_key="${accessToken || "{API_TOKEN}"}")
9
48
 
10
- for message in client.chat_completion(
11
- model="${model.id}",
12
- messages=[{"role": "user", "content": "What is the capital of France?"}],
13
- max_tokens=500,
14
- stream=True,
15
- ):
16
- print(message.choices[0].delta.content, end="")`;
49
+ messages = ${messagesStr}
17
50
 
18
- export const snippetConversationalWithImage = (model: ModelDataMinimal, accessToken: string): string =>
19
- `from huggingface_hub import InferenceClient
51
+ stream = client.chat.completions.create(
52
+ model="${model.id}",
53
+ messages=messages,
54
+ ${configStr},
55
+ stream=True
56
+ )
57
+
58
+ for chunk in stream:
59
+ print(chunk.choices[0].delta.content)`,
60
+ },
61
+ {
62
+ client: "openai",
63
+ content: `from openai import OpenAI
64
+
65
+ client = OpenAI(
66
+ base_url="https://api-inference.huggingface.co/v1/",
67
+ api_key="${accessToken || "{API_TOKEN}"}"
68
+ )
69
+
70
+ messages = ${messagesStr}
71
+
72
+ stream = client.chat.completions.create(
73
+ model="${model.id}",
74
+ messages=messages,
75
+ ${configStr},
76
+ stream=True
77
+ )
78
+
79
+ for chunk in stream:
80
+ print(chunk.choices[0].delta.content)`,
81
+ },
82
+ ];
83
+ } else {
84
+ return [
85
+ {
86
+ client: "huggingface_hub",
87
+ content: `from huggingface_hub import InferenceClient
88
+
89
+ client = InferenceClient(api_key="${accessToken || "{API_TOKEN}"}")
90
+
91
+ messages = ${messagesStr}
92
+
93
+ completion = client.chat.completions.create(
94
+ model="${model.id}",
95
+ messages=messages,
96
+ ${configStr}
97
+ )
98
+
99
+ print(completion.choices[0].message)`,
100
+ },
101
+ {
102
+ client: "openai",
103
+ content: `from openai import OpenAI
104
+
105
+ client = OpenAI(
106
+ base_url="https://api-inference.huggingface.co/v1/",
107
+ api_key="${accessToken || "{API_TOKEN}"}"
108
+ )
109
+
110
+ messages = ${messagesStr}
111
+
112
+ completion = client.chat.completions.create(
113
+ model="${model.id}",
114
+ messages=messages,
115
+ ${configStr}
116
+ )
117
+
118
+ print(completion.choices[0].message)`,
119
+ },
120
+ ];
121
+ }
122
+ };
123
+
124
+ export const snippetConversationalWithImage = (model: ModelDataMinimal, accessToken: string): InferenceSnippet => ({
125
+ content: `from huggingface_hub import InferenceClient
20
126
 
21
127
  client = InferenceClient(api_key="${accessToken || "{API_TOKEN}"}")
22
128
 
@@ -36,20 +142,22 @@ for message in client.chat_completion(
36
142
  max_tokens=500,
37
143
  stream=True,
38
144
  ):
39
- print(message.choices[0].delta.content, end="")`;
145
+ print(message.choices[0].delta.content, end="")`,
146
+ });
40
147
 
41
- export const snippetZeroShotClassification = (model: ModelDataMinimal): string =>
42
- `def query(payload):
148
+ export const snippetZeroShotClassification = (model: ModelDataMinimal): InferenceSnippet => ({
149
+ content: `def query(payload):
43
150
  response = requests.post(API_URL, headers=headers, json=payload)
44
151
  return response.json()
45
152
 
46
153
  output = query({
47
154
  "inputs": ${getModelInputSnippet(model)},
48
155
  "parameters": {"candidate_labels": ["refund", "legal", "faq"]},
49
- })`;
156
+ })`,
157
+ });
50
158
 
51
- export const snippetZeroShotImageClassification = (model: ModelDataMinimal): string =>
52
- `def query(data):
159
+ export const snippetZeroShotImageClassification = (model: ModelDataMinimal): InferenceSnippet => ({
160
+ content: `def query(data):
53
161
  with open(data["image_path"], "rb") as f:
54
162
  img = f.read()
55
163
  payload={
@@ -62,28 +170,31 @@ export const snippetZeroShotImageClassification = (model: ModelDataMinimal): str
62
170
  output = query({
63
171
  "image_path": ${getModelInputSnippet(model)},
64
172
  "parameters": {"candidate_labels": ["cat", "dog", "llama"]},
65
- })`;
173
+ })`,
174
+ });
66
175
 
67
- export const snippetBasic = (model: ModelDataMinimal): string =>
68
- `def query(payload):
176
+ export const snippetBasic = (model: ModelDataMinimal): InferenceSnippet => ({
177
+ content: `def query(payload):
69
178
  response = requests.post(API_URL, headers=headers, json=payload)
70
179
  return response.json()
71
180
 
72
181
  output = query({
73
182
  "inputs": ${getModelInputSnippet(model)},
74
- })`;
183
+ })`,
184
+ });
75
185
 
76
- export const snippetFile = (model: ModelDataMinimal): string =>
77
- `def query(filename):
186
+ export const snippetFile = (model: ModelDataMinimal): InferenceSnippet => ({
187
+ content: `def query(filename):
78
188
  with open(filename, "rb") as f:
79
189
  data = f.read()
80
190
  response = requests.post(API_URL, headers=headers, data=data)
81
191
  return response.json()
82
192
 
83
- output = query(${getModelInputSnippet(model)})`;
193
+ output = query(${getModelInputSnippet(model)})`,
194
+ });
84
195
 
85
- export const snippetTextToImage = (model: ModelDataMinimal): string =>
86
- `def query(payload):
196
+ export const snippetTextToImage = (model: ModelDataMinimal): InferenceSnippet => ({
197
+ content: `def query(payload):
87
198
  response = requests.post(API_URL, headers=headers, json=payload)
88
199
  return response.content
89
200
  image_bytes = query({
@@ -92,22 +203,25 @@ image_bytes = query({
92
203
  # You can access the image with PIL.Image for example
93
204
  import io
94
205
  from PIL import Image
95
- image = Image.open(io.BytesIO(image_bytes))`;
206
+ image = Image.open(io.BytesIO(image_bytes))`,
207
+ });
96
208
 
97
- export const snippetTabular = (model: ModelDataMinimal): string =>
98
- `def query(payload):
209
+ export const snippetTabular = (model: ModelDataMinimal): InferenceSnippet => ({
210
+ content: `def query(payload):
99
211
  response = requests.post(API_URL, headers=headers, json=payload)
100
212
  return response.content
101
213
  response = query({
102
214
  "inputs": {"data": ${getModelInputSnippet(model)}},
103
- })`;
215
+ })`,
216
+ });
104
217
 
105
- export const snippetTextToAudio = (model: ModelDataMinimal): string => {
218
+ export const snippetTextToAudio = (model: ModelDataMinimal): InferenceSnippet => {
106
219
  // Transformers TTS pipeline and api-inference-community (AIC) pipeline outputs are diverged
107
220
  // with the latest update to inference-api (IA).
108
221
  // Transformers IA returns a byte object (wav file), whereas AIC returns wav and sampling_rate.
109
222
  if (model.library_name === "transformers") {
110
- return `def query(payload):
223
+ return {
224
+ content: `def query(payload):
111
225
  response = requests.post(API_URL, headers=headers, json=payload)
112
226
  return response.content
113
227
 
@@ -116,9 +230,11 @@ audio_bytes = query({
116
230
  })
117
231
  # You can access the audio with IPython.display for example
118
232
  from IPython.display import Audio
119
- Audio(audio_bytes)`;
233
+ Audio(audio_bytes)`,
234
+ };
120
235
  } else {
121
- return `def query(payload):
236
+ return {
237
+ content: `def query(payload):
122
238
  response = requests.post(API_URL, headers=headers, json=payload)
123
239
  return response.json()
124
240
 
@@ -127,12 +243,13 @@ audio, sampling_rate = query({
127
243
  })
128
244
  # You can access the audio with IPython.display for example
129
245
  from IPython.display import Audio
130
- Audio(audio, rate=sampling_rate)`;
246
+ Audio(audio, rate=sampling_rate)`,
247
+ };
131
248
  }
132
249
  };
133
250
 
134
- export const snippetDocumentQuestionAnswering = (model: ModelDataMinimal): string =>
135
- `def query(payload):
251
+ export const snippetDocumentQuestionAnswering = (model: ModelDataMinimal): InferenceSnippet => ({
252
+ content: `def query(payload):
136
253
  with open(payload["image"], "rb") as f:
137
254
  img = f.read()
138
255
  payload["image"] = base64.b64encode(img).decode("utf-8")
@@ -141,9 +258,19 @@ export const snippetDocumentQuestionAnswering = (model: ModelDataMinimal): strin
141
258
 
142
259
  output = query({
143
260
  "inputs": ${getModelInputSnippet(model)},
144
- })`;
261
+ })`,
262
+ });
145
263
 
146
- export const pythonSnippets: Partial<Record<PipelineType, (model: ModelDataMinimal, accessToken: string) => string>> = {
264
+ export const pythonSnippets: Partial<
265
+ Record<
266
+ PipelineType,
267
+ (
268
+ model: ModelDataMinimal,
269
+ accessToken: string,
270
+ opts?: Record<string, unknown>
271
+ ) => InferenceSnippet | InferenceSnippet[]
272
+ >
273
+ > = {
147
274
  // Same order as in tasks/src/pipelines.ts
148
275
  "text-classification": snippetBasic,
149
276
  "token-classification": snippetBasic,
@@ -174,25 +301,36 @@ export const pythonSnippets: Partial<Record<PipelineType, (model: ModelDataMinim
174
301
  "zero-shot-image-classification": snippetZeroShotImageClassification,
175
302
  };
176
303
 
177
- export function getPythonInferenceSnippet(model: ModelDataMinimal, accessToken: string): string {
304
+ export function getPythonInferenceSnippet(
305
+ model: ModelDataMinimal,
306
+ accessToken: string,
307
+ opts?: Record<string, unknown>
308
+ ): InferenceSnippet | InferenceSnippet[] {
178
309
  if (model.pipeline_tag === "text-generation" && model.tags.includes("conversational")) {
179
310
  // Conversational model detected, so we display a code snippet that features the Messages API
180
- return snippetConversational(model, accessToken);
311
+ return snippetConversational(model, accessToken, opts);
181
312
  } else if (model.pipeline_tag === "image-text-to-text" && model.tags.includes("conversational")) {
182
313
  // Example sending an image to the Message API
183
314
  return snippetConversationalWithImage(model, accessToken);
184
315
  } else {
185
- const body =
316
+ let snippets =
186
317
  model.pipeline_tag && model.pipeline_tag in pythonSnippets
187
- ? pythonSnippets[model.pipeline_tag]?.(model, accessToken) ?? ""
188
- : "";
318
+ ? pythonSnippets[model.pipeline_tag]?.(model, accessToken) ?? { content: "" }
319
+ : { content: "" };
189
320
 
190
- return `import requests
321
+ snippets = Array.isArray(snippets) ? snippets : [snippets];
191
322
 
323
+ return snippets.map((snippet) => {
324
+ return {
325
+ ...snippet,
326
+ content: `import requests
327
+
192
328
  API_URL = "https://api-inference.huggingface.co/models/${model.id}"
193
329
  headers = {"Authorization": ${accessToken ? `"Bearer ${accessToken}"` : `f"Bearer {API_TOKEN}"`}}
194
-
195
- ${body}`;
330
+
331
+ ${snippet.content}`,
332
+ };
333
+ });
196
334
  }
197
335
  }
198
336
 
@@ -9,3 +9,8 @@ export type ModelDataMinimal = Pick<
9
9
  ModelData,
10
10
  "id" | "pipeline_tag" | "mask_token" | "library_name" | "config" | "tags" | "inference"
11
11
  >;
12
+
13
+ export interface InferenceSnippet {
14
+ content: string;
15
+ client?: string; // for instance: `client` could be `huggingface_hub` or `openai` client for Python snippets
16
+ }
@@ -33,11 +33,15 @@ const taskData: TaskDataCustom = {
33
33
  },
34
34
  {
35
35
  description: "A strong monocular depth estimation model.",
36
- id: "Bingxin/Marigold",
36
+ id: "jingheya/lotus-depth-g-v1-0",
37
37
  },
38
38
  {
39
- description: "A metric depth estimation model trained on NYU dataset.",
40
- id: "Intel/zoedepth-nyu",
39
+ description: "A depth estimation model that predicts depth in videos.",
40
+ id: "tencent/DepthCrafter",
41
+ },
42
+ {
43
+ description: "A robust depth estimation model.",
44
+ id: "apple/DepthPro",
41
45
  },
42
46
  ],
43
47
  spaces: [
@@ -46,12 +50,16 @@ const taskData: TaskDataCustom = {
46
50
  id: "radames/dpt-depth-estimation-3d-voxels",
47
51
  },
48
52
  {
49
- description: "An application on cutting-edge depth estimation.",
50
- id: "depth-anything/Depth-Anything-V2",
53
+ description: "An application for bleeding-edge depth estimation.",
54
+ id: "akhaliq/depth-pro",
55
+ },
56
+ {
57
+ description: "An application on cutting-edge depth estimation in videos.",
58
+ id: "tencent/DepthCrafter",
51
59
  },
52
60
  {
53
- description: "An application to try state-of-the-art depth estimation.",
54
- id: "merve/compare_depth_models",
61
+ description: "A human-centric depth estimation application.",
62
+ id: "facebook/sapiens-depth",
55
63
  },
56
64
  ],
57
65
  summary: "Depth estimation is the task of predicting depth of the objects present in an image.",
@@ -48,7 +48,7 @@ import { HfInference } from "@huggingface/inference";
48
48
  const inference = new HfInference(HF_TOKEN);
49
49
  await inference.imageSegmentation({
50
50
  data: await (await fetch("https://picsum.photos/300/300")).blob(),
51
- model: "facebook/detr-resnet-50-panoptic",
51
+ model: "facebook/mask2former-swin-base-coco-panoptic",
52
52
  });
53
53
  ```
54
54
 
@@ -44,23 +44,24 @@ const taskData: TaskDataCustom = {
44
44
  models: [
45
45
  {
46
46
  // TO DO: write description
47
- description: "Solid panoptic segmentation model trained on the COCO 2017 benchmark dataset.",
48
- id: "facebook/detr-resnet-50-panoptic",
47
+ description:
48
+ "Solid semantic segmentation model trained on ADE20k.",
49
+ id: "openmmlab/upernet-convnext-small",
49
50
  },
50
51
  {
51
52
  description: "Background removal model.",
52
53
  id: "briaai/RMBG-1.4",
53
54
  },
54
- {
55
- description: "Semantic segmentation model trained on ADE20k benchmark dataset with 512x512 resolution.",
56
- id: "nvidia/segformer-b0-finetuned-ade-512-512",
57
- },
58
55
  {
59
56
  description: "A multipurpose image segmentation model for high resolution images.",
60
57
  id: "ZhengPeng7/BiRefNet",
61
58
  },
62
59
  {
63
- description: "Panoptic segmentation model trained COCO (common objects) dataset.",
60
+ description: "Powerful human-centric image segmentation model.",
61
+ id: "facebook/sapiens-seg-1b",
62
+ },
63
+ {
64
+ description: "Panoptic segmentation model trained on the COCO (common objects) dataset.",
64
65
  id: "facebook/mask2former-swin-large-coco-panoptic",
65
66
  },
66
67
  ],
@@ -74,8 +75,8 @@ const taskData: TaskDataCustom = {
74
75
  id: "jbrinkma/segment-anything",
75
76
  },
76
77
  {
77
- description: "A semantic segmentation application that predicts human silhouettes.",
78
- id: "keras-io/Human-Part-Segmentation",
78
+ description: "A human-centric segmentation model.",
79
+ id: "facebook/sapiens-pose",
79
80
  },
80
81
  {
81
82
  description: "An instance segmentation application to predict neuronal cell types from microscopy images.",
@@ -47,20 +47,24 @@ const taskData: TaskDataCustom = {
47
47
  id: "meta-llama/Llama-3.2-11B-Vision-Instruct",
48
48
  },
49
49
  {
50
- description: "Cutting-edge conversational vision language model that can take multiple image inputs.",
51
- id: "HuggingFaceM4/idefics2-8b-chatty",
50
+ description: "Cutting-edge vision language models.",
51
+ id: "allenai/Molmo-7B-D-0924",
52
52
  },
53
53
  {
54
54
  description: "Small yet powerful model.",
55
55
  id: "vikhyatk/moondream2",
56
56
  },
57
57
  {
58
- description: "Strong image-text-to-text model made to understand documents.",
59
- id: "mPLUG/DocOwl1.5",
58
+ description: "Strong image-text-to-text model.",
59
+ id: "Qwen/Qwen2-VL-7B-Instruct",
60
60
  },
61
61
  {
62
62
  description: "Strong image-text-to-text model.",
63
- id: "microsoft/Phi-3.5-vision-instruct",
63
+ id: "mistralai/Pixtral-12B-2409",
64
+ },
65
+ {
66
+ description: "Strong image-text-to-text model focused on documents.",
67
+ id: "stepfun-ai/GOT-OCR2_0",
64
68
  },
65
69
  ],
66
70
  spaces: [
@@ -74,20 +78,24 @@ const taskData: TaskDataCustom = {
74
78
  },
75
79
  {
76
80
  description: "Powerful vision-language model assistant.",
77
- id: "liuhaotian/LLaVA-1.6",
81
+ id: "akhaliq/Molmo-7B-D-0924",
82
+ },
83
+ {
84
+ description: "An image-text-to-text application focused on documents.",
85
+ id: "stepfun-ai/GOT_official_online_demo",
78
86
  },
79
87
  {
80
88
  description: "An application to compare outputs of different vision language models.",
81
89
  id: "merve/compare_VLMs",
82
90
  },
83
91
  {
84
- description: "An application for document vision language tasks.",
85
- id: "mPLUG/DocOwl",
92
+ description: "An application for chatting with an image-text-to-text model.",
93
+ id: "GanymedeNil/Qwen2-VL-7B",
86
94
  },
87
95
  ],
88
96
  summary:
89
97
  "Image-text-to-text models take in an image and text prompt and output text. These models are also called vision-language models, or VLMs. The difference from image-to-text models is that these models take an additional text input, not restricting the model to certain use cases like image captioning, and may also be trained to accept a conversation as input.",
90
- widgetModels: ["microsoft/kosmos-2-patch14-224"],
98
+ widgetModels: ["meta-llama/Llama-3.2-11B-Vision-Instruct"],
91
99
  youtubeId: "IoGaGfU1CIg",
92
100
  };
93
101
 
@@ -29,7 +29,7 @@ const taskData: TaskDataCustom = {
29
29
  },
30
30
  {
31
31
  description: "Strong keypoint detection model used to detect human pose.",
32
- id: "qualcomm/MediaPipe-Pose-Estimation",
32
+ id: "facebook/sapiens-pose-1b",
33
33
  },
34
34
  ],
35
35
  spaces: [
@@ -43,15 +43,16 @@ const taskData: TaskDataCustom = {
43
43
  ],
44
44
  models: [
45
45
  {
46
- description: "Solid object detection model trained on the benchmark dataset COCO 2017.",
46
+ description: "Solid object detection model pre-trained on the COCO 2017 dataset.",
47
47
  id: "facebook/detr-resnet-50",
48
48
  },
49
49
  {
50
- description: "Strong object detection model trained on ImageNet-21k dataset.",
51
- id: "microsoft/beit-base-patch16-224-pt22k-ft22k",
50
+ description: "Real-time and accurate object detection model.",
51
+ id: "jameslahm/yolov10x",
52
52
  },
53
53
  {
54
- description: "Fast and accurate object detection model trained on COCO dataset.",
54
+ description:
55
+ "Fast and accurate object detection model trained on COCO and Object365 datasets.",
55
56
  id: "PekingU/rtdetr_r18vd_coco_o365",
56
57
  },
57
58
  ],
@@ -58,10 +58,6 @@ const taskData: TaskDataCustom = {
58
58
  description: "A text-generation model trained to follow instructions.",
59
59
  id: "google/gemma-2-2b-it",
60
60
  },
61
- {
62
- description: "A code generation model that can generate code in 80+ languages.",
63
- id: "bigcode/starcoder",
64
- },
65
61
  {
66
62
  description: "Very powerful text generation model trained to follow instructions.",
67
63
  id: "meta-llama/Meta-Llama-3.1-8B-Instruct",
@@ -75,12 +71,12 @@ const taskData: TaskDataCustom = {
75
71
  id: "AI-MO/NuminaMath-7B-TIR",
76
72
  },
77
73
  {
78
- description: "Strong coding assistant model.",
79
- id: "HuggingFaceH4/starchat2-15b-v0.1",
74
+ description: "Strong text generation model to follow instructions.",
75
+ id: "Qwen/Qwen2.5-7B-Instruct",
80
76
  },
81
77
  {
82
78
  description: "Very strong open-source large language model.",
83
- id: "mistralai/Mistral-Nemo-Instruct-2407",
79
+ id: "nvidia/Llama-3.1-Nemotron-70B-Instruct",
84
80
  },
85
81
  ],
86
82
  spaces: [
@@ -88,6 +84,10 @@ const taskData: TaskDataCustom = {
88
84
  description: "A leaderboard to compare different open-source text generation models based on various benchmarks.",
89
85
  id: "open-llm-leaderboard/open_llm_leaderboard",
90
86
  },
87
+ {
88
+ description: "A leaderboard for comparing chain-of-thought performance of models.",
89
+ id: "logikon/open_cot_leaderboard",
90
+ },
91
91
  {
92
92
  description: "An text generation based application based on a very powerful LLaMA2 model.",
93
93
  id: "ysharma/Explore_llamav2_with_TGI",
@@ -71,8 +71,8 @@ const taskData: TaskDataCustom = {
71
71
  id: "jbilcke-hf/ai-comic-factory",
72
72
  },
73
73
  {
74
- description: "A text-to-image application that can generate coherent text inside the image.",
75
- id: "DeepFloyd/IF",
74
+ description: "An application to match multiple custom image generation models.",
75
+ id: "multimodalart/flux-lora-lab",
76
76
  },
77
77
  {
78
78
  description: "A powerful yet very fast image generation application.",
@@ -57,9 +57,13 @@ const taskData: TaskDataCustom = {
57
57
  id: "suno/bark",
58
58
  },
59
59
  {
60
- description: "XTTS is a Voice generation model that lets you clone voices into different languages.",
60
+ description: "An application on XTTS, a voice generation model that lets you clone voices into different languages.",
61
61
  id: "coqui/xtts",
62
62
  },
63
+ {
64
+ description: "An application that generates speech in different styles in English and Chinese.",
65
+ id: "mrfakename/E2-F5-TTS",
66
+ },
63
67
  {
64
68
  description: "An application that synthesizes speech for diverse speaker prompts.",
65
69
  id: "parler-tts/parler_tts_mini",
@@ -67,30 +67,30 @@ const taskData: TaskDataCustom = {
67
67
  ],
68
68
  models: [
69
69
  {
70
- description: "A strong model for video generation.",
71
- id: "Vchitect/LaVie",
70
+ description: "A strong model for consistent video generation.",
71
+ id: "rain1011/pyramid-flow-sd3",
72
72
  },
73
73
  {
74
74
  description: "A robust model for text-to-video generation.",
75
- id: "damo-vilab/text-to-video-ms-1.7b",
75
+ id: "VideoCrafter/VideoCrafter2",
76
76
  },
77
77
  {
78
- description: "A text-to-video generation model with high quality and smooth outputs.",
79
- id: "hotshotco/Hotshot-XL",
78
+ description: "A cutting-edge text-to-video generation model.",
79
+ id: "TIGER-Lab/T2V-Turbo-V2",
80
80
  },
81
81
  ],
82
82
  spaces: [
83
83
  {
84
84
  description: "An application that generates video from text.",
85
- id: "fffiloni/zeroscope",
85
+ id: "VideoCrafter/VideoCrafter",
86
86
  },
87
87
  {
88
- description: "An application that generates video from image and text.",
89
- id: "Vchitect/LaVie",
88
+ description: "Consistent video generation application.",
89
+ id: "TIGER-Lab/T2V-Turbo-V2",
90
90
  },
91
91
  {
92
- description: "An application that generates videos from text and provides multi-model support.",
93
- id: "ArtGAN/Video-Diffusion-WebUI",
92
+ description: "A cutting edge video generation application.",
93
+ id: "Pyramid-Flow/pyramid-flow",
94
94
  },
95
95
  ],
96
96
  summary:
@@ -60,12 +60,15 @@ export interface TokenClassificationOutputElement {
60
60
  /**
61
61
  * The character position in the input where this group ends.
62
62
  */
63
- end?: number;
63
+ end: number;
64
64
  /**
65
- * The predicted label for that group of tokens
65
+ * The predicted label for a single token
66
+ */
67
+ entity?: string;
68
+ /**
69
+ * The predicted label for a group of one or more tokens
66
70
  */
67
71
  entity_group?: string;
68
- label: unknown;
69
72
  /**
70
73
  * The associated score / probability
71
74
  */
@@ -73,10 +76,10 @@ export interface TokenClassificationOutputElement {
73
76
  /**
74
77
  * The character position in the input where this group begins.
75
78
  */
76
- start?: number;
79
+ start: number;
77
80
  /**
78
81
  * The corresponding text
79
82
  */
80
- word?: string;
83
+ word: string;
81
84
  [property: string]: unknown;
82
85
  }