@huggingface/tasks 0.2.2 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. package/README.md +1 -1
  2. package/dist/index.cjs +3144 -3085
  3. package/dist/index.d.ts +441 -74
  4. package/dist/index.js +3143 -3084
  5. package/package.json +1 -1
  6. package/src/index.ts +2 -5
  7. package/src/library-to-tasks.ts +1 -1
  8. package/src/model-libraries-downloads.ts +20 -0
  9. package/src/{library-ui-elements.ts → model-libraries-snippets.ts} +46 -292
  10. package/src/model-libraries.ts +375 -44
  11. package/src/pipelines.ts +14 -8
  12. package/src/tasks/audio-classification/inference.ts +4 -4
  13. package/src/tasks/audio-classification/spec/input.json +4 -4
  14. package/src/tasks/audio-classification/spec/output.json +1 -12
  15. package/src/tasks/automatic-speech-recognition/inference.ts +35 -30
  16. package/src/tasks/automatic-speech-recognition/spec/input.json +3 -3
  17. package/src/tasks/automatic-speech-recognition/spec/output.json +30 -28
  18. package/src/tasks/common-definitions.json +25 -17
  19. package/src/tasks/depth-estimation/inference.ts +10 -10
  20. package/src/tasks/depth-estimation/spec/input.json +3 -8
  21. package/src/tasks/depth-estimation/spec/output.json +9 -3
  22. package/src/tasks/document-question-answering/inference.ts +16 -8
  23. package/src/tasks/document-question-answering/spec/input.json +9 -9
  24. package/src/tasks/document-question-answering/spec/output.json +2 -2
  25. package/src/tasks/feature-extraction/inference.ts +1 -1
  26. package/src/tasks/feature-extraction/spec/input.json +2 -2
  27. package/src/tasks/fill-mask/inference.ts +4 -3
  28. package/src/tasks/fill-mask/spec/input.json +3 -3
  29. package/src/tasks/fill-mask/spec/output.json +1 -1
  30. package/src/tasks/image-classification/inference.ts +3 -3
  31. package/src/tasks/image-classification/spec/input.json +4 -4
  32. package/src/tasks/image-segmentation/inference.ts +3 -3
  33. package/src/tasks/image-segmentation/spec/input.json +4 -4
  34. package/src/tasks/image-to-image/inference.ts +5 -5
  35. package/src/tasks/image-to-image/spec/input.json +9 -7
  36. package/src/tasks/image-to-text/inference.ts +25 -20
  37. package/src/tasks/image-to-text/spec/input.json +3 -3
  38. package/src/tasks/image-to-text/spec/output.json +8 -11
  39. package/src/tasks/index.ts +2 -0
  40. package/src/tasks/object-detection/inference.ts +1 -1
  41. package/src/tasks/object-detection/spec/input.json +2 -2
  42. package/src/tasks/placeholder/spec/input.json +4 -4
  43. package/src/tasks/placeholder/spec/output.json +1 -1
  44. package/src/tasks/question-answering/inference.ts +8 -8
  45. package/src/tasks/question-answering/spec/input.json +9 -9
  46. package/src/tasks/sentence-similarity/inference.ts +1 -1
  47. package/src/tasks/sentence-similarity/spec/input.json +2 -2
  48. package/src/tasks/summarization/inference.ts +5 -4
  49. package/src/tasks/table-question-answering/inference.ts +1 -1
  50. package/src/tasks/table-question-answering/spec/input.json +8 -3
  51. package/src/tasks/text-classification/inference.ts +3 -3
  52. package/src/tasks/text-classification/spec/input.json +4 -4
  53. package/src/tasks/text-generation/inference.ts +123 -14
  54. package/src/tasks/text-generation/spec/input.json +28 -12
  55. package/src/tasks/text-generation/spec/output.json +112 -9
  56. package/src/tasks/text-to-audio/inference.ts +24 -19
  57. package/src/tasks/text-to-audio/spec/input.json +2 -2
  58. package/src/tasks/text-to-audio/spec/output.json +10 -13
  59. package/src/tasks/text-to-image/inference.ts +6 -8
  60. package/src/tasks/text-to-image/spec/input.json +9 -7
  61. package/src/tasks/text-to-image/spec/output.json +7 -9
  62. package/src/tasks/text-to-speech/inference.ts +18 -17
  63. package/src/tasks/text2text-generation/inference.ts +10 -8
  64. package/src/tasks/text2text-generation/spec/input.json +4 -4
  65. package/src/tasks/text2text-generation/spec/output.json +8 -11
  66. package/src/tasks/token-classification/inference.ts +4 -4
  67. package/src/tasks/token-classification/spec/input.json +4 -4
  68. package/src/tasks/token-classification/spec/output.json +1 -1
  69. package/src/tasks/translation/inference.ts +5 -4
  70. package/src/tasks/video-classification/inference.ts +5 -5
  71. package/src/tasks/video-classification/spec/input.json +6 -6
  72. package/src/tasks/visual-question-answering/inference.ts +2 -2
  73. package/src/tasks/visual-question-answering/spec/input.json +3 -3
  74. package/src/tasks/zero-shot-classification/inference.ts +3 -3
  75. package/src/tasks/zero-shot-classification/spec/input.json +4 -4
  76. package/src/tasks/zero-shot-image-classification/inference.ts +2 -2
  77. package/src/tasks/zero-shot-image-classification/spec/input.json +3 -3
  78. package/src/tasks/zero-shot-object-detection/inference.ts +1 -1
  79. package/src/tasks/zero-shot-object-detection/spec/input.json +2 -2
@@ -1,52 +1,383 @@
1
+ import * as snippets from "./model-libraries-snippets";
2
+ import type { ModelData } from "./model-data";
3
+ import type { ElasticBoolQueryFilter } from "./model-libraries-downloads";
4
+
5
+ /**
6
+ * Elements configurable by a model library.
7
+ */
8
+ export interface LibraryUiElement {
9
+ /**
10
+ * Pretty name of the library.
11
+ * displayed in tags, and on the main
12
+ * call-to-action button on the model page.
13
+ */
14
+ prettyLabel: string;
15
+ /**
16
+ * Repo name of the library's (usually on GitHub) code repo
17
+ */
18
+ repoName: string;
19
+ /**
20
+ * URL to library's (usually on GitHub) code repo
21
+ */
22
+ repoUrl: string;
23
+ /**
24
+ * URL to library's docs
25
+ */
26
+ docsUrl?: string;
27
+ /**
28
+ * Code snippet(s) displayed on model page
29
+ */
30
+ snippets?: (model: ModelData) => string[];
31
+ /**
32
+ * Elastic query used to count this library's model downloads
33
+ *
34
+ * By default, those files are counted:
35
+ * "config.json", "config.yaml", "hyperparams.yaml", "meta.yaml"
36
+ */
37
+ countDownloads?: ElasticBoolQueryFilter;
38
+ /**
39
+ * should we display this library in hf.co/models filter
40
+ * (only for popular libraries with > 100 models)
41
+ */
42
+ filter?: boolean;
43
+ }
44
+
1
45
  /**
2
46
  * Add your new library here.
3
47
  *
4
48
  * This is for modeling (= architectures) libraries, not for file formats (like ONNX, etc).
5
- * File formats live in an enum inside the internal codebase.
49
+ * (unlike libraries, file formats live in an enum inside the internal codebase.)
50
+ *
51
+ * Doc on how to add a library to the Hub:
52
+ *
53
+ * https://huggingface.co/docs/hub/models-adding-libraries
54
+ *
55
+ * /!\ IMPORTANT
56
+ *
57
+ * The key you choose is the tag your models have in their library_name on the Hub.
6
58
  */
7
- export enum ModelLibrary {
8
- "adapter-transformers" = "Adapters",
9
- "allennlp" = "allenNLP",
10
- "asteroid" = "Asteroid",
11
- "bertopic" = "BERTopic",
12
- "diffusers" = "Diffusers",
13
- "doctr" = "docTR",
14
- "espnet" = "ESPnet",
15
- "fairseq" = "Fairseq",
16
- "flair" = "Flair",
17
- "keras" = "Keras",
18
- "k2" = "K2",
19
- "mlx" = "MLX",
20
- "nemo" = "NeMo",
21
- "open_clip" = "OpenCLIP",
22
- "paddlenlp" = "PaddleNLP",
23
- "peft" = "PEFT",
24
- "pyannote-audio" = "pyannote.audio",
25
- "sample-factory" = "Sample Factory",
26
- "sentence-transformers" = "Sentence Transformers",
27
- "setfit" = "SetFit",
28
- "sklearn" = "Scikit-learn",
29
- "spacy" = "spaCy",
30
- "span-marker" = "SpanMarker",
31
- "speechbrain" = "speechbrain",
32
- "tensorflowtts" = "TensorFlowTTS",
33
- "timm" = "Timm",
34
- "fastai" = "fastai",
35
- "transformers" = "Transformers",
36
- "transformers.js" = "Transformers.js",
37
- "stanza" = "Stanza",
38
- "fasttext" = "fastText",
39
- "stable-baselines3" = "Stable-Baselines3",
40
- "ml-agents" = "Unity ML-Agents",
41
- "pythae" = "Pythae",
42
- "mindspore" = "MindSpore",
43
- "unity-sentis" = "Unity Sentis",
44
- }
45
59
 
46
- export type ModelLibraryKey = keyof typeof ModelLibrary;
60
+ export const MODEL_LIBRARIES_UI_ELEMENTS = {
61
+ "adapter-transformers": {
62
+ prettyLabel: "Adapters",
63
+ repoName: "adapters",
64
+ repoUrl: "https://github.com/Adapter-Hub/adapters",
65
+ docsUrl: "https://huggingface.co/docs/hub/adapters",
66
+ snippets: snippets.adapters,
67
+ filter: true,
68
+ countDownloads: {
69
+ term: { path: "adapter_config.json" },
70
+ },
71
+ },
72
+ allennlp: {
73
+ prettyLabel: "AllenNLP",
74
+ repoName: "AllenNLP",
75
+ repoUrl: "https://github.com/allenai/allennlp",
76
+ docsUrl: "https://huggingface.co/docs/hub/allennlp",
77
+ snippets: snippets.allennlp,
78
+ filter: true,
79
+ },
80
+ asteroid: {
81
+ prettyLabel: "Asteroid",
82
+ repoName: "Asteroid",
83
+ repoUrl: "https://github.com/asteroid-team/asteroid",
84
+ docsUrl: "https://huggingface.co/docs/hub/asteroid",
85
+ snippets: snippets.asteroid,
86
+ filter: true,
87
+ countDownloads: {
88
+ term: { path: "pytorch_model.bin" },
89
+ },
90
+ },
91
+ bertopic: {
92
+ prettyLabel: "BERTopic",
93
+ repoName: "BERTopic",
94
+ repoUrl: "https://github.com/MaartenGr/BERTopic",
95
+ snippets: snippets.bertopic,
96
+ filter: true,
97
+ },
98
+ diffusers: {
99
+ prettyLabel: "Diffusers",
100
+ repoName: "🤗/diffusers",
101
+ repoUrl: "https://github.com/huggingface/diffusers",
102
+ docsUrl: "https://huggingface.co/docs/hub/diffusers",
103
+ snippets: snippets.diffusers,
104
+ filter: true,
105
+ /// diffusers has its own more complex "countDownloads" query
106
+ },
107
+ doctr: {
108
+ prettyLabel: "docTR",
109
+ repoName: "doctr",
110
+ repoUrl: "https://github.com/mindee/doctr",
111
+ },
112
+ espnet: {
113
+ prettyLabel: "ESPnet",
114
+ repoName: "ESPnet",
115
+ repoUrl: "https://github.com/espnet/espnet",
116
+ docsUrl: "https://huggingface.co/docs/hub/espnet",
117
+ snippets: snippets.espnet,
118
+ filter: true,
119
+ },
120
+ fairseq: {
121
+ prettyLabel: "Fairseq",
122
+ repoName: "fairseq",
123
+ repoUrl: "https://github.com/pytorch/fairseq",
124
+ snippets: snippets.fairseq,
125
+ filter: true,
126
+ },
127
+ fastai: {
128
+ prettyLabel: "fastai",
129
+ repoName: "fastai",
130
+ repoUrl: "https://github.com/fastai/fastai",
131
+ docsUrl: "https://huggingface.co/docs/hub/fastai",
132
+ snippets: snippets.fastai,
133
+ filter: true,
134
+ },
135
+ fasttext: {
136
+ prettyLabel: "fastText",
137
+ repoName: "fastText",
138
+ repoUrl: "https://fasttext.cc/",
139
+ snippets: snippets.fasttext,
140
+ filter: true,
141
+ },
142
+ flair: {
143
+ prettyLabel: "Flair",
144
+ repoName: "Flair",
145
+ repoUrl: "https://github.com/flairNLP/flair",
146
+ docsUrl: "https://huggingface.co/docs/hub/flair",
147
+ snippets: snippets.flair,
148
+ filter: true,
149
+ countDownloads: {
150
+ term: { path: "pytorch_model.bin" },
151
+ },
152
+ },
153
+ keras: {
154
+ prettyLabel: "Keras",
155
+ repoName: "Keras",
156
+ repoUrl: "https://github.com/keras-team/keras",
157
+ docsUrl: "https://huggingface.co/docs/hub/keras",
158
+ snippets: snippets.keras,
159
+ filter: true,
160
+ countDownloads: { term: { path: "saved_model.pb" } },
161
+ },
162
+ k2: {
163
+ prettyLabel: "K2",
164
+ repoName: "k2",
165
+ repoUrl: "https://github.com/k2-fsa/k2",
166
+ },
167
+ mindspore: {
168
+ prettyLabel: "MindSpore",
169
+ repoName: "mindspore",
170
+ repoUrl: "https://github.com/mindspore-ai/mindspore",
171
+ },
172
+ "ml-agents": {
173
+ prettyLabel: "ml-agents",
174
+ repoName: "ml-agents",
175
+ repoUrl: "https://github.com/Unity-Technologies/ml-agents",
176
+ docsUrl: "https://huggingface.co/docs/hub/ml-agents",
177
+ snippets: snippets.mlAgents,
178
+ filter: true,
179
+ countDownloads: { wildcard: { path: "*.onnx" } },
180
+ },
181
+ mlx: {
182
+ prettyLabel: "MLX",
183
+ repoName: "MLX",
184
+ repoUrl: "https://github.com/ml-explore/mlx-examples/tree/main",
185
+ snippets: snippets.mlx,
186
+ filter: true,
187
+ },
188
+ nemo: {
189
+ prettyLabel: "NeMo",
190
+ repoName: "NeMo",
191
+ repoUrl: "https://github.com/NVIDIA/NeMo",
192
+ snippets: snippets.nemo,
193
+ filter: true,
194
+ countDownloads: { wildcard: { path: "*.nemo" } },
195
+ },
196
+ open_clip: {
197
+ prettyLabel: "OpenCLIP",
198
+ repoName: "OpenCLIP",
199
+ repoUrl: "https://github.com/mlfoundations/open_clip",
200
+ snippets: snippets.open_clip,
201
+ filter: true,
202
+ countDownloads: { wildcard: { path: "*pytorch_model.bin" } },
203
+ },
204
+ paddlenlp: {
205
+ prettyLabel: "paddlenlp",
206
+ repoName: "PaddleNLP",
207
+ repoUrl: "https://github.com/PaddlePaddle/PaddleNLP",
208
+ docsUrl: "https://huggingface.co/docs/hub/paddlenlp",
209
+ snippets: snippets.paddlenlp,
210
+ filter: true,
211
+ countDownloads: {
212
+ term: { path: "model_config.json" },
213
+ },
214
+ },
215
+ peft: {
216
+ prettyLabel: "PEFT",
217
+ repoName: "PEFT",
218
+ repoUrl: "https://github.com/huggingface/peft",
219
+ snippets: snippets.peft,
220
+ filter: true,
221
+ countDownloads: {
222
+ term: { path: "adapter_config.json" },
223
+ },
224
+ },
225
+ "pyannote-audio": {
226
+ prettyLabel: "pyannote.audio",
227
+ repoName: "pyannote-audio",
228
+ repoUrl: "https://github.com/pyannote/pyannote-audio",
229
+ snippets: snippets.pyannote_audio,
230
+ filter: true,
231
+ },
232
+ pythae: {
233
+ prettyLabel: "pythae",
234
+ repoName: "pythae",
235
+ repoUrl: "https://github.com/clementchadebec/benchmark_VAE",
236
+ snippets: snippets.pythae,
237
+ filter: true,
238
+ },
239
+ "sample-factory": {
240
+ prettyLabel: "sample-factory",
241
+ repoName: "sample-factory",
242
+ repoUrl: "https://github.com/alex-petrenko/sample-factory",
243
+ docsUrl: "https://huggingface.co/docs/hub/sample-factory",
244
+ snippets: snippets.sampleFactory,
245
+ filter: true,
246
+ countDownloads: { term: { path: "cfg.json" } },
247
+ },
248
+ "sentence-transformers": {
249
+ prettyLabel: "sentence-transformers",
250
+ repoName: "sentence-transformers",
251
+ repoUrl: "https://github.com/UKPLab/sentence-transformers",
252
+ docsUrl: "https://huggingface.co/docs/hub/sentence-transformers",
253
+ snippets: snippets.sentenceTransformers,
254
+ filter: true,
255
+ },
256
+ setfit: {
257
+ prettyLabel: "setfit",
258
+ repoName: "setfit",
259
+ repoUrl: "https://github.com/huggingface/setfit",
260
+ docsUrl: "https://huggingface.co/docs/hub/setfit",
261
+ snippets: snippets.setfit,
262
+ filter: true,
263
+ },
264
+ sklearn: {
265
+ prettyLabel: "Scikit-learn",
266
+ repoName: "Scikit-learn",
267
+ repoUrl: "https://github.com/scikit-learn/scikit-learn",
268
+ snippets: snippets.sklearn,
269
+ filter: true,
270
+ countDownloads: {
271
+ term: { path: "sklearn_model.joblib" },
272
+ },
273
+ },
274
+ spacy: {
275
+ prettyLabel: "spaCy",
276
+ repoName: "spaCy",
277
+ repoUrl: "https://github.com/explosion/spaCy",
278
+ docsUrl: "https://huggingface.co/docs/hub/spacy",
279
+ snippets: snippets.spacy,
280
+ filter: true,
281
+ countDownloads: {
282
+ wildcard: { path: "*.whl" },
283
+ },
284
+ },
285
+ "span-marker": {
286
+ prettyLabel: "SpanMarker",
287
+ repoName: "SpanMarkerNER",
288
+ repoUrl: "https://github.com/tomaarsen/SpanMarkerNER",
289
+ docsUrl: "https://huggingface.co/docs/hub/span_marker",
290
+ snippets: snippets.span_marker,
291
+ filter: true,
292
+ },
293
+ speechbrain: {
294
+ prettyLabel: "speechbrain",
295
+ repoName: "speechbrain",
296
+ repoUrl: "https://github.com/speechbrain/speechbrain",
297
+ docsUrl: "https://huggingface.co/docs/hub/speechbrain",
298
+ snippets: snippets.speechbrain,
299
+ filter: true,
300
+ countDownloads: {
301
+ term: { path: "hyperparams.yaml" },
302
+ },
303
+ },
304
+ "stable-baselines3": {
305
+ prettyLabel: "stable-baselines3",
306
+ repoName: "stable-baselines3",
307
+ repoUrl: "https://github.com/huggingface/huggingface_sb3",
308
+ docsUrl: "https://huggingface.co/docs/hub/stable-baselines3",
309
+ snippets: snippets.stableBaselines3,
310
+ filter: true,
311
+ countDownloads: {
312
+ wildcard: { path: "*.zip" },
313
+ },
314
+ },
315
+ stanza: {
316
+ prettyLabel: "Stanza",
317
+ repoName: "stanza",
318
+ repoUrl: "https://github.com/stanfordnlp/stanza",
319
+ docsUrl: "https://huggingface.co/docs/hub/stanza",
320
+ snippets: snippets.stanza,
321
+ filter: true,
322
+ countDownloads: {
323
+ term: { path: "models/default.zip" },
324
+ },
325
+ },
326
+ tensorflowtts: {
327
+ prettyLabel: "TensorFlowTTS",
328
+ repoName: "TensorFlowTTS",
329
+ repoUrl: "https://github.com/TensorSpeech/TensorFlowTTS",
330
+ snippets: snippets.tensorflowtts,
331
+ },
332
+ timm: {
333
+ prettyLabel: "timm",
334
+ repoName: "pytorch-image-models",
335
+ repoUrl: "https://github.com/rwightman/pytorch-image-models",
336
+ docsUrl: "https://huggingface.co/docs/hub/timm",
337
+ snippets: snippets.timm,
338
+ filter: true,
339
+ countDownloads: {
340
+ terms: { path: ["pytorch_model.bin", "model.safetensors"] },
341
+ },
342
+ },
343
+ transformers: {
344
+ prettyLabel: "Transformers",
345
+ repoName: "🤗/transformers",
346
+ repoUrl: "https://github.com/huggingface/transformers",
347
+ docsUrl: "https://huggingface.co/docs/hub/transformers",
348
+ snippets: snippets.transformers,
349
+ filter: true,
350
+ },
351
+ "transformers.js": {
352
+ prettyLabel: "Transformers.js",
353
+ repoName: "transformers.js",
354
+ repoUrl: "https://github.com/xenova/transformers.js",
355
+ docsUrl: "https://huggingface.co/docs/hub/transformers-js",
356
+ snippets: snippets.transformersJS,
357
+ filter: true,
358
+ },
359
+ "unity-sentis": {
360
+ prettyLabel: "unity-sentis",
361
+ repoName: "unity-sentis",
362
+ repoUrl: "https://github.com/Unity-Technologies/sentis-samples",
363
+ snippets: snippets.sentis,
364
+ filter: true,
365
+ countDownloads: {
366
+ wildcard: { path: "*.sentis" },
367
+ },
368
+ },
369
+ } satisfies Record<string, LibraryUiElement>;
370
+
371
+ export type ModelLibraryKey = keyof typeof MODEL_LIBRARIES_UI_ELEMENTS;
47
372
 
48
- export const ALL_MODEL_LIBRARY_KEYS = Object.keys(ModelLibrary) as ModelLibraryKey[];
373
+ export const ALL_MODEL_LIBRARY_KEYS = Object.keys(MODEL_LIBRARIES_UI_ELEMENTS) as ModelLibraryKey[];
49
374
 
50
- export const ALL_DISPLAY_MODEL_LIBRARY_KEYS = ALL_MODEL_LIBRARY_KEYS.filter(
51
- (k) => !["doctr", "k2", "mindspore", "tensorflowtts"].includes(k)
52
- );
375
+ export const ALL_DISPLAY_MODEL_LIBRARY_KEYS = (
376
+ Object.entries(MODEL_LIBRARIES_UI_ELEMENTS as Record<ModelLibraryKey, LibraryUiElement>) as [
377
+ ModelLibraryKey,
378
+ LibraryUiElement,
379
+ ][]
380
+ )
381
+ // eslint-disable-next-line @typescript-eslint/no-unused-vars
382
+ .filter(([_, v]) => v.filter)
383
+ .map(([k]) => k);
package/src/pipelines.ts CHANGED
@@ -238,7 +238,7 @@ export const PIPELINE_DATA = {
238
238
  },
239
239
  "feature-extraction": {
240
240
  name: "Feature Extraction",
241
- modality: "multimodal",
241
+ modality: "nlp",
242
242
  color: "red",
243
243
  },
244
244
  "text-generation": {
@@ -419,7 +419,7 @@ export const PIPELINE_DATA = {
419
419
  },
420
420
  "text-to-image": {
421
421
  name: "Text-to-Image",
422
- modality: "multimodal",
422
+ modality: "cv",
423
423
  color: "yellow",
424
424
  },
425
425
  "image-to-text": {
@@ -430,7 +430,7 @@ export const PIPELINE_DATA = {
430
430
  name: "Image Captioning",
431
431
  },
432
432
  ],
433
- modality: "multimodal",
433
+ modality: "cv",
434
434
  color: "red",
435
435
  },
436
436
  "image-to-image": {
@@ -454,7 +454,7 @@ export const PIPELINE_DATA = {
454
454
  },
455
455
  "image-to-video": {
456
456
  name: "Image-to-Video",
457
- modality: "multimodal",
457
+ modality: "cv",
458
458
  color: "indigo",
459
459
  },
460
460
  "unconditional-image-generation": {
@@ -589,9 +589,15 @@ export const PIPELINE_DATA = {
589
589
  },
590
590
  "text-to-video": {
591
591
  name: "Text-to-Video",
592
- modality: "multimodal",
592
+ modality: "cv",
593
593
  color: "green",
594
594
  },
595
+ "image-text-to-text": {
596
+ name: "Image + Text to Image (VLLMs)",
597
+ modality: "multimodal",
598
+ color: "red",
599
+ hideInDatasets: true,
600
+ },
595
601
  "visual-question-answering": {
596
602
  name: "Visual Question Answering",
597
603
  subtasks: [
@@ -622,7 +628,7 @@ export const PIPELINE_DATA = {
622
628
  },
623
629
  "graph-ml": {
624
630
  name: "Graph Machine Learning",
625
- modality: "multimodal",
631
+ modality: "other",
626
632
  color: "green",
627
633
  },
628
634
  "mask-generation": {
@@ -637,12 +643,12 @@ export const PIPELINE_DATA = {
637
643
  },
638
644
  "text-to-3d": {
639
645
  name: "Text-to-3D",
640
- modality: "multimodal",
646
+ modality: "cv",
641
647
  color: "yellow",
642
648
  },
643
649
  "image-to-3d": {
644
650
  name: "Image-to-3D",
645
- modality: "multimodal",
651
+ modality: "cv",
646
652
  color: "green",
647
653
  },
648
654
  other: {
@@ -10,7 +10,7 @@ export interface AudioClassificationInput {
10
10
  /**
11
11
  * The input audio data
12
12
  */
13
- data: unknown;
13
+ inputs: unknown;
14
14
  /**
15
15
  * Additional inference parameters
16
16
  */
@@ -23,11 +23,11 @@ export interface AudioClassificationInput {
23
23
  * Additional inference parameters for Audio Classification
24
24
  */
25
25
  export interface AudioClassificationParameters {
26
- functionToApply?: ClassificationOutputTransform;
26
+ function_to_apply?: ClassificationOutputTransform;
27
27
  /**
28
28
  * When specified, limits the output to the top K most probable classes.
29
29
  */
30
- topK?: number;
30
+ top_k?: number;
31
31
  [property: string]: unknown;
32
32
  }
33
33
  /**
@@ -40,7 +40,7 @@ export type AudioClassificationOutput = AudioClassificationOutputElement[];
40
40
  */
41
41
  export interface AudioClassificationOutputElement {
42
42
  /**
43
- * The predicted class label (model specific).
43
+ * The predicted class label.
44
44
  */
45
45
  label: string;
46
46
  /**
@@ -5,7 +5,7 @@
5
5
  "title": "AudioClassificationInput",
6
6
  "type": "object",
7
7
  "properties": {
8
- "data": {
8
+ "inputs": {
9
9
  "description": "The input audio data"
10
10
  },
11
11
  "parameters": {
@@ -19,16 +19,16 @@
19
19
  "description": "Additional inference parameters for Audio Classification",
20
20
  "type": "object",
21
21
  "properties": {
22
- "functionToApply": {
22
+ "function_to_apply": {
23
23
  "title": "AudioClassificationOutputTransform",
24
24
  "$ref": "/inference/schemas/common-definitions.json#/definitions/ClassificationOutputTransform"
25
25
  },
26
- "topK": {
26
+ "top_k": {
27
27
  "type": "integer",
28
28
  "description": "When specified, limits the output to the top K most probable classes."
29
29
  }
30
30
  }
31
31
  }
32
32
  },
33
- "required": ["data"]
33
+ "required": ["inputs"]
34
34
  }
@@ -5,17 +5,6 @@
5
5
  "description": "Outputs for Audio Classification inference",
6
6
  "type": "array",
7
7
  "items": {
8
- "type": "object",
9
- "properties": {
10
- "label": {
11
- "type": "string",
12
- "description": "The predicted class label (model specific)."
13
- },
14
- "score": {
15
- "type": "number",
16
- "description": "The corresponding probability."
17
- }
18
- },
19
- "required": ["label", "score"]
8
+ "$ref": "/inference/schemas/common-definitions.json#/definitions/ClassificationOutput"
20
9
  }
21
10
  }