@huggingface/tasks 0.3.0 → 0.3.1

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -942,7 +942,7 @@ var PIPELINE_DATA = {
942
942
  },
943
943
  "feature-extraction": {
944
944
  name: "Feature Extraction",
945
- modality: "multimodal",
945
+ modality: "nlp",
946
946
  color: "red"
947
947
  },
948
948
  "text-generation": {
@@ -1123,7 +1123,7 @@ var PIPELINE_DATA = {
1123
1123
  },
1124
1124
  "text-to-image": {
1125
1125
  name: "Text-to-Image",
1126
- modality: "multimodal",
1126
+ modality: "cv",
1127
1127
  color: "yellow"
1128
1128
  },
1129
1129
  "image-to-text": {
@@ -1134,7 +1134,7 @@ var PIPELINE_DATA = {
1134
1134
  name: "Image Captioning"
1135
1135
  }
1136
1136
  ],
1137
- modality: "multimodal",
1137
+ modality: "cv",
1138
1138
  color: "red"
1139
1139
  },
1140
1140
  "image-to-image": {
@@ -1158,7 +1158,7 @@ var PIPELINE_DATA = {
1158
1158
  },
1159
1159
  "image-to-video": {
1160
1160
  name: "Image-to-Video",
1161
- modality: "multimodal",
1161
+ modality: "cv",
1162
1162
  color: "indigo"
1163
1163
  },
1164
1164
  "unconditional-image-generation": {
@@ -1293,9 +1293,15 @@ var PIPELINE_DATA = {
1293
1293
  },
1294
1294
  "text-to-video": {
1295
1295
  name: "Text-to-Video",
1296
- modality: "multimodal",
1296
+ modality: "cv",
1297
1297
  color: "green"
1298
1298
  },
1299
+ "image-text-to-text": {
1300
+ name: "Image + Text to Image (VLLMs)",
1301
+ modality: "multimodal",
1302
+ color: "red",
1303
+ hideInDatasets: true
1304
+ },
1299
1305
  "visual-question-answering": {
1300
1306
  name: "Visual Question Answering",
1301
1307
  subtasks: [
@@ -1326,7 +1332,7 @@ var PIPELINE_DATA = {
1326
1332
  },
1327
1333
  "graph-ml": {
1328
1334
  name: "Graph Machine Learning",
1329
- modality: "multimodal",
1335
+ modality: "other",
1330
1336
  color: "green"
1331
1337
  },
1332
1338
  "mask-generation": {
@@ -1341,12 +1347,12 @@ var PIPELINE_DATA = {
1341
1347
  },
1342
1348
  "text-to-3d": {
1343
1349
  name: "Text-to-3D",
1344
- modality: "multimodal",
1350
+ modality: "cv",
1345
1351
  color: "yellow"
1346
1352
  },
1347
1353
  "image-to-3d": {
1348
1354
  name: "Image-to-3D",
1349
- modality: "multimodal",
1355
+ modality: "cv",
1350
1356
  color: "green"
1351
1357
  },
1352
1358
  other: {
@@ -3908,6 +3914,7 @@ var TASKS_MODEL_LIBRARIES = {
3908
3914
  "graph-ml": ["transformers"],
3909
3915
  "image-classification": ["keras", "timm", "transformers", "transformers.js"],
3910
3916
  "image-segmentation": ["transformers", "transformers.js"],
3917
+ "image-text-to-text": ["transformers"],
3911
3918
  "image-to-image": ["diffusers", "transformers", "transformers.js"],
3912
3919
  "image-to-text": ["transformers", "transformers.js"],
3913
3920
  "image-to-video": ["diffusers"],
@@ -3974,6 +3981,7 @@ var TASKS_DATA = {
3974
3981
  "graph-ml": void 0,
3975
3982
  "image-classification": getData("image-classification", data_default8),
3976
3983
  "image-segmentation": getData("image-segmentation", data_default11),
3984
+ "image-text-to-text": void 0,
3977
3985
  "image-to-image": getData("image-to-image", data_default9),
3978
3986
  "image-to-text": getData("image-to-text", data_default10),
3979
3987
  "image-to-video": void 0,
package/dist/index.d.ts CHANGED
@@ -115,7 +115,7 @@ declare const PIPELINE_DATA: {
115
115
  };
116
116
  "feature-extraction": {
117
117
  name: string;
118
- modality: "multimodal";
118
+ modality: "nlp";
119
119
  color: "red";
120
120
  };
121
121
  "text-generation": {
@@ -218,7 +218,7 @@ declare const PIPELINE_DATA: {
218
218
  };
219
219
  "text-to-image": {
220
220
  name: string;
221
- modality: "multimodal";
221
+ modality: "cv";
222
222
  color: "yellow";
223
223
  };
224
224
  "image-to-text": {
@@ -227,7 +227,7 @@ declare const PIPELINE_DATA: {
227
227
  type: string;
228
228
  name: string;
229
229
  }[];
230
- modality: "multimodal";
230
+ modality: "cv";
231
231
  color: "red";
232
232
  };
233
233
  "image-to-image": {
@@ -241,7 +241,7 @@ declare const PIPELINE_DATA: {
241
241
  };
242
242
  "image-to-video": {
243
243
  name: string;
244
- modality: "multimodal";
244
+ modality: "cv";
245
245
  color: "indigo";
246
246
  };
247
247
  "unconditional-image-generation": {
@@ -334,9 +334,15 @@ declare const PIPELINE_DATA: {
334
334
  };
335
335
  "text-to-video": {
336
336
  name: string;
337
- modality: "multimodal";
337
+ modality: "cv";
338
338
  color: "green";
339
339
  };
340
+ "image-text-to-text": {
341
+ name: string;
342
+ modality: "multimodal";
343
+ color: "red";
344
+ hideInDatasets: true;
345
+ };
340
346
  "visual-question-answering": {
341
347
  name: string;
342
348
  subtasks: {
@@ -363,7 +369,7 @@ declare const PIPELINE_DATA: {
363
369
  };
364
370
  "graph-ml": {
365
371
  name: string;
366
- modality: "multimodal";
372
+ modality: "other";
367
373
  color: "green";
368
374
  };
369
375
  "mask-generation": {
@@ -378,12 +384,12 @@ declare const PIPELINE_DATA: {
378
384
  };
379
385
  "text-to-3d": {
380
386
  name: string;
381
- modality: "multimodal";
387
+ modality: "cv";
382
388
  color: "yellow";
383
389
  };
384
390
  "image-to-3d": {
385
391
  name: string;
386
- modality: "multimodal";
392
+ modality: "cv";
387
393
  color: "green";
388
394
  };
389
395
  other: {
@@ -395,9 +401,9 @@ declare const PIPELINE_DATA: {
395
401
  };
396
402
  };
397
403
  type PipelineType = keyof typeof PIPELINE_DATA;
398
- declare const PIPELINE_TYPES: ("other" | "text-classification" | "token-classification" | "table-question-answering" | "question-answering" | "zero-shot-classification" | "translation" | "summarization" | "conversational" | "feature-extraction" | "text-generation" | "text2text-generation" | "fill-mask" | "sentence-similarity" | "text-to-speech" | "text-to-audio" | "automatic-speech-recognition" | "audio-to-audio" | "audio-classification" | "voice-activity-detection" | "depth-estimation" | "image-classification" | "object-detection" | "image-segmentation" | "text-to-image" | "image-to-text" | "image-to-image" | "image-to-video" | "unconditional-image-generation" | "video-classification" | "reinforcement-learning" | "robotics" | "tabular-classification" | "tabular-regression" | "tabular-to-text" | "table-to-text" | "multiple-choice" | "text-retrieval" | "time-series-forecasting" | "text-to-video" | "visual-question-answering" | "document-question-answering" | "zero-shot-image-classification" | "graph-ml" | "mask-generation" | "zero-shot-object-detection" | "text-to-3d" | "image-to-3d")[];
404
+ declare const PIPELINE_TYPES: ("other" | "text-classification" | "token-classification" | "table-question-answering" | "question-answering" | "zero-shot-classification" | "translation" | "summarization" | "conversational" | "feature-extraction" | "text-generation" | "text2text-generation" | "fill-mask" | "sentence-similarity" | "text-to-speech" | "text-to-audio" | "automatic-speech-recognition" | "audio-to-audio" | "audio-classification" | "voice-activity-detection" | "depth-estimation" | "image-classification" | "object-detection" | "image-segmentation" | "text-to-image" | "image-to-text" | "image-to-image" | "image-to-video" | "unconditional-image-generation" | "video-classification" | "reinforcement-learning" | "robotics" | "tabular-classification" | "tabular-regression" | "tabular-to-text" | "table-to-text" | "multiple-choice" | "text-retrieval" | "time-series-forecasting" | "text-to-video" | "image-text-to-text" | "visual-question-answering" | "document-question-answering" | "zero-shot-image-classification" | "graph-ml" | "mask-generation" | "zero-shot-object-detection" | "text-to-3d" | "image-to-3d")[];
399
405
  declare const SUBTASK_TYPES: string[];
400
- declare const PIPELINE_TYPES_SET: Set<"other" | "text-classification" | "token-classification" | "table-question-answering" | "question-answering" | "zero-shot-classification" | "translation" | "summarization" | "conversational" | "feature-extraction" | "text-generation" | "text2text-generation" | "fill-mask" | "sentence-similarity" | "text-to-speech" | "text-to-audio" | "automatic-speech-recognition" | "audio-to-audio" | "audio-classification" | "voice-activity-detection" | "depth-estimation" | "image-classification" | "object-detection" | "image-segmentation" | "text-to-image" | "image-to-text" | "image-to-image" | "image-to-video" | "unconditional-image-generation" | "video-classification" | "reinforcement-learning" | "robotics" | "tabular-classification" | "tabular-regression" | "tabular-to-text" | "table-to-text" | "multiple-choice" | "text-retrieval" | "time-series-forecasting" | "text-to-video" | "visual-question-answering" | "document-question-answering" | "zero-shot-image-classification" | "graph-ml" | "mask-generation" | "zero-shot-object-detection" | "text-to-3d" | "image-to-3d">;
406
+ declare const PIPELINE_TYPES_SET: Set<"other" | "text-classification" | "token-classification" | "table-question-answering" | "question-answering" | "zero-shot-classification" | "translation" | "summarization" | "conversational" | "feature-extraction" | "text-generation" | "text2text-generation" | "fill-mask" | "sentence-similarity" | "text-to-speech" | "text-to-audio" | "automatic-speech-recognition" | "audio-to-audio" | "audio-classification" | "voice-activity-detection" | "depth-estimation" | "image-classification" | "object-detection" | "image-segmentation" | "text-to-image" | "image-to-text" | "image-to-image" | "image-to-video" | "unconditional-image-generation" | "video-classification" | "reinforcement-learning" | "robotics" | "tabular-classification" | "tabular-regression" | "tabular-to-text" | "table-to-text" | "multiple-choice" | "text-retrieval" | "time-series-forecasting" | "text-to-video" | "image-text-to-text" | "visual-question-answering" | "document-question-answering" | "zero-shot-image-classification" | "graph-ml" | "mask-generation" | "zero-shot-object-detection" | "text-to-3d" | "image-to-3d">;
401
407
 
402
408
  /**
403
409
  * See default-widget-inputs.ts for the default widget inputs, this files only contains the types
package/dist/index.js CHANGED
@@ -908,7 +908,7 @@ var PIPELINE_DATA = {
908
908
  },
909
909
  "feature-extraction": {
910
910
  name: "Feature Extraction",
911
- modality: "multimodal",
911
+ modality: "nlp",
912
912
  color: "red"
913
913
  },
914
914
  "text-generation": {
@@ -1089,7 +1089,7 @@ var PIPELINE_DATA = {
1089
1089
  },
1090
1090
  "text-to-image": {
1091
1091
  name: "Text-to-Image",
1092
- modality: "multimodal",
1092
+ modality: "cv",
1093
1093
  color: "yellow"
1094
1094
  },
1095
1095
  "image-to-text": {
@@ -1100,7 +1100,7 @@ var PIPELINE_DATA = {
1100
1100
  name: "Image Captioning"
1101
1101
  }
1102
1102
  ],
1103
- modality: "multimodal",
1103
+ modality: "cv",
1104
1104
  color: "red"
1105
1105
  },
1106
1106
  "image-to-image": {
@@ -1124,7 +1124,7 @@ var PIPELINE_DATA = {
1124
1124
  },
1125
1125
  "image-to-video": {
1126
1126
  name: "Image-to-Video",
1127
- modality: "multimodal",
1127
+ modality: "cv",
1128
1128
  color: "indigo"
1129
1129
  },
1130
1130
  "unconditional-image-generation": {
@@ -1259,9 +1259,15 @@ var PIPELINE_DATA = {
1259
1259
  },
1260
1260
  "text-to-video": {
1261
1261
  name: "Text-to-Video",
1262
- modality: "multimodal",
1262
+ modality: "cv",
1263
1263
  color: "green"
1264
1264
  },
1265
+ "image-text-to-text": {
1266
+ name: "Image + Text to Image (VLLMs)",
1267
+ modality: "multimodal",
1268
+ color: "red",
1269
+ hideInDatasets: true
1270
+ },
1265
1271
  "visual-question-answering": {
1266
1272
  name: "Visual Question Answering",
1267
1273
  subtasks: [
@@ -1292,7 +1298,7 @@ var PIPELINE_DATA = {
1292
1298
  },
1293
1299
  "graph-ml": {
1294
1300
  name: "Graph Machine Learning",
1295
- modality: "multimodal",
1301
+ modality: "other",
1296
1302
  color: "green"
1297
1303
  },
1298
1304
  "mask-generation": {
@@ -1307,12 +1313,12 @@ var PIPELINE_DATA = {
1307
1313
  },
1308
1314
  "text-to-3d": {
1309
1315
  name: "Text-to-3D",
1310
- modality: "multimodal",
1316
+ modality: "cv",
1311
1317
  color: "yellow"
1312
1318
  },
1313
1319
  "image-to-3d": {
1314
1320
  name: "Image-to-3D",
1315
- modality: "multimodal",
1321
+ modality: "cv",
1316
1322
  color: "green"
1317
1323
  },
1318
1324
  other: {
@@ -3874,6 +3880,7 @@ var TASKS_MODEL_LIBRARIES = {
3874
3880
  "graph-ml": ["transformers"],
3875
3881
  "image-classification": ["keras", "timm", "transformers", "transformers.js"],
3876
3882
  "image-segmentation": ["transformers", "transformers.js"],
3883
+ "image-text-to-text": ["transformers"],
3877
3884
  "image-to-image": ["diffusers", "transformers", "transformers.js"],
3878
3885
  "image-to-text": ["transformers", "transformers.js"],
3879
3886
  "image-to-video": ["diffusers"],
@@ -3940,6 +3947,7 @@ var TASKS_DATA = {
3940
3947
  "graph-ml": void 0,
3941
3948
  "image-classification": getData("image-classification", data_default8),
3942
3949
  "image-segmentation": getData("image-segmentation", data_default11),
3950
+ "image-text-to-text": void 0,
3943
3951
  "image-to-image": getData("image-to-image", data_default9),
3944
3952
  "image-to-text": getData("image-to-text", data_default10),
3945
3953
  "image-to-video": void 0,
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@huggingface/tasks",
3
3
  "packageManager": "pnpm@8.10.5",
4
- "version": "0.3.0",
4
+ "version": "0.3.1",
5
5
  "description": "List of ML tasks for huggingface.co/tasks",
6
6
  "repository": "https://github.com/huggingface/huggingface.js.git",
7
7
  "publishConfig": {
package/src/pipelines.ts CHANGED
@@ -238,7 +238,7 @@ export const PIPELINE_DATA = {
238
238
  },
239
239
  "feature-extraction": {
240
240
  name: "Feature Extraction",
241
- modality: "multimodal",
241
+ modality: "nlp",
242
242
  color: "red",
243
243
  },
244
244
  "text-generation": {
@@ -419,7 +419,7 @@ export const PIPELINE_DATA = {
419
419
  },
420
420
  "text-to-image": {
421
421
  name: "Text-to-Image",
422
- modality: "multimodal",
422
+ modality: "cv",
423
423
  color: "yellow",
424
424
  },
425
425
  "image-to-text": {
@@ -430,7 +430,7 @@ export const PIPELINE_DATA = {
430
430
  name: "Image Captioning",
431
431
  },
432
432
  ],
433
- modality: "multimodal",
433
+ modality: "cv",
434
434
  color: "red",
435
435
  },
436
436
  "image-to-image": {
@@ -454,7 +454,7 @@ export const PIPELINE_DATA = {
454
454
  },
455
455
  "image-to-video": {
456
456
  name: "Image-to-Video",
457
- modality: "multimodal",
457
+ modality: "cv",
458
458
  color: "indigo",
459
459
  },
460
460
  "unconditional-image-generation": {
@@ -589,9 +589,15 @@ export const PIPELINE_DATA = {
589
589
  },
590
590
  "text-to-video": {
591
591
  name: "Text-to-Video",
592
- modality: "multimodal",
592
+ modality: "cv",
593
593
  color: "green",
594
594
  },
595
+ "image-text-to-text": {
596
+ name: "Image + Text to Image (VLLMs)",
597
+ modality: "multimodal",
598
+ color: "red",
599
+ hideInDatasets: true,
600
+ },
595
601
  "visual-question-answering": {
596
602
  name: "Visual Question Answering",
597
603
  subtasks: [
@@ -622,7 +628,7 @@ export const PIPELINE_DATA = {
622
628
  },
623
629
  "graph-ml": {
624
630
  name: "Graph Machine Learning",
625
- modality: "multimodal",
631
+ modality: "other",
626
632
  color: "green",
627
633
  },
628
634
  "mask-generation": {
@@ -637,12 +643,12 @@ export const PIPELINE_DATA = {
637
643
  },
638
644
  "text-to-3d": {
639
645
  name: "Text-to-3D",
640
- modality: "multimodal",
646
+ modality: "cv",
641
647
  color: "yellow",
642
648
  },
643
649
  "image-to-3d": {
644
650
  name: "Image-to-3D",
645
- modality: "multimodal",
651
+ modality: "cv",
646
652
  color: "green",
647
653
  },
648
654
  other: {
@@ -53,6 +53,7 @@ export const TASKS_MODEL_LIBRARIES: Record<PipelineType, ModelLibraryKey[]> = {
53
53
  "graph-ml": ["transformers"],
54
54
  "image-classification": ["keras", "timm", "transformers", "transformers.js"],
55
55
  "image-segmentation": ["transformers", "transformers.js"],
56
+ "image-text-to-text": ["transformers"],
56
57
  "image-to-image": ["diffusers", "transformers", "transformers.js"],
57
58
  "image-to-text": ["transformers", "transformers.js"],
58
59
  "image-to-video": ["diffusers"],
@@ -130,6 +131,7 @@ export const TASKS_DATA: Record<PipelineType, TaskData | undefined> = {
130
131
  "graph-ml": undefined,
131
132
  "image-classification": getData("image-classification", imageClassification),
132
133
  "image-segmentation": getData("image-segmentation", imageSegmentation),
134
+ "image-text-to-text": undefined,
133
135
  "image-to-image": getData("image-to-image", imageToImage),
134
136
  "image-to-text": getData("image-to-text", imageToText),
135
137
  "image-to-video": undefined,