@huggingface/tasks 0.11.13 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +10 -2
- package/dist/index.js +10 -2
- package/dist/src/pipelines.d.ts +8 -2
- package/dist/src/pipelines.d.ts.map +1 -1
- package/dist/src/tasks/index.d.ts.map +1 -1
- package/dist/src/tasks/text-to-image/inference.d.ts +9 -5
- package/dist/src/tasks/text-to-image/inference.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/pipelines.ts +6 -0
- package/src/tasks/image-to-image/about.md +70 -21
- package/src/tasks/image-to-image/data.ts +1 -1
- package/src/tasks/index.ts +2 -0
- package/src/tasks/text-to-image/inference.ts +9 -5
- package/src/tasks/text-to-image/spec/input.json +7 -3
package/dist/index.cjs
CHANGED
|
@@ -1406,6 +1406,12 @@ var PIPELINE_DATA = {
|
|
|
1406
1406
|
color: "red",
|
|
1407
1407
|
hideInDatasets: true
|
|
1408
1408
|
},
|
|
1409
|
+
"any-to-any": {
|
|
1410
|
+
name: "Any-to-Any",
|
|
1411
|
+
modality: "multimodal",
|
|
1412
|
+
color: "yellow",
|
|
1413
|
+
hideInDatasets: true
|
|
1414
|
+
},
|
|
1409
1415
|
other: {
|
|
1410
1416
|
name: "Other",
|
|
1411
1417
|
modality: "other",
|
|
@@ -2069,7 +2075,7 @@ var taskData9 = {
|
|
|
2069
2075
|
id: "timbrooks/instruct-pix2pix"
|
|
2070
2076
|
}
|
|
2071
2077
|
],
|
|
2072
|
-
summary: "Image-to-image is the task of transforming
|
|
2078
|
+
summary: "Image-to-image is the task of transforming an input image through a variety of possible manipulations and enhancements, such as super-resolution, image inpainting, colorization, and more.",
|
|
2073
2079
|
widgetModels: ["lllyasviel/sd-controlnet-canny"],
|
|
2074
2080
|
youtubeId: ""
|
|
2075
2081
|
};
|
|
@@ -4264,7 +4270,8 @@ var TASKS_MODEL_LIBRARIES = {
|
|
|
4264
4270
|
"zero-shot-image-classification": ["transformers", "transformers.js"],
|
|
4265
4271
|
"zero-shot-object-detection": ["transformers", "transformers.js"],
|
|
4266
4272
|
"text-to-3d": ["diffusers"],
|
|
4267
|
-
"image-to-3d": ["diffusers"]
|
|
4273
|
+
"image-to-3d": ["diffusers"],
|
|
4274
|
+
"any-to-any": ["transformers"]
|
|
4268
4275
|
};
|
|
4269
4276
|
function getData(type, partialTaskData = data_default16) {
|
|
4270
4277
|
return {
|
|
@@ -4275,6 +4282,7 @@ function getData(type, partialTaskData = data_default16) {
|
|
|
4275
4282
|
};
|
|
4276
4283
|
}
|
|
4277
4284
|
var TASKS_DATA = {
|
|
4285
|
+
"any-to-any": getData("any-to-any", data_default16),
|
|
4278
4286
|
"audio-classification": getData("audio-classification", data_default),
|
|
4279
4287
|
"audio-to-audio": getData("audio-to-audio", data_default2),
|
|
4280
4288
|
"automatic-speech-recognition": getData("automatic-speech-recognition", data_default3),
|
package/dist/index.js
CHANGED
|
@@ -1368,6 +1368,12 @@ var PIPELINE_DATA = {
|
|
|
1368
1368
|
color: "red",
|
|
1369
1369
|
hideInDatasets: true
|
|
1370
1370
|
},
|
|
1371
|
+
"any-to-any": {
|
|
1372
|
+
name: "Any-to-Any",
|
|
1373
|
+
modality: "multimodal",
|
|
1374
|
+
color: "yellow",
|
|
1375
|
+
hideInDatasets: true
|
|
1376
|
+
},
|
|
1371
1377
|
other: {
|
|
1372
1378
|
name: "Other",
|
|
1373
1379
|
modality: "other",
|
|
@@ -2031,7 +2037,7 @@ var taskData9 = {
|
|
|
2031
2037
|
id: "timbrooks/instruct-pix2pix"
|
|
2032
2038
|
}
|
|
2033
2039
|
],
|
|
2034
|
-
summary: "Image-to-image is the task of transforming
|
|
2040
|
+
summary: "Image-to-image is the task of transforming an input image through a variety of possible manipulations and enhancements, such as super-resolution, image inpainting, colorization, and more.",
|
|
2035
2041
|
widgetModels: ["lllyasviel/sd-controlnet-canny"],
|
|
2036
2042
|
youtubeId: ""
|
|
2037
2043
|
};
|
|
@@ -4226,7 +4232,8 @@ var TASKS_MODEL_LIBRARIES = {
|
|
|
4226
4232
|
"zero-shot-image-classification": ["transformers", "transformers.js"],
|
|
4227
4233
|
"zero-shot-object-detection": ["transformers", "transformers.js"],
|
|
4228
4234
|
"text-to-3d": ["diffusers"],
|
|
4229
|
-
"image-to-3d": ["diffusers"]
|
|
4235
|
+
"image-to-3d": ["diffusers"],
|
|
4236
|
+
"any-to-any": ["transformers"]
|
|
4230
4237
|
};
|
|
4231
4238
|
function getData(type, partialTaskData = data_default16) {
|
|
4232
4239
|
return {
|
|
@@ -4237,6 +4244,7 @@ function getData(type, partialTaskData = data_default16) {
|
|
|
4237
4244
|
};
|
|
4238
4245
|
}
|
|
4239
4246
|
var TASKS_DATA = {
|
|
4247
|
+
"any-to-any": getData("any-to-any", data_default16),
|
|
4240
4248
|
"audio-classification": getData("audio-classification", data_default),
|
|
4241
4249
|
"audio-to-audio": getData("audio-to-audio", data_default2),
|
|
4242
4250
|
"automatic-speech-recognition": getData("automatic-speech-recognition", data_default3),
|
package/dist/src/pipelines.d.ts
CHANGED
|
@@ -403,6 +403,12 @@ export declare const PIPELINE_DATA: {
|
|
|
403
403
|
color: "red";
|
|
404
404
|
hideInDatasets: true;
|
|
405
405
|
};
|
|
406
|
+
"any-to-any": {
|
|
407
|
+
name: string;
|
|
408
|
+
modality: "multimodal";
|
|
409
|
+
color: "yellow";
|
|
410
|
+
hideInDatasets: true;
|
|
411
|
+
};
|
|
406
412
|
other: {
|
|
407
413
|
name: string;
|
|
408
414
|
modality: "other";
|
|
@@ -413,7 +419,7 @@ export declare const PIPELINE_DATA: {
|
|
|
413
419
|
};
|
|
414
420
|
export type PipelineType = keyof typeof PIPELINE_DATA;
|
|
415
421
|
export type WidgetType = PipelineType | "conversational";
|
|
416
|
-
export declare const PIPELINE_TYPES: ("other" | "text-classification" | "token-classification" | "table-question-answering" | "question-answering" | "zero-shot-classification" | "translation" | "summarization" | "feature-extraction" | "text-generation" | "text2text-generation" | "fill-mask" | "sentence-similarity" | "text-to-speech" | "text-to-audio" | "automatic-speech-recognition" | "audio-to-audio" | "audio-classification" | "voice-activity-detection" | "depth-estimation" | "image-classification" | "object-detection" | "image-segmentation" | "text-to-image" | "image-to-text" | "image-to-image" | "image-to-video" | "unconditional-image-generation" | "video-classification" | "reinforcement-learning" | "robotics" | "tabular-classification" | "tabular-regression" | "tabular-to-text" | "table-to-text" | "multiple-choice" | "text-retrieval" | "time-series-forecasting" | "text-to-video" | "image-text-to-text" | "visual-question-answering" | "document-question-answering" | "zero-shot-image-classification" | "graph-ml" | "mask-generation" | "zero-shot-object-detection" | "text-to-3d" | "image-to-3d" | "image-feature-extraction" | "video-text-to-text" | "keypoint-detection")[];
|
|
422
|
+
export declare const PIPELINE_TYPES: ("other" | "text-classification" | "token-classification" | "table-question-answering" | "question-answering" | "zero-shot-classification" | "translation" | "summarization" | "feature-extraction" | "text-generation" | "text2text-generation" | "fill-mask" | "sentence-similarity" | "text-to-speech" | "text-to-audio" | "automatic-speech-recognition" | "audio-to-audio" | "audio-classification" | "voice-activity-detection" | "depth-estimation" | "image-classification" | "object-detection" | "image-segmentation" | "text-to-image" | "image-to-text" | "image-to-image" | "image-to-video" | "unconditional-image-generation" | "video-classification" | "reinforcement-learning" | "robotics" | "tabular-classification" | "tabular-regression" | "tabular-to-text" | "table-to-text" | "multiple-choice" | "text-retrieval" | "time-series-forecasting" | "text-to-video" | "image-text-to-text" | "visual-question-answering" | "document-question-answering" | "zero-shot-image-classification" | "graph-ml" | "mask-generation" | "zero-shot-object-detection" | "text-to-3d" | "image-to-3d" | "image-feature-extraction" | "video-text-to-text" | "keypoint-detection" | "any-to-any")[];
|
|
417
423
|
export declare const SUBTASK_TYPES: string[];
|
|
418
|
-
export declare const PIPELINE_TYPES_SET: Set<"other" | "text-classification" | "token-classification" | "table-question-answering" | "question-answering" | "zero-shot-classification" | "translation" | "summarization" | "feature-extraction" | "text-generation" | "text2text-generation" | "fill-mask" | "sentence-similarity" | "text-to-speech" | "text-to-audio" | "automatic-speech-recognition" | "audio-to-audio" | "audio-classification" | "voice-activity-detection" | "depth-estimation" | "image-classification" | "object-detection" | "image-segmentation" | "text-to-image" | "image-to-text" | "image-to-image" | "image-to-video" | "unconditional-image-generation" | "video-classification" | "reinforcement-learning" | "robotics" | "tabular-classification" | "tabular-regression" | "tabular-to-text" | "table-to-text" | "multiple-choice" | "text-retrieval" | "time-series-forecasting" | "text-to-video" | "image-text-to-text" | "visual-question-answering" | "document-question-answering" | "zero-shot-image-classification" | "graph-ml" | "mask-generation" | "zero-shot-object-detection" | "text-to-3d" | "image-to-3d" | "image-feature-extraction" | "video-text-to-text" | "keypoint-detection">;
|
|
424
|
+
export declare const PIPELINE_TYPES_SET: Set<"other" | "text-classification" | "token-classification" | "table-question-answering" | "question-answering" | "zero-shot-classification" | "translation" | "summarization" | "feature-extraction" | "text-generation" | "text2text-generation" | "fill-mask" | "sentence-similarity" | "text-to-speech" | "text-to-audio" | "automatic-speech-recognition" | "audio-to-audio" | "audio-classification" | "voice-activity-detection" | "depth-estimation" | "image-classification" | "object-detection" | "image-segmentation" | "text-to-image" | "image-to-text" | "image-to-image" | "image-to-video" | "unconditional-image-generation" | "video-classification" | "reinforcement-learning" | "robotics" | "tabular-classification" | "tabular-regression" | "tabular-to-text" | "table-to-text" | "multiple-choice" | "text-retrieval" | "time-series-forecasting" | "text-to-video" | "image-text-to-text" | "visual-question-answering" | "document-question-answering" | "zero-shot-image-classification" | "graph-ml" | "mask-generation" | "zero-shot-object-detection" | "text-to-3d" | "image-to-3d" | "image-feature-extraction" | "video-text-to-text" | "keypoint-detection" | "any-to-any">;
|
|
419
425
|
//# sourceMappingURL=pipelines.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"pipelines.d.ts","sourceRoot":"","sources":["../../src/pipelines.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,UAAU,yEAA0E,CAAC;AAElG,MAAM,MAAM,QAAQ,GAAG,CAAC,OAAO,UAAU,CAAC,CAAC,MAAM,CAAC,CAAC;AAEnD,eAAO,MAAM,eAAe;;;;;;;;CAQQ,CAAC;AAErC;;;;;;GAMG;AACH,MAAM,WAAW,OAAO;IACvB;;OAEG;IACH,IAAI,EAAE,MAAM,CAAC;IACb;;OAEG;IACH,IAAI,EAAE,MAAM,CAAC;CACb;AAED;;;;;GAKG;AACH,MAAM,WAAW,YAAY;IAC5B;;OAEG;IACH,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,CAAC,EAAE,OAAO,EAAE,CAAC;IACrB,QAAQ,EAAE,QAAQ,CAAC;IACnB;;OAEG;IACH,KAAK,EAAE,MAAM,GAAG,OAAO,GAAG,QAAQ,GAAG,QAAQ,GAAG,KAAK,GAAG,QAAQ,CAAC;IACjE;;OAEG;IACH,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB;;OAEG;IACH,cAAc,CAAC,EAAE,OAAO,CAAC;CACzB;AAcD,eAAO,MAAM,aAAa
|
|
1
|
+
{"version":3,"file":"pipelines.d.ts","sourceRoot":"","sources":["../../src/pipelines.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,UAAU,yEAA0E,CAAC;AAElG,MAAM,MAAM,QAAQ,GAAG,CAAC,OAAO,UAAU,CAAC,CAAC,MAAM,CAAC,CAAC;AAEnD,eAAO,MAAM,eAAe;;;;;;;;CAQQ,CAAC;AAErC;;;;;;GAMG;AACH,MAAM,WAAW,OAAO;IACvB;;OAEG;IACH,IAAI,EAAE,MAAM,CAAC;IACb;;OAEG;IACH,IAAI,EAAE,MAAM,CAAC;CACb;AAED;;;;;GAKG;AACH,MAAM,WAAW,YAAY;IAC5B;;OAEG;IACH,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,CAAC,EAAE,OAAO,EAAE,CAAC;IACrB,QAAQ,EAAE,QAAQ,CAAC;IACnB;;OAEG;IACH,KAAK,EAAE,MAAM,GAAG,OAAO,GAAG,QAAQ,GAAG,QAAQ,GAAG,KAAK,GAAG,QAAQ,CAAC;IACjE;;OAEG;IACH,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB;;OAEG;IACH,cAAc,CAAC,EAAE,OAAO,CAAC;CACzB;AAcD,eAAO,MAAM,aAAa;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAsmBc,CAAC;AAEzC,MAAM,MAAM,YAAY,GAAG,MAAM,OAAO,aAAa,CAAC;AAEtD,MAAM,MAAM,UAAU,GAAG,YAAY,GAAG,gBAAgB,CAAC;AAEzD,eAAO,MAAM,cAAc,gpCAA+C,CAAC;AAE3E,eAAO,MAAM,aAAa,UAEN,CAAC;AAErB,eAAO,MAAM,kBAAkB,ipCAA0B,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/tasks/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AA0CjD,mBAAmB,kCAAkC,CAAC;AACtD,mBAAmB,0CAA0C,CAAC;AAC9D,YAAY,EACX,mBAAmB,EACnB,0BAA0B,EAC1B,oBAAoB,EACpB,4BAA4B,EAC5B,2BAA2B,EAC3B,0BAA0B,EAC1B,gCAAgC,EAChC,+BAA+B,GAC/B,MAAM,6BAA6B,CAAC;AACrC,mBAAmB,yCAAyC,CAAC;AAC7D,mBAAmB,gCAAgC,CAAC;AACpD,mBAAmB,uBAAuB,CAAC;AAC3C,YAAY,EACX,wBAAwB,EACxB,yBAAyB,EACzB,gCAAgC,EAChC,6BAA6B,GAC7B,MAAM,kCAAkC,CAAC;AAC1C,mBAAmB,4BAA4B,CAAC;AAChD,YAAY,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,qBAAqB,EAAE,MAAM,2BAA2B,CAAC;AAC5G,mBAAmB,gCAAgC,CAAC;AACpD,mBAAmB,8BAA8B,CAAC;AAClD,mBAAmB,8BAA8B,CAAC;AAClD,mBAAmB,gCAAgC,CAAC;AACpD,mBAAmB,iCAAiC,CAAC;AACrD,mBAAmB,2BAA2B,CAAC;AAC/C,mBAAmB,sCAAsC,CAAC;AAC1D,YAAY,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,qBAAqB,EAAE,MAAM,2BAA2B,CAAC;AAC5G,YAAY,EAAE,qBAAqB,EAAE,iBAAiB,EAAE,kBAAkB,EAAE,MAAM,4BAA4B,CAAC;AAC/G,mBAAmB,kCAAkC,CAAC;AACtD,YAAY,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,MAAM,yBAAyB,CAAC;AACnF,YAAY,EACX,6BAA6B,EAC7B,uBAAuB,EACvB,wBAAwB,EACxB,+BAA+B,EAC/B,4BAA4B,GAC5B,MAAM,iCAAiC,CAAC;AACzC,YAAY,EACX,gCAAgC,EAChC,gCAAgC,EAChC,mBAAmB,EACnB,oBAAoB,EACpB,2BAA2B,EAC3B,qCAAqC,EACrC,kCAAkC,EAClC,yBAAyB,EACzB,uCAAuC,EACvC,0BAA0B,GAC1B,MAAM,6BAA6B,CAAC;AACrC,mBAAmB,kCAAkC,CAAC;AACtD,mBAAmB,uCAAuC,CAAC;AAC3D,mBAAmB,sCAAsC,CAAC;AAC1D,mBAAmB,4CAA4C,CAAC;AAChE,YAAY,EACX,WAAW,EACX,4BAA4B,EAC5B,gCAAgC,EAChC,6BAA6B,EAC7B,oCAAoC,GACpC,MAAM,wCAAwC,CAAC;AAEhD,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AAE1D;;GAEG;AACH,eAAO,MAAM,qBAAqB,EAAE,MAAM,CAAC,YAAY,EAAE,eAAe,EAAE,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/tasks/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AA0CjD,mBAAmB,kCAAkC,CAAC;AACtD,mBAAmB,0CAA0C,CAAC;AAC9D,YAAY,EACX,mBAAmB,EACnB,0BAA0B,EAC1B,oBAAoB,EACpB,4BAA4B,EAC5B,2BAA2B,EAC3B,0BAA0B,EAC1B,gCAAgC,EAChC,+BAA+B,GAC/B,MAAM,6BAA6B,CAAC;AACrC,mBAAmB,yCAAyC,CAAC;AAC7D,mBAAmB,gCAAgC,CAAC;AACpD,mBAAmB,uBAAuB,CAAC;AAC3C,YAAY,EACX,wBAAwB,EACxB,yBAAyB,EACzB,gCAAgC,EAChC,6BAA6B,GAC7B,MAAM,kCAAkC,CAAC;AAC1C,mBAAmB,4BAA4B,CAAC;AAChD,YAAY,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,qBAAqB,EAAE,MAAM,2BAA2B,CAAC;AAC5G,mBAAmB,gCAAgC,CAAC;AACpD,mBAAmB,8BAA8B,CAAC;AAClD,mBAAmB,8BAA8B,CAAC;AAClD,mBAAmB,gCAAgC,CAAC;AACpD,mBAAmB,iCAAiC,CAAC;AACrD,mBAAmB,2BAA2B,CAAC;AAC/C,mBAAmB,sCAAsC,CAAC;AAC1D,YAAY,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,qBAAqB,EAAE,MAAM,2BAA2B,CAAC;AAC5G,YAAY,EAAE,qBAAqB,EAAE,iBAAiB,EAAE,kBAAkB,EAAE,MAAM,4BAA4B,CAAC;AAC/G,mBAAmB,kCAAkC,CAAC;AACtD,YAAY,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,MAAM,yBAAyB,CAAC;AACnF,YAAY,EACX,6BAA6B,EAC7B,uBAAuB,EACvB,wBAAwB,EACxB,+BAA+B,EAC/B,4BAA4B,GAC5B,MAAM,iCAAiC,CAAC;AACzC,YAAY,EACX,gCAAgC,EAChC,gCAAgC,EAChC,mBAAmB,EACnB,oBAAoB,EACpB,2BAA2B,EAC3B,qCAAqC,EACrC,kCAAkC,EAClC,yBAAyB,EACzB,uCAAuC,EACvC,0BAA0B,GAC1B,MAAM,6BAA6B,CAAC;AACrC,mBAAmB,kCAAkC,CAAC;AACtD,mBAAmB,uCAAuC,CAAC;AAC3D,mBAAmB,sCAAsC,CAAC;AAC1D,mBAAmB,4CAA4C,CAAC;AAChE,YAAY,EACX,WAAW,EACX,4BAA4B,EAC5B,gCAAgC,EAChC,6BAA6B,EAC7B,oCAAoC,GACpC,MAAM,wCAAwC,CAAC;AAEhD,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AAE1D;;GAEG;AACH,eAAO,MAAM,qBAAqB,EAAE,MAAM,CAAC,YAAY,EAAE,eAAe,EAAE,CA6DzE,CAAC;AAoBF,eAAO,MAAM,UAAU,EAAE,MAAM,CAAC,YAAY,EAAE,QAAQ,GAAG,SAAS,CAqDxD,CAAC;AAEX,MAAM,WAAW,WAAW;IAC3B,WAAW,EAAE,MAAM,CAAC;IACpB,EAAE,EAAE,MAAM,CAAC;CACX;AAED,MAAM,MAAM,aAAa,GACtB;IACA,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,OAAO,CAAC;CACb,GACD;IACA,IAAI,EAAE,KAAK,CAAC;QACX,KAAK,EAAE,MAAM,CAAC;QACd,KAAK,EAAE,MAAM,CAAC;KACd,CAAC,CAAC;IACH,IAAI,EAAE,OAAO,CAAC;CACb,GACD;IACA,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,KAAK,CAAC;CACX,G
ACD;IACA,KAAK,EAAE,MAAM,EAAE,EAAE,CAAC;IAClB,IAAI,EAAE,SAAS,CAAC;CACf,GACD;IACA,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;CACZ,GACD;IACA,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,KAAK,CAAC;QACb,GAAG,EAAE,MAAM,CAAC;QACZ,KAAK,EAAE,MAAM,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;KACb,CAAC,CAAC;IACH,IAAI,EAAE,kBAAkB,CAAC;CACxB,CAAC;AAEL,MAAM,WAAW,QAAQ;IACxB,MAAM,EAAE,aAAa,EAAE,CAAC;IACxB,OAAO,EAAE,aAAa,EAAE,CAAC;CACzB;AAED,MAAM,WAAW,QAAQ;IACxB,QAAQ,EAAE,WAAW,EAAE,CAAC;IACxB,IAAI,EAAE,QAAQ,CAAC;IACf,EAAE,EAAE,YAAY,CAAC;IACjB,WAAW,CAAC,EAAE,YAAY,CAAC;IAC3B,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,eAAe,EAAE,CAAC;IAC7B,OAAO,EAAE,WAAW,EAAE,CAAC;IACvB,MAAM,EAAE,WAAW,EAAE,CAAC;IACtB,MAAM,EAAE,WAAW,EAAE,CAAC;IACtB,OAAO,EAAE,MAAM,CAAC;IAChB,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,SAAS,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,MAAM,cAAc,GAAG,IAAI,CAAC,QAAQ,EAAE,IAAI,GAAG,OAAO,GAAG,WAAW,CAAC,CAAC"}
|
|
@@ -24,8 +24,8 @@ export interface TextToImageInput {
|
|
|
24
24
|
*/
|
|
25
25
|
export interface TextToImageParameters {
|
|
26
26
|
/**
|
|
27
|
-
*
|
|
28
|
-
*
|
|
27
|
+
* A higher guidance scale value encourages the model to generate images closely linked to
|
|
28
|
+
* the text prompt, but values too high may cause saturation and other artifacts.
|
|
29
29
|
*/
|
|
30
30
|
guidance_scale?: number;
|
|
31
31
|
/**
|
|
@@ -33,14 +33,18 @@ export interface TextToImageParameters {
|
|
|
33
33
|
*/
|
|
34
34
|
negative_prompt?: string[];
|
|
35
35
|
/**
|
|
36
|
-
*
|
|
37
|
-
*
|
|
36
|
+
* The number of denoising steps. More denoising steps usually lead to a higher quality
|
|
37
|
+
* image at the expense of slower inference.
|
|
38
38
|
*/
|
|
39
39
|
num_inference_steps?: number;
|
|
40
40
|
/**
|
|
41
|
-
*
|
|
41
|
+
* Override the scheduler with a compatible one.
|
|
42
42
|
*/
|
|
43
43
|
scheduler?: string;
|
|
44
|
+
/**
|
|
45
|
+
* Seed for the random number generator.
|
|
46
|
+
*/
|
|
47
|
+
seed?: number;
|
|
44
48
|
/**
|
|
45
49
|
* The size in pixel of the output image
|
|
46
50
|
*/
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"inference.d.ts","sourceRoot":"","sources":["../../../../src/tasks/text-to-image/inference.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAChC;;OAEG;IACH,MAAM,EAAE,MAAM,CAAC;IACf;;OAEG;IACH,UAAU,CAAC,EAAE,qBAAqB,CAAC;IACnC,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC;CAC5B;AAED;;;;GAIG;AACH,MAAM,WAAW,qBAAqB;IACrC;;;OAGG;IACH,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB;;OAEG;IACH,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;IAC3B;;;OAGG;IACH,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAC7B;;OAEG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;OAEG;IACH,WAAW,CAAC,EAAE,UAAU,CAAC;IACzB,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC;CAC5B;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IAC1B,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC;CAC5B;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IACjC;;OAEG;IACH,KAAK,EAAE,OAAO,CAAC;IACf,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC;CAC5B"}
|
|
1
|
+
{"version":3,"file":"inference.d.ts","sourceRoot":"","sources":["../../../../src/tasks/text-to-image/inference.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAChC;;OAEG;IACH,MAAM,EAAE,MAAM,CAAC;IACf;;OAEG;IACH,UAAU,CAAC,EAAE,qBAAqB,CAAC;IACnC,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC;CAC5B;AAED;;;;GAIG;AACH,MAAM,WAAW,qBAAqB;IACrC;;;OAGG;IACH,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB;;OAEG;IACH,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;IAC3B;;;OAGG;IACH,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAC7B;;OAEG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;OAEG;IACH,IAAI,CAAC,EAAE,MAAM,CAAC;IACd;;OAEG;IACH,WAAW,CAAC,EAAE,UAAU,CAAC;IACzB,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC;CAC5B;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IAC1B,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC;CAC5B;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IACjC;;OAEG;IACH,KAAK,EAAE,OAAO,CAAC;IACf,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC;CAC5B"}
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@huggingface/tasks",
|
|
3
3
|
"packageManager": "pnpm@8.10.5",
|
|
4
|
-
"version": "0.
|
|
4
|
+
"version": "0.12.0",
|
|
5
5
|
"description": "List of ML tasks for huggingface.co/tasks",
|
|
6
6
|
"repository": "https://github.com/huggingface/huggingface.js.git",
|
|
7
7
|
"publishConfig": {
|
package/src/pipelines.ts
CHANGED
|
@@ -670,6 +670,12 @@ export const PIPELINE_DATA = {
|
|
|
670
670
|
color: "red",
|
|
671
671
|
hideInDatasets: true,
|
|
672
672
|
},
|
|
673
|
+
"any-to-any": {
|
|
674
|
+
name: "Any-to-Any",
|
|
675
|
+
modality: "multimodal",
|
|
676
|
+
color: "yellow",
|
|
677
|
+
hideInDatasets: true,
|
|
678
|
+
},
|
|
673
679
|
other: {
|
|
674
680
|
name: "Other",
|
|
675
681
|
modality: "other",
|
|
@@ -1,15 +1,10 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
### Style transfer
|
|
1
|
+
Image-to-image pipelines can also be used in text-to-image tasks, to provide visual guidance to the text-guided generation process.
|
|
4
2
|
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
## Task Variants
|
|
3
|
+
## Use Cases
|
|
8
4
|
|
|
9
5
|
### Image inpainting
|
|
10
6
|
|
|
11
|
-
Image inpainting is widely used during photography editing to remove unwanted objects, such as poles, wires, or sensor
|
|
12
|
-
dust.
|
|
7
|
+
Image inpainting is widely used during photography editing to remove unwanted objects, such as poles, wires, or sensor dust.
|
|
13
8
|
|
|
14
9
|
### Image colorization
|
|
15
10
|
|
|
@@ -24,18 +19,27 @@ Super-resolution models increase the resolution of an image, allowing for higher
|
|
|
24
19
|
You can use pipelines for image-to-image in 🧨diffusers library to easily use image-to-image models. See an example for `StableDiffusionImg2ImgPipeline` below.
|
|
25
20
|
|
|
26
21
|
```python
|
|
27
|
-
|
|
28
|
-
from diffusers import
|
|
22
|
+
import torch
|
|
23
|
+
from diffusers import AutoPipelineForImage2Image
|
|
24
|
+
from diffusers.utils import make_image_grid, load_image
|
|
29
25
|
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
26
|
+
pipeline = AutoPipelineForImage2Image.from_pretrained(
|
|
27
|
+
"stabilityai/stable-diffusion-xl-refiner-1.0", torch_dtype=torch.float16, variant="fp16", use_safetensors=True
|
|
28
|
+
)
|
|
33
29
|
|
|
34
|
-
|
|
35
|
-
|
|
30
|
+
# this helps us to reduce memory usage — since SDXL is a bit heavy, this could help by
|
|
31
|
+
# offloading the model to CPU w/o hurting performance.
|
|
32
|
+
pipeline.enable_model_cpu_offload()
|
|
36
33
|
|
|
37
|
-
|
|
38
|
-
images
|
|
34
|
+
# prepare image
|
|
35
|
+
url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/img2img-sdxl-init.png"
|
|
36
|
+
init_image = load_image(url)
|
|
37
|
+
|
|
38
|
+
prompt = "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k"
|
|
39
|
+
|
|
40
|
+
# pass prompt and image to pipeline
|
|
41
|
+
image = pipeline(prompt, image=init_image, strength=0.5).images[0]
|
|
42
|
+
make_image_grid([init_image, image], rows=1, cols=2)
|
|
39
43
|
```
|
|
40
44
|
|
|
41
45
|
You can use [huggingface.js](https://github.com/huggingface/huggingface.js) to infer image-to-image models on Hugging Face Hub.
|
|
@@ -53,13 +57,53 @@ await inference.imageToImage({
|
|
|
53
57
|
});
|
|
54
58
|
```
|
|
55
59
|
|
|
56
|
-
##
|
|
60
|
+
## Use Cases for Text Guided Image Generation
|
|
57
61
|
|
|
58
|
-
|
|
62
|
+
### Style Transfer
|
|
63
|
+
|
|
64
|
+
One of the most popular use cases of image-to-image is style transfer. With style transfer models:
|
|
59
65
|
|
|
60
|
-
|
|
66
|
+
- a regular photo can be transformed into a variety of artistic styles or genres, such as a watercolor painting, a comic book illustration and more.
|
|
67
|
+
- new images can be generated using a text prompt, in the style of a reference input image.
|
|
68
|
+
|
|
69
|
+
See 🧨diffusers example for style transfer with `AutoPipelineForText2Image` below.
|
|
70
|
+
|
|
71
|
+
```python
|
|
72
|
+
from diffusers import AutoPipelineForText2Image
|
|
73
|
+
from diffusers.utils import load_image
|
|
74
|
+
import torch
|
|
75
|
+
|
|
76
|
+
# load pipeline
|
|
77
|
+
pipeline = AutoPipelineForText2Image.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16).to("cuda")
|
|
78
|
+
pipeline.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter_sdxl.bin")
|
|
79
|
+
|
|
80
|
+
# set the adapter and scales - this is a component that lets us add the style control from an image to the text-to-image model
|
|
81
|
+
scale = {
|
|
82
|
+
"down": {"block_2": [0.0, 1.0]},
|
|
83
|
+
"up": {"block_0": [0.0, 1.0, 0.0]},
|
|
84
|
+
}
|
|
85
|
+
pipeline.set_ip_adapter_scale(scale)
|
|
86
|
+
|
|
87
|
+
style_image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/0052a70beed5bf71b92610a43a52df6d286cd5f3/diffusers/rabbit.jpg")
|
|
88
|
+
|
|
89
|
+
generator = torch.Generator(device="cpu").manual_seed(26)
|
|
90
|
+
image = pipeline(
|
|
91
|
+
prompt="a cat, masterpiece, best quality, high quality",
|
|
92
|
+
ip_adapter_image=style_image,
|
|
93
|
+
negative_prompt="text, watermark, lowres, low quality, worst quality, deformed, glitch, low contrast, noisy, saturation, blurry",
|
|
94
|
+
guidance_scale=5,
|
|
95
|
+
num_inference_steps=30,
|
|
96
|
+
generator=generator,
|
|
97
|
+
).images[0]
|
|
98
|
+
image
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
### ControlNet
|
|
102
|
+
|
|
103
|
+
Controlling the outputs of diffusion models only with a text prompt is a challenging problem. ControlNet is a neural network model that provides image-based control to diffusion models. Control images can be edges or other landmarks extracted from a source image.
|
|
104
|
+

|
|
61
105
|
|
|
62
|
-
##
|
|
106
|
+
## Pix2Pix
|
|
63
107
|
|
|
64
108
|
Pix2Pix is a popular model used for image-to-image translation tasks. It is based on a conditional-GAN (generative adversarial network) where instead of a noise vector a 2D image is given as input. More information about Pix2Pix can be retrieved from this [link](https://phillipi.github.io/pix2pix/) where the associated paper and the GitHub repository can be found.
|
|
65
109
|
|
|
@@ -70,8 +114,13 @@ The images below show some examples extracted from the Pix2Pix paper. This model
|
|
|
70
114
|
## Useful Resources
|
|
71
115
|
|
|
72
116
|
- [Image-to-image guide with diffusers](https://huggingface.co/docs/diffusers/using-diffusers/img2img)
|
|
117
|
+
- Image inpainting: [inpainting with 🧨diffusers](https://huggingface.co/docs/diffusers/main/en/api/pipelines/stable_diffusion/inpaint), [demo](https://huggingface.co/spaces/diffusers/stable-diffusion-xl-inpainting)
|
|
118
|
+
- Colorization: [demo](https://huggingface.co/spaces/modelscope/old_photo_restoration)
|
|
119
|
+
- Super resolution: [image upscaling with 🧨diffusers](https://huggingface.co/docs/diffusers/main/en/api/pipelines/stable_diffusion/upscale#super-resolution), [demo](https://huggingface.co/spaces/radames/Enhance-This-HiDiffusion-SDXL)
|
|
120
|
+
- [Style transfer and layout control with diffusers 🧨](https://huggingface.co/docs/diffusers/main/en/using-diffusers/ip_adapter#style--layout-control)
|
|
73
121
|
- [Train your ControlNet with diffusers 🧨](https://huggingface.co/blog/train-your-controlnet)
|
|
74
122
|
- [Ultra fast ControlNet with 🧨 Diffusers](https://huggingface.co/blog/controlnet)
|
|
123
|
+
- [List of ControlNets trained in the community JAX Diffusers sprint](https://huggingface.co/spaces/jax-diffusers-event/leaderboard)
|
|
75
124
|
|
|
76
125
|
## References
|
|
77
126
|
|
|
@@ -93,7 +93,7 @@ const taskData: TaskDataCustom = {
|
|
|
93
93
|
},
|
|
94
94
|
],
|
|
95
95
|
summary:
|
|
96
|
-
"Image-to-image is the task of transforming
|
|
96
|
+
"Image-to-image is the task of transforming an input image through a variety of possible manipulations and enhancements, such as super-resolution, image inpainting, colorization, and more.",
|
|
97
97
|
widgetModels: ["lllyasviel/sd-controlnet-canny"],
|
|
98
98
|
youtubeId: "",
|
|
99
99
|
};
|
package/src/tasks/index.ts
CHANGED
|
@@ -170,6 +170,7 @@ export const TASKS_MODEL_LIBRARIES: Record<PipelineType, ModelLibraryKey[]> = {
|
|
|
170
170
|
"zero-shot-object-detection": ["transformers", "transformers.js"],
|
|
171
171
|
"text-to-3d": ["diffusers"],
|
|
172
172
|
"image-to-3d": ["diffusers"],
|
|
173
|
+
"any-to-any": ["transformers"],
|
|
173
174
|
};
|
|
174
175
|
|
|
175
176
|
/**
|
|
@@ -191,6 +192,7 @@ function getData(type: PipelineType, partialTaskData: TaskDataCustom = placehold
|
|
|
191
192
|
// Tasks that call getData() without the second argument will
|
|
192
193
|
// have a "placeholder" page.
|
|
193
194
|
export const TASKS_DATA: Record<PipelineType, TaskData | undefined> = {
|
|
195
|
+
"any-to-any": getData("any-to-any", placeholder),
|
|
194
196
|
"audio-classification": getData("audio-classification", audioClassification),
|
|
195
197
|
"audio-to-audio": getData("audio-to-audio", audioToAudio),
|
|
196
198
|
"automatic-speech-recognition": getData("automatic-speech-recognition", automaticSpeechRecognition),
|
|
@@ -26,8 +26,8 @@ export interface TextToImageInput {
|
|
|
26
26
|
*/
|
|
27
27
|
export interface TextToImageParameters {
|
|
28
28
|
/**
|
|
29
|
-
*
|
|
30
|
-
*
|
|
29
|
+
* A higher guidance scale value encourages the model to generate images closely linked to
|
|
30
|
+
* the text prompt, but values too high may cause saturation and other artifacts.
|
|
31
31
|
*/
|
|
32
32
|
guidance_scale?: number;
|
|
33
33
|
/**
|
|
@@ -35,14 +35,18 @@ export interface TextToImageParameters {
|
|
|
35
35
|
*/
|
|
36
36
|
negative_prompt?: string[];
|
|
37
37
|
/**
|
|
38
|
-
*
|
|
39
|
-
*
|
|
38
|
+
* The number of denoising steps. More denoising steps usually lead to a higher quality
|
|
39
|
+
* image at the expense of slower inference.
|
|
40
40
|
*/
|
|
41
41
|
num_inference_steps?: number;
|
|
42
42
|
/**
|
|
43
|
-
*
|
|
43
|
+
* Override the scheduler with a compatible one.
|
|
44
44
|
*/
|
|
45
45
|
scheduler?: string;
|
|
46
|
+
/**
|
|
47
|
+
* Seed for the random number generator.
|
|
48
|
+
*/
|
|
49
|
+
seed?: number;
|
|
46
50
|
/**
|
|
47
51
|
* The size in pixel of the output image
|
|
48
52
|
*/
|
|
@@ -22,7 +22,7 @@
|
|
|
22
22
|
"properties": {
|
|
23
23
|
"guidance_scale": {
|
|
24
24
|
"type": "number",
|
|
25
|
-
"description": "
|
|
25
|
+
"description": "A higher guidance scale value encourages the model to generate images closely linked to the text prompt, but values too high may cause saturation and other artifacts."
|
|
26
26
|
},
|
|
27
27
|
"negative_prompt": {
|
|
28
28
|
"type": "array",
|
|
@@ -33,7 +33,7 @@
|
|
|
33
33
|
},
|
|
34
34
|
"num_inference_steps": {
|
|
35
35
|
"type": "integer",
|
|
36
|
-
"description": "
|
|
36
|
+
"description": "The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference."
|
|
37
37
|
},
|
|
38
38
|
"target_size": {
|
|
39
39
|
"type": "object",
|
|
@@ -50,7 +50,11 @@
|
|
|
50
50
|
},
|
|
51
51
|
"scheduler": {
|
|
52
52
|
"type": "string",
|
|
53
|
-
"description": "
|
|
53
|
+
"description": "Override the scheduler with a compatible one."
|
|
54
|
+
},
|
|
55
|
+
"seed": {
|
|
56
|
+
"type": "integer",
|
|
57
|
+
"description": "Seed for the random number generator."
|
|
54
58
|
}
|
|
55
59
|
}
|
|
56
60
|
}
|