@huggingface/tasks 0.11.13 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +10 -2
- package/dist/index.js +10 -2
- package/dist/src/pipelines.d.ts +8 -2
- package/dist/src/pipelines.d.ts.map +1 -1
- package/dist/src/tasks/index.d.ts.map +1 -1
- package/dist/src/tasks/text-to-image/inference.d.ts +9 -5
- package/dist/src/tasks/text-to-image/inference.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/pipelines.ts +6 -0
- package/src/tasks/image-to-image/about.md +70 -21
- package/src/tasks/image-to-image/data.ts +1 -1
- package/src/tasks/index.ts +2 -0
- package/src/tasks/text-to-image/inference.ts +9 -5
- package/src/tasks/text-to-image/spec/input.json +7 -3
package/dist/index.cjs
CHANGED
|
@@ -1406,6 +1406,12 @@ var PIPELINE_DATA = {
|
|
|
1406
1406
|
color: "red",
|
|
1407
1407
|
hideInDatasets: true
|
|
1408
1408
|
},
|
|
1409
|
+
"any-to-any": {
|
|
1410
|
+
name: "Any-to-Any",
|
|
1411
|
+
modality: "multimodal",
|
|
1412
|
+
color: "yellow",
|
|
1413
|
+
hideInDatasets: true
|
|
1414
|
+
},
|
|
1409
1415
|
other: {
|
|
1410
1416
|
name: "Other",
|
|
1411
1417
|
modality: "other",
|
|
@@ -2069,7 +2075,7 @@ var taskData9 = {
|
|
|
2069
2075
|
id: "timbrooks/instruct-pix2pix"
|
|
2070
2076
|
}
|
|
2071
2077
|
],
|
|
2072
|
-
summary: "Image-to-image is the task of transforming
|
|
2078
|
+
summary: "Image-to-image is the task of transforming an input image through a variety of possible manipulations and enhancements, such as super-resolution, image inpainting, colorization, and more.",
|
|
2073
2079
|
widgetModels: ["lllyasviel/sd-controlnet-canny"],
|
|
2074
2080
|
youtubeId: ""
|
|
2075
2081
|
};
|
|
@@ -4264,7 +4270,8 @@ var TASKS_MODEL_LIBRARIES = {
|
|
|
4264
4270
|
"zero-shot-image-classification": ["transformers", "transformers.js"],
|
|
4265
4271
|
"zero-shot-object-detection": ["transformers", "transformers.js"],
|
|
4266
4272
|
"text-to-3d": ["diffusers"],
|
|
4267
|
-
"image-to-3d": ["diffusers"]
|
|
4273
|
+
"image-to-3d": ["diffusers"],
|
|
4274
|
+
"any-to-any": ["transformers"]
|
|
4268
4275
|
};
|
|
4269
4276
|
function getData(type, partialTaskData = data_default16) {
|
|
4270
4277
|
return {
|
|
@@ -4275,6 +4282,7 @@ function getData(type, partialTaskData = data_default16) {
|
|
|
4275
4282
|
};
|
|
4276
4283
|
}
|
|
4277
4284
|
var TASKS_DATA = {
|
|
4285
|
+
"any-to-any": getData("any-to-any", data_default16),
|
|
4278
4286
|
"audio-classification": getData("audio-classification", data_default),
|
|
4279
4287
|
"audio-to-audio": getData("audio-to-audio", data_default2),
|
|
4280
4288
|
"automatic-speech-recognition": getData("automatic-speech-recognition", data_default3),
|
package/dist/index.js
CHANGED
|
@@ -1368,6 +1368,12 @@ var PIPELINE_DATA = {
|
|
|
1368
1368
|
color: "red",
|
|
1369
1369
|
hideInDatasets: true
|
|
1370
1370
|
},
|
|
1371
|
+
"any-to-any": {
|
|
1372
|
+
name: "Any-to-Any",
|
|
1373
|
+
modality: "multimodal",
|
|
1374
|
+
color: "yellow",
|
|
1375
|
+
hideInDatasets: true
|
|
1376
|
+
},
|
|
1371
1377
|
other: {
|
|
1372
1378
|
name: "Other",
|
|
1373
1379
|
modality: "other",
|
|
@@ -2031,7 +2037,7 @@ var taskData9 = {
|
|
|
2031
2037
|
id: "timbrooks/instruct-pix2pix"
|
|
2032
2038
|
}
|
|
2033
2039
|
],
|
|
2034
|
-
summary: "Image-to-image is the task of transforming
|
|
2040
|
+
summary: "Image-to-image is the task of transforming an input image through a variety of possible manipulations and enhancements, such as super-resolution, image inpainting, colorization, and more.",
|
|
2035
2041
|
widgetModels: ["lllyasviel/sd-controlnet-canny"],
|
|
2036
2042
|
youtubeId: ""
|
|
2037
2043
|
};
|
|
@@ -4226,7 +4232,8 @@ var TASKS_MODEL_LIBRARIES = {
|
|
|
4226
4232
|
"zero-shot-image-classification": ["transformers", "transformers.js"],
|
|
4227
4233
|
"zero-shot-object-detection": ["transformers", "transformers.js"],
|
|
4228
4234
|
"text-to-3d": ["diffusers"],
|
|
4229
|
-
"image-to-3d": ["diffusers"]
|
|
4235
|
+
"image-to-3d": ["diffusers"],
|
|
4236
|
+
"any-to-any": ["transformers"]
|
|
4230
4237
|
};
|
|
4231
4238
|
function getData(type, partialTaskData = data_default16) {
|
|
4232
4239
|
return {
|
|
@@ -4237,6 +4244,7 @@ function getData(type, partialTaskData = data_default16) {
|
|
|
4237
4244
|
};
|
|
4238
4245
|
}
|
|
4239
4246
|
var TASKS_DATA = {
|
|
4247
|
+
"any-to-any": getData("any-to-any", data_default16),
|
|
4240
4248
|
"audio-classification": getData("audio-classification", data_default),
|
|
4241
4249
|
"audio-to-audio": getData("audio-to-audio", data_default2),
|
|
4242
4250
|
"automatic-speech-recognition": getData("automatic-speech-recognition", data_default3),
|
package/dist/src/pipelines.d.ts
CHANGED
|
@@ -403,6 +403,12 @@ export declare const PIPELINE_DATA: {
|
|
|
403
403
|
color: "red";
|
|
404
404
|
hideInDatasets: true;
|
|
405
405
|
};
|
|
406
|
+
"any-to-any": {
|
|
407
|
+
name: string;
|
|
408
|
+
modality: "multimodal";
|
|
409
|
+
color: "yellow";
|
|
410
|
+
hideInDatasets: true;
|
|
411
|
+
};
|
|
406
412
|
other: {
|
|
407
413
|
name: string;
|
|
408
414
|
modality: "other";
|
|
@@ -413,7 +419,7 @@ export declare const PIPELINE_DATA: {
|
|
|
413
419
|
};
|
|
414
420
|
export type PipelineType = keyof typeof PIPELINE_DATA;
|
|
415
421
|
export type WidgetType = PipelineType | "conversational";
|
|
416
|
-
export declare const PIPELINE_TYPES: ("other" | "text-classification" | "token-classification" | "table-question-answering" | "question-answering" | "zero-shot-classification" | "translation" | "summarization" | "feature-extraction" | "text-generation" | "text2text-generation" | "fill-mask" | "sentence-similarity" | "text-to-speech" | "text-to-audio" | "automatic-speech-recognition" | "audio-to-audio" | "audio-classification" | "voice-activity-detection" | "depth-estimation" | "image-classification" | "object-detection" | "image-segmentation" | "text-to-image" | "image-to-text" | "image-to-image" | "image-to-video" | "unconditional-image-generation" | "video-classification" | "reinforcement-learning" | "robotics" | "tabular-classification" | "tabular-regression" | "tabular-to-text" | "table-to-text" | "multiple-choice" | "text-retrieval" | "time-series-forecasting" | "text-to-video" | "image-text-to-text" | "visual-question-answering" | "document-question-answering" | "zero-shot-image-classification" | "graph-ml" | "mask-generation" | "zero-shot-object-detection" | "text-to-3d" | "image-to-3d" | "image-feature-extraction" | "video-text-to-text" | "keypoint-detection")[];
|
|
422
|
+
export declare const PIPELINE_TYPES: ("other" | "text-classification" | "token-classification" | "table-question-answering" | "question-answering" | "zero-shot-classification" | "translation" | "summarization" | "feature-extraction" | "text-generation" | "text2text-generation" | "fill-mask" | "sentence-similarity" | "text-to-speech" | "text-to-audio" | "automatic-speech-recognition" | "audio-to-audio" | "audio-classification" | "voice-activity-detection" | "depth-estimation" | "image-classification" | "object-detection" | "image-segmentation" | "text-to-image" | "image-to-text" | "image-to-image" | "image-to-video" | "unconditional-image-generation" | "video-classification" | "reinforcement-learning" | "robotics" | "tabular-classification" | "tabular-regression" | "tabular-to-text" | "table-to-text" | "multiple-choice" | "text-retrieval" | "time-series-forecasting" | "text-to-video" | "image-text-to-text" | "visual-question-answering" | "document-question-answering" | "zero-shot-image-classification" | "graph-ml" | "mask-generation" | "zero-shot-object-detection" | "text-to-3d" | "image-to-3d" | "image-feature-extraction" | "video-text-to-text" | "keypoint-detection" | "any-to-any")[];
|
|
417
423
|
export declare const SUBTASK_TYPES: string[];
|
|
418
|
-
export declare const PIPELINE_TYPES_SET: Set<"other" | "text-classification" | "token-classification" | "table-question-answering" | "question-answering" | "zero-shot-classification" | "translation" | "summarization" | "feature-extraction" | "text-generation" | "text2text-generation" | "fill-mask" | "sentence-similarity" | "text-to-speech" | "text-to-audio" | "automatic-speech-recognition" | "audio-to-audio" | "audio-classification" | "voice-activity-detection" | "depth-estimation" | "image-classification" | "object-detection" | "image-segmentation" | "text-to-image" | "image-to-text" | "image-to-image" | "image-to-video" | "unconditional-image-generation" | "video-classification" | "reinforcement-learning" | "robotics" | "tabular-classification" | "tabular-regression" | "tabular-to-text" | "table-to-text" | "multiple-choice" | "text-retrieval" | "time-series-forecasting" | "text-to-video" | "image-text-to-text" | "visual-question-answering" | "document-question-answering" | "zero-shot-image-classification" | "graph-ml" | "mask-generation" | "zero-shot-object-detection" | "text-to-3d" | "image-to-3d" | "image-feature-extraction" | "video-text-to-text" | "keypoint-detection">;
|
|
424
|
+
export declare const PIPELINE_TYPES_SET: Set<"other" | "text-classification" | "token-classification" | "table-question-answering" | "question-answering" | "zero-shot-classification" | "translation" | "summarization" | "feature-extraction" | "text-generation" | "text2text-generation" | "fill-mask" | "sentence-similarity" | "text-to-speech" | "text-to-audio" | "automatic-speech-recognition" | "audio-to-audio" | "audio-classification" | "voice-activity-detection" | "depth-estimation" | "image-classification" | "object-detection" | "image-segmentation" | "text-to-image" | "image-to-text" | "image-to-image" | "image-to-video" | "unconditional-image-generation" | "video-classification" | "reinforcement-learning" | "robotics" | "tabular-classification" | "tabular-regression" | "tabular-to-text" | "table-to-text" | "multiple-choice" | "text-retrieval" | "time-series-forecasting" | "text-to-video" | "image-text-to-text" | "visual-question-answering" | "document-question-answering" | "zero-shot-image-classification" | "graph-ml" | "mask-generation" | "zero-shot-object-detection" | "text-to-3d" | "image-to-3d" | "image-feature-extraction" | "video-text-to-text" | "keypoint-detection" | "any-to-any">;
|
|
419
425
|
//# sourceMappingURL=pipelines.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"pipelines.d.ts","sourceRoot":"","sources":["../../src/pipelines.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,UAAU,yEAA0E,CAAC;AAElG,MAAM,MAAM,QAAQ,GAAG,CAAC,OAAO,UAAU,CAAC,CAAC,MAAM,CAAC,CAAC;AAEnD,eAAO,MAAM,eAAe;;;;;;;;CAQQ,CAAC;AAErC;;;;;;GAMG;AACH,MAAM,WAAW,OAAO;IACvB;;OAEG;IACH,IAAI,EAAE,MAAM,CAAC;IACb;;OAEG;IACH,IAAI,EAAE,MAAM,CAAC;CACb;AAED;;;;;GAKG;AACH,MAAM,WAAW,YAAY;IAC5B;;OAEG;IACH,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,CAAC,EAAE,OAAO,EAAE,CAAC;IACrB,QAAQ,EAAE,QAAQ,CAAC;IACnB;;OAEG;IACH,KAAK,EAAE,MAAM,GAAG,OAAO,GAAG,QAAQ,GAAG,QAAQ,GAAG,KAAK,GAAG,QAAQ,CAAC;IACjE;;OAEG;IACH,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB;;OAEG;IACH,cAAc,CAAC,EAAE,OAAO,CAAC;CACzB;AAcD,eAAO,MAAM,aAAa
|
|
1
|
+
{"version":3,"file":"pipelines.d.ts","sourceRoot":"","sources":["../../src/pipelines.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,UAAU,yEAA0E,CAAC;AAElG,MAAM,MAAM,QAAQ,GAAG,CAAC,OAAO,UAAU,CAAC,CAAC,MAAM,CAAC,CAAC;AAEnD,eAAO,MAAM,eAAe;;;;;;;;CAQQ,CAAC;AAErC;;;;;;GAMG;AACH,MAAM,WAAW,OAAO;IACvB;;OAEG;IACH,IAAI,EAAE,MAAM,CAAC;IACb;;OAEG;IACH,IAAI,EAAE,MAAM,CAAC;CACb;AAED;;;;;GAKG;AACH,MAAM,WAAW,YAAY;IAC5B;;OAEG;IACH,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,CAAC,EAAE,OAAO,EAAE,CAAC;IACrB,QAAQ,EAAE,QAAQ,CAAC;IACnB;;OAEG;IACH,KAAK,EAAE,MAAM,GAAG,OAAO,GAAG,QAAQ,GAAG,QAAQ,GAAG,KAAK,GAAG,QAAQ,CAAC;IACjE;;OAEG;IACH,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB;;OAEG;IACH,cAAc,CAAC,EAAE,OAAO,CAAC;CACzB;AAcD,eAAO,MAAM,aAAa;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAsmBc,CAAC;AAEzC,MAAM,MAAM,YAAY,GAAG,MAAM,OAAO,aAAa,CAAC;AAEtD,MAAM,MAAM,UAAU,GAAG,YAAY,GAAG,gBAAgB,CAAC;AAEzD,eAAO,MAAM,cAAc,gpCAA+C,CAAC;AAE3E,eAAO,MAAM,aAAa,UAEN,CAAC;AAErB,eAAO,MAAM,kBAAkB,ipCAA0B,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/tasks/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AA0CjD,mBAAmB,kCAAkC,CAAC;AACtD,mBAAmB,0CAA0C,CAAC;AAC9D,YAAY,EACX,mBAAmB,EACnB,0BAA0B,EAC1B,oBAAoB,EACpB,4BAA4B,EAC5B,2BAA2B,EAC3B,0BAA0B,EAC1B,gCAAgC,EAChC,+BAA+B,GAC/B,MAAM,6BAA6B,CAAC;AACrC,mBAAmB,yCAAyC,CAAC;AAC7D,mBAAmB,gCAAgC,CAAC;AACpD,mBAAmB,uBAAuB,CAAC;AAC3C,YAAY,EACX,wBAAwB,EACxB,yBAAyB,EACzB,gCAAgC,EAChC,6BAA6B,GAC7B,MAAM,kCAAkC,CAAC;AAC1C,mBAAmB,4BAA4B,CAAC;AAChD,YAAY,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,qBAAqB,EAAE,MAAM,2BAA2B,CAAC;AAC5G,mBAAmB,gCAAgC,CAAC;AACpD,mBAAmB,8BAA8B,CAAC;AAClD,mBAAmB,8BAA8B,CAAC;AAClD,mBAAmB,gCAAgC,CAAC;AACpD,mBAAmB,iCAAiC,CAAC;AACrD,mBAAmB,2BAA2B,CAAC;AAC/C,mBAAmB,sCAAsC,CAAC;AAC1D,YAAY,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,qBAAqB,EAAE,MAAM,2BAA2B,CAAC;AAC5G,YAAY,EAAE,qBAAqB,EAAE,iBAAiB,EAAE,kBAAkB,EAAE,MAAM,4BAA4B,CAAC;AAC/G,mBAAmB,kCAAkC,CAAC;AACtD,YAAY,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,MAAM,yBAAyB,CAAC;AACnF,YAAY,EACX,6BAA6B,EAC7B,uBAAuB,EACvB,wBAAwB,EACxB,+BAA+B,EAC/B,4BAA4B,GAC5B,MAAM,iCAAiC,CAAC;AACzC,YAAY,EACX,gCAAgC,EAChC,gCAAgC,EAChC,mBAAmB,EACnB,oBAAoB,EACpB,2BAA2B,EAC3B,qCAAqC,EACrC,kCAAkC,EAClC,yBAAyB,EACzB,uCAAuC,EACvC,0BAA0B,GAC1B,MAAM,6BAA6B,CAAC;AACrC,mBAAmB,kCAAkC,CAAC;AACtD,mBAAmB,uCAAuC,CAAC;AAC3D,mBAAmB,sCAAsC,CAAC;AAC1D,mBAAmB,4CAA4C,CAAC;AAChE,YAAY,EACX,WAAW,EACX,4BAA4B,EAC5B,gCAAgC,EAChC,6BAA6B,EAC7B,oCAAoC,GACpC,MAAM,wCAAwC,CAAC;AAEhD,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AAE1D;;GAEG;AACH,eAAO,MAAM,qBAAqB,EAAE,MAAM,CAAC,YAAY,EAAE,eAAe,EAAE,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/tasks/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AA0CjD,mBAAmB,kCAAkC,CAAC;AACtD,mBAAmB,0CAA0C,CAAC;AAC9D,YAAY,EACX,mBAAmB,EACnB,0BAA0B,EAC1B,oBAAoB,EACpB,4BAA4B,EAC5B,2BAA2B,EAC3B,0BAA0B,EAC1B,gCAAgC,EAChC,+BAA+B,GAC/B,MAAM,6BAA6B,CAAC;AACrC,mBAAmB,yCAAyC,CAAC;AAC7D,mBAAmB,gCAAgC,CAAC;AACpD,mBAAmB,uBAAuB,CAAC;AAC3C,YAAY,EACX,wBAAwB,EACxB,yBAAyB,EACzB,gCAAgC,EAChC,6BAA6B,GAC7B,MAAM,kCAAkC,CAAC;AAC1C,mBAAmB,4BAA4B,CAAC;AAChD,YAAY,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,qBAAqB,EAAE,MAAM,2BAA2B,CAAC;AAC5G,mBAAmB,gCAAgC,CAAC;AACpD,mBAAmB,8BAA8B,CAAC;AAClD,mBAAmB,8BAA8B,CAAC;AAClD,mBAAmB,gCAAgC,CAAC;AACpD,mBAAmB,iCAAiC,CAAC;AACrD,mBAAmB,2BAA2B,CAAC;AAC/C,mBAAmB,sCAAsC,CAAC;AAC1D,YAAY,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,qBAAqB,EAAE,MAAM,2BAA2B,CAAC;AAC5G,YAAY,EAAE,qBAAqB,EAAE,iBAAiB,EAAE,kBAAkB,EAAE,MAAM,4BAA4B,CAAC;AAC/G,mBAAmB,kCAAkC,CAAC;AACtD,YAAY,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,MAAM,yBAAyB,CAAC;AACnF,YAAY,EACX,6BAA6B,EAC7B,uBAAuB,EACvB,wBAAwB,EACxB,+BAA+B,EAC/B,4BAA4B,GAC5B,MAAM,iCAAiC,CAAC;AACzC,YAAY,EACX,gCAAgC,EAChC,gCAAgC,EAChC,mBAAmB,EACnB,oBAAoB,EACpB,2BAA2B,EAC3B,qCAAqC,EACrC,kCAAkC,EAClC,yBAAyB,EACzB,uCAAuC,EACvC,0BAA0B,GAC1B,MAAM,6BAA6B,CAAC;AACrC,mBAAmB,kCAAkC,CAAC;AACtD,mBAAmB,uCAAuC,CAAC;AAC3D,mBAAmB,sCAAsC,CAAC;AAC1D,mBAAmB,4CAA4C,CAAC;AAChE,YAAY,EACX,WAAW,EACX,4BAA4B,EAC5B,gCAAgC,EAChC,6BAA6B,EAC7B,oCAAoC,GACpC,MAAM,wCAAwC,CAAC;AAEhD,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AAE1D;;GAEG;AACH,eAAO,MAAM,qBAAqB,EAAE,MAAM,CAAC,YAAY,EAAE,eAAe,EAAE,CA6DzE,CAAC;AAoBF,eAAO,MAAM,UAAU,EAAE,MAAM,CAAC,YAAY,EAAE,QAAQ,GAAG,SAAS,CAqDxD,CAAC;AAEX,MAAM,WAAW,WAAW;IAC3B,WAAW,EAAE,MAAM,CAAC;IACpB,EAAE,EAAE,MAAM,CAAC;CACX;AAED,MAAM,MAAM,aAAa,GACtB;IACA,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,OAAO,CAAC;CACb,GACD;IACA,IAAI,EAAE,KAAK,CAAC;QACX,KAAK,EAAE,MAAM,CAAC;QACd,KAAK,EAAE,MAAM,CAAC;KACd,CAAC,CAAC;IACH,IAAI,EAAE,OAAO,CAAC;CACb,GACD;IACA,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,KAAK,CAAC;CACX,G
ACD;IACA,KAAK,EAAE,MAAM,EAAE,EAAE,CAAC;IAClB,IAAI,EAAE,SAAS,CAAC;CACf,GACD;IACA,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;CACZ,GACD;IACA,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,KAAK,CAAC;QACb,GAAG,EAAE,MAAM,CAAC;QACZ,KAAK,EAAE,MAAM,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;KACb,CAAC,CAAC;IACH,IAAI,EAAE,kBAAkB,CAAC;CACxB,CAAC;AAEL,MAAM,WAAW,QAAQ;IACxB,MAAM,EAAE,aAAa,EAAE,CAAC;IACxB,OAAO,EAAE,aAAa,EAAE,CAAC;CACzB;AAED,MAAM,WAAW,QAAQ;IACxB,QAAQ,EAAE,WAAW,EAAE,CAAC;IACxB,IAAI,EAAE,QAAQ,CAAC;IACf,EAAE,EAAE,YAAY,CAAC;IACjB,WAAW,CAAC,EAAE,YAAY,CAAC;IAC3B,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,eAAe,EAAE,CAAC;IAC7B,OAAO,EAAE,WAAW,EAAE,CAAC;IACvB,MAAM,EAAE,WAAW,EAAE,CAAC;IACtB,MAAM,EAAE,WAAW,EAAE,CAAC;IACtB,OAAO,EAAE,MAAM,CAAC;IAChB,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,SAAS,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,MAAM,cAAc,GAAG,IAAI,CAAC,QAAQ,EAAE,IAAI,GAAG,OAAO,GAAG,WAAW,CAAC,CAAC"}
|
|
@@ -24,8 +24,8 @@ export interface TextToImageInput {
|
|
|
24
24
|
*/
|
|
25
25
|
export interface TextToImageParameters {
|
|
26
26
|
/**
|
|
27
|
-
*
|
|
28
|
-
*
|
|
27
|
+
* A higher guidance scale value encourages the model to generate images closely linked to
|
|
28
|
+
* the text prompt, but values too high may cause saturation and other artifacts.
|
|
29
29
|
*/
|
|
30
30
|
guidance_scale?: number;
|
|
31
31
|
/**
|
|
@@ -33,14 +33,18 @@ export interface TextToImageParameters {
|
|
|
33
33
|
*/
|
|
34
34
|
negative_prompt?: string[];
|
|
35
35
|
/**
|
|
36
|
-
*
|
|
37
|
-
*
|
|
36
|
+
* The number of denoising steps. More denoising steps usually lead to a higher quality
|
|
37
|
+
* image at the expense of slower inference.
|
|
38
38
|
*/
|
|
39
39
|
num_inference_steps?: number;
|
|
40
40
|
/**
|
|
41
|
-
*
|
|
41
|
+
* Override the scheduler with a compatible one.
|
|
42
42
|
*/
|
|
43
43
|
scheduler?: string;
|
|
44
|
+
/**
|
|
45
|
+
* Seed for the random number generator.
|
|
46
|
+
*/
|
|
47
|
+
seed?: number;
|
|
44
48
|
/**
|
|
45
49
|
* The size in pixel of the output image
|
|
46
50
|
*/
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"inference.d.ts","sourceRoot":"","sources":["../../../../src/tasks/text-to-image/inference.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAChC;;OAEG;IACH,MAAM,EAAE,MAAM,CAAC;IACf;;OAEG;IACH,UAAU,CAAC,EAAE,qBAAqB,CAAC;IACnC,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC;CAC5B;AAED;;;;GAIG;AACH,MAAM,WAAW,qBAAqB;IACrC;;;OAGG;IACH,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB;;OAEG;IACH,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;IAC3B;;;OAGG;IACH,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAC7B;;OAEG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;OAEG;IACH,WAAW,CAAC,EAAE,UAAU,CAAC;IACzB,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC;CAC5B;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IAC1B,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC;CAC5B;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IACjC;;OAEG;IACH,KAAK,EAAE,OAAO,CAAC;IACf,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC;CAC5B"}
|
|
1
|
+
{"version":3,"file":"inference.d.ts","sourceRoot":"","sources":["../../../../src/tasks/text-to-image/inference.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAChC;;OAEG;IACH,MAAM,EAAE,MAAM,CAAC;IACf;;OAEG;IACH,UAAU,CAAC,EAAE,qBAAqB,CAAC;IACnC,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC;CAC5B;AAED;;;;GAIG;AACH,MAAM,WAAW,qBAAqB;IACrC;;;OAGG;IACH,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB;;OAEG;IACH,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;IAC3B;;;OAGG;IACH,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAC7B;;OAEG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;OAEG;IACH,IAAI,CAAC,EAAE,MAAM,CAAC;IACd;;OAEG;IACH,WAAW,CAAC,EAAE,UAAU,CAAC;IACzB,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC;CAC5B;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IAC1B,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC;CAC5B;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IACjC;;OAEG;IACH,KAAK,EAAE,OAAO,CAAC;IACf,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC;CAC5B"}
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@huggingface/tasks",
|
|
3
3
|
"packageManager": "pnpm@8.10.5",
|
|
4
|
-
"version": "0.
|
|
4
|
+
"version": "0.12.0",
|
|
5
5
|
"description": "List of ML tasks for huggingface.co/tasks",
|
|
6
6
|
"repository": "https://github.com/huggingface/huggingface.js.git",
|
|
7
7
|
"publishConfig": {
|
package/src/pipelines.ts
CHANGED
|
@@ -670,6 +670,12 @@ export const PIPELINE_DATA = {
|
|
|
670
670
|
color: "red",
|
|
671
671
|
hideInDatasets: true,
|
|
672
672
|
},
|
|
673
|
+
"any-to-any": {
|
|
674
|
+
name: "Any-to-Any",
|
|
675
|
+
modality: "multimodal",
|
|
676
|
+
color: "yellow",
|
|
677
|
+
hideInDatasets: true,
|
|
678
|
+
},
|
|
673
679
|
other: {
|
|
674
680
|
name: "Other",
|
|
675
681
|
modality: "other",
|
|
@@ -1,15 +1,10 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
### Style transfer
|
|
1
|
+
Image-to-image pipelines can also be used in text-to-image tasks, to provide visual guidance to the text-guided generation process.
|
|
4
2
|
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
## Task Variants
|
|
3
|
+
## Use Cases
|
|
8
4
|
|
|
9
5
|
### Image inpainting
|
|
10
6
|
|
|
11
|
-
Image inpainting is widely used during photography editing to remove unwanted objects, such as poles, wires, or sensor
|
|
12
|
-
dust.
|
|
7
|
+
Image inpainting is widely used during photography editing to remove unwanted objects, such as poles, wires, or sensor dust.
|
|
13
8
|
|
|
14
9
|
### Image colorization
|
|
15
10
|
|
|
@@ -24,18 +19,27 @@ Super-resolution models increase the resolution of an image, allowing for higher
|
|
|
24
19
|
You can use pipelines for image-to-image in 🧨diffusers library to easily use image-to-image models. See an example for `StableDiffusionImg2ImgPipeline` below.
|
|
25
20
|
|
|
26
21
|
```python
|
|
27
|
-
|
|
28
|
-
from diffusers import
|
|
22
|
+
import torch
|
|
23
|
+
from diffusers import AutoPipelineForImage2Image
|
|
24
|
+
from diffusers.utils import make_image_grid, load_image
|
|
29
25
|
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
26
|
+
pipeline = AutoPipelineForImage2Image.from_pretrained(
|
|
27
|
+
"stabilityai/stable-diffusion-xl-refiner-1.0", torch_dtype=torch.float16, variant="fp16", use_safetensors=True
|
|
28
|
+
)
|
|
33
29
|
|
|
34
|
-
|
|
35
|
-
|
|
30
|
+
# this helps us to reduce memory usage — since SDXL is a bit heavy, this could help by
|
|
31
|
+
# offloading the model to CPU w/o hurting performance.
|
|
32
|
+
pipeline.enable_model_cpu_offload()
|
|
36
33
|
|
|
37
|
-
|
|
38
|
-
images
|
|
34
|
+
# prepare image
|
|
35
|
+
url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/img2img-sdxl-init.png"
|
|
36
|
+
init_image = load_image(url)
|
|
37
|
+
|
|
38
|
+
prompt = "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k"
|
|
39
|
+
|
|
40
|
+
# pass prompt and image to pipeline
|
|
41
|
+
image = pipeline(prompt, image=init_image, strength=0.5).images[0]
|
|
42
|
+
make_image_grid([init_image, image], rows=1, cols=2)
|
|
39
43
|
```
|
|
40
44
|
|
|
41
45
|
You can use [huggingface.js](https://github.com/huggingface/huggingface.js) to infer image-to-image models on Hugging Face Hub.
|
|
@@ -53,13 +57,53 @@ await inference.imageToImage({
|
|
|
53
57
|
});
|
|
54
58
|
```
|
|
55
59
|
|
|
56
|
-
##
|
|
60
|
+
## Use Cases for Text Guided Image Generation
|
|
57
61
|
|
|
58
|
-
|
|
62
|
+
### Style Transfer
|
|
63
|
+
|
|
64
|
+
One of the most popular use cases of image-to-image is style transfer. With style transfer models:
|
|
59
65
|
|
|
60
|
-
|
|
66
|
+
- a regular photo can be transformed into a variety of artistic styles or genres, such as a watercolor painting, a comic book illustration and more.
|
|
67
|
+
- new images can be generated using a text prompt, in the style of a reference input image.
|
|
68
|
+
|
|
69
|
+
See 🧨diffusers example for style transfer with `AutoPipelineForText2Image` below.
|
|
70
|
+
|
|
71
|
+
```python
|
|
72
|
+
from diffusers import AutoPipelineForText2Image
|
|
73
|
+
from diffusers.utils import load_image
|
|
74
|
+
import torch
|
|
75
|
+
|
|
76
|
+
# load pipeline
|
|
77
|
+
pipeline = AutoPipelineForText2Image.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16).to("cuda")
|
|
78
|
+
pipeline.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter_sdxl.bin")
|
|
79
|
+
|
|
80
|
+
# set the adapter and scales - this is a component that lets us add the style control from an image to the text-to-image model
|
|
81
|
+
scale = {
|
|
82
|
+
"down": {"block_2": [0.0, 1.0]},
|
|
83
|
+
"up": {"block_0": [0.0, 1.0, 0.0]},
|
|
84
|
+
}
|
|
85
|
+
pipeline.set_ip_adapter_scale(scale)
|
|
86
|
+
|
|
87
|
+
style_image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/0052a70beed5bf71b92610a43a52df6d286cd5f3/diffusers/rabbit.jpg")
|
|
88
|
+
|
|
89
|
+
generator = torch.Generator(device="cpu").manual_seed(26)
|
|
90
|
+
image = pipeline(
|
|
91
|
+
prompt="a cat, masterpiece, best quality, high quality",
|
|
92
|
+
ip_adapter_image=style_image,
|
|
93
|
+
negative_prompt="text, watermark, lowres, low quality, worst quality, deformed, glitch, low contrast, noisy, saturation, blurry",
|
|
94
|
+
guidance_scale=5,
|
|
95
|
+
num_inference_steps=30,
|
|
96
|
+
generator=generator,
|
|
97
|
+
).images[0]
|
|
98
|
+
image
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
### ControlNet
|
|
102
|
+
|
|
103
|
+
Controlling the outputs of diffusion models only with a text prompt is a challenging problem. ControlNet is a neural network model that provides image-based control to diffusion models. Control images can be edges or other landmarks extracted from a source image.
|
|
104
|
+

|
|
61
105
|
|
|
62
|
-
##
|
|
106
|
+
## Pix2Pix
|
|
63
107
|
|
|
64
108
|
Pix2Pix is a popular model used for image-to-image translation tasks. It is based on a conditional-GAN (generative adversarial network) where instead of a noise vector a 2D image is given as input. More information about Pix2Pix can be retrieved from this [link](https://phillipi.github.io/pix2pix/) where the associated paper and the GitHub repository can be found.
|
|
65
109
|
|
|
@@ -70,8 +114,13 @@ The images below show some examples extracted from the Pix2Pix paper. This model
|
|
|
70
114
|
## Useful Resources
|
|
71
115
|
|
|
72
116
|
- [Image-to-image guide with diffusers](https://huggingface.co/docs/diffusers/using-diffusers/img2img)
|
|
117
|
+
- Image inpainting: [inpainting with 🧨diffusers](https://huggingface.co/docs/diffusers/main/en/api/pipelines/stable_diffusion/inpaint), [demo](https://huggingface.co/spaces/diffusers/stable-diffusion-xl-inpainting)
|
|
118
|
+
- Colorization: [demo](https://huggingface.co/spaces/modelscope/old_photo_restoration)
|
|
119
|
+
- Super resolution: [image upscaling with 🧨diffusers](https://huggingface.co/docs/diffusers/main/en/api/pipelines/stable_diffusion/upscale#super-resolution), [demo](https://huggingface.co/spaces/radames/Enhance-This-HiDiffusion-SDXL)
|
|
120
|
+
- [Style transfer and layout control with diffusers 🧨](https://huggingface.co/docs/diffusers/main/en/using-diffusers/ip_adapter#style--layout-control)
|
|
73
121
|
- [Train your ControlNet with diffusers 🧨](https://huggingface.co/blog/train-your-controlnet)
|
|
74
122
|
- [Ultra fast ControlNet with 🧨 Diffusers](https://huggingface.co/blog/controlnet)
|
|
123
|
+
- [List of ControlNets trained in the community JAX Diffusers sprint](https://huggingface.co/spaces/jax-diffusers-event/leaderboard)
|
|
75
124
|
|
|
76
125
|
## References
|
|
77
126
|
|
|
@@ -93,7 +93,7 @@ const taskData: TaskDataCustom = {
|
|
|
93
93
|
},
|
|
94
94
|
],
|
|
95
95
|
summary:
|
|
96
|
-
"Image-to-image is the task of transforming
|
|
96
|
+
"Image-to-image is the task of transforming an input image through a variety of possible manipulations and enhancements, such as super-resolution, image inpainting, colorization, and more.",
|
|
97
97
|
widgetModels: ["lllyasviel/sd-controlnet-canny"],
|
|
98
98
|
youtubeId: "",
|
|
99
99
|
};
|
package/src/tasks/index.ts
CHANGED
|
@@ -170,6 +170,7 @@ export const TASKS_MODEL_LIBRARIES: Record<PipelineType, ModelLibraryKey[]> = {
|
|
|
170
170
|
"zero-shot-object-detection": ["transformers", "transformers.js"],
|
|
171
171
|
"text-to-3d": ["diffusers"],
|
|
172
172
|
"image-to-3d": ["diffusers"],
|
|
173
|
+
"any-to-any": ["transformers"],
|
|
173
174
|
};
|
|
174
175
|
|
|
175
176
|
/**
|
|
@@ -191,6 +192,7 @@ function getData(type: PipelineType, partialTaskData: TaskDataCustom = placehold
|
|
|
191
192
|
// Tasks that call getData() without the second argument will
|
|
192
193
|
// have a "placeholder" page.
|
|
193
194
|
export const TASKS_DATA: Record<PipelineType, TaskData | undefined> = {
|
|
195
|
+
"any-to-any": getData("any-to-any", placeholder),
|
|
194
196
|
"audio-classification": getData("audio-classification", audioClassification),
|
|
195
197
|
"audio-to-audio": getData("audio-to-audio", audioToAudio),
|
|
196
198
|
"automatic-speech-recognition": getData("automatic-speech-recognition", automaticSpeechRecognition),
|
|
@@ -26,8 +26,8 @@ export interface TextToImageInput {
|
|
|
26
26
|
*/
|
|
27
27
|
export interface TextToImageParameters {
|
|
28
28
|
/**
|
|
29
|
-
*
|
|
30
|
-
*
|
|
29
|
+
* A higher guidance scale value encourages the model to generate images closely linked to
|
|
30
|
+
* the text prompt, but values too high may cause saturation and other artifacts.
|
|
31
31
|
*/
|
|
32
32
|
guidance_scale?: number;
|
|
33
33
|
/**
|
|
@@ -35,14 +35,18 @@ export interface TextToImageParameters {
|
|
|
35
35
|
*/
|
|
36
36
|
negative_prompt?: string[];
|
|
37
37
|
/**
|
|
38
|
-
*
|
|
39
|
-
*
|
|
38
|
+
* The number of denoising steps. More denoising steps usually lead to a higher quality
|
|
39
|
+
* image at the expense of slower inference.
|
|
40
40
|
*/
|
|
41
41
|
num_inference_steps?: number;
|
|
42
42
|
/**
|
|
43
|
-
*
|
|
43
|
+
* Override the scheduler with a compatible one.
|
|
44
44
|
*/
|
|
45
45
|
scheduler?: string;
|
|
46
|
+
/**
|
|
47
|
+
* Seed for the random number generator.
|
|
48
|
+
*/
|
|
49
|
+
seed?: number;
|
|
46
50
|
/**
|
|
47
51
|
* The size in pixel of the output image
|
|
48
52
|
*/
|
|
@@ -22,7 +22,7 @@
|
|
|
22
22
|
"properties": {
|
|
23
23
|
"guidance_scale": {
|
|
24
24
|
"type": "number",
|
|
25
|
-
"description": "
|
|
25
|
+
"description": "A higher guidance scale value encourages the model to generate images closely linked to the text prompt, but values too high may cause saturation and other artifacts."
|
|
26
26
|
},
|
|
27
27
|
"negative_prompt": {
|
|
28
28
|
"type": "array",
|
|
@@ -33,7 +33,7 @@
|
|
|
33
33
|
},
|
|
34
34
|
"num_inference_steps": {
|
|
35
35
|
"type": "integer",
|
|
36
|
-
"description": "
|
|
36
|
+
"description": "The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference."
|
|
37
37
|
},
|
|
38
38
|
"target_size": {
|
|
39
39
|
"type": "object",
|
|
@@ -50,7 +50,11 @@
|
|
|
50
50
|
},
|
|
51
51
|
"scheduler": {
|
|
52
52
|
"type": "string",
|
|
53
|
-
"description": "
|
|
53
|
+
"description": "Override the scheduler with a compatible one."
|
|
54
|
+
},
|
|
55
|
+
"seed": {
|
|
56
|
+
"type": "integer",
|
|
57
|
+
"description": "Seed for the random number generator."
|
|
54
58
|
}
|
|
55
59
|
}
|
|
56
60
|
}
|