diffusers 0.19.3__py3-none-any.whl → 0.20.1__py3-none-any.whl
- diffusers/__init__.py +3 -1
- diffusers/commands/fp16_safetensors.py +2 -7
- diffusers/configuration_utils.py +23 -1
- diffusers/dependency_versions_table.py +1 -1
- diffusers/loaders.py +62 -64
- diffusers/models/__init__.py +1 -0
- diffusers/models/activations.py +2 -0
- diffusers/models/attention.py +45 -1
- diffusers/models/autoencoder_tiny.py +193 -0
- diffusers/models/controlnet.py +1 -1
- diffusers/models/embeddings.py +56 -0
- diffusers/models/lora.py +0 -6
- diffusers/models/modeling_flax_utils.py +28 -2
- diffusers/models/modeling_utils.py +33 -16
- diffusers/models/transformer_2d.py +26 -9
- diffusers/models/unet_1d.py +2 -2
- diffusers/models/unet_2d_blocks.py +106 -56
- diffusers/models/unet_2d_condition.py +20 -5
- diffusers/models/vae.py +106 -1
- diffusers/pipelines/__init__.py +1 -0
- diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py +10 -3
- diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py +10 -3
- diffusers/pipelines/audioldm/pipeline_audioldm.py +1 -1
- diffusers/pipelines/auto_pipeline.py +33 -43
- diffusers/pipelines/controlnet/multicontrolnet.py +4 -2
- diffusers/pipelines/controlnet/pipeline_controlnet.py +20 -4
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +15 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +14 -4
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +157 -10
- diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +2 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +1 -1
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +43 -2
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +44 -2
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +1 -1
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +1 -1
- diffusers/pipelines/pipeline_flax_utils.py +41 -4
- diffusers/pipelines/pipeline_utils.py +60 -16
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +2 -2
- diffusers/pipelines/stable_diffusion/__init__.py +1 -0
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +81 -37
- diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_attend_and_excite.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_diffedit.py +12 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_gligen.py +832 -0
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py +9 -2
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py +17 -8
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_model_editing.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_sag.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +10 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +10 -3
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +3 -5
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +75 -3
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +76 -6
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +1 -2
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +10 -3
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +10 -3
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +11 -4
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +1 -1
- diffusers/pipelines/versatile_diffusion/modeling_text_unet.py +131 -28
- diffusers/schedulers/scheduling_consistency_models.py +70 -57
- diffusers/schedulers/scheduling_ddim.py +76 -71
- diffusers/schedulers/scheduling_ddim_inverse.py +76 -44
- diffusers/schedulers/scheduling_ddim_parallel.py +11 -8
- diffusers/schedulers/scheduling_ddpm.py +68 -67
- diffusers/schedulers/scheduling_ddpm_parallel.py +18 -15
- diffusers/schedulers/scheduling_deis_multistep.py +93 -85
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +118 -120
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +116 -109
- diffusers/schedulers/scheduling_dpmsolver_sde.py +57 -43
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +122 -121
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +54 -44
- diffusers/schedulers/scheduling_euler_discrete.py +63 -56
- diffusers/schedulers/scheduling_heun_discrete.py +57 -45
- diffusers/schedulers/scheduling_ipndm.py +27 -22
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +54 -41
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +52 -41
- diffusers/schedulers/scheduling_karras_ve.py +55 -45
- diffusers/schedulers/scheduling_lms_discrete.py +58 -52
- diffusers/schedulers/scheduling_pndm.py +77 -62
- diffusers/schedulers/scheduling_repaint.py +56 -38
- diffusers/schedulers/scheduling_sde_ve.py +62 -50
- diffusers/schedulers/scheduling_sde_vp.py +32 -11
- diffusers/schedulers/scheduling_unclip.py +3 -3
- diffusers/schedulers/scheduling_unipc_multistep.py +131 -91
- diffusers/schedulers/scheduling_utils.py +41 -35
- diffusers/schedulers/scheduling_utils_flax.py +8 -2
- diffusers/schedulers/scheduling_vq_diffusion.py +39 -68
- diffusers/utils/__init__.py +2 -2
- diffusers/utils/dummy_pt_objects.py +15 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +15 -0
- diffusers/utils/hub_utils.py +105 -2
- diffusers/utils/import_utils.py +0 -4
- diffusers/utils/pil_utils.py +19 -0
- {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/METADATA +5 -7
- {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/RECORD +113 -112
- {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/WHEEL +1 -1
- {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/entry_points.txt +0 -1
- diffusers/models/cross_attention.py +0 -94
- {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/LICENSE +0 -0
- {diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/top_level.txt +0 -0
diffusers/schedulers/scheduling_vq_diffusion.py
CHANGED
@@ -105,36 +105,24 @@ def gamma_schedules(num_diffusion_timesteps: int, gamma_cum_start=0.000009, gamma_cum_end=0.99999):

 class VQDiffusionScheduler(SchedulerMixin, ConfigMixin):
     """
-
+    A scheduler for vector quantized diffusion.

-
-
-
-    [`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__`
-    function, such as `num_train_timesteps`. They can be accessed via `scheduler.config.num_train_timesteps`.
-    [`SchedulerMixin`] provides general loading and saving functionality via the [`SchedulerMixin.save_pretrained`] and
-    [`~SchedulerMixin.from_pretrained`] functions.
-
-    For more details, see the original paper: https://arxiv.org/abs/2111.14822
+    This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic
+    methods the library implements for all schedulers such as loading and saving.

     Args:
         num_vec_classes (`int`):
             The number of classes of the vector embeddings of the latent pixels. Includes the class for the masked
             latent pixel.
-
-
-
-
-        alpha_cum_start (`float`):
+        num_train_timesteps (`int`, defaults to 100):
+            The number of diffusion steps to train the model.
+        alpha_cum_start (`float`, defaults to 0.99999):
             The starting cumulative alpha value.
-
-        alpha_cum_end (`float`):
+        alpha_cum_end (`float`, defaults to 0.00009):
             The ending cumulative alpha value.
-
-        gamma_cum_start (`float`):
+        gamma_cum_start (`float`, defaults to 0.00009):
             The starting cumulative gamma value.
-
-        gamma_cum_end (`float`):
+        gamma_cum_end (`float`, defaults to 0.99999):
             The ending cumulative gamma value.
     """

@@ -189,14 +177,14 @@ class VQDiffusionScheduler(SchedulerMixin, ConfigMixin):

     def set_timesteps(self, num_inference_steps: int, device: Union[str, torch.device] = None):
         """
-        Sets the discrete timesteps used for the diffusion chain
+        Sets the discrete timesteps used for the diffusion chain (to be run before inference).

         Args:
             num_inference_steps (`int`):
-
-
-
-
+                The number of diffusion steps used when generating samples with a pre-trained model.
+            device (`str` or `torch.device`, *optional*):
+                The device to which the timesteps and diffusion process parameters (alpha, beta, gamma) should be moved
+                to.
         """
         self.num_inference_steps = num_inference_steps
         timesteps = np.arange(0, self.num_inference_steps)[::-1].copy()
@@ -218,30 +206,27 @@ class VQDiffusionScheduler(SchedulerMixin, ConfigMixin):
         return_dict: bool = True,
     ) -> Union[VQDiffusionSchedulerOutput, Tuple]:
         """
-        Predict the sample
-
+        Predict the sample from the previous timestep by the reverse transition distribution. See
+        [`~VQDiffusionScheduler.q_posterior`] for more details about how the distribution is computer.

         Args:
             log_p_x_0: (`torch.FloatTensor` of shape `(batch size, num classes - 1, num latent pixels)`):
                 The log probabilities for the predicted classes of the initial latent pixels. Does not include a
                 prediction for the masked class as the initial unnoised image cannot be masked.
-
             t (`torch.long`):
                 The timestep that determines which transition matrices are used.
-
-
-
-
-
-
-
-            return_dict (`bool`):
-                option for returning tuple rather than VQDiffusionSchedulerOutput class
+            x_t (`torch.LongTensor` of shape `(batch size, num latent pixels)`):
+                The classes of each latent pixel at time `t`.
+            generator (`torch.Generator`, or `None`):
+                A random number generator for the noise applied to `p(x_{t-1} | x_t)` before it is sampled from.
+            return_dict (`bool`, *optional*, defaults to `True`):
+                Whether or not to return a [`~schedulers.scheduling_vq_diffusion.VQDiffusionSchedulerOutput`] or
+                `tuple`.

         Returns:
-            [`~schedulers.
-
-
+            [`~schedulers.scheduling_vq_diffusion.VQDiffusionSchedulerOutput`] or `tuple`:
+                If return_dict is `True`, [`~schedulers.scheduling_vq_diffusion.VQDiffusionSchedulerOutput`] is
+                returned, otherwise a tuple is returned where the first element is the sample tensor.
         """
         if timestep == 0:
             log_p_x_t_min_1 = model_output
@@ -259,32 +244,24 @@ class VQDiffusionScheduler(SchedulerMixin, ConfigMixin):

     def q_posterior(self, log_p_x_0, x_t, t):
         """
-        Calculates the log probabilities for the predicted classes of the image at timestep `t-1
-
-        Instead of directly computing equation (11), we use Equation (5) to restate Equation (11) in terms of only
-        forward probabilities.
-
-        Equation (11) stated in terms of forward probabilities via Equation (5):
-
-        Where:
-        - the sum is over x_0 = {C_0 ... C_{k-1}} (classes for x_0)
+        Calculates the log probabilities for the predicted classes of the image at timestep `t-1`:

+        ```
         p(x_{t-1} | x_t) = sum( q(x_t | x_{t-1}) * q(x_{t-1} | x_0) * p(x_0) / q(x_t | x_0) )
+        ```

         Args:
-            log_p_x_0
+            log_p_x_0 (`torch.FloatTensor` of shape `(batch size, num classes - 1, num latent pixels)`):
                 The log probabilities for the predicted classes of the initial latent pixels. Does not include a
                 prediction for the masked class as the initial unnoised image cannot be masked.
-
-
-
-
-            t (torch.Long):
+            x_t (`torch.LongTensor` of shape `(batch size, num latent pixels)`):
+                The classes of each latent pixel at time `t`.
+            t (`torch.Long`):
                 The timestep that determines which transition matrix is used.

         Returns:
             `torch.FloatTensor` of shape `(batch size, num classes, num latent pixels)`:
-                The log probabilities for the predicted classes of the image at timestep `t-1`.
+                The log probabilities for the predicted classes of the image at timestep `t-1`.
         """
         log_onehot_x_t = index_to_log_onehot(x_t, self.num_embed)

@@ -380,25 +357,19 @@ class VQDiffusionScheduler(SchedulerMixin, ConfigMixin):
         self, *, t: torch.int, x_t: torch.LongTensor, log_onehot_x_t: torch.FloatTensor, cumulative: bool
     ):
         """
-
+        Calculates the log probabilities of the rows from the (cumulative or non-cumulative) transition matrix for each
         latent pixel in `x_t`.

-        See equation (7) for the complete non-cumulative transition matrix. The complete cumulative transition matrix
-        is the same structure except the parameters (alpha, beta, gamma) are the cumulative analogs.
-
         Args:
-            t (torch.Long):
+            t (`torch.Long`):
                 The timestep that determines which transition matrix is used.
-
             x_t (`torch.LongTensor` of shape `(batch size, num latent pixels)`):
                 The classes of each latent pixel at time `t`.
-
             log_onehot_x_t (`torch.FloatTensor` of shape `(batch size, num classes, num latent pixels)`):
-                The log one-hot vectors of `x_t
-
+                The log one-hot vectors of `x_t`.
             cumulative (`bool`):
-                If cumulative is `False`,
-
+                If cumulative is `False`, the single step transition matrix `t-1`->`t` is used. If cumulative is
+                `True`, the cumulative transition matrix `0`->`t` is used.

         Returns:
             `torch.FloatTensor` of shape `(batch size, num classes - 1, num latent pixels)`:
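The rewritten docstrings above spell out the scheduler's two-call contract: `set_timesteps` once, then `step` with the transformer's log-probabilities and the current latent classes. Below is a minimal sketch of that loop; the tensor sizes are purely illustrative and the uniform log-probability "model output" is a hypothetical stand-in for the VQ-diffusion transformer, not anything defined in this diff.

```python
import torch
from diffusers import VQDiffusionScheduler

batch, num_classes, num_pixels = 1, 10, 16  # illustrative sizes, not real model dimensions

scheduler = VQDiffusionScheduler(num_vec_classes=num_classes)
scheduler.set_timesteps(num_inference_steps=12, device="cpu")

# Start from the fully masked latent image (the masked class is the last index).
sample = torch.full((batch, num_pixels), num_classes - 1, dtype=torch.long)

for t in scheduler.timesteps:
    # Stand-in for the transformer: uniform log-probabilities over the
    # non-masked classes, shape (batch, num classes - 1, num latent pixels).
    log_p_x_0 = torch.full((batch, num_classes - 1, num_pixels), 1.0 / (num_classes - 1)).log()
    sample = scheduler.step(log_p_x_0, t, sample, generator=None).prev_sample
```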
diffusers/utils/__init__.py
CHANGED
@@ -37,6 +37,7 @@ from .doc_utils import replace_example_docstring
 from .dynamic_modules_utils import get_class_from_dynamic_module
 from .hub_utils import (
     HF_HUB_OFFLINE,
+    PushToHubMixin,
     _add_variant,
     _get_model_file,
     extract_commit_hash,
@@ -64,7 +65,6 @@ from .import_utils import (
     is_note_seq_available,
     is_omegaconf_available,
     is_onnx_available,
-    is_safetensors_available,
     is_scipy_available,
     is_tensorboard_available,
     is_tf_available,
@@ -80,7 +80,7 @@ from .import_utils import (
 )
 from .logging import get_logger
 from .outputs import BaseOutput
-from .pil_utils import PIL_INTERPOLATION, numpy_to_pil, pt_to_pil
+from .pil_utils import PIL_INTERPOLATION, make_image_grid, numpy_to_pil, pt_to_pil
 from .torch_utils import is_compiled_module, randn_tensor

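The net effect on the public surface: `PushToHubMixin` and `make_image_grid` are now re-exported from `diffusers.utils`, while the `is_safetensors_available` re-export is gone. A quick post-upgrade sanity check, using only the names visible in the hunk above:

```python
from diffusers.utils import PIL_INTERPOLATION, PushToHubMixin, make_image_grid, numpy_to_pil

try:
    from diffusers.utils import is_safetensors_available  # removed from the re-exports in 0.20
except ImportError:
    print("is_safetensors_available is no longer importable from diffusers.utils")
```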
diffusers/utils/dummy_pt_objects.py
CHANGED
@@ -32,6 +32,21 @@ class AutoencoderKL(metaclass=DummyObject):
         requires_backends(cls, ["torch"])


+class AutoencoderTiny(metaclass=DummyObject):
+    _backends = ["torch"]
+
+    def __init__(self, *args, **kwargs):
+        requires_backends(self, ["torch"])
+
+    @classmethod
+    def from_config(cls, *args, **kwargs):
+        requires_backends(cls, ["torch"])
+
+    @classmethod
+    def from_pretrained(cls, *args, **kwargs):
+        requires_backends(cls, ["torch"])
+
+
 class ControlNetModel(metaclass=DummyObject):
     _backends = ["torch"]

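This dummy object mirrors the new `AutoencoderTiny` model (see `diffusers/models/autoencoder_tiny.py` in the file list) so that importing it without PyTorch installed raises a helpful error. A hedged usage sketch of the real class follows; the `madebyollin/taesd` checkpoint name is an assumption based on the commonly published tiny-VAE weights, not something stated in this diff.

```python
import torch
from diffusers import AutoencoderTiny, StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
)
# Swap the full VAE for the tiny autoencoder to speed up latent decoding.
pipe.vae = AutoencoderTiny.from_pretrained("madebyollin/taesd", torch_dtype=torch.float16)
pipe = pipe.to("cuda")

image = pipe("a watercolor lighthouse at dawn", num_inference_steps=25).images[0]
image.save("lighthouse.png")
```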
diffusers/utils/dummy_torch_and_transformers_objects.py
CHANGED
@@ -602,6 +602,21 @@ class StableDiffusionDiffEditPipeline(metaclass=DummyObject):
         requires_backends(cls, ["torch", "transformers"])


+class StableDiffusionGLIGENPipeline(metaclass=DummyObject):
+    _backends = ["torch", "transformers"]
+
+    def __init__(self, *args, **kwargs):
+        requires_backends(self, ["torch", "transformers"])
+
+    @classmethod
+    def from_config(cls, *args, **kwargs):
+        requires_backends(cls, ["torch", "transformers"])
+
+    @classmethod
+    def from_pretrained(cls, *args, **kwargs):
+        requires_backends(cls, ["torch", "transformers"])
+
+
 class StableDiffusionImageVariationPipeline(metaclass=DummyObject):
     _backends = ["torch", "transformers"]

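Likewise, this dummy guards the new `StableDiffusionGLIGENPipeline` (added as `pipeline_stable_diffusion_gligen.py`, +832 lines in the file list). A rough sketch of grounded generation with it; the repository id and the `gligen_*` argument names below are assumptions based on the upstream GLIGEN release rather than on anything shown in this diff.

```python
import torch
from diffusers import StableDiffusionGLIGENPipeline

pipe = StableDiffusionGLIGENPipeline.from_pretrained(
    "masterful/gligen-1-4-generation-text-box", torch_dtype=torch.float16
).to("cuda")

image = pipe(
    prompt="a birthday cake on a wooden table",
    gligen_phrases=["a birthday cake"],       # phrases to ground
    gligen_boxes=[[0.25, 0.45, 0.75, 0.9]],   # normalized xyxy box per phrase
    gligen_scheduled_sampling_beta=1.0,
    num_inference_steps=50,
).images[0]
image.save("cake.png")
```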
diffusers/utils/hub_utils.py
CHANGED
@@ -17,13 +17,22 @@
 import os
 import re
 import sys
+import tempfile
 import traceback
 import warnings
 from pathlib import Path
 from typing import Dict, Optional, Union
 from uuid import uuid4

-from huggingface_hub import
+from huggingface_hub import (
+    HfFolder,
+    ModelCard,
+    ModelCardData,
+    create_repo,
+    hf_hub_download,
+    upload_folder,
+    whoami,
+)
 from huggingface_hub.file_download import REGEX_COMMIT_HASH
 from huggingface_hub.utils import (
     EntryNotFoundError,
@@ -280,7 +289,7 @@ def _get_model_file(
     if (
         revision in DEPRECATED_REVISION_ARGS
         and (weights_name == WEIGHTS_NAME or weights_name == SAFETENSORS_WEIGHTS_NAME)
-        and version.parse(version.parse(__version__).base_version) >= version.parse("0.
+        and version.parse(version.parse(__version__).base_version) >= version.parse("0.22.0")
     ):
         try:
             model_file = hf_hub_download(
@@ -359,3 +368,97 @@ def _get_model_file(
                 f"Otherwise, make sure '{pretrained_model_name_or_path}' is the correct path to a directory "
                 f"containing a file named {weights_name}"
             )
+
+
+class PushToHubMixin:
+    """
+    A Mixin to push a model, scheduler, or pipeline to the Hugging Face Hub.
+    """
+
+    def _upload_folder(
+        self,
+        working_dir: Union[str, os.PathLike],
+        repo_id: str,
+        token: Optional[str] = None,
+        commit_message: Optional[str] = None,
+        create_pr: bool = False,
+    ):
+        """
+        Uploads all files in `working_dir` to `repo_id`.
+        """
+        if commit_message is None:
+            if "Model" in self.__class__.__name__:
+                commit_message = "Upload model"
+            elif "Scheduler" in self.__class__.__name__:
+                commit_message = "Upload scheduler"
+            else:
+                commit_message = f"Upload {self.__class__.__name__}"
+
+        logger.info(f"Uploading the files of {working_dir} to {repo_id}.")
+        return upload_folder(
+            repo_id=repo_id, folder_path=working_dir, token=token, commit_message=commit_message, create_pr=create_pr
+        )
+
+    def push_to_hub(
+        self,
+        repo_id: str,
+        commit_message: Optional[str] = None,
+        private: Optional[bool] = None,
+        token: Optional[str] = None,
+        create_pr: bool = False,
+        safe_serialization: bool = True,
+        variant: Optional[str] = None,
+    ) -> str:
+        """
+        Upload model, scheduler, or pipeline files to the 🤗 Hugging Face Hub.
+
+        Parameters:
+            repo_id (`str`):
+                The name of the repository you want to push your model, scheduler, or pipeline files to. It should
+                contain your organization name when pushing to an organization. `repo_id` can also be a path to a local
+                directory.
+            commit_message (`str`, *optional*):
+                Message to commit while pushing. Default to `"Upload {object}"`.
+            private (`bool`, *optional*):
+                Whether or not the repository created should be private.
+            token (`str`, *optional*):
+                The token to use as HTTP bearer authorization for remote files. The token generated when running
+                `huggingface-cli login` (stored in `~/.huggingface`).
+            create_pr (`bool`, *optional*, defaults to `False`):
+                Whether or not to create a PR with the uploaded files or directly commit.
+            safe_serialization (`bool`, *optional*, defaults to `True`):
+                Whether or not to convert the model weights to the `safetensors` format.
+            variant (`str`, *optional*):
+                If specified, weights are saved in the format `pytorch_model.<variant>.bin`.
+
+        Examples:
+
+        ```python
+        from diffusers import UNet2DConditionModel
+
+        unet = UNet2DConditionModel.from_pretrained("stabilityai/stable-diffusion-2", subfolder="unet")
+
+        # Push the `unet` to your namespace with the name "my-finetuned-unet".
+        unet.push_to_hub("my-finetuned-unet")
+
+        # Push the `unet` to an organization with the name "my-finetuned-unet".
+        unet.push_to_hub("your-org/my-finetuned-unet")
+        ```
+        """
+        repo_id = create_repo(repo_id, private=private, token=token, exist_ok=True).repo_id
+
+        # Save all files.
+        save_kwargs = {"safe_serialization": safe_serialization}
+        if "Scheduler" not in self.__class__.__name__:
+            save_kwargs.update({"variant": variant})
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            self.save_pretrained(tmpdir, **save_kwargs)
+
+            return self._upload_folder(
+                tmpdir,
+                repo_id,
+                token=token,
+                commit_message=commit_message,
+                create_pr=create_pr,
+            )
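The mixin's docstring covers models, schedulers, and pipelines alike, and the branch above skips the `variant` kwarg when the object is a scheduler. A short sketch of that scheduler case, assuming `SchedulerMixin` picks up this mixin in 0.20 (consistent with the `scheduling_utils.py` entry in the file list); the repository name is a placeholder and a valid Hub token is required.

```python
from diffusers import DDPMScheduler

scheduler = DDPMScheduler(num_train_timesteps=1000)

# Saves the scheduler config to a temporary directory and uploads it to the Hub.
scheduler.push_to_hub("my-ddpm-config", private=True)
```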
diffusers/utils/import_utils.py
CHANGED
diffusers/utils/pil_utils.py
CHANGED
@@ -1,3 +1,5 @@
+from typing import List
+
 import PIL.Image
 import PIL.ImageOps
 from packaging import version
@@ -46,3 +48,20 @@ def numpy_to_pil(images):
     pil_images = [Image.fromarray(image) for image in images]

     return pil_images
+
+
+def make_image_grid(images: List[PIL.Image.Image], rows: int, cols: int, resize: int = None) -> PIL.Image.Image:
+    """
+    Prepares a single grid of images. Useful for visualization purposes.
+    """
+    assert len(images) == rows * cols
+
+    if resize is not None:
+        images = [img.resize((resize, resize)) for img in images]
+
+    w, h = images[0].size
+    grid = Image.new("RGB", size=(cols * w, rows * h))
+
+    for i, img in enumerate(images):
+        grid.paste(img, box=(i % cols * w, i // cols * h))
+    return grid
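`make_image_grid` pairs naturally with `numpy_to_pil` from the same module. A small self-contained example, where random noise stands in for pipeline outputs (in practice the images would come from `pipe(...).images`):

```python
import numpy as np
from diffusers.utils import make_image_grid, numpy_to_pil

# Eight dummy images with values in [0, 1].
images = numpy_to_pil(np.random.rand(8, 64, 64, 3))

grid = make_image_grid(images, rows=2, cols=4, resize=256)
grid.save("grid.png")
```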
{diffusers-0.19.3.dist-info → diffusers-0.20.1.dist-info}/METADATA
CHANGED
@@ -1,13 +1,12 @@
 Metadata-Version: 2.1
 Name: diffusers
-Version: 0.19.3
+Version: 0.20.1
 Summary: Diffusers
 Home-page: https://github.com/huggingface/diffusers
 Author: The HuggingFace team
 Author-email: patrick@huggingface.co
 License: Apache
 Keywords: deep learning
-Platform: UNKNOWN
 Classifier: Development Status :: 5 - Production/Stable
 Classifier: Intended Audience :: Developers
 Classifier: Intended Audience :: Education
@@ -21,6 +20,7 @@ Classifier: Programming Language :: Python :: 3.9
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Requires-Python: >=3.7.0
 Description-Content-Type: text/markdown
+License-File: LICENSE
 Requires-Dist: importlib-metadata
 Requires-Dist: filelock
 Requires-Dist: huggingface-hub (>=0.13.2)
@@ -33,7 +33,7 @@ Provides-Extra: dev
 Requires-Dist: urllib3 (<=2.0.0) ; extra == 'dev'
 Requires-Dist: black (~=23.1) ; extra == 'dev'
 Requires-Dist: isort (>=5.5.4) ; extra == 'dev'
-Requires-Dist: ruff (
+Requires-Dist: ruff (==0.0.280) ; extra == 'dev'
 Requires-Dist: hf-doc-builder (>=0.3.0) ; extra == 'dev'
 Requires-Dist: compel (==0.1.8) ; extra == 'dev'
 Requires-Dist: datasets ; extra == 'dev'
@@ -69,7 +69,7 @@ Provides-Extra: quality
 Requires-Dist: urllib3 (<=2.0.0) ; extra == 'quality'
 Requires-Dist: black (~=23.1) ; extra == 'quality'
 Requires-Dist: isort (>=5.5.4) ; extra == 'quality'
-Requires-Dist: ruff (
+Requires-Dist: ruff (==0.0.280) ; extra == 'quality'
 Requires-Dist: hf-doc-builder (>=0.3.0) ; extra == 'quality'
 Provides-Extra: test
 Requires-Dist: compel (==0.1.8) ; extra == 'test'
@@ -101,7 +101,7 @@ Requires-Dist: Jinja2 ; extra == 'training'

 <p align="center">
     <br>
-    <img src="https://
+    <img src="https://raw.githubusercontent.com/huggingface/diffusers/main/docs/source/en/imgs/diffusers_library.jpg" width="400"/>
     <br>
 <p>
 <p align="center">
@@ -327,5 +327,3 @@ We also want to thank @heejkoo for the very helpful overview of papers, code and
 howpublished = {\url{https://github.com/huggingface/diffusers}}
 }
 ```
-
-