PyPI - diffusers - Versions diffs - 0.27.2__py3-none-any.whl → 0.28.1__py3-none-any.whl - Mend

diffusers 0.27.2__py3-none-any.whl → 0.28.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (278) hide show

diffusers/models/transformers/transformer_temporal.py CHANGED Viewed

@@ -31,11 +31,11 @@ class TransformerTemporalModelOutput(BaseOutput):
     The output of [`TransformerTemporalModel`].
     Args:
-        sample (`torch.FloatTensor` of shape `(batch_size x num_frames, num_channels, height, width)`):
+        sample (`torch.Tensor` of shape `(batch_size x num_frames, num_channels, height, width)`):
             The hidden states output conditioned on `encoder_hidden_states` input.
     """
-    sample: torch.FloatTensor
+    sample: torch.Tensor
 class TransformerTemporalModel(ModelMixin, ConfigMixin):
@@ -120,7 +120,7 @@ class TransformerTemporalModel(ModelMixin, ConfigMixin):
     def forward(
         self,
-        hidden_states: torch.FloatTensor,
+        hidden_states: torch.Tensor,
         encoder_hidden_states: Optional[torch.LongTensor] = None,
         timestep: Optional[torch.LongTensor] = None,
         class_labels: torch.LongTensor = None,
@@ -132,7 +132,7 @@ class TransformerTemporalModel(ModelMixin, ConfigMixin):
         The [`TransformerTemporal`] forward method.
         Args:
-            hidden_states (`torch.LongTensor` of shape `(batch size, num latent pixels)` if discrete, `torch.FloatTensor` of shape `(batch size, channel, height, width)` if continuous):
+            hidden_states (`torch.LongTensor` of shape `(batch size, num latent pixels)` if discrete, `torch.Tensor` of shape `(batch size, channel, height, width)` if continuous):
                 Input hidden_states.
             encoder_hidden_states ( `torch.LongTensor` of shape `(batch size, encoder_hidden_states dim)`, *optional*):
                 Conditional embeddings for cross attention layer. If not given, cross-attention defaults to
@@ -283,7 +283,7 @@ class TransformerSpatioTemporalModel(nn.Module):
     ):
         """
         Args:
-            hidden_states (`torch.FloatTensor` of shape `(batch size, channel, height, width)`):
+            hidden_states (`torch.Tensor` of shape `(batch size, channel, height, width)`):
                 Input hidden_states.
             num_frames (`int`):
                 The number of frames to be processed per batch. This is used to reshape the hidden states.
@@ -294,8 +294,8 @@ class TransformerSpatioTemporalModel(nn.Module):
                 A tensor indicating whether the input contains only images. 1 indicates that the input contains only
                 images, 0 indicates that the input contains video frames.
             return_dict (`bool`, *optional*, defaults to `True`):
-                Whether or not to return a [`~models.transformer_temporal.TransformerTemporalModelOutput`] instead of a plain
-                tuple.
+                Whether or not to return a [`~models.transformer_temporal.TransformerTemporalModelOutput`] instead of a
+                plain tuple.
         Returns:
             [`~models.transformer_temporal.TransformerTemporalModelOutput`] or `tuple`:
@@ -311,10 +311,10 @@ class TransformerSpatioTemporalModel(nn.Module):
         time_context_first_timestep = time_context[None, :].reshape(
             batch_size, num_frames, -1, time_context.shape[-1]
         )[:, 0]
-        time_context = time_context_first_timestep[None, :].broadcast_to(
-            height * width, batch_size, 1, time_context.shape[-1]
+        time_context = time_context_first_timestep[:, None].broadcast_to(
+            batch_size, height * width, time_context.shape[-2], time_context.shape[-1]
         )
-        time_context = time_context.reshape(height * width * batch_size, 1, time_context.shape[-1])
+        time_context = time_context.reshape(batch_size * height * width, -1, time_context.shape[-1])
         residual = hidden_states

diffusers/models/unets/unet_1d.py CHANGED Viewed

@@ -31,11 +31,11 @@ class UNet1DOutput(BaseOutput):
     The output of [`UNet1DModel`].
     Args:
-        sample (`torch.FloatTensor` of shape `(batch_size, num_channels, sample_size)`):
+        sample (`torch.Tensor` of shape `(batch_size, num_channels, sample_size)`):
             The hidden states output from the last layer of the model.
     """
-    sample: torch.FloatTensor
+    sample: torch.Tensor
 class UNet1DModel(ModelMixin, ConfigMixin):
@@ -194,7 +194,7 @@ class UNet1DModel(ModelMixin, ConfigMixin):
     def forward(
         self,
-        sample: torch.FloatTensor,
+        sample: torch.Tensor,
         timestep: Union[torch.Tensor, float, int],
         return_dict: bool = True,
     ) -> Union[UNet1DOutput, Tuple]:
@@ -202,9 +202,9 @@ class UNet1DModel(ModelMixin, ConfigMixin):
         The [`UNet1DModel`] forward method.
         Args:
-            sample (`torch.FloatTensor`):
+            sample (`torch.Tensor`):
                 The noisy input tensor with the following shape `(batch_size, num_channels, sample_size)`.
-            timestep (`torch.FloatTensor` or `float` or `int`): The number of timesteps to denoise an input.
+            timestep (`torch.Tensor` or `float` or `int`): The number of timesteps to denoise an input.
             return_dict (`bool`, *optional*, defaults to `True`):
                 Whether or not to return a [`~models.unet_1d.UNet1DOutput`] instead of a plain tuple.

diffusers/models/unets/unet_1d_blocks.py CHANGED Viewed

@@ -66,7 +66,7 @@ class DownResnetBlock1D(nn.Module):
         if add_downsample:
             self.downsample = Downsample1D(out_channels, use_conv=True, padding=1)
-    def forward(self, hidden_states: torch.FloatTensor, temb: Optional[torch.FloatTensor] = None) -> torch.FloatTensor:
+    def forward(self, hidden_states: torch.Tensor, temb: Optional[torch.Tensor] = None) -> torch.Tensor:
         output_states = ()
         hidden_states = self.resnets[0](hidden_states, temb)
@@ -128,10 +128,10 @@ class UpResnetBlock1D(nn.Module):
     def forward(
         self,
-        hidden_states: torch.FloatTensor,
-        res_hidden_states_tuple: Optional[Tuple[torch.FloatTensor, ...]] = None,
-        temb: Optional[torch.FloatTensor] = None,
-    ) -> torch.FloatTensor:
+        hidden_states: torch.Tensor,
+        res_hidden_states_tuple: Optional[Tuple[torch.Tensor, ...]] = None,
+        temb: Optional[torch.Tensor] = None,
+    ) -> torch.Tensor:
         if res_hidden_states_tuple is not None:
             res_hidden_states = res_hidden_states_tuple[-1]
             hidden_states = torch.cat((hidden_states, res_hidden_states), dim=1)
@@ -161,7 +161,7 @@ class ValueFunctionMidBlock1D(nn.Module):
         self.res2 = ResidualTemporalBlock1D(in_channels // 2, in_channels // 4, embed_dim=embed_dim)
         self.down2 = Downsample1D(out_channels // 4, use_conv=True)
-    def forward(self, x: torch.FloatTensor, temb: Optional[torch.FloatTensor] = None) -> torch.FloatTensor:
+    def forward(self, x: torch.Tensor, temb: Optional[torch.Tensor] = None) -> torch.Tensor:
         x = self.res1(x, temb)
         x = self.down1(x)
         x = self.res2(x, temb)
@@ -209,7 +209,7 @@ class MidResTemporalBlock1D(nn.Module):
         if self.upsample and self.downsample:
             raise ValueError("Block cannot downsample and upsample")
-    def forward(self, hidden_states: torch.FloatTensor, temb: torch.FloatTensor) -> torch.FloatTensor:
+    def forward(self, hidden_states: torch.Tensor, temb: torch.Tensor) -> torch.Tensor:
         hidden_states = self.resnets[0](hidden_states, temb)
         for resnet in self.resnets[1:]:
             hidden_states = resnet(hidden_states, temb)
@@ -230,7 +230,7 @@ class OutConv1DBlock(nn.Module):
         self.final_conv1d_act = get_activation(act_fn)
         self.final_conv1d_2 = nn.Conv1d(embed_dim, out_channels, 1)
-    def forward(self, hidden_states: torch.FloatTensor, temb: Optional[torch.FloatTensor] = None) -> torch.FloatTensor:
+    def forward(self, hidden_states: torch.Tensor, temb: Optional[torch.Tensor] = None) -> torch.Tensor:
         hidden_states = self.final_conv1d_1(hidden_states)
         hidden_states = rearrange_dims(hidden_states)
         hidden_states = self.final_conv1d_gn(hidden_states)
@@ -251,7 +251,7 @@ class OutValueFunctionBlock(nn.Module):
             ]
         )
-    def forward(self, hidden_states: torch.FloatTensor, temb: torch.FloatTensor) -> torch.FloatTensor:
+    def forward(self, hidden_states: torch.Tensor, temb: torch.Tensor) -> torch.Tensor:
         hidden_states = hidden_states.view(hidden_states.shape[0], -1)
         hidden_states = torch.cat((hidden_states, temb), dim=-1)
         for layer in self.final_block:
@@ -288,7 +288,7 @@ class Downsample1d(nn.Module):
         self.pad = kernel_1d.shape[0] // 2 - 1
         self.register_buffer("kernel", kernel_1d)
-    def forward(self, hidden_states: torch.FloatTensor) -> torch.FloatTensor:
+    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
         hidden_states = F.pad(hidden_states, (self.pad,) * 2, self.pad_mode)
         weight = hidden_states.new_zeros([hidden_states.shape[1], hidden_states.shape[1], self.kernel.shape[0]])
         indices = torch.arange(hidden_states.shape[1], device=hidden_states.device)
@@ -305,7 +305,7 @@ class Upsample1d(nn.Module):
         self.pad = kernel_1d.shape[0] // 2 - 1
         self.register_buffer("kernel", kernel_1d)
-    def forward(self, hidden_states: torch.FloatTensor, temb: Optional[torch.FloatTensor] = None) -> torch.FloatTensor:
+    def forward(self, hidden_states: torch.Tensor, temb: Optional[torch.Tensor] = None) -> torch.Tensor:
         hidden_states = F.pad(hidden_states, ((self.pad + 1) // 2,) * 2, self.pad_mode)
         weight = hidden_states.new_zeros([hidden_states.shape[1], hidden_states.shape[1], self.kernel.shape[0]])
         indices = torch.arange(hidden_states.shape[1], device=hidden_states.device)
@@ -335,7 +335,7 @@ class SelfAttention1d(nn.Module):
         new_projection = projection.view(new_projection_shape).permute(0, 2, 1, 3)
         return new_projection
-    def forward(self, hidden_states: torch.FloatTensor) -> torch.FloatTensor:
+    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
         residual = hidden_states
         batch, channel_dim, seq = hidden_states.shape
@@ -390,7 +390,7 @@ class ResConvBlock(nn.Module):
             self.group_norm_2 = nn.GroupNorm(1, out_channels)
             self.gelu_2 = nn.GELU()
-    def forward(self, hidden_states: torch.FloatTensor) -> torch.FloatTensor:
+    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
         residual = self.conv_skip(hidden_states) if self.has_conv_skip else hidden_states
         hidden_states = self.conv_1(hidden_states)
@@ -435,7 +435,7 @@ class UNetMidBlock1D(nn.Module):
         self.attentions = nn.ModuleList(attentions)
         self.resnets = nn.ModuleList(resnets)
-    def forward(self, hidden_states: torch.FloatTensor, temb: Optional[torch.FloatTensor] = None) -> torch.FloatTensor:
+    def forward(self, hidden_states: torch.Tensor, temb: Optional[torch.Tensor] = None) -> torch.Tensor:
         hidden_states = self.down(hidden_states)
         for attn, resnet in zip(self.attentions, self.resnets):
             hidden_states = resnet(hidden_states)
@@ -466,7 +466,7 @@ class AttnDownBlock1D(nn.Module):
         self.attentions = nn.ModuleList(attentions)
         self.resnets = nn.ModuleList(resnets)
-    def forward(self, hidden_states: torch.FloatTensor, temb: Optional[torch.FloatTensor] = None) -> torch.FloatTensor:
+    def forward(self, hidden_states: torch.Tensor, temb: Optional[torch.Tensor] = None) -> torch.Tensor:
         hidden_states = self.down(hidden_states)
         for resnet, attn in zip(self.resnets, self.attentions):
@@ -490,7 +490,7 @@ class DownBlock1D(nn.Module):
         self.resnets = nn.ModuleList(resnets)
-    def forward(self, hidden_states: torch.FloatTensor, temb: Optional[torch.FloatTensor] = None) -> torch.FloatTensor:
+    def forward(self, hidden_states: torch.Tensor, temb: Optional[torch.Tensor] = None) -> torch.Tensor:
         hidden_states = self.down(hidden_states)
         for resnet in self.resnets:
@@ -512,7 +512,7 @@ class DownBlock1DNoSkip(nn.Module):
         self.resnets = nn.ModuleList(resnets)
-    def forward(self, hidden_states: torch.FloatTensor, temb: Optional[torch.FloatTensor] = None) -> torch.FloatTensor:
+    def forward(self, hidden_states: torch.Tensor, temb: Optional[torch.Tensor] = None) -> torch.Tensor:
         hidden_states = torch.cat([hidden_states, temb], dim=1)
         for resnet in self.resnets:
             hidden_states = resnet(hidden_states)
@@ -542,10 +542,10 @@ class AttnUpBlock1D(nn.Module):
     def forward(
         self,
-        hidden_states: torch.FloatTensor,
-        res_hidden_states_tuple: Tuple[torch.FloatTensor, ...],
-        temb: Optional[torch.FloatTensor] = None,
-    ) -> torch.FloatTensor:
+        hidden_states: torch.Tensor,
+        res_hidden_states_tuple: Tuple[torch.Tensor, ...],
+        temb: Optional[torch.Tensor] = None,
+    ) -> torch.Tensor:
         res_hidden_states = res_hidden_states_tuple[-1]
         hidden_states = torch.cat([hidden_states, res_hidden_states], dim=1)
@@ -574,10 +574,10 @@ class UpBlock1D(nn.Module):
     def forward(
         self,
-        hidden_states: torch.FloatTensor,
-        res_hidden_states_tuple: Tuple[torch.FloatTensor, ...],
-        temb: Optional[torch.FloatTensor] = None,
-    ) -> torch.FloatTensor:
+        hidden_states: torch.Tensor,
+        res_hidden_states_tuple: Tuple[torch.Tensor, ...],
+        temb: Optional[torch.Tensor] = None,
+    ) -> torch.Tensor:
         res_hidden_states = res_hidden_states_tuple[-1]
         hidden_states = torch.cat([hidden_states, res_hidden_states], dim=1)
@@ -604,10 +604,10 @@ class UpBlock1DNoSkip(nn.Module):
     def forward(
         self,
-        hidden_states: torch.FloatTensor,
-        res_hidden_states_tuple: Tuple[torch.FloatTensor, ...],
-        temb: Optional[torch.FloatTensor] = None,
-    ) -> torch.FloatTensor:
+        hidden_states: torch.Tensor,
+        res_hidden_states_tuple: Tuple[torch.Tensor, ...],
+        temb: Optional[torch.Tensor] = None,
+    ) -> torch.Tensor:
         res_hidden_states = res_hidden_states_tuple[-1]
         hidden_states = torch.cat([hidden_states, res_hidden_states], dim=1)

diffusers/models/unets/unet_2d.py CHANGED Viewed

@@ -30,11 +30,11 @@ class UNet2DOutput(BaseOutput):
     The output of [`UNet2DModel`].
     Args:
-        sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
+        sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)`):
             The hidden states output from the last layer of the model.
     """
-    sample: torch.FloatTensor
+    sample: torch.Tensor
 class UNet2DModel(ModelMixin, ConfigMixin):
@@ -242,7 +242,7 @@ class UNet2DModel(ModelMixin, ConfigMixin):
     def forward(
         self,
-        sample: torch.FloatTensor,
+        sample: torch.Tensor,
         timestep: Union[torch.Tensor, float, int],
         class_labels: Optional[torch.Tensor] = None,
         return_dict: bool = True,
@@ -251,10 +251,10 @@ class UNet2DModel(ModelMixin, ConfigMixin):
         The [`UNet2DModel`] forward method.
         Args:
-            sample (`torch.FloatTensor`):
+            sample (`torch.Tensor`):
                 The noisy input tensor with the following shape `(batch, channel, height, width)`.
-            timestep (`torch.FloatTensor` or `float` or `int`): The number of timesteps to denoise an input.
-            class_labels (`torch.FloatTensor`, *optional*, defaults to `None`):
+            timestep (`torch.Tensor` or `float` or `int`): The number of timesteps to denoise an input.
+            class_labels (`torch.Tensor`, *optional*, defaults to `None`):
                 Optional class labels for conditioning. Their embeddings will be summed with the timestep embeddings.
             return_dict (`bool`, *optional*, defaults to `True`):
                 Whether or not to return a [`~models.unet_2d.UNet2DOutput`] instead of a plain tuple.

diffusers 0.27.2__py3-none-any.whl → 0.28.1__py3-none-any.whl

diffusers 0.27.2__py3-none-any.whl → 0.28.1__py3-none-any.whl