ai-edge-torch-nightly 0.2.0.dev20240611__py3-none-any.whl → 0.2.0.dev20240617__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of ai-edge-torch-nightly might be problematic.

Files changed (21)
  1. ai_edge_torch/convert/fx_passes/optimize_layout_transposes_pass/layout_partitioners/min_cut.py +19 -0
  2. ai_edge_torch/convert/fx_passes/optimize_layout_transposes_pass/pass_body.py +9 -2
  3. ai_edge_torch/generative/examples/stable_diffusion/convert_to_tflite.py +9 -6
  4. ai_edge_torch/generative/examples/stable_diffusion/decoder.py +33 -25
  5. ai_edge_torch/generative/examples/stable_diffusion/diffusion.py +523 -202
  6. ai_edge_torch/generative/examples/t5/t5_attention.py +10 -39
  7. ai_edge_torch/generative/layers/attention.py +154 -26
  8. ai_edge_torch/generative/layers/model_config.py +3 -0
  9. ai_edge_torch/generative/layers/unet/blocks_2d.py +473 -49
  10. ai_edge_torch/generative/layers/unet/builder.py +20 -2
  11. ai_edge_torch/generative/layers/unet/model_config.py +157 -5
  12. ai_edge_torch/generative/test/test_model_conversion.py +24 -0
  13. ai_edge_torch/generative/test/test_quantize.py +1 -0
  14. ai_edge_torch/generative/utilities/stable_diffusion_loader.py +860 -0
  15. ai_edge_torch/generative/utilities/t5_loader.py +33 -17
  16. {ai_edge_torch_nightly-0.2.0.dev20240611.dist-info → ai_edge_torch_nightly-0.2.0.dev20240617.dist-info}/METADATA +1 -1
  17. {ai_edge_torch_nightly-0.2.0.dev20240611.dist-info → ai_edge_torch_nightly-0.2.0.dev20240617.dist-info}/RECORD +20 -20
  18. ai_edge_torch/generative/utilities/autoencoder_loader.py +0 -298
  19. {ai_edge_torch_nightly-0.2.0.dev20240611.dist-info → ai_edge_torch_nightly-0.2.0.dev20240617.dist-info}/LICENSE +0 -0
  20. {ai_edge_torch_nightly-0.2.0.dev20240611.dist-info → ai_edge_torch_nightly-0.2.0.dev20240617.dist-info}/WHEEL +0 -0
  21. {ai_edge_torch_nightly-0.2.0.dev20240611.dist-info → ai_edge_torch_nightly-0.2.0.dev20240617.dist-info}/top_level.txt +0 -0
ai_edge_torch/generative/layers/unet/blocks_2d.py
@@ -13,13 +13,15 @@
 # limitations under the License.
 # ==============================================================================
 
-from typing import Optional
+from typing import List, Optional, Tuple
 
 import torch
 from torch import nn
 
+from ai_edge_torch.generative.layers.attention import CrossAttention
 from ai_edge_torch.generative.layers.attention import SelfAttention
 import ai_edge_torch.generative.layers.builder as layers_builder
+import ai_edge_torch.generative.layers.model_config as layers_cfg
 import ai_edge_torch.generative.layers.unet.builder as unet_builder
 import ai_edge_torch.generative.layers.unet.model_config as unet_cfg
 
@@ -78,6 +80,7 @@ class ResidualBlock2D(nn.Module):
     x = self.act_fn(x)
     x = self.conv_1(x)
     if self.time_emb_proj is not None:
+      time_emb = self.act_fn(time_emb)
       time_emb = self.time_emb_proj(time_emb)[:, :, None, None]
       x = x + time_emb
     x = self.norm_2(x)
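
The added line runs the time embedding through the activation before the linear projection, so the block follows the usual diffusion-UNet recipe: norm, activation, conv, then add the projected time embedding as a per-channel bias. A minimal sketch of that path, with illustrative shapes (not the package's API):

    import torch
    from torch import nn

    act = nn.SiLU()                         # stand-in for the block's act_fn
    time_emb_proj = nn.Linear(1280, 640)    # time channels -> feature channels
    x = torch.randn(2, 640, 32, 32)         # (B, C, H, W) feature map
    time_emb = torch.randn(2, 1280)         # (B, time_embedding_channels)
    t = time_emb_proj(act(time_emb))[:, :, None, None]  # activate, project, broadcast
    x = x + t                               # per-channel shift at every spatial location
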
@@ -90,7 +93,7 @@ class ResidualBlock2D(nn.Module):
 class AttentionBlock2D(nn.Module):
   """2D self attention block
 
-  x = SelfAttention(Norm(input_tensor))
+  x = SelfAttention(Norm(input_tensor)) + x
 
   """
 
@@ -101,8 +104,15 @@ class AttentionBlock2D(nn.Module):
       config (unet_cfg.AttentionBlock2DConfig): the configuration of this block.
     """
     super().__init__()
-    self.norm = layers_builder.build_norm(config.dims, config.normalization_config)
-    self.attention = SelfAttention(config.dims, config.attention_config, 0, True)
+    self.config = config
+    self.norm = layers_builder.build_norm(config.dim, config.normalization_config)
+    self.attention = SelfAttention(
+        config.attention_batch_size,
+        config.dim,
+        config.attention_config,
+        0,
+        enable_hlfb=config.enable_hlfb,
+    )
 
   def forward(self, input_tensor: torch.Tensor) -> torch.Tensor:
     """Forward function of the AttentionBlock2D.
  """Forward function of the AttentionBlock2D.
@@ -114,10 +124,17 @@ class AttentionBlock2D(nn.Module):
114
124
  output activation tensor after self attention.
115
125
  """
116
126
  residual = input_tensor
117
- x = self.norm(input_tensor)
118
- B, C, H, W = x.shape
119
- x = x.view(B, C, H * W)
120
- x = x.transpose(-1, -2)
127
+ B, C, H, W = input_tensor.shape
128
+ x = input_tensor
129
+ if self.config.normalization_config.type == layers_cfg.NormalizationType.GROUP_NORM:
130
+ x = self.norm(x)
131
+ x = input_tensor.view(B, C, H * W)
132
+ x = x.transpose(-1, -2)
133
+ else:
134
+ x = input_tensor.view(B, C, H * W)
135
+ x = x.transpose(-1, -2)
136
+ x = self.norm(x)
137
+ x = x.contiguous() # Prevent BATCH_MATMUL op in converted tflite.
121
138
  x = self.attention(x)
122
139
  x = x.transpose(-1, -2)
123
140
  x = x.view(B, C, H, W)
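
The reworked forward now branches on where normalization happens: group norm operates on the (B, C, H, W) layout, while other norm types apply after flattening to a (B, H*W, C) token sequence; either way the result is reshaped back for the residual add. A minimal sketch of the flatten/attend/unflatten round trip (illustrative shapes, attention elided):

    import torch

    B, C, H, W = 1, 512, 16, 16
    x = torch.randn(B, C, H, W)
    tokens = x.view(B, C, H * W).transpose(-1, -2).contiguous()  # (B, H*W, C) tokens
    # ... self attention over `tokens` happens here ...
    y = tokens.transpose(-1, -2).view(B, C, H, W)  # back to (B, C, H, W)
    assert torch.equal(x, y)  # the reshape round trip is lossless
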
@@ -125,28 +142,306 @@ class AttentionBlock2D(nn.Module):
     return x
 
 
-class UpDecoderBlock2D(nn.Module):
-  """Decoder block containing several residual blocks followed by an optional upsampler.
+class CrossAttentionBlock2D(nn.Module):
+  """2D cross attention block
 
-  input_tensor
-  |
-  ▼
-  ┌───────────────────┐
-  │  ResidualBlock2D  │ num_layers
-  └─────────┬─────────┘
+  x = CrossAttention(Norm(input_tensor), context) + x
+
+  """
+
+  def __init__(self, config: unet_cfg.CrossAttentionBlock2DConfig):
+    """Initialize an instance of the CrossAttentionBlock2D.
+
+    Args:
+      config (unet_cfg.CrossAttentionBlock2DConfig): the configuration of this block.
+    """
+    super().__init__()
+    self.config = config
+    self.norm = layers_builder.build_norm(config.query_dim, config.normalization_config)
+    self.attention = CrossAttention(
+        config.attention_batch_size,
+        config.query_dim,
+        config.cross_dim,
+        config.attention_config,
+        0,
+        enable_hlfb=config.enable_hlfb,
+    )
+
+  def forward(
+      self, input_tensor: torch.Tensor, context_tensor: torch.Tensor
+  ) -> torch.Tensor:
+    """Forward function of the CrossAttentionBlock2D.
+
+    Args:
+      input_tensor (torch.Tensor): the input tensor.
+      context_tensor (torch.Tensor): the context tensor to apply cross attention on.
+
+    Returns:
+      output activation tensor after cross attention.
+    """
+    residual = input_tensor
+    B, C, H, W = input_tensor.shape
+    x = input_tensor
+    if self.config.normalization_config.type == layers_cfg.NormalizationType.GROUP_NORM:
+      x = self.norm(x)
+      x = input_tensor.view(B, C, H * W)
+      x = x.transpose(-1, -2)
+    else:
+      x = input_tensor.view(B, C, H * W)
+      x = x.transpose(-1, -2)
+      x = self.norm(x)
+    x = self.attention(x, context_tensor)
+    x = x.transpose(-1, -2)
+    x = x.view(B, C, H, W)
+    x = x + residual
+    return x
+
+
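
CrossAttentionBlock2D follows the same flatten/attend/unflatten pattern, but keys and values come from a separate context sequence (e.g. text-encoder embeddings), which is how the diffusion UNet is conditioned on the prompt. A rough functional sketch using torch.nn.MultiheadAttention as a stand-in for the package's CrossAttention layer (dims illustrative):

    import torch
    from torch import nn

    B, C, H, W, cross_dim = 1, 320, 64, 64, 768
    attn = nn.MultiheadAttention(C, num_heads=8, kdim=cross_dim, vdim=cross_dim,
                                 batch_first=True)
    x = torch.randn(B, C, H, W)
    context = torch.randn(B, 77, cross_dim)    # e.g. CLIP text embeddings
    q = x.view(B, C, H * W).transpose(-1, -2)  # image tokens as queries
    out, _ = attn(q, context, context)         # keys/values from the context
    out = out.transpose(-1, -2).view(B, C, H, W)
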
+class FeedForwardBlock2D(nn.Module):
+  """2D feed forward block
+
+  x = w2(Activation(w1(Norm(x)))) + x
+
+  """
+
+  def __init__(
+      self,
+      config: unet_cfg.FeedForwardBlock2DConfig,
+  ):
+    super().__init__()
+    self.config = config
+    self.act = layers_builder.get_activation(config.activation_config)
+    self.norm = layers_builder.build_norm(config.dim, config.normalization_config)
+    if config.activation_config.type == layers_cfg.ActivationType.GE_GLU:
+      self.w1 = nn.Identity()
+      self.w2 = nn.Linear(config.hidden_dim, config.dim)
+    else:
+      self.w1 = nn.Linear(config.dim, config.hidden_dim)
+      self.w2 = nn.Linear(config.hidden_dim, config.dim)
+
+  def forward(self, input_tensor: torch.Tensor) -> torch.Tensor:
+    residual = input_tensor
+    B, C, H, W = input_tensor.shape
+    x = input_tensor
+    if self.config.normalization_config.type == layers_cfg.NormalizationType.GROUP_NORM:
+      x = self.norm(x)
+      x = input_tensor.view(B, C, H * W)
+      x = x.transpose(-1, -2)
+    else:
+      x = input_tensor.view(B, C, H * W)
+      x = x.transpose(-1, -2)
+      x = self.norm(x)
+    x = self.w1(x)
+    x = self.act(x)
+    x = self.w2(x)
+
+    x = x.transpose(-1, -2)  # (B, C, HW)
+    x = x.view((B, C, H, W))
+
+    return x + residual
+
+
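
In the GE_GLU branch w1 collapses to nn.Identity because, in the usual GEGLU formulation from the GLU-variants literature, the activation itself owns the input projection: it projects to 2 * hidden_dim and gates one half with GELU of the other, so only the output projection w2 remains a plain Linear. A sketch of that convention (assumed here, not read from the package's builder):

    import torch
    from torch import nn

    class GEGLU(nn.Module):
      """x -> Linear(dim, 2*hidden); split; a * gelu(b)."""

      def __init__(self, dim: int, hidden_dim: int):
        super().__init__()
        self.proj = nn.Linear(dim, 2 * hidden_dim)

      def forward(self, x: torch.Tensor) -> torch.Tensor:
        a, b = self.proj(x).chunk(2, dim=-1)  # each half is hidden_dim wide
        return a * nn.functional.gelu(b)      # gate one half with GELU of the other
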
+class TransformerBlock2D(nn.Module):
+  """Basic transformer block used in UNet of diffusion model
+
+    input_tensor    context_tensor
+         |                |
+  ┌─────────▼─────────┐   |
+  │       ConvIn      │   |
+  └─────────┬─────────┘   |
+            |             |
+            ▼             |
+  ┌───────────────────┐   |
+  │  Attention Block  │   |
+  └─────────┬─────────┘   |
+            │             |
+  ┌─────────▼──────────┐  |
+  │CrossAttention Block│◄─┘
+  └─────────┬──────────┘
 
   ┌─────────▼─────────┐
-  │    (Optional)     │
-  │     Upsampler     │
+  │  FeedForwardBlock │
   └─────────┬─────────┘
 
   ┌─────────▼─────────┐
-  │    (Optional)     │
-  │      Conv2D       │
+  │      ConvOut      │
   └─────────┬─────────┘
-            ▼
 
   hidden_states
+
+
+  """
+
+  def __init__(self, config: unet_cfg.TransformerBlock2Dconfig):
+    """Initialize an instance of the TransformerBlock2D.
+
+    Args:
+      config (unet_cfg.TransformerBlock2Dconfig): the configuration of this block.
+    """
+    super().__init__()
+    self.config = config
+    self.pre_conv_norm = layers_builder.build_norm(
+        config.attention_block_config.dim, config.pre_conv_normalization_config
+    )
+    self.conv_in = nn.Conv2d(
+        config.attention_block_config.dim,
+        config.attention_block_config.dim,
+        kernel_size=1,
+        padding=0,
+    )
+    self.self_attention = AttentionBlock2D(config.attention_block_config)
+    self.cross_attention = CrossAttentionBlock2D(config.cross_attention_block_config)
+    self.feed_forward = FeedForwardBlock2D(config.feed_forward_block_config)
+    self.conv_out = nn.Conv2d(
+        config.attention_block_config.dim,
+        config.attention_block_config.dim,
+        kernel_size=1,
+        padding=0,
+    )
+
+  def forward(self, x: torch.Tensor, context: torch.Tensor):
+    """Forward function of the TransformerBlock2D.
+
+    Args:
+      x (torch.Tensor): the input tensor.
+      context (torch.Tensor): the context tensor to apply cross attention on.
+
+    Returns:
+      output activation tensor after transformer block.
+    """
+    residual_long = x
+
+    x = self.pre_conv_norm(x)
+    x = self.conv_in(x)
+    x = self.self_attention(x)
+    x = self.cross_attention(x, context)
+    x = self.feed_forward(x)
+
+    x = self.conv_out(x)
+    x = x + residual_long
+
+    return x
+
+
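
TransformerBlock2D is a thin residual wrapper around the three blocks above: 1x1 conv in, self attention, cross attention, feed forward, 1x1 conv out, plus one long skip from the input. The control flow, reduced to stand-in modules (a sketch only, not the package's config plumbing):

    import torch
    from torch import nn

    class TinyTransformerBlock2D(nn.Module):
      def __init__(self, dim: int):
        super().__init__()
        self.pre_norm = nn.Identity()        # stand-in for pre_conv_norm
        self.conv_in = nn.Conv2d(dim, dim, kernel_size=1)
        self.self_attn = nn.Identity()       # stand-in for AttentionBlock2D
        self.cross_attn = lambda x, ctx: x   # stand-in for CrossAttentionBlock2D
        self.ff = nn.Identity()              # stand-in for FeedForwardBlock2D
        self.conv_out = nn.Conv2d(dim, dim, kernel_size=1)

      def forward(self, x: torch.Tensor, context: torch.Tensor) -> torch.Tensor:
        residual_long = x
        x = self.conv_in(self.pre_norm(x))
        x = self.self_attn(x)
        x = self.cross_attn(x, context)
        x = self.ff(x)
        return self.conv_out(x) + residual_long

    y = TinyTransformerBlock2D(320)(torch.randn(1, 320, 64, 64),
                                    torch.randn(1, 77, 768))
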
+class DownEncoderBlock2D(nn.Module):
+  """Encoder block containing several residual blocks with optional interleaved transformer blocks.
+
+  input_tensor
+       |
+  ┌──────────────▼─────────────┐
+  │  ┌────────────────────┐    │
+  │  │   ResidualBlock2D  │    │
+  │  └──────────┬─────────┘    │
+  │             │              │ num_layers
+  │  ┌──────────▼─────────┐    │
+  │  │     (Optional)     │    │
+  │  │ TransformerBlock2D │    │
+  │  └──────────┬─────────┘    │
+  └──────────────┬─────────────┘
+
+      ┌──────────▼─────────┐
+      │     (Optional)     │
+      │     Downsampler    │
+      └──────────┬─────────┘
+
+
+  hidden_states
+  """
+
+  def __init__(self, config: unet_cfg.DownEncoderBlock2DConfig):
+    """Initialize an instance of the DownEncoderBlock2D.
+
+    Args:
+      config (unet_cfg.DownEncoderBlock2DConfig): the configuration of this block.
+    """
+    super().__init__()
+    self.config = config
+    resnets = []
+    transformers = []
+    for i in range(config.num_layers):
+      input_channels = config.in_channels if i == 0 else config.out_channels
+      resnets.append(
+          ResidualBlock2D(
+              unet_cfg.ResidualBlock2DConfig(
+                  in_channels=input_channels,
+                  out_channels=config.out_channels,
+                  time_embedding_channels=config.time_embedding_channels,
+                  normalization_config=config.normalization_config,
+                  activation_config=config.activation_config,
+              )
+          )
+      )
+      if config.transformer_block_config:
+        transformers.append(TransformerBlock2D(config.transformer_block_config))
+    self.resnets = nn.ModuleList(resnets)
+    self.transformers = nn.ModuleList(transformers) if len(transformers) > 0 else None
+    if config.add_downsample:
+      self.downsampler = unet_builder.build_downsampling(config.sampling_config)
+    else:
+      self.downsampler = None
+
+  def forward(
+      self,
+      input_tensor: torch.Tensor,
+      time_emb: Optional[torch.Tensor] = None,
+      context_tensor: Optional[torch.Tensor] = None,
+      output_hidden_states: bool = False,
+  ) -> torch.Tensor | Tuple[torch.Tensor, List[torch.Tensor]]:
+    """Forward function of the DownEncoderBlock2D.
+
+    Args:
+      input_tensor (torch.Tensor): the input tensor.
+      time_emb (torch.Tensor): optional time embedding tensor, if the block is configured to accept
+        time embedding.
+      context_tensor (torch.Tensor): optional context tensor, if the block is configured to use
+        transformer blocks.
+      output_hidden_states (bool): whether to output hidden states, usually for skip connections.
+
+    Returns:
+      output hidden_states tensor after DownEncoderBlock2D.
+    """
+    hidden_states = input_tensor
+    output_states = []
+    for i, resnet in enumerate(self.resnets):
+      hidden_states = resnet(hidden_states, time_emb)
+      if self.transformers is not None:
+        hidden_states = self.transformers[i](hidden_states, context_tensor)
+      output_states.append(hidden_states)
+    if self.downsampler:
+      hidden_states = self.downsampler(hidden_states)
+      output_states.append(hidden_states)
+    if output_hidden_states:
+      return hidden_states, output_states
+    else:
+      return hidden_states
+
+
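
With output_hidden_states=True the encoder block also returns the activation after every resnet/transformer pair, plus the downsampled output: exactly the list a UNet stores for its decoder skip connections. The shape of that contract, reduced to plain Python (stand-in modules, illustrative only):

    import torch
    from torch import nn

    layers = nn.ModuleList([nn.Identity(), nn.Identity()])  # "resnets"
    downsampler = nn.AvgPool2d(2)

    hidden = torch.randn(1, 320, 64, 64)
    skips = []
    for layer in layers:
      hidden = layer(hidden)
      skips.append(hidden)   # one skip per layer
    hidden = downsampler(hidden)
    skips.append(hidden)     # and one after downsampling
    # `skips` later feeds SkipUpDecoderBlock2D, consumed in reverse order.
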
+class UpDecoderBlock2D(nn.Module):
+  """Decoder block containing several residual blocks with optional interleaved transformer blocks.
+
+  input_tensor
+       |
+  ┌──────────────▼─────────────┐
+  │  ┌────────────────────┐    │
+  │  │   ResidualBlock2D  │    │
+  │  └──────────┬─────────┘    │
+  │             │              │ num_layers
+  │  ┌──────────▼─────────┐    │
+  │  │     (Optional)     │    │
+  │  │ TransformerBlock2D │    │
+  │  └──────────┬─────────┘    │
+  └──────────────┬─────────────┘
+
+      ┌──────────▼─────────┐
+      │     (Optional)     │
+      │      Upsampler     │
+      └──────────┬─────────┘
+
+      ┌──────────▼─────────┐
+      │     (Optional)     │
+      │       Conv2D       │
+      └──────────┬─────────┘
+
+
+  hidden_states
   """
 
   def __init__(self, config: unet_cfg.UpDecoderBlock2DConfig):
@@ -158,6 +453,7 @@ class UpDecoderBlock2D(nn.Module):
     super().__init__()
     self.config = config
     resnets = []
+    transformers = []
    for i in range(config.num_layers):
       input_channels = config.in_channels if i == 0 else config.out_channels
       resnets.append(
@@ -171,7 +467,10 @@ class UpDecoderBlock2D(nn.Module):
           )
         )
       )
+      if config.transformer_block_config:
+        transformers.append(TransformerBlock2D(config.transformer_block_config))
     self.resnets = nn.ModuleList(resnets)
+    self.transformers = nn.ModuleList(transformers) if len(transformers) > 0 else None
     if config.add_upsample:
       self.upsampler = unet_builder.build_upsampling(config.sampling_config)
       if config.upsample_conv:
@@ -182,21 +481,130 @@ class UpDecoderBlock2D(nn.Module):
       self.upsampler = None
 
   def forward(
-      self, input_tensor: torch.Tensor, time_emb: Optional[torch.Tensor] = None
+      self,
+      input_tensor: torch.Tensor,
+      time_emb: Optional[torch.Tensor] = None,
+      context_tensor: Optional[torch.Tensor] = None,
   ) -> torch.Tensor:
     """Forward function of the UpDecoderBlock2D.
 
     Args:
       input_tensor (torch.Tensor): the input tensor.
       time_emb (torch.Tensor): optional time embedding tensor, if the block is configured to accept
-        time embedding context.
+        time embedding.
+      context_tensor (torch.Tensor): optional context tensor, if the block is configured to use
+        transformer blocks.
 
     Returns:
       output hidden_states tensor after UpDecoderBlock2D.
     """
     hidden_states = input_tensor
-    for resnet in self.resnets:
+    for i, resnet in enumerate(self.resnets):
       hidden_states = resnet(hidden_states, time_emb)
+      if self.transformers is not None:
+        hidden_states = self.transformers[i](hidden_states, context_tensor)
+    if self.upsampler:
+      hidden_states = self.upsampler(hidden_states)
+      if self.upsample_conv:
+        hidden_states = self.upsample_conv(hidden_states)
+    return hidden_states
+
+
+class SkipUpDecoderBlock2D(nn.Module):
+  """Decoder block containing skip connections and residual blocks with optional interleaved transformer blocks.
+
+  input_tensor, skip_connection_tensors
+       |
+  ┌──────────────▼─────────────┐
+  │  ┌────────────────────┐    │
+  │  │   ResidualBlock2D  │    │
+  │  └──────────┬─────────┘    │
+  │             │              │ num_layers
+  │  ┌──────────▼─────────┐    │
+  │  │     (Optional)     │    │
+  │  │ TransformerBlock2D │    │
+  │  └──────────┬─────────┘    │
+  └──────────────┬─────────────┘
+
+      ┌──────────▼─────────┐
+      │     (Optional)     │
+      │      Upsampler     │
+      └──────────┬─────────┘
+
+      ┌──────────▼─────────┐
+      │     (Optional)     │
+      │       Conv2D       │
+      └──────────┬─────────┘
+
+
+  hidden_states
+  """
+
+  def __init__(self, config: unet_cfg.SkipUpDecoderBlock2DConfig):
+    """Initialize an instance of the SkipUpDecoderBlock2D.
+
+    Args:
+      config (unet_cfg.SkipUpDecoderBlock2DConfig): the configuration of this block.
+    """
+    super().__init__()
+    self.config = config
+    resnets = []
+    transformers = []
+    for i in range(config.num_layers):
+      res_skip_channels = (
+          config.in_channels if (i == config.num_layers - 1) else config.out_channels
+      )
+      resnet_in_channels = config.prev_out_channels if i == 0 else config.out_channels
+      resnets.append(
+          ResidualBlock2D(
+              unet_cfg.ResidualBlock2DConfig(
+                  in_channels=resnet_in_channels + res_skip_channels,
+                  out_channels=config.out_channels,
+                  time_embedding_channels=config.time_embedding_channels,
+                  normalization_config=config.normalization_config,
+                  activation_config=config.activation_config,
+              )
+          )
+      )
+      if config.transformer_block_config:
+        transformers.append(TransformerBlock2D(config.transformer_block_config))
+    self.resnets = nn.ModuleList(resnets)
+    self.transformers = nn.ModuleList(transformers) if len(transformers) > 0 else None
+    if config.add_upsample:
+      self.upsampler = unet_builder.build_upsampling(config.sampling_config)
+      if config.upsample_conv:
+        self.upsample_conv = nn.Conv2d(
+            config.out_channels, config.out_channels, kernel_size=3, stride=1, padding=1
+        )
+    else:
+      self.upsampler = None
+
+  def forward(
+      self,
+      input_tensor: torch.Tensor,
+      skip_connection_tensors: List[torch.Tensor],
+      time_emb: Optional[torch.Tensor] = None,
+      context_tensor: Optional[torch.Tensor] = None,
+  ) -> torch.Tensor:
+    """Forward function of the SkipUpDecoderBlock2D.
+
+    Args:
+      input_tensor (torch.Tensor): the input tensor.
+      skip_connection_tensors (List[torch.Tensor]): the skip connection tensors from encoder blocks.
+      time_emb (torch.Tensor): optional time embedding tensor, if the block is configured to accept
+        time embedding.
+      context_tensor (torch.Tensor): optional context tensor, if the block is configured to use
+        transformer blocks.
+
+    Returns:
+      output hidden_states tensor after SkipUpDecoderBlock2D.
+    """
+    hidden_states = input_tensor
+    for i, (resnet, skip_connection_tensor) in enumerate(
+        zip(self.resnets, skip_connection_tensors)
+    ):
+      hidden_states = torch.cat([hidden_states, skip_connection_tensor], dim=1)
+      hidden_states = resnet(hidden_states, time_emb)
+      if self.transformers is not None:
+        hidden_states = self.transformers[i](hidden_states, context_tensor)
     if self.upsampler:
       hidden_states = self.upsampler(hidden_states)
       if self.upsample_conv:
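
The channel bookkeeping in SkipUpDecoderBlock2D is the subtle part: at layer i the running hidden state is concatenated on dim=1 with one encoder skip, so the resnet's in_channels is resnet_in_channels + res_skip_channels. Worked through with assumed values prev_out_channels=1280, out_channels=640, in_channels=320, num_layers=3 (illustrative, not taken from a real config):

    i = 0: 1280 (prev block) + 640 (skip) -> resnet in_channels 1920
    i = 1:  640              + 640 (skip) -> resnet in_channels 1280
    i = 2:  640              + 320 (skip; the last layer uses config.in_channels)
                                          -> resnet in_channels  960

Every resnet still emits out_channels (640 here), so the block consumes exactly num_layers skip tensors and hands a 640-channel feature map to the next decoder stage.
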
@@ -207,25 +615,30 @@ class UpDecoderBlock2D(nn.Module):
 class MidBlock2D(nn.Module):
   """Middle block containing at least one residual block with optional interleaved attention blocks.
 
-  input_tensor
-  |
-  ▼
-  ┌───────────────────┐
-  │  ResidualBlock2D  │
-  └─────────┬─────────┘
-
-  ┌─────────────▼─────────────┐
-  │  ┌───────────────────┐    │
-  │  │    (Optional)     │    │
-  │  │ AttentionBlock2D  │    │
-  │  └─────────┬─────────┘    │ num_layers
-  │            │              │
-  │  ┌─────────▼─────────┐    │
-  │  │  ResidualBlock2D  │    │
-  │  └───────────────────┘    │
-  └─────────────┬─────────────┘
-
-
+  input_tensor
+       |
+  ┌───────────────────┐
+  │  ResidualBlock2D  │
+  └─────────┬─────────┘
+
+  ┌──────────────▼─────────────┐
+  │  ┌────────────────────┐    │
+  │  │     (Optional)     │    │
+  │  │  AttentionBlock2D  │    │
+  │  └──────────┬─────────┘    │
+  │             │              │
+  │  ┌──────────▼─────────┐    │
+  │  │     (Optional)     │    │ num_layers
+  │  │ TransformerBlock2D │    │
+  │  └──────────┬─────────┘    │
+  │             │              │
+  │  ┌──────────▼─────────┐    │
+  │  │  ResidualBlock2D   │    │
+  │  └────────────────────┘    │
+  └──────────────┬─────────────┘
+
+
   hidden_states
   """
 
@@ -249,9 +662,12 @@ class MidBlock2D(nn.Module):
         )
     ]
     attentions = []
+    transformers = []
     for i in range(config.num_layers):
       if self.config.attention_block_config:
         attentions.append(AttentionBlock2D(config.attention_block_config))
+      if self.config.transformer_block_config:
+        transformers.append(TransformerBlock2D(config.transformer_block_config))
       resnets.append(
           ResidualBlock2D(
               unet_cfg.ResidualBlock2DConfig(
@@ -264,24 +680,32 @@ class MidBlock2D(nn.Module):
               )
           )
       )
     self.resnets = nn.ModuleList(resnets)
-    self.attentions = nn.ModuleList(attentions)
+    self.attentions = nn.ModuleList(attentions) if len(attentions) > 0 else None
+    self.transformers = nn.ModuleList(transformers) if len(transformers) > 0 else None
 
   def forward(
-      self, input_tensor: torch.Tensor, time_emb: Optional[torch.Tensor] = None
+      self,
+      input_tensor: torch.Tensor,
+      time_emb: Optional[torch.Tensor] = None,
+      context_tensor: Optional[torch.Tensor] = None,
   ) -> torch.Tensor:
     """Forward function of the MidBlock2D.
 
     Args:
       input_tensor (torch.Tensor): the input tensor.
       time_emb (torch.Tensor): optional time embedding tensor, if the block is configured to accept
-        time embedding context.
+        time embedding.
+      context_tensor (torch.Tensor): optional context tensor, if the block is configured to use
+        transformer blocks.
 
     Returns:
       output hidden_states tensor after MidBlock2D.
     """
     hidden_states = self.resnets[0](input_tensor, time_emb)
-    for attn, resnet in zip(self.attentions, self.resnets[1:]):
-      if attn is not None:
-        hidden_states = attn(hidden_states)
+    for i, resnet in enumerate(self.resnets[1:]):
+      if self.attentions is not None:
+        hidden_states = self.attentions[i](hidden_states)
+      if self.transformers is not None:
+        hidden_states = self.transformers[i](hidden_states, context_tensor)
       hidden_states = resnet(hidden_states, time_emb)
     return hidden_states
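
MidBlock2D keeps num_layers + 1 resnets: resnets[0] runs once up front, and each of the num_layers iterations then applies optional attention, optional transformer, and the next resnet, which is why the loop walks self.resnets[1:]. The off-by-one, reduced to stand-ins (illustrative only):

    import torch
    from torch import nn

    num_layers, dim = 2, 320
    resnets = nn.ModuleList(nn.Identity() for _ in range(num_layers + 1))
    attentions = nn.ModuleList(nn.Identity() for _ in range(num_layers))

    hidden = resnets[0](torch.randn(1, dim, 8, 8))  # first resnet, outside the loop
    for i in range(num_layers):
      hidden = attentions[i](hidden)   # optional per-layer attention
      hidden = resnets[i + 1](hidden)  # resnet i + 1 closes layer i
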
ai_edge_torch/generative/layers/unet/builder.py
@@ -15,15 +15,33 @@
 # Builder utils for individual components.
 
 from torch import nn
-import torch.nn.functional as F
 
 import ai_edge_torch.generative.layers.unet.model_config as unet_config
 
 
-def build_upsampling(config: unet_config.SamplingConfig):
+def build_upsampling(config: unet_config.UpSamplingConfig):
   if config.mode == unet_config.SamplingType.NEAREST:
     return nn.UpsamplingNearest2d(scale_factor=config.scale_factor)
   elif config.mode == unet_config.SamplingType.BILINEAR:
     return nn.UpsamplingBilinear2d(scale_factor=config.scale_factor)
   else:
     raise ValueError("Unsupported upsampling type.")
+
+
+def build_downsampling(config: unet_config.DownSamplingConfig):
+  if config.mode == unet_config.SamplingType.AVERAGE:
+    return nn.AvgPool2d(config.kernel_size, config.stride, padding=config.padding)
+  elif config.mode == unet_config.SamplingType.CONVOLUTION:
+    out_channels = (
+        config.in_channels if config.out_channels is None else config.out_channels
+    )
+    padding = (0, 1, 0, 1) if config.padding == 0 else config.padding
+    return nn.Conv2d(
+        config.in_channels,
+        out_channels=out_channels,
+        kernel_size=config.kernel_size,
+        stride=config.stride,
+        padding=padding,
+    )
+  else:
+    raise ValueError("Unsupported downsampling type.")
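
A quick usage sketch of the new build_downsampling. The real unet_config.DownSamplingConfig dataclass lives in model_config.py, so a SimpleNamespace with field names inferred from this diff stands in for it here (illustrative, not the package's documented API):

    import torch
    from types import SimpleNamespace

    import ai_edge_torch.generative.layers.unet.builder as unet_builder
    import ai_edge_torch.generative.layers.unet.model_config as unet_config

    cfg = SimpleNamespace(
        mode=unet_config.SamplingType.AVERAGE, kernel_size=2, stride=2, padding=0
    )
    pool = unet_builder.build_downsampling(cfg)  # nn.AvgPool2d(2, 2, padding=0)
    y = pool(torch.randn(1, 320, 64, 64))        # halves H and W to (1, 320, 32, 32)

In the CONVOLUTION branch, padding == 0 is promoted to the asymmetric tuple (0, 1, 0, 1), i.e. pad right and bottom only, mirroring TensorFlow's "SAME" convention for stride-2 convolutions.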