diffsynth 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120):
  1. diffsynth/__init__.py +6 -0
  2. diffsynth/configs/__init__.py +0 -0
  3. diffsynth/configs/model_config.py +243 -0
  4. diffsynth/controlnets/__init__.py +2 -0
  5. diffsynth/controlnets/controlnet_unit.py +53 -0
  6. diffsynth/controlnets/processors.py +51 -0
  7. diffsynth/data/__init__.py +1 -0
  8. diffsynth/data/simple_text_image.py +35 -0
  9. diffsynth/data/video.py +148 -0
  10. diffsynth/extensions/ESRGAN/__init__.py +118 -0
  11. diffsynth/extensions/FastBlend/__init__.py +63 -0
  12. diffsynth/extensions/FastBlend/api.py +397 -0
  13. diffsynth/extensions/FastBlend/cupy_kernels.py +119 -0
  14. diffsynth/extensions/FastBlend/data.py +146 -0
  15. diffsynth/extensions/FastBlend/patch_match.py +298 -0
  16. diffsynth/extensions/FastBlend/runners/__init__.py +4 -0
  17. diffsynth/extensions/FastBlend/runners/accurate.py +35 -0
  18. diffsynth/extensions/FastBlend/runners/balanced.py +46 -0
  19. diffsynth/extensions/FastBlend/runners/fast.py +141 -0
  20. diffsynth/extensions/FastBlend/runners/interpolation.py +121 -0
  21. diffsynth/extensions/RIFE/__init__.py +242 -0
  22. diffsynth/extensions/__init__.py +0 -0
  23. diffsynth/models/__init__.py +1 -0
  24. diffsynth/models/attention.py +89 -0
  25. diffsynth/models/downloader.py +66 -0
  26. diffsynth/models/hunyuan_dit.py +451 -0
  27. diffsynth/models/hunyuan_dit_text_encoder.py +163 -0
  28. diffsynth/models/kolors_text_encoder.py +1363 -0
  29. diffsynth/models/lora.py +195 -0
  30. diffsynth/models/model_manager.py +536 -0
  31. diffsynth/models/sd3_dit.py +798 -0
  32. diffsynth/models/sd3_text_encoder.py +1107 -0
  33. diffsynth/models/sd3_vae_decoder.py +81 -0
  34. diffsynth/models/sd3_vae_encoder.py +95 -0
  35. diffsynth/models/sd_controlnet.py +588 -0
  36. diffsynth/models/sd_ipadapter.py +57 -0
  37. diffsynth/models/sd_motion.py +199 -0
  38. diffsynth/models/sd_text_encoder.py +321 -0
  39. diffsynth/models/sd_unet.py +1108 -0
  40. diffsynth/models/sd_vae_decoder.py +336 -0
  41. diffsynth/models/sd_vae_encoder.py +282 -0
  42. diffsynth/models/sdxl_ipadapter.py +122 -0
  43. diffsynth/models/sdxl_motion.py +104 -0
  44. diffsynth/models/sdxl_text_encoder.py +759 -0
  45. diffsynth/models/sdxl_unet.py +1899 -0
  46. diffsynth/models/sdxl_vae_decoder.py +24 -0
  47. diffsynth/models/sdxl_vae_encoder.py +24 -0
  48. diffsynth/models/svd_image_encoder.py +505 -0
  49. diffsynth/models/svd_unet.py +2004 -0
  50. diffsynth/models/svd_vae_decoder.py +578 -0
  51. diffsynth/models/svd_vae_encoder.py +139 -0
  52. diffsynth/models/tiler.py +106 -0
  53. diffsynth/pipelines/__init__.py +9 -0
  54. diffsynth/pipelines/base.py +34 -0
  55. diffsynth/pipelines/dancer.py +178 -0
  56. diffsynth/pipelines/hunyuan_image.py +274 -0
  57. diffsynth/pipelines/pipeline_runner.py +105 -0
  58. diffsynth/pipelines/sd3_image.py +132 -0
  59. diffsynth/pipelines/sd_image.py +173 -0
  60. diffsynth/pipelines/sd_video.py +266 -0
  61. diffsynth/pipelines/sdxl_image.py +191 -0
  62. diffsynth/pipelines/sdxl_video.py +223 -0
  63. diffsynth/pipelines/svd_video.py +297 -0
  64. diffsynth/processors/FastBlend.py +142 -0
  65. diffsynth/processors/PILEditor.py +28 -0
  66. diffsynth/processors/RIFE.py +77 -0
  67. diffsynth/processors/__init__.py +0 -0
  68. diffsynth/processors/base.py +6 -0
  69. diffsynth/processors/sequencial_processor.py +41 -0
  70. diffsynth/prompters/__init__.py +6 -0
  71. diffsynth/prompters/base_prompter.py +57 -0
  72. diffsynth/prompters/hunyuan_dit_prompter.py +69 -0
  73. diffsynth/prompters/kolors_prompter.py +353 -0
  74. diffsynth/prompters/prompt_refiners.py +77 -0
  75. diffsynth/prompters/sd3_prompter.py +92 -0
  76. diffsynth/prompters/sd_prompter.py +73 -0
  77. diffsynth/prompters/sdxl_prompter.py +61 -0
  78. diffsynth/schedulers/__init__.py +3 -0
  79. diffsynth/schedulers/continuous_ode.py +59 -0
  80. diffsynth/schedulers/ddim.py +79 -0
  81. diffsynth/schedulers/flow_match.py +51 -0
  82. diffsynth/tokenizer_configs/__init__.py +0 -0
  83. diffsynth/tokenizer_configs/hunyuan_dit/tokenizer/special_tokens_map.json +7 -0
  84. diffsynth/tokenizer_configs/hunyuan_dit/tokenizer/tokenizer_config.json +16 -0
  85. diffsynth/tokenizer_configs/hunyuan_dit/tokenizer/vocab.txt +47020 -0
  86. diffsynth/tokenizer_configs/hunyuan_dit/tokenizer/vocab_org.txt +21128 -0
  87. diffsynth/tokenizer_configs/hunyuan_dit/tokenizer_t5/config.json +28 -0
  88. diffsynth/tokenizer_configs/hunyuan_dit/tokenizer_t5/special_tokens_map.json +1 -0
  89. diffsynth/tokenizer_configs/hunyuan_dit/tokenizer_t5/spiece.model +0 -0
  90. diffsynth/tokenizer_configs/hunyuan_dit/tokenizer_t5/tokenizer_config.json +1 -0
  91. diffsynth/tokenizer_configs/kolors/tokenizer/tokenizer.model +0 -0
  92. diffsynth/tokenizer_configs/kolors/tokenizer/tokenizer_config.json +12 -0
  93. diffsynth/tokenizer_configs/kolors/tokenizer/vocab.txt +0 -0
  94. diffsynth/tokenizer_configs/stable_diffusion/tokenizer/merges.txt +48895 -0
  95. diffsynth/tokenizer_configs/stable_diffusion/tokenizer/special_tokens_map.json +24 -0
  96. diffsynth/tokenizer_configs/stable_diffusion/tokenizer/tokenizer_config.json +34 -0
  97. diffsynth/tokenizer_configs/stable_diffusion/tokenizer/vocab.json +49410 -0
  98. diffsynth/tokenizer_configs/stable_diffusion_3/tokenizer_1/merges.txt +48895 -0
  99. diffsynth/tokenizer_configs/stable_diffusion_3/tokenizer_1/special_tokens_map.json +30 -0
  100. diffsynth/tokenizer_configs/stable_diffusion_3/tokenizer_1/tokenizer_config.json +30 -0
  101. diffsynth/tokenizer_configs/stable_diffusion_3/tokenizer_1/vocab.json +49410 -0
  102. diffsynth/tokenizer_configs/stable_diffusion_3/tokenizer_2/merges.txt +48895 -0
  103. diffsynth/tokenizer_configs/stable_diffusion_3/tokenizer_2/special_tokens_map.json +30 -0
  104. diffsynth/tokenizer_configs/stable_diffusion_3/tokenizer_2/tokenizer_config.json +38 -0
  105. diffsynth/tokenizer_configs/stable_diffusion_3/tokenizer_2/vocab.json +49410 -0
  106. diffsynth/tokenizer_configs/stable_diffusion_3/tokenizer_3/special_tokens_map.json +125 -0
  107. diffsynth/tokenizer_configs/stable_diffusion_3/tokenizer_3/spiece.model +0 -0
  108. diffsynth/tokenizer_configs/stable_diffusion_3/tokenizer_3/tokenizer.json +129428 -0
  109. diffsynth/tokenizer_configs/stable_diffusion_3/tokenizer_3/tokenizer_config.json +940 -0
  110. diffsynth/tokenizer_configs/stable_diffusion_xl/tokenizer_2/merges.txt +40213 -0
  111. diffsynth/tokenizer_configs/stable_diffusion_xl/tokenizer_2/special_tokens_map.json +24 -0
  112. diffsynth/tokenizer_configs/stable_diffusion_xl/tokenizer_2/tokenizer_config.json +38 -0
  113. diffsynth/tokenizer_configs/stable_diffusion_xl/tokenizer_2/vocab.json +49411 -0
  114. diffsynth/trainers/__init__.py +0 -0
  115. diffsynth/trainers/text_to_image.py +253 -0
  116. diffsynth-1.0.0.dist-info/LICENSE +201 -0
  117. diffsynth-1.0.0.dist-info/METADATA +23 -0
  118. diffsynth-1.0.0.dist-info/RECORD +120 -0
  119. diffsynth-1.0.0.dist-info/WHEEL +5 -0
  120. diffsynth-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,2004 @@
1
+ import torch, math
2
+ from einops import rearrange, repeat
3
+ from .sd_unet import Timesteps, PushBlock, PopBlock, Attention, GEGLU, ResnetBlock, AttentionBlock, DownSampler, UpSampler
4
+
5
+
6
class TemporalResnetBlock(torch.nn.Module):
    """Residual block that convolves along the frame axis only.

    The block receives frames stacked on the first axis, ``(f, c, h, w)``,
    views them as a single video ``(1, c, f, h, w)`` and applies two
    ``Conv3d`` layers whose kernels span 3 frames and 1x1 pixels.

    Args:
        in_channels: channel count of the incoming frames.
        out_channels: channel count of the produced frames.
        temb_channels: width of the time embedding; when ``None`` no time
            projection is created and ``time_emb`` is ignored in ``forward``.
        groups: number of groups for both ``GroupNorm`` layers.
        eps: epsilon for both ``GroupNorm`` layers.
    """

    def __init__(self, in_channels, out_channels, temb_channels=None, groups=32, eps=1e-5):
        super().__init__()
        self.norm1 = torch.nn.GroupNorm(num_groups=groups, num_channels=in_channels, eps=eps, affine=True)
        self.conv1 = torch.nn.Conv3d(in_channels, out_channels, kernel_size=(3, 1, 1), stride=(1, 1, 1), padding=(1, 0, 0))
        if temb_channels is not None:
            self.time_emb_proj = torch.nn.Linear(temb_channels, out_channels)
        else:
            # Always define the attribute so forward() can test for it
            # instead of raising AttributeError.
            self.time_emb_proj = None
        self.norm2 = torch.nn.GroupNorm(num_groups=groups, num_channels=out_channels, eps=eps, affine=True)
        self.conv2 = torch.nn.Conv3d(out_channels, out_channels, kernel_size=(3, 1, 1), stride=(1, 1, 1), padding=(1, 0, 0))
        self.nonlinearity = torch.nn.SiLU()
        self.conv_shortcut = None
        if in_channels != out_channels:
            self.conv_shortcut = torch.nn.Conv3d(in_channels, out_channels, kernel_size=1, stride=1, padding=0, bias=True)

    def forward(self, hidden_states, time_emb, text_emb, res_stack, **kwargs):
        """Apply the temporal residual block.

        Args:
            hidden_states: tensor laid out as ``(f, c, h, w)``.
            time_emb: ``(b, temb_channels)`` embedding or ``None``; it is
                broadcast over frames and pixels.
            text_emb: passed through untouched.
            res_stack: passed through untouched.

        Returns:
            ``(hidden_states, time_emb, text_emb, res_stack)`` where
            ``hidden_states`` has shape ``(f, out_channels, h, w)``.
        """
        # (f, c, h, w) -> (1, c, f, h, w): frames become the Conv3d depth axis.
        video = hidden_states.permute(1, 0, 2, 3).unsqueeze(0)

        x = self.norm1(video)
        x = self.nonlinearity(x)
        x = self.conv1(x)

        if time_emb is not None and self.time_emb_proj is not None:
            emb = self.nonlinearity(time_emb)
            emb = self.time_emb_proj(emb)
            # (b, c) -> (b, c, 1, 1, 1); broadcasting covers frames and pixels.
            x = x + emb[:, :, None, None, None]

        x = self.norm2(x)
        x = self.nonlinearity(x)
        x = self.conv2(x)

        residual = hidden_states
        if self.conv_shortcut is not None:
            # The shortcut is a Conv3d, so it must see the same 5-D layout as
            # the main branch; feeding it the raw (f, c, h, w) tensor would be
            # interpreted as an unbatched (C, D, H, W) input.
            residual = self.conv_shortcut(video)[0].permute(1, 0, 2, 3)

        # (c, f, h, w) -> (f, c, h, w)
        x = x[0].permute(1, 0, 2, 3)
        hidden_states = residual + x
        return hidden_states, time_emb, text_emb, res_stack
38
+
39
+
40
def get_timestep_embedding(
    timesteps: torch.Tensor,
    embedding_dim: int,
    flip_sin_to_cos: bool = False,
    downscale_freq_shift: float = 1,
    scale: float = 1,
    max_period: int = 10000,
):
    """Create sinusoidal timestep embeddings (DDPM-style).

    :param timesteps: a 1-D tensor of N indices, one per batch element; may be
        fractional.
    :param embedding_dim: the dimension of the output.
    :param flip_sin_to_cos: when true, the cosine half comes first.
    :param downscale_freq_shift: subtracted from ``embedding_dim // 2`` in the
        denominator of the frequency exponent.
    :param scale: multiplier applied to the angles before sin/cos.
    :param max_period: controls the minimum frequency of the embeddings.
    :return: an [N x embedding_dim] tensor of positional embeddings.
    """
    assert timesteps.ndim == 1, "Timesteps should be a 1d-array"

    half_dim = embedding_dim // 2

    # Geometric frequency ladder: exp(-log(max_period) * i / (half_dim - shift)).
    positions = torch.arange(half_dim, dtype=torch.float32, device=timesteps.device)
    log_freqs = (-math.log(max_period) * positions) / (half_dim - downscale_freq_shift)
    freqs = torch.exp(log_freqs)

    # Outer product of timesteps and frequencies, then the optional scaling.
    angles = scale * (timesteps[:, None].float() * freqs[None, :])

    sin_half = torch.sin(angles)
    cos_half = torch.cos(angles)
    halves = [cos_half, sin_half] if flip_sin_to_cos else [sin_half, cos_half]
    emb = torch.cat(halves, dim=-1)

    # Odd output widths get one trailing zero column.
    if embedding_dim % 2 == 1:
        emb = torch.nn.functional.pad(emb, (0, 1, 0, 0))
    return emb
81
+
82
+
83
class TemporalTimesteps(torch.nn.Module):
    """Parameter-free module wrapping ``get_timestep_embedding``.

    Stores the embedding width and the two sinusoid options at construction
    time and applies them to whatever timesteps are passed to ``forward``.
    """

    def __init__(self, num_channels: int, flip_sin_to_cos: bool, downscale_freq_shift: float):
        super().__init__()
        self.num_channels = num_channels
        self.flip_sin_to_cos = flip_sin_to_cos
        self.downscale_freq_shift = downscale_freq_shift

    def forward(self, timesteps):
        """Return the sinusoidal embedding of ``timesteps`` (a 1-D tensor)."""
        return get_timestep_embedding(
            timesteps,
            self.num_channels,
            flip_sin_to_cos=self.flip_sin_to_cos,
            downscale_freq_shift=self.downscale_freq_shift,
        )
98
+
99
+
100
class TrainableTemporalTimesteps(torch.nn.Module):
    """Learnable per-frame embedding table initialised from sinusoids.

    The table has one row per frame (``num_frames`` rows). Row values start as
    the sinusoidal embedding of the position ids produced by a default
    ``PositionalID`` and are stored as a ``torch.nn.Parameter``.
    """

    def __init__(self, num_channels: int, flip_sin_to_cos: bool, downscale_freq_shift: float, num_frames: int):
        super().__init__()
        position_ids = PositionalID()(num_frames)
        initial_table = get_timestep_embedding(position_ids, num_channels, flip_sin_to_cos, downscale_freq_shift)
        self.embeddings = torch.nn.Parameter(initial_table)

    def forward(self, timesteps):
        """Look up rows of the embedding table by index."""
        return self.embeddings[timesteps]
110
+
111
+
112
class PositionalID(torch.nn.Module):
    """Map frame indices to position ids that stay below ``max_id``.

    Frame distances smaller than ``max_id`` map to themselves. Larger
    distances bounce back and forth: they first count down from
    ``max_id - 2`` for ``repeat_length`` steps, then count up again for
    ``repeat_length`` steps, and repeat with period ``2 * repeat_length``.
    """

    def __init__(self, max_id=25, repeat_length=20):
        super().__init__()
        self.max_id = max_id
        self.repeat_length = repeat_length

    def frame_id_to_position_id(self, frame_id):
        """Return the position id for a single non-negative frame distance."""
        if frame_id < self.max_id:
            return frame_id
        phase = (frame_id - self.max_id) % (self.repeat_length * 2)
        if phase < self.repeat_length:
            # Descending half of the cycle.
            return self.max_id - 2 - phase
        # Ascending half of the cycle.
        return self.max_id - 2 * self.repeat_length + phase

    def forward(self, num_frames, pivot_frame_id=0):
        """Return an ``IntTensor`` of position ids for ``num_frames`` frames.

        Each frame's id is derived from its absolute distance to
        ``pivot_frame_id``.
        """
        position_ids = []
        for frame in range(num_frames):
            distance = abs(frame - pivot_frame_id)
            position_ids.append(self.frame_id_to_position_id(distance))
        return torch.IntTensor(position_ids)
133
+
134
+
135
class TemporalAttentionBlock(torch.nn.Module):
    """Transformer block that attends across frames at every pixel.

    The input ``(T, C, H, W)`` is turned into ``H * W`` sequences of length
    ``T``. Each sequence goes through: feed-forward, self-attention,
    cross-attention against ``text_emb``, feed-forward. Every stage is
    residual.

    Args:
        num_attention_heads: number of heads for both attention layers.
        attention_head_dim: per-head width for both attention layers.
        in_channels: channel count ``C`` of the input.
        cross_attention_dim: key/value width of the cross-attention layer.
        add_positional_conv: when not ``None``, it is used as the number of
            frames of a trainable positional table, and a reflect-padded 3x3x3
            ``Conv3d`` is applied after the positional embedding is added.
            Otherwise fixed sinusoidal embeddings are used and no conv is
            applied.
    """

    def __init__(self, num_attention_heads, attention_head_dim, in_channels, cross_attention_dim=None, add_positional_conv=None):
        super().__init__()

        expanded_channels = in_channels * 4

        # Frame-position embedding and the MLP that projects it.
        self.positional_embedding_proj = torch.nn.Sequential(
            torch.nn.Linear(in_channels, expanded_channels),
            torch.nn.SiLU(),
            torch.nn.Linear(expanded_channels, in_channels)
        )
        if add_positional_conv is None:
            self.positional_embedding = TemporalTimesteps(in_channels, True, 0)
            self.positional_conv = None
        else:
            self.positional_embedding = TrainableTemporalTimesteps(in_channels, True, 0, add_positional_conv)
            self.positional_conv = torch.nn.Conv3d(in_channels, in_channels, kernel_size=3, padding=1, padding_mode="reflect")

        # Input feed-forward.
        self.norm_in = torch.nn.LayerNorm(in_channels)
        self.act_fn_in = GEGLU(in_channels, expanded_channels)
        self.ff_in = torch.nn.Linear(expanded_channels, in_channels)

        # Self-attention over frames.
        self.norm1 = torch.nn.LayerNorm(in_channels)
        self.attn1 = Attention(
            q_dim=in_channels,
            num_heads=num_attention_heads,
            head_dim=attention_head_dim,
            bias_out=True
        )

        # Cross-attention against the conditioning embedding.
        self.norm2 = torch.nn.LayerNorm(in_channels)
        self.attn2 = Attention(
            q_dim=in_channels,
            kv_dim=cross_attention_dim,
            num_heads=num_attention_heads,
            head_dim=attention_head_dim,
            bias_out=True
        )

        # Output feed-forward.
        self.norm_out = torch.nn.LayerNorm(in_channels)
        self.act_fn_out = GEGLU(in_channels, expanded_channels)
        self.ff_out = torch.nn.Linear(expanded_channels, in_channels)

    def forward(self, hidden_states, time_emb, text_emb, res_stack, **kwargs):
        """Run temporal attention on ``hidden_states`` of shape ``(T, C, H, W)``.

        ``time_emb`` and ``res_stack`` are passed through untouched.
        ``text_emb`` is tiled ``H * W`` times along its first axis for the
        cross-attention (NOTE(review): this two-argument ``repeat`` presumes a
        2-D ``text_emb`` -- confirm against the caller).

        Returns:
            ``(hidden_states, time_emb, text_emb, res_stack)`` with
            ``hidden_states`` back in ``(T, C, H, W)`` layout.
        """
        num_frames, channels, height, width = hidden_states.shape

        # One positional embedding per frame index 0..T-1.
        frame_ids = torch.arange(num_frames)
        pos_emb = self.positional_embedding(frame_ids).to(dtype=hidden_states.dtype, device=hidden_states.device)
        pos_emb = self.positional_embedding_proj(pos_emb)

        video = rearrange(hidden_states, "T C H W -> 1 C T H W")
        video = video + rearrange(pos_emb, "T C -> 1 C T 1 1")
        if self.positional_conv is not None:
            video = self.positional_conv(video)

        # Every pixel becomes one sequence of T tokens.
        tokens = rearrange(video[0], "C T H W -> (H W) T C")

        # Input feed-forward (residual).
        tokens = self.ff_in(self.act_fn_in(self.norm_in(tokens))) + tokens

        # Self-attention (residual).
        tokens = self.attn1(self.norm1(tokens), encoder_hidden_states=None) + tokens

        # Cross-attention (residual).
        context = text_emb.repeat(height * width, 1)
        tokens = self.attn2(self.norm2(tokens), encoder_hidden_states=context) + tokens

        # Output feed-forward (residual).
        tokens = self.ff_out(self.act_fn_out(self.norm_out(tokens))) + tokens

        hidden_states = rearrange(tokens, "(H W) T C -> T C H W", H=height, W=width)

        return hidden_states, time_emb, text_emb, res_stack
212
+
213
+
214
class PopMixBlock(torch.nn.Module):
    """Blend the current activations with the top of the residual stack.

    ``forward`` pops two entries from ``res_stack``. The first is mixed with
    the incoming ``hidden_states`` using a learned weight
    ``sigmoid(mix_factor)``. The second is added to the (optionally
    projected) mixture as a residual.

    Args:
        in_channels: when given, a ``Linear(in_channels, in_channels)`` is
            applied over the channel axis of the mixture before the residual
            is added.
    """

    def __init__(self, in_channels=None):
        super().__init__()
        self.mix_factor = torch.nn.Parameter(torch.Tensor([0.5]))
        self.need_proj = in_channels is not None
        if self.need_proj:
            self.proj = torch.nn.Linear(in_channels, in_channels)

    def forward(self, hidden_states, time_emb, text_emb, res_stack, **kwargs):
        """Mix, optionally project, and add the residual; pops ``res_stack`` twice."""
        mix_source = res_stack.pop()
        weight = torch.sigmoid(self.mix_factor)
        mixed = weight * mix_source + (1 - weight) * hidden_states

        if self.need_proj:
            # Linear acts on the last axis: go channels-last and back.
            channels_last = mixed.permute(0, 2, 3, 1)
            mixed = self.proj(channels_last).permute(0, 3, 1, 2)

        skip = res_stack.pop()
        return mixed + skip, time_emb, text_emb, res_stack
233
+
234
+
235
class SVDUNet(torch.nn.Module):
    """Spatio-temporal UNet assembled from a flat list of blocks.

    All blocks share the calling convention
    ``(hidden_states, time_emb, text_emb, res_stack) -> same 4-tuple``.
    ``PushBlock`` / ``PopBlock`` / ``PopMixBlock`` entries manipulate
    ``res_stack`` so that skip connections and spatial/temporal mixing are
    expressed purely by the order of ``self.blocks``.

    Args:
        add_positional_conv: forwarded to every ``TemporalAttentionBlock``.
    """

    def __init__(self, add_positional_conv=None):
        super().__init__()

        # Timestep embedding: 320 -> 1280 -> 1280.
        self.time_proj = Timesteps(320)
        self.time_embedding = torch.nn.Sequential(
            torch.nn.Linear(320, 1280),
            torch.nn.SiLU(),
            torch.nn.Linear(1280, 1280)
        )

        # Additional-id embedding: ids are embedded with 256 channels each
        # and regrouped into rows of 768 before the MLP (see forward()).
        self.add_time_proj = Timesteps(256)
        self.add_time_embedding = torch.nn.Sequential(
            torch.nn.Linear(768, 1280),
            torch.nn.SiLU(),
            torch.nn.Linear(1280, 1280)
        )

        self.conv_in = torch.nn.Conv2d(8, 320, kernel_size=3, padding=1)

        # NOTE: the position of each entry defines its parameter names
        # ("blocks.<index>...."), so the order below must not change.
        self.blocks = torch.nn.ModuleList([
            # CrossAttnDownBlockSpatioTemporal
            ResnetBlock(320, 320, 1280, eps=1e-6), PushBlock(), TemporalResnetBlock(320, 320, 1280, eps=1e-6), PopMixBlock(), PushBlock(),
            AttentionBlock(5, 64, 320, 1, 1024, need_proj_out=False), PushBlock(), TemporalAttentionBlock(5, 64, 320, 1024, add_positional_conv), PopMixBlock(320), PushBlock(),
            ResnetBlock(320, 320, 1280, eps=1e-6), PushBlock(), TemporalResnetBlock(320, 320, 1280, eps=1e-6), PopMixBlock(), PushBlock(),
            AttentionBlock(5, 64, 320, 1, 1024, need_proj_out=False), PushBlock(), TemporalAttentionBlock(5, 64, 320, 1024, add_positional_conv), PopMixBlock(320), PushBlock(),
            DownSampler(320), PushBlock(),
            # CrossAttnDownBlockSpatioTemporal
            ResnetBlock(320, 640, 1280, eps=1e-6), PushBlock(), TemporalResnetBlock(640, 640, 1280, eps=1e-6), PopMixBlock(), PushBlock(),
            AttentionBlock(10, 64, 640, 1, 1024, need_proj_out=False), PushBlock(), TemporalAttentionBlock(10, 64, 640, 1024, add_positional_conv), PopMixBlock(640), PushBlock(),
            ResnetBlock(640, 640, 1280, eps=1e-6), PushBlock(), TemporalResnetBlock(640, 640, 1280, eps=1e-6), PopMixBlock(), PushBlock(),
            AttentionBlock(10, 64, 640, 1, 1024, need_proj_out=False), PushBlock(), TemporalAttentionBlock(10, 64, 640, 1024, add_positional_conv), PopMixBlock(640), PushBlock(),
            DownSampler(640), PushBlock(),
            # CrossAttnDownBlockSpatioTemporal
            ResnetBlock(640, 1280, 1280, eps=1e-6), PushBlock(), TemporalResnetBlock(1280, 1280, 1280, eps=1e-6), PopMixBlock(), PushBlock(),
            AttentionBlock(20, 64, 1280, 1, 1024, need_proj_out=False), PushBlock(), TemporalAttentionBlock(20, 64, 1280, 1024, add_positional_conv), PopMixBlock(1280), PushBlock(),
            ResnetBlock(1280, 1280, 1280, eps=1e-6), PushBlock(), TemporalResnetBlock(1280, 1280, 1280, eps=1e-6), PopMixBlock(), PushBlock(),
            AttentionBlock(20, 64, 1280, 1, 1024, need_proj_out=False), PushBlock(), TemporalAttentionBlock(20, 64, 1280, 1024, add_positional_conv), PopMixBlock(1280), PushBlock(),
            DownSampler(1280), PushBlock(),
            # DownBlockSpatioTemporal
            ResnetBlock(1280, 1280, 1280, eps=1e-5), PushBlock(), TemporalResnetBlock(1280, 1280, 1280, eps=1e-5), PopMixBlock(), PushBlock(),
            ResnetBlock(1280, 1280, 1280, eps=1e-5), PushBlock(), TemporalResnetBlock(1280, 1280, 1280, eps=1e-5), PopMixBlock(), PushBlock(),
            # UNetMidBlockSpatioTemporal
            ResnetBlock(1280, 1280, 1280, eps=1e-5), PushBlock(), TemporalResnetBlock(1280, 1280, 1280, eps=1e-5), PopMixBlock(), PushBlock(),
            AttentionBlock(20, 64, 1280, 1, 1024, need_proj_out=False), PushBlock(), TemporalAttentionBlock(20, 64, 1280, 1024, add_positional_conv), PopMixBlock(1280),
            ResnetBlock(1280, 1280, 1280, eps=1e-5), PushBlock(), TemporalResnetBlock(1280, 1280, 1280, eps=1e-5), PopMixBlock(),
            # UpBlockSpatioTemporal
            PopBlock(), ResnetBlock(2560, 1280, 1280, eps=1e-6), PushBlock(), TemporalResnetBlock(1280, 1280, 1280, eps=1e-5), PopMixBlock(),
            PopBlock(), ResnetBlock(2560, 1280, 1280, eps=1e-6), PushBlock(), TemporalResnetBlock(1280, 1280, 1280, eps=1e-5), PopMixBlock(),
            PopBlock(), ResnetBlock(2560, 1280, 1280, eps=1e-6), PushBlock(), TemporalResnetBlock(1280, 1280, 1280, eps=1e-5), PopMixBlock(),
            UpSampler(1280),
            # CrossAttnUpBlockSpatioTemporal
            PopBlock(), ResnetBlock(2560, 1280, 1280, eps=1e-6), PushBlock(), TemporalResnetBlock(1280, 1280, 1280, eps=1e-6), PopMixBlock(), PushBlock(),
            AttentionBlock(20, 64, 1280, 1, 1024, need_proj_out=False), PushBlock(), TemporalAttentionBlock(20, 64, 1280, 1024, add_positional_conv), PopMixBlock(1280),
            PopBlock(), ResnetBlock(2560, 1280, 1280, eps=1e-6), PushBlock(), TemporalResnetBlock(1280, 1280, 1280, eps=1e-6), PopMixBlock(), PushBlock(),
            AttentionBlock(20, 64, 1280, 1, 1024, need_proj_out=False), PushBlock(), TemporalAttentionBlock(20, 64, 1280, 1024, add_positional_conv), PopMixBlock(1280),
            PopBlock(), ResnetBlock(1920, 1280, 1280, eps=1e-6), PushBlock(), TemporalResnetBlock(1280, 1280, 1280, eps=1e-6), PopMixBlock(), PushBlock(),
            AttentionBlock(20, 64, 1280, 1, 1024, need_proj_out=False), PushBlock(), TemporalAttentionBlock(20, 64, 1280, 1024, add_positional_conv), PopMixBlock(1280),
            UpSampler(1280),
            # CrossAttnUpBlockSpatioTemporal
            PopBlock(), ResnetBlock(1920, 640, 1280, eps=1e-6), PushBlock(), TemporalResnetBlock(640, 640, 1280, eps=1e-6), PopMixBlock(), PushBlock(),
            AttentionBlock(10, 64, 640, 1, 1024, need_proj_out=False), PushBlock(), TemporalAttentionBlock(10, 64, 640, 1024, add_positional_conv), PopMixBlock(640),
            PopBlock(), ResnetBlock(1280, 640, 1280, eps=1e-6), PushBlock(), TemporalResnetBlock(640, 640, 1280, eps=1e-6), PopMixBlock(), PushBlock(),
            AttentionBlock(10, 64, 640, 1, 1024, need_proj_out=False), PushBlock(), TemporalAttentionBlock(10, 64, 640, 1024, add_positional_conv), PopMixBlock(640),
            PopBlock(), ResnetBlock(960, 640, 1280, eps=1e-6), PushBlock(), TemporalResnetBlock(640, 640, 1280, eps=1e-6), PopMixBlock(), PushBlock(),
            AttentionBlock(10, 64, 640, 1, 1024, need_proj_out=False), PushBlock(), TemporalAttentionBlock(10, 64, 640, 1024, add_positional_conv), PopMixBlock(640),
            UpSampler(640),
            # CrossAttnUpBlockSpatioTemporal
            PopBlock(), ResnetBlock(960, 320, 1280, eps=1e-6), PushBlock(), TemporalResnetBlock(320, 320, 1280, eps=1e-6), PopMixBlock(), PushBlock(),
            AttentionBlock(5, 64, 320, 1, 1024, need_proj_out=False), PushBlock(), TemporalAttentionBlock(5, 64, 320, 1024, add_positional_conv), PopMixBlock(320),
            PopBlock(), ResnetBlock(640, 320, 1280, eps=1e-6), PushBlock(), TemporalResnetBlock(320, 320, 1280, eps=1e-6), PopMixBlock(), PushBlock(),
            AttentionBlock(5, 64, 320, 1, 1024, need_proj_out=False), PushBlock(), TemporalAttentionBlock(5, 64, 320, 1024, add_positional_conv), PopMixBlock(320),
            PopBlock(), ResnetBlock(640, 320, 1280, eps=1e-6), PushBlock(), TemporalResnetBlock(320, 320, 1280, eps=1e-6), PopMixBlock(), PushBlock(),
            AttentionBlock(5, 64, 320, 1, 1024, need_proj_out=False), PushBlock(), TemporalAttentionBlock(5, 64, 320, 1024, add_positional_conv), PopMixBlock(320),
        ])

        # Output head: 320 channels -> 4 channels.
        self.conv_norm_out = torch.nn.GroupNorm(num_groups=32, num_channels=320, eps=1e-05, affine=True)
        self.conv_act = torch.nn.SiLU()
        self.conv_out = torch.nn.Conv2d(320, 4, kernel_size=3, stride=1, padding=1)

    def build_mask(self, data, is_bound):
        """Build a ``(T, 1, H, W)`` blending mask for one tile.

        The weight of a voxel grows linearly from ``1 / border_width`` at a
        tile face to ``1`` at ``border_width`` voxels inside, where
        ``border_width = (T + H + W) // 6``. Faces flagged in ``is_bound`` do
        not ramp.

        Args:
            data: the tile, shaped ``(T, C, H, W)``; also supplies the dtype
                and device of the result.
            is_bound: six flags ordered
                ``(t_start, t_end, h_start, h_end, w_start, w_end)``.
        """
        T, C, H, W = data.shape
        grid = (T, H, W)
        t = torch.arange(T).reshape(T, 1, 1).expand(grid)
        h = torch.arange(H).reshape(1, H, 1).expand(grid)
        w = torch.arange(W).reshape(1, 1, W).expand(grid)

        border_width = (T + H + W) // 6
        pad = torch.ones_like(t) * border_width

        # 1-based distance of every voxel to each of the six tile faces.
        face_distances = (t + 1, T - t, h + 1, H - h, w + 1, W - w)
        candidates = [
            pad if bound else distance
            for bound, distance in zip(is_bound, face_distances)
        ]
        mask = torch.stack(candidates).min(dim=0).values
        mask = mask.clip(1, border_width)
        mask = (mask / border_width).to(dtype=data.dtype, device=data.device)
        return mask.unsqueeze(1)

    def tiled_forward(
        self, sample, timestep, encoder_hidden_states, add_time_id,
        batch_time=25, batch_height=128, batch_width=128,
        stride_time=5, stride_height=64, stride_width=64,
        progress_bar=lambda x:x
    ):
        """Run ``forward`` on overlapping tiles and blend the results.

        Tiles of size ``(batch_time, batch_height, batch_width)`` are placed
        every ``(stride_time, stride_height, stride_width)`` voxels. Each tile
        is moved to the device of ``conv_in`` for computation and the output
        is accumulated on the device of ``sample``, weighted by
        ``build_mask``. ``progress_bar`` wraps the list of tiles.

        Returns:
            A ``(T, 4, H, W)`` tensor with the dtype and device of ``sample``.
        """
        data_device = sample.device
        computation_device = self.conv_in.weight.device
        torch_dtype = sample.dtype
        T, C, H, W = sample.shape

        weight = torch.zeros((T, 1, H, W), dtype=torch_dtype, device=data_device)
        values = torch.zeros((T, 4, H, W), dtype=torch_dtype, device=data_device)

        def previous_tile_reaches_end(start, stride, window, size):
            # True when the tile one stride earlier already covers the tail.
            return start - stride >= 0 and start - stride + window >= size

        # Split tasks
        tasks = []
        for t in range(0, T, stride_time):
            for h in range(0, H, stride_height):
                for w in range(0, W, stride_width):
                    redundant = (
                        previous_tile_reaches_end(t, stride_time, batch_time, T)
                        or previous_tile_reaches_end(h, stride_height, batch_height, H)
                        or previous_tile_reaches_end(w, stride_width, batch_width, W)
                    )
                    if not redundant:
                        tasks.append((t, t+batch_time, h, h+batch_height, w, w+batch_width))

        # Run
        for tl, tr, hl, hr, wl, wr in progress_bar(tasks):
            tile = sample[tl:tr, :, hl:hr, wl:wr].to(computation_device)
            tile = self.forward(tile, timestep, encoder_hidden_states, add_time_id).to(data_device)
            mask = self.build_mask(tile, is_bound=(tl==0, tr>=T, hl==0, hr>=H, wl==0, wr>=W))
            values[tl:tr, :, hl:hr, wl:wr] += tile * mask
            weight[tl:tr, :, hl:hr, wl:wr] += mask

        # Normalise by the accumulated mask weight.
        values /= weight
        return values

    def forward(self, sample, timestep, encoder_hidden_states, add_time_id, use_gradient_checkpointing=False, **kwargs):
        """Run the UNet.

        Args:
            sample: input to ``conv_in`` (8 channels).
            timestep: a single timestep value; it is wrapped into a
                one-element tensor.
            encoder_hidden_states: conditioning passed to the blocks as
                ``text_emb``.
            add_time_id: additional ids; flattened, embedded and regrouped
                into rows of 768 features.
            use_gradient_checkpointing: when true and the module is in
                training mode, every block except the stack-manipulating ones
                runs under ``torch.utils.checkpoint``.

        Returns:
            The output of ``conv_out`` (4 channels).
        """
        # 1. time
        timestep = torch.tensor((timestep,)).to(sample.device)
        t_emb = self.time_embedding(self.time_proj(timestep).to(sample.dtype))

        add_embeds = self.add_time_proj(add_time_id.flatten()).to(sample.dtype)
        add_embeds = self.add_time_embedding(add_embeds.reshape((-1, 768)))

        time_emb = t_emb + add_embeds

        # 2. pre-process
        height, width = sample.shape[2], sample.shape[3]  # not used below
        hidden_states = self.conv_in(sample)
        text_emb = encoder_hidden_states
        res_stack = [hidden_states]

        # 3. blocks
        stack_only_blocks = (PushBlock, PopBlock, PopMixBlock)
        checkpointing = self.training and use_gradient_checkpointing
        for block in self.blocks:
            if checkpointing and not isinstance(block, stack_only_blocks):
                hidden_states, time_emb, text_emb, res_stack = torch.utils.checkpoint.checkpoint(
                    block,
                    hidden_states, time_emb, text_emb, res_stack,
                    use_reentrant=False,
                )
            else:
                hidden_states, time_emb, text_emb, res_stack = block(hidden_states, time_emb, text_emb, res_stack)

        # 4. output
        hidden_states = self.conv_out(self.conv_act(self.conv_norm_out(hidden_states)))

        return hidden_states

    @staticmethod
    def state_dict_converter():
        """Return the converter that maps external checkpoints to this model."""
        return SVDUNetStateDictConverter()
413
+
414
+
415
+
416
+ class SVDUNetStateDictConverter:
417
+ def __init__(self):
418
+ pass
419
+
420
+ def get_block_name(self, names):
421
+ if names[0] in ["down_blocks", "mid_block", "up_blocks"]:
422
+ if names[4] in ["norm", "proj_in"]:
423
+ return ".".join(names[:4] + ["transformer_blocks"])
424
+ elif names[4] in ["time_pos_embed"]:
425
+ return ".".join(names[:4] + ["temporal_transformer_blocks"])
426
+ elif names[4] in ["proj_out"]:
427
+ return ".".join(names[:4] + ["time_mixer"])
428
+ else:
429
+ return ".".join(names[:5])
430
+ return ""
431
+
432
+ def from_diffusers(self, state_dict):
433
+ rename_dict = {
434
+ "time_embedding.linear_1": "time_embedding.0",
435
+ "time_embedding.linear_2": "time_embedding.2",
436
+ "add_embedding.linear_1": "add_time_embedding.0",
437
+ "add_embedding.linear_2": "add_time_embedding.2",
438
+ "conv_in": "conv_in",
439
+ "conv_norm_out": "conv_norm_out",
440
+ "conv_out": "conv_out",
441
+ }
442
+ blocks_rename_dict = [
443
+ "down_blocks.0.resnets.0.spatial_res_block", None, "down_blocks.0.resnets.0.temporal_res_block", "down_blocks.0.resnets.0.time_mixer", None,
444
+ "down_blocks.0.attentions.0.transformer_blocks", None, "down_blocks.0.attentions.0.temporal_transformer_blocks", "down_blocks.0.attentions.0.time_mixer", None,
445
+ "down_blocks.0.resnets.1.spatial_res_block", None, "down_blocks.0.resnets.1.temporal_res_block", "down_blocks.0.resnets.1.time_mixer", None,
446
+ "down_blocks.0.attentions.1.transformer_blocks", None, "down_blocks.0.attentions.1.temporal_transformer_blocks", "down_blocks.0.attentions.1.time_mixer", None,
447
+ "down_blocks.0.downsamplers.0.conv", None,
448
+ "down_blocks.1.resnets.0.spatial_res_block", None, "down_blocks.1.resnets.0.temporal_res_block", "down_blocks.1.resnets.0.time_mixer", None,
449
+ "down_blocks.1.attentions.0.transformer_blocks", None, "down_blocks.1.attentions.0.temporal_transformer_blocks", "down_blocks.1.attentions.0.time_mixer", None,
450
+ "down_blocks.1.resnets.1.spatial_res_block", None, "down_blocks.1.resnets.1.temporal_res_block", "down_blocks.1.resnets.1.time_mixer", None,
451
+ "down_blocks.1.attentions.1.transformer_blocks", None, "down_blocks.1.attentions.1.temporal_transformer_blocks", "down_blocks.1.attentions.1.time_mixer", None,
452
+ "down_blocks.1.downsamplers.0.conv", None,
453
+ "down_blocks.2.resnets.0.spatial_res_block", None, "down_blocks.2.resnets.0.temporal_res_block", "down_blocks.2.resnets.0.time_mixer", None,
454
+ "down_blocks.2.attentions.0.transformer_blocks", None, "down_blocks.2.attentions.0.temporal_transformer_blocks", "down_blocks.2.attentions.0.time_mixer", None,
455
+ "down_blocks.2.resnets.1.spatial_res_block", None, "down_blocks.2.resnets.1.temporal_res_block", "down_blocks.2.resnets.1.time_mixer", None,
456
+ "down_blocks.2.attentions.1.transformer_blocks", None, "down_blocks.2.attentions.1.temporal_transformer_blocks", "down_blocks.2.attentions.1.time_mixer", None,
457
+ "down_blocks.2.downsamplers.0.conv", None,
458
+ "down_blocks.3.resnets.0.spatial_res_block", None, "down_blocks.3.resnets.0.temporal_res_block", "down_blocks.3.resnets.0.time_mixer", None,
459
+ "down_blocks.3.resnets.1.spatial_res_block", None, "down_blocks.3.resnets.1.temporal_res_block", "down_blocks.3.resnets.1.time_mixer", None,
460
+ "mid_block.mid_block.resnets.0.spatial_res_block", None, "mid_block.mid_block.resnets.0.temporal_res_block", "mid_block.mid_block.resnets.0.time_mixer", None,
461
+ "mid_block.mid_block.attentions.0.transformer_blocks", None, "mid_block.mid_block.attentions.0.temporal_transformer_blocks", "mid_block.mid_block.attentions.0.time_mixer",
462
+ "mid_block.mid_block.resnets.1.spatial_res_block", None, "mid_block.mid_block.resnets.1.temporal_res_block", "mid_block.mid_block.resnets.1.time_mixer",
463
+ None, "up_blocks.0.resnets.0.spatial_res_block", None, "up_blocks.0.resnets.0.temporal_res_block", "up_blocks.0.resnets.0.time_mixer",
464
+ None, "up_blocks.0.resnets.1.spatial_res_block", None, "up_blocks.0.resnets.1.temporal_res_block", "up_blocks.0.resnets.1.time_mixer",
465
+ None, "up_blocks.0.resnets.2.spatial_res_block", None, "up_blocks.0.resnets.2.temporal_res_block", "up_blocks.0.resnets.2.time_mixer",
466
+ "up_blocks.0.upsamplers.0.conv",
467
+ None, "up_blocks.1.resnets.0.spatial_res_block", None, "up_blocks.1.resnets.0.temporal_res_block", "up_blocks.1.resnets.0.time_mixer", None,
468
+ "up_blocks.1.attentions.0.transformer_blocks", None, "up_blocks.1.attentions.0.temporal_transformer_blocks", "up_blocks.1.attentions.0.time_mixer",
469
+ None, "up_blocks.1.resnets.1.spatial_res_block", None, "up_blocks.1.resnets.1.temporal_res_block", "up_blocks.1.resnets.1.time_mixer", None,
470
+ "up_blocks.1.attentions.1.transformer_blocks", None, "up_blocks.1.attentions.1.temporal_transformer_blocks", "up_blocks.1.attentions.1.time_mixer",
471
+ None, "up_blocks.1.resnets.2.spatial_res_block", None, "up_blocks.1.resnets.2.temporal_res_block", "up_blocks.1.resnets.2.time_mixer", None,
472
+ "up_blocks.1.attentions.2.transformer_blocks", None, "up_blocks.1.attentions.2.temporal_transformer_blocks", "up_blocks.1.attentions.2.time_mixer",
473
+ "up_blocks.1.upsamplers.0.conv",
474
+ None, "up_blocks.2.resnets.0.spatial_res_block", None, "up_blocks.2.resnets.0.temporal_res_block", "up_blocks.2.resnets.0.time_mixer", None,
475
+ "up_blocks.2.attentions.0.transformer_blocks", None, "up_blocks.2.attentions.0.temporal_transformer_blocks", "up_blocks.2.attentions.0.time_mixer",
476
+ None, "up_blocks.2.resnets.1.spatial_res_block", None, "up_blocks.2.resnets.1.temporal_res_block", "up_blocks.2.resnets.1.time_mixer", None,
477
+ "up_blocks.2.attentions.1.transformer_blocks", None, "up_blocks.2.attentions.1.temporal_transformer_blocks", "up_blocks.2.attentions.1.time_mixer",
478
+ None, "up_blocks.2.resnets.2.spatial_res_block", None, "up_blocks.2.resnets.2.temporal_res_block", "up_blocks.2.resnets.2.time_mixer", None,
479
+ "up_blocks.2.attentions.2.transformer_blocks", None, "up_blocks.2.attentions.2.temporal_transformer_blocks", "up_blocks.2.attentions.2.time_mixer",
480
+ "up_blocks.2.upsamplers.0.conv",
481
+ None, "up_blocks.3.resnets.0.spatial_res_block", None, "up_blocks.3.resnets.0.temporal_res_block", "up_blocks.3.resnets.0.time_mixer", None,
482
+ "up_blocks.3.attentions.0.transformer_blocks", None, "up_blocks.3.attentions.0.temporal_transformer_blocks", "up_blocks.3.attentions.0.time_mixer",
483
+ None, "up_blocks.3.resnets.1.spatial_res_block", None, "up_blocks.3.resnets.1.temporal_res_block", "up_blocks.3.resnets.1.time_mixer", None,
484
+ "up_blocks.3.attentions.1.transformer_blocks", None, "up_blocks.3.attentions.1.temporal_transformer_blocks", "up_blocks.3.attentions.1.time_mixer",
485
+ None, "up_blocks.3.resnets.2.spatial_res_block", None, "up_blocks.3.resnets.2.temporal_res_block", "up_blocks.3.resnets.2.time_mixer", None,
486
+ "up_blocks.3.attentions.2.transformer_blocks", None, "up_blocks.3.attentions.2.temporal_transformer_blocks", "up_blocks.3.attentions.2.time_mixer",
487
+ ]
488
+ blocks_rename_dict = {i:j for j,i in enumerate(blocks_rename_dict) if i is not None}
489
+ state_dict_ = {}
490
+ for name, param in sorted(state_dict.items()):
491
+ names = name.split(".")
492
+ if names[0] == "mid_block":
493
+ names = ["mid_block"] + names
494
+ if names[-1] in ["weight", "bias"]:
495
+ name_prefix = ".".join(names[:-1])
496
+ if name_prefix in rename_dict:
497
+ state_dict_[rename_dict[name_prefix] + "." + names[-1]] = param
498
+ else:
499
+ block_name = self.get_block_name(names)
500
+ if "resnets" in block_name and block_name in blocks_rename_dict:
501
+ rename = ".".join(["blocks", str(blocks_rename_dict[block_name])] + names[5:])
502
+ state_dict_[rename] = param
503
+ elif ("downsamplers" in block_name or "upsamplers" in block_name) and block_name in blocks_rename_dict:
504
+ rename = ".".join(["blocks", str(blocks_rename_dict[block_name])] + names[-2:])
505
+ state_dict_[rename] = param
506
+ elif "attentions" in block_name and block_name in blocks_rename_dict:
507
+ attention_id = names[5]
508
+ if "transformer_blocks" in names:
509
+ suffix_dict = {
510
+ "attn1.to_out.0": "attn1.to_out",
511
+ "attn2.to_out.0": "attn2.to_out",
512
+ "ff.net.0.proj": "act_fn.proj",
513
+ "ff.net.2": "ff",
514
+ }
515
+ suffix = ".".join(names[6:-1])
516
+ suffix = suffix_dict.get(suffix, suffix)
517
+ rename = ".".join(["blocks", str(blocks_rename_dict[block_name]), "transformer_blocks", attention_id, suffix, names[-1]])
518
+ elif "temporal_transformer_blocks" in names:
519
+ suffix_dict = {
520
+ "attn1.to_out.0": "attn1.to_out",
521
+ "attn2.to_out.0": "attn2.to_out",
522
+ "ff_in.net.0.proj": "act_fn_in.proj",
523
+ "ff_in.net.2": "ff_in",
524
+ "ff.net.0.proj": "act_fn_out.proj",
525
+ "ff.net.2": "ff_out",
526
+ "norm3": "norm_out",
527
+ }
528
+ suffix = ".".join(names[6:-1])
529
+ suffix = suffix_dict.get(suffix, suffix)
530
+ rename = ".".join(["blocks", str(blocks_rename_dict[block_name]), suffix, names[-1]])
531
+ elif "time_mixer" in block_name:
532
+ rename = ".".join(["blocks", str(blocks_rename_dict[block_name]), "proj", names[-1]])
533
+ else:
534
+ suffix_dict = {
535
+ "linear_1": "positional_embedding_proj.0",
536
+ "linear_2": "positional_embedding_proj.2",
537
+ }
538
+ suffix = names[-2]
539
+ suffix = suffix_dict.get(suffix, suffix)
540
+ rename = ".".join(["blocks", str(blocks_rename_dict[block_name]), suffix, names[-1]])
541
+ state_dict_[rename] = param
542
+ else:
543
+ print(name)
544
+ else:
545
+ block_name = self.get_block_name(names)
546
+ if len(block_name)>0 and block_name in blocks_rename_dict:
547
+ rename = ".".join(["blocks", str(blocks_rename_dict[block_name]), names[-1]])
548
+ state_dict_[rename] = param
549
+ return state_dict_
550
+
551
+
552
+ def from_civitai(self, state_dict, add_positional_conv=None):
553
+ rename_dict = {
554
+ "model.diffusion_model.input_blocks.0.0.bias": "conv_in.bias",
555
+ "model.diffusion_model.input_blocks.0.0.weight": "conv_in.weight",
556
+ "model.diffusion_model.input_blocks.1.0.emb_layers.1.bias": "blocks.0.time_emb_proj.bias",
557
+ "model.diffusion_model.input_blocks.1.0.emb_layers.1.weight": "blocks.0.time_emb_proj.weight",
558
+ "model.diffusion_model.input_blocks.1.0.in_layers.0.bias": "blocks.0.norm1.bias",
559
+ "model.diffusion_model.input_blocks.1.0.in_layers.0.weight": "blocks.0.norm1.weight",
560
+ "model.diffusion_model.input_blocks.1.0.in_layers.2.bias": "blocks.0.conv1.bias",
561
+ "model.diffusion_model.input_blocks.1.0.in_layers.2.weight": "blocks.0.conv1.weight",
562
+ "model.diffusion_model.input_blocks.1.0.out_layers.0.bias": "blocks.0.norm2.bias",
563
+ "model.diffusion_model.input_blocks.1.0.out_layers.0.weight": "blocks.0.norm2.weight",
564
+ "model.diffusion_model.input_blocks.1.0.out_layers.3.bias": "blocks.0.conv2.bias",
565
+ "model.diffusion_model.input_blocks.1.0.out_layers.3.weight": "blocks.0.conv2.weight",
566
+ "model.diffusion_model.input_blocks.1.0.time_mixer.mix_factor": "blocks.3.mix_factor",
567
+ "model.diffusion_model.input_blocks.1.0.time_stack.emb_layers.1.bias": "blocks.2.time_emb_proj.bias",
568
+ "model.diffusion_model.input_blocks.1.0.time_stack.emb_layers.1.weight": "blocks.2.time_emb_proj.weight",
569
+ "model.diffusion_model.input_blocks.1.0.time_stack.in_layers.0.bias": "blocks.2.norm1.bias",
570
+ "model.diffusion_model.input_blocks.1.0.time_stack.in_layers.0.weight": "blocks.2.norm1.weight",
571
+ "model.diffusion_model.input_blocks.1.0.time_stack.in_layers.2.bias": "blocks.2.conv1.bias",
572
+ "model.diffusion_model.input_blocks.1.0.time_stack.in_layers.2.weight": "blocks.2.conv1.weight",
573
+ "model.diffusion_model.input_blocks.1.0.time_stack.out_layers.0.bias": "blocks.2.norm2.bias",
574
+ "model.diffusion_model.input_blocks.1.0.time_stack.out_layers.0.weight": "blocks.2.norm2.weight",
575
+ "model.diffusion_model.input_blocks.1.0.time_stack.out_layers.3.bias": "blocks.2.conv2.bias",
576
+ "model.diffusion_model.input_blocks.1.0.time_stack.out_layers.3.weight": "blocks.2.conv2.weight",
577
+ "model.diffusion_model.input_blocks.1.1.norm.bias": "blocks.5.norm.bias",
578
+ "model.diffusion_model.input_blocks.1.1.norm.weight": "blocks.5.norm.weight",
579
+ "model.diffusion_model.input_blocks.1.1.proj_in.bias": "blocks.5.proj_in.bias",
580
+ "model.diffusion_model.input_blocks.1.1.proj_in.weight": "blocks.5.proj_in.weight",
581
+ "model.diffusion_model.input_blocks.1.1.proj_out.bias": "blocks.8.proj.bias",
582
+ "model.diffusion_model.input_blocks.1.1.proj_out.weight": "blocks.8.proj.weight",
583
+ "model.diffusion_model.input_blocks.1.1.time_mixer.mix_factor": "blocks.8.mix_factor",
584
+ "model.diffusion_model.input_blocks.1.1.time_pos_embed.0.bias": "blocks.7.positional_embedding_proj.0.bias",
585
+ "model.diffusion_model.input_blocks.1.1.time_pos_embed.0.weight": "blocks.7.positional_embedding_proj.0.weight",
586
+ "model.diffusion_model.input_blocks.1.1.time_pos_embed.2.bias": "blocks.7.positional_embedding_proj.2.bias",
587
+ "model.diffusion_model.input_blocks.1.1.time_pos_embed.2.weight": "blocks.7.positional_embedding_proj.2.weight",
588
+ "model.diffusion_model.input_blocks.1.1.time_stack.0.attn1.to_k.weight": "blocks.7.attn1.to_k.weight",
589
+ "model.diffusion_model.input_blocks.1.1.time_stack.0.attn1.to_out.0.bias": "blocks.7.attn1.to_out.bias",
590
+ "model.diffusion_model.input_blocks.1.1.time_stack.0.attn1.to_out.0.weight": "blocks.7.attn1.to_out.weight",
591
+ "model.diffusion_model.input_blocks.1.1.time_stack.0.attn1.to_q.weight": "blocks.7.attn1.to_q.weight",
592
+ "model.diffusion_model.input_blocks.1.1.time_stack.0.attn1.to_v.weight": "blocks.7.attn1.to_v.weight",
593
+ "model.diffusion_model.input_blocks.1.1.time_stack.0.attn2.to_k.weight": "blocks.7.attn2.to_k.weight",
594
+ "model.diffusion_model.input_blocks.1.1.time_stack.0.attn2.to_out.0.bias": "blocks.7.attn2.to_out.bias",
595
+ "model.diffusion_model.input_blocks.1.1.time_stack.0.attn2.to_out.0.weight": "blocks.7.attn2.to_out.weight",
596
+ "model.diffusion_model.input_blocks.1.1.time_stack.0.attn2.to_q.weight": "blocks.7.attn2.to_q.weight",
597
+ "model.diffusion_model.input_blocks.1.1.time_stack.0.attn2.to_v.weight": "blocks.7.attn2.to_v.weight",
598
+ "model.diffusion_model.input_blocks.1.1.time_stack.0.ff.net.0.proj.bias": "blocks.7.act_fn_out.proj.bias",
599
+ "model.diffusion_model.input_blocks.1.1.time_stack.0.ff.net.0.proj.weight": "blocks.7.act_fn_out.proj.weight",
600
+ "model.diffusion_model.input_blocks.1.1.time_stack.0.ff.net.2.bias": "blocks.7.ff_out.bias",
601
+ "model.diffusion_model.input_blocks.1.1.time_stack.0.ff.net.2.weight": "blocks.7.ff_out.weight",
602
+ "model.diffusion_model.input_blocks.1.1.time_stack.0.ff_in.net.0.proj.bias": "blocks.7.act_fn_in.proj.bias",
603
+ "model.diffusion_model.input_blocks.1.1.time_stack.0.ff_in.net.0.proj.weight": "blocks.7.act_fn_in.proj.weight",
604
+ "model.diffusion_model.input_blocks.1.1.time_stack.0.ff_in.net.2.bias": "blocks.7.ff_in.bias",
605
+ "model.diffusion_model.input_blocks.1.1.time_stack.0.ff_in.net.2.weight": "blocks.7.ff_in.weight",
606
+ "model.diffusion_model.input_blocks.1.1.time_stack.0.norm1.bias": "blocks.7.norm1.bias",
607
+ "model.diffusion_model.input_blocks.1.1.time_stack.0.norm1.weight": "blocks.7.norm1.weight",
608
+ "model.diffusion_model.input_blocks.1.1.time_stack.0.norm2.bias": "blocks.7.norm2.bias",
609
+ "model.diffusion_model.input_blocks.1.1.time_stack.0.norm2.weight": "blocks.7.norm2.weight",
610
+ "model.diffusion_model.input_blocks.1.1.time_stack.0.norm3.bias": "blocks.7.norm_out.bias",
611
+ "model.diffusion_model.input_blocks.1.1.time_stack.0.norm3.weight": "blocks.7.norm_out.weight",
612
+ "model.diffusion_model.input_blocks.1.1.time_stack.0.norm_in.bias": "blocks.7.norm_in.bias",
613
+ "model.diffusion_model.input_blocks.1.1.time_stack.0.norm_in.weight": "blocks.7.norm_in.weight",
614
+ "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_k.weight": "blocks.5.transformer_blocks.0.attn1.to_k.weight",
615
+ "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_out.0.bias": "blocks.5.transformer_blocks.0.attn1.to_out.bias",
616
+ "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_out.0.weight": "blocks.5.transformer_blocks.0.attn1.to_out.weight",
617
+ "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_q.weight": "blocks.5.transformer_blocks.0.attn1.to_q.weight",
618
+ "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn1.to_v.weight": "blocks.5.transformer_blocks.0.attn1.to_v.weight",
619
+ "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_k.weight": "blocks.5.transformer_blocks.0.attn2.to_k.weight",
620
+ "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_out.0.bias": "blocks.5.transformer_blocks.0.attn2.to_out.bias",
621
+ "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_out.0.weight": "blocks.5.transformer_blocks.0.attn2.to_out.weight",
622
+ "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_q.weight": "blocks.5.transformer_blocks.0.attn2.to_q.weight",
623
+ "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.attn2.to_v.weight": "blocks.5.transformer_blocks.0.attn2.to_v.weight",
624
+ "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.ff.net.0.proj.bias": "blocks.5.transformer_blocks.0.act_fn.proj.bias",
625
+ "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.ff.net.0.proj.weight": "blocks.5.transformer_blocks.0.act_fn.proj.weight",
626
+ "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.ff.net.2.bias": "blocks.5.transformer_blocks.0.ff.bias",
627
+ "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.ff.net.2.weight": "blocks.5.transformer_blocks.0.ff.weight",
628
+ "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm1.bias": "blocks.5.transformer_blocks.0.norm1.bias",
629
+ "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm1.weight": "blocks.5.transformer_blocks.0.norm1.weight",
630
+ "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm2.bias": "blocks.5.transformer_blocks.0.norm2.bias",
631
+ "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm2.weight": "blocks.5.transformer_blocks.0.norm2.weight",
632
+ "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm3.bias": "blocks.5.transformer_blocks.0.norm3.bias",
633
+ "model.diffusion_model.input_blocks.1.1.transformer_blocks.0.norm3.weight": "blocks.5.transformer_blocks.0.norm3.weight",
634
+ "model.diffusion_model.input_blocks.10.0.emb_layers.1.bias": "blocks.66.time_emb_proj.bias",
635
+ "model.diffusion_model.input_blocks.10.0.emb_layers.1.weight": "blocks.66.time_emb_proj.weight",
636
+ "model.diffusion_model.input_blocks.10.0.in_layers.0.bias": "blocks.66.norm1.bias",
637
+ "model.diffusion_model.input_blocks.10.0.in_layers.0.weight": "blocks.66.norm1.weight",
638
+ "model.diffusion_model.input_blocks.10.0.in_layers.2.bias": "blocks.66.conv1.bias",
639
+ "model.diffusion_model.input_blocks.10.0.in_layers.2.weight": "blocks.66.conv1.weight",
640
+ "model.diffusion_model.input_blocks.10.0.out_layers.0.bias": "blocks.66.norm2.bias",
641
+ "model.diffusion_model.input_blocks.10.0.out_layers.0.weight": "blocks.66.norm2.weight",
642
+ "model.diffusion_model.input_blocks.10.0.out_layers.3.bias": "blocks.66.conv2.bias",
643
+ "model.diffusion_model.input_blocks.10.0.out_layers.3.weight": "blocks.66.conv2.weight",
644
+ "model.diffusion_model.input_blocks.10.0.time_mixer.mix_factor": "blocks.69.mix_factor",
645
+ "model.diffusion_model.input_blocks.10.0.time_stack.emb_layers.1.bias": "blocks.68.time_emb_proj.bias",
646
+ "model.diffusion_model.input_blocks.10.0.time_stack.emb_layers.1.weight": "blocks.68.time_emb_proj.weight",
647
+ "model.diffusion_model.input_blocks.10.0.time_stack.in_layers.0.bias": "blocks.68.norm1.bias",
648
+ "model.diffusion_model.input_blocks.10.0.time_stack.in_layers.0.weight": "blocks.68.norm1.weight",
649
+ "model.diffusion_model.input_blocks.10.0.time_stack.in_layers.2.bias": "blocks.68.conv1.bias",
650
+ "model.diffusion_model.input_blocks.10.0.time_stack.in_layers.2.weight": "blocks.68.conv1.weight",
651
+ "model.diffusion_model.input_blocks.10.0.time_stack.out_layers.0.bias": "blocks.68.norm2.bias",
652
+ "model.diffusion_model.input_blocks.10.0.time_stack.out_layers.0.weight": "blocks.68.norm2.weight",
653
+ "model.diffusion_model.input_blocks.10.0.time_stack.out_layers.3.bias": "blocks.68.conv2.bias",
654
+ "model.diffusion_model.input_blocks.10.0.time_stack.out_layers.3.weight": "blocks.68.conv2.weight",
655
+ "model.diffusion_model.input_blocks.11.0.emb_layers.1.bias": "blocks.71.time_emb_proj.bias",
656
+ "model.diffusion_model.input_blocks.11.0.emb_layers.1.weight": "blocks.71.time_emb_proj.weight",
657
+ "model.diffusion_model.input_blocks.11.0.in_layers.0.bias": "blocks.71.norm1.bias",
658
+ "model.diffusion_model.input_blocks.11.0.in_layers.0.weight": "blocks.71.norm1.weight",
659
+ "model.diffusion_model.input_blocks.11.0.in_layers.2.bias": "blocks.71.conv1.bias",
660
+ "model.diffusion_model.input_blocks.11.0.in_layers.2.weight": "blocks.71.conv1.weight",
661
+ "model.diffusion_model.input_blocks.11.0.out_layers.0.bias": "blocks.71.norm2.bias",
662
+ "model.diffusion_model.input_blocks.11.0.out_layers.0.weight": "blocks.71.norm2.weight",
663
+ "model.diffusion_model.input_blocks.11.0.out_layers.3.bias": "blocks.71.conv2.bias",
664
+ "model.diffusion_model.input_blocks.11.0.out_layers.3.weight": "blocks.71.conv2.weight",
665
+ "model.diffusion_model.input_blocks.11.0.time_mixer.mix_factor": "blocks.74.mix_factor",
666
+ "model.diffusion_model.input_blocks.11.0.time_stack.emb_layers.1.bias": "blocks.73.time_emb_proj.bias",
667
+ "model.diffusion_model.input_blocks.11.0.time_stack.emb_layers.1.weight": "blocks.73.time_emb_proj.weight",
668
+ "model.diffusion_model.input_blocks.11.0.time_stack.in_layers.0.bias": "blocks.73.norm1.bias",
669
+ "model.diffusion_model.input_blocks.11.0.time_stack.in_layers.0.weight": "blocks.73.norm1.weight",
670
+ "model.diffusion_model.input_blocks.11.0.time_stack.in_layers.2.bias": "blocks.73.conv1.bias",
671
+ "model.diffusion_model.input_blocks.11.0.time_stack.in_layers.2.weight": "blocks.73.conv1.weight",
672
+ "model.diffusion_model.input_blocks.11.0.time_stack.out_layers.0.bias": "blocks.73.norm2.bias",
673
+ "model.diffusion_model.input_blocks.11.0.time_stack.out_layers.0.weight": "blocks.73.norm2.weight",
674
+ "model.diffusion_model.input_blocks.11.0.time_stack.out_layers.3.bias": "blocks.73.conv2.bias",
675
+ "model.diffusion_model.input_blocks.11.0.time_stack.out_layers.3.weight": "blocks.73.conv2.weight",
676
+ "model.diffusion_model.input_blocks.2.0.emb_layers.1.bias": "blocks.10.time_emb_proj.bias",
677
+ "model.diffusion_model.input_blocks.2.0.emb_layers.1.weight": "blocks.10.time_emb_proj.weight",
678
+ "model.diffusion_model.input_blocks.2.0.in_layers.0.bias": "blocks.10.norm1.bias",
679
+ "model.diffusion_model.input_blocks.2.0.in_layers.0.weight": "blocks.10.norm1.weight",
680
+ "model.diffusion_model.input_blocks.2.0.in_layers.2.bias": "blocks.10.conv1.bias",
681
+ "model.diffusion_model.input_blocks.2.0.in_layers.2.weight": "blocks.10.conv1.weight",
682
+ "model.diffusion_model.input_blocks.2.0.out_layers.0.bias": "blocks.10.norm2.bias",
683
+ "model.diffusion_model.input_blocks.2.0.out_layers.0.weight": "blocks.10.norm2.weight",
684
+ "model.diffusion_model.input_blocks.2.0.out_layers.3.bias": "blocks.10.conv2.bias",
685
+ "model.diffusion_model.input_blocks.2.0.out_layers.3.weight": "blocks.10.conv2.weight",
686
+ "model.diffusion_model.input_blocks.2.0.time_mixer.mix_factor": "blocks.13.mix_factor",
687
+ "model.diffusion_model.input_blocks.2.0.time_stack.emb_layers.1.bias": "blocks.12.time_emb_proj.bias",
688
+ "model.diffusion_model.input_blocks.2.0.time_stack.emb_layers.1.weight": "blocks.12.time_emb_proj.weight",
689
+ "model.diffusion_model.input_blocks.2.0.time_stack.in_layers.0.bias": "blocks.12.norm1.bias",
690
+ "model.diffusion_model.input_blocks.2.0.time_stack.in_layers.0.weight": "blocks.12.norm1.weight",
691
+ "model.diffusion_model.input_blocks.2.0.time_stack.in_layers.2.bias": "blocks.12.conv1.bias",
692
+ "model.diffusion_model.input_blocks.2.0.time_stack.in_layers.2.weight": "blocks.12.conv1.weight",
693
+ "model.diffusion_model.input_blocks.2.0.time_stack.out_layers.0.bias": "blocks.12.norm2.bias",
694
+ "model.diffusion_model.input_blocks.2.0.time_stack.out_layers.0.weight": "blocks.12.norm2.weight",
695
+ "model.diffusion_model.input_blocks.2.0.time_stack.out_layers.3.bias": "blocks.12.conv2.bias",
696
+ "model.diffusion_model.input_blocks.2.0.time_stack.out_layers.3.weight": "blocks.12.conv2.weight",
697
+ "model.diffusion_model.input_blocks.2.1.norm.bias": "blocks.15.norm.bias",
698
+ "model.diffusion_model.input_blocks.2.1.norm.weight": "blocks.15.norm.weight",
699
+ "model.diffusion_model.input_blocks.2.1.proj_in.bias": "blocks.15.proj_in.bias",
700
+ "model.diffusion_model.input_blocks.2.1.proj_in.weight": "blocks.15.proj_in.weight",
701
+ "model.diffusion_model.input_blocks.2.1.proj_out.bias": "blocks.18.proj.bias",
702
+ "model.diffusion_model.input_blocks.2.1.proj_out.weight": "blocks.18.proj.weight",
703
+ "model.diffusion_model.input_blocks.2.1.time_mixer.mix_factor": "blocks.18.mix_factor",
704
+ "model.diffusion_model.input_blocks.2.1.time_pos_embed.0.bias": "blocks.17.positional_embedding_proj.0.bias",
705
+ "model.diffusion_model.input_blocks.2.1.time_pos_embed.0.weight": "blocks.17.positional_embedding_proj.0.weight",
706
+ "model.diffusion_model.input_blocks.2.1.time_pos_embed.2.bias": "blocks.17.positional_embedding_proj.2.bias",
707
+ "model.diffusion_model.input_blocks.2.1.time_pos_embed.2.weight": "blocks.17.positional_embedding_proj.2.weight",
708
+ "model.diffusion_model.input_blocks.2.1.time_stack.0.attn1.to_k.weight": "blocks.17.attn1.to_k.weight",
709
+ "model.diffusion_model.input_blocks.2.1.time_stack.0.attn1.to_out.0.bias": "blocks.17.attn1.to_out.bias",
710
+ "model.diffusion_model.input_blocks.2.1.time_stack.0.attn1.to_out.0.weight": "blocks.17.attn1.to_out.weight",
711
+ "model.diffusion_model.input_blocks.2.1.time_stack.0.attn1.to_q.weight": "blocks.17.attn1.to_q.weight",
712
+ "model.diffusion_model.input_blocks.2.1.time_stack.0.attn1.to_v.weight": "blocks.17.attn1.to_v.weight",
713
+ "model.diffusion_model.input_blocks.2.1.time_stack.0.attn2.to_k.weight": "blocks.17.attn2.to_k.weight",
714
+ "model.diffusion_model.input_blocks.2.1.time_stack.0.attn2.to_out.0.bias": "blocks.17.attn2.to_out.bias",
715
+ "model.diffusion_model.input_blocks.2.1.time_stack.0.attn2.to_out.0.weight": "blocks.17.attn2.to_out.weight",
716
+ "model.diffusion_model.input_blocks.2.1.time_stack.0.attn2.to_q.weight": "blocks.17.attn2.to_q.weight",
717
+ "model.diffusion_model.input_blocks.2.1.time_stack.0.attn2.to_v.weight": "blocks.17.attn2.to_v.weight",
718
+ "model.diffusion_model.input_blocks.2.1.time_stack.0.ff.net.0.proj.bias": "blocks.17.act_fn_out.proj.bias",
719
+ "model.diffusion_model.input_blocks.2.1.time_stack.0.ff.net.0.proj.weight": "blocks.17.act_fn_out.proj.weight",
720
+ "model.diffusion_model.input_blocks.2.1.time_stack.0.ff.net.2.bias": "blocks.17.ff_out.bias",
721
+ "model.diffusion_model.input_blocks.2.1.time_stack.0.ff.net.2.weight": "blocks.17.ff_out.weight",
722
+ "model.diffusion_model.input_blocks.2.1.time_stack.0.ff_in.net.0.proj.bias": "blocks.17.act_fn_in.proj.bias",
723
+ "model.diffusion_model.input_blocks.2.1.time_stack.0.ff_in.net.0.proj.weight": "blocks.17.act_fn_in.proj.weight",
724
+ "model.diffusion_model.input_blocks.2.1.time_stack.0.ff_in.net.2.bias": "blocks.17.ff_in.bias",
725
+ "model.diffusion_model.input_blocks.2.1.time_stack.0.ff_in.net.2.weight": "blocks.17.ff_in.weight",
726
+ "model.diffusion_model.input_blocks.2.1.time_stack.0.norm1.bias": "blocks.17.norm1.bias",
727
+ "model.diffusion_model.input_blocks.2.1.time_stack.0.norm1.weight": "blocks.17.norm1.weight",
728
+ "model.diffusion_model.input_blocks.2.1.time_stack.0.norm2.bias": "blocks.17.norm2.bias",
729
+ "model.diffusion_model.input_blocks.2.1.time_stack.0.norm2.weight": "blocks.17.norm2.weight",
730
+ "model.diffusion_model.input_blocks.2.1.time_stack.0.norm3.bias": "blocks.17.norm_out.bias",
731
+ "model.diffusion_model.input_blocks.2.1.time_stack.0.norm3.weight": "blocks.17.norm_out.weight",
732
+ "model.diffusion_model.input_blocks.2.1.time_stack.0.norm_in.bias": "blocks.17.norm_in.bias",
733
+ "model.diffusion_model.input_blocks.2.1.time_stack.0.norm_in.weight": "blocks.17.norm_in.weight",
734
+ "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_k.weight": "blocks.15.transformer_blocks.0.attn1.to_k.weight",
735
+ "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_out.0.bias": "blocks.15.transformer_blocks.0.attn1.to_out.bias",
736
+ "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_out.0.weight": "blocks.15.transformer_blocks.0.attn1.to_out.weight",
737
+ "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_q.weight": "blocks.15.transformer_blocks.0.attn1.to_q.weight",
738
+ "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn1.to_v.weight": "blocks.15.transformer_blocks.0.attn1.to_v.weight",
739
+ "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_k.weight": "blocks.15.transformer_blocks.0.attn2.to_k.weight",
740
+ "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_out.0.bias": "blocks.15.transformer_blocks.0.attn2.to_out.bias",
741
+ "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_out.0.weight": "blocks.15.transformer_blocks.0.attn2.to_out.weight",
742
+ "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_q.weight": "blocks.15.transformer_blocks.0.attn2.to_q.weight",
743
+ "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_v.weight": "blocks.15.transformer_blocks.0.attn2.to_v.weight",
744
+ "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.ff.net.0.proj.bias": "blocks.15.transformer_blocks.0.act_fn.proj.bias",
745
+ "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.ff.net.0.proj.weight": "blocks.15.transformer_blocks.0.act_fn.proj.weight",
746
+ "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.ff.net.2.bias": "blocks.15.transformer_blocks.0.ff.bias",
747
+ "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.ff.net.2.weight": "blocks.15.transformer_blocks.0.ff.weight",
748
+ "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm1.bias": "blocks.15.transformer_blocks.0.norm1.bias",
749
+ "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm1.weight": "blocks.15.transformer_blocks.0.norm1.weight",
750
+ "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm2.bias": "blocks.15.transformer_blocks.0.norm2.bias",
751
+ "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm2.weight": "blocks.15.transformer_blocks.0.norm2.weight",
752
+ "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm3.bias": "blocks.15.transformer_blocks.0.norm3.bias",
753
+ "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.norm3.weight": "blocks.15.transformer_blocks.0.norm3.weight",
754
+ "model.diffusion_model.input_blocks.3.0.op.bias": "blocks.20.conv.bias",
755
+ "model.diffusion_model.input_blocks.3.0.op.weight": "blocks.20.conv.weight",
756
+ "model.diffusion_model.input_blocks.4.0.emb_layers.1.bias": "blocks.22.time_emb_proj.bias",
757
+ "model.diffusion_model.input_blocks.4.0.emb_layers.1.weight": "blocks.22.time_emb_proj.weight",
758
+ "model.diffusion_model.input_blocks.4.0.in_layers.0.bias": "blocks.22.norm1.bias",
759
+ "model.diffusion_model.input_blocks.4.0.in_layers.0.weight": "blocks.22.norm1.weight",
760
+ "model.diffusion_model.input_blocks.4.0.in_layers.2.bias": "blocks.22.conv1.bias",
761
+ "model.diffusion_model.input_blocks.4.0.in_layers.2.weight": "blocks.22.conv1.weight",
762
+ "model.diffusion_model.input_blocks.4.0.out_layers.0.bias": "blocks.22.norm2.bias",
763
+ "model.diffusion_model.input_blocks.4.0.out_layers.0.weight": "blocks.22.norm2.weight",
764
+ "model.diffusion_model.input_blocks.4.0.out_layers.3.bias": "blocks.22.conv2.bias",
765
+ "model.diffusion_model.input_blocks.4.0.out_layers.3.weight": "blocks.22.conv2.weight",
766
+ "model.diffusion_model.input_blocks.4.0.skip_connection.bias": "blocks.22.conv_shortcut.bias",
767
+ "model.diffusion_model.input_blocks.4.0.skip_connection.weight": "blocks.22.conv_shortcut.weight",
768
+ "model.diffusion_model.input_blocks.4.0.time_mixer.mix_factor": "blocks.25.mix_factor",
769
+ "model.diffusion_model.input_blocks.4.0.time_stack.emb_layers.1.bias": "blocks.24.time_emb_proj.bias",
770
+ "model.diffusion_model.input_blocks.4.0.time_stack.emb_layers.1.weight": "blocks.24.time_emb_proj.weight",
771
+ "model.diffusion_model.input_blocks.4.0.time_stack.in_layers.0.bias": "blocks.24.norm1.bias",
772
+ "model.diffusion_model.input_blocks.4.0.time_stack.in_layers.0.weight": "blocks.24.norm1.weight",
773
+ "model.diffusion_model.input_blocks.4.0.time_stack.in_layers.2.bias": "blocks.24.conv1.bias",
774
+ "model.diffusion_model.input_blocks.4.0.time_stack.in_layers.2.weight": "blocks.24.conv1.weight",
775
+ "model.diffusion_model.input_blocks.4.0.time_stack.out_layers.0.bias": "blocks.24.norm2.bias",
776
+ "model.diffusion_model.input_blocks.4.0.time_stack.out_layers.0.weight": "blocks.24.norm2.weight",
777
+ "model.diffusion_model.input_blocks.4.0.time_stack.out_layers.3.bias": "blocks.24.conv2.bias",
778
+ "model.diffusion_model.input_blocks.4.0.time_stack.out_layers.3.weight": "blocks.24.conv2.weight",
779
+ "model.diffusion_model.input_blocks.4.1.norm.bias": "blocks.27.norm.bias",
780
+ "model.diffusion_model.input_blocks.4.1.norm.weight": "blocks.27.norm.weight",
781
+ "model.diffusion_model.input_blocks.4.1.proj_in.bias": "blocks.27.proj_in.bias",
782
+ "model.diffusion_model.input_blocks.4.1.proj_in.weight": "blocks.27.proj_in.weight",
783
+ "model.diffusion_model.input_blocks.4.1.proj_out.bias": "blocks.30.proj.bias",
784
+ "model.diffusion_model.input_blocks.4.1.proj_out.weight": "blocks.30.proj.weight",
785
+ "model.diffusion_model.input_blocks.4.1.time_mixer.mix_factor": "blocks.30.mix_factor",
786
+ "model.diffusion_model.input_blocks.4.1.time_pos_embed.0.bias": "blocks.29.positional_embedding_proj.0.bias",
787
+ "model.diffusion_model.input_blocks.4.1.time_pos_embed.0.weight": "blocks.29.positional_embedding_proj.0.weight",
788
+ "model.diffusion_model.input_blocks.4.1.time_pos_embed.2.bias": "blocks.29.positional_embedding_proj.2.bias",
789
+ "model.diffusion_model.input_blocks.4.1.time_pos_embed.2.weight": "blocks.29.positional_embedding_proj.2.weight",
790
+ "model.diffusion_model.input_blocks.4.1.time_stack.0.attn1.to_k.weight": "blocks.29.attn1.to_k.weight",
791
+ "model.diffusion_model.input_blocks.4.1.time_stack.0.attn1.to_out.0.bias": "blocks.29.attn1.to_out.bias",
792
+ "model.diffusion_model.input_blocks.4.1.time_stack.0.attn1.to_out.0.weight": "blocks.29.attn1.to_out.weight",
793
+ "model.diffusion_model.input_blocks.4.1.time_stack.0.attn1.to_q.weight": "blocks.29.attn1.to_q.weight",
794
+ "model.diffusion_model.input_blocks.4.1.time_stack.0.attn1.to_v.weight": "blocks.29.attn1.to_v.weight",
795
+ "model.diffusion_model.input_blocks.4.1.time_stack.0.attn2.to_k.weight": "blocks.29.attn2.to_k.weight",
796
+ "model.diffusion_model.input_blocks.4.1.time_stack.0.attn2.to_out.0.bias": "blocks.29.attn2.to_out.bias",
797
+ "model.diffusion_model.input_blocks.4.1.time_stack.0.attn2.to_out.0.weight": "blocks.29.attn2.to_out.weight",
798
+ "model.diffusion_model.input_blocks.4.1.time_stack.0.attn2.to_q.weight": "blocks.29.attn2.to_q.weight",
799
+ "model.diffusion_model.input_blocks.4.1.time_stack.0.attn2.to_v.weight": "blocks.29.attn2.to_v.weight",
800
+ "model.diffusion_model.input_blocks.4.1.time_stack.0.ff.net.0.proj.bias": "blocks.29.act_fn_out.proj.bias",
801
+ "model.diffusion_model.input_blocks.4.1.time_stack.0.ff.net.0.proj.weight": "blocks.29.act_fn_out.proj.weight",
802
+ "model.diffusion_model.input_blocks.4.1.time_stack.0.ff.net.2.bias": "blocks.29.ff_out.bias",
803
+ "model.diffusion_model.input_blocks.4.1.time_stack.0.ff.net.2.weight": "blocks.29.ff_out.weight",
804
+ "model.diffusion_model.input_blocks.4.1.time_stack.0.ff_in.net.0.proj.bias": "blocks.29.act_fn_in.proj.bias",
805
+ "model.diffusion_model.input_blocks.4.1.time_stack.0.ff_in.net.0.proj.weight": "blocks.29.act_fn_in.proj.weight",
806
+ "model.diffusion_model.input_blocks.4.1.time_stack.0.ff_in.net.2.bias": "blocks.29.ff_in.bias",
807
+ "model.diffusion_model.input_blocks.4.1.time_stack.0.ff_in.net.2.weight": "blocks.29.ff_in.weight",
808
+ "model.diffusion_model.input_blocks.4.1.time_stack.0.norm1.bias": "blocks.29.norm1.bias",
809
+ "model.diffusion_model.input_blocks.4.1.time_stack.0.norm1.weight": "blocks.29.norm1.weight",
810
+ "model.diffusion_model.input_blocks.4.1.time_stack.0.norm2.bias": "blocks.29.norm2.bias",
811
+ "model.diffusion_model.input_blocks.4.1.time_stack.0.norm2.weight": "blocks.29.norm2.weight",
812
+ "model.diffusion_model.input_blocks.4.1.time_stack.0.norm3.bias": "blocks.29.norm_out.bias",
813
+ "model.diffusion_model.input_blocks.4.1.time_stack.0.norm3.weight": "blocks.29.norm_out.weight",
814
+ "model.diffusion_model.input_blocks.4.1.time_stack.0.norm_in.bias": "blocks.29.norm_in.bias",
815
+ "model.diffusion_model.input_blocks.4.1.time_stack.0.norm_in.weight": "blocks.29.norm_in.weight",
816
+ "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_k.weight": "blocks.27.transformer_blocks.0.attn1.to_k.weight",
817
+ "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_out.0.bias": "blocks.27.transformer_blocks.0.attn1.to_out.bias",
818
+ "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_out.0.weight": "blocks.27.transformer_blocks.0.attn1.to_out.weight",
819
+ "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_q.weight": "blocks.27.transformer_blocks.0.attn1.to_q.weight",
820
+ "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn1.to_v.weight": "blocks.27.transformer_blocks.0.attn1.to_v.weight",
821
+ "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_k.weight": "blocks.27.transformer_blocks.0.attn2.to_k.weight",
822
+ "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_out.0.bias": "blocks.27.transformer_blocks.0.attn2.to_out.bias",
823
+ "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_out.0.weight": "blocks.27.transformer_blocks.0.attn2.to_out.weight",
824
+ "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_q.weight": "blocks.27.transformer_blocks.0.attn2.to_q.weight",
825
+ "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.attn2.to_v.weight": "blocks.27.transformer_blocks.0.attn2.to_v.weight",
826
+ "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.0.proj.bias": "blocks.27.transformer_blocks.0.act_fn.proj.bias",
827
+ "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.0.proj.weight": "blocks.27.transformer_blocks.0.act_fn.proj.weight",
828
+ "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.2.bias": "blocks.27.transformer_blocks.0.ff.bias",
829
+ "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.ff.net.2.weight": "blocks.27.transformer_blocks.0.ff.weight",
830
+ "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm1.bias": "blocks.27.transformer_blocks.0.norm1.bias",
831
+ "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm1.weight": "blocks.27.transformer_blocks.0.norm1.weight",
832
+ "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm2.bias": "blocks.27.transformer_blocks.0.norm2.bias",
833
+ "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm2.weight": "blocks.27.transformer_blocks.0.norm2.weight",
834
+ "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm3.bias": "blocks.27.transformer_blocks.0.norm3.bias",
835
+ "model.diffusion_model.input_blocks.4.1.transformer_blocks.0.norm3.weight": "blocks.27.transformer_blocks.0.norm3.weight",
836
+ "model.diffusion_model.input_blocks.5.0.emb_layers.1.bias": "blocks.32.time_emb_proj.bias",
837
+ "model.diffusion_model.input_blocks.5.0.emb_layers.1.weight": "blocks.32.time_emb_proj.weight",
838
+ "model.diffusion_model.input_blocks.5.0.in_layers.0.bias": "blocks.32.norm1.bias",
839
+ "model.diffusion_model.input_blocks.5.0.in_layers.0.weight": "blocks.32.norm1.weight",
840
+ "model.diffusion_model.input_blocks.5.0.in_layers.2.bias": "blocks.32.conv1.bias",
841
+ "model.diffusion_model.input_blocks.5.0.in_layers.2.weight": "blocks.32.conv1.weight",
842
+ "model.diffusion_model.input_blocks.5.0.out_layers.0.bias": "blocks.32.norm2.bias",
843
+ "model.diffusion_model.input_blocks.5.0.out_layers.0.weight": "blocks.32.norm2.weight",
844
+ "model.diffusion_model.input_blocks.5.0.out_layers.3.bias": "blocks.32.conv2.bias",
845
+ "model.diffusion_model.input_blocks.5.0.out_layers.3.weight": "blocks.32.conv2.weight",
846
+ "model.diffusion_model.input_blocks.5.0.time_mixer.mix_factor": "blocks.35.mix_factor",
847
+ "model.diffusion_model.input_blocks.5.0.time_stack.emb_layers.1.bias": "blocks.34.time_emb_proj.bias",
848
+ "model.diffusion_model.input_blocks.5.0.time_stack.emb_layers.1.weight": "blocks.34.time_emb_proj.weight",
849
+ "model.diffusion_model.input_blocks.5.0.time_stack.in_layers.0.bias": "blocks.34.norm1.bias",
850
+ "model.diffusion_model.input_blocks.5.0.time_stack.in_layers.0.weight": "blocks.34.norm1.weight",
851
+ "model.diffusion_model.input_blocks.5.0.time_stack.in_layers.2.bias": "blocks.34.conv1.bias",
852
+ "model.diffusion_model.input_blocks.5.0.time_stack.in_layers.2.weight": "blocks.34.conv1.weight",
853
+ "model.diffusion_model.input_blocks.5.0.time_stack.out_layers.0.bias": "blocks.34.norm2.bias",
854
+ "model.diffusion_model.input_blocks.5.0.time_stack.out_layers.0.weight": "blocks.34.norm2.weight",
855
+ "model.diffusion_model.input_blocks.5.0.time_stack.out_layers.3.bias": "blocks.34.conv2.bias",
856
+ "model.diffusion_model.input_blocks.5.0.time_stack.out_layers.3.weight": "blocks.34.conv2.weight",
857
+ "model.diffusion_model.input_blocks.5.1.norm.bias": "blocks.37.norm.bias",
858
+ "model.diffusion_model.input_blocks.5.1.norm.weight": "blocks.37.norm.weight",
859
+ "model.diffusion_model.input_blocks.5.1.proj_in.bias": "blocks.37.proj_in.bias",
860
+ "model.diffusion_model.input_blocks.5.1.proj_in.weight": "blocks.37.proj_in.weight",
861
+ "model.diffusion_model.input_blocks.5.1.proj_out.bias": "blocks.40.proj.bias",
862
+ "model.diffusion_model.input_blocks.5.1.proj_out.weight": "blocks.40.proj.weight",
863
+ "model.diffusion_model.input_blocks.5.1.time_mixer.mix_factor": "blocks.40.mix_factor",
864
+ "model.diffusion_model.input_blocks.5.1.time_pos_embed.0.bias": "blocks.39.positional_embedding_proj.0.bias",
865
+ "model.diffusion_model.input_blocks.5.1.time_pos_embed.0.weight": "blocks.39.positional_embedding_proj.0.weight",
866
+ "model.diffusion_model.input_blocks.5.1.time_pos_embed.2.bias": "blocks.39.positional_embedding_proj.2.bias",
867
+ "model.diffusion_model.input_blocks.5.1.time_pos_embed.2.weight": "blocks.39.positional_embedding_proj.2.weight",
868
+ "model.diffusion_model.input_blocks.5.1.time_stack.0.attn1.to_k.weight": "blocks.39.attn1.to_k.weight",
869
+ "model.diffusion_model.input_blocks.5.1.time_stack.0.attn1.to_out.0.bias": "blocks.39.attn1.to_out.bias",
870
+ "model.diffusion_model.input_blocks.5.1.time_stack.0.attn1.to_out.0.weight": "blocks.39.attn1.to_out.weight",
871
+ "model.diffusion_model.input_blocks.5.1.time_stack.0.attn1.to_q.weight": "blocks.39.attn1.to_q.weight",
872
+ "model.diffusion_model.input_blocks.5.1.time_stack.0.attn1.to_v.weight": "blocks.39.attn1.to_v.weight",
873
+ "model.diffusion_model.input_blocks.5.1.time_stack.0.attn2.to_k.weight": "blocks.39.attn2.to_k.weight",
874
+ "model.diffusion_model.input_blocks.5.1.time_stack.0.attn2.to_out.0.bias": "blocks.39.attn2.to_out.bias",
875
+ "model.diffusion_model.input_blocks.5.1.time_stack.0.attn2.to_out.0.weight": "blocks.39.attn2.to_out.weight",
876
+ "model.diffusion_model.input_blocks.5.1.time_stack.0.attn2.to_q.weight": "blocks.39.attn2.to_q.weight",
877
+ "model.diffusion_model.input_blocks.5.1.time_stack.0.attn2.to_v.weight": "blocks.39.attn2.to_v.weight",
878
+ "model.diffusion_model.input_blocks.5.1.time_stack.0.ff.net.0.proj.bias": "blocks.39.act_fn_out.proj.bias",
879
+ "model.diffusion_model.input_blocks.5.1.time_stack.0.ff.net.0.proj.weight": "blocks.39.act_fn_out.proj.weight",
880
+ "model.diffusion_model.input_blocks.5.1.time_stack.0.ff.net.2.bias": "blocks.39.ff_out.bias",
881
+ "model.diffusion_model.input_blocks.5.1.time_stack.0.ff.net.2.weight": "blocks.39.ff_out.weight",
882
+ "model.diffusion_model.input_blocks.5.1.time_stack.0.ff_in.net.0.proj.bias": "blocks.39.act_fn_in.proj.bias",
883
+ "model.diffusion_model.input_blocks.5.1.time_stack.0.ff_in.net.0.proj.weight": "blocks.39.act_fn_in.proj.weight",
884
+ "model.diffusion_model.input_blocks.5.1.time_stack.0.ff_in.net.2.bias": "blocks.39.ff_in.bias",
885
+ "model.diffusion_model.input_blocks.5.1.time_stack.0.ff_in.net.2.weight": "blocks.39.ff_in.weight",
886
+ "model.diffusion_model.input_blocks.5.1.time_stack.0.norm1.bias": "blocks.39.norm1.bias",
887
+ "model.diffusion_model.input_blocks.5.1.time_stack.0.norm1.weight": "blocks.39.norm1.weight",
888
+ "model.diffusion_model.input_blocks.5.1.time_stack.0.norm2.bias": "blocks.39.norm2.bias",
889
+ "model.diffusion_model.input_blocks.5.1.time_stack.0.norm2.weight": "blocks.39.norm2.weight",
890
+ "model.diffusion_model.input_blocks.5.1.time_stack.0.norm3.bias": "blocks.39.norm_out.bias",
891
+ "model.diffusion_model.input_blocks.5.1.time_stack.0.norm3.weight": "blocks.39.norm_out.weight",
892
+ "model.diffusion_model.input_blocks.5.1.time_stack.0.norm_in.bias": "blocks.39.norm_in.bias",
893
+ "model.diffusion_model.input_blocks.5.1.time_stack.0.norm_in.weight": "blocks.39.norm_in.weight",
894
+ "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_k.weight": "blocks.37.transformer_blocks.0.attn1.to_k.weight",
895
+ "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_out.0.bias": "blocks.37.transformer_blocks.0.attn1.to_out.bias",
896
+ "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_out.0.weight": "blocks.37.transformer_blocks.0.attn1.to_out.weight",
897
+ "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_q.weight": "blocks.37.transformer_blocks.0.attn1.to_q.weight",
898
+ "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn1.to_v.weight": "blocks.37.transformer_blocks.0.attn1.to_v.weight",
899
+ "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_k.weight": "blocks.37.transformer_blocks.0.attn2.to_k.weight",
900
+ "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_out.0.bias": "blocks.37.transformer_blocks.0.attn2.to_out.bias",
901
+ "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_out.0.weight": "blocks.37.transformer_blocks.0.attn2.to_out.weight",
902
+ "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_q.weight": "blocks.37.transformer_blocks.0.attn2.to_q.weight",
903
+ "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.attn2.to_v.weight": "blocks.37.transformer_blocks.0.attn2.to_v.weight",
904
+ "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.0.proj.bias": "blocks.37.transformer_blocks.0.act_fn.proj.bias",
905
+ "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.0.proj.weight": "blocks.37.transformer_blocks.0.act_fn.proj.weight",
906
+ "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.2.bias": "blocks.37.transformer_blocks.0.ff.bias",
907
+ "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.ff.net.2.weight": "blocks.37.transformer_blocks.0.ff.weight",
908
+ "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm1.bias": "blocks.37.transformer_blocks.0.norm1.bias",
909
+ "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm1.weight": "blocks.37.transformer_blocks.0.norm1.weight",
910
+ "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm2.bias": "blocks.37.transformer_blocks.0.norm2.bias",
911
+ "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm2.weight": "blocks.37.transformer_blocks.0.norm2.weight",
912
+ "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm3.bias": "blocks.37.transformer_blocks.0.norm3.bias",
913
+ "model.diffusion_model.input_blocks.5.1.transformer_blocks.0.norm3.weight": "blocks.37.transformer_blocks.0.norm3.weight",
914
+ "model.diffusion_model.input_blocks.6.0.op.bias": "blocks.42.conv.bias",
915
+ "model.diffusion_model.input_blocks.6.0.op.weight": "blocks.42.conv.weight",
916
+ "model.diffusion_model.input_blocks.7.0.emb_layers.1.bias": "blocks.44.time_emb_proj.bias",
917
+ "model.diffusion_model.input_blocks.7.0.emb_layers.1.weight": "blocks.44.time_emb_proj.weight",
918
+ "model.diffusion_model.input_blocks.7.0.in_layers.0.bias": "blocks.44.norm1.bias",
919
+ "model.diffusion_model.input_blocks.7.0.in_layers.0.weight": "blocks.44.norm1.weight",
920
+ "model.diffusion_model.input_blocks.7.0.in_layers.2.bias": "blocks.44.conv1.bias",
921
+ "model.diffusion_model.input_blocks.7.0.in_layers.2.weight": "blocks.44.conv1.weight",
922
+ "model.diffusion_model.input_blocks.7.0.out_layers.0.bias": "blocks.44.norm2.bias",
923
+ "model.diffusion_model.input_blocks.7.0.out_layers.0.weight": "blocks.44.norm2.weight",
924
+ "model.diffusion_model.input_blocks.7.0.out_layers.3.bias": "blocks.44.conv2.bias",
925
+ "model.diffusion_model.input_blocks.7.0.out_layers.3.weight": "blocks.44.conv2.weight",
926
+ "model.diffusion_model.input_blocks.7.0.skip_connection.bias": "blocks.44.conv_shortcut.bias",
927
+ "model.diffusion_model.input_blocks.7.0.skip_connection.weight": "blocks.44.conv_shortcut.weight",
928
+ "model.diffusion_model.input_blocks.7.0.time_mixer.mix_factor": "blocks.47.mix_factor",
929
+ "model.diffusion_model.input_blocks.7.0.time_stack.emb_layers.1.bias": "blocks.46.time_emb_proj.bias",
930
+ "model.diffusion_model.input_blocks.7.0.time_stack.emb_layers.1.weight": "blocks.46.time_emb_proj.weight",
931
+ "model.diffusion_model.input_blocks.7.0.time_stack.in_layers.0.bias": "blocks.46.norm1.bias",
932
+ "model.diffusion_model.input_blocks.7.0.time_stack.in_layers.0.weight": "blocks.46.norm1.weight",
933
+ "model.diffusion_model.input_blocks.7.0.time_stack.in_layers.2.bias": "blocks.46.conv1.bias",
934
+ "model.diffusion_model.input_blocks.7.0.time_stack.in_layers.2.weight": "blocks.46.conv1.weight",
935
+ "model.diffusion_model.input_blocks.7.0.time_stack.out_layers.0.bias": "blocks.46.norm2.bias",
936
+ "model.diffusion_model.input_blocks.7.0.time_stack.out_layers.0.weight": "blocks.46.norm2.weight",
937
+ "model.diffusion_model.input_blocks.7.0.time_stack.out_layers.3.bias": "blocks.46.conv2.bias",
938
+ "model.diffusion_model.input_blocks.7.0.time_stack.out_layers.3.weight": "blocks.46.conv2.weight",
939
+ "model.diffusion_model.input_blocks.7.1.norm.bias": "blocks.49.norm.bias",
940
+ "model.diffusion_model.input_blocks.7.1.norm.weight": "blocks.49.norm.weight",
941
+ "model.diffusion_model.input_blocks.7.1.proj_in.bias": "blocks.49.proj_in.bias",
942
+ "model.diffusion_model.input_blocks.7.1.proj_in.weight": "blocks.49.proj_in.weight",
943
+ "model.diffusion_model.input_blocks.7.1.proj_out.bias": "blocks.52.proj.bias",
944
+ "model.diffusion_model.input_blocks.7.1.proj_out.weight": "blocks.52.proj.weight",
945
+ "model.diffusion_model.input_blocks.7.1.time_mixer.mix_factor": "blocks.52.mix_factor",
946
+ "model.diffusion_model.input_blocks.7.1.time_pos_embed.0.bias": "blocks.51.positional_embedding_proj.0.bias",
947
+ "model.diffusion_model.input_blocks.7.1.time_pos_embed.0.weight": "blocks.51.positional_embedding_proj.0.weight",
948
+ "model.diffusion_model.input_blocks.7.1.time_pos_embed.2.bias": "blocks.51.positional_embedding_proj.2.bias",
949
+ "model.diffusion_model.input_blocks.7.1.time_pos_embed.2.weight": "blocks.51.positional_embedding_proj.2.weight",
950
+ "model.diffusion_model.input_blocks.7.1.time_stack.0.attn1.to_k.weight": "blocks.51.attn1.to_k.weight",
951
+ "model.diffusion_model.input_blocks.7.1.time_stack.0.attn1.to_out.0.bias": "blocks.51.attn1.to_out.bias",
952
+ "model.diffusion_model.input_blocks.7.1.time_stack.0.attn1.to_out.0.weight": "blocks.51.attn1.to_out.weight",
953
+ "model.diffusion_model.input_blocks.7.1.time_stack.0.attn1.to_q.weight": "blocks.51.attn1.to_q.weight",
954
+ "model.diffusion_model.input_blocks.7.1.time_stack.0.attn1.to_v.weight": "blocks.51.attn1.to_v.weight",
955
+ "model.diffusion_model.input_blocks.7.1.time_stack.0.attn2.to_k.weight": "blocks.51.attn2.to_k.weight",
956
+ "model.diffusion_model.input_blocks.7.1.time_stack.0.attn2.to_out.0.bias": "blocks.51.attn2.to_out.bias",
957
+ "model.diffusion_model.input_blocks.7.1.time_stack.0.attn2.to_out.0.weight": "blocks.51.attn2.to_out.weight",
958
+ "model.diffusion_model.input_blocks.7.1.time_stack.0.attn2.to_q.weight": "blocks.51.attn2.to_q.weight",
959
+ "model.diffusion_model.input_blocks.7.1.time_stack.0.attn2.to_v.weight": "blocks.51.attn2.to_v.weight",
960
+ "model.diffusion_model.input_blocks.7.1.time_stack.0.ff.net.0.proj.bias": "blocks.51.act_fn_out.proj.bias",
961
+ "model.diffusion_model.input_blocks.7.1.time_stack.0.ff.net.0.proj.weight": "blocks.51.act_fn_out.proj.weight",
962
+ "model.diffusion_model.input_blocks.7.1.time_stack.0.ff.net.2.bias": "blocks.51.ff_out.bias",
963
+ "model.diffusion_model.input_blocks.7.1.time_stack.0.ff.net.2.weight": "blocks.51.ff_out.weight",
964
+ "model.diffusion_model.input_blocks.7.1.time_stack.0.ff_in.net.0.proj.bias": "blocks.51.act_fn_in.proj.bias",
965
+ "model.diffusion_model.input_blocks.7.1.time_stack.0.ff_in.net.0.proj.weight": "blocks.51.act_fn_in.proj.weight",
966
+ "model.diffusion_model.input_blocks.7.1.time_stack.0.ff_in.net.2.bias": "blocks.51.ff_in.bias",
967
+ "model.diffusion_model.input_blocks.7.1.time_stack.0.ff_in.net.2.weight": "blocks.51.ff_in.weight",
968
+ "model.diffusion_model.input_blocks.7.1.time_stack.0.norm1.bias": "blocks.51.norm1.bias",
969
+ "model.diffusion_model.input_blocks.7.1.time_stack.0.norm1.weight": "blocks.51.norm1.weight",
970
+ "model.diffusion_model.input_blocks.7.1.time_stack.0.norm2.bias": "blocks.51.norm2.bias",
971
+ "model.diffusion_model.input_blocks.7.1.time_stack.0.norm2.weight": "blocks.51.norm2.weight",
972
+ "model.diffusion_model.input_blocks.7.1.time_stack.0.norm3.bias": "blocks.51.norm_out.bias",
973
+ "model.diffusion_model.input_blocks.7.1.time_stack.0.norm3.weight": "blocks.51.norm_out.weight",
974
+ "model.diffusion_model.input_blocks.7.1.time_stack.0.norm_in.bias": "blocks.51.norm_in.bias",
975
+ "model.diffusion_model.input_blocks.7.1.time_stack.0.norm_in.weight": "blocks.51.norm_in.weight",
976
+ "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_k.weight": "blocks.49.transformer_blocks.0.attn1.to_k.weight",
977
+ "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_out.0.bias": "blocks.49.transformer_blocks.0.attn1.to_out.bias",
978
+ "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_out.0.weight": "blocks.49.transformer_blocks.0.attn1.to_out.weight",
979
+ "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_q.weight": "blocks.49.transformer_blocks.0.attn1.to_q.weight",
980
+ "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn1.to_v.weight": "blocks.49.transformer_blocks.0.attn1.to_v.weight",
981
+ "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_k.weight": "blocks.49.transformer_blocks.0.attn2.to_k.weight",
982
+ "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_out.0.bias": "blocks.49.transformer_blocks.0.attn2.to_out.bias",
983
+ "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_out.0.weight": "blocks.49.transformer_blocks.0.attn2.to_out.weight",
984
+ "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_q.weight": "blocks.49.transformer_blocks.0.attn2.to_q.weight",
985
+ "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.attn2.to_v.weight": "blocks.49.transformer_blocks.0.attn2.to_v.weight",
986
+ "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.0.proj.bias": "blocks.49.transformer_blocks.0.act_fn.proj.bias",
987
+ "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.0.proj.weight": "blocks.49.transformer_blocks.0.act_fn.proj.weight",
988
+ "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.2.bias": "blocks.49.transformer_blocks.0.ff.bias",
989
+ "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.ff.net.2.weight": "blocks.49.transformer_blocks.0.ff.weight",
990
+ "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm1.bias": "blocks.49.transformer_blocks.0.norm1.bias",
991
+ "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm1.weight": "blocks.49.transformer_blocks.0.norm1.weight",
992
+ "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm2.bias": "blocks.49.transformer_blocks.0.norm2.bias",
993
+ "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm2.weight": "blocks.49.transformer_blocks.0.norm2.weight",
994
+ "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm3.bias": "blocks.49.transformer_blocks.0.norm3.bias",
995
+ "model.diffusion_model.input_blocks.7.1.transformer_blocks.0.norm3.weight": "blocks.49.transformer_blocks.0.norm3.weight",
996
+ "model.diffusion_model.input_blocks.8.0.emb_layers.1.bias": "blocks.54.time_emb_proj.bias",
997
+ "model.diffusion_model.input_blocks.8.0.emb_layers.1.weight": "blocks.54.time_emb_proj.weight",
998
+ "model.diffusion_model.input_blocks.8.0.in_layers.0.bias": "blocks.54.norm1.bias",
999
+ "model.diffusion_model.input_blocks.8.0.in_layers.0.weight": "blocks.54.norm1.weight",
1000
+ "model.diffusion_model.input_blocks.8.0.in_layers.2.bias": "blocks.54.conv1.bias",
1001
+ "model.diffusion_model.input_blocks.8.0.in_layers.2.weight": "blocks.54.conv1.weight",
1002
+ "model.diffusion_model.input_blocks.8.0.out_layers.0.bias": "blocks.54.norm2.bias",
1003
+ "model.diffusion_model.input_blocks.8.0.out_layers.0.weight": "blocks.54.norm2.weight",
1004
+ "model.diffusion_model.input_blocks.8.0.out_layers.3.bias": "blocks.54.conv2.bias",
1005
+ "model.diffusion_model.input_blocks.8.0.out_layers.3.weight": "blocks.54.conv2.weight",
1006
+ "model.diffusion_model.input_blocks.8.0.time_mixer.mix_factor": "blocks.57.mix_factor",
1007
+ "model.diffusion_model.input_blocks.8.0.time_stack.emb_layers.1.bias": "blocks.56.time_emb_proj.bias",
1008
+ "model.diffusion_model.input_blocks.8.0.time_stack.emb_layers.1.weight": "blocks.56.time_emb_proj.weight",
1009
+ "model.diffusion_model.input_blocks.8.0.time_stack.in_layers.0.bias": "blocks.56.norm1.bias",
1010
+ "model.diffusion_model.input_blocks.8.0.time_stack.in_layers.0.weight": "blocks.56.norm1.weight",
1011
+ "model.diffusion_model.input_blocks.8.0.time_stack.in_layers.2.bias": "blocks.56.conv1.bias",
1012
+ "model.diffusion_model.input_blocks.8.0.time_stack.in_layers.2.weight": "blocks.56.conv1.weight",
1013
+ "model.diffusion_model.input_blocks.8.0.time_stack.out_layers.0.bias": "blocks.56.norm2.bias",
1014
+ "model.diffusion_model.input_blocks.8.0.time_stack.out_layers.0.weight": "blocks.56.norm2.weight",
1015
+ "model.diffusion_model.input_blocks.8.0.time_stack.out_layers.3.bias": "blocks.56.conv2.bias",
1016
+ "model.diffusion_model.input_blocks.8.0.time_stack.out_layers.3.weight": "blocks.56.conv2.weight",
1017
+ "model.diffusion_model.input_blocks.8.1.norm.bias": "blocks.59.norm.bias",
1018
+ "model.diffusion_model.input_blocks.8.1.norm.weight": "blocks.59.norm.weight",
1019
+ "model.diffusion_model.input_blocks.8.1.proj_in.bias": "blocks.59.proj_in.bias",
1020
+ "model.diffusion_model.input_blocks.8.1.proj_in.weight": "blocks.59.proj_in.weight",
1021
+ "model.diffusion_model.input_blocks.8.1.proj_out.bias": "blocks.62.proj.bias",
1022
+ "model.diffusion_model.input_blocks.8.1.proj_out.weight": "blocks.62.proj.weight",
1023
+ "model.diffusion_model.input_blocks.8.1.time_mixer.mix_factor": "blocks.62.mix_factor",
1024
+ "model.diffusion_model.input_blocks.8.1.time_pos_embed.0.bias": "blocks.61.positional_embedding_proj.0.bias",
1025
+ "model.diffusion_model.input_blocks.8.1.time_pos_embed.0.weight": "blocks.61.positional_embedding_proj.0.weight",
1026
+ "model.diffusion_model.input_blocks.8.1.time_pos_embed.2.bias": "blocks.61.positional_embedding_proj.2.bias",
1027
+ "model.diffusion_model.input_blocks.8.1.time_pos_embed.2.weight": "blocks.61.positional_embedding_proj.2.weight",
1028
+ "model.diffusion_model.input_blocks.8.1.time_stack.0.attn1.to_k.weight": "blocks.61.attn1.to_k.weight",
1029
+ "model.diffusion_model.input_blocks.8.1.time_stack.0.attn1.to_out.0.bias": "blocks.61.attn1.to_out.bias",
1030
+ "model.diffusion_model.input_blocks.8.1.time_stack.0.attn1.to_out.0.weight": "blocks.61.attn1.to_out.weight",
1031
+ "model.diffusion_model.input_blocks.8.1.time_stack.0.attn1.to_q.weight": "blocks.61.attn1.to_q.weight",
1032
+ "model.diffusion_model.input_blocks.8.1.time_stack.0.attn1.to_v.weight": "blocks.61.attn1.to_v.weight",
1033
+ "model.diffusion_model.input_blocks.8.1.time_stack.0.attn2.to_k.weight": "blocks.61.attn2.to_k.weight",
1034
+ "model.diffusion_model.input_blocks.8.1.time_stack.0.attn2.to_out.0.bias": "blocks.61.attn2.to_out.bias",
1035
+ "model.diffusion_model.input_blocks.8.1.time_stack.0.attn2.to_out.0.weight": "blocks.61.attn2.to_out.weight",
1036
+ "model.diffusion_model.input_blocks.8.1.time_stack.0.attn2.to_q.weight": "blocks.61.attn2.to_q.weight",
1037
+ "model.diffusion_model.input_blocks.8.1.time_stack.0.attn2.to_v.weight": "blocks.61.attn2.to_v.weight",
1038
+ "model.diffusion_model.input_blocks.8.1.time_stack.0.ff.net.0.proj.bias": "blocks.61.act_fn_out.proj.bias",
1039
+ "model.diffusion_model.input_blocks.8.1.time_stack.0.ff.net.0.proj.weight": "blocks.61.act_fn_out.proj.weight",
1040
+ "model.diffusion_model.input_blocks.8.1.time_stack.0.ff.net.2.bias": "blocks.61.ff_out.bias",
1041
+ "model.diffusion_model.input_blocks.8.1.time_stack.0.ff.net.2.weight": "blocks.61.ff_out.weight",
1042
+ "model.diffusion_model.input_blocks.8.1.time_stack.0.ff_in.net.0.proj.bias": "blocks.61.act_fn_in.proj.bias",
1043
+ "model.diffusion_model.input_blocks.8.1.time_stack.0.ff_in.net.0.proj.weight": "blocks.61.act_fn_in.proj.weight",
1044
+ "model.diffusion_model.input_blocks.8.1.time_stack.0.ff_in.net.2.bias": "blocks.61.ff_in.bias",
1045
+ "model.diffusion_model.input_blocks.8.1.time_stack.0.ff_in.net.2.weight": "blocks.61.ff_in.weight",
1046
+ "model.diffusion_model.input_blocks.8.1.time_stack.0.norm1.bias": "blocks.61.norm1.bias",
1047
+ "model.diffusion_model.input_blocks.8.1.time_stack.0.norm1.weight": "blocks.61.norm1.weight",
1048
+ "model.diffusion_model.input_blocks.8.1.time_stack.0.norm2.bias": "blocks.61.norm2.bias",
1049
+ "model.diffusion_model.input_blocks.8.1.time_stack.0.norm2.weight": "blocks.61.norm2.weight",
1050
+ "model.diffusion_model.input_blocks.8.1.time_stack.0.norm3.bias": "blocks.61.norm_out.bias",
1051
+ "model.diffusion_model.input_blocks.8.1.time_stack.0.norm3.weight": "blocks.61.norm_out.weight",
1052
+ "model.diffusion_model.input_blocks.8.1.time_stack.0.norm_in.bias": "blocks.61.norm_in.bias",
1053
+ "model.diffusion_model.input_blocks.8.1.time_stack.0.norm_in.weight": "blocks.61.norm_in.weight",
1054
+ "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_k.weight": "blocks.59.transformer_blocks.0.attn1.to_k.weight",
1055
+ "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_out.0.bias": "blocks.59.transformer_blocks.0.attn1.to_out.bias",
1056
+ "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_out.0.weight": "blocks.59.transformer_blocks.0.attn1.to_out.weight",
1057
+ "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_q.weight": "blocks.59.transformer_blocks.0.attn1.to_q.weight",
1058
+ "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn1.to_v.weight": "blocks.59.transformer_blocks.0.attn1.to_v.weight",
1059
+ "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_k.weight": "blocks.59.transformer_blocks.0.attn2.to_k.weight",
1060
+ "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_out.0.bias": "blocks.59.transformer_blocks.0.attn2.to_out.bias",
1061
+ "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_out.0.weight": "blocks.59.transformer_blocks.0.attn2.to_out.weight",
1062
+ "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_q.weight": "blocks.59.transformer_blocks.0.attn2.to_q.weight",
1063
+ "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.attn2.to_v.weight": "blocks.59.transformer_blocks.0.attn2.to_v.weight",
1064
+ "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.0.proj.bias": "blocks.59.transformer_blocks.0.act_fn.proj.bias",
1065
+ "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.0.proj.weight": "blocks.59.transformer_blocks.0.act_fn.proj.weight",
1066
+ "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.2.bias": "blocks.59.transformer_blocks.0.ff.bias",
1067
+ "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.ff.net.2.weight": "blocks.59.transformer_blocks.0.ff.weight",
1068
+ "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm1.bias": "blocks.59.transformer_blocks.0.norm1.bias",
1069
+ "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm1.weight": "blocks.59.transformer_blocks.0.norm1.weight",
1070
+ "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm2.bias": "blocks.59.transformer_blocks.0.norm2.bias",
1071
+ "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm2.weight": "blocks.59.transformer_blocks.0.norm2.weight",
1072
+ "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm3.bias": "blocks.59.transformer_blocks.0.norm3.bias",
1073
+ "model.diffusion_model.input_blocks.8.1.transformer_blocks.0.norm3.weight": "blocks.59.transformer_blocks.0.norm3.weight",
1074
+ "model.diffusion_model.input_blocks.9.0.op.bias": "blocks.64.conv.bias",
1075
+ "model.diffusion_model.input_blocks.9.0.op.weight": "blocks.64.conv.weight",
1076
+ "model.diffusion_model.label_emb.0.0.bias": "add_time_embedding.0.bias",
1077
+ "model.diffusion_model.label_emb.0.0.weight": "add_time_embedding.0.weight",
1078
+ "model.diffusion_model.label_emb.0.2.bias": "add_time_embedding.2.bias",
1079
+ "model.diffusion_model.label_emb.0.2.weight": "add_time_embedding.2.weight",
1080
+ "model.diffusion_model.middle_block.0.emb_layers.1.bias": "blocks.76.time_emb_proj.bias",
1081
+ "model.diffusion_model.middle_block.0.emb_layers.1.weight": "blocks.76.time_emb_proj.weight",
1082
+ "model.diffusion_model.middle_block.0.in_layers.0.bias": "blocks.76.norm1.bias",
1083
+ "model.diffusion_model.middle_block.0.in_layers.0.weight": "blocks.76.norm1.weight",
1084
+ "model.diffusion_model.middle_block.0.in_layers.2.bias": "blocks.76.conv1.bias",
1085
+ "model.diffusion_model.middle_block.0.in_layers.2.weight": "blocks.76.conv1.weight",
1086
+ "model.diffusion_model.middle_block.0.out_layers.0.bias": "blocks.76.norm2.bias",
1087
+ "model.diffusion_model.middle_block.0.out_layers.0.weight": "blocks.76.norm2.weight",
1088
+ "model.diffusion_model.middle_block.0.out_layers.3.bias": "blocks.76.conv2.bias",
1089
+ "model.diffusion_model.middle_block.0.out_layers.3.weight": "blocks.76.conv2.weight",
1090
+ "model.diffusion_model.middle_block.0.time_mixer.mix_factor": "blocks.79.mix_factor",
1091
+ "model.diffusion_model.middle_block.0.time_stack.emb_layers.1.bias": "blocks.78.time_emb_proj.bias",
1092
+ "model.diffusion_model.middle_block.0.time_stack.emb_layers.1.weight": "blocks.78.time_emb_proj.weight",
1093
+ "model.diffusion_model.middle_block.0.time_stack.in_layers.0.bias": "blocks.78.norm1.bias",
1094
+ "model.diffusion_model.middle_block.0.time_stack.in_layers.0.weight": "blocks.78.norm1.weight",
1095
+ "model.diffusion_model.middle_block.0.time_stack.in_layers.2.bias": "blocks.78.conv1.bias",
1096
+ "model.diffusion_model.middle_block.0.time_stack.in_layers.2.weight": "blocks.78.conv1.weight",
1097
+ "model.diffusion_model.middle_block.0.time_stack.out_layers.0.bias": "blocks.78.norm2.bias",
1098
+ "model.diffusion_model.middle_block.0.time_stack.out_layers.0.weight": "blocks.78.norm2.weight",
1099
+ "model.diffusion_model.middle_block.0.time_stack.out_layers.3.bias": "blocks.78.conv2.bias",
1100
+ "model.diffusion_model.middle_block.0.time_stack.out_layers.3.weight": "blocks.78.conv2.weight",
1101
+ "model.diffusion_model.middle_block.1.norm.bias": "blocks.81.norm.bias",
1102
+ "model.diffusion_model.middle_block.1.norm.weight": "blocks.81.norm.weight",
1103
+ "model.diffusion_model.middle_block.1.proj_in.bias": "blocks.81.proj_in.bias",
1104
+ "model.diffusion_model.middle_block.1.proj_in.weight": "blocks.81.proj_in.weight",
1105
+ "model.diffusion_model.middle_block.1.proj_out.bias": "blocks.84.proj.bias",
1106
+ "model.diffusion_model.middle_block.1.proj_out.weight": "blocks.84.proj.weight",
1107
+ "model.diffusion_model.middle_block.1.time_mixer.mix_factor": "blocks.84.mix_factor",
1108
+ "model.diffusion_model.middle_block.1.time_pos_embed.0.bias": "blocks.83.positional_embedding_proj.0.bias",
1109
+ "model.diffusion_model.middle_block.1.time_pos_embed.0.weight": "blocks.83.positional_embedding_proj.0.weight",
1110
+ "model.diffusion_model.middle_block.1.time_pos_embed.2.bias": "blocks.83.positional_embedding_proj.2.bias",
1111
+ "model.diffusion_model.middle_block.1.time_pos_embed.2.weight": "blocks.83.positional_embedding_proj.2.weight",
1112
+ "model.diffusion_model.middle_block.1.time_stack.0.attn1.to_k.weight": "blocks.83.attn1.to_k.weight",
1113
+ "model.diffusion_model.middle_block.1.time_stack.0.attn1.to_out.0.bias": "blocks.83.attn1.to_out.bias",
1114
+ "model.diffusion_model.middle_block.1.time_stack.0.attn1.to_out.0.weight": "blocks.83.attn1.to_out.weight",
1115
+ "model.diffusion_model.middle_block.1.time_stack.0.attn1.to_q.weight": "blocks.83.attn1.to_q.weight",
1116
+ "model.diffusion_model.middle_block.1.time_stack.0.attn1.to_v.weight": "blocks.83.attn1.to_v.weight",
1117
+ "model.diffusion_model.middle_block.1.time_stack.0.attn2.to_k.weight": "blocks.83.attn2.to_k.weight",
1118
+ "model.diffusion_model.middle_block.1.time_stack.0.attn2.to_out.0.bias": "blocks.83.attn2.to_out.bias",
1119
+ "model.diffusion_model.middle_block.1.time_stack.0.attn2.to_out.0.weight": "blocks.83.attn2.to_out.weight",
1120
+ "model.diffusion_model.middle_block.1.time_stack.0.attn2.to_q.weight": "blocks.83.attn2.to_q.weight",
1121
+ "model.diffusion_model.middle_block.1.time_stack.0.attn2.to_v.weight": "blocks.83.attn2.to_v.weight",
1122
+ "model.diffusion_model.middle_block.1.time_stack.0.ff.net.0.proj.bias": "blocks.83.act_fn_out.proj.bias",
1123
+ "model.diffusion_model.middle_block.1.time_stack.0.ff.net.0.proj.weight": "blocks.83.act_fn_out.proj.weight",
1124
+ "model.diffusion_model.middle_block.1.time_stack.0.ff.net.2.bias": "blocks.83.ff_out.bias",
1125
+ "model.diffusion_model.middle_block.1.time_stack.0.ff.net.2.weight": "blocks.83.ff_out.weight",
1126
+ "model.diffusion_model.middle_block.1.time_stack.0.ff_in.net.0.proj.bias": "blocks.83.act_fn_in.proj.bias",
1127
+ "model.diffusion_model.middle_block.1.time_stack.0.ff_in.net.0.proj.weight": "blocks.83.act_fn_in.proj.weight",
1128
+ "model.diffusion_model.middle_block.1.time_stack.0.ff_in.net.2.bias": "blocks.83.ff_in.bias",
1129
+ "model.diffusion_model.middle_block.1.time_stack.0.ff_in.net.2.weight": "blocks.83.ff_in.weight",
1130
+ "model.diffusion_model.middle_block.1.time_stack.0.norm1.bias": "blocks.83.norm1.bias",
1131
+ "model.diffusion_model.middle_block.1.time_stack.0.norm1.weight": "blocks.83.norm1.weight",
1132
+ "model.diffusion_model.middle_block.1.time_stack.0.norm2.bias": "blocks.83.norm2.bias",
1133
+ "model.diffusion_model.middle_block.1.time_stack.0.norm2.weight": "blocks.83.norm2.weight",
1134
+ "model.diffusion_model.middle_block.1.time_stack.0.norm3.bias": "blocks.83.norm_out.bias",
1135
+ "model.diffusion_model.middle_block.1.time_stack.0.norm3.weight": "blocks.83.norm_out.weight",
1136
+ "model.diffusion_model.middle_block.1.time_stack.0.norm_in.bias": "blocks.83.norm_in.bias",
1137
+ "model.diffusion_model.middle_block.1.time_stack.0.norm_in.weight": "blocks.83.norm_in.weight",
1138
+ "model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_k.weight": "blocks.81.transformer_blocks.0.attn1.to_k.weight",
1139
+ "model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_out.0.bias": "blocks.81.transformer_blocks.0.attn1.to_out.bias",
1140
+ "model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_out.0.weight": "blocks.81.transformer_blocks.0.attn1.to_out.weight",
1141
+ "model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_q.weight": "blocks.81.transformer_blocks.0.attn1.to_q.weight",
1142
+ "model.diffusion_model.middle_block.1.transformer_blocks.0.attn1.to_v.weight": "blocks.81.transformer_blocks.0.attn1.to_v.weight",
1143
+ "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_k.weight": "blocks.81.transformer_blocks.0.attn2.to_k.weight",
1144
+ "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_out.0.bias": "blocks.81.transformer_blocks.0.attn2.to_out.bias",
1145
+ "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_out.0.weight": "blocks.81.transformer_blocks.0.attn2.to_out.weight",
1146
+ "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_q.weight": "blocks.81.transformer_blocks.0.attn2.to_q.weight",
1147
+ "model.diffusion_model.middle_block.1.transformer_blocks.0.attn2.to_v.weight": "blocks.81.transformer_blocks.0.attn2.to_v.weight",
1148
+ "model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.0.proj.bias": "blocks.81.transformer_blocks.0.act_fn.proj.bias",
1149
+ "model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.0.proj.weight": "blocks.81.transformer_blocks.0.act_fn.proj.weight",
1150
+ "model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.2.bias": "blocks.81.transformer_blocks.0.ff.bias",
1151
+ "model.diffusion_model.middle_block.1.transformer_blocks.0.ff.net.2.weight": "blocks.81.transformer_blocks.0.ff.weight",
1152
+ "model.diffusion_model.middle_block.1.transformer_blocks.0.norm1.bias": "blocks.81.transformer_blocks.0.norm1.bias",
1153
+ "model.diffusion_model.middle_block.1.transformer_blocks.0.norm1.weight": "blocks.81.transformer_blocks.0.norm1.weight",
1154
+ "model.diffusion_model.middle_block.1.transformer_blocks.0.norm2.bias": "blocks.81.transformer_blocks.0.norm2.bias",
1155
+ "model.diffusion_model.middle_block.1.transformer_blocks.0.norm2.weight": "blocks.81.transformer_blocks.0.norm2.weight",
1156
+ "model.diffusion_model.middle_block.1.transformer_blocks.0.norm3.bias": "blocks.81.transformer_blocks.0.norm3.bias",
1157
+ "model.diffusion_model.middle_block.1.transformer_blocks.0.norm3.weight": "blocks.81.transformer_blocks.0.norm3.weight",
1158
+ "model.diffusion_model.middle_block.2.emb_layers.1.bias": "blocks.85.time_emb_proj.bias",
1159
+ "model.diffusion_model.middle_block.2.emb_layers.1.weight": "blocks.85.time_emb_proj.weight",
1160
+ "model.diffusion_model.middle_block.2.in_layers.0.bias": "blocks.85.norm1.bias",
1161
+ "model.diffusion_model.middle_block.2.in_layers.0.weight": "blocks.85.norm1.weight",
1162
+ "model.diffusion_model.middle_block.2.in_layers.2.bias": "blocks.85.conv1.bias",
1163
+ "model.diffusion_model.middle_block.2.in_layers.2.weight": "blocks.85.conv1.weight",
1164
+ "model.diffusion_model.middle_block.2.out_layers.0.bias": "blocks.85.norm2.bias",
1165
+ "model.diffusion_model.middle_block.2.out_layers.0.weight": "blocks.85.norm2.weight",
1166
+ "model.diffusion_model.middle_block.2.out_layers.3.bias": "blocks.85.conv2.bias",
1167
+ "model.diffusion_model.middle_block.2.out_layers.3.weight": "blocks.85.conv2.weight",
1168
+ "model.diffusion_model.middle_block.2.time_mixer.mix_factor": "blocks.88.mix_factor",
1169
+ "model.diffusion_model.middle_block.2.time_stack.emb_layers.1.bias": "blocks.87.time_emb_proj.bias",
1170
+ "model.diffusion_model.middle_block.2.time_stack.emb_layers.1.weight": "blocks.87.time_emb_proj.weight",
1171
+ "model.diffusion_model.middle_block.2.time_stack.in_layers.0.bias": "blocks.87.norm1.bias",
1172
+ "model.diffusion_model.middle_block.2.time_stack.in_layers.0.weight": "blocks.87.norm1.weight",
1173
+ "model.diffusion_model.middle_block.2.time_stack.in_layers.2.bias": "blocks.87.conv1.bias",
1174
+ "model.diffusion_model.middle_block.2.time_stack.in_layers.2.weight": "blocks.87.conv1.weight",
1175
+ "model.diffusion_model.middle_block.2.time_stack.out_layers.0.bias": "blocks.87.norm2.bias",
1176
+ "model.diffusion_model.middle_block.2.time_stack.out_layers.0.weight": "blocks.87.norm2.weight",
1177
+ "model.diffusion_model.middle_block.2.time_stack.out_layers.3.bias": "blocks.87.conv2.bias",
1178
+ "model.diffusion_model.middle_block.2.time_stack.out_layers.3.weight": "blocks.87.conv2.weight",
1179
+ "model.diffusion_model.out.0.bias": "conv_norm_out.bias",
1180
+ "model.diffusion_model.out.0.weight": "conv_norm_out.weight",
1181
+ "model.diffusion_model.out.2.bias": "conv_out.bias",
1182
+ "model.diffusion_model.out.2.weight": "conv_out.weight",
1183
+ "model.diffusion_model.output_blocks.0.0.emb_layers.1.bias": "blocks.90.time_emb_proj.bias",
1184
+ "model.diffusion_model.output_blocks.0.0.emb_layers.1.weight": "blocks.90.time_emb_proj.weight",
1185
+ "model.diffusion_model.output_blocks.0.0.in_layers.0.bias": "blocks.90.norm1.bias",
1186
+ "model.diffusion_model.output_blocks.0.0.in_layers.0.weight": "blocks.90.norm1.weight",
1187
+ "model.diffusion_model.output_blocks.0.0.in_layers.2.bias": "blocks.90.conv1.bias",
1188
+ "model.diffusion_model.output_blocks.0.0.in_layers.2.weight": "blocks.90.conv1.weight",
1189
+ "model.diffusion_model.output_blocks.0.0.out_layers.0.bias": "blocks.90.norm2.bias",
1190
+ "model.diffusion_model.output_blocks.0.0.out_layers.0.weight": "blocks.90.norm2.weight",
1191
+ "model.diffusion_model.output_blocks.0.0.out_layers.3.bias": "blocks.90.conv2.bias",
1192
+ "model.diffusion_model.output_blocks.0.0.out_layers.3.weight": "blocks.90.conv2.weight",
1193
+ "model.diffusion_model.output_blocks.0.0.skip_connection.bias": "blocks.90.conv_shortcut.bias",
1194
+ "model.diffusion_model.output_blocks.0.0.skip_connection.weight": "blocks.90.conv_shortcut.weight",
1195
+ "model.diffusion_model.output_blocks.0.0.time_mixer.mix_factor": "blocks.93.mix_factor",
1196
+ "model.diffusion_model.output_blocks.0.0.time_stack.emb_layers.1.bias": "blocks.92.time_emb_proj.bias",
1197
+ "model.diffusion_model.output_blocks.0.0.time_stack.emb_layers.1.weight": "blocks.92.time_emb_proj.weight",
1198
+ "model.diffusion_model.output_blocks.0.0.time_stack.in_layers.0.bias": "blocks.92.norm1.bias",
1199
+ "model.diffusion_model.output_blocks.0.0.time_stack.in_layers.0.weight": "blocks.92.norm1.weight",
1200
+ "model.diffusion_model.output_blocks.0.0.time_stack.in_layers.2.bias": "blocks.92.conv1.bias",
1201
+ "model.diffusion_model.output_blocks.0.0.time_stack.in_layers.2.weight": "blocks.92.conv1.weight",
1202
+ "model.diffusion_model.output_blocks.0.0.time_stack.out_layers.0.bias": "blocks.92.norm2.bias",
1203
+ "model.diffusion_model.output_blocks.0.0.time_stack.out_layers.0.weight": "blocks.92.norm2.weight",
1204
+ "model.diffusion_model.output_blocks.0.0.time_stack.out_layers.3.bias": "blocks.92.conv2.bias",
1205
+ "model.diffusion_model.output_blocks.0.0.time_stack.out_layers.3.weight": "blocks.92.conv2.weight",
1206
+ "model.diffusion_model.output_blocks.1.0.emb_layers.1.bias": "blocks.95.time_emb_proj.bias",
1207
+ "model.diffusion_model.output_blocks.1.0.emb_layers.1.weight": "blocks.95.time_emb_proj.weight",
1208
+ "model.diffusion_model.output_blocks.1.0.in_layers.0.bias": "blocks.95.norm1.bias",
1209
+ "model.diffusion_model.output_blocks.1.0.in_layers.0.weight": "blocks.95.norm1.weight",
1210
+ "model.diffusion_model.output_blocks.1.0.in_layers.2.bias": "blocks.95.conv1.bias",
1211
+ "model.diffusion_model.output_blocks.1.0.in_layers.2.weight": "blocks.95.conv1.weight",
1212
+ "model.diffusion_model.output_blocks.1.0.out_layers.0.bias": "blocks.95.norm2.bias",
1213
+ "model.diffusion_model.output_blocks.1.0.out_layers.0.weight": "blocks.95.norm2.weight",
1214
+ "model.diffusion_model.output_blocks.1.0.out_layers.3.bias": "blocks.95.conv2.bias",
1215
+ "model.diffusion_model.output_blocks.1.0.out_layers.3.weight": "blocks.95.conv2.weight",
1216
+ "model.diffusion_model.output_blocks.1.0.skip_connection.bias": "blocks.95.conv_shortcut.bias",
1217
+ "model.diffusion_model.output_blocks.1.0.skip_connection.weight": "blocks.95.conv_shortcut.weight",
1218
+ "model.diffusion_model.output_blocks.1.0.time_mixer.mix_factor": "blocks.98.mix_factor",
1219
+ "model.diffusion_model.output_blocks.1.0.time_stack.emb_layers.1.bias": "blocks.97.time_emb_proj.bias",
1220
+ "model.diffusion_model.output_blocks.1.0.time_stack.emb_layers.1.weight": "blocks.97.time_emb_proj.weight",
1221
+ "model.diffusion_model.output_blocks.1.0.time_stack.in_layers.0.bias": "blocks.97.norm1.bias",
1222
+ "model.diffusion_model.output_blocks.1.0.time_stack.in_layers.0.weight": "blocks.97.norm1.weight",
1223
+ "model.diffusion_model.output_blocks.1.0.time_stack.in_layers.2.bias": "blocks.97.conv1.bias",
1224
+ "model.diffusion_model.output_blocks.1.0.time_stack.in_layers.2.weight": "blocks.97.conv1.weight",
1225
+ "model.diffusion_model.output_blocks.1.0.time_stack.out_layers.0.bias": "blocks.97.norm2.bias",
1226
+ "model.diffusion_model.output_blocks.1.0.time_stack.out_layers.0.weight": "blocks.97.norm2.weight",
1227
+ "model.diffusion_model.output_blocks.1.0.time_stack.out_layers.3.bias": "blocks.97.conv2.bias",
1228
+ "model.diffusion_model.output_blocks.1.0.time_stack.out_layers.3.weight": "blocks.97.conv2.weight",
1229
+ "model.diffusion_model.output_blocks.10.0.emb_layers.1.bias": "blocks.178.time_emb_proj.bias",
1230
+ "model.diffusion_model.output_blocks.10.0.emb_layers.1.weight": "blocks.178.time_emb_proj.weight",
1231
+ "model.diffusion_model.output_blocks.10.0.in_layers.0.bias": "blocks.178.norm1.bias",
1232
+ "model.diffusion_model.output_blocks.10.0.in_layers.0.weight": "blocks.178.norm1.weight",
1233
+ "model.diffusion_model.output_blocks.10.0.in_layers.2.bias": "blocks.178.conv1.bias",
1234
+ "model.diffusion_model.output_blocks.10.0.in_layers.2.weight": "blocks.178.conv1.weight",
1235
+ "model.diffusion_model.output_blocks.10.0.out_layers.0.bias": "blocks.178.norm2.bias",
1236
+ "model.diffusion_model.output_blocks.10.0.out_layers.0.weight": "blocks.178.norm2.weight",
1237
+ "model.diffusion_model.output_blocks.10.0.out_layers.3.bias": "blocks.178.conv2.bias",
1238
+ "model.diffusion_model.output_blocks.10.0.out_layers.3.weight": "blocks.178.conv2.weight",
1239
+ "model.diffusion_model.output_blocks.10.0.skip_connection.bias": "blocks.178.conv_shortcut.bias",
1240
+ "model.diffusion_model.output_blocks.10.0.skip_connection.weight": "blocks.178.conv_shortcut.weight",
1241
+ "model.diffusion_model.output_blocks.10.0.time_mixer.mix_factor": "blocks.181.mix_factor",
1242
+ "model.diffusion_model.output_blocks.10.0.time_stack.emb_layers.1.bias": "blocks.180.time_emb_proj.bias",
1243
+ "model.diffusion_model.output_blocks.10.0.time_stack.emb_layers.1.weight": "blocks.180.time_emb_proj.weight",
1244
+ "model.diffusion_model.output_blocks.10.0.time_stack.in_layers.0.bias": "blocks.180.norm1.bias",
1245
+ "model.diffusion_model.output_blocks.10.0.time_stack.in_layers.0.weight": "blocks.180.norm1.weight",
1246
+ "model.diffusion_model.output_blocks.10.0.time_stack.in_layers.2.bias": "blocks.180.conv1.bias",
1247
+ "model.diffusion_model.output_blocks.10.0.time_stack.in_layers.2.weight": "blocks.180.conv1.weight",
1248
+ "model.diffusion_model.output_blocks.10.0.time_stack.out_layers.0.bias": "blocks.180.norm2.bias",
1249
+ "model.diffusion_model.output_blocks.10.0.time_stack.out_layers.0.weight": "blocks.180.norm2.weight",
1250
+ "model.diffusion_model.output_blocks.10.0.time_stack.out_layers.3.bias": "blocks.180.conv2.bias",
1251
+ "model.diffusion_model.output_blocks.10.0.time_stack.out_layers.3.weight": "blocks.180.conv2.weight",
1252
+ "model.diffusion_model.output_blocks.10.1.norm.bias": "blocks.183.norm.bias",
1253
+ "model.diffusion_model.output_blocks.10.1.norm.weight": "blocks.183.norm.weight",
1254
+ "model.diffusion_model.output_blocks.10.1.proj_in.bias": "blocks.183.proj_in.bias",
1255
+ "model.diffusion_model.output_blocks.10.1.proj_in.weight": "blocks.183.proj_in.weight",
1256
+ "model.diffusion_model.output_blocks.10.1.proj_out.bias": "blocks.186.proj.bias",
1257
+ "model.diffusion_model.output_blocks.10.1.proj_out.weight": "blocks.186.proj.weight",
1258
+ "model.diffusion_model.output_blocks.10.1.time_mixer.mix_factor": "blocks.186.mix_factor",
1259
+ "model.diffusion_model.output_blocks.10.1.time_pos_embed.0.bias": "blocks.185.positional_embedding_proj.0.bias",
1260
+ "model.diffusion_model.output_blocks.10.1.time_pos_embed.0.weight": "blocks.185.positional_embedding_proj.0.weight",
1261
+ "model.diffusion_model.output_blocks.10.1.time_pos_embed.2.bias": "blocks.185.positional_embedding_proj.2.bias",
1262
+ "model.diffusion_model.output_blocks.10.1.time_pos_embed.2.weight": "blocks.185.positional_embedding_proj.2.weight",
1263
+ "model.diffusion_model.output_blocks.10.1.time_stack.0.attn1.to_k.weight": "blocks.185.attn1.to_k.weight",
1264
+ "model.diffusion_model.output_blocks.10.1.time_stack.0.attn1.to_out.0.bias": "blocks.185.attn1.to_out.bias",
1265
+ "model.diffusion_model.output_blocks.10.1.time_stack.0.attn1.to_out.0.weight": "blocks.185.attn1.to_out.weight",
1266
+ "model.diffusion_model.output_blocks.10.1.time_stack.0.attn1.to_q.weight": "blocks.185.attn1.to_q.weight",
1267
+ "model.diffusion_model.output_blocks.10.1.time_stack.0.attn1.to_v.weight": "blocks.185.attn1.to_v.weight",
1268
+ "model.diffusion_model.output_blocks.10.1.time_stack.0.attn2.to_k.weight": "blocks.185.attn2.to_k.weight",
1269
+ "model.diffusion_model.output_blocks.10.1.time_stack.0.attn2.to_out.0.bias": "blocks.185.attn2.to_out.bias",
1270
+ "model.diffusion_model.output_blocks.10.1.time_stack.0.attn2.to_out.0.weight": "blocks.185.attn2.to_out.weight",
1271
+ "model.diffusion_model.output_blocks.10.1.time_stack.0.attn2.to_q.weight": "blocks.185.attn2.to_q.weight",
1272
+ "model.diffusion_model.output_blocks.10.1.time_stack.0.attn2.to_v.weight": "blocks.185.attn2.to_v.weight",
1273
+ "model.diffusion_model.output_blocks.10.1.time_stack.0.ff.net.0.proj.bias": "blocks.185.act_fn_out.proj.bias",
1274
+ "model.diffusion_model.output_blocks.10.1.time_stack.0.ff.net.0.proj.weight": "blocks.185.act_fn_out.proj.weight",
1275
+ "model.diffusion_model.output_blocks.10.1.time_stack.0.ff.net.2.bias": "blocks.185.ff_out.bias",
1276
+ "model.diffusion_model.output_blocks.10.1.time_stack.0.ff.net.2.weight": "blocks.185.ff_out.weight",
1277
+ "model.diffusion_model.output_blocks.10.1.time_stack.0.ff_in.net.0.proj.bias": "blocks.185.act_fn_in.proj.bias",
1278
+ "model.diffusion_model.output_blocks.10.1.time_stack.0.ff_in.net.0.proj.weight": "blocks.185.act_fn_in.proj.weight",
1279
+ "model.diffusion_model.output_blocks.10.1.time_stack.0.ff_in.net.2.bias": "blocks.185.ff_in.bias",
1280
+ "model.diffusion_model.output_blocks.10.1.time_stack.0.ff_in.net.2.weight": "blocks.185.ff_in.weight",
1281
+ "model.diffusion_model.output_blocks.10.1.time_stack.0.norm1.bias": "blocks.185.norm1.bias",
1282
+ "model.diffusion_model.output_blocks.10.1.time_stack.0.norm1.weight": "blocks.185.norm1.weight",
1283
+ "model.diffusion_model.output_blocks.10.1.time_stack.0.norm2.bias": "blocks.185.norm2.bias",
1284
+ "model.diffusion_model.output_blocks.10.1.time_stack.0.norm2.weight": "blocks.185.norm2.weight",
1285
+ "model.diffusion_model.output_blocks.10.1.time_stack.0.norm3.bias": "blocks.185.norm_out.bias",
1286
+ "model.diffusion_model.output_blocks.10.1.time_stack.0.norm3.weight": "blocks.185.norm_out.weight",
1287
+ "model.diffusion_model.output_blocks.10.1.time_stack.0.norm_in.bias": "blocks.185.norm_in.bias",
1288
+ "model.diffusion_model.output_blocks.10.1.time_stack.0.norm_in.weight": "blocks.185.norm_in.weight",
1289
+ "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_k.weight": "blocks.183.transformer_blocks.0.attn1.to_k.weight",
1290
+ "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_out.0.bias": "blocks.183.transformer_blocks.0.attn1.to_out.bias",
1291
+ "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_out.0.weight": "blocks.183.transformer_blocks.0.attn1.to_out.weight",
1292
+ "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_q.weight": "blocks.183.transformer_blocks.0.attn1.to_q.weight",
1293
+ "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn1.to_v.weight": "blocks.183.transformer_blocks.0.attn1.to_v.weight",
1294
+ "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_k.weight": "blocks.183.transformer_blocks.0.attn2.to_k.weight",
1295
+ "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_out.0.bias": "blocks.183.transformer_blocks.0.attn2.to_out.bias",
1296
+ "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_out.0.weight": "blocks.183.transformer_blocks.0.attn2.to_out.weight",
1297
+ "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_q.weight": "blocks.183.transformer_blocks.0.attn2.to_q.weight",
1298
+ "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.attn2.to_v.weight": "blocks.183.transformer_blocks.0.attn2.to_v.weight",
1299
+ "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.ff.net.0.proj.bias": "blocks.183.transformer_blocks.0.act_fn.proj.bias",
1300
+ "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.ff.net.0.proj.weight": "blocks.183.transformer_blocks.0.act_fn.proj.weight",
1301
+ "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.ff.net.2.bias": "blocks.183.transformer_blocks.0.ff.bias",
1302
+ "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.ff.net.2.weight": "blocks.183.transformer_blocks.0.ff.weight",
1303
+ "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm1.bias": "blocks.183.transformer_blocks.0.norm1.bias",
1304
+ "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm1.weight": "blocks.183.transformer_blocks.0.norm1.weight",
1305
+ "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm2.bias": "blocks.183.transformer_blocks.0.norm2.bias",
1306
+ "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm2.weight": "blocks.183.transformer_blocks.0.norm2.weight",
1307
+ "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm3.bias": "blocks.183.transformer_blocks.0.norm3.bias",
1308
+ "model.diffusion_model.output_blocks.10.1.transformer_blocks.0.norm3.weight": "blocks.183.transformer_blocks.0.norm3.weight",
1309
+ "model.diffusion_model.output_blocks.11.0.emb_layers.1.bias": "blocks.188.time_emb_proj.bias",
1310
+ "model.diffusion_model.output_blocks.11.0.emb_layers.1.weight": "blocks.188.time_emb_proj.weight",
1311
+ "model.diffusion_model.output_blocks.11.0.in_layers.0.bias": "blocks.188.norm1.bias",
1312
+ "model.diffusion_model.output_blocks.11.0.in_layers.0.weight": "blocks.188.norm1.weight",
1313
+ "model.diffusion_model.output_blocks.11.0.in_layers.2.bias": "blocks.188.conv1.bias",
1314
+ "model.diffusion_model.output_blocks.11.0.in_layers.2.weight": "blocks.188.conv1.weight",
1315
+ "model.diffusion_model.output_blocks.11.0.out_layers.0.bias": "blocks.188.norm2.bias",
1316
+ "model.diffusion_model.output_blocks.11.0.out_layers.0.weight": "blocks.188.norm2.weight",
1317
+ "model.diffusion_model.output_blocks.11.0.out_layers.3.bias": "blocks.188.conv2.bias",
1318
+ "model.diffusion_model.output_blocks.11.0.out_layers.3.weight": "blocks.188.conv2.weight",
1319
+ "model.diffusion_model.output_blocks.11.0.skip_connection.bias": "blocks.188.conv_shortcut.bias",
1320
+ "model.diffusion_model.output_blocks.11.0.skip_connection.weight": "blocks.188.conv_shortcut.weight",
1321
+ "model.diffusion_model.output_blocks.11.0.time_mixer.mix_factor": "blocks.191.mix_factor",
1322
+ "model.diffusion_model.output_blocks.11.0.time_stack.emb_layers.1.bias": "blocks.190.time_emb_proj.bias",
1323
+ "model.diffusion_model.output_blocks.11.0.time_stack.emb_layers.1.weight": "blocks.190.time_emb_proj.weight",
1324
+ "model.diffusion_model.output_blocks.11.0.time_stack.in_layers.0.bias": "blocks.190.norm1.bias",
1325
+ "model.diffusion_model.output_blocks.11.0.time_stack.in_layers.0.weight": "blocks.190.norm1.weight",
1326
+ "model.diffusion_model.output_blocks.11.0.time_stack.in_layers.2.bias": "blocks.190.conv1.bias",
1327
+ "model.diffusion_model.output_blocks.11.0.time_stack.in_layers.2.weight": "blocks.190.conv1.weight",
1328
+ "model.diffusion_model.output_blocks.11.0.time_stack.out_layers.0.bias": "blocks.190.norm2.bias",
1329
+ "model.diffusion_model.output_blocks.11.0.time_stack.out_layers.0.weight": "blocks.190.norm2.weight",
1330
+ "model.diffusion_model.output_blocks.11.0.time_stack.out_layers.3.bias": "blocks.190.conv2.bias",
1331
+ "model.diffusion_model.output_blocks.11.0.time_stack.out_layers.3.weight": "blocks.190.conv2.weight",
1332
+ "model.diffusion_model.output_blocks.11.1.norm.bias": "blocks.193.norm.bias",
1333
+ "model.diffusion_model.output_blocks.11.1.norm.weight": "blocks.193.norm.weight",
1334
+ "model.diffusion_model.output_blocks.11.1.proj_in.bias": "blocks.193.proj_in.bias",
1335
+ "model.diffusion_model.output_blocks.11.1.proj_in.weight": "blocks.193.proj_in.weight",
1336
+ "model.diffusion_model.output_blocks.11.1.proj_out.bias": "blocks.196.proj.bias",
1337
+ "model.diffusion_model.output_blocks.11.1.proj_out.weight": "blocks.196.proj.weight",
1338
+ "model.diffusion_model.output_blocks.11.1.time_mixer.mix_factor": "blocks.196.mix_factor",
1339
+ "model.diffusion_model.output_blocks.11.1.time_pos_embed.0.bias": "blocks.195.positional_embedding_proj.0.bias",
1340
+ "model.diffusion_model.output_blocks.11.1.time_pos_embed.0.weight": "blocks.195.positional_embedding_proj.0.weight",
1341
+ "model.diffusion_model.output_blocks.11.1.time_pos_embed.2.bias": "blocks.195.positional_embedding_proj.2.bias",
1342
+ "model.diffusion_model.output_blocks.11.1.time_pos_embed.2.weight": "blocks.195.positional_embedding_proj.2.weight",
1343
+ "model.diffusion_model.output_blocks.11.1.time_stack.0.attn1.to_k.weight": "blocks.195.attn1.to_k.weight",
1344
+ "model.diffusion_model.output_blocks.11.1.time_stack.0.attn1.to_out.0.bias": "blocks.195.attn1.to_out.bias",
1345
+ "model.diffusion_model.output_blocks.11.1.time_stack.0.attn1.to_out.0.weight": "blocks.195.attn1.to_out.weight",
1346
+ "model.diffusion_model.output_blocks.11.1.time_stack.0.attn1.to_q.weight": "blocks.195.attn1.to_q.weight",
1347
+ "model.diffusion_model.output_blocks.11.1.time_stack.0.attn1.to_v.weight": "blocks.195.attn1.to_v.weight",
1348
+ "model.diffusion_model.output_blocks.11.1.time_stack.0.attn2.to_k.weight": "blocks.195.attn2.to_k.weight",
1349
+ "model.diffusion_model.output_blocks.11.1.time_stack.0.attn2.to_out.0.bias": "blocks.195.attn2.to_out.bias",
1350
+ "model.diffusion_model.output_blocks.11.1.time_stack.0.attn2.to_out.0.weight": "blocks.195.attn2.to_out.weight",
1351
+ "model.diffusion_model.output_blocks.11.1.time_stack.0.attn2.to_q.weight": "blocks.195.attn2.to_q.weight",
1352
+ "model.diffusion_model.output_blocks.11.1.time_stack.0.attn2.to_v.weight": "blocks.195.attn2.to_v.weight",
1353
+ "model.diffusion_model.output_blocks.11.1.time_stack.0.ff.net.0.proj.bias": "blocks.195.act_fn_out.proj.bias",
1354
+ "model.diffusion_model.output_blocks.11.1.time_stack.0.ff.net.0.proj.weight": "blocks.195.act_fn_out.proj.weight",
1355
+ "model.diffusion_model.output_blocks.11.1.time_stack.0.ff.net.2.bias": "blocks.195.ff_out.bias",
1356
+ "model.diffusion_model.output_blocks.11.1.time_stack.0.ff.net.2.weight": "blocks.195.ff_out.weight",
1357
+ "model.diffusion_model.output_blocks.11.1.time_stack.0.ff_in.net.0.proj.bias": "blocks.195.act_fn_in.proj.bias",
1358
+ "model.diffusion_model.output_blocks.11.1.time_stack.0.ff_in.net.0.proj.weight": "blocks.195.act_fn_in.proj.weight",
1359
+ "model.diffusion_model.output_blocks.11.1.time_stack.0.ff_in.net.2.bias": "blocks.195.ff_in.bias",
1360
+ "model.diffusion_model.output_blocks.11.1.time_stack.0.ff_in.net.2.weight": "blocks.195.ff_in.weight",
1361
+ "model.diffusion_model.output_blocks.11.1.time_stack.0.norm1.bias": "blocks.195.norm1.bias",
1362
+ "model.diffusion_model.output_blocks.11.1.time_stack.0.norm1.weight": "blocks.195.norm1.weight",
1363
+ "model.diffusion_model.output_blocks.11.1.time_stack.0.norm2.bias": "blocks.195.norm2.bias",
1364
+ "model.diffusion_model.output_blocks.11.1.time_stack.0.norm2.weight": "blocks.195.norm2.weight",
1365
+ "model.diffusion_model.output_blocks.11.1.time_stack.0.norm3.bias": "blocks.195.norm_out.bias",
1366
+ "model.diffusion_model.output_blocks.11.1.time_stack.0.norm3.weight": "blocks.195.norm_out.weight",
1367
+ "model.diffusion_model.output_blocks.11.1.time_stack.0.norm_in.bias": "blocks.195.norm_in.bias",
1368
+ "model.diffusion_model.output_blocks.11.1.time_stack.0.norm_in.weight": "blocks.195.norm_in.weight",
1369
+ "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_k.weight": "blocks.193.transformer_blocks.0.attn1.to_k.weight",
1370
+ "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_out.0.bias": "blocks.193.transformer_blocks.0.attn1.to_out.bias",
1371
+ "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_out.0.weight": "blocks.193.transformer_blocks.0.attn1.to_out.weight",
1372
+ "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_q.weight": "blocks.193.transformer_blocks.0.attn1.to_q.weight",
1373
+ "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn1.to_v.weight": "blocks.193.transformer_blocks.0.attn1.to_v.weight",
1374
+ "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_k.weight": "blocks.193.transformer_blocks.0.attn2.to_k.weight",
1375
+ "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_out.0.bias": "blocks.193.transformer_blocks.0.attn2.to_out.bias",
1376
+ "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_out.0.weight": "blocks.193.transformer_blocks.0.attn2.to_out.weight",
1377
+ "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_q.weight": "blocks.193.transformer_blocks.0.attn2.to_q.weight",
1378
+ "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.attn2.to_v.weight": "blocks.193.transformer_blocks.0.attn2.to_v.weight",
1379
+ "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.ff.net.0.proj.bias": "blocks.193.transformer_blocks.0.act_fn.proj.bias",
1380
+ "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.ff.net.0.proj.weight": "blocks.193.transformer_blocks.0.act_fn.proj.weight",
1381
+ "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.ff.net.2.bias": "blocks.193.transformer_blocks.0.ff.bias",
1382
+ "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.ff.net.2.weight": "blocks.193.transformer_blocks.0.ff.weight",
1383
+ "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm1.bias": "blocks.193.transformer_blocks.0.norm1.bias",
1384
+ "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm1.weight": "blocks.193.transformer_blocks.0.norm1.weight",
1385
+ "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm2.bias": "blocks.193.transformer_blocks.0.norm2.bias",
1386
+ "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm2.weight": "blocks.193.transformer_blocks.0.norm2.weight",
1387
+ "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm3.bias": "blocks.193.transformer_blocks.0.norm3.bias",
1388
+ "model.diffusion_model.output_blocks.11.1.transformer_blocks.0.norm3.weight": "blocks.193.transformer_blocks.0.norm3.weight",
1389
+ "model.diffusion_model.output_blocks.2.0.emb_layers.1.bias": "blocks.100.time_emb_proj.bias",
1390
+ "model.diffusion_model.output_blocks.2.0.emb_layers.1.weight": "blocks.100.time_emb_proj.weight",
1391
+ "model.diffusion_model.output_blocks.2.0.in_layers.0.bias": "blocks.100.norm1.bias",
1392
+ "model.diffusion_model.output_blocks.2.0.in_layers.0.weight": "blocks.100.norm1.weight",
1393
+ "model.diffusion_model.output_blocks.2.0.in_layers.2.bias": "blocks.100.conv1.bias",
1394
+ "model.diffusion_model.output_blocks.2.0.in_layers.2.weight": "blocks.100.conv1.weight",
1395
+ "model.diffusion_model.output_blocks.2.0.out_layers.0.bias": "blocks.100.norm2.bias",
1396
+ "model.diffusion_model.output_blocks.2.0.out_layers.0.weight": "blocks.100.norm2.weight",
1397
+ "model.diffusion_model.output_blocks.2.0.out_layers.3.bias": "blocks.100.conv2.bias",
1398
+ "model.diffusion_model.output_blocks.2.0.out_layers.3.weight": "blocks.100.conv2.weight",
1399
+ "model.diffusion_model.output_blocks.2.0.skip_connection.bias": "blocks.100.conv_shortcut.bias",
1400
+ "model.diffusion_model.output_blocks.2.0.skip_connection.weight": "blocks.100.conv_shortcut.weight",
1401
+ "model.diffusion_model.output_blocks.2.0.time_mixer.mix_factor": "blocks.103.mix_factor",
1402
+ "model.diffusion_model.output_blocks.2.0.time_stack.emb_layers.1.bias": "blocks.102.time_emb_proj.bias",
1403
+ "model.diffusion_model.output_blocks.2.0.time_stack.emb_layers.1.weight": "blocks.102.time_emb_proj.weight",
1404
+ "model.diffusion_model.output_blocks.2.0.time_stack.in_layers.0.bias": "blocks.102.norm1.bias",
1405
+ "model.diffusion_model.output_blocks.2.0.time_stack.in_layers.0.weight": "blocks.102.norm1.weight",
1406
+ "model.diffusion_model.output_blocks.2.0.time_stack.in_layers.2.bias": "blocks.102.conv1.bias",
1407
+ "model.diffusion_model.output_blocks.2.0.time_stack.in_layers.2.weight": "blocks.102.conv1.weight",
1408
+ "model.diffusion_model.output_blocks.2.0.time_stack.out_layers.0.bias": "blocks.102.norm2.bias",
1409
+ "model.diffusion_model.output_blocks.2.0.time_stack.out_layers.0.weight": "blocks.102.norm2.weight",
1410
+ "model.diffusion_model.output_blocks.2.0.time_stack.out_layers.3.bias": "blocks.102.conv2.bias",
1411
+ "model.diffusion_model.output_blocks.2.0.time_stack.out_layers.3.weight": "blocks.102.conv2.weight",
1412
+ "model.diffusion_model.output_blocks.2.1.conv.bias": "blocks.104.conv.bias",
1413
+ "model.diffusion_model.output_blocks.2.1.conv.weight": "blocks.104.conv.weight",
1414
+ "model.diffusion_model.output_blocks.3.0.emb_layers.1.bias": "blocks.106.time_emb_proj.bias",
1415
+ "model.diffusion_model.output_blocks.3.0.emb_layers.1.weight": "blocks.106.time_emb_proj.weight",
1416
+ "model.diffusion_model.output_blocks.3.0.in_layers.0.bias": "blocks.106.norm1.bias",
1417
+ "model.diffusion_model.output_blocks.3.0.in_layers.0.weight": "blocks.106.norm1.weight",
1418
+ "model.diffusion_model.output_blocks.3.0.in_layers.2.bias": "blocks.106.conv1.bias",
1419
+ "model.diffusion_model.output_blocks.3.0.in_layers.2.weight": "blocks.106.conv1.weight",
1420
+ "model.diffusion_model.output_blocks.3.0.out_layers.0.bias": "blocks.106.norm2.bias",
1421
+ "model.diffusion_model.output_blocks.3.0.out_layers.0.weight": "blocks.106.norm2.weight",
1422
+ "model.diffusion_model.output_blocks.3.0.out_layers.3.bias": "blocks.106.conv2.bias",
1423
+ "model.diffusion_model.output_blocks.3.0.out_layers.3.weight": "blocks.106.conv2.weight",
1424
+ "model.diffusion_model.output_blocks.3.0.skip_connection.bias": "blocks.106.conv_shortcut.bias",
1425
+ "model.diffusion_model.output_blocks.3.0.skip_connection.weight": "blocks.106.conv_shortcut.weight",
1426
+ "model.diffusion_model.output_blocks.3.0.time_mixer.mix_factor": "blocks.109.mix_factor",
1427
+ "model.diffusion_model.output_blocks.3.0.time_stack.emb_layers.1.bias": "blocks.108.time_emb_proj.bias",
1428
+ "model.diffusion_model.output_blocks.3.0.time_stack.emb_layers.1.weight": "blocks.108.time_emb_proj.weight",
1429
+ "model.diffusion_model.output_blocks.3.0.time_stack.in_layers.0.bias": "blocks.108.norm1.bias",
1430
+ "model.diffusion_model.output_blocks.3.0.time_stack.in_layers.0.weight": "blocks.108.norm1.weight",
1431
+ "model.diffusion_model.output_blocks.3.0.time_stack.in_layers.2.bias": "blocks.108.conv1.bias",
1432
+ "model.diffusion_model.output_blocks.3.0.time_stack.in_layers.2.weight": "blocks.108.conv1.weight",
1433
+ "model.diffusion_model.output_blocks.3.0.time_stack.out_layers.0.bias": "blocks.108.norm2.bias",
1434
+ "model.diffusion_model.output_blocks.3.0.time_stack.out_layers.0.weight": "blocks.108.norm2.weight",
1435
+ "model.diffusion_model.output_blocks.3.0.time_stack.out_layers.3.bias": "blocks.108.conv2.bias",
1436
+ "model.diffusion_model.output_blocks.3.0.time_stack.out_layers.3.weight": "blocks.108.conv2.weight",
1437
+ "model.diffusion_model.output_blocks.3.1.norm.bias": "blocks.111.norm.bias",
1438
+ "model.diffusion_model.output_blocks.3.1.norm.weight": "blocks.111.norm.weight",
1439
+ "model.diffusion_model.output_blocks.3.1.proj_in.bias": "blocks.111.proj_in.bias",
1440
+ "model.diffusion_model.output_blocks.3.1.proj_in.weight": "blocks.111.proj_in.weight",
1441
+ "model.diffusion_model.output_blocks.3.1.proj_out.bias": "blocks.114.proj.bias",
1442
+ "model.diffusion_model.output_blocks.3.1.proj_out.weight": "blocks.114.proj.weight",
1443
+ "model.diffusion_model.output_blocks.3.1.time_mixer.mix_factor": "blocks.114.mix_factor",
1444
+ "model.diffusion_model.output_blocks.3.1.time_pos_embed.0.bias": "blocks.113.positional_embedding_proj.0.bias",
1445
+ "model.diffusion_model.output_blocks.3.1.time_pos_embed.0.weight": "blocks.113.positional_embedding_proj.0.weight",
1446
+ "model.diffusion_model.output_blocks.3.1.time_pos_embed.2.bias": "blocks.113.positional_embedding_proj.2.bias",
1447
+ "model.diffusion_model.output_blocks.3.1.time_pos_embed.2.weight": "blocks.113.positional_embedding_proj.2.weight",
1448
+ "model.diffusion_model.output_blocks.3.1.time_stack.0.attn1.to_k.weight": "blocks.113.attn1.to_k.weight",
1449
+ "model.diffusion_model.output_blocks.3.1.time_stack.0.attn1.to_out.0.bias": "blocks.113.attn1.to_out.bias",
1450
+ "model.diffusion_model.output_blocks.3.1.time_stack.0.attn1.to_out.0.weight": "blocks.113.attn1.to_out.weight",
1451
+ "model.diffusion_model.output_blocks.3.1.time_stack.0.attn1.to_q.weight": "blocks.113.attn1.to_q.weight",
1452
+ "model.diffusion_model.output_blocks.3.1.time_stack.0.attn1.to_v.weight": "blocks.113.attn1.to_v.weight",
1453
+ "model.diffusion_model.output_blocks.3.1.time_stack.0.attn2.to_k.weight": "blocks.113.attn2.to_k.weight",
1454
+ "model.diffusion_model.output_blocks.3.1.time_stack.0.attn2.to_out.0.bias": "blocks.113.attn2.to_out.bias",
1455
+ "model.diffusion_model.output_blocks.3.1.time_stack.0.attn2.to_out.0.weight": "blocks.113.attn2.to_out.weight",
1456
+ "model.diffusion_model.output_blocks.3.1.time_stack.0.attn2.to_q.weight": "blocks.113.attn2.to_q.weight",
1457
+ "model.diffusion_model.output_blocks.3.1.time_stack.0.attn2.to_v.weight": "blocks.113.attn2.to_v.weight",
1458
+ "model.diffusion_model.output_blocks.3.1.time_stack.0.ff.net.0.proj.bias": "blocks.113.act_fn_out.proj.bias",
1459
+ "model.diffusion_model.output_blocks.3.1.time_stack.0.ff.net.0.proj.weight": "blocks.113.act_fn_out.proj.weight",
1460
+ "model.diffusion_model.output_blocks.3.1.time_stack.0.ff.net.2.bias": "blocks.113.ff_out.bias",
1461
+ "model.diffusion_model.output_blocks.3.1.time_stack.0.ff.net.2.weight": "blocks.113.ff_out.weight",
1462
+ "model.diffusion_model.output_blocks.3.1.time_stack.0.ff_in.net.0.proj.bias": "blocks.113.act_fn_in.proj.bias",
1463
+ "model.diffusion_model.output_blocks.3.1.time_stack.0.ff_in.net.0.proj.weight": "blocks.113.act_fn_in.proj.weight",
1464
+ "model.diffusion_model.output_blocks.3.1.time_stack.0.ff_in.net.2.bias": "blocks.113.ff_in.bias",
1465
+ "model.diffusion_model.output_blocks.3.1.time_stack.0.ff_in.net.2.weight": "blocks.113.ff_in.weight",
1466
+ "model.diffusion_model.output_blocks.3.1.time_stack.0.norm1.bias": "blocks.113.norm1.bias",
1467
+ "model.diffusion_model.output_blocks.3.1.time_stack.0.norm1.weight": "blocks.113.norm1.weight",
1468
+ "model.diffusion_model.output_blocks.3.1.time_stack.0.norm2.bias": "blocks.113.norm2.bias",
1469
+ "model.diffusion_model.output_blocks.3.1.time_stack.0.norm2.weight": "blocks.113.norm2.weight",
1470
+ "model.diffusion_model.output_blocks.3.1.time_stack.0.norm3.bias": "blocks.113.norm_out.bias",
1471
+ "model.diffusion_model.output_blocks.3.1.time_stack.0.norm3.weight": "blocks.113.norm_out.weight",
1472
+ "model.diffusion_model.output_blocks.3.1.time_stack.0.norm_in.bias": "blocks.113.norm_in.bias",
1473
+ "model.diffusion_model.output_blocks.3.1.time_stack.0.norm_in.weight": "blocks.113.norm_in.weight",
1474
+ "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_k.weight": "blocks.111.transformer_blocks.0.attn1.to_k.weight",
1475
+ "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_out.0.bias": "blocks.111.transformer_blocks.0.attn1.to_out.bias",
1476
+ "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_out.0.weight": "blocks.111.transformer_blocks.0.attn1.to_out.weight",
1477
+ "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_q.weight": "blocks.111.transformer_blocks.0.attn1.to_q.weight",
1478
+ "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn1.to_v.weight": "blocks.111.transformer_blocks.0.attn1.to_v.weight",
1479
+ "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_k.weight": "blocks.111.transformer_blocks.0.attn2.to_k.weight",
1480
+ "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_out.0.bias": "blocks.111.transformer_blocks.0.attn2.to_out.bias",
1481
+ "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_out.0.weight": "blocks.111.transformer_blocks.0.attn2.to_out.weight",
1482
+ "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_q.weight": "blocks.111.transformer_blocks.0.attn2.to_q.weight",
1483
+ "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.attn2.to_v.weight": "blocks.111.transformer_blocks.0.attn2.to_v.weight",
1484
+ "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.0.proj.bias": "blocks.111.transformer_blocks.0.act_fn.proj.bias",
1485
+ "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.0.proj.weight": "blocks.111.transformer_blocks.0.act_fn.proj.weight",
1486
+ "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.2.bias": "blocks.111.transformer_blocks.0.ff.bias",
1487
+ "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.ff.net.2.weight": "blocks.111.transformer_blocks.0.ff.weight",
1488
+ "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm1.bias": "blocks.111.transformer_blocks.0.norm1.bias",
1489
+ "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm1.weight": "blocks.111.transformer_blocks.0.norm1.weight",
1490
+ "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm2.bias": "blocks.111.transformer_blocks.0.norm2.bias",
1491
+ "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm2.weight": "blocks.111.transformer_blocks.0.norm2.weight",
1492
+ "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm3.bias": "blocks.111.transformer_blocks.0.norm3.bias",
1493
+ "model.diffusion_model.output_blocks.3.1.transformer_blocks.0.norm3.weight": "blocks.111.transformer_blocks.0.norm3.weight",
1494
+ "model.diffusion_model.output_blocks.4.0.emb_layers.1.bias": "blocks.116.time_emb_proj.bias",
1495
+ "model.diffusion_model.output_blocks.4.0.emb_layers.1.weight": "blocks.116.time_emb_proj.weight",
1496
+ "model.diffusion_model.output_blocks.4.0.in_layers.0.bias": "blocks.116.norm1.bias",
1497
+ "model.diffusion_model.output_blocks.4.0.in_layers.0.weight": "blocks.116.norm1.weight",
1498
+ "model.diffusion_model.output_blocks.4.0.in_layers.2.bias": "blocks.116.conv1.bias",
1499
+ "model.diffusion_model.output_blocks.4.0.in_layers.2.weight": "blocks.116.conv1.weight",
1500
+ "model.diffusion_model.output_blocks.4.0.out_layers.0.bias": "blocks.116.norm2.bias",
1501
+ "model.diffusion_model.output_blocks.4.0.out_layers.0.weight": "blocks.116.norm2.weight",
1502
+ "model.diffusion_model.output_blocks.4.0.out_layers.3.bias": "blocks.116.conv2.bias",
1503
+ "model.diffusion_model.output_blocks.4.0.out_layers.3.weight": "blocks.116.conv2.weight",
1504
+ "model.diffusion_model.output_blocks.4.0.skip_connection.bias": "blocks.116.conv_shortcut.bias",
1505
+ "model.diffusion_model.output_blocks.4.0.skip_connection.weight": "blocks.116.conv_shortcut.weight",
1506
+ "model.diffusion_model.output_blocks.4.0.time_mixer.mix_factor": "blocks.119.mix_factor",
1507
+ "model.diffusion_model.output_blocks.4.0.time_stack.emb_layers.1.bias": "blocks.118.time_emb_proj.bias",
1508
+ "model.diffusion_model.output_blocks.4.0.time_stack.emb_layers.1.weight": "blocks.118.time_emb_proj.weight",
1509
+ "model.diffusion_model.output_blocks.4.0.time_stack.in_layers.0.bias": "blocks.118.norm1.bias",
1510
+ "model.diffusion_model.output_blocks.4.0.time_stack.in_layers.0.weight": "blocks.118.norm1.weight",
1511
+ "model.diffusion_model.output_blocks.4.0.time_stack.in_layers.2.bias": "blocks.118.conv1.bias",
1512
+ "model.diffusion_model.output_blocks.4.0.time_stack.in_layers.2.weight": "blocks.118.conv1.weight",
1513
+ "model.diffusion_model.output_blocks.4.0.time_stack.out_layers.0.bias": "blocks.118.norm2.bias",
1514
+ "model.diffusion_model.output_blocks.4.0.time_stack.out_layers.0.weight": "blocks.118.norm2.weight",
1515
+ "model.diffusion_model.output_blocks.4.0.time_stack.out_layers.3.bias": "blocks.118.conv2.bias",
1516
+ "model.diffusion_model.output_blocks.4.0.time_stack.out_layers.3.weight": "blocks.118.conv2.weight",
1517
+ "model.diffusion_model.output_blocks.4.1.norm.bias": "blocks.121.norm.bias",
1518
+ "model.diffusion_model.output_blocks.4.1.norm.weight": "blocks.121.norm.weight",
1519
+ "model.diffusion_model.output_blocks.4.1.proj_in.bias": "blocks.121.proj_in.bias",
1520
+ "model.diffusion_model.output_blocks.4.1.proj_in.weight": "blocks.121.proj_in.weight",
1521
+ "model.diffusion_model.output_blocks.4.1.proj_out.bias": "blocks.124.proj.bias",
1522
+ "model.diffusion_model.output_blocks.4.1.proj_out.weight": "blocks.124.proj.weight",
1523
+ "model.diffusion_model.output_blocks.4.1.time_mixer.mix_factor": "blocks.124.mix_factor",
1524
+ "model.diffusion_model.output_blocks.4.1.time_pos_embed.0.bias": "blocks.123.positional_embedding_proj.0.bias",
1525
+ "model.diffusion_model.output_blocks.4.1.time_pos_embed.0.weight": "blocks.123.positional_embedding_proj.0.weight",
1526
+ "model.diffusion_model.output_blocks.4.1.time_pos_embed.2.bias": "blocks.123.positional_embedding_proj.2.bias",
1527
+ "model.diffusion_model.output_blocks.4.1.time_pos_embed.2.weight": "blocks.123.positional_embedding_proj.2.weight",
1528
+ "model.diffusion_model.output_blocks.4.1.time_stack.0.attn1.to_k.weight": "blocks.123.attn1.to_k.weight",
1529
+ "model.diffusion_model.output_blocks.4.1.time_stack.0.attn1.to_out.0.bias": "blocks.123.attn1.to_out.bias",
1530
+ "model.diffusion_model.output_blocks.4.1.time_stack.0.attn1.to_out.0.weight": "blocks.123.attn1.to_out.weight",
1531
+ "model.diffusion_model.output_blocks.4.1.time_stack.0.attn1.to_q.weight": "blocks.123.attn1.to_q.weight",
1532
+ "model.diffusion_model.output_blocks.4.1.time_stack.0.attn1.to_v.weight": "blocks.123.attn1.to_v.weight",
1533
+ "model.diffusion_model.output_blocks.4.1.time_stack.0.attn2.to_k.weight": "blocks.123.attn2.to_k.weight",
1534
+ "model.diffusion_model.output_blocks.4.1.time_stack.0.attn2.to_out.0.bias": "blocks.123.attn2.to_out.bias",
1535
+ "model.diffusion_model.output_blocks.4.1.time_stack.0.attn2.to_out.0.weight": "blocks.123.attn2.to_out.weight",
1536
+ "model.diffusion_model.output_blocks.4.1.time_stack.0.attn2.to_q.weight": "blocks.123.attn2.to_q.weight",
1537
+ "model.diffusion_model.output_blocks.4.1.time_stack.0.attn2.to_v.weight": "blocks.123.attn2.to_v.weight",
1538
+ "model.diffusion_model.output_blocks.4.1.time_stack.0.ff.net.0.proj.bias": "blocks.123.act_fn_out.proj.bias",
1539
+ "model.diffusion_model.output_blocks.4.1.time_stack.0.ff.net.0.proj.weight": "blocks.123.act_fn_out.proj.weight",
1540
+ "model.diffusion_model.output_blocks.4.1.time_stack.0.ff.net.2.bias": "blocks.123.ff_out.bias",
1541
+ "model.diffusion_model.output_blocks.4.1.time_stack.0.ff.net.2.weight": "blocks.123.ff_out.weight",
1542
+ "model.diffusion_model.output_blocks.4.1.time_stack.0.ff_in.net.0.proj.bias": "blocks.123.act_fn_in.proj.bias",
1543
+ "model.diffusion_model.output_blocks.4.1.time_stack.0.ff_in.net.0.proj.weight": "blocks.123.act_fn_in.proj.weight",
1544
+ "model.diffusion_model.output_blocks.4.1.time_stack.0.ff_in.net.2.bias": "blocks.123.ff_in.bias",
1545
+ "model.diffusion_model.output_blocks.4.1.time_stack.0.ff_in.net.2.weight": "blocks.123.ff_in.weight",
1546
+ "model.diffusion_model.output_blocks.4.1.time_stack.0.norm1.bias": "blocks.123.norm1.bias",
1547
+ "model.diffusion_model.output_blocks.4.1.time_stack.0.norm1.weight": "blocks.123.norm1.weight",
1548
+ "model.diffusion_model.output_blocks.4.1.time_stack.0.norm2.bias": "blocks.123.norm2.bias",
1549
+ "model.diffusion_model.output_blocks.4.1.time_stack.0.norm2.weight": "blocks.123.norm2.weight",
1550
+ "model.diffusion_model.output_blocks.4.1.time_stack.0.norm3.bias": "blocks.123.norm_out.bias",
1551
+ "model.diffusion_model.output_blocks.4.1.time_stack.0.norm3.weight": "blocks.123.norm_out.weight",
1552
+ "model.diffusion_model.output_blocks.4.1.time_stack.0.norm_in.bias": "blocks.123.norm_in.bias",
1553
+ "model.diffusion_model.output_blocks.4.1.time_stack.0.norm_in.weight": "blocks.123.norm_in.weight",
1554
+ "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_k.weight": "blocks.121.transformer_blocks.0.attn1.to_k.weight",
1555
+ "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_out.0.bias": "blocks.121.transformer_blocks.0.attn1.to_out.bias",
1556
+ "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_out.0.weight": "blocks.121.transformer_blocks.0.attn1.to_out.weight",
1557
+ "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_q.weight": "blocks.121.transformer_blocks.0.attn1.to_q.weight",
1558
+ "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn1.to_v.weight": "blocks.121.transformer_blocks.0.attn1.to_v.weight",
1559
+ "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_k.weight": "blocks.121.transformer_blocks.0.attn2.to_k.weight",
1560
+ "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_out.0.bias": "blocks.121.transformer_blocks.0.attn2.to_out.bias",
1561
+ "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_out.0.weight": "blocks.121.transformer_blocks.0.attn2.to_out.weight",
1562
+ "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_q.weight": "blocks.121.transformer_blocks.0.attn2.to_q.weight",
1563
+ "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.attn2.to_v.weight": "blocks.121.transformer_blocks.0.attn2.to_v.weight",
1564
+ "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.0.proj.bias": "blocks.121.transformer_blocks.0.act_fn.proj.bias",
1565
+ "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.0.proj.weight": "blocks.121.transformer_blocks.0.act_fn.proj.weight",
1566
+ "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.2.bias": "blocks.121.transformer_blocks.0.ff.bias",
1567
+ "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.ff.net.2.weight": "blocks.121.transformer_blocks.0.ff.weight",
1568
+ "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm1.bias": "blocks.121.transformer_blocks.0.norm1.bias",
1569
+ "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm1.weight": "blocks.121.transformer_blocks.0.norm1.weight",
1570
+ "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm2.bias": "blocks.121.transformer_blocks.0.norm2.bias",
1571
+ "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm2.weight": "blocks.121.transformer_blocks.0.norm2.weight",
1572
+ "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm3.bias": "blocks.121.transformer_blocks.0.norm3.bias",
1573
+ "model.diffusion_model.output_blocks.4.1.transformer_blocks.0.norm3.weight": "blocks.121.transformer_blocks.0.norm3.weight",
1574
+ "model.diffusion_model.output_blocks.5.0.emb_layers.1.bias": "blocks.126.time_emb_proj.bias",
1575
+ "model.diffusion_model.output_blocks.5.0.emb_layers.1.weight": "blocks.126.time_emb_proj.weight",
1576
+ "model.diffusion_model.output_blocks.5.0.in_layers.0.bias": "blocks.126.norm1.bias",
1577
+ "model.diffusion_model.output_blocks.5.0.in_layers.0.weight": "blocks.126.norm1.weight",
1578
+ "model.diffusion_model.output_blocks.5.0.in_layers.2.bias": "blocks.126.conv1.bias",
1579
+ "model.diffusion_model.output_blocks.5.0.in_layers.2.weight": "blocks.126.conv1.weight",
1580
+ "model.diffusion_model.output_blocks.5.0.out_layers.0.bias": "blocks.126.norm2.bias",
1581
+ "model.diffusion_model.output_blocks.5.0.out_layers.0.weight": "blocks.126.norm2.weight",
1582
+ "model.diffusion_model.output_blocks.5.0.out_layers.3.bias": "blocks.126.conv2.bias",
1583
+ "model.diffusion_model.output_blocks.5.0.out_layers.3.weight": "blocks.126.conv2.weight",
1584
+ "model.diffusion_model.output_blocks.5.0.skip_connection.bias": "blocks.126.conv_shortcut.bias",
1585
+ "model.diffusion_model.output_blocks.5.0.skip_connection.weight": "blocks.126.conv_shortcut.weight",
1586
+ "model.diffusion_model.output_blocks.5.0.time_mixer.mix_factor": "blocks.129.mix_factor",
1587
+ "model.diffusion_model.output_blocks.5.0.time_stack.emb_layers.1.bias": "blocks.128.time_emb_proj.bias",
1588
+ "model.diffusion_model.output_blocks.5.0.time_stack.emb_layers.1.weight": "blocks.128.time_emb_proj.weight",
1589
+ "model.diffusion_model.output_blocks.5.0.time_stack.in_layers.0.bias": "blocks.128.norm1.bias",
1590
+ "model.diffusion_model.output_blocks.5.0.time_stack.in_layers.0.weight": "blocks.128.norm1.weight",
1591
+ "model.diffusion_model.output_blocks.5.0.time_stack.in_layers.2.bias": "blocks.128.conv1.bias",
1592
+ "model.diffusion_model.output_blocks.5.0.time_stack.in_layers.2.weight": "blocks.128.conv1.weight",
1593
+ "model.diffusion_model.output_blocks.5.0.time_stack.out_layers.0.bias": "blocks.128.norm2.bias",
1594
+ "model.diffusion_model.output_blocks.5.0.time_stack.out_layers.0.weight": "blocks.128.norm2.weight",
1595
+ "model.diffusion_model.output_blocks.5.0.time_stack.out_layers.3.bias": "blocks.128.conv2.bias",
1596
+ "model.diffusion_model.output_blocks.5.0.time_stack.out_layers.3.weight": "blocks.128.conv2.weight",
1597
+ "model.diffusion_model.output_blocks.5.1.norm.bias": "blocks.131.norm.bias",
1598
+ "model.diffusion_model.output_blocks.5.1.norm.weight": "blocks.131.norm.weight",
1599
+ "model.diffusion_model.output_blocks.5.1.proj_in.bias": "blocks.131.proj_in.bias",
1600
+ "model.diffusion_model.output_blocks.5.1.proj_in.weight": "blocks.131.proj_in.weight",
1601
+ "model.diffusion_model.output_blocks.5.1.proj_out.bias": "blocks.134.proj.bias",
1602
+ "model.diffusion_model.output_blocks.5.1.proj_out.weight": "blocks.134.proj.weight",
1603
+ "model.diffusion_model.output_blocks.5.1.time_mixer.mix_factor": "blocks.134.mix_factor",
1604
+ "model.diffusion_model.output_blocks.5.1.time_pos_embed.0.bias": "blocks.133.positional_embedding_proj.0.bias",
1605
+ "model.diffusion_model.output_blocks.5.1.time_pos_embed.0.weight": "blocks.133.positional_embedding_proj.0.weight",
1606
+ "model.diffusion_model.output_blocks.5.1.time_pos_embed.2.bias": "blocks.133.positional_embedding_proj.2.bias",
1607
+ "model.diffusion_model.output_blocks.5.1.time_pos_embed.2.weight": "blocks.133.positional_embedding_proj.2.weight",
1608
+ "model.diffusion_model.output_blocks.5.1.time_stack.0.attn1.to_k.weight": "blocks.133.attn1.to_k.weight",
1609
+ "model.diffusion_model.output_blocks.5.1.time_stack.0.attn1.to_out.0.bias": "blocks.133.attn1.to_out.bias",
1610
+ "model.diffusion_model.output_blocks.5.1.time_stack.0.attn1.to_out.0.weight": "blocks.133.attn1.to_out.weight",
1611
+ "model.diffusion_model.output_blocks.5.1.time_stack.0.attn1.to_q.weight": "blocks.133.attn1.to_q.weight",
1612
+ "model.diffusion_model.output_blocks.5.1.time_stack.0.attn1.to_v.weight": "blocks.133.attn1.to_v.weight",
1613
+ "model.diffusion_model.output_blocks.5.1.time_stack.0.attn2.to_k.weight": "blocks.133.attn2.to_k.weight",
1614
+ "model.diffusion_model.output_blocks.5.1.time_stack.0.attn2.to_out.0.bias": "blocks.133.attn2.to_out.bias",
1615
+ "model.diffusion_model.output_blocks.5.1.time_stack.0.attn2.to_out.0.weight": "blocks.133.attn2.to_out.weight",
1616
+ "model.diffusion_model.output_blocks.5.1.time_stack.0.attn2.to_q.weight": "blocks.133.attn2.to_q.weight",
1617
+ "model.diffusion_model.output_blocks.5.1.time_stack.0.attn2.to_v.weight": "blocks.133.attn2.to_v.weight",
1618
+ "model.diffusion_model.output_blocks.5.1.time_stack.0.ff.net.0.proj.bias": "blocks.133.act_fn_out.proj.bias",
1619
+ "model.diffusion_model.output_blocks.5.1.time_stack.0.ff.net.0.proj.weight": "blocks.133.act_fn_out.proj.weight",
1620
+ "model.diffusion_model.output_blocks.5.1.time_stack.0.ff.net.2.bias": "blocks.133.ff_out.bias",
1621
+ "model.diffusion_model.output_blocks.5.1.time_stack.0.ff.net.2.weight": "blocks.133.ff_out.weight",
1622
+ "model.diffusion_model.output_blocks.5.1.time_stack.0.ff_in.net.0.proj.bias": "blocks.133.act_fn_in.proj.bias",
1623
+ "model.diffusion_model.output_blocks.5.1.time_stack.0.ff_in.net.0.proj.weight": "blocks.133.act_fn_in.proj.weight",
1624
+ "model.diffusion_model.output_blocks.5.1.time_stack.0.ff_in.net.2.bias": "blocks.133.ff_in.bias",
1625
+ "model.diffusion_model.output_blocks.5.1.time_stack.0.ff_in.net.2.weight": "blocks.133.ff_in.weight",
1626
+ "model.diffusion_model.output_blocks.5.1.time_stack.0.norm1.bias": "blocks.133.norm1.bias",
1627
+ "model.diffusion_model.output_blocks.5.1.time_stack.0.norm1.weight": "blocks.133.norm1.weight",
1628
+ "model.diffusion_model.output_blocks.5.1.time_stack.0.norm2.bias": "blocks.133.norm2.bias",
1629
+ "model.diffusion_model.output_blocks.5.1.time_stack.0.norm2.weight": "blocks.133.norm2.weight",
1630
+ "model.diffusion_model.output_blocks.5.1.time_stack.0.norm3.bias": "blocks.133.norm_out.bias",
1631
+ "model.diffusion_model.output_blocks.5.1.time_stack.0.norm3.weight": "blocks.133.norm_out.weight",
1632
+ "model.diffusion_model.output_blocks.5.1.time_stack.0.norm_in.bias": "blocks.133.norm_in.bias",
1633
+ "model.diffusion_model.output_blocks.5.1.time_stack.0.norm_in.weight": "blocks.133.norm_in.weight",
1634
+ "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_k.weight": "blocks.131.transformer_blocks.0.attn1.to_k.weight",
1635
+ "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_out.0.bias": "blocks.131.transformer_blocks.0.attn1.to_out.bias",
1636
+ "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_out.0.weight": "blocks.131.transformer_blocks.0.attn1.to_out.weight",
1637
+ "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_q.weight": "blocks.131.transformer_blocks.0.attn1.to_q.weight",
1638
+ "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn1.to_v.weight": "blocks.131.transformer_blocks.0.attn1.to_v.weight",
1639
+ "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_k.weight": "blocks.131.transformer_blocks.0.attn2.to_k.weight",
1640
+ "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_out.0.bias": "blocks.131.transformer_blocks.0.attn2.to_out.bias",
1641
+ "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_out.0.weight": "blocks.131.transformer_blocks.0.attn2.to_out.weight",
1642
+ "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_q.weight": "blocks.131.transformer_blocks.0.attn2.to_q.weight",
1643
+ "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.attn2.to_v.weight": "blocks.131.transformer_blocks.0.attn2.to_v.weight",
1644
+ "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.0.proj.bias": "blocks.131.transformer_blocks.0.act_fn.proj.bias",
1645
+ "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.0.proj.weight": "blocks.131.transformer_blocks.0.act_fn.proj.weight",
1646
+ "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.2.bias": "blocks.131.transformer_blocks.0.ff.bias",
1647
+ "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.ff.net.2.weight": "blocks.131.transformer_blocks.0.ff.weight",
1648
+ "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm1.bias": "blocks.131.transformer_blocks.0.norm1.bias",
1649
+ "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm1.weight": "blocks.131.transformer_blocks.0.norm1.weight",
1650
+ "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm2.bias": "blocks.131.transformer_blocks.0.norm2.bias",
1651
+ "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm2.weight": "blocks.131.transformer_blocks.0.norm2.weight",
1652
+ "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm3.bias": "blocks.131.transformer_blocks.0.norm3.bias",
1653
+ "model.diffusion_model.output_blocks.5.1.transformer_blocks.0.norm3.weight": "blocks.131.transformer_blocks.0.norm3.weight",
1654
+ "model.diffusion_model.output_blocks.5.2.conv.bias": "blocks.135.conv.bias",
1655
+ "model.diffusion_model.output_blocks.5.2.conv.weight": "blocks.135.conv.weight",
1656
+ "model.diffusion_model.output_blocks.6.0.emb_layers.1.bias": "blocks.137.time_emb_proj.bias",
1657
+ "model.diffusion_model.output_blocks.6.0.emb_layers.1.weight": "blocks.137.time_emb_proj.weight",
1658
+ "model.diffusion_model.output_blocks.6.0.in_layers.0.bias": "blocks.137.norm1.bias",
1659
+ "model.diffusion_model.output_blocks.6.0.in_layers.0.weight": "blocks.137.norm1.weight",
1660
+ "model.diffusion_model.output_blocks.6.0.in_layers.2.bias": "blocks.137.conv1.bias",
1661
+ "model.diffusion_model.output_blocks.6.0.in_layers.2.weight": "blocks.137.conv1.weight",
1662
+ "model.diffusion_model.output_blocks.6.0.out_layers.0.bias": "blocks.137.norm2.bias",
1663
+ "model.diffusion_model.output_blocks.6.0.out_layers.0.weight": "blocks.137.norm2.weight",
1664
+ "model.diffusion_model.output_blocks.6.0.out_layers.3.bias": "blocks.137.conv2.bias",
1665
+ "model.diffusion_model.output_blocks.6.0.out_layers.3.weight": "blocks.137.conv2.weight",
1666
+ "model.diffusion_model.output_blocks.6.0.skip_connection.bias": "blocks.137.conv_shortcut.bias",
1667
+ "model.diffusion_model.output_blocks.6.0.skip_connection.weight": "blocks.137.conv_shortcut.weight",
1668
+ "model.diffusion_model.output_blocks.6.0.time_mixer.mix_factor": "blocks.140.mix_factor",
1669
+ "model.diffusion_model.output_blocks.6.0.time_stack.emb_layers.1.bias": "blocks.139.time_emb_proj.bias",
1670
+ "model.diffusion_model.output_blocks.6.0.time_stack.emb_layers.1.weight": "blocks.139.time_emb_proj.weight",
1671
+ "model.diffusion_model.output_blocks.6.0.time_stack.in_layers.0.bias": "blocks.139.norm1.bias",
1672
+ "model.diffusion_model.output_blocks.6.0.time_stack.in_layers.0.weight": "blocks.139.norm1.weight",
1673
+ "model.diffusion_model.output_blocks.6.0.time_stack.in_layers.2.bias": "blocks.139.conv1.bias",
1674
+ "model.diffusion_model.output_blocks.6.0.time_stack.in_layers.2.weight": "blocks.139.conv1.weight",
1675
+ "model.diffusion_model.output_blocks.6.0.time_stack.out_layers.0.bias": "blocks.139.norm2.bias",
1676
+ "model.diffusion_model.output_blocks.6.0.time_stack.out_layers.0.weight": "blocks.139.norm2.weight",
1677
+ "model.diffusion_model.output_blocks.6.0.time_stack.out_layers.3.bias": "blocks.139.conv2.bias",
1678
+ "model.diffusion_model.output_blocks.6.0.time_stack.out_layers.3.weight": "blocks.139.conv2.weight",
1679
+ "model.diffusion_model.output_blocks.6.1.norm.bias": "blocks.142.norm.bias",
1680
+ "model.diffusion_model.output_blocks.6.1.norm.weight": "blocks.142.norm.weight",
1681
+ "model.diffusion_model.output_blocks.6.1.proj_in.bias": "blocks.142.proj_in.bias",
1682
+ "model.diffusion_model.output_blocks.6.1.proj_in.weight": "blocks.142.proj_in.weight",
1683
+ "model.diffusion_model.output_blocks.6.1.proj_out.bias": "blocks.145.proj.bias",
1684
+ "model.diffusion_model.output_blocks.6.1.proj_out.weight": "blocks.145.proj.weight",
1685
+ "model.diffusion_model.output_blocks.6.1.time_mixer.mix_factor": "blocks.145.mix_factor",
1686
+ "model.diffusion_model.output_blocks.6.1.time_pos_embed.0.bias": "blocks.144.positional_embedding_proj.0.bias",
1687
+ "model.diffusion_model.output_blocks.6.1.time_pos_embed.0.weight": "blocks.144.positional_embedding_proj.0.weight",
1688
+ "model.diffusion_model.output_blocks.6.1.time_pos_embed.2.bias": "blocks.144.positional_embedding_proj.2.bias",
1689
+ "model.diffusion_model.output_blocks.6.1.time_pos_embed.2.weight": "blocks.144.positional_embedding_proj.2.weight",
1690
+ "model.diffusion_model.output_blocks.6.1.time_stack.0.attn1.to_k.weight": "blocks.144.attn1.to_k.weight",
1691
+ "model.diffusion_model.output_blocks.6.1.time_stack.0.attn1.to_out.0.bias": "blocks.144.attn1.to_out.bias",
1692
+ "model.diffusion_model.output_blocks.6.1.time_stack.0.attn1.to_out.0.weight": "blocks.144.attn1.to_out.weight",
1693
+ "model.diffusion_model.output_blocks.6.1.time_stack.0.attn1.to_q.weight": "blocks.144.attn1.to_q.weight",
1694
+ "model.diffusion_model.output_blocks.6.1.time_stack.0.attn1.to_v.weight": "blocks.144.attn1.to_v.weight",
1695
+ "model.diffusion_model.output_blocks.6.1.time_stack.0.attn2.to_k.weight": "blocks.144.attn2.to_k.weight",
1696
+ "model.diffusion_model.output_blocks.6.1.time_stack.0.attn2.to_out.0.bias": "blocks.144.attn2.to_out.bias",
1697
+ "model.diffusion_model.output_blocks.6.1.time_stack.0.attn2.to_out.0.weight": "blocks.144.attn2.to_out.weight",
1698
+ "model.diffusion_model.output_blocks.6.1.time_stack.0.attn2.to_q.weight": "blocks.144.attn2.to_q.weight",
1699
+ "model.diffusion_model.output_blocks.6.1.time_stack.0.attn2.to_v.weight": "blocks.144.attn2.to_v.weight",
1700
+ "model.diffusion_model.output_blocks.6.1.time_stack.0.ff.net.0.proj.bias": "blocks.144.act_fn_out.proj.bias",
1701
+ "model.diffusion_model.output_blocks.6.1.time_stack.0.ff.net.0.proj.weight": "blocks.144.act_fn_out.proj.weight",
1702
+ "model.diffusion_model.output_blocks.6.1.time_stack.0.ff.net.2.bias": "blocks.144.ff_out.bias",
1703
+ "model.diffusion_model.output_blocks.6.1.time_stack.0.ff.net.2.weight": "blocks.144.ff_out.weight",
1704
+ "model.diffusion_model.output_blocks.6.1.time_stack.0.ff_in.net.0.proj.bias": "blocks.144.act_fn_in.proj.bias",
1705
+ "model.diffusion_model.output_blocks.6.1.time_stack.0.ff_in.net.0.proj.weight": "blocks.144.act_fn_in.proj.weight",
1706
+ "model.diffusion_model.output_blocks.6.1.time_stack.0.ff_in.net.2.bias": "blocks.144.ff_in.bias",
1707
+ "model.diffusion_model.output_blocks.6.1.time_stack.0.ff_in.net.2.weight": "blocks.144.ff_in.weight",
1708
+ "model.diffusion_model.output_blocks.6.1.time_stack.0.norm1.bias": "blocks.144.norm1.bias",
1709
+ "model.diffusion_model.output_blocks.6.1.time_stack.0.norm1.weight": "blocks.144.norm1.weight",
1710
+ "model.diffusion_model.output_blocks.6.1.time_stack.0.norm2.bias": "blocks.144.norm2.bias",
1711
+ "model.diffusion_model.output_blocks.6.1.time_stack.0.norm2.weight": "blocks.144.norm2.weight",
1712
+ "model.diffusion_model.output_blocks.6.1.time_stack.0.norm3.bias": "blocks.144.norm_out.bias",
1713
+ "model.diffusion_model.output_blocks.6.1.time_stack.0.norm3.weight": "blocks.144.norm_out.weight",
1714
+ "model.diffusion_model.output_blocks.6.1.time_stack.0.norm_in.bias": "blocks.144.norm_in.bias",
1715
+ "model.diffusion_model.output_blocks.6.1.time_stack.0.norm_in.weight": "blocks.144.norm_in.weight",
1716
+ "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_k.weight": "blocks.142.transformer_blocks.0.attn1.to_k.weight",
1717
+ "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_out.0.bias": "blocks.142.transformer_blocks.0.attn1.to_out.bias",
1718
+ "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_out.0.weight": "blocks.142.transformer_blocks.0.attn1.to_out.weight",
1719
+ "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_q.weight": "blocks.142.transformer_blocks.0.attn1.to_q.weight",
1720
+ "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn1.to_v.weight": "blocks.142.transformer_blocks.0.attn1.to_v.weight",
1721
+ "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_k.weight": "blocks.142.transformer_blocks.0.attn2.to_k.weight",
1722
+ "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_out.0.bias": "blocks.142.transformer_blocks.0.attn2.to_out.bias",
1723
+ "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_out.0.weight": "blocks.142.transformer_blocks.0.attn2.to_out.weight",
1724
+ "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_q.weight": "blocks.142.transformer_blocks.0.attn2.to_q.weight",
1725
+ "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.attn2.to_v.weight": "blocks.142.transformer_blocks.0.attn2.to_v.weight",
1726
+ "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.ff.net.0.proj.bias": "blocks.142.transformer_blocks.0.act_fn.proj.bias",
1727
+ "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.ff.net.0.proj.weight": "blocks.142.transformer_blocks.0.act_fn.proj.weight",
1728
+ "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.ff.net.2.bias": "blocks.142.transformer_blocks.0.ff.bias",
1729
+ "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.ff.net.2.weight": "blocks.142.transformer_blocks.0.ff.weight",
1730
+ "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm1.bias": "blocks.142.transformer_blocks.0.norm1.bias",
1731
+ "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm1.weight": "blocks.142.transformer_blocks.0.norm1.weight",
1732
+ "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm2.bias": "blocks.142.transformer_blocks.0.norm2.bias",
1733
+ "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm2.weight": "blocks.142.transformer_blocks.0.norm2.weight",
1734
+ "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm3.bias": "blocks.142.transformer_blocks.0.norm3.bias",
1735
+ "model.diffusion_model.output_blocks.6.1.transformer_blocks.0.norm3.weight": "blocks.142.transformer_blocks.0.norm3.weight",
1736
+ "model.diffusion_model.output_blocks.7.0.emb_layers.1.bias": "blocks.147.time_emb_proj.bias",
1737
+ "model.diffusion_model.output_blocks.7.0.emb_layers.1.weight": "blocks.147.time_emb_proj.weight",
1738
+ "model.diffusion_model.output_blocks.7.0.in_layers.0.bias": "blocks.147.norm1.bias",
1739
+ "model.diffusion_model.output_blocks.7.0.in_layers.0.weight": "blocks.147.norm1.weight",
1740
+ "model.diffusion_model.output_blocks.7.0.in_layers.2.bias": "blocks.147.conv1.bias",
1741
+ "model.diffusion_model.output_blocks.7.0.in_layers.2.weight": "blocks.147.conv1.weight",
1742
+ "model.diffusion_model.output_blocks.7.0.out_layers.0.bias": "blocks.147.norm2.bias",
1743
+ "model.diffusion_model.output_blocks.7.0.out_layers.0.weight": "blocks.147.norm2.weight",
1744
+ "model.diffusion_model.output_blocks.7.0.out_layers.3.bias": "blocks.147.conv2.bias",
1745
+ "model.diffusion_model.output_blocks.7.0.out_layers.3.weight": "blocks.147.conv2.weight",
1746
+ "model.diffusion_model.output_blocks.7.0.skip_connection.bias": "blocks.147.conv_shortcut.bias",
1747
+ "model.diffusion_model.output_blocks.7.0.skip_connection.weight": "blocks.147.conv_shortcut.weight",
1748
+ "model.diffusion_model.output_blocks.7.0.time_mixer.mix_factor": "blocks.150.mix_factor",
1749
+ "model.diffusion_model.output_blocks.7.0.time_stack.emb_layers.1.bias": "blocks.149.time_emb_proj.bias",
1750
+ "model.diffusion_model.output_blocks.7.0.time_stack.emb_layers.1.weight": "blocks.149.time_emb_proj.weight",
1751
+ "model.diffusion_model.output_blocks.7.0.time_stack.in_layers.0.bias": "blocks.149.norm1.bias",
1752
+ "model.diffusion_model.output_blocks.7.0.time_stack.in_layers.0.weight": "blocks.149.norm1.weight",
1753
+ "model.diffusion_model.output_blocks.7.0.time_stack.in_layers.2.bias": "blocks.149.conv1.bias",
1754
+ "model.diffusion_model.output_blocks.7.0.time_stack.in_layers.2.weight": "blocks.149.conv1.weight",
1755
+ "model.diffusion_model.output_blocks.7.0.time_stack.out_layers.0.bias": "blocks.149.norm2.bias",
1756
+ "model.diffusion_model.output_blocks.7.0.time_stack.out_layers.0.weight": "blocks.149.norm2.weight",
1757
+ "model.diffusion_model.output_blocks.7.0.time_stack.out_layers.3.bias": "blocks.149.conv2.bias",
1758
+ "model.diffusion_model.output_blocks.7.0.time_stack.out_layers.3.weight": "blocks.149.conv2.weight",
1759
+ "model.diffusion_model.output_blocks.7.1.norm.bias": "blocks.152.norm.bias",
1760
+ "model.diffusion_model.output_blocks.7.1.norm.weight": "blocks.152.norm.weight",
1761
+ "model.diffusion_model.output_blocks.7.1.proj_in.bias": "blocks.152.proj_in.bias",
1762
+ "model.diffusion_model.output_blocks.7.1.proj_in.weight": "blocks.152.proj_in.weight",
1763
+ "model.diffusion_model.output_blocks.7.1.proj_out.bias": "blocks.155.proj.bias",
1764
+ "model.diffusion_model.output_blocks.7.1.proj_out.weight": "blocks.155.proj.weight",
1765
+ "model.diffusion_model.output_blocks.7.1.time_mixer.mix_factor": "blocks.155.mix_factor",
1766
+ "model.diffusion_model.output_blocks.7.1.time_pos_embed.0.bias": "blocks.154.positional_embedding_proj.0.bias",
1767
+ "model.diffusion_model.output_blocks.7.1.time_pos_embed.0.weight": "blocks.154.positional_embedding_proj.0.weight",
1768
+ "model.diffusion_model.output_blocks.7.1.time_pos_embed.2.bias": "blocks.154.positional_embedding_proj.2.bias",
1769
+ "model.diffusion_model.output_blocks.7.1.time_pos_embed.2.weight": "blocks.154.positional_embedding_proj.2.weight",
1770
+ "model.diffusion_model.output_blocks.7.1.time_stack.0.attn1.to_k.weight": "blocks.154.attn1.to_k.weight",
1771
+ "model.diffusion_model.output_blocks.7.1.time_stack.0.attn1.to_out.0.bias": "blocks.154.attn1.to_out.bias",
1772
+ "model.diffusion_model.output_blocks.7.1.time_stack.0.attn1.to_out.0.weight": "blocks.154.attn1.to_out.weight",
1773
+ "model.diffusion_model.output_blocks.7.1.time_stack.0.attn1.to_q.weight": "blocks.154.attn1.to_q.weight",
1774
+ "model.diffusion_model.output_blocks.7.1.time_stack.0.attn1.to_v.weight": "blocks.154.attn1.to_v.weight",
1775
+ "model.diffusion_model.output_blocks.7.1.time_stack.0.attn2.to_k.weight": "blocks.154.attn2.to_k.weight",
1776
+ "model.diffusion_model.output_blocks.7.1.time_stack.0.attn2.to_out.0.bias": "blocks.154.attn2.to_out.bias",
1777
+ "model.diffusion_model.output_blocks.7.1.time_stack.0.attn2.to_out.0.weight": "blocks.154.attn2.to_out.weight",
1778
+ "model.diffusion_model.output_blocks.7.1.time_stack.0.attn2.to_q.weight": "blocks.154.attn2.to_q.weight",
1779
+ "model.diffusion_model.output_blocks.7.1.time_stack.0.attn2.to_v.weight": "blocks.154.attn2.to_v.weight",
1780
+ "model.diffusion_model.output_blocks.7.1.time_stack.0.ff.net.0.proj.bias": "blocks.154.act_fn_out.proj.bias",
1781
+ "model.diffusion_model.output_blocks.7.1.time_stack.0.ff.net.0.proj.weight": "blocks.154.act_fn_out.proj.weight",
1782
+ "model.diffusion_model.output_blocks.7.1.time_stack.0.ff.net.2.bias": "blocks.154.ff_out.bias",
1783
+ "model.diffusion_model.output_blocks.7.1.time_stack.0.ff.net.2.weight": "blocks.154.ff_out.weight",
1784
+ "model.diffusion_model.output_blocks.7.1.time_stack.0.ff_in.net.0.proj.bias": "blocks.154.act_fn_in.proj.bias",
1785
+ "model.diffusion_model.output_blocks.7.1.time_stack.0.ff_in.net.0.proj.weight": "blocks.154.act_fn_in.proj.weight",
1786
+ "model.diffusion_model.output_blocks.7.1.time_stack.0.ff_in.net.2.bias": "blocks.154.ff_in.bias",
1787
+ "model.diffusion_model.output_blocks.7.1.time_stack.0.ff_in.net.2.weight": "blocks.154.ff_in.weight",
1788
+ "model.diffusion_model.output_blocks.7.1.time_stack.0.norm1.bias": "blocks.154.norm1.bias",
1789
+ "model.diffusion_model.output_blocks.7.1.time_stack.0.norm1.weight": "blocks.154.norm1.weight",
1790
+ "model.diffusion_model.output_blocks.7.1.time_stack.0.norm2.bias": "blocks.154.norm2.bias",
1791
+ "model.diffusion_model.output_blocks.7.1.time_stack.0.norm2.weight": "blocks.154.norm2.weight",
1792
+ "model.diffusion_model.output_blocks.7.1.time_stack.0.norm3.bias": "blocks.154.norm_out.bias",
1793
+ "model.diffusion_model.output_blocks.7.1.time_stack.0.norm3.weight": "blocks.154.norm_out.weight",
1794
+ "model.diffusion_model.output_blocks.7.1.time_stack.0.norm_in.bias": "blocks.154.norm_in.bias",
1795
+ "model.diffusion_model.output_blocks.7.1.time_stack.0.norm_in.weight": "blocks.154.norm_in.weight",
1796
+ "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_k.weight": "blocks.152.transformer_blocks.0.attn1.to_k.weight",
1797
+ "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_out.0.bias": "blocks.152.transformer_blocks.0.attn1.to_out.bias",
1798
+ "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_out.0.weight": "blocks.152.transformer_blocks.0.attn1.to_out.weight",
1799
+ "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_q.weight": "blocks.152.transformer_blocks.0.attn1.to_q.weight",
1800
+ "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn1.to_v.weight": "blocks.152.transformer_blocks.0.attn1.to_v.weight",
1801
+ "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_k.weight": "blocks.152.transformer_blocks.0.attn2.to_k.weight",
1802
+ "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_out.0.bias": "blocks.152.transformer_blocks.0.attn2.to_out.bias",
1803
+ "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_out.0.weight": "blocks.152.transformer_blocks.0.attn2.to_out.weight",
1804
+ "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_q.weight": "blocks.152.transformer_blocks.0.attn2.to_q.weight",
1805
+ "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.attn2.to_v.weight": "blocks.152.transformer_blocks.0.attn2.to_v.weight",
1806
+ "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.ff.net.0.proj.bias": "blocks.152.transformer_blocks.0.act_fn.proj.bias",
1807
+ "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.ff.net.0.proj.weight": "blocks.152.transformer_blocks.0.act_fn.proj.weight",
1808
+ "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.ff.net.2.bias": "blocks.152.transformer_blocks.0.ff.bias",
1809
+ "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.ff.net.2.weight": "blocks.152.transformer_blocks.0.ff.weight",
1810
+ "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm1.bias": "blocks.152.transformer_blocks.0.norm1.bias",
1811
+ "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm1.weight": "blocks.152.transformer_blocks.0.norm1.weight",
1812
+ "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm2.bias": "blocks.152.transformer_blocks.0.norm2.bias",
1813
+ "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm2.weight": "blocks.152.transformer_blocks.0.norm2.weight",
1814
+ "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm3.bias": "blocks.152.transformer_blocks.0.norm3.bias",
1815
+ "model.diffusion_model.output_blocks.7.1.transformer_blocks.0.norm3.weight": "blocks.152.transformer_blocks.0.norm3.weight",
1816
+ "model.diffusion_model.output_blocks.8.0.emb_layers.1.bias": "blocks.157.time_emb_proj.bias",
1817
+ "model.diffusion_model.output_blocks.8.0.emb_layers.1.weight": "blocks.157.time_emb_proj.weight",
1818
+ "model.diffusion_model.output_blocks.8.0.in_layers.0.bias": "blocks.157.norm1.bias",
1819
+ "model.diffusion_model.output_blocks.8.0.in_layers.0.weight": "blocks.157.norm1.weight",
1820
+ "model.diffusion_model.output_blocks.8.0.in_layers.2.bias": "blocks.157.conv1.bias",
1821
+ "model.diffusion_model.output_blocks.8.0.in_layers.2.weight": "blocks.157.conv1.weight",
1822
+ "model.diffusion_model.output_blocks.8.0.out_layers.0.bias": "blocks.157.norm2.bias",
1823
+ "model.diffusion_model.output_blocks.8.0.out_layers.0.weight": "blocks.157.norm2.weight",
1824
+ "model.diffusion_model.output_blocks.8.0.out_layers.3.bias": "blocks.157.conv2.bias",
1825
+ "model.diffusion_model.output_blocks.8.0.out_layers.3.weight": "blocks.157.conv2.weight",
1826
+ "model.diffusion_model.output_blocks.8.0.skip_connection.bias": "blocks.157.conv_shortcut.bias",
1827
+ "model.diffusion_model.output_blocks.8.0.skip_connection.weight": "blocks.157.conv_shortcut.weight",
1828
+ "model.diffusion_model.output_blocks.8.0.time_mixer.mix_factor": "blocks.160.mix_factor",
1829
+ "model.diffusion_model.output_blocks.8.0.time_stack.emb_layers.1.bias": "blocks.159.time_emb_proj.bias",
1830
+ "model.diffusion_model.output_blocks.8.0.time_stack.emb_layers.1.weight": "blocks.159.time_emb_proj.weight",
1831
+ "model.diffusion_model.output_blocks.8.0.time_stack.in_layers.0.bias": "blocks.159.norm1.bias",
1832
+ "model.diffusion_model.output_blocks.8.0.time_stack.in_layers.0.weight": "blocks.159.norm1.weight",
1833
+ "model.diffusion_model.output_blocks.8.0.time_stack.in_layers.2.bias": "blocks.159.conv1.bias",
1834
+ "model.diffusion_model.output_blocks.8.0.time_stack.in_layers.2.weight": "blocks.159.conv1.weight",
1835
+ "model.diffusion_model.output_blocks.8.0.time_stack.out_layers.0.bias": "blocks.159.norm2.bias",
1836
+ "model.diffusion_model.output_blocks.8.0.time_stack.out_layers.0.weight": "blocks.159.norm2.weight",
1837
+ "model.diffusion_model.output_blocks.8.0.time_stack.out_layers.3.bias": "blocks.159.conv2.bias",
1838
+ "model.diffusion_model.output_blocks.8.0.time_stack.out_layers.3.weight": "blocks.159.conv2.weight",
1839
+ "model.diffusion_model.output_blocks.8.1.norm.bias": "blocks.162.norm.bias",
1840
+ "model.diffusion_model.output_blocks.8.1.norm.weight": "blocks.162.norm.weight",
1841
+ "model.diffusion_model.output_blocks.8.1.proj_in.bias": "blocks.162.proj_in.bias",
1842
+ "model.diffusion_model.output_blocks.8.1.proj_in.weight": "blocks.162.proj_in.weight",
1843
+ "model.diffusion_model.output_blocks.8.1.proj_out.bias": "blocks.165.proj.bias",
1844
+ "model.diffusion_model.output_blocks.8.1.proj_out.weight": "blocks.165.proj.weight",
1845
+ "model.diffusion_model.output_blocks.8.1.time_mixer.mix_factor": "blocks.165.mix_factor",
1846
+ "model.diffusion_model.output_blocks.8.1.time_pos_embed.0.bias": "blocks.164.positional_embedding_proj.0.bias",
1847
+ "model.diffusion_model.output_blocks.8.1.time_pos_embed.0.weight": "blocks.164.positional_embedding_proj.0.weight",
1848
+ "model.diffusion_model.output_blocks.8.1.time_pos_embed.2.bias": "blocks.164.positional_embedding_proj.2.bias",
1849
+ "model.diffusion_model.output_blocks.8.1.time_pos_embed.2.weight": "blocks.164.positional_embedding_proj.2.weight",
1850
+ "model.diffusion_model.output_blocks.8.1.time_stack.0.attn1.to_k.weight": "blocks.164.attn1.to_k.weight",
1851
+ "model.diffusion_model.output_blocks.8.1.time_stack.0.attn1.to_out.0.bias": "blocks.164.attn1.to_out.bias",
1852
+ "model.diffusion_model.output_blocks.8.1.time_stack.0.attn1.to_out.0.weight": "blocks.164.attn1.to_out.weight",
1853
+ "model.diffusion_model.output_blocks.8.1.time_stack.0.attn1.to_q.weight": "blocks.164.attn1.to_q.weight",
1854
+ "model.diffusion_model.output_blocks.8.1.time_stack.0.attn1.to_v.weight": "blocks.164.attn1.to_v.weight",
1855
+ "model.diffusion_model.output_blocks.8.1.time_stack.0.attn2.to_k.weight": "blocks.164.attn2.to_k.weight",
1856
+ "model.diffusion_model.output_blocks.8.1.time_stack.0.attn2.to_out.0.bias": "blocks.164.attn2.to_out.bias",
1857
+ "model.diffusion_model.output_blocks.8.1.time_stack.0.attn2.to_out.0.weight": "blocks.164.attn2.to_out.weight",
1858
+ "model.diffusion_model.output_blocks.8.1.time_stack.0.attn2.to_q.weight": "blocks.164.attn2.to_q.weight",
1859
+ "model.diffusion_model.output_blocks.8.1.time_stack.0.attn2.to_v.weight": "blocks.164.attn2.to_v.weight",
1860
+ "model.diffusion_model.output_blocks.8.1.time_stack.0.ff.net.0.proj.bias": "blocks.164.act_fn_out.proj.bias",
1861
+ "model.diffusion_model.output_blocks.8.1.time_stack.0.ff.net.0.proj.weight": "blocks.164.act_fn_out.proj.weight",
1862
+ "model.diffusion_model.output_blocks.8.1.time_stack.0.ff.net.2.bias": "blocks.164.ff_out.bias",
1863
+ "model.diffusion_model.output_blocks.8.1.time_stack.0.ff.net.2.weight": "blocks.164.ff_out.weight",
1864
+ "model.diffusion_model.output_blocks.8.1.time_stack.0.ff_in.net.0.proj.bias": "blocks.164.act_fn_in.proj.bias",
1865
+ "model.diffusion_model.output_blocks.8.1.time_stack.0.ff_in.net.0.proj.weight": "blocks.164.act_fn_in.proj.weight",
1866
+ "model.diffusion_model.output_blocks.8.1.time_stack.0.ff_in.net.2.bias": "blocks.164.ff_in.bias",
1867
+ "model.diffusion_model.output_blocks.8.1.time_stack.0.ff_in.net.2.weight": "blocks.164.ff_in.weight",
1868
+ "model.diffusion_model.output_blocks.8.1.time_stack.0.norm1.bias": "blocks.164.norm1.bias",
1869
+ "model.diffusion_model.output_blocks.8.1.time_stack.0.norm1.weight": "blocks.164.norm1.weight",
1870
+ "model.diffusion_model.output_blocks.8.1.time_stack.0.norm2.bias": "blocks.164.norm2.bias",
1871
+ "model.diffusion_model.output_blocks.8.1.time_stack.0.norm2.weight": "blocks.164.norm2.weight",
1872
+ "model.diffusion_model.output_blocks.8.1.time_stack.0.norm3.bias": "blocks.164.norm_out.bias",
1873
+ "model.diffusion_model.output_blocks.8.1.time_stack.0.norm3.weight": "blocks.164.norm_out.weight",
1874
+ "model.diffusion_model.output_blocks.8.1.time_stack.0.norm_in.bias": "blocks.164.norm_in.bias",
1875
+ "model.diffusion_model.output_blocks.8.1.time_stack.0.norm_in.weight": "blocks.164.norm_in.weight",
1876
+ "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_k.weight": "blocks.162.transformer_blocks.0.attn1.to_k.weight",
1877
+ "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_out.0.bias": "blocks.162.transformer_blocks.0.attn1.to_out.bias",
1878
+ "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_out.0.weight": "blocks.162.transformer_blocks.0.attn1.to_out.weight",
1879
+ "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_q.weight": "blocks.162.transformer_blocks.0.attn1.to_q.weight",
1880
+ "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn1.to_v.weight": "blocks.162.transformer_blocks.0.attn1.to_v.weight",
1881
+ "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_k.weight": "blocks.162.transformer_blocks.0.attn2.to_k.weight",
1882
+ "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_out.0.bias": "blocks.162.transformer_blocks.0.attn2.to_out.bias",
1883
+ "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_out.0.weight": "blocks.162.transformer_blocks.0.attn2.to_out.weight",
1884
+ "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_q.weight": "blocks.162.transformer_blocks.0.attn2.to_q.weight",
1885
+ "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.attn2.to_v.weight": "blocks.162.transformer_blocks.0.attn2.to_v.weight",
1886
+ "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.ff.net.0.proj.bias": "blocks.162.transformer_blocks.0.act_fn.proj.bias",
1887
+ "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.ff.net.0.proj.weight": "blocks.162.transformer_blocks.0.act_fn.proj.weight",
1888
+ "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.ff.net.2.bias": "blocks.162.transformer_blocks.0.ff.bias",
1889
+ "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.ff.net.2.weight": "blocks.162.transformer_blocks.0.ff.weight",
1890
+ "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm1.bias": "blocks.162.transformer_blocks.0.norm1.bias",
1891
+ "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm1.weight": "blocks.162.transformer_blocks.0.norm1.weight",
1892
+ "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm2.bias": "blocks.162.transformer_blocks.0.norm2.bias",
1893
+ "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm2.weight": "blocks.162.transformer_blocks.0.norm2.weight",
1894
+ "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm3.bias": "blocks.162.transformer_blocks.0.norm3.bias",
1895
+ "model.diffusion_model.output_blocks.8.1.transformer_blocks.0.norm3.weight": "blocks.162.transformer_blocks.0.norm3.weight",
1896
+ "model.diffusion_model.output_blocks.8.2.conv.bias": "blocks.166.conv.bias",
1897
+ "model.diffusion_model.output_blocks.8.2.conv.weight": "blocks.166.conv.weight",
1898
+ "model.diffusion_model.output_blocks.9.0.emb_layers.1.bias": "blocks.168.time_emb_proj.bias",
1899
+ "model.diffusion_model.output_blocks.9.0.emb_layers.1.weight": "blocks.168.time_emb_proj.weight",
1900
+ "model.diffusion_model.output_blocks.9.0.in_layers.0.bias": "blocks.168.norm1.bias",
1901
+ "model.diffusion_model.output_blocks.9.0.in_layers.0.weight": "blocks.168.norm1.weight",
1902
+ "model.diffusion_model.output_blocks.9.0.in_layers.2.bias": "blocks.168.conv1.bias",
1903
+ "model.diffusion_model.output_blocks.9.0.in_layers.2.weight": "blocks.168.conv1.weight",
1904
+ "model.diffusion_model.output_blocks.9.0.out_layers.0.bias": "blocks.168.norm2.bias",
1905
+ "model.diffusion_model.output_blocks.9.0.out_layers.0.weight": "blocks.168.norm2.weight",
1906
+ "model.diffusion_model.output_blocks.9.0.out_layers.3.bias": "blocks.168.conv2.bias",
1907
+ "model.diffusion_model.output_blocks.9.0.out_layers.3.weight": "blocks.168.conv2.weight",
1908
+ "model.diffusion_model.output_blocks.9.0.skip_connection.bias": "blocks.168.conv_shortcut.bias",
1909
+ "model.diffusion_model.output_blocks.9.0.skip_connection.weight": "blocks.168.conv_shortcut.weight",
1910
+ "model.diffusion_model.output_blocks.9.0.time_mixer.mix_factor": "blocks.171.mix_factor",
1911
+ "model.diffusion_model.output_blocks.9.0.time_stack.emb_layers.1.bias": "blocks.170.time_emb_proj.bias",
1912
+ "model.diffusion_model.output_blocks.9.0.time_stack.emb_layers.1.weight": "blocks.170.time_emb_proj.weight",
1913
+ "model.diffusion_model.output_blocks.9.0.time_stack.in_layers.0.bias": "blocks.170.norm1.bias",
1914
+ "model.diffusion_model.output_blocks.9.0.time_stack.in_layers.0.weight": "blocks.170.norm1.weight",
1915
+ "model.diffusion_model.output_blocks.9.0.time_stack.in_layers.2.bias": "blocks.170.conv1.bias",
1916
+ "model.diffusion_model.output_blocks.9.0.time_stack.in_layers.2.weight": "blocks.170.conv1.weight",
1917
+ "model.diffusion_model.output_blocks.9.0.time_stack.out_layers.0.bias": "blocks.170.norm2.bias",
1918
+ "model.diffusion_model.output_blocks.9.0.time_stack.out_layers.0.weight": "blocks.170.norm2.weight",
1919
+ "model.diffusion_model.output_blocks.9.0.time_stack.out_layers.3.bias": "blocks.170.conv2.bias",
1920
+ "model.diffusion_model.output_blocks.9.0.time_stack.out_layers.3.weight": "blocks.170.conv2.weight",
1921
+ "model.diffusion_model.output_blocks.9.1.norm.bias": "blocks.173.norm.bias",
1922
+ "model.diffusion_model.output_blocks.9.1.norm.weight": "blocks.173.norm.weight",
1923
+ "model.diffusion_model.output_blocks.9.1.proj_in.bias": "blocks.173.proj_in.bias",
1924
+ "model.diffusion_model.output_blocks.9.1.proj_in.weight": "blocks.173.proj_in.weight",
1925
+ "model.diffusion_model.output_blocks.9.1.proj_out.bias": "blocks.176.proj.bias",
1926
+ "model.diffusion_model.output_blocks.9.1.proj_out.weight": "blocks.176.proj.weight",
1927
+ "model.diffusion_model.output_blocks.9.1.time_mixer.mix_factor": "blocks.176.mix_factor",
1928
+ "model.diffusion_model.output_blocks.9.1.time_pos_embed.0.bias": "blocks.175.positional_embedding_proj.0.bias",
1929
+ "model.diffusion_model.output_blocks.9.1.time_pos_embed.0.weight": "blocks.175.positional_embedding_proj.0.weight",
1930
+ "model.diffusion_model.output_blocks.9.1.time_pos_embed.2.bias": "blocks.175.positional_embedding_proj.2.bias",
1931
+ "model.diffusion_model.output_blocks.9.1.time_pos_embed.2.weight": "blocks.175.positional_embedding_proj.2.weight",
1932
+ "model.diffusion_model.output_blocks.9.1.time_stack.0.attn1.to_k.weight": "blocks.175.attn1.to_k.weight",
1933
+ "model.diffusion_model.output_blocks.9.1.time_stack.0.attn1.to_out.0.bias": "blocks.175.attn1.to_out.bias",
1934
+ "model.diffusion_model.output_blocks.9.1.time_stack.0.attn1.to_out.0.weight": "blocks.175.attn1.to_out.weight",
1935
+ "model.diffusion_model.output_blocks.9.1.time_stack.0.attn1.to_q.weight": "blocks.175.attn1.to_q.weight",
1936
+ "model.diffusion_model.output_blocks.9.1.time_stack.0.attn1.to_v.weight": "blocks.175.attn1.to_v.weight",
1937
+ "model.diffusion_model.output_blocks.9.1.time_stack.0.attn2.to_k.weight": "blocks.175.attn2.to_k.weight",
1938
+ "model.diffusion_model.output_blocks.9.1.time_stack.0.attn2.to_out.0.bias": "blocks.175.attn2.to_out.bias",
1939
+ "model.diffusion_model.output_blocks.9.1.time_stack.0.attn2.to_out.0.weight": "blocks.175.attn2.to_out.weight",
1940
+ "model.diffusion_model.output_blocks.9.1.time_stack.0.attn2.to_q.weight": "blocks.175.attn2.to_q.weight",
1941
+ "model.diffusion_model.output_blocks.9.1.time_stack.0.attn2.to_v.weight": "blocks.175.attn2.to_v.weight",
1942
+ "model.diffusion_model.output_blocks.9.1.time_stack.0.ff.net.0.proj.bias": "blocks.175.act_fn_out.proj.bias",
1943
+ "model.diffusion_model.output_blocks.9.1.time_stack.0.ff.net.0.proj.weight": "blocks.175.act_fn_out.proj.weight",
1944
+ "model.diffusion_model.output_blocks.9.1.time_stack.0.ff.net.2.bias": "blocks.175.ff_out.bias",
1945
+ "model.diffusion_model.output_blocks.9.1.time_stack.0.ff.net.2.weight": "blocks.175.ff_out.weight",
1946
+ "model.diffusion_model.output_blocks.9.1.time_stack.0.ff_in.net.0.proj.bias": "blocks.175.act_fn_in.proj.bias",
1947
+ "model.diffusion_model.output_blocks.9.1.time_stack.0.ff_in.net.0.proj.weight": "blocks.175.act_fn_in.proj.weight",
1948
+ "model.diffusion_model.output_blocks.9.1.time_stack.0.ff_in.net.2.bias": "blocks.175.ff_in.bias",
1949
+ "model.diffusion_model.output_blocks.9.1.time_stack.0.ff_in.net.2.weight": "blocks.175.ff_in.weight",
1950
+ "model.diffusion_model.output_blocks.9.1.time_stack.0.norm1.bias": "blocks.175.norm1.bias",
1951
+ "model.diffusion_model.output_blocks.9.1.time_stack.0.norm1.weight": "blocks.175.norm1.weight",
1952
+ "model.diffusion_model.output_blocks.9.1.time_stack.0.norm2.bias": "blocks.175.norm2.bias",
1953
+ "model.diffusion_model.output_blocks.9.1.time_stack.0.norm2.weight": "blocks.175.norm2.weight",
1954
+ "model.diffusion_model.output_blocks.9.1.time_stack.0.norm3.bias": "blocks.175.norm_out.bias",
1955
+ "model.diffusion_model.output_blocks.9.1.time_stack.0.norm3.weight": "blocks.175.norm_out.weight",
1956
+ "model.diffusion_model.output_blocks.9.1.time_stack.0.norm_in.bias": "blocks.175.norm_in.bias",
1957
+ "model.diffusion_model.output_blocks.9.1.time_stack.0.norm_in.weight": "blocks.175.norm_in.weight",
1958
+ "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_k.weight": "blocks.173.transformer_blocks.0.attn1.to_k.weight",
1959
+ "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_out.0.bias": "blocks.173.transformer_blocks.0.attn1.to_out.bias",
1960
+ "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_out.0.weight": "blocks.173.transformer_blocks.0.attn1.to_out.weight",
1961
+ "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_q.weight": "blocks.173.transformer_blocks.0.attn1.to_q.weight",
1962
+ "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn1.to_v.weight": "blocks.173.transformer_blocks.0.attn1.to_v.weight",
1963
+ "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_k.weight": "blocks.173.transformer_blocks.0.attn2.to_k.weight",
1964
+ "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_out.0.bias": "blocks.173.transformer_blocks.0.attn2.to_out.bias",
1965
+ "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_out.0.weight": "blocks.173.transformer_blocks.0.attn2.to_out.weight",
1966
+ "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_q.weight": "blocks.173.transformer_blocks.0.attn2.to_q.weight",
1967
+ "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.attn2.to_v.weight": "blocks.173.transformer_blocks.0.attn2.to_v.weight",
1968
+ "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.ff.net.0.proj.bias": "blocks.173.transformer_blocks.0.act_fn.proj.bias",
1969
+ "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.ff.net.0.proj.weight": "blocks.173.transformer_blocks.0.act_fn.proj.weight",
1970
+ "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.ff.net.2.bias": "blocks.173.transformer_blocks.0.ff.bias",
1971
+ "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.ff.net.2.weight": "blocks.173.transformer_blocks.0.ff.weight",
1972
+ "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm1.bias": "blocks.173.transformer_blocks.0.norm1.bias",
1973
+ "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm1.weight": "blocks.173.transformer_blocks.0.norm1.weight",
1974
+ "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm2.bias": "blocks.173.transformer_blocks.0.norm2.bias",
1975
+ "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm2.weight": "blocks.173.transformer_blocks.0.norm2.weight",
1976
+ "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm3.bias": "blocks.173.transformer_blocks.0.norm3.bias",
1977
+ "model.diffusion_model.output_blocks.9.1.transformer_blocks.0.norm3.weight": "blocks.173.transformer_blocks.0.norm3.weight",
1978
+ "model.diffusion_model.time_embed.0.bias": "time_embedding.0.bias",
1979
+ "model.diffusion_model.time_embed.0.weight": "time_embedding.0.weight",
1980
+ "model.diffusion_model.time_embed.2.bias": "time_embedding.2.bias",
1981
+ "model.diffusion_model.time_embed.2.weight": "time_embedding.2.weight",
1982
+ }
1983
+ state_dict_ = {}
1984
+ for name in state_dict:
1985
+ if name in rename_dict:
1986
+ param = state_dict[name]
1987
+ if ".proj_in." in name or ".proj_out." in name:
1988
+ param = param.squeeze()
1989
+ state_dict_[rename_dict[name]] = param
1990
+ if add_positional_conv is not None:
1991
+ extra_names = [
1992
+ "blocks.7.positional_conv", "blocks.17.positional_conv", "blocks.29.positional_conv", "blocks.39.positional_conv",
1993
+ "blocks.51.positional_conv", "blocks.61.positional_conv", "blocks.83.positional_conv", "blocks.113.positional_conv",
1994
+ "blocks.123.positional_conv", "blocks.133.positional_conv", "blocks.144.positional_conv", "blocks.154.positional_conv",
1995
+ "blocks.164.positional_conv", "blocks.175.positional_conv", "blocks.185.positional_conv", "blocks.195.positional_conv",
1996
+ ]
1997
+ extra_channels = [320, 320, 640, 640, 1280, 1280, 1280, 1280, 1280, 1280, 640, 640, 640, 320, 320, 320]
1998
+ for name, channels in zip(extra_names, extra_channels):
1999
+ weight = torch.zeros((channels, channels, 3, 3, 3))
2000
+ weight[:,:,1,1,1] = torch.eye(channels, channels)
2001
+ bias = torch.zeros((channels,))
2002
+ state_dict_[name + ".weight"] = weight
2003
+ state_dict_[name + ".bias"] = bias
2004
+ return state_dict_