InvokeAI: invokeai-6.10.0-py3-none-any.whl → invokeai-6.10.0rc1-py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to their respective public registries. It is provided for informational purposes only.
- invokeai/app/invocations/flux_denoise.py +1 -15
- invokeai/app/invocations/metadata_linked.py +0 -47
- invokeai/app/invocations/z_image_denoise.py +84 -244
- invokeai/app/services/config/config_default.py +1 -3
- invokeai/app/services/model_manager/model_manager_default.py +0 -7
- invokeai/backend/flux/denoise.py +11 -196
- invokeai/backend/model_manager/configs/lora.py +0 -36
- invokeai/backend/model_manager/load/model_cache/model_cache.py +2 -104
- invokeai/backend/model_manager/load/model_loaders/cogview4.py +1 -2
- invokeai/backend/model_manager/load/model_loaders/flux.py +6 -13
- invokeai/backend/model_manager/load/model_loaders/generic_diffusers.py +2 -4
- invokeai/backend/model_manager/load/model_loaders/onnx.py +0 -1
- invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py +1 -2
- invokeai/backend/model_manager/load/model_loaders/z_image.py +3 -37
- invokeai/backend/model_manager/starter_models.py +4 -13
- invokeai/backend/patches/lora_conversions/z_image_lora_conversion_utils.py +5 -39
- invokeai/backend/quantization/gguf/ggml_tensor.py +4 -15
- invokeai/backend/z_image/extensions/regional_prompting_extension.py +12 -10
- invokeai/frontend/web/dist/assets/App-CYhlZO3Q.js +161 -0
- invokeai/frontend/web/dist/assets/{browser-ponyfill-4xPFTMT3.js → browser-ponyfill-DHZxq1nk.js} +1 -1
- invokeai/frontend/web/dist/assets/{index-vCDSQboA.js → index-dgSJAY--.js} +51 -51
- invokeai/frontend/web/dist/index.html +1 -1
- invokeai/frontend/web/dist/locales/en.json +5 -11
- invokeai/version/invokeai_version.py +1 -1
- {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/METADATA +2 -2
- {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/RECORD +32 -39
- invokeai/app/invocations/pbr_maps.py +0 -59
- invokeai/backend/flux/schedulers.py +0 -62
- invokeai/backend/image_util/pbr_maps/architecture/block.py +0 -367
- invokeai/backend/image_util/pbr_maps/architecture/pbr_rrdb_net.py +0 -70
- invokeai/backend/image_util/pbr_maps/pbr_maps.py +0 -141
- invokeai/backend/image_util/pbr_maps/utils/image_ops.py +0 -93
- invokeai/frontend/web/dist/assets/App-BBELGD-n.js +0 -161
- invokeai/frontend/web/dist/locales/en-GB.json +0 -1
- {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/WHEEL +0 -0
- {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/entry_points.txt +0 -0
- {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/licenses/LICENSE +0 -0
- {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/licenses/LICENSE-SD1+SD2.txt +0 -0
- {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/licenses/LICENSE-SDXL.txt +0 -0
- {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/top_level.txt +0 -0
invokeai/backend/image_util/pbr_maps/architecture/block.py

```diff
@@ -1,367 +0,0 @@
-# Original: https://github.com/joeyballentine/Material-Map-Generator
-# Adopted and optimized for Invoke AI
-
-from collections import OrderedDict
-from typing import Any, List, Literal, Optional
-
-import torch
-import torch.nn as nn
-
-ACTIVATION_LAYER_TYPE = Literal["relu", "leakyrelu", "prelu"]
-NORMALIZATION_LAYER_TYPE = Literal["batch", "instance"]
-PADDING_LAYER_TYPE = Literal["zero", "reflect", "replicate"]
-BLOCK_MODE = Literal["CNA", "NAC", "CNAC"]
-UPCONV_BLOCK_MODE = Literal["nearest", "linear", "bilinear", "bicubic", "trilinear"]
-
-
-def act(act_type: ACTIVATION_LAYER_TYPE, inplace: bool = True, neg_slope: float = 0.2, n_prelu: int = 1):
-    """Helper to select Activation Layer"""
-    if act_type == "relu":
-        layer = nn.ReLU(inplace)
-    elif act_type == "leakyrelu":
-        layer = nn.LeakyReLU(neg_slope, inplace)
-    elif act_type == "prelu":
-        layer = nn.PReLU(num_parameters=n_prelu, init=neg_slope)
-    return layer
-
-
-def norm(norm_type: NORMALIZATION_LAYER_TYPE, nc: int):
-    """Helper to select Normalization Layer"""
-    if norm_type == "batch":
-        layer = nn.BatchNorm2d(nc, affine=True)
-    elif norm_type == "instance":
-        layer = nn.InstanceNorm2d(nc, affine=False)
-    return layer
-
-
-def pad(pad_type: PADDING_LAYER_TYPE, padding: int):
-    """Helper to select Padding Layer"""
-    if padding == 0 or pad_type == "zero":
-        return None
-    if pad_type == "reflect":
-        layer = nn.ReflectionPad2d(padding)
-    elif pad_type == "replicate":
-        layer = nn.ReplicationPad2d(padding)
-    return layer
-
-
-def get_valid_padding(kernel_size: int, dilation: int):
-    kernel_size = kernel_size + (kernel_size - 1) * (dilation - 1)
-    padding = (kernel_size - 1) // 2
-    return padding
-
-
-def sequential(*args: Any):
-    # Flatten Sequential. It unwraps nn.Sequential.
-    if len(args) == 1:
-        if isinstance(args[0], OrderedDict):
-            raise NotImplementedError("sequential does not support OrderedDict input.")
-        return args[0]  # No sequential is needed.
-    modules: List[nn.Module] = []
-    for module in args:
-        if isinstance(module, nn.Sequential):
-            for submodule in module.children():
-                modules.append(submodule)
-        elif isinstance(module, nn.Module):
-            modules.append(module)
-    return nn.Sequential(*modules)
-
-
-def conv_block(
-    in_nc: int,
-    out_nc: int,
-    kernel_size: int,
-    stride: int = 1,
-    dilation: int = 1,
-    groups: int = 1,
-    bias: bool = True,
-    pad_type: Optional[PADDING_LAYER_TYPE] = "zero",
-    norm_type: Optional[NORMALIZATION_LAYER_TYPE] = None,
-    act_type: Optional[ACTIVATION_LAYER_TYPE] = "relu",
-    mode: BLOCK_MODE = "CNA",
-):
-    """
-    Conv layer with padding, normalization, activation
-    mode: CNA --> Conv -> Norm -> Act
-        NAC --> Norm -> Act --> Conv (Identity Mappings in Deep Residual Networks, ECCV16)
-    """
-    assert mode in ["CNA", "NAC", "CNAC"], f"Wrong conv mode [{mode}]"
-    padding = get_valid_padding(kernel_size, dilation)
-    p = pad(pad_type, padding) if pad_type else None
-    padding = padding if pad_type == "zero" else 0
-
-    c = nn.Conv2d(
-        in_nc,
-        out_nc,
-        kernel_size=kernel_size,
-        stride=stride,
-        padding=padding,
-        dilation=dilation,
-        bias=bias,
-        groups=groups,
-    )
-    a = act(act_type) if act_type else None
-    match mode:
-        case "CNA":
-            n = norm(norm_type, out_nc) if norm_type else None
-            return sequential(p, c, n, a)
-        case "NAC":
-            if norm_type is None and act_type is not None:
-                a = act(act_type, inplace=False)
-            n = norm(norm_type, in_nc) if norm_type else None
-            return sequential(n, a, p, c)
-        case "CNAC":
-            n = norm(norm_type, in_nc) if norm_type else None
-            return sequential(n, a, p, c)
-
-
-class ConcatBlock(nn.Module):
-    # Concat the output of a submodule to its input
-    def __init__(self, submodule: nn.Module):
-        super(ConcatBlock, self).__init__()
-        self.sub = submodule
-
-    def forward(self, x: torch.Tensor):
-        output = torch.cat((x, self.sub(x)), dim=1)
-        return output
-
-    def __repr__(self):
-        tmpstr = "Identity .. \n|"
-        modstr = self.sub.__repr__().replace("\n", "\n|")
-        tmpstr = tmpstr + modstr
-        return tmpstr
-
-
-class ShortcutBlock(nn.Module):
-    # Elementwise sum the output of a submodule to its input
-    def __init__(self, submodule: nn.Module):
-        super(ShortcutBlock, self).__init__()
-        self.sub = submodule
-
-    def forward(self, x: torch.Tensor):
-        output = x + self.sub(x)
-        return output
-
-    def __repr__(self):
-        tmpstr = "Identity + \n|"
-        modstr = self.sub.__repr__().replace("\n", "\n|")
-        tmpstr = tmpstr + modstr
-        return tmpstr
-
-
-class ShortcutBlockSPSR(nn.Module):
-    # Elementwise sum the output of a submodule to its input
-    def __init__(self, submodule: nn.Module):
-        super(ShortcutBlockSPSR, self).__init__()
-        self.sub = submodule
-
-    def forward(self, x: torch.Tensor):
-        return x, self.sub
-
-    def __repr__(self):
-        tmpstr = "Identity + \n|"
-        modstr = self.sub.__repr__().replace("\n", "\n|")
-        tmpstr = tmpstr + modstr
-        return tmpstr
-
-
-class ResNetBlock(nn.Module):
-    """
-    ResNet Block, 3-3 style
-    with extra residual scaling used in EDSR
-    (Enhanced Deep Residual Networks for Single Image Super-Resolution, CVPRW 17)
-    """
-
-    def __init__(
-        self,
-        in_nc: int,
-        mid_nc: int,
-        out_nc: int,
-        kernel_size: int = 3,
-        stride: int = 1,
-        dilation: int = 1,
-        groups: int = 1,
-        bias: bool = True,
-        pad_type: PADDING_LAYER_TYPE = "zero",
-        norm_type: Optional[NORMALIZATION_LAYER_TYPE] = None,
-        act_type: Optional[ACTIVATION_LAYER_TYPE] = "relu",
-        mode: BLOCK_MODE = "CNA",
-        res_scale: int = 1,
-    ):
-        super(ResNetBlock, self).__init__()
-        conv0 = conv_block(
-            in_nc, mid_nc, kernel_size, stride, dilation, groups, bias, pad_type, norm_type, act_type, mode
-        )
-        if mode == "CNA":
-            act_type = None
-        if mode == "CNAC":  # Residual path: |-CNAC-|
-            act_type = None
-            norm_type = None
-        conv1 = conv_block(
-            mid_nc, out_nc, kernel_size, stride, dilation, groups, bias, pad_type, norm_type, act_type, mode
-        )
-
-        self.res = sequential(conv0, conv1)
-        self.res_scale = res_scale
-
-    def forward(self, x: torch.Tensor):
-        res = self.res(x).mul(self.res_scale)
-        return x + res
-
-
-class ResidualDenseBlock_5C(nn.Module):
-    """
-    Residual Dense Block
-    style: 5 convs
-    The core module of paper: (Residual Dense Network for Image Super-Resolution, CVPR 18)
-    """
-
-    def __init__(
-        self,
-        nc: int,
-        kernel_size: int = 3,
-        gc: int = 32,
-        stride: int = 1,
-        bias: bool = True,
-        pad_type: PADDING_LAYER_TYPE = "zero",
-        norm_type: Optional[NORMALIZATION_LAYER_TYPE] = None,
-        act_type: ACTIVATION_LAYER_TYPE = "leakyrelu",
-        mode: BLOCK_MODE = "CNA",
-    ):
-        super(ResidualDenseBlock_5C, self).__init__()
-        # gc: growth channel, i.e. intermediate channels
-        self.conv1 = conv_block(
-            nc, gc, kernel_size, stride, bias=bias, pad_type=pad_type, norm_type=norm_type, act_type=act_type, mode=mode
-        )
-        self.conv2 = conv_block(
-            nc + gc,
-            gc,
-            kernel_size,
-            stride,
-            bias=bias,
-            pad_type=pad_type,
-            norm_type=norm_type,
-            act_type=act_type,
-            mode=mode,
-        )
-        self.conv3 = conv_block(
-            nc + 2 * gc,
-            gc,
-            kernel_size,
-            stride,
-            bias=bias,
-            pad_type=pad_type,
-            norm_type=norm_type,
-            act_type=act_type,
-            mode=mode,
-        )
-        self.conv4 = conv_block(
-            nc + 3 * gc,
-            gc,
-            kernel_size,
-            stride,
-            bias=bias,
-            pad_type=pad_type,
-            norm_type=norm_type,
-            act_type=act_type,
-            mode=mode,
-        )
-        if mode == "CNA":
-            last_act = None
-        else:
-            last_act = act_type
-        self.conv5 = conv_block(
-            nc + 4 * gc, nc, 3, stride, bias=bias, pad_type=pad_type, norm_type=norm_type, act_type=last_act, mode=mode
-        )
-
-    def forward(self, x: torch.Tensor):
-        x1 = self.conv1(x)
-        x2 = self.conv2(torch.cat((x, x1), 1))
-        x3 = self.conv3(torch.cat((x, x1, x2), 1))
-        x4 = self.conv4(torch.cat((x, x1, x2, x3), 1))
-        x5 = self.conv5(torch.cat((x, x1, x2, x3, x4), 1))
-        return x5.mul(0.2) + x
-
-
-class RRDB(nn.Module):
-    """
-    Residual in Residual Dense Block
-    (ESRGAN: Enhanced Super-Resolution Generative Adversarial Networks)
-    """
-
-    def __init__(
-        self,
-        nc: int,
-        kernel_size: int = 3,
-        gc: int = 32,
-        stride: int = 1,
-        bias: bool = True,
-        pad_type: PADDING_LAYER_TYPE = "zero",
-        norm_type: Optional[NORMALIZATION_LAYER_TYPE] = None,
-        act_type: ACTIVATION_LAYER_TYPE = "leakyrelu",
-        mode: BLOCK_MODE = "CNA",
-    ):
-        super(RRDB, self).__init__()
-        self.RDB1 = ResidualDenseBlock_5C(nc, kernel_size, gc, stride, bias, pad_type, norm_type, act_type, mode)
-        self.RDB2 = ResidualDenseBlock_5C(nc, kernel_size, gc, stride, bias, pad_type, norm_type, act_type, mode)
-        self.RDB3 = ResidualDenseBlock_5C(nc, kernel_size, gc, stride, bias, pad_type, norm_type, act_type, mode)
-
-    def forward(self, x: torch.Tensor):
-        out = self.RDB1(x)
-        out = self.RDB2(out)
-        out = self.RDB3(out)
-        return out.mul(0.2) + x
-
-
-# Upsampler
-def pixelshuffle_block(
-    in_nc: int,
-    out_nc: int,
-    upscale_factor: int = 2,
-    kernel_size: int = 3,
-    stride: int = 1,
-    bias: bool = True,
-    pad_type: PADDING_LAYER_TYPE = "zero",
-    norm_type: Optional[NORMALIZATION_LAYER_TYPE] = None,
-    act_type: ACTIVATION_LAYER_TYPE = "relu",
-):
-    """
-    Pixel shuffle layer
-    (Real-Time Single Image and Video Super-Resolution Using an Efficient Sub-Pixel Convolutional
-    Neural Network, CVPR17)
-    """
-    conv = conv_block(
-        in_nc,
-        out_nc * (upscale_factor**2),
-        kernel_size,
-        stride,
-        bias=bias,
-        pad_type=pad_type,
-        norm_type=None,
-        act_type=None,
-    )
-    pixel_shuffle = nn.PixelShuffle(upscale_factor)
-
-    n = norm(norm_type, out_nc) if norm_type else None
-    a = act(act_type) if act_type else None
-    return sequential(conv, pixel_shuffle, n, a)
-
-
-def upconv_block(
-    in_nc: int,
-    out_nc: int,
-    upscale_factor: int = 2,
-    kernel_size: int = 3,
-    stride: int = 1,
-    bias: bool = True,
-    pad_type: PADDING_LAYER_TYPE = "zero",
-    norm_type: Optional[NORMALIZATION_LAYER_TYPE] = None,
-    act_type: ACTIVATION_LAYER_TYPE = "relu",
-    mode: UPCONV_BLOCK_MODE = "nearest",
-):
-    # Adopted from https://distill.pub/2016/deconv-checkerboard/
-    upsample = nn.Upsample(scale_factor=upscale_factor, mode=mode)
-    conv = conv_block(
-        in_nc, out_nc, kernel_size, stride, bias=bias, pad_type=pad_type, norm_type=norm_type, act_type=act_type
-    )
-    return sequential(upsample, conv)
```
invokeai/backend/image_util/pbr_maps/architecture/pbr_rrdb_net.py

```diff
@@ -1,70 +0,0 @@
-# Original: https://github.com/joeyballentine/Material-Map-Generator
-# Adopted and optimized for Invoke AI
-
-import math
-from typing import Literal, Optional
-
-import torch
-import torch.nn as nn
-
-import invokeai.backend.image_util.pbr_maps.architecture.block as B
-
-UPSCALE_MODE = Literal["upconv", "pixelshuffle"]
-
-
-class PBR_RRDB_Net(nn.Module):
-    def __init__(
-        self,
-        in_nc: int,
-        out_nc: int,
-        nf: int,
-        nb: int,
-        gc: int = 32,
-        upscale: int = 4,
-        norm_type: Optional[B.NORMALIZATION_LAYER_TYPE] = None,
-        act_type: B.ACTIVATION_LAYER_TYPE = "leakyrelu",
-        mode: B.BLOCK_MODE = "CNA",
-        res_scale: int = 1,
-        upsample_mode: UPSCALE_MODE = "upconv",
-    ):
-        super(PBR_RRDB_Net, self).__init__()
-        n_upscale = int(math.log(upscale, 2))
-        if upscale == 3:
-            n_upscale = 1
-
-        fea_conv = B.conv_block(in_nc, nf, kernel_size=3, norm_type=None, act_type=None)
-        rb_blocks = [
-            B.RRDB(
-                nf,
-                kernel_size=3,
-                gc=32,
-                stride=1,
-                bias=True,
-                pad_type="zero",
-                norm_type=norm_type,
-                act_type=act_type,
-                mode="CNA",
-            )
-            for _ in range(nb)
-        ]
-        LR_conv = B.conv_block(nf, nf, kernel_size=3, norm_type=norm_type, act_type=None, mode=mode)
-
-        if upsample_mode == "upconv":
-            upsample_block = B.upconv_block
-        elif upsample_mode == "pixelshuffle":
-            upsample_block = B.pixelshuffle_block
-
-        if upscale == 3:
-            upsampler = upsample_block(nf, nf, 3, act_type=act_type)
-        else:
-            upsampler = [upsample_block(nf, nf, act_type=act_type) for _ in range(n_upscale)]
-
-        HR_conv0 = B.conv_block(nf, nf, kernel_size=3, norm_type=None, act_type=act_type)
-        HR_conv1 = B.conv_block(nf, out_nc, kernel_size=3, norm_type=None, act_type=None)
-
-        self.model = B.sequential(
-            fea_conv, B.ShortcutBlock(B.sequential(*rb_blocks, LR_conv)), *upsampler, HR_conv0, HR_conv1
-        )
-
-    def forward(self, x: torch.Tensor):
-        return self.model(x)
```
invokeai/backend/image_util/pbr_maps/pbr_maps.py

```diff
@@ -1,141 +0,0 @@
-# Original: https://github.com/joeyballentine/Material-Map-Generator
-# Adopted and optimized for Invoke AI
-
-import pathlib
-from typing import Any, Literal
-
-import cv2
-import numpy as np
-import numpy.typing as npt
-import torch
-from PIL import Image
-from safetensors.torch import load_file
-
-from invokeai.backend.image_util.pbr_maps.architecture.pbr_rrdb_net import PBR_RRDB_Net
-from invokeai.backend.image_util.pbr_maps.utils.image_ops import crop_seamless, esrgan_launcher_split_merge
-
-NORMAL_MAP_MODEL = (
-    "https://huggingface.co/InvokeAI/pbr-material-maps/resolve/main/normal_map_generator.safetensors?download=true"
-)
-OTHER_MAP_MODEL = (
-    "https://huggingface.co/InvokeAI/pbr-material-maps/resolve/main/franken_map_generator.safetensors?download=true"
-)
-
-
-class PBRMapsGenerator:
-    def __init__(self, normal_map_model: PBR_RRDB_Net, other_map_model: PBR_RRDB_Net, device: torch.device) -> None:
-        self.normal_map_model = normal_map_model
-        self.other_map_model = other_map_model
-        self.device = device
-
-    @staticmethod
-    def load_model(model_path: pathlib.Path, device: torch.device) -> PBR_RRDB_Net:
-        state_dict = load_file(model_path.as_posix(), device=device.type)
-
-        model = PBR_RRDB_Net(
-            3,
-            3,
-            32,
-            12,
-            gc=32,
-            upscale=1,
-            norm_type=None,
-            act_type="leakyrelu",
-            mode="CNA",
-            res_scale=1,
-            upsample_mode="upconv",
-        )
-
-        model.load_state_dict(state_dict, strict=False)
-
-        del state_dict
-        if torch.cuda.is_available() and device.type == "cuda":
-            torch.cuda.empty_cache()
-
-        model.eval()
-
-        for _, v in model.named_parameters():
-            v.requires_grad = False
-
-        return model.to(device)
-
-    def process(self, img: npt.NDArray[Any], model: PBR_RRDB_Net):
-        img = img.astype(np.float32) / np.iinfo(img.dtype).max
-        img = img[..., ::-1].copy()
-        tensor_img = torch.tensor(img).permute(2, 0, 1).unsqueeze(0).to(self.device)
-
-        with torch.no_grad():
-            output = model(tensor_img).data.squeeze(0).float().cpu().clamp_(0, 1).numpy()
-        output = output[[2, 1, 0], :, :]
-        output = np.transpose(output, (1, 2, 0))
-        output = (output * 255.0).round()
-        return output
-
-    def _cv2_to_pil(self, image: npt.NDArray[Any]):
-        return Image.fromarray(cv2.cvtColor(image.astype(np.uint8), cv2.COLOR_RGB2BGR))
-
-    def generate_maps(
-        self,
-        image: Image.Image,
-        tile_size: int = 512,
-        border_mode: Literal["none", "seamless", "mirror", "replicate"] = "none",
-    ):
-        """
-        Generate PBR texture maps (normal, roughness, and displacement) from an input image.
-        The image can optionally be padded before inference to control how borders are treated,
-        which can help create seamless or edge-consistent textures.
-
-        Args:
-            image: Source image used to generate the PBR maps.
-            tile_size: Maximum tile size used for tiled inference. If the image is larger than
-                this size in either dimension, it will be split into tiles for processing and
-                then merged.
-
-            border_mode: Strategy for padding the image before inference:
-                - "none": No padding is applied; the image is processed as-is.
-                - "seamless": Pads the image using wrap-around tiling
-                    (`cv2.BORDER_WRAP`) to help produce seamless textures.
-                - "mirror": Pads the image by mirroring border pixels
-                    (`cv2.BORDER_REFLECT_101`) to reduce edge artifacts.
-                - "replicate": Pads the image by replicating the edge pixels outward
-                    (`cv2.BORDER_REPLICATE`).
-
-        Returns:
-            A tuple of three PIL Images:
-                - normal_map: RGB normal map generated from the input.
-                - roughness: Single-channel roughness map extracted from the second model output.
-                - displacement: Single-channel displacement (height) map extracted from the
-                    second model output.
-        """
-
-        models = [self.normal_map_model, self.other_map_model]
-        np_image = np.array(image).astype(np.uint8)
-
-        match border_mode:
-            case "seamless":
-                np_image = cv2.copyMakeBorder(np_image, 16, 16, 16, 16, cv2.BORDER_WRAP)
-            case "mirror":
-                np_image = cv2.copyMakeBorder(np_image, 16, 16, 16, 16, cv2.BORDER_REFLECT_101)
-            case "replicate":
-                np_image = cv2.copyMakeBorder(np_image, 16, 16, 16, 16, cv2.BORDER_REPLICATE)
-            case "none":
-                pass
-
-        img_height, img_width = np_image.shape[:2]
-
-        # Checking whether to perform tiled inference
-        do_split = img_height > tile_size or img_width > tile_size
-
-        if do_split:
-            rlts = esrgan_launcher_split_merge(np_image, self.process, models, scale_factor=1, tile_size=tile_size)
-        else:
-            rlts = [self.process(np_image, model) for model in models]
-
-        if border_mode != "none":
-            rlts = [crop_seamless(rlt) for rlt in rlts]
-
-        normal_map = self._cv2_to_pil(rlts[0])
-        roughness = self._cv2_to_pil(rlts[1][:, :, 1])
-        displacement = self._cv2_to_pil(rlts[1][:, :, 0])
-
-        return normal_map, roughness, displacement
```
invokeai/backend/image_util/pbr_maps/utils/image_ops.py

```diff
@@ -1,93 +0,0 @@
-# Original: https://github.com/joeyballentine/Material-Map-Generator
-# Adopted and optimized for Invoke AI
-
-import math
-from typing import Any, Callable, List
-
-import numpy as np
-import numpy.typing as npt
-
-from invokeai.backend.image_util.pbr_maps.architecture.pbr_rrdb_net import PBR_RRDB_Net
-
-
-def crop_seamless(img: npt.NDArray[Any]):
-    img_height, img_width = img.shape[:2]
-    y, x = 16, 16
-    h, w = img_height - 32, img_width - 32
-    img = img[y : y + h, x : x + w]
-    return img
-
-
-# from https://github.com/ata4/esrgan-launcher/blob/master/upscale.py
-def esrgan_launcher_split_merge(
-    input_image: npt.NDArray[Any],
-    upscale_function: Callable[[npt.NDArray[Any], PBR_RRDB_Net], npt.NDArray[Any]],
-    models: List[PBR_RRDB_Net],
-    scale_factor: int = 4,
-    tile_size: int = 512,
-    tile_padding: float = 0.125,
-):
-    width, height, depth = input_image.shape
-    output_width = width * scale_factor
-    output_height = height * scale_factor
-    output_shape = (output_width, output_height, depth)
-
-    # start with black image
-    output_images = [np.zeros(output_shape, np.uint8) for _ in range(len(models))]
-
-    tile_padding = math.ceil(tile_size * tile_padding)
-    tile_size = math.ceil(tile_size / scale_factor)
-
-    tiles_x = math.ceil(width / tile_size)
-    tiles_y = math.ceil(height / tile_size)
-
-    for y in range(tiles_y):
-        for x in range(tiles_x):
-            # extract tile from input image
-            ofs_x = x * tile_size
-            ofs_y = y * tile_size
-
-            # input tile area on total image
-            input_start_x = ofs_x
-            input_end_x = min(ofs_x + tile_size, width)
-
-            input_start_y = ofs_y
-            input_end_y = min(ofs_y + tile_size, height)
-
-            # input tile area on total image with padding
-            input_start_x_pad = max(input_start_x - tile_padding, 0)
-            input_end_x_pad = min(input_end_x + tile_padding, width)
-
-            input_start_y_pad = max(input_start_y - tile_padding, 0)
-            input_end_y_pad = min(input_end_y + tile_padding, height)
-
-            # input tile dimensions
-            input_tile_width = input_end_x - input_start_x
-            input_tile_height = input_end_y - input_start_y
-
-            input_tile = input_image[input_start_x_pad:input_end_x_pad, input_start_y_pad:input_end_y_pad]
-
-            for idx, model in enumerate(models):
-                # upscale tile
-                output_tile = upscale_function(input_tile, model)
-
-                # output tile area on total image
-                output_start_x = input_start_x * scale_factor
-                output_end_x = input_end_x * scale_factor
-
-                output_start_y = input_start_y * scale_factor
-                output_end_y = input_end_y * scale_factor
-
-                # output tile area without padding
-                output_start_x_tile = (input_start_x - input_start_x_pad) * scale_factor
-                output_end_x_tile = output_start_x_tile + input_tile_width * scale_factor
-
-                output_start_y_tile = (input_start_y - input_start_y_pad) * scale_factor
-                output_end_y_tile = output_start_y_tile + input_tile_height * scale_factor
-
-                # put tile into output image
-                output_images[idx][output_start_x:output_end_x, output_start_y:output_end_y] = output_tile[
-                    output_start_x_tile:output_end_x_tile, output_start_y_tile:output_end_y_tile
-                ]
-
-    return output_images
```