InvokeAI 6.10.0rc1__py3-none-any.whl → 6.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. invokeai/app/api/routers/model_manager.py +43 -1
  2. invokeai/app/invocations/fields.py +1 -1
  3. invokeai/app/invocations/flux2_denoise.py +499 -0
  4. invokeai/app/invocations/flux2_klein_model_loader.py +222 -0
  5. invokeai/app/invocations/flux2_klein_text_encoder.py +222 -0
  6. invokeai/app/invocations/flux2_vae_decode.py +106 -0
  7. invokeai/app/invocations/flux2_vae_encode.py +88 -0
  8. invokeai/app/invocations/flux_denoise.py +77 -3
  9. invokeai/app/invocations/flux_lora_loader.py +1 -1
  10. invokeai/app/invocations/flux_model_loader.py +2 -5
  11. invokeai/app/invocations/ideal_size.py +6 -1
  12. invokeai/app/invocations/metadata.py +4 -0
  13. invokeai/app/invocations/metadata_linked.py +47 -0
  14. invokeai/app/invocations/model.py +1 -0
  15. invokeai/app/invocations/pbr_maps.py +59 -0
  16. invokeai/app/invocations/z_image_denoise.py +244 -84
  17. invokeai/app/invocations/z_image_image_to_latents.py +9 -1
  18. invokeai/app/invocations/z_image_latents_to_image.py +9 -1
  19. invokeai/app/invocations/z_image_seed_variance_enhancer.py +110 -0
  20. invokeai/app/services/config/config_default.py +3 -1
  21. invokeai/app/services/invocation_stats/invocation_stats_common.py +6 -6
  22. invokeai/app/services/invocation_stats/invocation_stats_default.py +9 -4
  23. invokeai/app/services/model_manager/model_manager_default.py +7 -0
  24. invokeai/app/services/model_records/model_records_base.py +4 -2
  25. invokeai/app/services/shared/invocation_context.py +15 -0
  26. invokeai/app/services/shared/sqlite/sqlite_util.py +2 -0
  27. invokeai/app/services/shared/sqlite_migrator/migrations/migration_25.py +61 -0
  28. invokeai/app/util/step_callback.py +58 -2
  29. invokeai/backend/flux/denoise.py +338 -118
  30. invokeai/backend/flux/dype/__init__.py +31 -0
  31. invokeai/backend/flux/dype/base.py +260 -0
  32. invokeai/backend/flux/dype/embed.py +116 -0
  33. invokeai/backend/flux/dype/presets.py +148 -0
  34. invokeai/backend/flux/dype/rope.py +110 -0
  35. invokeai/backend/flux/extensions/dype_extension.py +91 -0
  36. invokeai/backend/flux/schedulers.py +62 -0
  37. invokeai/backend/flux/util.py +35 -1
  38. invokeai/backend/flux2/__init__.py +4 -0
  39. invokeai/backend/flux2/denoise.py +280 -0
  40. invokeai/backend/flux2/ref_image_extension.py +294 -0
  41. invokeai/backend/flux2/sampling_utils.py +209 -0
  42. invokeai/backend/image_util/pbr_maps/architecture/block.py +367 -0
  43. invokeai/backend/image_util/pbr_maps/architecture/pbr_rrdb_net.py +70 -0
  44. invokeai/backend/image_util/pbr_maps/pbr_maps.py +141 -0
  45. invokeai/backend/image_util/pbr_maps/utils/image_ops.py +93 -0
  46. invokeai/backend/model_manager/configs/factory.py +19 -1
  47. invokeai/backend/model_manager/configs/lora.py +36 -0
  48. invokeai/backend/model_manager/configs/main.py +395 -3
  49. invokeai/backend/model_manager/configs/qwen3_encoder.py +116 -7
  50. invokeai/backend/model_manager/configs/vae.py +104 -2
  51. invokeai/backend/model_manager/load/model_cache/model_cache.py +107 -2
  52. invokeai/backend/model_manager/load/model_loaders/cogview4.py +2 -1
  53. invokeai/backend/model_manager/load/model_loaders/flux.py +1020 -8
  54. invokeai/backend/model_manager/load/model_loaders/generic_diffusers.py +4 -2
  55. invokeai/backend/model_manager/load/model_loaders/onnx.py +1 -0
  56. invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py +2 -1
  57. invokeai/backend/model_manager/load/model_loaders/z_image.py +158 -31
  58. invokeai/backend/model_manager/starter_models.py +141 -4
  59. invokeai/backend/model_manager/taxonomy.py +31 -4
  60. invokeai/backend/model_manager/util/select_hf_files.py +3 -2
  61. invokeai/backend/patches/lora_conversions/z_image_lora_conversion_utils.py +39 -5
  62. invokeai/backend/quantization/gguf/ggml_tensor.py +15 -4
  63. invokeai/backend/util/vae_working_memory.py +0 -2
  64. invokeai/backend/z_image/extensions/regional_prompting_extension.py +10 -12
  65. invokeai/frontend/web/dist/assets/App-D13dX7be.js +161 -0
  66. invokeai/frontend/web/dist/assets/{browser-ponyfill-DHZxq1nk.js → browser-ponyfill-u_ZjhQTI.js} +1 -1
  67. invokeai/frontend/web/dist/assets/index-BB0nHmDe.js +530 -0
  68. invokeai/frontend/web/dist/index.html +1 -1
  69. invokeai/frontend/web/dist/locales/en-GB.json +1 -0
  70. invokeai/frontend/web/dist/locales/en.json +85 -6
  71. invokeai/frontend/web/dist/locales/it.json +135 -15
  72. invokeai/frontend/web/dist/locales/ru.json +11 -11
  73. invokeai/version/invokeai_version.py +1 -1
  74. {invokeai-6.10.0rc1.dist-info → invokeai-6.11.0.dist-info}/METADATA +8 -2
  75. {invokeai-6.10.0rc1.dist-info → invokeai-6.11.0.dist-info}/RECORD +81 -57
  76. {invokeai-6.10.0rc1.dist-info → invokeai-6.11.0.dist-info}/WHEEL +1 -1
  77. invokeai/frontend/web/dist/assets/App-CYhlZO3Q.js +0 -161
  78. invokeai/frontend/web/dist/assets/index-dgSJAY--.js +0 -530
  79. {invokeai-6.10.0rc1.dist-info → invokeai-6.11.0.dist-info}/entry_points.txt +0 -0
  80. {invokeai-6.10.0rc1.dist-info → invokeai-6.11.0.dist-info}/licenses/LICENSE +0 -0
  81. {invokeai-6.10.0rc1.dist-info → invokeai-6.11.0.dist-info}/licenses/LICENSE-SD1+SD2.txt +0 -0
  82. {invokeai-6.10.0rc1.dist-info → invokeai-6.11.0.dist-info}/licenses/LICENSE-SDXL.txt +0 -0
  83. {invokeai-6.10.0rc1.dist-info → invokeai-6.11.0.dist-info}/top_level.txt +0 -0
invokeai/backend/image_util/pbr_maps/architecture/block.py
@@ -0,0 +1,367 @@
+ # Original: https://github.com/joeyballentine/Material-Map-Generator
+ # Adapted and optimized for Invoke AI
+
+ from collections import OrderedDict
+ from typing import Any, List, Literal, Optional
+
+ import torch
+ import torch.nn as nn
+
+ ACTIVATION_LAYER_TYPE = Literal["relu", "leakyrelu", "prelu"]
+ NORMALIZATION_LAYER_TYPE = Literal["batch", "instance"]
+ PADDING_LAYER_TYPE = Literal["zero", "reflect", "replicate"]
+ BLOCK_MODE = Literal["CNA", "NAC", "CNAC"]
+ UPCONV_BLOCK_MODE = Literal["nearest", "linear", "bilinear", "bicubic", "trilinear"]
+
+
+ def act(act_type: ACTIVATION_LAYER_TYPE, inplace: bool = True, neg_slope: float = 0.2, n_prelu: int = 1):
+     """Helper to select an activation layer"""
+     if act_type == "relu":
+         layer = nn.ReLU(inplace)
+     elif act_type == "leakyrelu":
+         layer = nn.LeakyReLU(neg_slope, inplace)
+     elif act_type == "prelu":
+         layer = nn.PReLU(num_parameters=n_prelu, init=neg_slope)
+     return layer
+
+
+ def norm(norm_type: NORMALIZATION_LAYER_TYPE, nc: int):
+     """Helper to select a normalization layer"""
+     if norm_type == "batch":
+         layer = nn.BatchNorm2d(nc, affine=True)
+     elif norm_type == "instance":
+         layer = nn.InstanceNorm2d(nc, affine=False)
+     return layer
+
+
+ def pad(pad_type: PADDING_LAYER_TYPE, padding: int):
+     """Helper to select a padding layer (zero padding is handled inside nn.Conv2d)"""
+     if padding == 0 or pad_type == "zero":
+         return None
+     if pad_type == "reflect":
+         layer = nn.ReflectionPad2d(padding)
+     elif pad_type == "replicate":
+         layer = nn.ReplicationPad2d(padding)
+     return layer
+
+
+ def get_valid_padding(kernel_size: int, dilation: int):
+     kernel_size = kernel_size + (kernel_size - 1) * (dilation - 1)
+     padding = (kernel_size - 1) // 2
+     return padding
+
+
+ def sequential(*args: Any):
+     # Flatten the given modules into a single nn.Sequential, unwrapping any
+     # nested nn.Sequential and skipping None entries.
+     if len(args) == 1:
+         if isinstance(args[0], OrderedDict):
+             raise NotImplementedError("sequential does not support OrderedDict input.")
+         return args[0]  # No sequential is needed.
+     modules: List[nn.Module] = []
+     for module in args:
+         if isinstance(module, nn.Sequential):
+             for submodule in module.children():
+                 modules.append(submodule)
+         elif isinstance(module, nn.Module):
+             modules.append(module)
+     return nn.Sequential(*modules)
+
+
+ def conv_block(
+     in_nc: int,
+     out_nc: int,
+     kernel_size: int,
+     stride: int = 1,
+     dilation: int = 1,
+     groups: int = 1,
+     bias: bool = True,
+     pad_type: Optional[PADDING_LAYER_TYPE] = "zero",
+     norm_type: Optional[NORMALIZATION_LAYER_TYPE] = None,
+     act_type: Optional[ACTIVATION_LAYER_TYPE] = "relu",
+     mode: BLOCK_MODE = "CNA",
+ ):
+     """
+     Conv layer with padding, normalization and activation.
+     mode: CNA  --> Conv -> Norm -> Act
+           NAC  --> Norm -> Act -> Conv (Identity Mappings in Deep Residual Networks, ECCV16)
+           CNAC --> same ordering as NAC; used for the residual path of CNA-mode ResNet blocks
+     """
+     assert mode in ["CNA", "NAC", "CNAC"], f"Wrong conv mode [{mode}]"
+     padding = get_valid_padding(kernel_size, dilation)
+     p = pad(pad_type, padding) if pad_type else None
+     padding = padding if pad_type == "zero" else 0
+
+     c = nn.Conv2d(
+         in_nc,
+         out_nc,
+         kernel_size=kernel_size,
+         stride=stride,
+         padding=padding,
+         dilation=dilation,
+         bias=bias,
+         groups=groups,
+     )
+     a = act(act_type) if act_type else None
+     match mode:
+         case "CNA":
+             n = norm(norm_type, out_nc) if norm_type else None
+             return sequential(p, c, n, a)
+         case "NAC":
+             if norm_type is None and act_type is not None:
+                 # An inplace activation would modify the residual input here, so disable it.
+                 a = act(act_type, inplace=False)
+             n = norm(norm_type, in_nc) if norm_type else None
+             return sequential(n, a, p, c)
+         case "CNAC":
+             n = norm(norm_type, in_nc) if norm_type else None
+             return sequential(n, a, p, c)
+
+
+ class ConcatBlock(nn.Module):
+     # Concatenate the output of a submodule to its input along the channel dim
+     def __init__(self, submodule: nn.Module):
+         super(ConcatBlock, self).__init__()
+         self.sub = submodule
+
+     def forward(self, x: torch.Tensor):
+         output = torch.cat((x, self.sub(x)), dim=1)
+         return output
+
+     def __repr__(self):
+         tmpstr = "Identity .. \n|"
+         modstr = self.sub.__repr__().replace("\n", "\n|")
+         tmpstr = tmpstr + modstr
+         return tmpstr
+
+
+ class ShortcutBlock(nn.Module):
+     # Elementwise sum of the output of a submodule and its input
+     def __init__(self, submodule: nn.Module):
+         super(ShortcutBlock, self).__init__()
+         self.sub = submodule
+
+     def forward(self, x: torch.Tensor):
+         output = x + self.sub(x)
+         return output
+
+     def __repr__(self):
+         tmpstr = "Identity + \n|"
+         modstr = self.sub.__repr__().replace("\n", "\n|")
+         tmpstr = tmpstr + modstr
+         return tmpstr
+
+
+ class ShortcutBlockSPSR(nn.Module):
+     # Returns the input unchanged together with the (unapplied) submodule;
+     # the SPSR architecture applies the submodule externally.
+     def __init__(self, submodule: nn.Module):
+         super(ShortcutBlockSPSR, self).__init__()
+         self.sub = submodule
+
+     def forward(self, x: torch.Tensor):
+         return x, self.sub
+
+     def __repr__(self):
+         tmpstr = "Identity + \n|"
+         modstr = self.sub.__repr__().replace("\n", "\n|")
+         tmpstr = tmpstr + modstr
+         return tmpstr
+
+
+ class ResNetBlock(nn.Module):
+     """
+     ResNet Block, 3-3 style
+     with extra residual scaling used in EDSR
+     (Enhanced Deep Residual Networks for Single Image Super-Resolution, CVPRW 17)
+     """
+
+     def __init__(
+         self,
+         in_nc: int,
+         mid_nc: int,
+         out_nc: int,
+         kernel_size: int = 3,
+         stride: int = 1,
+         dilation: int = 1,
+         groups: int = 1,
+         bias: bool = True,
+         pad_type: PADDING_LAYER_TYPE = "zero",
+         norm_type: Optional[NORMALIZATION_LAYER_TYPE] = None,
+         act_type: Optional[ACTIVATION_LAYER_TYPE] = "relu",
+         mode: BLOCK_MODE = "CNA",
+         res_scale: int = 1,
+     ):
+         super(ResNetBlock, self).__init__()
+         conv0 = conv_block(
+             in_nc, mid_nc, kernel_size, stride, dilation, groups, bias, pad_type, norm_type, act_type, mode
+         )
+         if mode == "CNA":
+             act_type = None
+         if mode == "CNAC":  # Residual path: |-CNAC-|
+             act_type = None
+             norm_type = None
+         conv1 = conv_block(
+             mid_nc, out_nc, kernel_size, stride, dilation, groups, bias, pad_type, norm_type, act_type, mode
+         )
+
+         self.res = sequential(conv0, conv1)
+         self.res_scale = res_scale
+
+     def forward(self, x: torch.Tensor):
+         res = self.res(x).mul(self.res_scale)
+         return x + res
+
+
+ class ResidualDenseBlock_5C(nn.Module):
+     """
+     Residual Dense Block
+     style: 5 convs
+     The core module of paper: (Residual Dense Network for Image Super-Resolution, CVPR 18)
+     """
+
+     def __init__(
+         self,
+         nc: int,
+         kernel_size: int = 3,
+         gc: int = 32,
+         stride: int = 1,
+         bias: bool = True,
+         pad_type: PADDING_LAYER_TYPE = "zero",
+         norm_type: Optional[NORMALIZATION_LAYER_TYPE] = None,
+         act_type: ACTIVATION_LAYER_TYPE = "leakyrelu",
+         mode: BLOCK_MODE = "CNA",
+     ):
+         super(ResidualDenseBlock_5C, self).__init__()
+         # gc: growth channel, i.e. intermediate channels
+         self.conv1 = conv_block(
+             nc, gc, kernel_size, stride, bias=bias, pad_type=pad_type, norm_type=norm_type, act_type=act_type, mode=mode
+         )
+         self.conv2 = conv_block(
+             nc + gc,
+             gc,
+             kernel_size,
+             stride,
+             bias=bias,
+             pad_type=pad_type,
+             norm_type=norm_type,
+             act_type=act_type,
+             mode=mode,
+         )
+         self.conv3 = conv_block(
+             nc + 2 * gc,
+             gc,
+             kernel_size,
+             stride,
+             bias=bias,
+             pad_type=pad_type,
+             norm_type=norm_type,
+             act_type=act_type,
+             mode=mode,
+         )
+         self.conv4 = conv_block(
+             nc + 3 * gc,
+             gc,
+             kernel_size,
+             stride,
+             bias=bias,
+             pad_type=pad_type,
+             norm_type=norm_type,
+             act_type=act_type,
+             mode=mode,
+         )
+         if mode == "CNA":
+             last_act = None
+         else:
+             last_act = act_type
+         self.conv5 = conv_block(
+             nc + 4 * gc, nc, 3, stride, bias=bias, pad_type=pad_type, norm_type=norm_type, act_type=last_act, mode=mode
+         )
+
+     def forward(self, x: torch.Tensor):
+         x1 = self.conv1(x)
+         x2 = self.conv2(torch.cat((x, x1), 1))
+         x3 = self.conv3(torch.cat((x, x1, x2), 1))
+         x4 = self.conv4(torch.cat((x, x1, x2, x3), 1))
+         x5 = self.conv5(torch.cat((x, x1, x2, x3, x4), 1))
+         return x5.mul(0.2) + x
+
+
+ class RRDB(nn.Module):
+     """
+     Residual in Residual Dense Block
+     (ESRGAN: Enhanced Super-Resolution Generative Adversarial Networks)
+     """
+
+     def __init__(
+         self,
+         nc: int,
+         kernel_size: int = 3,
+         gc: int = 32,
+         stride: int = 1,
+         bias: bool = True,
+         pad_type: PADDING_LAYER_TYPE = "zero",
+         norm_type: Optional[NORMALIZATION_LAYER_TYPE] = None,
+         act_type: ACTIVATION_LAYER_TYPE = "leakyrelu",
+         mode: BLOCK_MODE = "CNA",
+     ):
+         super(RRDB, self).__init__()
+         self.RDB1 = ResidualDenseBlock_5C(nc, kernel_size, gc, stride, bias, pad_type, norm_type, act_type, mode)
+         self.RDB2 = ResidualDenseBlock_5C(nc, kernel_size, gc, stride, bias, pad_type, norm_type, act_type, mode)
+         self.RDB3 = ResidualDenseBlock_5C(nc, kernel_size, gc, stride, bias, pad_type, norm_type, act_type, mode)
+
+     def forward(self, x: torch.Tensor):
+         out = self.RDB1(x)
+         out = self.RDB2(out)
+         out = self.RDB3(out)
+         return out.mul(0.2) + x
+
+
+ # Upsampler
+ def pixelshuffle_block(
+     in_nc: int,
+     out_nc: int,
+     upscale_factor: int = 2,
+     kernel_size: int = 3,
+     stride: int = 1,
+     bias: bool = True,
+     pad_type: PADDING_LAYER_TYPE = "zero",
+     norm_type: Optional[NORMALIZATION_LAYER_TYPE] = None,
+     act_type: ACTIVATION_LAYER_TYPE = "relu",
+ ):
+     """
+     Pixel shuffle layer
+     (Real-Time Single Image and Video Super-Resolution Using an Efficient Sub-Pixel Convolutional
+     Neural Network, CVPR17)
+     """
+     conv = conv_block(
+         in_nc,
+         out_nc * (upscale_factor**2),
+         kernel_size,
+         stride,
+         bias=bias,
+         pad_type=pad_type,
+         norm_type=None,
+         act_type=None,
+     )
+     pixel_shuffle = nn.PixelShuffle(upscale_factor)
+
+     n = norm(norm_type, out_nc) if norm_type else None
+     a = act(act_type) if act_type else None
+     return sequential(conv, pixel_shuffle, n, a)
+
+
+ def upconv_block(
+     in_nc: int,
+     out_nc: int,
+     upscale_factor: int = 2,
+     kernel_size: int = 3,
+     stride: int = 1,
+     bias: bool = True,
+     pad_type: PADDING_LAYER_TYPE = "zero",
+     norm_type: Optional[NORMALIZATION_LAYER_TYPE] = None,
+     act_type: ACTIVATION_LAYER_TYPE = "relu",
+     mode: UPCONV_BLOCK_MODE = "nearest",
+ ):
+     # Upsample-then-conv avoids checkerboard artifacts; adapted from
+     # https://distill.pub/2016/deconv-checkerboard/
+     upsample = nn.Upsample(scale_factor=upscale_factor, mode=mode)
+     conv = conv_block(
+         in_nc, out_nc, kernel_size, stride, bias=bias, pad_type=pad_type, norm_type=norm_type, act_type=act_type
+     )
+     return sequential(upsample, conv)
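
For orientation, a minimal sketch of how the helpers in block.py compose; the channel counts and input shape are illustrative assumptions, not values used elsewhere in this package:

    import torch
    import invokeai.backend.image_util.pbr_maps.architecture.block as B

    # One CNA-style block: Conv -> (no norm) -> LeakyReLU, spatial size preserved
    stem = B.conv_block(3, 32, kernel_size=3, norm_type=None, act_type="leakyrelu", mode="CNA")
    # One Residual-in-Residual Dense Block operating on 32 channels
    body = B.RRDB(32, kernel_size=3, gc=32)

    x = torch.randn(1, 3, 64, 64)  # NCHW input
    y = body(stem(x))              # -> torch.Size([1, 32, 64, 64])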
invokeai/backend/image_util/pbr_maps/architecture/pbr_rrdb_net.py
@@ -0,0 +1,70 @@
+ # Original: https://github.com/joeyballentine/Material-Map-Generator
+ # Adapted and optimized for Invoke AI
+
+ import math
+ from typing import Literal, Optional
+
+ import torch
+ import torch.nn as nn
+
+ import invokeai.backend.image_util.pbr_maps.architecture.block as B
+
+ UPSCALE_MODE = Literal["upconv", "pixelshuffle"]
+
+
+ class PBR_RRDB_Net(nn.Module):
+     def __init__(
+         self,
+         in_nc: int,
+         out_nc: int,
+         nf: int,
+         nb: int,
+         gc: int = 32,
+         upscale: int = 4,
+         norm_type: Optional[B.NORMALIZATION_LAYER_TYPE] = None,
+         act_type: B.ACTIVATION_LAYER_TYPE = "leakyrelu",
+         mode: B.BLOCK_MODE = "CNA",
+         res_scale: int = 1,
+         upsample_mode: UPSCALE_MODE = "upconv",
+     ):
+         super(PBR_RRDB_Net, self).__init__()
+         n_upscale = int(math.log(upscale, 2))
+         if upscale == 3:
+             n_upscale = 1
+
+         fea_conv = B.conv_block(in_nc, nf, kernel_size=3, norm_type=None, act_type=None)
+         rb_blocks = [
+             B.RRDB(
+                 nf,
+                 kernel_size=3,
+                 gc=32,
+                 stride=1,
+                 bias=True,
+                 pad_type="zero",
+                 norm_type=norm_type,
+                 act_type=act_type,
+                 mode="CNA",
+             )
+             for _ in range(nb)
+         ]
+         LR_conv = B.conv_block(nf, nf, kernel_size=3, norm_type=norm_type, act_type=None, mode=mode)
+
+         if upsample_mode == "upconv":
+             upsample_block = B.upconv_block
+         elif upsample_mode == "pixelshuffle":
+             upsample_block = B.pixelshuffle_block
+
+         if upscale == 3:
+             # Kept as a list so the unpacking below also works in the single-stage case.
+             upsampler = [upsample_block(nf, nf, 3, act_type=act_type)]
+         else:
+             upsampler = [upsample_block(nf, nf, act_type=act_type) for _ in range(n_upscale)]
+
+         HR_conv0 = B.conv_block(nf, nf, kernel_size=3, norm_type=None, act_type=act_type)
+         HR_conv1 = B.conv_block(nf, out_nc, kernel_size=3, norm_type=None, act_type=None)
+
+         self.model = B.sequential(
+             fea_conv, B.ShortcutBlock(B.sequential(*rb_blocks, LR_conv)), *upsampler, HR_conv0, HR_conv1
+         )
+
+     def forward(self, x: torch.Tensor):
+         return self.model(x)
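
PBRMapsGenerator.load_model in pbr_maps.py (below) instantiates this network as PBR_RRDB_Net(3, 3, 32, 12, gc=32, upscale=1, ...), so n_upscale is 0, no upsampler stages are inserted, and the network maps an image to an equal-resolution image. A quick shape check under those values:

    import torch
    from invokeai.backend.image_util.pbr_maps.architecture.pbr_rrdb_net import PBR_RRDB_Net

    # Same configuration as PBRMapsGenerator.load_model: upscale=1 -> n_upscale == 0,
    # i.e. a same-resolution image-to-image translator.
    net = PBR_RRDB_Net(3, 3, 32, 12, gc=32, upscale=1)
    with torch.no_grad():
        out = net(torch.randn(1, 3, 128, 128))
    print(out.shape)  # torch.Size([1, 3, 128, 128])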
invokeai/backend/image_util/pbr_maps/pbr_maps.py
@@ -0,0 +1,141 @@
+ # Original: https://github.com/joeyballentine/Material-Map-Generator
+ # Adapted and optimized for Invoke AI
+
+ import pathlib
+ from typing import Any, Literal
+
+ import cv2
+ import numpy as np
+ import numpy.typing as npt
+ import torch
+ from PIL import Image
+ from safetensors.torch import load_file
+
+ from invokeai.backend.image_util.pbr_maps.architecture.pbr_rrdb_net import PBR_RRDB_Net
+ from invokeai.backend.image_util.pbr_maps.utils.image_ops import crop_seamless, esrgan_launcher_split_merge
+
+ NORMAL_MAP_MODEL = (
+     "https://huggingface.co/InvokeAI/pbr-material-maps/resolve/main/normal_map_generator.safetensors?download=true"
+ )
+ OTHER_MAP_MODEL = (
+     "https://huggingface.co/InvokeAI/pbr-material-maps/resolve/main/franken_map_generator.safetensors?download=true"
+ )
+
+
+ class PBRMapsGenerator:
+     def __init__(self, normal_map_model: PBR_RRDB_Net, other_map_model: PBR_RRDB_Net, device: torch.device) -> None:
+         self.normal_map_model = normal_map_model
+         self.other_map_model = other_map_model
+         self.device = device
+
+     @staticmethod
+     def load_model(model_path: pathlib.Path, device: torch.device) -> PBR_RRDB_Net:
+         state_dict = load_file(model_path.as_posix(), device=device.type)
+
+         model = PBR_RRDB_Net(
+             3,
+             3,
+             32,
+             12,
+             gc=32,
+             upscale=1,
+             norm_type=None,
+             act_type="leakyrelu",
+             mode="CNA",
+             res_scale=1,
+             upsample_mode="upconv",
+         )
+
+         model.load_state_dict(state_dict, strict=False)
+
+         del state_dict
+         if torch.cuda.is_available() and device.type == "cuda":
+             torch.cuda.empty_cache()
+
+         model.eval()
+
+         # Inference only: freeze all parameters.
+         for _, v in model.named_parameters():
+             v.requires_grad = False
+
+         return model.to(device)
+
+     def process(self, img: npt.NDArray[Any], model: PBR_RRDB_Net):
+         # Normalize to [0, 1] floats; the divisor is taken from the *input* integer dtype.
+         img = img.astype(np.float32) / np.iinfo(img.dtype).max
+         img = img[..., ::-1].copy()  # reverse channel order (RGB -> BGR) for the model
+         tensor_img = torch.tensor(img).permute(2, 0, 1).unsqueeze(0).to(self.device)
+
+         with torch.no_grad():
+             output = model(tensor_img).data.squeeze(0).float().cpu().clamp_(0, 1).numpy()
+         output = output[[2, 1, 0], :, :]  # reverse channel order back
+         output = np.transpose(output, (1, 2, 0))  # CHW -> HWC
+         output = (output * 255.0).round()
+         return output
+
+     def _cv2_to_pil(self, image: npt.NDArray[Any]):
+         image = image.astype(np.uint8)
+         # Single-channel maps (roughness, displacement) are passed through unchanged;
+         # cv2.cvtColor only accepts 3-channel input for this conversion.
+         if image.ndim == 3 and image.shape[2] == 3:
+             image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
+         return Image.fromarray(image)
+
+     def generate_maps(
+         self,
+         image: Image.Image,
+         tile_size: int = 512,
+         border_mode: Literal["none", "seamless", "mirror", "replicate"] = "none",
+     ):
+         """
+         Generate PBR texture maps (normal, roughness, and displacement) from an input image.
+         The image can optionally be padded before inference to control how borders are treated,
+         which can help create seamless or edge-consistent textures.
+
+         Args:
+             image: Source image used to generate the PBR maps.
+             tile_size: Maximum tile size used for tiled inference. If the image is larger than
+                 this size in either dimension, it will be split into tiles for processing and
+                 then merged.
+             border_mode: Strategy for padding the image before inference:
+                 - "none": No padding is applied; the image is processed as-is.
+                 - "seamless": Pads the image using wrap-around tiling
+                   (`cv2.BORDER_WRAP`) to help produce seamless textures.
+                 - "mirror": Pads the image by mirroring border pixels
+                   (`cv2.BORDER_REFLECT_101`) to reduce edge artifacts.
+                 - "replicate": Pads the image by replicating the edge pixels outward
+                   (`cv2.BORDER_REPLICATE`).
+
+         Returns:
+             A tuple of three PIL Images:
+             - normal_map: RGB normal map generated from the input.
+             - roughness: Single-channel roughness map extracted from the second model output.
+             - displacement: Single-channel displacement (height) map extracted from the
+               second model output.
+         """
+
+         models = [self.normal_map_model, self.other_map_model]
+         np_image = np.array(image).astype(np.uint8)
+
+         match border_mode:
+             case "seamless":
+                 np_image = cv2.copyMakeBorder(np_image, 16, 16, 16, 16, cv2.BORDER_WRAP)
+             case "mirror":
+                 np_image = cv2.copyMakeBorder(np_image, 16, 16, 16, 16, cv2.BORDER_REFLECT_101)
+             case "replicate":
+                 np_image = cv2.copyMakeBorder(np_image, 16, 16, 16, 16, cv2.BORDER_REPLICATE)
+             case "none":
+                 pass
+
+         img_height, img_width = np_image.shape[:2]
+
+         # Check whether tiled inference is needed.
+         do_split = img_height > tile_size or img_width > tile_size
+
+         if do_split:
+             rlts = esrgan_launcher_split_merge(np_image, self.process, models, scale_factor=1, tile_size=tile_size)
+         else:
+             rlts = [self.process(np_image, model) for model in models]
+
+         if border_mode != "none":
+             rlts = [crop_seamless(rlt) for rlt in rlts]
+
+         normal_map = self._cv2_to_pil(rlts[0])
+         roughness = self._cv2_to_pil(rlts[1][:, :, 1])
+         displacement = self._cv2_to_pil(rlts[1][:, :, 0])
+
+         return normal_map, roughness, displacement
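
An end-to-end sketch of the class above. The local checkpoint paths and image file names are placeholders: it assumes the files behind NORMAL_MAP_MODEL and OTHER_MAP_MODEL have already been downloaded, and that texture.png is any RGB texture on disk:

    import pathlib
    import torch
    from PIL import Image
    from invokeai.backend.image_util.pbr_maps.pbr_maps import PBRMapsGenerator

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Placeholder local paths for the two downloaded checkpoints.
    normal_net = PBRMapsGenerator.load_model(pathlib.Path("normal_map_generator.safetensors"), device)
    other_net = PBRMapsGenerator.load_model(pathlib.Path("franken_map_generator.safetensors"), device)

    gen = PBRMapsGenerator(normal_net, other_net, device)
    normal_map, roughness, displacement = gen.generate_maps(
        Image.open("texture.png").convert("RGB"),
        tile_size=512,
        border_mode="seamless",  # wrap-pad 16 px so the cropped result tiles cleanly
    )
    normal_map.save("texture_normal.png")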
invokeai/backend/image_util/pbr_maps/utils/image_ops.py
@@ -0,0 +1,93 @@
+ # Original: https://github.com/joeyballentine/Material-Map-Generator
+ # Adapted and optimized for Invoke AI
+
+ import math
+ from typing import Any, Callable, List
+
+ import numpy as np
+ import numpy.typing as npt
+
+ from invokeai.backend.image_util.pbr_maps.architecture.pbr_rrdb_net import PBR_RRDB_Net
+
+
+ def crop_seamless(img: npt.NDArray[Any]):
+     # Crop away the 16 px border added by PBRMapsGenerator.generate_maps before inference.
+     img_height, img_width = img.shape[:2]
+     y, x = 16, 16
+     h, w = img_height - 32, img_width - 32
+     img = img[y : y + h, x : x + w]
+     return img
+
+
+ # From https://github.com/ata4/esrgan-launcher/blob/master/upscale.py
+ def esrgan_launcher_split_merge(
+     input_image: npt.NDArray[Any],
+     upscale_function: Callable[[npt.NDArray[Any], PBR_RRDB_Net], npt.NDArray[Any]],
+     models: List[PBR_RRDB_Net],
+     scale_factor: int = 4,
+     tile_size: int = 512,
+     tile_padding: float = 0.125,
+ ):
+     # NOTE: axis 0 is called "width" here, following the upstream implementation.
+     # The naming is used consistently below, so the tiling remains correct.
+     width, height, depth = input_image.shape
+     output_width = width * scale_factor
+     output_height = height * scale_factor
+     output_shape = (output_width, output_height, depth)
+
+     # start with black images
+     output_images = [np.zeros(output_shape, np.uint8) for _ in range(len(models))]
+
+     tile_padding = math.ceil(tile_size * tile_padding)
+     tile_size = math.ceil(tile_size / scale_factor)
+
+     tiles_x = math.ceil(width / tile_size)
+     tiles_y = math.ceil(height / tile_size)
+
+     for y in range(tiles_y):
+         for x in range(tiles_x):
+             # extract tile from input image
+             ofs_x = x * tile_size
+             ofs_y = y * tile_size
+
+             # input tile area on total image
+             input_start_x = ofs_x
+             input_end_x = min(ofs_x + tile_size, width)
+
+             input_start_y = ofs_y
+             input_end_y = min(ofs_y + tile_size, height)
+
+             # input tile area on total image with padding
+             input_start_x_pad = max(input_start_x - tile_padding, 0)
+             input_end_x_pad = min(input_end_x + tile_padding, width)
+
+             input_start_y_pad = max(input_start_y - tile_padding, 0)
+             input_end_y_pad = min(input_end_y + tile_padding, height)
+
+             # input tile dimensions
+             input_tile_width = input_end_x - input_start_x
+             input_tile_height = input_end_y - input_start_y
+
+             input_tile = input_image[input_start_x_pad:input_end_x_pad, input_start_y_pad:input_end_y_pad]
+
+             for idx, model in enumerate(models):
+                 # upscale tile
+                 output_tile = upscale_function(input_tile, model)
+
+                 # output tile area on total image
+                 output_start_x = input_start_x * scale_factor
+                 output_end_x = input_end_x * scale_factor
+
+                 output_start_y = input_start_y * scale_factor
+                 output_end_y = input_end_y * scale_factor
+
+                 # output tile area without padding
+                 output_start_x_tile = (input_start_x - input_start_x_pad) * scale_factor
+                 output_end_x_tile = output_start_x_tile + input_tile_width * scale_factor
+
+                 output_start_y_tile = (input_start_y - input_start_y_pad) * scale_factor
+                 output_end_y_tile = output_start_y_tile + input_tile_height * scale_factor
+
+                 # put the unpadded region of the tile into the output image
+                 output_images[idx][output_start_x:output_end_x, output_start_y:output_end_y] = output_tile[
+                     output_start_x_tile:output_end_x_tile, output_start_y_tile:output_end_y_tile
+                 ]
+
+     return output_images
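
To make the tile geometry concrete, a worked example with the values generate_maps passes in (scale_factor=1, tile_size=512, default tile_padding=0.125); the 1280 px input size is only an illustration:

    import math

    scale_factor, tile_size, pad_frac = 1, 512, 0.125
    tile_padding = math.ceil(tile_size * pad_frac)   # 64 px of context per side
    tile_size = math.ceil(tile_size / scale_factor)  # still 512 when scale_factor == 1

    width = height = 1280
    tiles_per_axis = math.ceil(width / tile_size)    # 3 (tiles of 512, 512 and 256 px)
    # Each tile is inferred with up to 64 px of neighbouring context on every side
    # (a padded read of up to 640 x 640 px), but only its unpadded region is written
    # back, which suppresses visible seams at tile boundaries.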