invokeai-6.9.0rc3-py3-none-any.whl → invokeai-6.10.0rc1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. invokeai/app/api/dependencies.py +2 -0
  2. invokeai/app/api/routers/model_manager.py +91 -2
  3. invokeai/app/api/routers/workflows.py +9 -0
  4. invokeai/app/invocations/fields.py +19 -0
  5. invokeai/app/invocations/image_to_latents.py +23 -5
  6. invokeai/app/invocations/latents_to_image.py +2 -25
  7. invokeai/app/invocations/metadata.py +9 -1
  8. invokeai/app/invocations/model.py +8 -0
  9. invokeai/app/invocations/primitives.py +12 -0
  10. invokeai/app/invocations/prompt_template.py +57 -0
  11. invokeai/app/invocations/z_image_control.py +112 -0
  12. invokeai/app/invocations/z_image_denoise.py +610 -0
  13. invokeai/app/invocations/z_image_image_to_latents.py +102 -0
  14. invokeai/app/invocations/z_image_latents_to_image.py +103 -0
  15. invokeai/app/invocations/z_image_lora_loader.py +153 -0
  16. invokeai/app/invocations/z_image_model_loader.py +135 -0
  17. invokeai/app/invocations/z_image_text_encoder.py +197 -0
  18. invokeai/app/services/model_install/model_install_common.py +14 -1
  19. invokeai/app/services/model_install/model_install_default.py +119 -19
  20. invokeai/app/services/model_records/model_records_base.py +12 -0
  21. invokeai/app/services/model_records/model_records_sql.py +17 -0
  22. invokeai/app/services/shared/graph.py +132 -77
  23. invokeai/app/services/workflow_records/workflow_records_base.py +8 -0
  24. invokeai/app/services/workflow_records/workflow_records_sqlite.py +42 -0
  25. invokeai/app/util/step_callback.py +3 -0
  26. invokeai/backend/model_manager/configs/controlnet.py +47 -1
  27. invokeai/backend/model_manager/configs/factory.py +26 -1
  28. invokeai/backend/model_manager/configs/lora.py +43 -1
  29. invokeai/backend/model_manager/configs/main.py +113 -0
  30. invokeai/backend/model_manager/configs/qwen3_encoder.py +156 -0
  31. invokeai/backend/model_manager/load/model_cache/torch_module_autocast/custom_modules/custom_diffusers_rms_norm.py +40 -0
  32. invokeai/backend/model_manager/load/model_cache/torch_module_autocast/custom_modules/custom_layer_norm.py +25 -0
  33. invokeai/backend/model_manager/load/model_cache/torch_module_autocast/torch_module_autocast.py +11 -2
  34. invokeai/backend/model_manager/load/model_loaders/lora.py +11 -0
  35. invokeai/backend/model_manager/load/model_loaders/z_image.py +935 -0
  36. invokeai/backend/model_manager/load/model_util.py +6 -1
  37. invokeai/backend/model_manager/metadata/metadata_base.py +12 -5
  38. invokeai/backend/model_manager/model_on_disk.py +3 -0
  39. invokeai/backend/model_manager/starter_models.py +70 -0
  40. invokeai/backend/model_manager/taxonomy.py +5 -0
  41. invokeai/backend/model_manager/util/select_hf_files.py +23 -8
  42. invokeai/backend/patches/layer_patcher.py +34 -16
  43. invokeai/backend/patches/layers/lora_layer_base.py +2 -1
  44. invokeai/backend/patches/lora_conversions/flux_aitoolkit_lora_conversion_utils.py +17 -2
  45. invokeai/backend/patches/lora_conversions/flux_xlabs_lora_conversion_utils.py +92 -0
  46. invokeai/backend/patches/lora_conversions/formats.py +5 -0
  47. invokeai/backend/patches/lora_conversions/z_image_lora_constants.py +8 -0
  48. invokeai/backend/patches/lora_conversions/z_image_lora_conversion_utils.py +155 -0
  49. invokeai/backend/quantization/gguf/ggml_tensor.py +27 -4
  50. invokeai/backend/quantization/gguf/loaders.py +47 -12
  51. invokeai/backend/stable_diffusion/diffusion/conditioning_data.py +13 -0
  52. invokeai/backend/util/devices.py +25 -0
  53. invokeai/backend/util/hotfixes.py +2 -2
  54. invokeai/backend/z_image/__init__.py +16 -0
  55. invokeai/backend/z_image/extensions/__init__.py +1 -0
  56. invokeai/backend/z_image/extensions/regional_prompting_extension.py +207 -0
  57. invokeai/backend/z_image/text_conditioning.py +74 -0
  58. invokeai/backend/z_image/z_image_control_adapter.py +238 -0
  59. invokeai/backend/z_image/z_image_control_transformer.py +643 -0
  60. invokeai/backend/z_image/z_image_controlnet_extension.py +531 -0
  61. invokeai/backend/z_image/z_image_patchify_utils.py +135 -0
  62. invokeai/backend/z_image/z_image_transformer_patch.py +234 -0
  63. invokeai/frontend/web/dist/assets/App-CYhlZO3Q.js +161 -0
  64. invokeai/frontend/web/dist/assets/{browser-ponyfill-CN1j0ARZ.js → browser-ponyfill-DHZxq1nk.js} +1 -1
  65. invokeai/frontend/web/dist/assets/index-dgSJAY--.js +530 -0
  66. invokeai/frontend/web/dist/index.html +1 -1
  67. invokeai/frontend/web/dist/locales/de.json +24 -6
  68. invokeai/frontend/web/dist/locales/en.json +70 -1
  69. invokeai/frontend/web/dist/locales/es.json +0 -5
  70. invokeai/frontend/web/dist/locales/fr.json +0 -6
  71. invokeai/frontend/web/dist/locales/it.json +17 -64
  72. invokeai/frontend/web/dist/locales/ja.json +379 -44
  73. invokeai/frontend/web/dist/locales/ru.json +0 -6
  74. invokeai/frontend/web/dist/locales/vi.json +7 -54
  75. invokeai/frontend/web/dist/locales/zh-CN.json +0 -6
  76. invokeai/version/invokeai_version.py +1 -1
  77. {invokeai-6.9.0rc3.dist-info → invokeai-6.10.0rc1.dist-info}/METADATA +3 -3
  78. {invokeai-6.9.0rc3.dist-info → invokeai-6.10.0rc1.dist-info}/RECORD +84 -60
  79. invokeai/frontend/web/dist/assets/App-Cn9UyjoV.js +0 -161
  80. invokeai/frontend/web/dist/assets/index-BDrf9CL-.js +0 -530
  81. {invokeai-6.9.0rc3.dist-info → invokeai-6.10.0rc1.dist-info}/WHEEL +0 -0
  82. {invokeai-6.9.0rc3.dist-info → invokeai-6.10.0rc1.dist-info}/entry_points.txt +0 -0
  83. {invokeai-6.9.0rc3.dist-info → invokeai-6.10.0rc1.dist-info}/licenses/LICENSE +0 -0
  84. {invokeai-6.9.0rc3.dist-info → invokeai-6.10.0rc1.dist-info}/licenses/LICENSE-SD1+SD2.txt +0 -0
  85. {invokeai-6.9.0rc3.dist-info → invokeai-6.10.0rc1.dist-info}/licenses/LICENSE-SDXL.txt +0 -0
  86. {invokeai-6.9.0rc3.dist-info → invokeai-6.10.0rc1.dist-info}/top_level.txt +0 -0
invokeai/app/invocations/z_image_image_to_latents.py
@@ -0,0 +1,102 @@
+from typing import Union
+
+import einops
+import torch
+from diffusers.models.autoencoders.autoencoder_kl import AutoencoderKL
+
+from invokeai.app.invocations.baseinvocation import BaseInvocation, Classification, invocation
+from invokeai.app.invocations.fields import (
+    FieldDescriptions,
+    ImageField,
+    Input,
+    InputField,
+    WithBoard,
+    WithMetadata,
+)
+from invokeai.app.invocations.model import VAEField
+from invokeai.app.invocations.primitives import LatentsOutput
+from invokeai.app.services.shared.invocation_context import InvocationContext
+from invokeai.backend.flux.modules.autoencoder import AutoEncoder as FluxAutoEncoder
+from invokeai.backend.model_manager.load.load_base import LoadedModel
+from invokeai.backend.stable_diffusion.diffusers_pipeline import image_resized_to_grid_as_tensor
+from invokeai.backend.util.devices import TorchDevice
+
+# Z-Image can use either the Diffusers AutoencoderKL or the FLUX AutoEncoder
+ZImageVAE = Union[AutoencoderKL, FluxAutoEncoder]
+
+
+@invocation(
+    "z_image_i2l",
+    title="Image to Latents - Z-Image",
+    tags=["image", "latents", "vae", "i2l", "z-image"],
+    category="image",
+    version="1.1.0",
+    classification=Classification.Prototype,
+)
+class ZImageImageToLatentsInvocation(BaseInvocation, WithMetadata, WithBoard):
+    """Generates latents from an image using Z-Image VAE (supports both Diffusers and FLUX VAE)."""
+
+    image: ImageField = InputField(description="The image to encode.")
+    vae: VAEField = InputField(description=FieldDescriptions.vae, input=Input.Connection)
+
+    @staticmethod
+    def vae_encode(vae_info: LoadedModel, image_tensor: torch.Tensor) -> torch.Tensor:
+        if not isinstance(vae_info.model, (AutoencoderKL, FluxAutoEncoder)):
+            raise TypeError(
+                f"Expected AutoencoderKL or FluxAutoEncoder for Z-Image VAE, got {type(vae_info.model).__name__}. "
+                "Ensure you are using a compatible VAE model."
+            )
+
+        with vae_info.model_on_device() as (_, vae):
+            if not isinstance(vae, (AutoencoderKL, FluxAutoEncoder)):
+                raise TypeError(
+                    f"Expected AutoencoderKL or FluxAutoEncoder, got {type(vae).__name__}. "
+                    "VAE model type changed unexpectedly after loading."
+                )
+
+            vae_dtype = next(iter(vae.parameters())).dtype
+            image_tensor = image_tensor.to(device=TorchDevice.choose_torch_device(), dtype=vae_dtype)
+
+            with torch.inference_mode():
+                if isinstance(vae, FluxAutoEncoder):
+                    # FLUX VAE handles scaling internally
+                    generator = torch.Generator(device=TorchDevice.choose_torch_device()).manual_seed(0)
+                    latents = vae.encode(image_tensor, sample=True, generator=generator)
+                else:
+                    # AutoencoderKL - needs manual scaling
+                    vae.disable_tiling()
+                    image_tensor_dist = vae.encode(image_tensor).latent_dist
+                    latents: torch.Tensor = image_tensor_dist.sample().to(dtype=vae.dtype)
+
+                    # Apply scaling_factor and shift_factor from VAE config
+                    # Z-Image uses: latents = (latents - shift_factor) * scaling_factor
+                    scaling_factor = vae.config.scaling_factor
+                    shift_factor = getattr(vae.config, "shift_factor", None)
+
+                    if shift_factor is not None:
+                        latents = latents - shift_factor
+                    latents = latents * scaling_factor
+
+        return latents
+
+    @torch.no_grad()
+    def invoke(self, context: InvocationContext) -> LatentsOutput:
+        image = context.images.get_pil(self.image.image_name)
+
+        image_tensor = image_resized_to_grid_as_tensor(image.convert("RGB"))
+        if image_tensor.dim() == 3:
+            image_tensor = einops.rearrange(image_tensor, "c h w -> 1 c h w")
+
+        vae_info = context.models.load(self.vae.vae)
+        if not isinstance(vae_info.model, (AutoencoderKL, FluxAutoEncoder)):
+            raise TypeError(
+                f"Expected AutoencoderKL or FluxAutoEncoder for Z-Image VAE, got {type(vae_info.model).__name__}. "
+                "Ensure you are using a compatible VAE model."
+            )
+
+        context.util.signal_progress("Running VAE")
+        latents = self.vae_encode(vae_info=vae_info, image_tensor=image_tensor)
+
+        latents = latents.to("cpu")
+        name = context.tensors.save(tensor=latents)
+        return LatentsOutput.build(latents_name=name, latents=latents, seed=None)
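
Note: for the AutoencoderKL path, the encode normalization above (`latents = (latents - shift_factor) * scaling_factor`) is the exact inverse of the decode denormalization in z_image_latents_to_image.py below. A minimal round-trip sketch, using illustrative factor values rather than any real VAE config:

    import torch

    scaling_factor = 0.3611  # illustrative; the real value comes from vae.config
    shift_factor = 0.1159    # illustrative; may be absent (None) on some VAEs

    raw = torch.randn(1, 16, 64, 64)  # stand-in for vae.encode(...).latent_dist.sample()

    # Encode side (ZImageImageToLatentsInvocation.vae_encode):
    latents = (raw - shift_factor) * scaling_factor

    # Decode side (ZImageLatentsToImageInvocation.invoke) inverts it:
    restored = latents / scaling_factor + shift_factor

    assert torch.allclose(restored, raw, atol=1e-5)
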
invokeai/app/invocations/z_image_latents_to_image.py
@@ -0,0 +1,103 @@
+from contextlib import nullcontext
+from typing import Union
+
+import torch
+from diffusers.models.autoencoders.autoencoder_kl import AutoencoderKL
+from einops import rearrange
+from PIL import Image
+
+from invokeai.app.invocations.baseinvocation import BaseInvocation, Classification, invocation
+from invokeai.app.invocations.fields import (
+    FieldDescriptions,
+    Input,
+    InputField,
+    LatentsField,
+    WithBoard,
+    WithMetadata,
+)
+from invokeai.app.invocations.model import VAEField
+from invokeai.app.invocations.primitives import ImageOutput
+from invokeai.app.services.shared.invocation_context import InvocationContext
+from invokeai.backend.flux.modules.autoencoder import AutoEncoder as FluxAutoEncoder
+from invokeai.backend.stable_diffusion.extensions.seamless import SeamlessExt
+from invokeai.backend.util.devices import TorchDevice
+
+# Z-Image can use either the Diffusers AutoencoderKL or the FLUX AutoEncoder
+ZImageVAE = Union[AutoencoderKL, FluxAutoEncoder]
+
+
+@invocation(
+    "z_image_l2i",
+    title="Latents to Image - Z-Image",
+    tags=["latents", "image", "vae", "l2i", "z-image"],
+    category="latents",
+    version="1.1.0",
+    classification=Classification.Prototype,
+)
+class ZImageLatentsToImageInvocation(BaseInvocation, WithMetadata, WithBoard):
+    """Generates an image from latents using Z-Image VAE (supports both Diffusers and FLUX VAE)."""
+
+    latents: LatentsField = InputField(description=FieldDescriptions.latents, input=Input.Connection)
+    vae: VAEField = InputField(description=FieldDescriptions.vae, input=Input.Connection)
+
+    @torch.no_grad()
+    def invoke(self, context: InvocationContext) -> ImageOutput:
+        latents = context.tensors.load(self.latents.latents_name)
+
+        vae_info = context.models.load(self.vae.vae)
+        if not isinstance(vae_info.model, (AutoencoderKL, FluxAutoEncoder)):
+            raise TypeError(
+                f"Expected AutoencoderKL or FluxAutoEncoder for Z-Image VAE, got {type(vae_info.model).__name__}. "
+                "Ensure you are using a compatible VAE model."
+            )
+
+        is_flux_vae = isinstance(vae_info.model, FluxAutoEncoder)
+
+        # FLUX VAE doesn't support seamless, so only apply for AutoencoderKL
+        seamless_context = (
+            nullcontext() if is_flux_vae else SeamlessExt.static_patch_model(vae_info.model, self.vae.seamless_axes)
+        )
+
+        with seamless_context, vae_info.model_on_device() as (_, vae):
+            context.util.signal_progress("Running VAE")
+            if not isinstance(vae, (AutoencoderKL, FluxAutoEncoder)):
+                raise TypeError(
+                    f"Expected AutoencoderKL or FluxAutoEncoder, got {type(vae).__name__}. "
+                    "VAE model type changed unexpectedly after loading."
+                )
+
+            vae_dtype = next(iter(vae.parameters())).dtype
+            latents = latents.to(device=TorchDevice.choose_torch_device(), dtype=vae_dtype)
+
+            # Disable tiling for AutoencoderKL
+            if isinstance(vae, AutoencoderKL):
+                vae.disable_tiling()
+
+            # Clear memory as VAE decode can request a lot
+            TorchDevice.empty_cache()
+
+            with torch.inference_mode():
+                if isinstance(vae, FluxAutoEncoder):
+                    # FLUX VAE handles scaling internally
+                    img = vae.decode(latents)
+                else:
+                    # AutoencoderKL - Apply scaling_factor and shift_factor from VAE config
+                    # Z-Image uses: latents = latents / scaling_factor + shift_factor
+                    scaling_factor = vae.config.scaling_factor
+                    shift_factor = getattr(vae.config, "shift_factor", None)
+
+                    latents = latents / scaling_factor
+                    if shift_factor is not None:
+                        latents = latents + shift_factor
+
+                    img = vae.decode(latents, return_dict=False)[0]
+
+        img = img.clamp(-1, 1)
+        img = rearrange(img[0], "c h w -> h w c")
+        img_pil = Image.fromarray((127.5 * (img + 1.0)).byte().cpu().numpy())
+
+        TorchDevice.empty_cache()
+
+        image_dto = context.images.save(image=img_pil)
+
+        return ImageOutput.build(image_dto)
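
Note: the final tensor-to-PIL conversion mirrors the FLUX l2i node: clamp to [-1, 1], reorder CHW to HWC, then map into [0, 255] bytes. A self-contained sketch of just that step, run on a random stand-in for the decoder output:

    import torch
    from einops import rearrange
    from PIL import Image

    img = torch.rand(1, 3, 64, 64) * 2 - 1  # stand-in decoder output in [-1, 1]

    img = img.clamp(-1, 1)
    img = rearrange(img[0], "c h w -> h w c")         # drop batch dim, CHW -> HWC
    arr = (127.5 * (img + 1.0)).byte().cpu().numpy()  # [-1, 1] -> [0, 255] uint8
    img_pil = Image.fromarray(arr)                    # 3-channel uint8 -> RGB
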
invokeai/app/invocations/z_image_lora_loader.py
@@ -0,0 +1,153 @@
+from typing import Optional
+
+from invokeai.app.invocations.baseinvocation import (
+    BaseInvocation,
+    BaseInvocationOutput,
+    invocation,
+    invocation_output,
+)
+from invokeai.app.invocations.fields import FieldDescriptions, Input, InputField, OutputField
+from invokeai.app.invocations.model import LoRAField, ModelIdentifierField, Qwen3EncoderField, TransformerField
+from invokeai.app.services.shared.invocation_context import InvocationContext
+from invokeai.backend.model_manager.taxonomy import BaseModelType, ModelType
+
+
+@invocation_output("z_image_lora_loader_output")
+class ZImageLoRALoaderOutput(BaseInvocationOutput):
+    """Z-Image LoRA Loader Output"""
+
+    transformer: Optional[TransformerField] = OutputField(
+        default=None, description=FieldDescriptions.transformer, title="Z-Image Transformer"
+    )
+    qwen3_encoder: Optional[Qwen3EncoderField] = OutputField(
+        default=None, description=FieldDescriptions.qwen3_encoder, title="Qwen3 Encoder"
+    )
+
+
+@invocation(
+    "z_image_lora_loader",
+    title="Apply LoRA - Z-Image",
+    tags=["lora", "model", "z-image"],
+    category="model",
+    version="1.0.0",
+)
+class ZImageLoRALoaderInvocation(BaseInvocation):
+    """Apply a LoRA model to a Z-Image transformer and/or Qwen3 text encoder."""
+
+    lora: ModelIdentifierField = InputField(
+        description=FieldDescriptions.lora_model,
+        title="LoRA",
+        ui_model_base=BaseModelType.ZImage,
+        ui_model_type=ModelType.LoRA,
+    )
+    weight: float = InputField(default=0.75, description=FieldDescriptions.lora_weight)
+    transformer: TransformerField | None = InputField(
+        default=None,
+        description=FieldDescriptions.transformer,
+        input=Input.Connection,
+        title="Z-Image Transformer",
+    )
+    qwen3_encoder: Qwen3EncoderField | None = InputField(
+        default=None,
+        title="Qwen3 Encoder",
+        description=FieldDescriptions.qwen3_encoder,
+        input=Input.Connection,
+    )
+
+    def invoke(self, context: InvocationContext) -> ZImageLoRALoaderOutput:
+        lora_key = self.lora.key
+
+        if not context.models.exists(lora_key):
+            raise ValueError(f"Unknown lora: {lora_key}!")
+
+        # Check for existing LoRAs with the same key.
+        if self.transformer and any(lora.lora.key == lora_key for lora in self.transformer.loras):
+            raise ValueError(f'LoRA "{lora_key}" already applied to transformer.')
+        if self.qwen3_encoder and any(lora.lora.key == lora_key for lora in self.qwen3_encoder.loras):
+            raise ValueError(f'LoRA "{lora_key}" already applied to Qwen3 encoder.')
+
+        output = ZImageLoRALoaderOutput()
+
+        # Attach LoRA layers to the models.
+        if self.transformer is not None:
+            output.transformer = self.transformer.model_copy(deep=True)
+            output.transformer.loras.append(
+                LoRAField(
+                    lora=self.lora,
+                    weight=self.weight,
+                )
+            )
+        if self.qwen3_encoder is not None:
+            output.qwen3_encoder = self.qwen3_encoder.model_copy(deep=True)
+            output.qwen3_encoder.loras.append(
+                LoRAField(
+                    lora=self.lora,
+                    weight=self.weight,
+                )
+            )
+
+        return output
+
+
+@invocation(
+    "z_image_lora_collection_loader",
+    title="Apply LoRA Collection - Z-Image",
+    tags=["lora", "model", "z-image"],
+    category="model",
+    version="1.0.0",
+)
+class ZImageLoRACollectionLoader(BaseInvocation):
+    """Applies a collection of LoRAs to a Z-Image transformer."""
+
+    loras: Optional[LoRAField | list[LoRAField]] = InputField(
+        default=None, description="LoRA models and weights. May be a single LoRA or collection.", title="LoRAs"
+    )
+
+    transformer: Optional[TransformerField] = InputField(
+        default=None,
+        description=FieldDescriptions.transformer,
+        input=Input.Connection,
+        title="Transformer",
+    )
+    qwen3_encoder: Qwen3EncoderField | None = InputField(
+        default=None,
+        title="Qwen3 Encoder",
+        description=FieldDescriptions.qwen3_encoder,
+        input=Input.Connection,
+    )
+
+    def invoke(self, context: InvocationContext) -> ZImageLoRALoaderOutput:
+        output = ZImageLoRALoaderOutput()
+        loras = self.loras if isinstance(self.loras, list) else [self.loras]
+        added_loras: list[str] = []
+
+        if self.transformer is not None:
+            output.transformer = self.transformer.model_copy(deep=True)
+
+        if self.qwen3_encoder is not None:
+            output.qwen3_encoder = self.qwen3_encoder.model_copy(deep=True)
+
+        for lora in loras:
+            if lora is None:
+                continue
+            if lora.lora.key in added_loras:
+                continue
+
+            if not context.models.exists(lora.lora.key):
+                raise Exception(f"Unknown lora: {lora.lora.key}!")
+
+            if lora.lora.base is not BaseModelType.ZImage:
+                raise ValueError(
+                    f"LoRA '{lora.lora.key}' is for {lora.lora.base.value if lora.lora.base else 'unknown'} models, "
+                    "not Z-Image models. Ensure you are using a Z-Image compatible LoRA."
+                )
+
+            added_loras.append(lora.lora.key)
+
+            if self.transformer is not None and output.transformer is not None:
+                output.transformer.loras.append(lora)
+
+            if self.qwen3_encoder is not None and output.qwen3_encoder is not None:
+                output.qwen3_encoder.loras.append(lora)
+
+        return output
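
Note: both loaders guard against applying the same LoRA key twice; the collection loader does it with the `added_loras` list. A simplified sketch of that dedup pattern, using hypothetical stand-in types (Lora and Transformer below are not the real field classes):

    from dataclasses import dataclass, field

    @dataclass
    class Lora:  # stand-in for LoRAField
        key: str
        weight: float

    @dataclass
    class Transformer:  # stand-in for TransformerField
        loras: list[Lora] = field(default_factory=list)

    def apply_collection(transformer: Transformer, loras: list[Lora]) -> Transformer:
        # Mirrors ZImageLoRACollectionLoader: skip duplicate keys, append the rest in order.
        added: list[str] = []
        for lora in loras:
            if lora.key in added:
                continue
            added.append(lora.key)
            transformer.loras.append(lora)
        return transformer

    t = apply_collection(Transformer(), [Lora("a", 0.75), Lora("a", 0.5), Lora("b", 1.0)])
    assert [lora.key for lora in t.loras] == ["a", "b"]  # second "a" was dropped
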
invokeai/app/invocations/z_image_model_loader.py
@@ -0,0 +1,135 @@
+from typing import Optional
+
+from invokeai.app.invocations.baseinvocation import (
+    BaseInvocation,
+    BaseInvocationOutput,
+    Classification,
+    invocation,
+    invocation_output,
+)
+from invokeai.app.invocations.fields import FieldDescriptions, Input, InputField, OutputField
+from invokeai.app.invocations.model import (
+    ModelIdentifierField,
+    Qwen3EncoderField,
+    TransformerField,
+    VAEField,
+)
+from invokeai.app.services.shared.invocation_context import InvocationContext
+from invokeai.backend.model_manager.taxonomy import BaseModelType, ModelFormat, ModelType, SubModelType
+
+
+@invocation_output("z_image_model_loader_output")
+class ZImageModelLoaderOutput(BaseInvocationOutput):
+    """Z-Image base model loader output."""
+
+    transformer: TransformerField = OutputField(description=FieldDescriptions.transformer, title="Transformer")
+    qwen3_encoder: Qwen3EncoderField = OutputField(description=FieldDescriptions.qwen3_encoder, title="Qwen3 Encoder")
+    vae: VAEField = OutputField(description=FieldDescriptions.vae, title="VAE")
+
+
+@invocation(
+    "z_image_model_loader",
+    title="Main Model - Z-Image",
+    tags=["model", "z-image"],
+    category="model",
+    version="3.0.0",
+    classification=Classification.Prototype,
+)
+class ZImageModelLoaderInvocation(BaseInvocation):
+    """Loads a Z-Image model, outputting its submodels.
+
+    Similar to FLUX, you can mix and match components:
+    - Transformer: From Z-Image main model (GGUF quantized or Diffusers format)
+    - VAE: Separate FLUX VAE (shared with FLUX models) or from a Diffusers Z-Image model
+    - Qwen3 Encoder: Separate Qwen3Encoder model or from a Diffusers Z-Image model
+    """
+
+    model: ModelIdentifierField = InputField(
+        description=FieldDescriptions.z_image_model,
+        input=Input.Direct,
+        ui_model_base=BaseModelType.ZImage,
+        ui_model_type=ModelType.Main,
+        title="Transformer",
+    )
+
+    vae_model: Optional[ModelIdentifierField] = InputField(
+        default=None,
+        description="Standalone VAE model. Z-Image uses the same VAE as FLUX (16-channel). "
+        "If not provided, VAE will be loaded from the Qwen3 Source model.",
+        input=Input.Direct,
+        ui_model_base=BaseModelType.Flux,
+        ui_model_type=ModelType.VAE,
+        title="VAE",
+    )
+
+    qwen3_encoder_model: Optional[ModelIdentifierField] = InputField(
+        default=None,
+        description="Standalone Qwen3 Encoder model. "
+        "If not provided, encoder will be loaded from the Qwen3 Source model.",
+        input=Input.Direct,
+        ui_model_type=ModelType.Qwen3Encoder,
+        title="Qwen3 Encoder",
+    )
+
+    qwen3_source_model: Optional[ModelIdentifierField] = InputField(
+        default=None,
+        description="Diffusers Z-Image model to extract VAE and/or Qwen3 encoder from. "
+        "Use this if you don't have separate VAE/Qwen3 models. "
+        "Ignored if both VAE and Qwen3 Encoder are provided separately.",
+        input=Input.Direct,
+        ui_model_base=BaseModelType.ZImage,
+        ui_model_type=ModelType.Main,
+        ui_model_format=ModelFormat.Diffusers,
+        title="Qwen3 Source (Diffusers)",
+    )
+
+    def invoke(self, context: InvocationContext) -> ZImageModelLoaderOutput:
+        # Transformer always comes from the main model
+        transformer = self.model.model_copy(update={"submodel_type": SubModelType.Transformer})
+
+        # Determine VAE source
+        if self.vae_model is not None:
+            # Use standalone FLUX VAE
+            vae = self.vae_model.model_copy(update={"submodel_type": SubModelType.VAE})
+        elif self.qwen3_source_model is not None:
+            # Extract from Diffusers Z-Image model
+            self._validate_diffusers_format(context, self.qwen3_source_model, "Qwen3 Source")
+            vae = self.qwen3_source_model.model_copy(update={"submodel_type": SubModelType.VAE})
+        else:
+            raise ValueError(
+                "No VAE source provided. Either set 'VAE' to a FLUX VAE model, "
+                "or set 'Qwen3 Source' to a Diffusers Z-Image model."
+            )
+
+        # Determine Qwen3 Encoder source
+        if self.qwen3_encoder_model is not None:
+            # Use standalone Qwen3 Encoder
+            qwen3_tokenizer = self.qwen3_encoder_model.model_copy(update={"submodel_type": SubModelType.Tokenizer})
+            qwen3_encoder = self.qwen3_encoder_model.model_copy(update={"submodel_type": SubModelType.TextEncoder})
+        elif self.qwen3_source_model is not None:
+            # Extract from Diffusers Z-Image model
+            self._validate_diffusers_format(context, self.qwen3_source_model, "Qwen3 Source")
+            qwen3_tokenizer = self.qwen3_source_model.model_copy(update={"submodel_type": SubModelType.Tokenizer})
+            qwen3_encoder = self.qwen3_source_model.model_copy(update={"submodel_type": SubModelType.TextEncoder})
+        else:
+            raise ValueError(
+                "No Qwen3 Encoder source provided. Either set 'Qwen3 Encoder' to a standalone model, "
+                "or set 'Qwen3 Source' to a Diffusers Z-Image model."
+            )
+
+        return ZImageModelLoaderOutput(
+            transformer=TransformerField(transformer=transformer, loras=[]),
+            qwen3_encoder=Qwen3EncoderField(tokenizer=qwen3_tokenizer, text_encoder=qwen3_encoder),
+            vae=VAEField(vae=vae),
+        )
+
+    def _validate_diffusers_format(
+        self, context: InvocationContext, model: ModelIdentifierField, model_name: str
+    ) -> None:
+        """Validate that a model is in Diffusers format."""
+        config = context.models.get_config(model)
+        if config.format != ModelFormat.Diffusers:
+            raise ValueError(
+                f"The {model_name} model must be a Diffusers format Z-Image model. "
+                f"The selected model '{config.name}' is in {config.format.value} format."
+            )
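
Note: the VAE and Qwen3 encoder sources follow the same two-step fallback: prefer the standalone model, fall back to the Diffusers Z-Image source, otherwise fail loudly. A sketch of that selection logic, with plain strings standing in for ModelIdentifierField:

    from typing import Optional

    def pick_source(standalone: Optional[str], diffusers_source: Optional[str], what: str) -> str:
        # Mirrors ZImageModelLoaderInvocation.invoke: standalone wins, then the
        # shared Diffusers source, else a hard error naming the missing input.
        if standalone is not None:
            return standalone
        if diffusers_source is not None:
            return diffusers_source
        raise ValueError(f"No {what} source provided.")

    assert pick_source("flux-vae", None, "VAE") == "flux-vae"
    assert pick_source(None, "z-image-diffusers", "VAE") == "z-image-diffusers"
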