comfy-diffusion 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. comfy_diffusion-0.1.0/PKG-INFO +183 -0
  2. comfy_diffusion-0.1.0/README.md +137 -0
  3. comfy_diffusion-0.1.0/comfy_diffusion/__init__.py +30 -0
  4. comfy_diffusion-0.1.0/comfy_diffusion/_runtime.py +26 -0
  5. comfy_diffusion-0.1.0/comfy_diffusion/audio.py +168 -0
  6. comfy_diffusion-0.1.0/comfy_diffusion/conditioning.py +25 -0
  7. comfy_diffusion-0.1.0/comfy_diffusion/lora.py +34 -0
  8. comfy_diffusion-0.1.0/comfy_diffusion/models.py +245 -0
  9. comfy_diffusion-0.1.0/comfy_diffusion/runtime.py +86 -0
  10. comfy_diffusion-0.1.0/comfy_diffusion/sampling.py +383 -0
  11. comfy_diffusion-0.1.0/comfy_diffusion/vae.py +390 -0
  12. comfy_diffusion-0.1.0/comfy_diffusion.egg-info/PKG-INFO +183 -0
  13. comfy_diffusion-0.1.0/comfy_diffusion.egg-info/SOURCES.txt +35 -0
  14. comfy_diffusion-0.1.0/comfy_diffusion.egg-info/dependency_links.txt +1 -0
  15. comfy_diffusion-0.1.0/comfy_diffusion.egg-info/requires.txt +42 -0
  16. comfy_diffusion-0.1.0/comfy_diffusion.egg-info/top_level.txt +1 -0
  17. comfy_diffusion-0.1.0/pyproject.toml +102 -0
  18. comfy_diffusion-0.1.0/setup.cfg +4 -0
  19. comfy_diffusion-0.1.0/tests/test_audio.py +405 -0
  20. comfy_diffusion-0.1.0/tests/test_comfyui_submodule.py +73 -0
  21. comfy_diffusion-0.1.0/tests/test_conditioning.py +199 -0
  22. comfy_diffusion-0.1.0/tests/test_cpu_only_smoke.py +67 -0
  23. comfy_diffusion-0.1.0/tests/test_lora.py +243 -0
  24. comfy_diffusion-0.1.0/tests/test_model_manager_checkpoint_loading.py +178 -0
  25. comfy_diffusion-0.1.0/tests/test_model_manager_clip_loading.py +132 -0
  26. comfy_diffusion-0.1.0/tests/test_model_manager_init.py +34 -0
  27. comfy_diffusion-0.1.0/tests/test_model_manager_ltxav_text_encoder_loading.py +240 -0
  28. comfy_diffusion-0.1.0/tests/test_model_manager_ltxv_audio_vae_loading.py +179 -0
  29. comfy_diffusion-0.1.0/tests/test_model_manager_unet_loading.py +108 -0
  30. comfy_diffusion-0.1.0/tests/test_model_manager_vae_loading.py +143 -0
  31. comfy_diffusion-0.1.0/tests/test_models_import.py +71 -0
  32. comfy_diffusion-0.1.0/tests/test_package_structure.py +26 -0
  33. comfy_diffusion-0.1.0/tests/test_pyproject_editable_install.py +88 -0
  34. comfy_diffusion-0.1.0/tests/test_runtime_diagnostics.py +171 -0
  35. comfy_diffusion-0.1.0/tests/test_runtime_path_management.py +89 -0
  36. comfy_diffusion-0.1.0/tests/test_sampling.py +1367 -0
  37. comfy_diffusion-0.1.0/tests/test_vae.py +837 -0
@@ -0,0 +1,183 @@
+ Metadata-Version: 2.4
+ Name: comfy-diffusion
+ Version: 0.1.0
+ Summary: ComfyUI inference engine as a standalone Python library (no server, no UI).
+ Requires-Python: >=3.12
+ Description-Content-Type: text/markdown
+ Requires-Dist: pillow>=12.1.1
+ Requires-Dist: psutil
+ Provides-Extra: cpu
+ Requires-Dist: torch; extra == "cpu"
+ Provides-Extra: cuda
+ Requires-Dist: torch; extra == "cuda"
+ Provides-Extra: comfyui
+ Requires-Dist: aiohttp>=3.11.8; extra == "comfyui"
+ Requires-Dist: alembic>=1.18.4; extra == "comfyui"
+ Requires-Dist: av>=14.2.0; extra == "comfyui"
+ Requires-Dist: comfy-aimdo>=0.2.7; extra == "comfyui"
+ Requires-Dist: comfy-kitchen>=0.2.7; extra == "comfyui"
+ Requires-Dist: comfyui-embedded-docs==0.4.3; extra == "comfyui"
+ Requires-Dist: comfyui-frontend-package==1.39.19; extra == "comfyui"
+ Requires-Dist: comfyui-workflow-templates==0.9.10; extra == "comfyui"
+ Requires-Dist: einops>=0.8.2; extra == "comfyui"
+ Requires-Dist: glfw>=2.10.0; extra == "comfyui"
+ Requires-Dist: kornia>=0.7.1; extra == "comfyui"
+ Requires-Dist: numpy>=1.25.0; extra == "comfyui"
+ Requires-Dist: pillow>=12.1.1; extra == "comfyui"
+ Requires-Dist: psutil>=7.2.2; extra == "comfyui"
+ Requires-Dist: pydantic~=2.0; extra == "comfyui"
+ Requires-Dist: pydantic-settings~=2.0; extra == "comfyui"
+ Requires-Dist: pyopengl>=3.1.10; extra == "comfyui"
+ Requires-Dist: pyyaml>=6.0.3; extra == "comfyui"
+ Requires-Dist: requests>=2.32.5; extra == "comfyui"
+ Requires-Dist: safetensors>=0.4.2; extra == "comfyui"
+ Requires-Dist: scipy>=1.17.1; extra == "comfyui"
+ Requires-Dist: sentencepiece>=0.2.1; extra == "comfyui"
+ Requires-Dist: spandrel>=0.4.2; extra == "comfyui"
+ Requires-Dist: sqlalchemy>=2.0.48; extra == "comfyui"
+ Requires-Dist: tokenizers>=0.13.3; extra == "comfyui"
+ Requires-Dist: torch>=2.10.0; extra == "comfyui"
+ Requires-Dist: torchaudio>=2.10.0; extra == "comfyui"
+ Requires-Dist: torchsde>=0.2.6; extra == "comfyui"
+ Requires-Dist: torchvision>=0.25.0; extra == "comfyui"
+ Requires-Dist: tqdm>=4.67.3; extra == "comfyui"
+ Requires-Dist: transformers>=4.50.3; extra == "comfyui"
+ Requires-Dist: yarl>=1.18.0; extra == "comfyui"
+
+ # comfy-diffusion
+
+ A Python library that exposes ComfyUI's inference engine as importable modules — no server, no node graph, no UI.
+
+ ```python
+ from comfy_diffusion import check_runtime
+
+ print(check_runtime())
+ # {"comfyui_version": "0.9.x", "device": "cuda:0", "vram_total_mb": 8192, ...}
+ ```
+
+ ---
+
+ ## Why I built this
+
+ I've been building creative AI applications — tools that generate music, visuals, and video for streaming platforms. For a while I used `diffusers` and `DiffSynth-Studio` as my inference backends. They're great libraries: well documented and easy to import. But I kept hitting the same wall: the best models, the best fine-tunes, the ones that actually produce good results, are all built for ComfyUI.
+
+ The LoRAs on Civitai, the checkpoints people spend months training, the workflows the community shares — they're tested on ComfyUI. When I used them through diffusers I'd get inconsistent results, or they just wouldn't work as intended. ComfyUI's sampler implementations, its VRAM management, its model loading logic — these aren't just UI conveniences; they're the reason the outputs look the way they do.
+
+ The problem is that ComfyUI wasn't built to be a library. It's an application. The only way to use it programmatically is to run it as a server and talk to it over HTTP — which means every project I build depends on a full ComfyUI backend running somewhere. That's a separate process to manage, a separate service to deploy, and a monolith that loads every node and capability whether my app needs them or not.
+
+ `comfy-diffusion` is my answer to that. ComfyUI's inference engine — `comfy.model_management`, `comfy.samplers`, `comfy.sd`, all of it — is perfectly importable Python code. It was just never packaged as a library. So I'm packaging it as one.
+
+ I built this for myself, to use in my own projects. But I'm building it in the open because I suspect I'm not the only one who wants to write `import comfy_diffusion` instead of running a server.
+
+ ---
+
+ ## What it is
+
+ `comfy-diffusion` imports ComfyUI's internal modules directly — no server, no HTTP, no node system. ComfyUI is vendored as a git submodule, and its internals become transparently importable when you `import comfy_diffusion`.
+
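+ Concretely, importing the package puts the vendored ComfyUI onto `sys.path`, so ComfyUI's own modules import in the same process. A minimal sketch (the `comfy.*` modules are the internals mentioned above):
+
+ ```python
+ import comfy_diffusion  # side effect: inserts vendor/ComfyUI onto sys.path
+
+ import comfy.samplers          # ComfyUI internals now import directly
+ import comfy.model_management  # same engine code the server would use
+ ```
+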
+ The API exposes ComfyUI's building blocks as plain Python functions. You compose them directly — the same way you'd wire nodes in ComfyUI, but in code:
+
+ ```python
+ import torch
+ from PIL import Image
+
+ from comfy_diffusion.models import ModelManager
+ from comfy_diffusion.conditioning import encode_prompt
+ from comfy_diffusion.sampling import sample
+ from comfy_diffusion import vae_decode, vae_encode, apply_lora
+
+ manager = ModelManager(models_dir="/path/to/models")
+ checkpoint = manager.load_checkpoint("animagine-xl.safetensors")
+
+ # Apply a LoRA
+ model, clip = apply_lora(checkpoint.model, checkpoint.clip, "style.safetensors", 0.8, 0.8)
+
+ # Encode prompts
+ positive = encode_prompt(clip, "a portrait of a woman, studio lighting")
+ negative = encode_prompt(clip, "blurry, low quality")
+
+ # txt2img: a 1x4x64x64 latent decodes to a 512x512 image (the VAE upscales 8x)
+ latent = {"samples": torch.zeros(1, 4, 64, 64)}
+ denoised = sample(
+     model, positive, negative, latent,
+     steps=20, cfg=7.0, sampler_name="euler",
+     scheduler="normal", seed=42,
+ )
+ image = vae_decode(checkpoint.vae, denoised)
+ image.save("output.png")
+
+ # img2img: encode a source image, then denoise only partially (denoise < 1.0)
+ source = Image.open("input.png")
+ latent = vae_encode(checkpoint.vae, source)
+ denoised = sample(
+     model, positive, negative, latent,
+     steps=20, cfg=7.0, sampler_name="euler",
+     scheduler="normal", seed=42, denoise=0.75,
+ )
+ image = vae_decode(checkpoint.vae, denoised)
+ image.save("output_img2img.png")
+ ```
+
+ The modularity is the point. Every building block is explicit — you see exactly what's happening at each step, and you can swap any piece without fighting a pipeline abstraction.
+
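+ For instance, the standalone loaders from iteration 07 let you decode with a separately loaded VAE in place of the checkpoint's bundled one. The sketch below (continuing the example above) is hypothetical: the exact import path and signature of `load_vae` aren't shown in this README, so both are assumptions.
+
+ ```python
+ from comfy_diffusion.models import load_vae  # assumed location of the standalone loader
+
+ # Hypothetical signature: load a VAE file by name and use it instead of checkpoint.vae
+ alt_vae = load_vae("sdxl_vae.safetensors")
+ image = vae_decode(alt_vae, denoised)
+ image.save("output_alt_vae.png")
+ ```
+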
+ ---
+
+ ## What it is not
+
+ - Not a ComfyUI wrapper that talks to a running server
+ - Not a node system or workflow runner
+ - Not a replacement for ComfyUI — it depends on it
+ - Not a general-purpose diffusion library — it's opinionated toward ComfyUI's engine
+ - Not a pipeline framework — there is no `ImagePipeline`; you compose the blocks yourself
+
+ ---
+
+ ## Status
+
+ Early development, built iteratively, one capability block at a time. The full node inventory and iteration plan is in [`ROADMAP.md`](ROADMAP.md).
+
+ | # | Module | Goal | Status |
+ |---|--------|------|--------|
+ | 01 | `_runtime` / `check_runtime()` | Package foundation + ComfyUI vendoring | ✅ Done |
+ | 02 | `models` | Checkpoint loading (`ModelManager`, `CheckpointResult`) | ✅ Done |
+ | 03 | `conditioning` | Prompt encoding via `encode_prompt` | ✅ Done |
+ | 04 | `sampling` | KSampler wrapper via `sample()` | ✅ Done |
+ | 05 | `vae` | VAE decode latent→PIL via `vae_decode()` | ✅ Done |
+ | 06 | `lora` | LoRA loading and stacking via `apply_lora()` | ✅ Done |
+ | 07 | `vae` + `models` | VAE encode + standalone loaders (`load_vae`, `load_clip`, `load_unet`) | ✅ Done |
+ | 08 | `vae` — tiled | Tiled VAE encode/decode for large images without OOM | ⬜ Next |
+ | 09 | `vae` — batch/video | Batch VAE encode/decode for video frame sequences | ⬜ |
+ | 10 | `sampling` — advanced | `SamplerCustomAdvanced`, schedulers, sigma manipulation | ⬜ |
+ | 11 | `audio` | Stable Audio, WAN sound-to-video, LTXV audio, ACE Step | ⬜ |
+ | — | **`v0.1.0-preview`** | **Preview release milestone** | ⬜ |
+ | 12–18 | conditioning, controlnet, latent, image, mask, model patches, packaging | Post-preview | ⬜ |
+
+ ---
+
+ ## Installation
+
+ The package is **not published on PyPI yet**. Install from the repo (clone + submodule + uv).
+
+ ComfyUI's dependencies come from `vendor/ComfyUI/requirements.txt` and are installed via the `comfyui` extra.
+
+ **Note:** `uv.lock` pins the CPU variant of torch so that CI (which has no GPU) can run `uv sync` and get reproducible tests. One sync installs CPU torch for everyone; GPU users then replace torch with the step below.
+
+ ```bash
+ # 1. ComfyUI submodule (required after clone)
+ git submodule update --init
+
+ # 2. Same for everyone (installs CPU torch)
+ uv sync --extra comfyui
+
+ # 3. GPU only: replace torch with a CUDA build (required after every uv sync)
+ uv pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124 --force-reinstall
+ # RTX 50xx (Blackwell): use cu128 instead
+ uv pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128 --force-reinstall
+ # Verify: uv run python -c "import torch; print(torch.__version__, torch.cuda.is_available())"
+ ```
+
+ > Requires Python 3.12+. ComfyUI is vendored — no separate installation needed. Once the package is on PyPI, you will be able to `pip install "comfy-diffusion[cuda]"` or `uv add "comfy-diffusion[cuda]"`.
+
+ ---
+
+ ## License
+
+ GPL-3.0 — same as ComfyUI, which this project depends on.
@@ -0,0 +1,137 @@
+ # comfy-diffusion
+
+ A Python library that exposes ComfyUI's inference engine as importable modules — no server, no node graph, no UI.
+
+ ```python
+ from comfy_diffusion import check_runtime
+
+ print(check_runtime())
+ # {"comfyui_version": "0.9.x", "device": "cuda:0", "vram_total_mb": 8192, ...}
+ ```
+
+ ---
+
+ ## Why I built this
+
+ I've been building creative AI applications — tools that generate music, visuals, and video for streaming platforms. For a while I used `diffusers` and `DiffSynth-Studio` as my inference backends. They're great libraries: well documented and easy to import. But I kept hitting the same wall: the best models, the best fine-tunes, the ones that actually produce good results, are all built for ComfyUI.
+
+ The LoRAs on Civitai, the checkpoints people spend months training, the workflows the community shares — they're tested on ComfyUI. When I used them through diffusers I'd get inconsistent results, or they just wouldn't work as intended. ComfyUI's sampler implementations, its VRAM management, its model loading logic — these aren't just UI conveniences; they're the reason the outputs look the way they do.
+
+ The problem is that ComfyUI wasn't built to be a library. It's an application. The only way to use it programmatically is to run it as a server and talk to it over HTTP — which means every project I build depends on a full ComfyUI backend running somewhere. That's a separate process to manage, a separate service to deploy, and a monolith that loads every node and capability whether my app needs them or not.
+
+ `comfy-diffusion` is my answer to that. ComfyUI's inference engine — `comfy.model_management`, `comfy.samplers`, `comfy.sd`, all of it — is perfectly importable Python code. It was just never packaged as a library. So I'm packaging it as one.
+
+ I built this for myself, to use in my own projects. But I'm building it in the open because I suspect I'm not the only one who wants to write `import comfy_diffusion` instead of running a server.
+
+ ---
+
+ ## What it is
+
+ `comfy-diffusion` imports ComfyUI's internal modules directly — no server, no HTTP, no node system. ComfyUI is vendored as a git submodule, and its internals become transparently importable when you `import comfy_diffusion`.
+
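+ Concretely, importing the package puts the vendored ComfyUI onto `sys.path`, so ComfyUI's own modules import in the same process. A minimal sketch (the `comfy.*` modules are the internals mentioned above):
+
+ ```python
+ import comfy_diffusion  # side effect: inserts vendor/ComfyUI onto sys.path
+
+ import comfy.samplers          # ComfyUI internals now import directly
+ import comfy.model_management  # same engine code the server would use
+ ```
+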
+ The API exposes ComfyUI's building blocks as plain Python functions. You compose them directly — the same way you'd wire nodes in ComfyUI, but in code:
+
+ ```python
+ import torch
+ from PIL import Image
+
+ from comfy_diffusion.models import ModelManager
+ from comfy_diffusion.conditioning import encode_prompt
+ from comfy_diffusion.sampling import sample
+ from comfy_diffusion import vae_decode, vae_encode, apply_lora
+
+ manager = ModelManager(models_dir="/path/to/models")
+ checkpoint = manager.load_checkpoint("animagine-xl.safetensors")
+
+ # Apply a LoRA
+ model, clip = apply_lora(checkpoint.model, checkpoint.clip, "style.safetensors", 0.8, 0.8)
+
+ # Encode prompts
+ positive = encode_prompt(clip, "a portrait of a woman, studio lighting")
+ negative = encode_prompt(clip, "blurry, low quality")
+
+ # txt2img: a 1x4x64x64 latent decodes to a 512x512 image (the VAE upscales 8x)
+ latent = {"samples": torch.zeros(1, 4, 64, 64)}
+ denoised = sample(
+     model, positive, negative, latent,
+     steps=20, cfg=7.0, sampler_name="euler",
+     scheduler="normal", seed=42,
+ )
+ image = vae_decode(checkpoint.vae, denoised)
+ image.save("output.png")
+
+ # img2img: encode a source image, then denoise only partially (denoise < 1.0)
+ source = Image.open("input.png")
+ latent = vae_encode(checkpoint.vae, source)
+ denoised = sample(
+     model, positive, negative, latent,
+     steps=20, cfg=7.0, sampler_name="euler",
+     scheduler="normal", seed=42, denoise=0.75,
+ )
+ image = vae_decode(checkpoint.vae, denoised)
+ image.save("output_img2img.png")
+ ```
+
+ The modularity is the point. Every building block is explicit — you see exactly what's happening at each step, and you can swap any piece without fighting a pipeline abstraction.
+
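+ For instance, the standalone loaders from iteration 07 let you decode with a separately loaded VAE in place of the checkpoint's bundled one. The sketch below (continuing the example above) is hypothetical: the exact import path and signature of `load_vae` aren't shown in this README, so both are assumptions.
+
+ ```python
+ from comfy_diffusion.models import load_vae  # assumed location of the standalone loader
+
+ # Hypothetical signature: load a VAE file by name and use it instead of checkpoint.vae
+ alt_vae = load_vae("sdxl_vae.safetensors")
+ image = vae_decode(alt_vae, denoised)
+ image.save("output_alt_vae.png")
+ ```
+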
+ ---
+
+ ## What it is not
+
+ - Not a ComfyUI wrapper that talks to a running server
+ - Not a node system or workflow runner
+ - Not a replacement for ComfyUI — it depends on it
+ - Not a general-purpose diffusion library — it's opinionated toward ComfyUI's engine
+ - Not a pipeline framework — there is no `ImagePipeline`; you compose the blocks yourself
+
+ ---
+
+ ## Status
+
+ Early development, built iteratively, one capability block at a time. The full node inventory and iteration plan is in [`ROADMAP.md`](ROADMAP.md).
+
+ | # | Module | Goal | Status |
+ |---|--------|------|--------|
+ | 01 | `_runtime` / `check_runtime()` | Package foundation + ComfyUI vendoring | ✅ Done |
+ | 02 | `models` | Checkpoint loading (`ModelManager`, `CheckpointResult`) | ✅ Done |
+ | 03 | `conditioning` | Prompt encoding via `encode_prompt` | ✅ Done |
+ | 04 | `sampling` | KSampler wrapper via `sample()` | ✅ Done |
+ | 05 | `vae` | VAE decode latent→PIL via `vae_decode()` | ✅ Done |
+ | 06 | `lora` | LoRA loading and stacking via `apply_lora()` | ✅ Done |
+ | 07 | `vae` + `models` | VAE encode + standalone loaders (`load_vae`, `load_clip`, `load_unet`) | ✅ Done |
+ | 08 | `vae` — tiled | Tiled VAE encode/decode for large images without OOM | ⬜ Next |
+ | 09 | `vae` — batch/video | Batch VAE encode/decode for video frame sequences | ⬜ |
+ | 10 | `sampling` — advanced | `SamplerCustomAdvanced`, schedulers, sigma manipulation | ⬜ |
+ | 11 | `audio` | Stable Audio, WAN sound-to-video, LTXV audio, ACE Step | ⬜ |
+ | — | **`v0.1.0-preview`** | **Preview release milestone** | ⬜ |
+ | 12–18 | conditioning, controlnet, latent, image, mask, model patches, packaging | Post-preview | ⬜ |
+
+ ---
+
+ ## Installation
+
+ The package is **not published on PyPI yet**. Install from the repo (clone + submodule + uv).
+
+ ComfyUI's dependencies come from `vendor/ComfyUI/requirements.txt` and are installed via the `comfyui` extra.
+
+ **Note:** `uv.lock` pins the CPU variant of torch so that CI (which has no GPU) can run `uv sync` and get reproducible tests. One sync installs CPU torch for everyone; GPU users then replace torch with the step below.
+
+ ```bash
+ # 1. ComfyUI submodule (required after clone)
+ git submodule update --init
+
+ # 2. Same for everyone (installs CPU torch)
+ uv sync --extra comfyui
+
+ # 3. GPU only: replace torch with a CUDA build (required after every uv sync)
+ uv pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124 --force-reinstall
+ # RTX 50xx (Blackwell): use cu128 instead
+ uv pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128 --force-reinstall
+ # Verify: uv run python -c "import torch; print(torch.__version__, torch.cuda.is_available())"
+ ```
+
+ > Requires Python 3.12+. ComfyUI is vendored — no separate installation needed. Once the package is on PyPI, you will be able to `pip install "comfy-diffusion[cuda]"` or `uv add "comfy-diffusion[cuda]"`.
+
+ ---
+
+ ## License
+
+ GPL-3.0 — same as ComfyUI, which this project depends on.
@@ -0,0 +1,30 @@
+ """Public package entrypoint for comfy_diffusion."""
+
+ from ._runtime import ensure_comfyui_on_path
+ from .lora import apply_lora
+ from .runtime import check_runtime
+ from .vae import (
+     vae_decode,
+     vae_decode_batch,
+     vae_decode_batch_tiled,
+     vae_decode_tiled,
+     vae_encode,
+     vae_encode_batch,
+     vae_encode_batch_tiled,
+     vae_encode_tiled,
+ )
+
+ # Side effect at import time: make the vendored ComfyUI importable.
+ ensure_comfyui_on_path()
+
+ __all__ = [
+     "check_runtime",
+     "vae_decode",
+     "vae_decode_batch",
+     "vae_decode_batch_tiled",
+     "vae_decode_tiled",
+     "vae_encode",
+     "vae_encode_batch",
+     "vae_encode_batch_tiled",
+     "vae_encode_tiled",
+     "apply_lora",
+ ]
@@ -0,0 +1,26 @@
+ """Internal runtime bootstrap for comfy_diffusion.
+
+ Path insertion is intentionally lightweight and import-safe: this module must not
+ import torch or comfy internals just to make ComfyUI discoverable.
+ """
+
+ from __future__ import annotations
+
+ import sys
+ from pathlib import Path
+
+
+ def _comfyui_root() -> Path:
+     """Return the absolute path to the vendored ComfyUI directory."""
+     return Path(__file__).resolve().parents[1] / "vendor" / "ComfyUI"
+
+
+ def ensure_comfyui_on_path() -> Path:
+     """Ensure vendored ComfyUI is importable and return the inserted path."""
+     comfyui_root = _comfyui_root()
+     comfyui_root_str = str(comfyui_root)
+
+     if comfyui_root_str not in sys.path:
+         sys.path.insert(0, comfyui_root_str)
+
+     return comfyui_root
@@ -0,0 +1,168 @@
+ """Audio helpers."""
+
+ from __future__ import annotations
+
+ from typing import Any, Protocol, cast
+
+
+ class _LtxvAudioVaeEncoder(Protocol):
+     sample_rate: int
+
+     def encode(self, audio: Any) -> Any: ...
+
+
+ class _LtxvAudioVaeDecoder(Protocol):
+     output_sample_rate: int
+
+     def decode(self, latent: Any) -> Any: ...
+
+
+ class _LtxvAudioVae(Protocol):
+     sample_rate: int
+     latent_channels: int
+     latent_frequency_bins: int
+
+     def num_of_latents_from_frames(self, frames_number: int, frame_rate: int) -> int: ...
+
+
+ class _AceStep15Clip(Protocol):
+     def tokenize(
+         self,
+         tags: str,
+         *,
+         lyrics: str,
+         bpm: int,
+         duration: float,
+         timesignature: int,
+         language: str,
+         keyscale: str,
+         seed: int,
+         generate_audio_codes: bool,
+         cfg_scale: float,
+         temperature: float,
+         top_p: float,
+         top_k: int,
+         min_p: float,
+     ) -> Any: ...
+
+     def encode_from_tokens_scheduled(self, tokens: Any) -> Any: ...
+
+
+ def _get_ltxv_empty_latent_audio_type() -> Any:
+     """Resolve ComfyUI LTXVEmptyLatentAudio node at call time."""
+     from ._runtime import ensure_comfyui_on_path
+
+     ensure_comfyui_on_path()
+     from comfy_extras.nodes_lt_audio import LTXVEmptyLatentAudio
+
+     return LTXVEmptyLatentAudio
+
+
+ def _get_ace_step_15_latent_audio_dependencies() -> tuple[Any, Any]:
+     """Resolve torch and ComfyUI model management at call time."""
+     from ._runtime import ensure_comfyui_on_path
+
+     ensure_comfyui_on_path()
+     import comfy.model_management
+     import torch
+
+     return torch, comfy.model_management
+
+
+ def _unwrap_node_output(output: Any) -> Any:
+     """Return first output for ComfyUI V3 nodes and tuple-style APIs."""
+     if hasattr(output, "result"):
+         return output.result[0]
+     if isinstance(output, tuple):
+         return output[0]
+     return output
+
+
+ def ltxv_audio_vae_encode(vae: _LtxvAudioVaeEncoder, audio: Any) -> dict[str, Any]:
+     """Encode raw audio with an LTXV audio VAE."""
+     audio_latents = vae.encode(audio)
+     return {"samples": audio_latents, "sample_rate": int(vae.sample_rate), "type": "audio"}
+
+
+ def ltxv_audio_vae_decode(vae: _LtxvAudioVaeDecoder, latent: Any) -> dict[str, Any]:
+     """Decode latent audio with an LTXV audio VAE."""
+     latent_tensor = latent["samples"] if isinstance(latent, dict) else latent
+     if getattr(latent_tensor, "is_nested", False):
+         # Nested tensors carry a sequence of latents; decode the last entry.
+         latent_tensor = latent_tensor.unbind()[-1]
+     audio = vae.decode(latent_tensor).to(latent_tensor.device)
+     return {"waveform": audio, "sample_rate": int(vae.output_sample_rate)}
+
+
+ def ltxv_empty_latent_audio(
+     audio_vae: _LtxvAudioVae,
+     frames_number: int,
+     frame_rate: int = 25,
+     batch_size: int = 1,
+ ) -> dict[str, Any]:
+     """Create empty LTXV audio latents compatible with ComfyUI's audio pipeline."""
+     ltxv_empty_latent_audio_type = _get_ltxv_empty_latent_audio_type()
+     return cast(
+         dict[str, Any],
+         _unwrap_node_output(
+             ltxv_empty_latent_audio_type.execute(
+                 frames_number=frames_number,
+                 frame_rate=frame_rate,
+                 batch_size=batch_size,
+                 audio_vae=audio_vae,
+             )
+         ),
+     )
+
+
+ def encode_ace_step_15_audio(
+     clip: _AceStep15Clip,
+     tags: str,
+     lyrics: str = "",
+     seed: int = 0,
+     bpm: int = 120,
+     duration: float = 120.0,
+     timesignature: str = "4",
+     language: str = "en",
+     keyscale: str = "C major",
+     generate_audio_codes: bool = True,
+     cfg_scale: float = 2.0,
+     temperature: float = 0.85,
+     top_p: float = 0.9,
+     top_k: int = 0,
+     min_p: float = 0.0,
+ ) -> Any:
+     """Encode ACE Step 1.5 text/audio metadata conditioning."""
+     tokens = clip.tokenize(
+         tags,
+         lyrics=lyrics,
+         bpm=bpm,
+         duration=duration,
+         timesignature=int(timesignature),
+         language=language,
+         keyscale=keyscale,
+         seed=seed,
+         generate_audio_codes=generate_audio_codes,
+         cfg_scale=cfg_scale,
+         temperature=temperature,
+         top_p=top_p,
+         top_k=top_k,
+         min_p=min_p,
+     )
+     return clip.encode_from_tokens_scheduled(tokens)
+
+
+ def empty_ace_step_15_latent_audio(seconds: float, batch_size: int = 1) -> dict[str, Any]:
+     """Create empty ACE Step 1.5 latents used as sampler noise input."""
+     torch, model_management = _get_ace_step_15_latent_audio_dependencies()
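+     # 48 kHz audio, 1920 samples per latent frame: 48000 / 1920 = 25 latents per second.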
+     length = round(seconds * 48000 / 1920)
+     latent = torch.zeros([batch_size, 64, length], device=model_management.intermediate_device())  # 64 latent channels
+     return {"samples": latent, "type": "audio"}
+
+
+ __all__ = [
+     "ltxv_audio_vae_encode",
+     "ltxv_audio_vae_decode",
+     "ltxv_empty_latent_audio",
+     "encode_ace_step_15_audio",
+     "empty_ace_step_15_latent_audio",
+ ]
@@ -0,0 +1,25 @@
+ """Prompt conditioning helpers."""
+
+ from __future__ import annotations
+
+ from typing import Any, Protocol
+
+
+ class _ClipTextEncoder(Protocol):
+     def tokenize(self, text: str) -> Any: ...
+
+     def encode_from_tokens_scheduled(self, tokens: Any) -> Any: ...
+
+
+ def encode_prompt(clip: _ClipTextEncoder, text: str) -> Any:
+     """Encode prompt text with a ComfyUI-compatible CLIP object.
+
+     Positive and negative prompts use the same encoding path; prompt
+     semantics are owned by the caller.
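+
+     Example::
+
+         positive = encode_prompt(clip, "a castle at dusk, volumetric light")
+         negative = encode_prompt(clip, "")  # normalized to a single space below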
+     """
+     # Guard: never hand the tokenizer an empty string; a single space keeps the path uniform.
+     normalized_text = " " if text == "" else text
+     tokens = clip.tokenize(normalized_text)
+     return clip.encode_from_tokens_scheduled(tokens)
+
+
+ __all__ = ["encode_prompt"]
@@ -0,0 +1,34 @@
+ """LoRA application helpers."""
+
+ from __future__ import annotations
+
+ from pathlib import Path
+ from typing import Any, cast
+
+
+ def apply_lora(
+     model: Any,
+     clip: Any,
+     path: str | Path,
+     strength_model: float,
+     strength_clip: float,
+ ) -> tuple[Any, Any]:
+     """Apply a LoRA file to a model/CLIP pair and return patched copies.
+
+     The returned pair can be passed back into ``apply_lora`` to stack
+     multiple LoRAs by chaining calls.
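+
+     Example (illustrative file names)::
+
+         model, clip = apply_lora(model, clip, "style_a.safetensors", 0.8, 0.8)
+         model, clip = apply_lora(model, clip, "style_b.safetensors", 0.5, 0.5)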
+     """
+     from ._runtime import ensure_comfyui_on_path
+
+     ensure_comfyui_on_path()
+
+     import comfy.sd
+     import comfy.utils
+
+     lora_path = str(Path(path))
+     lora = comfy.utils.load_torch_file(lora_path, safe_load=True)
+     patched = comfy.sd.load_lora_for_models(model, clip, lora, strength_model, strength_clip)
+     return cast(tuple[Any, Any], patched)
+
+
+ __all__ = ["apply_lora"]