comfy-diffusion 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- comfy_diffusion-0.1.0/PKG-INFO +183 -0
- comfy_diffusion-0.1.0/README.md +137 -0
- comfy_diffusion-0.1.0/comfy_diffusion/__init__.py +30 -0
- comfy_diffusion-0.1.0/comfy_diffusion/_runtime.py +26 -0
- comfy_diffusion-0.1.0/comfy_diffusion/audio.py +168 -0
- comfy_diffusion-0.1.0/comfy_diffusion/conditioning.py +25 -0
- comfy_diffusion-0.1.0/comfy_diffusion/lora.py +34 -0
- comfy_diffusion-0.1.0/comfy_diffusion/models.py +245 -0
- comfy_diffusion-0.1.0/comfy_diffusion/runtime.py +86 -0
- comfy_diffusion-0.1.0/comfy_diffusion/sampling.py +383 -0
- comfy_diffusion-0.1.0/comfy_diffusion/vae.py +390 -0
- comfy_diffusion-0.1.0/comfy_diffusion.egg-info/PKG-INFO +183 -0
- comfy_diffusion-0.1.0/comfy_diffusion.egg-info/SOURCES.txt +35 -0
- comfy_diffusion-0.1.0/comfy_diffusion.egg-info/dependency_links.txt +1 -0
- comfy_diffusion-0.1.0/comfy_diffusion.egg-info/requires.txt +42 -0
- comfy_diffusion-0.1.0/comfy_diffusion.egg-info/top_level.txt +1 -0
- comfy_diffusion-0.1.0/pyproject.toml +102 -0
- comfy_diffusion-0.1.0/setup.cfg +4 -0
- comfy_diffusion-0.1.0/tests/test_audio.py +405 -0
- comfy_diffusion-0.1.0/tests/test_comfyui_submodule.py +73 -0
- comfy_diffusion-0.1.0/tests/test_conditioning.py +199 -0
- comfy_diffusion-0.1.0/tests/test_cpu_only_smoke.py +67 -0
- comfy_diffusion-0.1.0/tests/test_lora.py +243 -0
- comfy_diffusion-0.1.0/tests/test_model_manager_checkpoint_loading.py +178 -0
- comfy_diffusion-0.1.0/tests/test_model_manager_clip_loading.py +132 -0
- comfy_diffusion-0.1.0/tests/test_model_manager_init.py +34 -0
- comfy_diffusion-0.1.0/tests/test_model_manager_ltxav_text_encoder_loading.py +240 -0
- comfy_diffusion-0.1.0/tests/test_model_manager_ltxv_audio_vae_loading.py +179 -0
- comfy_diffusion-0.1.0/tests/test_model_manager_unet_loading.py +108 -0
- comfy_diffusion-0.1.0/tests/test_model_manager_vae_loading.py +143 -0
- comfy_diffusion-0.1.0/tests/test_models_import.py +71 -0
- comfy_diffusion-0.1.0/tests/test_package_structure.py +26 -0
- comfy_diffusion-0.1.0/tests/test_pyproject_editable_install.py +88 -0
- comfy_diffusion-0.1.0/tests/test_runtime_diagnostics.py +171 -0
- comfy_diffusion-0.1.0/tests/test_runtime_path_management.py +89 -0
- comfy_diffusion-0.1.0/tests/test_sampling.py +1367 -0
- comfy_diffusion-0.1.0/tests/test_vae.py +837 -0
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: comfy-diffusion
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: ComfyUI inference engine as a standalone Python library (no server, no UI).
|
|
5
|
+
Requires-Python: >=3.12
|
|
6
|
+
Description-Content-Type: text/markdown
|
|
7
|
+
Requires-Dist: pillow>=12.1.1
|
|
8
|
+
Requires-Dist: psutil
|
|
9
|
+
Provides-Extra: cpu
|
|
10
|
+
Requires-Dist: torch; extra == "cpu"
|
|
11
|
+
Provides-Extra: cuda
|
|
12
|
+
Requires-Dist: torch; extra == "cuda"
|
|
13
|
+
Provides-Extra: comfyui
|
|
14
|
+
Requires-Dist: aiohttp>=3.11.8; extra == "comfyui"
|
|
15
|
+
Requires-Dist: alembic>=1.18.4; extra == "comfyui"
|
|
16
|
+
Requires-Dist: av>=14.2.0; extra == "comfyui"
|
|
17
|
+
Requires-Dist: comfy-aimdo>=0.2.7; extra == "comfyui"
|
|
18
|
+
Requires-Dist: comfy-kitchen>=0.2.7; extra == "comfyui"
|
|
19
|
+
Requires-Dist: comfyui-embedded-docs==0.4.3; extra == "comfyui"
|
|
20
|
+
Requires-Dist: comfyui-frontend-package==1.39.19; extra == "comfyui"
|
|
21
|
+
Requires-Dist: comfyui-workflow-templates==0.9.10; extra == "comfyui"
|
|
22
|
+
Requires-Dist: einops>=0.8.2; extra == "comfyui"
|
|
23
|
+
Requires-Dist: glfw>=2.10.0; extra == "comfyui"
|
|
24
|
+
Requires-Dist: kornia>=0.7.1; extra == "comfyui"
|
|
25
|
+
Requires-Dist: numpy>=1.25.0; extra == "comfyui"
|
|
26
|
+
Requires-Dist: pillow>=12.1.1; extra == "comfyui"
|
|
27
|
+
Requires-Dist: psutil>=7.2.2; extra == "comfyui"
|
|
28
|
+
Requires-Dist: pydantic~=2.0; extra == "comfyui"
|
|
29
|
+
Requires-Dist: pydantic-settings~=2.0; extra == "comfyui"
|
|
30
|
+
Requires-Dist: pyopengl>=3.1.10; extra == "comfyui"
|
|
31
|
+
Requires-Dist: pyyaml>=6.0.3; extra == "comfyui"
|
|
32
|
+
Requires-Dist: requests>=2.32.5; extra == "comfyui"
|
|
33
|
+
Requires-Dist: safetensors>=0.4.2; extra == "comfyui"
|
|
34
|
+
Requires-Dist: scipy>=1.17.1; extra == "comfyui"
|
|
35
|
+
Requires-Dist: sentencepiece>=0.2.1; extra == "comfyui"
|
|
36
|
+
Requires-Dist: spandrel>=0.4.2; extra == "comfyui"
|
|
37
|
+
Requires-Dist: sqlalchemy>=2.0.48; extra == "comfyui"
|
|
38
|
+
Requires-Dist: tokenizers>=0.13.3; extra == "comfyui"
|
|
39
|
+
Requires-Dist: torch>=2.10.0; extra == "comfyui"
|
|
40
|
+
Requires-Dist: torchaudio>=2.10.0; extra == "comfyui"
|
|
41
|
+
Requires-Dist: torchsde>=0.2.6; extra == "comfyui"
|
|
42
|
+
Requires-Dist: torchvision>=0.25.0; extra == "comfyui"
|
|
43
|
+
Requires-Dist: tqdm>=4.67.3; extra == "comfyui"
|
|
44
|
+
Requires-Dist: transformers>=4.50.3; extra == "comfyui"
|
|
45
|
+
Requires-Dist: yarl>=1.18.0; extra == "comfyui"
|
|
46
|
+
|
|
47
|
+
# comfy-diffusion
|
|
48
|
+
|
|
49
|
+
A Python library that exposes ComfyUI's inference engine as importable modules — no server, no node graph, no UI.
|
|
50
|
+
|
|
51
|
+
```python
|
|
52
|
+
from comfy_diffusion import check_runtime
|
|
53
|
+
|
|
54
|
+
print(check_runtime())
|
|
55
|
+
# {"comfyui_version": "0.9.x", "device": "cuda:0", "vram_total_mb": 8192, ...}
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
---
|
|
59
|
+
|
|
60
|
+
## Why I built this
|
|
61
|
+
|
|
62
|
+
I've been building creative AI applications — tools that generate music, visuals, and video for streaming platforms. For a while I used `diffusers` and `DiffSynth-Studio` as my inference backends. They're great libraries, well-documented, easy to import. But I kept hitting the same wall: the best models, the best fine-tunes, the ones that actually produce good results, are all built for ComfyUI.
|
|
63
|
+
|
|
64
|
+
The LoRAs on Civitai, the checkpoints people spend months training, the workflows the community shares — they're tested on ComfyUI. When I used them through diffusers I'd get inconsistent results, or they just wouldn't work the way they were intended. ComfyUI's sampler implementations, its VRAM management, its model loading logic — these aren't just UI conveniences, they're the reason the outputs look the way they do.
|
|
65
|
+
|
|
66
|
+
The problem is ComfyUI wasn't built to be a library. It's an application. The only way to use it programmatically is to run it as a server and talk to it over HTTP — which means every project I build needs to depend on a full ComfyUI backend running somewhere. That's a separate process to manage, a separate service to deploy, and a monolith that loads every node and capability whether my app needs them or not.
|
|
67
|
+
|
|
68
|
+
`comfy-diffusion` is my answer to that. ComfyUI's inference engine — `comfy.model_management`, `comfy.samplers`, `comfy.sd`, all of it — is perfectly importable Python code. It was just never packaged as a library. So I'm packaging it as one.
|
|
69
|
+
|
|
70
|
+
I built this for myself, to use in my own projects. But I'm building it in the open because I suspect I'm not the only one who wants to write `import comfy_diffusion` instead of running a server.
|
|
71
|
+
|
|
72
|
+
---
|
|
73
|
+
|
|
74
|
+
## What it is
|
|
75
|
+
|
|
76
|
+
`comfy-diffusion` imports ComfyUI's internal modules directly — no server, no HTTP, no node system. ComfyUI is vendored as a git submodule and its internals are made transparently importable when you `import comfy_diffusion`.
|
|
77
|
+
|
|
78
|
+
The API exposes ComfyUI's building blocks as plain Python functions. You compose them directly — the same way you'd wire nodes in ComfyUI, but in code:
|
|
79
|
+
|
|
80
|
+
```python
|
|
81
|
+
from comfy_diffusion.models import ModelManager
|
|
82
|
+
from comfy_diffusion.conditioning import encode_prompt
|
|
83
|
+
from comfy_diffusion.sampling import sample
|
|
84
|
+
from comfy_diffusion import vae_decode, vae_encode, apply_lora
|
|
85
|
+
|
|
86
|
+
manager = ModelManager(models_dir="/path/to/models")
|
|
87
|
+
checkpoint = manager.load_checkpoint("animagine-xl.safetensors")
|
|
88
|
+
|
|
89
|
+
# Apply a LoRA
|
|
90
|
+
model, clip = apply_lora(checkpoint.model, checkpoint.clip, "style.safetensors", 0.8, 0.8)
|
|
91
|
+
|
|
92
|
+
# Encode prompts
|
|
93
|
+
positive = encode_prompt(clip, "a portrait of a woman, studio lighting")
|
|
94
|
+
negative = encode_prompt(clip, "blurry, low quality")
|
|
95
|
+
|
|
96
|
+
# txt2img
|
|
97
|
+
import torch
|
|
98
|
+
latent = {"samples": torch.zeros(1, 4, 64, 64)}
|
|
99
|
+
denoised = sample(
|
|
100
|
+
model, positive, negative, latent,
|
|
101
|
+
steps=20, cfg=7.0, sampler_name="euler",
|
|
102
|
+
scheduler="normal", seed=42,
|
|
103
|
+
)
|
|
104
|
+
image = vae_decode(checkpoint.vae, denoised)
|
|
105
|
+
image.save("output.png")
|
|
106
|
+
|
|
107
|
+
# img2img
|
|
108
|
+
from PIL import Image

source = Image.open("input.png")
|
|
109
|
+
latent = vae_encode(checkpoint.vae, source)
|
|
110
|
+
denoised = sample(
|
|
111
|
+
model, positive, negative, latent,
|
|
112
|
+
steps=20, cfg=7.0, sampler_name="euler",
|
|
113
|
+
scheduler="normal", seed=42, denoise=0.75,
|
|
114
|
+
)
|
|
115
|
+
image = vae_decode(checkpoint.vae, denoised)
|
|
116
|
+
image.save("output_img2img.png")
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
The modularity is the point. Every building block is explicit — you see exactly what's happening at each step, and you can swap any piece without fighting a pipeline abstraction.
|
|
120
|
+
|
|
121
|
+
---
|
|
122
|
+
|
|
123
|
+
## What it is not
|
|
124
|
+
|
|
125
|
+
- Not a ComfyUI wrapper that talks to a running server
|
|
126
|
+
- Not a node system or workflow runner
|
|
127
|
+
- Not a replacement for ComfyUI — it depends on it
|
|
128
|
+
- Not a general-purpose diffusion library — it's opinionated toward ComfyUI's engine
|
|
129
|
+
- Not an opinionated pipeline — there is no `ImagePipeline`. You compose the blocks yourself.
|
|
130
|
+
|
|
131
|
+
---
|
|
132
|
+
|
|
133
|
+
## Status
|
|
134
|
+
|
|
135
|
+
Early development. Built iteratively, one capability block at a time. The full node inventory and iteration plan is in [`ROADMAP.md`](ROADMAP.md).
|
|
136
|
+
|
|
137
|
+
| # | Module | Goal | Status |
|
|
138
|
+
|---|--------|------|--------|
|
|
139
|
+
| 01 | `_runtime` / `check_runtime()` | Package foundation + ComfyUI vendoring | ✅ Done |
|
|
140
|
+
| 02 | `models` | Checkpoint loading (`ModelManager`, `CheckpointResult`) | ✅ Done |
|
|
141
|
+
| 03 | `conditioning` | Prompt encoding via `encode_prompt` | ✅ Done |
|
|
142
|
+
| 04 | `sampling` | KSampler wrapper via `sample()` | ✅ Done |
|
|
143
|
+
| 05 | `vae` | VAE decode latent→PIL via `vae_decode()` | ✅ Done |
|
|
144
|
+
| 06 | `lora` | LoRA loading and stacking via `apply_lora()` | ✅ Done |
|
|
145
|
+
| 07 | `vae` + `models` | VAE encode + standalone loaders (`load_vae`, `load_clip`, `load_unet`) | ✅ Done |
|
|
146
|
+
| 08 | `vae` — tiled | Tiled VAE encode/decode for large images without OOM | ⬜ Next |
|
|
147
|
+
| 09 | `vae` — batch/video | Batch VAE encode/decode for video frame sequences | ⬜ |
|
|
148
|
+
| 10 | `sampling` — advanced | `SamplerCustomAdvanced`, schedulers, sigma manipulation | ⬜ |
|
|
149
|
+
| 11 | `audio` | Stable Audio, WAN sound-to-video, LTXV audio, ACE Step | ⬜ |
|
|
150
|
+
| — | **`v0.1.0-preview`** | **Preview release milestone** | ⬜ |
|
|
151
|
+
| 12–18 | conditioning, controlnet, latent, image, mask, model patches, packaging | Post-preview | ⬜ |
|
|
152
|
+
|
|
153
|
+
---
|
|
154
|
+
|
|
155
|
+
## Installation
|
|
156
|
+
|
|
157
|
+
The package is **not published on PyPI yet**. Install from the repo (clone + submodule + uv).
|
|
158
|
+
|
|
159
|
+
ComfyUI deps come from `vendor/ComfyUI/requirements.txt` (extra `comfyui`).
|
|
160
|
+
|
|
161
|
+
**Note:** `uv.lock` is kept with the CPU variant of torch so CI (no GPU) can run `uv sync` and get reproducible tests. One sync installs CPU torch for everyone; GPU users replace torch with the step below.
|
|
162
|
+
|
|
163
|
+
```bash
|
|
164
|
+
# 1. ComfyUI submodule (required after clone)
|
|
165
|
+
git submodule update --init
|
|
166
|
+
|
|
167
|
+
# 2. Same for everyone (installs CPU torch)
|
|
168
|
+
uv sync --extra comfyui
|
|
169
|
+
|
|
170
|
+
# 3. GPU only: replace torch with CUDA build (required after every uv sync)
|
|
171
|
+
uv pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124 --force-reinstall
|
|
172
|
+
# RTX 50xx (Blackwell): use cu128
|
|
173
|
+
uv pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128 --force-reinstall
|
|
174
|
+
# Verify: uv run python -c "import torch; print(torch.__version__, torch.cuda.is_available())"
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
> Requires Python 3.12+. ComfyUI is vendored — no separate installation needed. Once the package is on PyPI you can use `pip install comfy-diffusion[cuda]` or `uv add comfy-diffusion[cuda]`.
|
|
178
|
+
|
|
179
|
+
---
|
|
180
|
+
|
|
181
|
+
## License
|
|
182
|
+
|
|
183
|
+
GPL-3.0 — same as ComfyUI, which this project depends on.
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
# comfy-diffusion
|
|
2
|
+
|
|
3
|
+
A Python library that exposes ComfyUI's inference engine as importable modules — no server, no node graph, no UI.
|
|
4
|
+
|
|
5
|
+
```python
|
|
6
|
+
from comfy_diffusion import check_runtime
|
|
7
|
+
|
|
8
|
+
print(check_runtime())
|
|
9
|
+
# {"comfyui_version": "0.9.x", "device": "cuda:0", "vram_total_mb": 8192, ...}
|
|
10
|
+
```
|
|
11
|
+
|
|
12
|
+
---
|
|
13
|
+
|
|
14
|
+
## Why I built this
|
|
15
|
+
|
|
16
|
+
I've been building creative AI applications — tools that generate music, visuals, and video for streaming platforms. For a while I used `diffusers` and `DiffSynth-Studio` as my inference backends. They're great libraries, well-documented, easy to import. But I kept hitting the same wall: the best models, the best fine-tunes, the ones that actually produce good results, are all built for ComfyUI.
|
|
17
|
+
|
|
18
|
+
The LoRAs on Civitai, the checkpoints people spend months training, the workflows the community shares — they're tested on ComfyUI. When I used them through diffusers I'd get inconsistent results, or they just wouldn't work the way they were intended. ComfyUI's sampler implementations, its VRAM management, its model loading logic — these aren't just UI conveniences, they're the reason the outputs look the way they do.
|
|
19
|
+
|
|
20
|
+
The problem is ComfyUI wasn't built to be a library. It's an application. The only way to use it programmatically is to run it as a server and talk to it over HTTP — which means every project I build needs to depend on a full ComfyUI backend running somewhere. That's a separate process to manage, a separate service to deploy, and a monolith that loads every node and capability whether my app needs them or not.
|
|
21
|
+
|
|
22
|
+
`comfy-diffusion` is my answer to that. ComfyUI's inference engine — `comfy.model_management`, `comfy.samplers`, `comfy.sd`, all of it — is perfectly importable Python code. It was just never packaged as a library. So I'm packaging it as one.
|
|
23
|
+
|
|
24
|
+
I built this for myself, to use in my own projects. But I'm building it in the open because I suspect I'm not the only one who wants to write `import comfy_diffusion` instead of running a server.
|
|
25
|
+
|
|
26
|
+
---
|
|
27
|
+
|
|
28
|
+
## What it is
|
|
29
|
+
|
|
30
|
+
`comfy-diffusion` imports ComfyUI's internal modules directly — no server, no HTTP, no node system. ComfyUI is vendored as a git submodule and its internals are made transparently importable when you `import comfy_diffusion`.
|
|
31
|
+
|
|
32
|
+
The API exposes ComfyUI's building blocks as plain Python functions. You compose them directly — the same way you'd wire nodes in ComfyUI, but in code:
|
|
33
|
+
|
|
34
|
+
```python
|
|
35
|
+
from comfy_diffusion.models import ModelManager
|
|
36
|
+
from comfy_diffusion.conditioning import encode_prompt
|
|
37
|
+
from comfy_diffusion.sampling import sample
|
|
38
|
+
from comfy_diffusion import vae_decode, vae_encode, apply_lora
|
|
39
|
+
|
|
40
|
+
manager = ModelManager(models_dir="/path/to/models")
|
|
41
|
+
checkpoint = manager.load_checkpoint("animagine-xl.safetensors")
|
|
42
|
+
|
|
43
|
+
# Apply a LoRA
|
|
44
|
+
model, clip = apply_lora(checkpoint.model, checkpoint.clip, "style.safetensors", 0.8, 0.8)
|
|
45
|
+
|
|
46
|
+
# Encode prompts
|
|
47
|
+
positive = encode_prompt(clip, "a portrait of a woman, studio lighting")
|
|
48
|
+
negative = encode_prompt(clip, "blurry, low quality")
|
|
49
|
+
|
|
50
|
+
# txt2img
|
|
51
|
+
import torch
|
|
52
|
+
latent = {"samples": torch.zeros(1, 4, 64, 64)}
|
|
53
|
+
denoised = sample(
|
|
54
|
+
model, positive, negative, latent,
|
|
55
|
+
steps=20, cfg=7.0, sampler_name="euler",
|
|
56
|
+
scheduler="normal", seed=42,
|
|
57
|
+
)
|
|
58
|
+
image = vae_decode(checkpoint.vae, denoised)
|
|
59
|
+
image.save("output.png")
|
|
60
|
+
|
|
61
|
+
# img2img
|
|
62
|
+
from PIL import Image

source = Image.open("input.png")
|
|
63
|
+
latent = vae_encode(checkpoint.vae, source)
|
|
64
|
+
denoised = sample(
|
|
65
|
+
model, positive, negative, latent,
|
|
66
|
+
steps=20, cfg=7.0, sampler_name="euler",
|
|
67
|
+
scheduler="normal", seed=42, denoise=0.75,
|
|
68
|
+
)
|
|
69
|
+
image = vae_decode(checkpoint.vae, denoised)
|
|
70
|
+
image.save("output_img2img.png")
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
The modularity is the point. Every building block is explicit — you see exactly what's happening at each step, and you can swap any piece without fighting a pipeline abstraction.
|
|
74
|
+
|
|
75
|
+
---
|
|
76
|
+
|
|
77
|
+
## What it is not
|
|
78
|
+
|
|
79
|
+
- Not a ComfyUI wrapper that talks to a running server
|
|
80
|
+
- Not a node system or workflow runner
|
|
81
|
+
- Not a replacement for ComfyUI — it depends on it
|
|
82
|
+
- Not a general-purpose diffusion library — it's opinionated toward ComfyUI's engine
|
|
83
|
+
- Not an opinionated pipeline — there is no `ImagePipeline`. You compose the blocks yourself.
|
|
84
|
+
|
|
85
|
+
---
|
|
86
|
+
|
|
87
|
+
## Status
|
|
88
|
+
|
|
89
|
+
Early development. Built iteratively, one capability block at a time. The full node inventory and iteration plan is in [`ROADMAP.md`](ROADMAP.md).
|
|
90
|
+
|
|
91
|
+
| # | Module | Goal | Status |
|
|
92
|
+
|---|--------|------|--------|
|
|
93
|
+
| 01 | `_runtime` / `check_runtime()` | Package foundation + ComfyUI vendoring | ✅ Done |
|
|
94
|
+
| 02 | `models` | Checkpoint loading (`ModelManager`, `CheckpointResult`) | ✅ Done |
|
|
95
|
+
| 03 | `conditioning` | Prompt encoding via `encode_prompt` | ✅ Done |
|
|
96
|
+
| 04 | `sampling` | KSampler wrapper via `sample()` | ✅ Done |
|
|
97
|
+
| 05 | `vae` | VAE decode latent→PIL via `vae_decode()` | ✅ Done |
|
|
98
|
+
| 06 | `lora` | LoRA loading and stacking via `apply_lora()` | ✅ Done |
|
|
99
|
+
| 07 | `vae` + `models` | VAE encode + standalone loaders (`load_vae`, `load_clip`, `load_unet`) | ✅ Done |
|
|
100
|
+
| 08 | `vae` — tiled | Tiled VAE encode/decode for large images without OOM | ⬜ Next |
|
|
101
|
+
| 09 | `vae` — batch/video | Batch VAE encode/decode for video frame sequences | ⬜ |
|
|
102
|
+
| 10 | `sampling` — advanced | `SamplerCustomAdvanced`, schedulers, sigma manipulation | ⬜ |
|
|
103
|
+
| 11 | `audio` | Stable Audio, WAN sound-to-video, LTXV audio, ACE Step | ⬜ |
|
|
104
|
+
| — | **`v0.1.0-preview`** | **Preview release milestone** | ⬜ |
|
|
105
|
+
| 12–18 | conditioning, controlnet, latent, image, mask, model patches, packaging | Post-preview | ⬜ |
|
|
106
|
+
|
|
107
|
+
---
|
|
108
|
+
|
|
109
|
+
## Installation
|
|
110
|
+
|
|
111
|
+
The package is **not published on PyPI yet**. Install from the repo (clone + submodule + uv).
|
|
112
|
+
|
|
113
|
+
ComfyUI deps come from `vendor/ComfyUI/requirements.txt` (extra `comfyui`).
|
|
114
|
+
|
|
115
|
+
**Note:** `uv.lock` is kept with the CPU variant of torch so CI (no GPU) can run `uv sync` and get reproducible tests. One sync installs CPU torch for everyone; GPU users replace torch with the step below.
|
|
116
|
+
|
|
117
|
+
```bash
|
|
118
|
+
# 1. ComfyUI submodule (required after clone)
|
|
119
|
+
git submodule update --init
|
|
120
|
+
|
|
121
|
+
# 2. Same for everyone (installs CPU torch)
|
|
122
|
+
uv sync --extra comfyui
|
|
123
|
+
|
|
124
|
+
# 3. GPU only: replace torch with CUDA build (required after every uv sync)
|
|
125
|
+
uv pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124 --force-reinstall
|
|
126
|
+
# RTX 50xx (Blackwell): use cu128
|
|
127
|
+
uv pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128 --force-reinstall
|
|
128
|
+
# Verify: uv run python -c "import torch; print(torch.__version__, torch.cuda.is_available())"
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
> Requires Python 3.12+. ComfyUI is vendored — no separate installation needed. Once the package is on PyPI you can use `pip install comfy-diffusion[cuda]` or `uv add comfy-diffusion[cuda]`.
|
|
132
|
+
|
|
133
|
+
---
|
|
134
|
+
|
|
135
|
+
## License
|
|
136
|
+
|
|
137
|
+
GPL-3.0 — same as ComfyUI, which this project depends on.
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
"""Public package entrypoint for comfy_diffusion."""
|
|
2
|
+
|
|
3
|
+
from ._runtime import ensure_comfyui_on_path
|
|
4
|
+
from .lora import apply_lora
|
|
5
|
+
from .runtime import check_runtime
|
|
6
|
+
from .vae import (
|
|
7
|
+
vae_decode,
|
|
8
|
+
vae_decode_batch,
|
|
9
|
+
vae_decode_batch_tiled,
|
|
10
|
+
vae_decode_tiled,
|
|
11
|
+
vae_encode,
|
|
12
|
+
vae_encode_batch,
|
|
13
|
+
vae_encode_batch_tiled,
|
|
14
|
+
vae_encode_tiled,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
ensure_comfyui_on_path()
|
|
18
|
+
|
|
19
|
+
__all__ = [
|
|
20
|
+
"check_runtime",
|
|
21
|
+
"vae_decode",
|
|
22
|
+
"vae_decode_batch",
|
|
23
|
+
"vae_decode_batch_tiled",
|
|
24
|
+
"vae_decode_tiled",
|
|
25
|
+
"vae_encode",
|
|
26
|
+
"vae_encode_batch",
|
|
27
|
+
"vae_encode_batch_tiled",
|
|
28
|
+
"vae_encode_tiled",
|
|
29
|
+
"apply_lora",
|
|
30
|
+
]
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"""Internal runtime bootstrap for comfy_diffusion.
|
|
2
|
+
|
|
3
|
+
Path insertion is intentionally lightweight and import-safe: this module must not
|
|
4
|
+
import torch or comfy internals just to make ComfyUI discoverable.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import sys
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _comfyui_root() -> Path:
    """Return the absolute path to the vendored ComfyUI directory."""
    # The package layout is <root>/comfy_diffusion/_runtime.py with the
    # vendored tree at <root>/vendor/ComfyUI.
    package_dir = Path(__file__).resolve().parent
    return package_dir.parent / "vendor" / "ComfyUI"


def ensure_comfyui_on_path() -> Path:
    """Ensure vendored ComfyUI is importable and return the inserted path.

    Idempotent: the path is inserted at most once, at the front of
    ``sys.path`` so the vendored copy wins over any globally installed one.
    """
    root = _comfyui_root()
    root_str = str(root)
    if root_str not in sys.path:
        sys.path.insert(0, root_str)
    return root
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
"""Audio helpers."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any, Protocol, cast
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class _LtxvAudioVaeEncoder(Protocol):
    """Structural type for an LTXV audio VAE that can encode raw audio."""

    # Sample rate the encoder expects its input audio to use.
    sample_rate: int

    def encode(self, audio: Any) -> Any: ...
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class _LtxvAudioVaeDecoder(Protocol):
    """Structural type for an LTXV audio VAE that can decode latents."""

    # Sample rate of the waveform produced by ``decode``.
    output_sample_rate: int

    def decode(self, latent: Any) -> Any: ...
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class _LtxvAudioVae(Protocol):
    """Structural type for a full LTXV audio VAE used for latent sizing."""

    # Sample rate the VAE operates at.
    sample_rate: int
    # Channel count of the latent representation.
    latent_channels: int
    # Frequency-bin dimension of the latent representation.
    latent_frequency_bins: int

    # Maps a video frame count + frame rate to the number of audio latents
    # needed to cover the same duration.
    def num_of_latents_from_frames(self, frames_number: int, frame_rate: int) -> int: ...
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class _AceStep15Clip(Protocol):
    """Structural type for an ACE Step 1.5 CLIP-like text/metadata encoder.

    Mirrors the tokenize/encode split used by ComfyUI CLIP objects: song
    metadata is folded into tokenization, then encoded into conditioning.
    """

    # NOTE: all metadata is keyword-only; ``tags`` is the lone positional arg.
    def tokenize(
        self,
        tags: str,
        *,
        lyrics: str,
        bpm: int,
        duration: float,
        timesignature: int,
        language: str,
        keyscale: str,
        seed: int,
        generate_audio_codes: bool,
        cfg_scale: float,
        temperature: float,
        top_p: float,
        top_k: int,
        min_p: float,
    ) -> Any: ...

    def encode_from_tokens_scheduled(self, tokens: Any) -> Any: ...
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _get_ltxv_empty_latent_audio_type() -> Any:
    """Resolve ComfyUI's ``LTXVEmptyLatentAudio`` node class at call time.

    Imports are deferred so that importing this module stays cheap and does
    not require torch/ComfyUI until a function actually needs them.
    """
    from ._runtime import ensure_comfyui_on_path

    # Make the vendored ComfyUI tree importable before touching comfy_extras.
    ensure_comfyui_on_path()
    from comfy_extras.nodes_lt_audio import LTXVEmptyLatentAudio

    return LTXVEmptyLatentAudio
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _get_ace_step_15_latent_audio_dependencies() -> tuple[Any, Any]:
    """Resolve torch and ComfyUI model management at call time.

    Deferred imports keep module import light; the ComfyUI path must be set
    up before ``comfy.model_management`` can be imported.
    """
    from ._runtime import ensure_comfyui_on_path

    ensure_comfyui_on_path()
    import comfy.model_management
    import torch

    return torch, comfy.model_management
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _unwrap_node_output(output: Any) -> Any:
    """Extract the primary value from a ComfyUI node invocation result."""
    # V3 nodes expose their outputs on a ``result`` attribute.
    if hasattr(output, "result"):
        return output.result[0]
    # Legacy tuple-style APIs return a plain tuple of outputs; anything
    # else is passed through unchanged.
    return output[0] if isinstance(output, tuple) else output
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def ltxv_audio_vae_encode(vae: _LtxvAudioVaeEncoder, audio: Any) -> dict[str, Any]:
    """Encode raw audio with an LTXV audio VAE.

    Returns a latent dict in ComfyUI's audio-latent shape: ``samples`` plus
    the VAE's native ``sample_rate`` and a ``type`` tag of ``"audio"``.
    """
    return {
        "samples": vae.encode(audio),
        "sample_rate": int(vae.sample_rate),
        "type": "audio",
    }
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def ltxv_audio_vae_decode(vae: _LtxvAudioVaeDecoder, latent: Any) -> dict[str, Any]:
    """Decode latent audio with an LTXV audio VAE.

    Accepts either a latent dict (``{"samples": tensor}``) or a bare tensor.
    Returns an audio dict with ``waveform`` and the VAE's output sample rate.
    """
    samples = latent["samples"] if isinstance(latent, dict) else latent
    # Nested tensors carry one entry per batch item; decode the last one.
    if getattr(samples, "is_nested", False):
        samples = samples.unbind()[-1]
    # Keep the decoded waveform on the same device as the input latents.
    waveform = vae.decode(samples).to(samples.device)
    return {"waveform": waveform, "sample_rate": int(vae.output_sample_rate)}
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def ltxv_empty_latent_audio(
    audio_vae: _LtxvAudioVae,
    frames_number: int,
    frame_rate: int = 25,
    batch_size: int = 1,
) -> dict[str, Any]:
    """Create empty LTXV audio latents compatible with ComfyUI's audio pipeline.

    Delegates to ComfyUI's ``LTXVEmptyLatentAudio`` node so the latent shape
    stays in lockstep with the vendored ComfyUI implementation.
    """
    node_type = _get_ltxv_empty_latent_audio_type()
    raw_output = node_type.execute(
        frames_number=frames_number,
        frame_rate=frame_rate,
        batch_size=batch_size,
        audio_vae=audio_vae,
    )
    return cast(dict[str, Any], _unwrap_node_output(raw_output))
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def encode_ace_step_15_audio(
    clip: _AceStep15Clip,
    tags: str,
    lyrics: str = "",
    seed: int = 0,
    bpm: int = 120,
    duration: float = 120.0,
    # Widened from ``str``: the tokenizer protocol takes an int and the body
    # always coerces, so integer callers were already supported in practice.
    timesignature: int | str = "4",
    language: str = "en",
    keyscale: str = "C major",
    generate_audio_codes: bool = True,
    cfg_scale: float = 2.0,
    temperature: float = 0.85,
    top_p: float = 0.9,
    top_k: int = 0,
    min_p: float = 0.0,
) -> Any:
    """Encode ACE Step 1.5 text/audio metadata conditioning.

    Args:
        clip: ACE Step 1.5 CLIP-like encoder (tokenize + scheduled encode).
        tags: Comma-separated style/genre tags.
        lyrics: Optional song lyrics.
        seed: Sampling seed forwarded to the tokenizer.
        bpm: Tempo in beats per minute.
        duration: Target duration in seconds.
        timesignature: Time signature as an int or numeric string (e.g. "4").
        language: Lyrics language code.
        keyscale: Musical key, e.g. "C major".
        generate_audio_codes: Whether the tokenizer should emit audio codes.
        cfg_scale, temperature, top_p, top_k, min_p: Sampling controls
            forwarded verbatim to the tokenizer.

    Returns:
        Conditioning produced by ``clip.encode_from_tokens_scheduled``.

    Raises:
        ValueError: if ``timesignature`` is a string that is not an integer.
    """
    tokens = clip.tokenize(
        tags,
        lyrics=lyrics,
        bpm=bpm,
        duration=duration,
        # The underlying tokenizer expects an integer time signature.
        timesignature=int(timesignature),
        language=language,
        keyscale=keyscale,
        seed=seed,
        generate_audio_codes=generate_audio_codes,
        cfg_scale=cfg_scale,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        min_p=min_p,
    )
    return clip.encode_from_tokens_scheduled(tokens)
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def empty_ace_step_15_latent_audio(seconds: float, batch_size: int = 1) -> dict[str, Any]:
    """Create empty ACE Step 1.5 latents used as sampler noise input.

    The latent length is derived from the requested duration: 48000 / 1920
    gives 25 latent frames per second of audio.
    """
    torch, model_management = _get_ace_step_15_latent_audio_dependencies()
    frame_count = round(seconds * 48000 / 1920)
    samples = torch.zeros(
        [batch_size, 64, frame_count],
        device=model_management.intermediate_device(),
    )
    return {"samples": samples, "type": "audio"}
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
__all__ = [
|
|
163
|
+
"ltxv_audio_vae_encode",
|
|
164
|
+
"ltxv_audio_vae_decode",
|
|
165
|
+
"ltxv_empty_latent_audio",
|
|
166
|
+
"encode_ace_step_15_audio",
|
|
167
|
+
"empty_ace_step_15_latent_audio",
|
|
168
|
+
]
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"""Prompt conditioning helpers."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any, Protocol
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class _ClipTextEncoder(Protocol):
    """Structural type for a ComfyUI-compatible CLIP text encoder."""

    def tokenize(self, text: str) -> Any: ...

    def encode_from_tokens_scheduled(self, tokens: Any) -> Any: ...
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def encode_prompt(clip: _ClipTextEncoder, text: str) -> Any:
    """Encode prompt text with a ComfyUI-compatible CLIP object.

    Positive and negative prompts use the same encoding path; prompt
    semantics are owned by the caller.
    """
    # An empty prompt is substituted with a single space so tokenization
    # always has at least one character to work with.
    prompt = text if text else " "
    return clip.encode_from_tokens_scheduled(clip.tokenize(prompt))
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
__all__ = ["encode_prompt"]
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
"""LoRA application helpers."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any, cast
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def apply_lora(
    model: Any,
    clip: Any,
    path: str | Path,
    strength_model: float,
    strength_clip: float,
) -> tuple[Any, Any]:
    """Apply a LoRA file to a model/CLIP pair and return patched copies.

    The returned pair can be passed back into ``apply_lora`` to stack
    multiple LoRAs by chaining calls.
    """
    from ._runtime import ensure_comfyui_on_path

    ensure_comfyui_on_path()

    import comfy.sd
    import comfy.utils

    # safe_load=True keeps loading restricted to tensor data (no pickle code
    # execution), which matters for community-distributed LoRA files.
    state_dict = comfy.utils.load_torch_file(str(Path(path)), safe_load=True)
    result = comfy.sd.load_lora_for_models(
        model, clip, state_dict, strength_model, strength_clip
    )
    return cast(tuple[Any, Any], result)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
__all__ = ["apply_lora"]
|