TorchDiff 2.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. torchdiff-2.0.0/LICENSE +21 -0
  2. torchdiff-2.0.0/PKG-INFO +315 -0
  3. torchdiff-2.0.0/README.md +264 -0
  4. torchdiff-2.0.0/TorchDiff.egg-info/PKG-INFO +315 -0
  5. torchdiff-2.0.0/TorchDiff.egg-info/SOURCES.txt +71 -0
  6. torchdiff-2.0.0/TorchDiff.egg-info/dependency_links.txt +1 -0
  7. torchdiff-2.0.0/TorchDiff.egg-info/requires.txt +10 -0
  8. torchdiff-2.0.0/TorchDiff.egg-info/top_level.txt +6 -0
  9. torchdiff-2.0.0/ddim/__init__.py +0 -0
  10. torchdiff-2.0.0/ddim/forward_ddim.py +79 -0
  11. torchdiff-2.0.0/ddim/hyper_param.py +225 -0
  12. torchdiff-2.0.0/ddim/noise_predictor.py +521 -0
  13. torchdiff-2.0.0/ddim/reverse_ddim.py +91 -0
  14. torchdiff-2.0.0/ddim/sample_ddim.py +219 -0
  15. torchdiff-2.0.0/ddim/text_encoder.py +152 -0
  16. torchdiff-2.0.0/ddim/train_ddim.py +394 -0
  17. torchdiff-2.0.0/ddpm/__init__.py +0 -0
  18. torchdiff-2.0.0/ddpm/forward_ddpm.py +89 -0
  19. torchdiff-2.0.0/ddpm/hyper_param.py +180 -0
  20. torchdiff-2.0.0/ddpm/noise_predictor.py +521 -0
  21. torchdiff-2.0.0/ddpm/reverse_ddpm.py +102 -0
  22. torchdiff-2.0.0/ddpm/sample_ddpm.py +213 -0
  23. torchdiff-2.0.0/ddpm/text_encoder.py +152 -0
  24. torchdiff-2.0.0/ddpm/train_ddpm.py +386 -0
  25. torchdiff-2.0.0/ldm/__init__.py +0 -0
  26. torchdiff-2.0.0/ldm/autoencoder.py +855 -0
  27. torchdiff-2.0.0/ldm/forward_idm.py +100 -0
  28. torchdiff-2.0.0/ldm/hyper_param.py +239 -0
  29. torchdiff-2.0.0/ldm/metrics.py +206 -0
  30. torchdiff-2.0.0/ldm/noise_predictor.py +1074 -0
  31. torchdiff-2.0.0/ldm/reverse_ldm.py +119 -0
  32. torchdiff-2.0.0/ldm/sample_ldm.py +254 -0
  33. torchdiff-2.0.0/ldm/text_encoder.py +429 -0
  34. torchdiff-2.0.0/ldm/train_autoencoder.py +216 -0
  35. torchdiff-2.0.0/ldm/train_ldm.py +412 -0
  36. torchdiff-2.0.0/sde/__init__.py +0 -0
  37. torchdiff-2.0.0/sde/forward_sde.py +98 -0
  38. torchdiff-2.0.0/sde/hyper_param.py +200 -0
  39. torchdiff-2.0.0/sde/noise_predictor.py +521 -0
  40. torchdiff-2.0.0/sde/reverse_sde.py +115 -0
  41. torchdiff-2.0.0/sde/sample_sde.py +216 -0
  42. torchdiff-2.0.0/sde/text_encoder.py +152 -0
  43. torchdiff-2.0.0/sde/train_sde.py +400 -0
  44. torchdiff-2.0.0/setup.cfg +4 -0
  45. torchdiff-2.0.0/setup.py +50 -0
  46. torchdiff-2.0.0/torchdiff/__init__.py +8 -0
  47. torchdiff-2.0.0/torchdiff/ddim.py +1222 -0
  48. torchdiff-2.0.0/torchdiff/ddpm.py +1153 -0
  49. torchdiff-2.0.0/torchdiff/ldm.py +2156 -0
  50. torchdiff-2.0.0/torchdiff/sde.py +1231 -0
  51. torchdiff-2.0.0/torchdiff/tests/__init__.py +0 -0
  52. torchdiff-2.0.0/torchdiff/tests/test_ddim.py +551 -0
  53. torchdiff-2.0.0/torchdiff/tests/test_ddpm.py +1188 -0
  54. torchdiff-2.0.0/torchdiff/tests/test_ldm.py +742 -0
  55. torchdiff-2.0.0/torchdiff/tests/test_sde.py +626 -0
  56. torchdiff-2.0.0/torchdiff/tests/test_unclip.py +366 -0
  57. torchdiff-2.0.0/torchdiff/unclip.py +4170 -0
  58. torchdiff-2.0.0/torchdiff/utils.py +1660 -0
  59. torchdiff-2.0.0/unclip/__init__.py +0 -0
  60. torchdiff-2.0.0/unclip/clip_model.py +304 -0
  61. torchdiff-2.0.0/unclip/ddim_model.py +1296 -0
  62. torchdiff-2.0.0/unclip/decoder_model.py +312 -0
  63. torchdiff-2.0.0/unclip/prior_diff.py +402 -0
  64. torchdiff-2.0.0/unclip/prior_model.py +264 -0
  65. torchdiff-2.0.0/unclip/project_decoder.py +57 -0
  66. torchdiff-2.0.0/unclip/project_prior.py +170 -0
  67. torchdiff-2.0.0/unclip/train_decoder.py +1059 -0
  68. torchdiff-2.0.0/unclip/train_prior.py +757 -0
  69. torchdiff-2.0.0/unclip/unclip_sampler.py +626 -0
  70. torchdiff-2.0.0/unclip/upsampler.py +432 -0
  71. torchdiff-2.0.0/unclip/upsampler_trainer.py +784 -0
  72. torchdiff-2.0.0/unclip/utils.py +1793 -0
  73. torchdiff-2.0.0/unclip/val_metrics.py +221 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Loghman Samani
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,315 @@
1
+ Metadata-Version: 2.4
2
+ Name: TorchDiff
3
+ Version: 2.0.0
4
+ Summary: A PyTorch-based library for diffusion models
5
+ Home-page: https://github.com/LoqmanSamani/TorchDiff
6
+ Author: Loghman Samani
7
+ Author-email: samaniloqman91@gmail.com
8
+ License: MIT
9
+ Project-URL: Homepage, https://loqmansamani.github.io/torchdiff
10
+ Project-URL: Documentation, https://torchdiff.readthedocs.io
11
+ Project-URL: Source, https://github.com/LoqmanSamani/TorchDiff
12
+ Keywords: diffusion models,pytorch,machine learning,deep learning
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.8
15
+ Classifier: Programming Language :: Python :: 3.9
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: License :: OSI Approved :: MIT License
20
+ Classifier: Operating System :: OS Independent
21
+ Classifier: Intended Audience :: Developers
22
+ Classifier: Intended Audience :: Science/Research
23
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
24
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
25
+ Requires-Python: >=3.8
26
+ Description-Content-Type: text/markdown
27
+ License-File: LICENSE
28
+ Requires-Dist: lpips==0.1.4
29
+ Requires-Dist: pytorch-fid==0.3.0
30
+ Requires-Dist: torch==2.7.0
31
+ Requires-Dist: torchvision==0.22.0
32
+ Requires-Dist: tqdm==4.67.1
33
+ Requires-Dist: transformers==4.51.3
34
+ Provides-Extra: test
35
+ Requires-Dist: pytest>=7.0.0; extra == "test"
36
+ Requires-Dist: pytest-cov>=4.0.0; extra == "test"
37
+ Dynamic: author
38
+ Dynamic: author-email
39
+ Dynamic: classifier
40
+ Dynamic: description
41
+ Dynamic: description-content-type
42
+ Dynamic: home-page
43
+ Dynamic: keywords
44
+ Dynamic: license
45
+ Dynamic: license-file
46
+ Dynamic: project-url
47
+ Dynamic: provides-extra
48
+ Dynamic: requires-dist
49
+ Dynamic: requires-python
50
+ Dynamic: summary
51
+
52
+ # TorchDiff
53
+
54
+ <div align="center">
55
+ <img src="imgs/logo_.png" alt="TorchDiff Logo" width="300"/>
56
+ </div>
57
+
58
+ <div align="center">
59
+
60
+ [![License: MIT](https://img.shields.io/badge/license-MIT-red?style=plastic)](https://opensource.org/licenses/MIT)
61
+ [![PyTorch](https://img.shields.io/badge/PyTorch-white?style=plastic&logo=pytorch&logoColor=red)](https://pytorch.org/)
62
+ [![Version](https://img.shields.io/badge/version-2.0.0-blue?style=plastic)](https://pypi.org/project/torchdiff/)
63
+ [![Python](https://img.shields.io/badge/python-3.8%2B-blue?style=plastic&logo=python&logoColor=white)](https://www.python.org/)
64
+ [![Downloads](https://pepy.tech/badge/torchdiff)](https://pepy.tech/project/torchdiff)
65
+ [![Stars](https://img.shields.io/github/stars/LoqmanSamani/TorchDiff?style=plastic&color=yellow)](https://github.com/LoqmanSamani/TorchDiff)
66
+ [![Forks](https://img.shields.io/github/forks/LoqmanSamani/TorchDiff?style=plastic&color=orange)](https://github.com/LoqmanSamani/TorchDiff)
67
+ [![Issues](https://img.shields.io/github/issues/LoqmanSamani/TorchDiff?style=plastic&color=red)](https://github.com/LoqmanSamani/TorchDiff/issues)
68
+
69
+ </div>
70
+
71
+ ---
72
+
73
+ ## 🔎 Overview
74
+
75
+ **TorchDiff** is a PyTorch-based library for building and experimenting with diffusion models, inspired by leading research papers.
76
+
77
+ The **TorchDiff 2.0.0** release includes implementations of five major diffusion model families:
78
+ - **DDPM** (Denoising Diffusion Probabilistic Models)
79
+ - **DDIM** (Denoising Diffusion Implicit Models)
80
+ - **SDE-based Diffusion**
81
+ - **LDM** (Latent Diffusion Models)
82
+ - **UnCLIP** (the model powering OpenAI’s *DALL·E 2*)
83
+
84
+ These models support both **conditional** (e.g., text-to-image) and **unconditional** generation.
85
+
86
+ <div align="center">
87
+ <img src="imgs/mount.png" alt="Diffusion Model Process" width="1000"/>
88
+ <br>
89
+ <em>Image generated using Sora</em>
90
+ <br><br>
91
+ </div>
92
+
93
+ TorchDiff is designed with **modularity** in mind. Each model is broken down into reusable components:
94
+ - **Forward Diffusion**: Adds noise (e.g., `ForwardDDPM`).
95
+ - **Reverse Diffusion**: Removes noise to recover data (e.g., `ReverseDDPM`).
96
+ - **Variance Scheduler**: Controls noise schedules (e.g., `VarianceSchedulerDDPM`).
97
+ - **Training**: Full training pipelines (e.g., `TrainDDPM`).
98
+ - **Sampling**: Efficient inference and generation (e.g., `SampleDDPM`).
99
+
100
+ Additional utilities:
101
+ - **Noise Predictor**: A U-Net-like model with attention and time embeddings.
102
+ - **Text Encoder**: Transformer-based (e.g., BERT) for conditional generation.
103
+ - **Metrics**: Evaluation suite including MSE, PSNR, SSIM, FID, and LPIPS.
104
+
105
+ ---
106
+
107
+ ## ⚡ Quick Start
108
+
109
+ Here’s a minimal working example to train and sample with **DDPM** on dummy data:
110
+
111
+ ```python
112
+ import torch
113
+ import torch.nn as nn
114
+ from torchvision import datasets, transforms
115
+ from torch.utils.data import DataLoader
116
+
117
+ from torchdiff.ddpm import VarianceSchedulerDDPM, ForwardDDPM, ReverseDDPM, TrainDDPM, SampleDDPM
118
+ from torchdiff.utils import NoisePredictor
119
+
120
+ # Dataset (CIFAR10 for demo)
121
+ transform = transforms.Compose([
122
+ transforms.Resize(32),
123
+ transforms.ToTensor(),
124
+ transforms.Normalize((0.5,), (0.5,))
125
+ ])
126
+ train_dataset = datasets.CIFAR10(root="./data", train=True, download=True, transform=transform)
127
+ train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
128
+
129
+ # Model components
130
+ noise_pred = NoisePredictor(in_channels=3)
131
+ vs = VarianceSchedulerDDPM(num_steps=1000)
132
+ fwd, rev = ForwardDDPM(vs), ReverseDDPM(vs)
133
+
134
+ # Optimizer & loss
135
+ optim = torch.optim.Adam(noise_pred.parameters(), lr=1e-4)
136
+ loss_fn = nn.MSELoss()
137
+
138
+ # Training
139
+ trainer = TrainDDPM(
140
+ noise_predictor=noise_pred, forward_diffusion=fwd, reverse_diffusion=rev,
141
+ conditional_model=None, optimizer=optim, objective=loss_fn,
142
+ data_loader=train_loader, max_epochs=1, device="cpu"
143
+ )
144
+ trainer()
145
+
146
+ # Sampling
147
+ sampler = SampleDDPM(reverse_diffusion=rev, noise_predictor=noise_pred,
148
+ image_shape=(32, 32), batch_size=4, in_channels=3, device="cpu")
149
+ images = sampler()
150
+ print("Generated images shape:", images.shape)
151
+ ```
152
+
153
+ For detailed examples, check the [examples/](https://github.com/LoqmanSamani/TorchDiff/tree/systembiology/examples) directory.
154
+
155
+ ---
156
+
157
+ ## 📚 Resources
158
+ - 🌐 [Project Website](https://loqmansamani.github.io/torchdiff/)
159
+ - 📖 [API Reference](https://torchdiff.readthedocs.io/en/latest/index.html)
160
+
161
+ ---
162
+
163
+ ## ⚡ Installation
164
+
165
+ Install from **PyPI (recommended):**
166
+ ```bash
167
+ pip install torchdiff
168
+ ```
169
+
170
+ Or install from source for development:
171
+ ```bash
172
+ # Clone repository
173
+ git clone https://github.com/LoqmanSamani/TorchDiff.git
174
+ cd TorchDiff
175
+
176
+ # Install dependencies
177
+ pip install -r requirements.txt
178
+
179
+ # Install package
180
+ pip install .
181
+ ```
182
+
183
+ > Requires **Python 3.8+**. For GPU acceleration, ensure PyTorch is installed with the correct CUDA version.
184
+
185
+ ---
186
+
187
+ ## 🧩 Implemented Models
188
+
189
+ ### 1. Denoising Diffusion Probabilistic Models (DDPM)
190
+ **Paper**: [Ho et al., 2020](https://arxiv.org/abs/2006.11239)
191
+
192
+ DDPMs learn to reverse a gradual noise-adding process to generate high-quality images. TorchDiff provides a modular implementation for both unconditional and conditional (text-guided) generation.
193
+
194
+ 📓 [DDPM Example Notebook](https://github.com/LoqmanSamani/TorchDiff/blob/systembiology/examples/ddpm.ipynb)
195
+
196
+ ---
197
+
198
+ ### 2. Denoising Diffusion Implicit Models (DDIM)
199
+ **Paper**: [Song et al., 2021](https://arxiv.org/abs/2010.02502)
200
+
201
+ DDIM accelerates sampling by reducing the number of denoising steps while maintaining image quality. TorchDiff supports both conditional and unconditional DDIM generation.
202
+
203
+ 📓 [DDIM Example Notebook](https://github.com/LoqmanSamani/TorchDiff/blob/systembiology/examples/ddim.ipynb)
204
+
205
+ ---
206
+
207
+ ### 3. Score-Based Generative Models via Stochastic Differential Equations (SDE)
208
+ **Paper**: [Song et al., 2021](https://arxiv.org/abs/2011.13456)
209
+
210
+ SDE-based models generalize diffusion via stochastic processes, supporting multiple formulations: **VE, VP, sub-VP**, and deterministic **ODE** variants. TorchDiff includes full training and sampling pipelines for both conditional and unconditional use cases.
211
+
212
+ 📓 [SDE Example Notebook](https://github.com/LoqmanSamani/TorchDiff/blob/systembiology/examples/sde.ipynb)
213
+
214
+ ---
215
+
216
+ ### 4. Latent Diffusion Models (LDM)
217
+ **Paper**: [Rombach et al., 2022](https://arxiv.org/abs/2112.10752)
218
+
219
+ LDMs operate in a compressed latent space using a VAE, enabling **efficient high-resolution image synthesis** with reduced computational cost. TorchDiff supports using DDPM, DDIM, or SDE as the diffusion backbone in latent space.
220
+
221
+ 📓 [LDM Example Notebook](https://github.com/LoqmanSamani/TorchDiff/blob/systembiology/examples/ldm.ipynb)
222
+
223
+ ---
224
+
225
+ ### 5. UnCLIP (Hierarchical Text-Conditional Image Generation with CLIP Latents)
226
+ **Paper**: [Ramesh et al., 2022](https://arxiv.org/abs/2204.06125)
227
+
228
+ UnCLIP, the architecture behind *DALL·E 2*, leverages **CLIP latents** to enable hierarchical text-to-image generation. It first maps text into CLIP’s multimodal embedding space, then performs diffusion-based generation in that space, followed by refinement in pixel space.
229
+
230
+ Training UnCLIP is significantly more complex than other diffusion families, and thus a minimal example is not shown here.
231
+
232
+ 📓 [UnCLIP Example Notebook](https://github.com/LoqmanSamani/TorchDiff/blob/systembiology/examples/unclip.ipynb)
233
+
234
+ ---
235
+
236
+ ## ๐Ÿ” License
237
+ Released under the [MIT License](https://github.com/LoqmanSamani/TorchDiff/blob/systembiology/LICENSE).
238
+
239
+ ---
240
+
241
+ ## 🚧 Roadmap / Future Work
242
+ TorchDiff is under active development. Planned features include:
243
+ - 🧠 New diffusion variants and improved training algorithms.
244
+ - ⚡ Faster and more memory-efficient sampling.
245
+ - 🎯 Additional utilities to simplify experimentation.
246
+
247
+ ---
248
+
249
+ ## ๐Ÿค Contributing
250
+ Contributions are welcome!
251
+
252
+ - Open an [Issue](../../issues) to report bugs or request features.
253
+ - Submit a PR with improvements or new features.
254
+
255
+ Your feedback helps make TorchDiff better for the community.
256
+
257
+
258
+ ---
259
+
260
+ ## 📖 Citation
261
+
262
+ If you use **TorchDiff** in your research or project, please cite the original papers and this repository.
263
+
264
+ ### Core Diffusion Papers
265
+
266
+ ```bibtex
267
+ @article{ho2020denoising,
268
+ title={Denoising Diffusion Probabilistic Models},
269
+ author={Ho, Jonathan and Jain, Ajay and Abbeel, Pieter},
270
+ journal={Advances in Neural Information Processing Systems},
271
+ year={2020}
272
+ }
273
+
274
+ @article{song2021denoising,
275
+ title={Denoising Diffusion Implicit Models},
276
+ author={Song, Jiaming and Meng, Chenlin and Ermon, Stefano},
277
+ journal={International Conference on Learning Representations (ICLR)},
278
+ year={2021}
279
+ }
280
+
281
+ @article{song2021score,
282
+ title={Score-Based Generative Modeling through Stochastic Differential Equations},
283
+ author={Song, Yang and Sohl-Dickstein, Jascha and Kingma, Diederik P and Kumar, Abhishek and Ermon, Stefano and Poole, Ben},
284
+ journal={International Conference on Learning Representations (ICLR)},
285
+ year={2021}
286
+ }
287
+
288
+ @article{rombach2022high,
289
+ title={High-Resolution Image Synthesis with Latent Diffusion Models},
290
+ author={Rombach, Robin and Blattmann, Andreas and Lorenz, Dominik and Esser, Patrick and Ommer, Björn},
291
+ journal={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
292
+ year={2022}
293
+ }
294
+
295
+ @article{ramesh2022hierarchical,
296
+ title={Hierarchical Text-Conditional Image Generation with CLIP Latents},
297
+ author={Ramesh, Aditya and Pavlov, Mikhail and Goh, Gabriel and Gray, Scott and Voss, Chelsea and Radford, Alec and Chen, Mark and Sutskever, Ilya},
298
+ journal={arXiv preprint arXiv:2204.06125},
299
+ year={2022}
300
+ }
301
+ ```
302
+
303
+ ### TorchDiff Repository
304
+
305
+ ```bibtex
306
+ @misc{torchdiff2025,
307
+ author = {Samani, Loghman},
308
+ title = {TorchDiff: A Modular Diffusion Modeling Library in PyTorch},
309
+ year = {2025},
310
+ publisher = {GitHub},
311
+ journal = {GitHub repository},
312
+ howpublished = {\url{https://github.com/LoqmanSamani/TorchDiff}},
313
+ }
314
+ ```
315
+
@@ -0,0 +1,264 @@
1
+ # TorchDiff
2
+
3
+ <div align="center">
4
+ <img src="imgs/logo_.png" alt="TorchDiff Logo" width="300"/>
5
+ </div>
6
+
7
+ <div align="center">
8
+
9
+ [![License: MIT](https://img.shields.io/badge/license-MIT-red?style=plastic)](https://opensource.org/licenses/MIT)
10
+ [![PyTorch](https://img.shields.io/badge/PyTorch-white?style=plastic&logo=pytorch&logoColor=red)](https://pytorch.org/)
11
+ [![Version](https://img.shields.io/badge/version-2.0.0-blue?style=plastic)](https://pypi.org/project/torchdiff/)
12
+ [![Python](https://img.shields.io/badge/python-3.8%2B-blue?style=plastic&logo=python&logoColor=white)](https://www.python.org/)
13
+ [![Downloads](https://pepy.tech/badge/torchdiff)](https://pepy.tech/project/torchdiff)
14
+ [![Stars](https://img.shields.io/github/stars/LoqmanSamani/TorchDiff?style=plastic&color=yellow)](https://github.com/LoqmanSamani/TorchDiff)
15
+ [![Forks](https://img.shields.io/github/forks/LoqmanSamani/TorchDiff?style=plastic&color=orange)](https://github.com/LoqmanSamani/TorchDiff)
16
+ [![Issues](https://img.shields.io/github/issues/LoqmanSamani/TorchDiff?style=plastic&color=red)](https://github.com/LoqmanSamani/TorchDiff/issues)
17
+
18
+ </div>
19
+
20
+ ---
21
+
22
+ ## 🔎 Overview
23
+
24
+ **TorchDiff** is a PyTorch-based library for building and experimenting with diffusion models, inspired by leading research papers.
25
+
26
+ The **TorchDiff 2.0.0** release includes implementations of five major diffusion model families:
27
+ - **DDPM** (Denoising Diffusion Probabilistic Models)
28
+ - **DDIM** (Denoising Diffusion Implicit Models)
29
+ - **SDE-based Diffusion**
30
+ - **LDM** (Latent Diffusion Models)
31
+ - **UnCLIP** (the model powering OpenAI’s *DALL·E 2*)
32
+
33
+ These models support both **conditional** (e.g., text-to-image) and **unconditional** generation.
34
+
35
+ <div align="center">
36
+ <img src="imgs/mount.png" alt="Diffusion Model Process" width="1000"/>
37
+ <br>
38
+ <em>Image generated using Sora</em>
39
+ <br><br>
40
+ </div>
41
+
42
+ TorchDiff is designed with **modularity** in mind. Each model is broken down into reusable components:
43
+ - **Forward Diffusion**: Adds noise (e.g., `ForwardDDPM`).
44
+ - **Reverse Diffusion**: Removes noise to recover data (e.g., `ReverseDDPM`).
45
+ - **Variance Scheduler**: Controls noise schedules (e.g., `VarianceSchedulerDDPM`).
46
+ - **Training**: Full training pipelines (e.g., `TrainDDPM`).
47
+ - **Sampling**: Efficient inference and generation (e.g., `SampleDDPM`).
48
+
49
+ Additional utilities:
50
+ - **Noise Predictor**: A U-Net-like model with attention and time embeddings.
51
+ - **Text Encoder**: Transformer-based (e.g., BERT) for conditional generation.
52
+ - **Metrics**: Evaluation suite including MSE, PSNR, SSIM, FID, and LPIPS.
53
+
54
+ ---
55
+
56
+ ## ⚡ Quick Start
57
+
58
+ Here’s a minimal working example to train and sample with **DDPM** on dummy data:
59
+
60
+ ```python
61
+ import torch
62
+ import torch.nn as nn
63
+ from torchvision import datasets, transforms
64
+ from torch.utils.data import DataLoader
65
+
66
+ from torchdiff.ddpm import VarianceSchedulerDDPM, ForwardDDPM, ReverseDDPM, TrainDDPM, SampleDDPM
67
+ from torchdiff.utils import NoisePredictor
68
+
69
+ # Dataset (CIFAR10 for demo)
70
+ transform = transforms.Compose([
71
+ transforms.Resize(32),
72
+ transforms.ToTensor(),
73
+ transforms.Normalize((0.5,), (0.5,))
74
+ ])
75
+ train_dataset = datasets.CIFAR10(root="./data", train=True, download=True, transform=transform)
76
+ train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
77
+
78
+ # Model components
79
+ noise_pred = NoisePredictor(in_channels=3)
80
+ vs = VarianceSchedulerDDPM(num_steps=1000)
81
+ fwd, rev = ForwardDDPM(vs), ReverseDDPM(vs)
82
+
83
+ # Optimizer & loss
84
+ optim = torch.optim.Adam(noise_pred.parameters(), lr=1e-4)
85
+ loss_fn = nn.MSELoss()
86
+
87
+ # Training
88
+ trainer = TrainDDPM(
89
+ noise_predictor=noise_pred, forward_diffusion=fwd, reverse_diffusion=rev,
90
+ conditional_model=None, optimizer=optim, objective=loss_fn,
91
+ data_loader=train_loader, max_epochs=1, device="cpu"
92
+ )
93
+ trainer()
94
+
95
+ # Sampling
96
+ sampler = SampleDDPM(reverse_diffusion=rev, noise_predictor=noise_pred,
97
+ image_shape=(32, 32), batch_size=4, in_channels=3, device="cpu")
98
+ images = sampler()
99
+ print("Generated images shape:", images.shape)
100
+ ```
101
+
102
+ For detailed examples, check the [examples/](https://github.com/LoqmanSamani/TorchDiff/tree/systembiology/examples) directory.
103
+
104
+ ---
105
+
106
+ ## 📚 Resources
107
+ - 🌐 [Project Website](https://loqmansamani.github.io/torchdiff/)
108
+ - 📖 [API Reference](https://torchdiff.readthedocs.io/en/latest/index.html)
109
+
110
+ ---
111
+
112
+ ## ⚡ Installation
113
+
114
+ Install from **PyPI (recommended):**
115
+ ```bash
116
+ pip install torchdiff
117
+ ```
118
+
119
+ Or install from source for development:
120
+ ```bash
121
+ # Clone repository
122
+ git clone https://github.com/LoqmanSamani/TorchDiff.git
123
+ cd TorchDiff
124
+
125
+ # Install dependencies
126
+ pip install -r requirements.txt
127
+
128
+ # Install package
129
+ pip install .
130
+ ```
131
+
132
+ > Requires **Python 3.8+**. For GPU acceleration, ensure PyTorch is installed with the correct CUDA version.
133
+
134
+ ---
135
+
136
+ ## 🧩 Implemented Models
137
+
138
+ ### 1. Denoising Diffusion Probabilistic Models (DDPM)
139
+ **Paper**: [Ho et al., 2020](https://arxiv.org/abs/2006.11239)
140
+
141
+ DDPMs learn to reverse a gradual noise-adding process to generate high-quality images. TorchDiff provides a modular implementation for both unconditional and conditional (text-guided) generation.
142
+
143
+ 📓 [DDPM Example Notebook](https://github.com/LoqmanSamani/TorchDiff/blob/systembiology/examples/ddpm.ipynb)
144
+
145
+ ---
146
+
147
+ ### 2. Denoising Diffusion Implicit Models (DDIM)
148
+ **Paper**: [Song et al., 2021](https://arxiv.org/abs/2010.02502)
149
+
150
+ DDIM accelerates sampling by reducing the number of denoising steps while maintaining image quality. TorchDiff supports both conditional and unconditional DDIM generation.
151
+
152
+ 📓 [DDIM Example Notebook](https://github.com/LoqmanSamani/TorchDiff/blob/systembiology/examples/ddim.ipynb)
153
+
154
+ ---
155
+
156
+ ### 3. Score-Based Generative Models via Stochastic Differential Equations (SDE)
157
+ **Paper**: [Song et al., 2021](https://arxiv.org/abs/2011.13456)
158
+
159
+ SDE-based models generalize diffusion via stochastic processes, supporting multiple formulations: **VE, VP, sub-VP**, and deterministic **ODE** variants. TorchDiff includes full training and sampling pipelines for both conditional and unconditional use cases.
160
+
161
+ 📓 [SDE Example Notebook](https://github.com/LoqmanSamani/TorchDiff/blob/systembiology/examples/sde.ipynb)
162
+
163
+ ---
164
+
165
+ ### 4. Latent Diffusion Models (LDM)
166
+ **Paper**: [Rombach et al., 2022](https://arxiv.org/abs/2112.10752)
167
+
168
+ LDMs operate in a compressed latent space using a VAE, enabling **efficient high-resolution image synthesis** with reduced computational cost. TorchDiff supports using DDPM, DDIM, or SDE as the diffusion backbone in latent space.
169
+
170
+ 📓 [LDM Example Notebook](https://github.com/LoqmanSamani/TorchDiff/blob/systembiology/examples/ldm.ipynb)
171
+
172
+ ---
173
+
174
+ ### 5. UnCLIP (Hierarchical Text-Conditional Image Generation with CLIP Latents)
175
+ **Paper**: [Ramesh et al., 2022](https://arxiv.org/abs/2204.06125)
176
+
177
+ UnCLIP, the architecture behind *DALL·E 2*, leverages **CLIP latents** to enable hierarchical text-to-image generation. It first maps text into CLIP’s multimodal embedding space, then performs diffusion-based generation in that space, followed by refinement in pixel space.
178
+
179
+ Training UnCLIP is significantly more complex than other diffusion families, and thus a minimal example is not shown here.
180
+
181
+ 📓 [UnCLIP Example Notebook](https://github.com/LoqmanSamani/TorchDiff/blob/systembiology/examples/unclip.ipynb)
182
+
183
+ ---
184
+
185
+ ## ๐Ÿ” License
186
+ Released under the [MIT License](https://github.com/LoqmanSamani/TorchDiff/blob/systembiology/LICENSE).
187
+
188
+ ---
189
+
190
+ ## 🚧 Roadmap / Future Work
191
+ TorchDiff is under active development. Planned features include:
192
+ - 🧠 New diffusion variants and improved training algorithms.
193
+ - ⚡ Faster and more memory-efficient sampling.
194
+ - 🎯 Additional utilities to simplify experimentation.
195
+
196
+ ---
197
+
198
+ ## ๐Ÿค Contributing
199
+ Contributions are welcome!
200
+
201
+ - Open an [Issue](../../issues) to report bugs or request features.
202
+ - Submit a PR with improvements or new features.
203
+
204
+ Your feedback helps make TorchDiff better for the community.
205
+
206
+
207
+ ---
208
+
209
+ ## 📖 Citation
210
+
211
+ If you use **TorchDiff** in your research or project, please cite the original papers and this repository.
212
+
213
+ ### Core Diffusion Papers
214
+
215
+ ```bibtex
216
+ @article{ho2020denoising,
217
+ title={Denoising Diffusion Probabilistic Models},
218
+ author={Ho, Jonathan and Jain, Ajay and Abbeel, Pieter},
219
+ journal={Advances in Neural Information Processing Systems},
220
+ year={2020}
221
+ }
222
+
223
+ @article{song2021denoising,
224
+ title={Denoising Diffusion Implicit Models},
225
+ author={Song, Jiaming and Meng, Chenlin and Ermon, Stefano},
226
+ journal={International Conference on Learning Representations (ICLR)},
227
+ year={2021}
228
+ }
229
+
230
+ @article{song2021score,
231
+ title={Score-Based Generative Modeling through Stochastic Differential Equations},
232
+ author={Song, Yang and Sohl-Dickstein, Jascha and Kingma, Diederik P and Kumar, Abhishek and Ermon, Stefano and Poole, Ben},
233
+ journal={International Conference on Learning Representations (ICLR)},
234
+ year={2021}
235
+ }
236
+
237
+ @article{rombach2022high,
238
+ title={High-Resolution Image Synthesis with Latent Diffusion Models},
239
+ author={Rombach, Robin and Blattmann, Andreas and Lorenz, Dominik and Esser, Patrick and Ommer, Björn},
240
+ journal={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
241
+ year={2022}
242
+ }
243
+
244
+ @article{ramesh2022hierarchical,
245
+ title={Hierarchical Text-Conditional Image Generation with CLIP Latents},
246
+ author={Ramesh, Aditya and Pavlov, Mikhail and Goh, Gabriel and Gray, Scott and Voss, Chelsea and Radford, Alec and Chen, Mark and Sutskever, Ilya},
247
+ journal={arXiv preprint arXiv:2204.06125},
248
+ year={2022}
249
+ }
250
+ ```
251
+
252
+ ### TorchDiff Repository
253
+
254
+ ```bibtex
255
+ @misc{torchdiff2025,
256
+ author = {Samani, Loghman},
257
+ title = {TorchDiff: A Modular Diffusion Modeling Library in PyTorch},
258
+ year = {2025},
259
+ publisher = {GitHub},
260
+ journal = {GitHub repository},
261
+ howpublished = {\url{https://github.com/LoqmanSamani/TorchDiff}},
262
+ }
263
+ ```
264
+