ai-edge-torch-nightly 0.1.dev202405131930__py3-none-any.whl → 0.2.0.dev20240531__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ai-edge-torch-nightly might be problematic. Click here for more details.

Files changed (24) hide show
  1. ai_edge_torch/convert/fx_passes/build_aten_composite_pass.py +5 -2
  2. ai_edge_torch/convert/test/test_convert_composites.py +3 -0
  3. ai_edge_torch/generative/examples/stable_diffusion/__init__.py +14 -0
  4. ai_edge_torch/generative/examples/stable_diffusion/attention.py +106 -0
  5. ai_edge_torch/generative/examples/stable_diffusion/clip.py +79 -0
  6. ai_edge_torch/generative/examples/stable_diffusion/convert_to_tflite.py +107 -0
  7. ai_edge_torch/generative/examples/stable_diffusion/decoder.py +113 -0
  8. ai_edge_torch/generative/examples/stable_diffusion/diffusion.py +499 -0
  9. ai_edge_torch/generative/examples/stable_diffusion/encoder.py +67 -0
  10. ai_edge_torch/generative/examples/stable_diffusion/pipeline.py +222 -0
  11. ai_edge_torch/generative/examples/stable_diffusion/samplers/__init__.py +19 -0
  12. ai_edge_torch/generative/examples/stable_diffusion/samplers/k_euler.py +61 -0
  13. ai_edge_torch/generative/examples/stable_diffusion/samplers/k_euler_ancestral.py +65 -0
  14. ai_edge_torch/generative/examples/stable_diffusion/samplers/k_lms.py +73 -0
  15. ai_edge_torch/generative/examples/stable_diffusion/samplers/sampler.py +38 -0
  16. ai_edge_torch/generative/examples/stable_diffusion/tokenizer.py +108 -0
  17. ai_edge_torch/generative/examples/stable_diffusion/util.py +71 -0
  18. ai_edge_torch/generative/test/loader_test.py +80 -0
  19. ai_edge_torch/generative/utilities/loader.py +8 -4
  20. {ai_edge_torch_nightly-0.1.dev202405131930.dist-info → ai_edge_torch_nightly-0.2.0.dev20240531.dist-info}/METADATA +2 -2
  21. {ai_edge_torch_nightly-0.1.dev202405131930.dist-info → ai_edge_torch_nightly-0.2.0.dev20240531.dist-info}/RECORD +24 -8
  22. {ai_edge_torch_nightly-0.1.dev202405131930.dist-info → ai_edge_torch_nightly-0.2.0.dev20240531.dist-info}/LICENSE +0 -0
  23. {ai_edge_torch_nightly-0.1.dev202405131930.dist-info → ai_edge_torch_nightly-0.2.0.dev20240531.dist-info}/WHEEL +0 -0
  24. {ai_edge_torch_nightly-0.1.dev202405131930.dist-info → ai_edge_torch_nightly-0.2.0.dev20240531.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,222 @@
1
+ # Copyright 2024 The AI Edge Torch Authors.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+
16
+ import argparse
17
+ import os
18
+ from pathlib import Path
19
+ from typing import Dict, Optional
20
+
21
+ import numpy as np
22
+ from PIL import Image
23
+ from tqdm import tqdm
24
+
25
+ import ai_edge_torch.generative.examples.stable_diffusion.samplers as samplers
26
+ from ai_edge_torch.generative.examples.stable_diffusion.tokenizer import Tokenizer # NOQA
27
+ import ai_edge_torch.generative.examples.stable_diffusion.util as util
28
+ from ai_edge_torch.model import TfLiteModel
29
+
30
def _build_arg_parser():
  """Create the command-line parser used when this module runs as a script."""
  parser = argparse.ArgumentParser()

  # Locations of the tokenizer vocabulary and the three TFLite checkpoints.
  parser.add_argument(
      '--tokenizer_vocab_dir',
      required=True,
      type=str,
      help='Directory to the tokenizer vocabulary files, which include `merges.txt` and `vocab.json`',
  )
  parser.add_argument(
      '--clip_ckpt',
      required=True,
      type=str,
      help='Path to CLIP TFLite tflite file',
  )
  parser.add_argument(
      '--diffusion_ckpt',
      required=True,
      type=str,
      help='Path to diffusion tflite file',
  )
  parser.add_argument(
      '--decoder_ckpt',
      required=True,
      type=str,
      help='Path to decoder tflite file',
  )

  # Where the generated image is written.
  parser.add_argument(
      '--output_path',
      required=True,
      type=str,
      help='Path to the output generated image file.',
  )

  # Optional generation controls.
  parser.add_argument(
      '--prompt',
      type=str,
      default='a photograph of an astronaut riding a horse',
      help='The prompt to guide the image generation.',
  )
  parser.add_argument(
      '--n_inference_steps',
      type=int,
      default=20,
      help='The number of denoising steps.',
  )
  parser.add_argument(
      '--sampler',
      type=str,
      default='k_euler',
      choices=['k_euler', 'k_euler_ancestral', 'k_lms'],
      help='A sampler to be used to denoise the encoded image latents. Can be one of `k_lms, `k_euler`, or `k_euler_ancestral`.',
  )
  return parser


arg_parser = _build_arg_parser()
68
+
69
+
70
class StableDiffusion:
  """Bundle of the tokenizer and TFLite sub-models used by the pipeline.

  Attributes set by the constructor: `tokenizer`, `clip`, `decoder`,
  `diffusion`, and `encoder`. The `encoder` attribute only exists when an
  encoder checkpoint was supplied; callers detect it with `hasattr`.
  """

  def __init__(
      self,
      *,
      tokenizer_vocab_dir: str,
      clip_ckpt: str,
      encoder_ckpt: Optional[str] = None,
      diffusion_ckpt: str,
      decoder_ckpt: str
  ):
    """Load the tokenizer and every checkpoint that was provided.

    Args:
      tokenizer_vocab_dir: Directory passed to `Tokenizer`.
      clip_ckpt: Path loaded into `self.clip`.
      encoder_ckpt: Optional path loaded into `self.encoder`.
      diffusion_ckpt: Path loaded into `self.diffusion`.
      decoder_ckpt: Path loaded into `self.decoder`.
    """
    self.tokenizer = Tokenizer(tokenizer_vocab_dir)

    # Mandatory models, loaded in the same order as before.
    mandatory = (
        ('clip', clip_ckpt),
        ('decoder', decoder_ckpt),
        ('diffusion', diffusion_ckpt),
    )
    for attribute, checkpoint in mandatory:
      setattr(self, attribute, TfLiteModel.load(checkpoint))

    # The encoder is optional; leave the attribute undefined when absent.
    if encoder_ckpt is None:
      return
    self.encoder = TfLiteModel.load(encoder_ckpt)
87
+
88
+
89
def run_tflite_pipeline(
    model: StableDiffusion,
    prompt: str,
    output_path: str,
    uncond_prompt: Optional[str] = None,
    cfg_scale: float = 7.5,
    height: int = 512,
    width: int = 512,
    sampler: str = 'k_euler',
    n_inference_steps: int = 20,
    seed: Optional[int] = None,
    strength: float = 0.8,
    input_image: Optional[Image.Image] = None,
):
  """Run the stable diffusion pipeline with TFLite models and save the image.

  Args:
    model: StableDiffusion model bundle (tokenizer, clip, diffusion, decoder
      and, for image-to-image, encoder).
    prompt: The prompt to guide the image generation.
    output_path: The path to the generated output image. Missing parent
      directories are created.
    uncond_prompt: The prompt not to guide the image generation. `None` is
      treated as the empty string.
    cfg_scale: Guidance scale of classifier-free guidance. Higher guidance
      scale encourages to generate images that are closely linked to the text
      `prompt`, usually at the expense of lower image quality.
    height: The height in pixels of the generated image. Must be a multiple
      of 8.
    width: The width in pixels of the generated image. Must be a multiple
      of 8.
    sampler: A sampler to be used to denoise the encoded image latents. Can be
      one of `k_lms`, `k_euler`, or `k_euler_ancestral`.
    n_inference_steps: The number of denoising steps. More denoising steps
      usually lead to a higher quality image at the expense of slower
      inference. This parameter will be modulated by `strength`.
    seed: A seed for `np.random` to make generation deterministic.
    strength: Conceptually, indicates how much to transform the reference
      `input_image`. Must be greater than 0 and at most 1. `input_image` will
      be used as a starting point, adding more noise to it the larger the
      `strength`. When `strength` is 1, added noise will be maximum and the
      denoising process will run for the full number of iterations specified
      in `n_inference_steps`.
    input_image: Image which is served as the starting point for the image
      generation. Requires `model` to have been built with an encoder.

  Raises:
    ValueError: If `strength`, `height`/`width` or `sampler` is invalid.
    AttributeError: If `input_image` is given but `model` has no encoder.
  """
  # The documented contract allows strength == 1 (full denoising schedule).
  if not 0 < strength <= 1:
    raise ValueError('strength must be between 0 and 1')
  if height % 8 or width % 8:
    raise ValueError('height and width must be a multiple of 8')
  if seed is not None:
    np.random.seed(seed)
  if uncond_prompt is None:
    uncond_prompt = ''

  sampler_classes = {
      'k_lms': samplers.KLMSSampler,
      'k_euler': samplers.KEulerSampler,
      'k_euler_ancestral': samplers.KEulerAncestralSampler,
  }
  if sampler not in sampler_classes:
    raise ValueError(
        'Unknown sampler value %s. '
        'Accepted values are {k_lms, k_euler, k_euler_ancestral}' % sampler
    )
  scheduler = sampler_classes[sampler](n_inference_steps=n_inference_steps)

  # Text embedding: conditional context first, unconditional second, so the
  # diffusion output can be split back in the same order below.
  cond_tokens = model.tokenizer.encode(prompt)
  cond_context = model.clip(np.array(cond_tokens), signature_name='encode')
  uncond_tokens = model.tokenizer.encode(uncond_prompt)
  uncond_context = model.clip(np.array(uncond_tokens), signature_name='encode')
  context = np.concatenate([cond_context, uncond_context], axis=0)
  noise_shape = (1, 4, height // 8, width // 8)

  # Initialization starts from input_image if any, otherwise, starts from a
  # random sampling.
  if input_image is not None:
    if not hasattr(model, 'encoder'):
      raise AttributeError(
          'Stable Diffusion must be initilaized with encoder to accept input_image.'
      )
    input_image = input_image.resize((width, height))
    input_image_np = np.array(input_image).astype(np.float32)
    # Rescale the float array (not the PIL image, which does not support the
    # in-place arithmetic performed by util.rescale).
    input_image_np = util.rescale(input_image_np, (0, 255), (-1, 1))
    # util.move_channel transposes four axes, so add the batch axis first:
    # (H, W, C) -> (1, H, W, C) -> (1, C, H, W).
    input_image_np = np.expand_dims(input_image_np, axis=0)
    input_image_np = util.move_channel(input_image_np, to='first')
    encoder_noise = np.random.normal(size=noise_shape).astype(np.float32)
    latents = model.encoder(input_image_np, encoder_noise)
    latents_noise = np.random.normal(size=noise_shape).astype(np.float32)
    scheduler.set_strength(strength=strength)
    latents += latents_noise * scheduler.initial_scale
  else:
    latents = np.random.normal(size=noise_shape).astype(np.float32)
    latents *= scheduler.initial_scale

  # Diffusion process.
  for timestep in tqdm(scheduler.timesteps):
    time_embedding = util.get_time_embedding(timestep)

    # Duplicate the latents so one batch entry pairs with each context.
    input_latents = latents * scheduler.get_input_scale()
    input_latents = input_latents.repeat(2, axis=0)
    output = model.diffusion(
        input_latents, context, time_embedding, signature_name='diffusion'
    )
    output_cond, output_uncond = np.split(output, 2, axis=0)
    # Classifier-free guidance.
    output = cfg_scale * (output_cond - output_uncond) + output_uncond

    latents = scheduler.step(latents, output)

  # Image decoding.
  images = model.decoder(latents, signature_name='decode')
  images = util.rescale(images, (-1, 1), (0, 255), clamp=True)
  images = util.move_channel(images, to='last')
  # exist_ok makes this a no-op when the directory is already there.
  Path(output_path).parent.mkdir(parents=True, exist_ok=True)
  Image.fromarray(images[0].astype(np.uint8)).save(output_path)
207
+
208
+
209
if __name__ == '__main__':
  # Script entry point: parse flags, load the models, then generate one image.
  cli_args = arg_parser.parse_args()
  pipeline_model = StableDiffusion(
      tokenizer_vocab_dir=cli_args.tokenizer_vocab_dir,
      clip_ckpt=cli_args.clip_ckpt,
      diffusion_ckpt=cli_args.diffusion_ckpt,
      decoder_ckpt=cli_args.decoder_ckpt,
  )
  run_tflite_pipeline(
      pipeline_model,
      prompt=cli_args.prompt,
      output_path=cli_args.output_path,
      sampler=cli_args.sampler,
      n_inference_steps=cli_args.n_inference_steps,
  )
@@ -0,0 +1,19 @@
1
+ # Copyright 2024 The AI Edge Torch Authors.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+
16
+ from .k_euler import KEulerSampler
17
+ from .k_euler_ancestral import KEulerAncestralSampler
18
+ from .k_lms import KLMSSampler
19
+ from .sampler import SamplerInterface
@@ -0,0 +1,61 @@
1
+ # Copyright 2024 The AI Edge Torch Authors.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+
16
+ import numpy as np
17
+
18
+ from ai_edge_torch.generative.examples.stable_diffusion import util
19
+ from ai_edge_torch.generative.examples.stable_diffusion.samplers.sampler import SamplerInterface # NOQA
20
+
21
+
22
class KEulerSampler(SamplerInterface):
  """Sampler that moves latents by `output * (sigma_next - sigma_current)`.

  Sigmas are derived from `util.get_alphas_cumprod` and interpolated (in log
  space) at `n_inference_steps` evenly spaced timesteps; a trailing 0 is
  appended so the final step has a target sigma.
  """

  def __init__(self, n_inference_steps=50, n_training_steps=1000):
    """Build the timestep and sigma schedules.

    Args:
      n_inference_steps: Number of timesteps in the sampling schedule.
      n_training_steps: Length of the training schedule being interpolated.
    """
    self.n_inference_steps = n_inference_steps
    self.n_training_steps = n_training_steps
    self.step_count = 0
    self.timesteps = np.linspace(n_training_steps - 1, 0, n_inference_steps)

    cumulative_alphas = util.get_alphas_cumprod(n_training_steps=n_training_steps)
    training_log_sigmas = np.log(
        ((1 - cumulative_alphas) / cumulative_alphas) ** 0.5
    )
    # Interpolate in log space at the (possibly fractional) timesteps.
    schedule = np.exp(
        np.interp(self.timesteps, range(n_training_steps), training_log_sigmas)
    )
    self.sigmas = np.append(schedule, 0)
    self.initial_scale = self.sigmas.max()

  def get_input_scale(self, step_count=None):
    """Return `1 / sqrt(sigma**2 + 1)` for the given (or current) step."""
    index = self.step_count if step_count is None else step_count
    sigma = self.sigmas[index]
    return 1 / (sigma**2 + 1) ** 0.5

  def set_strength(self, strength=1):
    """Skip the leading part of the schedule according to `strength`.

    The first `n_inference_steps - int(n_inference_steps * strength)` steps
    are dropped; `timesteps`, `initial_scale` and `step_count` are updated to
    start from the first remaining step.
    """
    skipped = self.n_inference_steps - int(self.n_inference_steps * strength)
    full_schedule = np.linspace(
        self.n_training_steps - 1, 0, self.n_inference_steps
    )
    self.timesteps = full_schedule[skipped:]
    self.initial_scale = self.sigmas[skipped]
    self.step_count = skipped

  def step(self, latents, output):
    """Advance `latents` in place by one step and return it."""
    current = self.step_count
    self.step_count = current + 1

    sigma_now = self.sigmas[current]
    sigma_next = self.sigmas[current + 1]
    latents += output * (sigma_next - sigma_now)
    return latents
@@ -0,0 +1,65 @@
1
+ # Copyright 2024 The AI Edge Torch Authors.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+
16
+ import numpy as np
17
+
18
+ from ai_edge_torch.generative.examples.stable_diffusion import util
19
+ from ai_edge_torch.generative.examples.stable_diffusion.samplers.sampler import SamplerInterface # NOQA
20
+
21
+
22
class KEulerAncestralSampler(SamplerInterface):
  """Sampler that takes a deterministic step and then re-injects noise.

  Each step moves the latents towards `sigma_down` and adds fresh Gaussian
  noise scaled by `sigma_up`. The sigma schedule is built from
  `util.get_alphas_cumprod`, interpolated in log space, with a trailing 0.
  """

  def __init__(self, n_inference_steps=50, n_training_steps=1000):
    """Build the timestep and sigma schedules.

    Args:
      n_inference_steps: Number of timesteps in the sampling schedule.
      n_training_steps: Length of the training schedule being interpolated.
    """
    self.n_inference_steps = n_inference_steps
    self.n_training_steps = n_training_steps
    self.step_count = 0
    self.timesteps = np.linspace(n_training_steps - 1, 0, n_inference_steps)

    cumulative_alphas = util.get_alphas_cumprod(n_training_steps=n_training_steps)
    training_log_sigmas = np.log(
        ((1 - cumulative_alphas) / cumulative_alphas) ** 0.5
    )
    # Interpolate in log space at the (possibly fractional) timesteps.
    schedule = np.exp(
        np.interp(self.timesteps, range(n_training_steps), training_log_sigmas)
    )
    self.sigmas = np.append(schedule, 0)
    self.initial_scale = self.sigmas.max()

  def get_input_scale(self, step_count=None):
    """Return `1 / sqrt(sigma**2 + 1)` for the given (or current) step."""
    index = self.step_count if step_count is None else step_count
    sigma = self.sigmas[index]
    return 1 / (sigma**2 + 1) ** 0.5

  def set_strength(self, strength=1):
    """Skip the leading part of the schedule according to `strength`.

    The first `n_inference_steps - int(n_inference_steps * strength)` steps
    are dropped; `timesteps`, `initial_scale` and `step_count` are updated to
    start from the first remaining step.
    """
    skipped = self.n_inference_steps - int(self.n_inference_steps * strength)
    full_schedule = np.linspace(
        self.n_training_steps - 1, 0, self.n_inference_steps
    )
    self.timesteps = full_schedule[skipped:]
    self.initial_scale = self.sigmas[skipped]
    self.step_count = skipped

  def step(self, latents, output):
    """Advance `latents` in place by one noisy step and return it."""
    current = self.step_count
    self.step_count = current + 1

    sigma_now = self.sigmas[current]
    sigma_next = self.sigmas[current + 1]
    # Split the target sigma into a noise part and a deterministic part.
    noise_scale = sigma_next * (1 - (sigma_next**2 / sigma_now**2)) ** 0.5
    deterministic_target = sigma_next**2 / sigma_now

    latents += output * (deterministic_target - sigma_now)
    fresh_noise = np.random.normal(size=latents.shape)
    latents += fresh_noise * noise_scale
    return latents
@@ -0,0 +1,73 @@
1
+ # Copyright 2024 The AI Edge Torch Authors.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+
16
+ import numpy as np
17
+
18
+ from ai_edge_torch.generative.examples.stable_diffusion import util
19
+ from ai_edge_torch.generative.examples.stable_diffusion.samplers.sampler import SamplerInterface # NOQA
20
+
21
+
22
class KLMSSampler(SamplerInterface):
  """Linear multistep sampler that reuses up to `lms_order` past outputs.

  For every retained output a Lagrange basis polynomial over the corresponding
  sigmas is integrated from the current sigma to the next one; the integral is
  the weight of that output in the latent update.
  """

  def __init__(self, n_inference_steps=50, n_training_steps=1000, lms_order=4):
    """Build the timestep and sigma schedules.

    Args:
      n_inference_steps: Number of timesteps in the sampling schedule.
      n_training_steps: Length of the training schedule being interpolated.
      lms_order: Maximum number of diffusion outputs kept for the update.
    """
    timesteps = np.linspace(n_training_steps - 1, 0, n_inference_steps)

    alphas_cumprod = util.get_alphas_cumprod(n_training_steps=n_training_steps)
    sigmas = ((1 - alphas_cumprod) / alphas_cumprod) ** 0.5
    # Interpolate in log space at the (possibly fractional) timesteps.
    log_sigmas = np.log(sigmas)
    log_sigmas = np.interp(timesteps, range(n_training_steps), log_sigmas)
    sigmas = np.exp(log_sigmas)
    # Trailing 0 gives the final step a target sigma.
    sigmas = np.append(sigmas, 0)

    self.sigmas = sigmas
    self.initial_scale = sigmas.max()
    self.timesteps = timesteps
    self.n_inference_steps = n_inference_steps
    self.n_training_steps = n_training_steps
    self.lms_order = lms_order
    self.step_count = 0
    self.outputs = []

  def get_input_scale(self, step_count=None):
    """Return `1 / sqrt(sigma**2 + 1)` for the given (or current) step."""
    if step_count is None:
      step_count = self.step_count
    sigma = self.sigmas[step_count]
    return 1 / (sigma**2 + 1) ** 0.5

  def set_strength(self, strength=1):
    """Skip the leading part of the schedule according to `strength`.

    The first `n_inference_steps - int(n_inference_steps * strength)` steps
    are dropped; `timesteps`, `initial_scale` and `step_count` are updated to
    start from the first remaining step.
    """
    start_step = self.n_inference_steps - int(self.n_inference_steps * strength)
    self.timesteps = np.linspace(self.n_training_steps - 1, 0, self.n_inference_steps)
    self.timesteps = self.timesteps[start_step:]
    self.initial_scale = self.sigmas[start_step]
    self.step_count = start_step

  def step(self, latents, output):
    """Advance `latents` in place by one multistep update and return it."""
    t = self.step_count
    self.step_count += 1

    # Newest output first; keep at most `lms_order` entries.
    self.outputs = [output] + self.outputs[: self.lms_order - 1]
    order = len(self.outputs)

    # `np.trapz` is deprecated in NumPy 2.x in favour of `np.trapezoid`;
    # use whichever this NumPy provides.
    trapezoid = getattr(np, 'trapezoid', None)
    if trapezoid is None:
      trapezoid = np.trapz

    # Integrate polynomial by trapezoidal approx. method for 81 points.
    # The grid is the same for every retained output, so build it once.
    x = np.linspace(self.sigmas[t], self.sigmas[t + 1], 81)
    for i, past_output in enumerate(self.outputs):
      # Lagrange basis polynomial for node i over the retained sigmas.
      y = np.ones(81)
      for j in range(order):
        if i == j:
          continue
        y *= x - self.sigmas[t - j]
        y /= self.sigmas[t - i] - self.sigmas[t - j]
      lms_coeff = trapezoid(y=y, x=x)
      latents += lms_coeff * past_output
    return latents
@@ -0,0 +1,38 @@
1
+ # Copyright 2024 The AI Edge Torch Authors.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+
16
+ import abc
17
+
18
+ import numpy as np
19
+
20
+
21
class SamplerInterface(abc.ABC):
  """Abstract interface every diffusion sampler must implement.

  The abstract bodies raise `NotImplementedError` so that an accidental
  `super()` call fails loudly instead of returning the `NotImplemented`
  sentinel, which is reserved for binary-operator special methods.
  """

  @abc.abstractmethod
  def get_input_scale(self, step_count: int = 1) -> float:
    """Get the input scale of the random samples from sampled distribution.

    Args:
      step_count: Index of the sampling step to compute the scale for.

    Returns:
      The factor the latents are multiplied by before the diffusion model.
    """
    raise NotImplementedError

  @abc.abstractmethod
  def set_strength(self, strength: float = 1) -> None:
    """Set the strength of initial step.

    Conceptually, indicates how much to transform the reference `input_images`.

    Args:
      strength: Amount of the schedule to run.
    """
    raise NotImplementedError

  @abc.abstractmethod
  def step(self, latents: np.ndarray, output: np.ndarray) -> np.ndarray:
    """Update latents from the diffusion output by a step.

    Args:
      latents: Current latents.
      output: Output of the diffusion model for the current step.

    Returns:
      The updated latents.
    """
    raise NotImplementedError
@@ -0,0 +1,108 @@
1
+ # Copyright 2024 The AI Edge Torch Authors.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+
16
+ import functools
17
+ import json
18
+ import os
19
+ from typing import List, Tuple
20
+ import unicodedata
21
+
22
+ import regex as re
23
+
24
+
25
def create_bytes_table() -> dict:
  """Map every byte value 0..255 to a single character.

  Bytes whose character is not in a Unicode control ('C*') or separator
  ('Z*') category map to themselves. The remaining bytes are assigned, in
  ascending byte order, the code points 256, 257, ...
  """
  special_bytes = [
      value
      for value in range(256)
      if unicodedata.category(chr(value))[0] in ['C', 'Z']
  ]
  # Position in `special_bytes` decides the replacement code point.
  replacements = {
      value: chr(256 + offset) for offset, value in enumerate(special_bytes)
  }
  return {value: replacements.get(value, chr(value)) for value in range(256)}
36
+
37
+
38
def pairwise(seq):
  """Return an iterator over consecutive pairs `(seq[i], seq[i + 1])`.

  `seq` must be re-iterable (e.g. a list, tuple or string) because two
  independent iterators are created from it. An empty or single-element
  `seq` yields no pairs.
  """
  a = iter(seq)
  b = iter(seq)
  # Advance the second iterator by one; the default avoids leaking
  # StopIteration to the caller when `seq` is empty.
  next(b, None)
  return zip(a, b)
43
+
44
+
45
class Tokenizer:
  """Byte-pair-encoding tokenizer driven by `vocab.json` and `merges.txt`."""

  def __init__(self, vocab_dir: str):
    """Load the vocabulary and merge ranks from `vocab_dir`.

    Args:
      vocab_dir: Directory containing `vocab.json` and `merges.txt`.
    """
    with open(os.path.join(vocab_dir, 'vocab.json'), encoding='utf-8') as f:
      self.vocab = json.load(f)

    with open(os.path.join(vocab_dir, 'merges.txt'), encoding='utf-8') as f:
      lines = f.read().split('\n')
      # Drop the first line and the last split element.
      # NOTE(review): presumably a header line and the empty string after a
      # trailing newline -- confirm against the shipped merges.txt.
      lines = lines[1:-1]
      # Lower index means the merge is applied earlier.
      self.merges = {tuple(bigram.split()): i for i, bigram in enumerate(lines)}

    self.bos_token = self.vocab['<|startoftext|>']
    self.eos_token = self.vocab['<|endoftext|>']
    self.pad_token = self.vocab['<|endoftext|>']
    self.max_length = 77
    self.bytes_table = create_bytes_table()
    self.chunk_pattern = re.compile(
        r"""<\|startoftext\|>|<\|endoftext\|>|'s|'t|'re|'ve|'m|'ll|'d|[\p{L}]+|[\p{N}]|[^\s\p{L}\p{N}]+""",
        re.IGNORECASE,
    )

  def encode(self, text: str) -> List[int]:
    """Convert `text` into exactly `self.max_length` token ids.

    The result starts with the BOS token, is terminated by the EOS token and
    padded with the pad token. Prompts that are too long are truncated so
    that the EOS token is still present in the last position.

    Args:
      text: The prompt to tokenize.

    Returns:
      A list of `self.max_length` token ids.
    """
    text = unicodedata.normalize('NFC', text)
    text = re.sub(r'\s+', ' ', text)
    text = text.strip()
    text = text.lower()

    tokens = [self.bos_token]
    for chunk in self.chunk_pattern.findall(text):
      # Map the UTF-8 bytes of the chunk to the vocabulary's characters.
      chunk = ''.join(self.bytes_table[byte] for byte in chunk.encode('utf-8'))
      tokens.extend(self.vocab[word] for word in self.bpe(chunk))

    # Reserve the final slot for EOS so truncation never removes it.
    tokens = tokens[: self.max_length - 1]
    tokens.append(self.eos_token)

    pad_length = self.max_length - len(tokens)
    tokens += [self.pad_token] * pad_length
    return tokens

  def encode_batch(self, texts: List[str]) -> List[List[int]]:
    """Encode every string in `texts` with `encode`."""
    return [self.encode(text) for text in texts]

  # NOTE(review): lru_cache on a method keys on `self` and keeps the instance
  # alive for the lifetime of the cache; kept as-is to preserve the caching
  # behaviour.
  @functools.lru_cache(maxsize=10000)
  def bpe(self, chunk: str) -> Tuple[str, ...]:
    """Split `chunk` into sub-word units by repeatedly applying merges.

    Args:
      chunk: A chunk already mapped through `self.bytes_table`.

    Returns:
      The sub-word strings; the last one carries the `</w>` suffix. An empty
      chunk yields an empty tuple.
    """
    if not chunk:
      return ()

    words = list(chunk)
    words[-1] += '</w>'

    while len(words) > 1:
      valid_pairs = [
          pair for pair in zip(words, words[1:]) if pair in self.merges
      ]
      if not valid_pairs:
        break

      # Apply the merge with the lowest rank first.
      first, second = min(valid_pairs, key=lambda pair: self.merges[pair])

      new_words = []
      for word in words:
        if word == second and new_words and new_words[-1] == first:
          new_words[-1] = first + second
        else:
          new_words.append(word)
      words = new_words

    return tuple(words)
@@ -0,0 +1,71 @@
1
+ # Copyright 2024 The AI Edge Torch Authors.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+
16
+ import os
17
+
18
+ import numpy as np
19
+ import torch
20
+
21
+
22
def get_time_embedding(timestep):
  """Return the sinusoidal embedding of a single timestep.

  160 frequencies `10000 ** (-k / 160)` for k in 0..159 are multiplied by the
  timestep; the cosines followed by the sines form a float32 tensor of shape
  (1, 320).
  """
  exponents = -torch.arange(start=0, end=160, dtype=torch.float32) / 160
  frequencies = torch.pow(10000, exponents)
  step = torch.tensor([timestep], dtype=torch.float32)
  angles = step.unsqueeze(-1) * frequencies.unsqueeze(0)
  return torch.cat([torch.cos(angles), torch.sin(angles)], dim=-1)
26
+
27
+
28
def get_alphas_cumprod(beta_start=0.00085, beta_end=0.0120, n_training_steps=1000):
  """Return the cumulative product of `1 - beta` over the training schedule.

  The betas are the squares of `n_training_steps` float32 values spaced
  linearly between `sqrt(beta_start)` and `sqrt(beta_end)`.
  """
  sqrt_betas = np.linspace(
      beta_start**0.5, beta_end**0.5, n_training_steps, dtype=np.float32
  )
  beta_schedule = sqrt_betas**2
  return np.cumprod(1.0 - beta_schedule, axis=0)
36
+
37
+
38
def get_file_path(filename, url=None):
  """Return the path of `filename` inside the sibling `data` directory.

  The `data` directory is resolved relative to the parent of the directory
  containing this module. `url` is accepted but not used.
  """
  this_file = os.path.abspath(__file__)
  package_parent = os.path.dirname(os.path.dirname(this_file))
  return os.path.join(package_parent, "data", filename)
43
+
44
+
45
def move_channel(image, to):
  """Move the channel axis of a 4-axis image batch.

  Args:
    image: A `torch.Tensor` or `np.ndarray` with four axes.
    to: "first" reorders (N, H, W, C) -> (N, C, H, W); "last" reorders
      (N, C, H, W) -> (N, H, W, C).

  Returns:
    The permuted tensor or transposed array.

  Raises:
    ValueError: If `to` is neither "first" nor "last".
    TypeError: If `image` is neither a `torch.Tensor` nor an `np.ndarray`
      (previously this case silently returned `None`).
  """
  if to == "first":
    axes = (0, 3, 1, 2)
  elif to == "last":
    axes = (0, 2, 3, 1)
  else:
    raise ValueError("to must be one of the following: first, last")

  if isinstance(image, torch.Tensor):
    return image.permute(*axes)
  if isinstance(image, np.ndarray):
    return image.transpose(*axes)
  raise TypeError(
      "image must be a torch.Tensor or np.ndarray, got %s" % type(image).__name__
  )
58
+
59
+
60
def rescale(x, old_range, new_range, clamp=False):
  """Linearly map `x` from `old_range` to `new_range`.

  The arithmetic is performed in place on `x`, so the caller's object is
  modified. When `clamp` is true and `x` is a `torch.Tensor` or `np.ndarray`,
  a clamped copy limited to `new_range` is returned; otherwise `x` itself is
  returned.

  Args:
    x: Value to rescale.
    old_range: `(min, max)` of the current range.
    new_range: `(min, max)` of the target range.
    clamp: Whether to limit the result to `new_range`.
  """
  source_low, source_high = old_range
  target_low, target_high = new_range

  # Shift to zero, scale, then shift to the target minimum (all in place).
  x -= source_low
  x *= (target_high - target_low) / (source_high - source_low)
  x += target_low

  if not clamp:
    return x
  if isinstance(x, torch.Tensor):
    return x.clamp(target_low, target_high)
  if isinstance(x, np.ndarray):
    return x.clip(target_low, target_high)
  return x