PyPI - ai-edge-torch-nightly - Versions diffs - 0.2.0.dev20240527__py3-none-any.whl → 0.2.0.dev20240602__py3-none-any.whl - Mend

ai-edge-torch-nightly 0.2.0.dev20240527__py3-none-any.whl → 0.2.0.dev20240602__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of ai-edge-torch-nightly might be problematic. Click here for more details.

Files changed (20) hide show

ai_edge_torch/convert/fx_passes/build_aten_composite_pass.py CHANGED Viewed

@@ -141,8 +141,11 @@ def _aten_avg_pool2d(gm: GraphModule, node: Node):
     # Only wrap in a composite when the underlying converter can handle it.
     # TODO We should be able to remove this if the converter can inline composites when it can not handle them.
-    # We don't cover any cases where ceil_mode is True or divisor_override is set.
-    if full_kwargs["ceil_mode"] or full_kwargs["divisor_override"] is not None:
+    # We don't cover any cases where the divisor_override is set.
+    if full_kwargs["divisor_override"] is not None:
+      return op(*args, **kwargs)
+    if full_kwargs["ceil_mode"] and not full_kwargs["count_include_pad"]:
       return op(*args, **kwargs)
     # We also can not cover a case where count_include_pad is False but the padding is custom.

ai_edge_torch/convert/test/test_convert_composites.py CHANGED Viewed

@@ -51,6 +51,7 @@ class TestConvertComposites(unittest.TestCase):
   @parameterized.parameterized.expand(
       [
+          # input_size, kernel_size, stride, padding, ceil_mode, count_include_pad, divisor_override
           # no padding, stride = 1
           ([1, 3, 6, 6], [3, 3], [1, 1], [0, 0], False, True, None),
           # add stride
@@ -67,6 +68,8 @@ class TestConvertComposites(unittest.TestCase):
           ([1, 3, 6, 6], [3, 3], [1, 1], [1, 1], False, False, None),
           # ceil_mode = True
           ([1, 3, 6, 6], [3, 3], [1, 1], [1, 1], True, True, None),
+          # ceil_mode = True, stride=[3, 3]
+          ([1, 3, 6, 6], [3, 3], [3, 3], [1, 1], True, True, None),
           # set divisor_override
           ([1, 3, 6, 6], [3, 3], [1, 1], 0, False, True, 6),
           # padding set to one number

ai_edge_torch/generative/examples/stable_diffusion/clip.py CHANGED Viewed

@@ -68,6 +68,7 @@ class CLIP(nn.Module):
     self.layers = nn.ModuleList([CLIPLayer(12, 768) for i in range(12)])
     self.layernorm = nn.LayerNorm(768)
+  @torch.inference_mode
   def forward(self, tokens: torch.LongTensor) -> torch.FloatTensor:
     tokens = tokens.type(torch.long)

ai_edge_torch/generative/examples/stable_diffusion/convert_to_tflite.py ADDED Viewed

@@ -0,0 +1,107 @@
+# Copyright 2024 The AI Edge Torch Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+import os
+from pathlib import Path
+import torch
+import ai_edge_torch
+from ai_edge_torch.generative.examples.stable_diffusion.clip import CLIP
+from ai_edge_torch.generative.examples.stable_diffusion.decoder import Decoder
+from ai_edge_torch.generative.examples.stable_diffusion.diffusion import Diffusion  # NOQA
+from ai_edge_torch.generative.examples.stable_diffusion.encoder import Encoder
+import ai_edge_torch.generative.examples.stable_diffusion.util as util
+@torch.inference_mode
+def convert_stable_diffusion_to_tflite(
+    clip_ckpt_path: str,
+    encoder_ckpt_path: str,
+    diffusion_ckpt_path: str,
+    decoder_ckpt_path: str,
+    image_height: int = 512,
+    image_width: int = 512,
+):
+  clip = CLIP()
+  clip.load_state_dict(torch.load(clip_ckpt_path))
+  encoder = Encoder()
+  encoder.load_state_dict(torch.load(encoder_ckpt_path))
+  diffusion = Diffusion()
+  diffusion.load_state_dict(torch.load(diffusion_ckpt_path))
+  decoder = Decoder()
+  decoder.load_state_dict(torch.load(decoder_ckpt_path))
+  # Tensors used to trace the model graph during conversion.
+  n_tokens = 77
+  timestamp = 0
+  len_prompt = 1
+  prompt_tokens = torch.full((1, n_tokens), 0, dtype=torch.long)
+  input_image = torch.full((1, 3, image_height, image_width), 0, dtype=torch.float32)
+  noise = torch.full(
+      (len_prompt, 4, image_height // 8, image_width // 8), 0, dtype=torch.float32
+  )
+  input_latents = encoder(input_image, noise)
+  context_cond = clip(prompt_tokens)
+  context_uncond = torch.zeros_like(context_cond)
+  context = torch.cat([context_cond, context_uncond], axis=0)
+  time_embedding = util.get_time_embedding(timestamp)
+  # CLIP text encoder
+  ai_edge_torch.signature('encode', clip, (prompt_tokens,)).convert().export(
+      '/tmp/stable_diffusion/clip.tflite'
+  )
+  # TODO(yichunk): convert to multi signature tflite model.
+  # Image encoder
+  ai_edge_torch.signature('encode', encoder, (input_image, noise)).convert().export(
+      '/tmp/stable_diffusion/encoder.tflite'
+  )
+  # Diffusion
+  ai_edge_torch.signature(
+      'diffusion',
+      diffusion,
+      (torch.repeat_interleave(input_latents, 2, 0), context, time_embedding),
+  ).convert().export('/tmp/stable_diffusion/diffusion.tflite')
+  # Image decoder
+  ai_edge_torch.signature('decode', decoder, (input_latents,)).convert().export(
+      '/tmp/stable_diffusion/decoder.tflite'
+  )
+if __name__ == '__main__':
+  convert_stable_diffusion_to_tflite(
+      clip_ckpt_path=os.path.join(
+          Path.home(), 'Downloads/stable_diffusion_data/ckpt/clip.pt'
+      ),
+      encoder_ckpt_path=os.path.join(
+          Path.home(), 'Downloads/stable_diffusion_data/ckpt/encoder.pt'
+      ),
+      diffusion_ckpt_path=os.path.join(
+          Path.home(), 'Downloads/stable_diffusion_data/ckpt/diffusion.pt'
+      ),
+      decoder_ckpt_path=os.path.join(
+          Path.home(), 'Downloads/stable_diffusion_data/ckpt/decoder.pt'
+      ),
+      image_height=512,
+      image_width=512,
+  )

ai_edge_torch/generative/examples/stable_diffusion/decoder.py CHANGED Viewed

@@ -13,6 +13,7 @@
 # limitations under the License.
 # ==============================================================================
+import torch
 from torch import nn
 from torch.nn import functional as F
@@ -104,6 +105,7 @@ class Decoder(nn.Sequential):
         nn.Conv2d(128, 3, kernel_size=3, padding=1),
     )
+  @torch.inference_mode
   def forward(self, x):
     x = x / 0.18215
     for module in self:

ai_edge_torch/generative/examples/stable_diffusion/diffusion.py CHANGED Viewed

@@ -429,6 +429,7 @@ class Diffusion(nn.Module):
     self.unet = UNet()
     self.final = FinalLayer(320, 4)
+  @torch.inference_mode
   def forward(self, latent, context, time):
     time = self.time_embedding(time)
     # print('time:')

ai_edge_torch/generative/examples/stable_diffusion/encoder.py CHANGED Viewed

@@ -19,6 +19,7 @@ from torch.nn import functional as F
 from ai_edge_torch.generative.examples.stable_diffusion.decoder import AttentionBlock  # NOQA
 from ai_edge_torch.generative.examples.stable_diffusion.decoder import ResidualBlock  # NOQA
+import ai_edge_torch.generative.utilities.loader as loading_utils
 class Encoder(nn.Sequential):
@@ -46,6 +47,7 @@ class Encoder(nn.Sequential):
         nn.Conv2d(8, 8, kernel_size=1, padding=0),
     )
+  @torch.inference_mode
   def forward(self, x, noise):
     for module in self:
       if getattr(module, 'stride', None) == (

ai_edge_torch/generative/examples/stable_diffusion/pipeline.py ADDED Viewed

@@ -0,0 +1,222 @@
+# Copyright 2024 The AI Edge Torch Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+import argparse
+import os
+from pathlib import Path
+from typing import Dict, Optional
+import numpy as np
+from PIL import Image
+from tqdm import tqdm
+import ai_edge_torch.generative.examples.stable_diffusion.samplers as samplers
+from ai_edge_torch.generative.examples.stable_diffusion.tokenizer import Tokenizer  # NOQA
+import ai_edge_torch.generative.examples.stable_diffusion.util as util
+from ai_edge_torch.model import TfLiteModel
+arg_parser = argparse.ArgumentParser()
+arg_parser.add_argument(
+    '--tokenizer_vocab_dir',
+    type=str,
+    help='Directory to the tokenizer vocabulary files, which include `merges.txt` and `vocab.json`',
+    required=True,
+)
+arg_parser.add_argument(
+    '--clip_ckpt', type=str, help='Path to CLIP TFLite tflite file', required=True
+)
+arg_parser.add_argument(
+    '--diffusion_ckpt', type=str, help='Path to diffusion tflite file', required=True
+)
+arg_parser.add_argument(
+    '--decoder_ckpt', type=str, help='Path to decoder tflite file', required=True
+)
+arg_parser.add_argument(
+    '--output_path',
+    type=str,
+    help='Path to the output generated image file.',
+    required=True,
+)
+arg_parser.add_argument(
+    '--prompt',
+    default='a photograph of an astronaut riding a horse',
+    type=str,
+    help='The prompt to guide the image generation.',
+)
+arg_parser.add_argument(
+    '--n_inference_steps', default=20, type=int, help='The number of denoising steps.'
+)
+arg_parser.add_argument(
+    '--sampler',
+    default='k_euler',
+    type=str,
+    choices=['k_euler', 'k_euler_ancestral', 'k_lms'],
+    help='A sampler to be used to denoise the encoded image latents. Can be one of `k_lms, `k_euler`, or `k_euler_ancestral`.',
+)
+class StableDiffusion:
+  def __init__(
+      self,
+      *,
+      tokenizer_vocab_dir: str,
+      clip_ckpt: str,
+      encoder_ckpt: Optional[str] = None,
+      diffusion_ckpt: str,
+      decoder_ckpt: str
+  ):
+    self.tokenizer = Tokenizer(tokenizer_vocab_dir)
+    self.clip = TfLiteModel.load(clip_ckpt)
+    self.decoder = TfLiteModel.load(decoder_ckpt)
+    self.diffusion = TfLiteModel.load(diffusion_ckpt)
+    if encoder_ckpt is not None:
+      self.encoder = TfLiteModel.load(encoder_ckpt)
+def run_tflite_pipeline(
+    model: StableDiffusion,
+    prompt: str,
+    output_path: str,
+    uncond_prompt: Optional[str] = None,
+    cfg_scale: float = 7.5,
+    height: int = 512,
+    width: int = 512,
+    sampler: str = 'k_euler',
+    n_inference_steps: int = 20,
+    seed: Optional[int] = None,
+    strength: float = 0.8,
+    input_image: Optional[Image.Image] = None,
+):
+  """Run stable diffusion pipeline with tflite model.
+  model:
+    StableDiffusion model.
+  prompt:
+    The prompt to guide the image generation.
+  output_path:
+    The path to the generated output image.
+  uncond_prompt:
+    The prompt not to guide the image generation.
+  cfg_scale:
+    Guidance scale of classifier-free guidance. Higher guidance scale encourages to generate
+    images that are closely linked to the text `prompt`, usually at the expense of lower
+    image quality.
+  height:
+    The height in pixels of the generated image.
+  width:
+    The width in pixels of the generated image.
+  sampler:
+    A sampler to be used to denoise the encoded image latents. Can be one of `k_lms`, `k_euler`,
+    or `k_euler_ancestral`.
+  n_inference_steps:
+    The number of denoising steps. More denoising steps usually lead to a higher quality image at the
+    expense of slower inference. This parameter will be modulated by `strength`.
+  seed:
+    A seed to make generation deterministic.
+  strength:
+    Conceptually, indicates how much to transform the reference `input_image`. Must be between 0 and 1.
+    `input_image` will be used as a starting point, adding more noise to it the larger the `strength`.
+    The number of denoising steps depends on the amount of noise initially added. When `strength` is 1,
+    added noise will be maximum and the denoising process will run for the full number of iterations
+    specified in `n_inference_steps`. A value of 1, therefore, essentially ignores `input_image`.
+  input_image:
+    Image which is served as the starting point for the image generation.
+  """
+  if not 0 < strength < 1:
+    raise ValueError('strength must be between 0 and 1')
+  if height % 8 or width % 8:
+    raise ValueError('height and width must be a multiple of 8')
+  if seed is not None:
+    np.random.seed(seed)
+  if uncond_prompt is None:
+    uncond_prompt = ''
+  if sampler == 'k_lms':
+    sampler = samplers.KLMSSampler(n_inference_steps=n_inference_steps)
+  elif sampler == 'k_euler':
+    sampler = samplers.KEulerSampler(n_inference_steps=n_inference_steps)
+  elif sampler == 'k_euler_ancestral':
+    sampler = samplers.KEulerAncestralSampler(n_inference_steps=n_inference_steps)
+  else:
+    raise ValueError(
+        'Unknown sampler value %s. '
+        'Accepted values are {k_lms, k_euler, k_euler_ancestral}' % sampler
+    )
+  # Text embedding.
+  cond_tokens = model.tokenizer.encode(prompt)
+  cond_context = model.clip(np.array(cond_tokens), signature_name='encode')
+  uncond_tokens = model.tokenizer.encode(uncond_prompt)
+  uncond_context = model.clip(np.array(uncond_tokens), signature_name='encode')
+  context = np.concatenate([cond_context, uncond_context], axis=0)
+  noise_shape = (1, 4, height // 8, width // 8)
+  # Initialization starts from input_image if any, otherwise, starts from a random sampling.
+  if input_image:
+    if not hasattr(model, 'encoder'):
+      raise AttributeError(
+          'Stable Diffusion must be initilaized with encoder to accept input_image.'
+      )
+    input_image = input_image.resize((width, height))
+    input_image_np = np.array(input_image).astype(np.float32)
+    input_image_np = util.rescale(input_image, (0, 255), (-1, 1))
+    input_image_np = util.move_channel(input_image_np, to='first')
+    encoder_noise = np.random.normal(size=noise_shape).astype(np.float32)
+    latents = model.encoder(input_image_np, encoder_noise)
+    latents_noise = np.random.normal(size=noise_shape).astype(np.float32)
+    sampler.set_strength(strength=strength)
+    latents += latents_noise * sampler.initial_scale
+  else:
+    latents = np.random.normal(size=noise_shape).astype(np.float32)
+    latents *= sampler.initial_scale
+  # Diffusion process.
+  timesteps = tqdm(sampler.timesteps)
+  for i, timestep in enumerate(timesteps):
+    time_embedding = util.get_time_embedding(timestep)
+    input_latents = latents * sampler.get_input_scale()
+    input_latents = input_latents.repeat(2, axis=0)
+    output = model.diffusion(
+        input_latents, context, time_embedding, signature_name='diffusion'
+    )
+    output_cond, output_uncond = np.split(output, 2, axis=0)
+    output = cfg_scale * (output_cond - output_uncond) + output_uncond
+    latents = sampler.step(latents, output)
+  # Image decoding.
+  images = model.decoder(latents, signature_name='decode')
+  images = util.rescale(images, (-1, 1), (0, 255), clamp=True)
+  images = util.move_channel(images, to='last')
+  if not os.path.exists(output_path):
+    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
+  Image.fromarray(images[0].astype(np.uint8)).save(output_path)
+if __name__ == '__main__':
+  args = arg_parser.parse_args()
+  run_tflite_pipeline(
+      StableDiffusion(
+          tokenizer_vocab_dir=args.tokenizer_vocab_dir,
+          clip_ckpt=args.clip_ckpt,
+          diffusion_ckpt=args.diffusion_ckpt,
+          decoder_ckpt=args.decoder_ckpt,
+      ),
+      prompt=args.prompt,
+      output_path=args.output_path,
+      sampler=args.sampler,
+      n_inference_steps=args.n_inference_steps,
+  )

ai_edge_torch/generative/examples/stable_diffusion/samplers/__init__.py ADDED Viewed

@@ -0,0 +1,19 @@
+# Copyright 2024 The AI Edge Torch Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+from .k_euler import KEulerSampler
+from .k_euler_ancestral import KEulerAncestralSampler
+from .k_lms import KLMSSampler
+from .sampler import SamplerInterface

ai_edge_torch/generative/examples/stable_diffusion/samplers/k_euler.py ADDED Viewed

@@ -0,0 +1,61 @@
+# Copyright 2024 The AI Edge Torch Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+import numpy as np
+from ai_edge_torch.generative.examples.stable_diffusion import util
+from ai_edge_torch.generative.examples.stable_diffusion.samplers.sampler import SamplerInterface  # NOQA
+class KEulerSampler(SamplerInterface):
+  def __init__(self, n_inference_steps=50, n_training_steps=1000):
+    timesteps = np.linspace(n_training_steps - 1, 0, n_inference_steps)
+    alphas_cumprod = util.get_alphas_cumprod(n_training_steps=n_training_steps)
+    sigmas = ((1 - alphas_cumprod) / alphas_cumprod) ** 0.5
+    log_sigmas = np.log(sigmas)
+    log_sigmas = np.interp(timesteps, range(n_training_steps), log_sigmas)
+    sigmas = np.exp(log_sigmas)
+    sigmas = np.append(sigmas, 0)
+    self.sigmas = sigmas
+    self.initial_scale = sigmas.max()
+    self.timesteps = timesteps
+    self.n_inference_steps = n_inference_steps
+    self.n_training_steps = n_training_steps
+    self.step_count = 0
+  def get_input_scale(self, step_count=None):
+    if step_count is None:
+      step_count = self.step_count
+    sigma = self.sigmas[step_count]
+    return 1 / (sigma**2 + 1) ** 0.5
+  def set_strength(self, strength=1):
+    start_step = self.n_inference_steps - int(self.n_inference_steps * strength)
+    self.timesteps = np.linspace(self.n_training_steps - 1, 0, self.n_inference_steps)
+    self.timesteps = self.timesteps[start_step:]
+    self.initial_scale = self.sigmas[start_step]
+    self.step_count = start_step
+  def step(self, latents, output):
+    t = self.step_count
+    self.step_count += 1
+    sigma_from = self.sigmas[t]
+    sigma_to = self.sigmas[t + 1]
+    latents += output * (sigma_to - sigma_from)
+    return latents

ai_edge_torch/generative/examples/stable_diffusion/samplers/k_euler_ancestral.py ADDED Viewed

@@ -0,0 +1,65 @@
+# Copyright 2024 The AI Edge Torch Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+import numpy as np
+from ai_edge_torch.generative.examples.stable_diffusion import util
+from ai_edge_torch.generative.examples.stable_diffusion.samplers.sampler import SamplerInterface  # NOQA
+class KEulerAncestralSampler(SamplerInterface):
+  def __init__(self, n_inference_steps=50, n_training_steps=1000):
+    timesteps = np.linspace(n_training_steps - 1, 0, n_inference_steps)
+    alphas_cumprod = util.get_alphas_cumprod(n_training_steps=n_training_steps)
+    sigmas = ((1 - alphas_cumprod) / alphas_cumprod) ** 0.5
+    log_sigmas = np.log(sigmas)
+    log_sigmas = np.interp(timesteps, range(n_training_steps), log_sigmas)
+    sigmas = np.exp(log_sigmas)
+    sigmas = np.append(sigmas, 0)
+    self.sigmas = sigmas
+    self.initial_scale = sigmas.max()
+    self.timesteps = timesteps
+    self.n_inference_steps = n_inference_steps
+    self.n_training_steps = n_training_steps
+    self.step_count = 0
+  def get_input_scale(self, step_count=None):
+    if step_count is None:
+      step_count = self.step_count
+    sigma = self.sigmas[step_count]
+    return 1 / (sigma**2 + 1) ** 0.5
+  def set_strength(self, strength=1):
+    start_step = self.n_inference_steps - int(self.n_inference_steps * strength)
+    self.timesteps = np.linspace(self.n_training_steps - 1, 0, self.n_inference_steps)
+    self.timesteps = self.timesteps[start_step:]
+    self.initial_scale = self.sigmas[start_step]
+    self.step_count = start_step
+  def step(self, latents, output):
+    t = self.step_count
+    self.step_count += 1
+    sigma_from = self.sigmas[t]
+    sigma_to = self.sigmas[t + 1]
+    sigma_up = sigma_to * (1 - (sigma_to**2 / sigma_from**2)) ** 0.5
+    sigma_down = sigma_to**2 / sigma_from
+    latents += output * (sigma_down - sigma_from)
+    noise = np.random.normal(size=latents.shape)
+    latents += noise * sigma_up
+    return latents

ai_edge_torch/generative/examples/stable_diffusion/samplers/k_lms.py ADDED Viewed

@@ -0,0 +1,73 @@
+# Copyright 2024 The AI Edge Torch Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+import numpy as np
+from ai_edge_torch.generative.examples.stable_diffusion import util
+from ai_edge_torch.generative.examples.stable_diffusion.samplers.sampler import SamplerInterface  # NOQA
+class KLMSSampler(SamplerInterface):
+  def __init__(self, n_inference_steps=50, n_training_steps=1000, lms_order=4):
+    timesteps = np.linspace(n_training_steps - 1, 0, n_inference_steps)
+    alphas_cumprod = util.get_alphas_cumprod(n_training_steps=n_training_steps)
+    sigmas = ((1 - alphas_cumprod) / alphas_cumprod) ** 0.5
+    log_sigmas = np.log(sigmas)
+    log_sigmas = np.interp(timesteps, range(n_training_steps), log_sigmas)
+    sigmas = np.exp(log_sigmas)
+    sigmas = np.append(sigmas, 0)
+    self.sigmas = sigmas
+    self.initial_scale = sigmas.max()
+    self.timesteps = timesteps
+    self.n_inference_steps = n_inference_steps
+    self.n_training_steps = n_training_steps
+    self.lms_order = lms_order
+    self.step_count = 0
+    self.outputs = []
+  def get_input_scale(self, step_count=None):
+    if step_count is None:
+      step_count = self.step_count
+    sigma = self.sigmas[step_count]
+    return 1 / (sigma**2 + 1) ** 0.5
+  def set_strength(self, strength=1):
+    start_step = self.n_inference_steps - int(self.n_inference_steps * strength)
+    self.timesteps = np.linspace(self.n_training_steps - 1, 0, self.n_inference_steps)
+    self.timesteps = self.timesteps[start_step:]
+    self.initial_scale = self.sigmas[start_step]
+    self.step_count = start_step
+  def step(self, latents, output):
+    t = self.step_count
+    self.step_count += 1
+    self.outputs = [output] + self.outputs[: self.lms_order - 1]
+    order = len(self.outputs)
+    for i, output in enumerate(self.outputs):
+      # Integrate polynomial by trapezoidal approx. method for 81 points.
+      x = np.linspace(self.sigmas[t], self.sigmas[t + 1], 81)
+      y = np.ones(81)
+      for j in range(order):
+        if i == j:
+          continue
+        y *= x - self.sigmas[t - j]
+        y /= self.sigmas[t - i] - self.sigmas[t - j]
+      lms_coeff = np.trapz(y=y, x=x)
+      latents += lms_coeff * output
+    return latents

ai_edge_torch/generative/examples/stable_diffusion/samplers/sampler.py ADDED Viewed

@@ -0,0 +1,38 @@
+# Copyright 2024 The AI Edge Torch Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+import abc
+import numpy as np
+class SamplerInterface(abc.ABC):
+  @abc.abstractmethod
+  def get_input_scale(self, step_count: int = 1) -> float:
+    """Get the input scale of the random samples from sampled distribution"""
+    return NotImplemented
+  @abc.abstractmethod
+  def set_strength(self, strength: float = 1) -> None:
+    """Set the strength of initial step.
+    Conceptually, indicates how much to transform the reference `input_images`.
+    """
+    return NotImplemented
+  @abc.abstractmethod
+  def step(self, latents: np.ndarray, output: np.ndarray) -> np.ndarray:
+    """Update latents from the diffusion output by a step"""
+    return NotImplemented

ai_edge_torch/generative/examples/stable_diffusion/tokenizer.py ADDED Viewed

@@ -0,0 +1,108 @@
+# Copyright 2024 The AI Edge Torch Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+import functools
+import json
+import os
+from typing import List, Tuple
+import unicodedata
+import regex as re
+def create_bytes_table() -> dict:
+  table = {}
+  special_count = 0
+  for byte in range(256):
+    category = unicodedata.category(chr(byte))
+    if category[0] not in ['C', 'Z']:  # ith character is NOT control char or space
+      table[byte] = chr(byte)
+    else:  # ith character IS control char or space
+      table[byte] = chr(special_count + 256)
+      special_count += 1
+  return table
+def pairwise(seq):
+  a = iter(seq)
+  b = iter(seq)
+  next(b)
+  return zip(a, b)
+class Tokenizer:
+  def __init__(self, vocab_dir: str):
+    with open(os.path.join(vocab_dir, 'vocab.json'), encoding='utf-8') as f:
+      self.vocab = json.load(f)
+    with open(os.path.join(vocab_dir, 'merges.txt'), encoding='utf-8') as f:
+      lines = f.read().split('\n')
+      lines = lines[1:-1]
+      self.merges = {tuple(bigram.split()): i for i, bigram in enumerate(lines)}
+    self.bos_token = self.vocab['<|startoftext|>']
+    self.eos_token = self.vocab['<|endoftext|>']
+    self.pad_token = self.vocab['<|endoftext|>']
+    self.max_length = 77
+    self.bytes_table = create_bytes_table()
+    self.chunk_pattern = re.compile(
+        r"""<\|startoftext\|>|<\|endoftext\|>|'s|'t|'re|'ve|'m|'ll|'d|[\p{L}]+|[\p{N}]|[^\s\p{L}\p{N}]+""",
+        re.IGNORECASE,
+    )
+  def encode(self, text: str) -> List[int]:
+    text = unicodedata.normalize('NFC', text)
+    text = re.sub(r'\s+', ' ', text)
+    text = text.strip()
+    text = text.lower()
+    tokens = [self.bos_token]
+    for chunk in re.findall(self.chunk_pattern, text):
+      chunk = ''.join(self.bytes_table[byte] for byte in chunk.encode('utf-8'))
+      tokens.extend(self.vocab[word] for word in self.bpe(chunk))
+    tokens.append(self.eos_token)
+    tokens = tokens[: self.max_length]
+    token_length = len(tokens)
+    pad_length = self.max_length - token_length
+    tokens += [self.pad_token] * pad_length
+    return tokens
+  def encode_batch(self, texts: List[str]) -> List[List[int]]:
+    return [self.encode(text) for text in texts]
+  @functools.lru_cache(maxsize=10000)
+  def bpe(self, chunk: str) -> Tuple[str]:
+    words = list(chunk)
+    words[-1] += '</w>'
+    while len(words) > 1:
+      valid_pairs = [pair for pair in pairwise(words) if pair in self.merges]
+      if not valid_pairs:
+        break
+      bigram = min(valid_pairs, key=lambda pair: self.merges[pair])
+      first, second = bigram
+      new_words = []
+      for word in words:
+        if word == second and new_words and new_words[-1] == first:
+          new_words[-1] = first + second
+        else:
+          new_words.append(word)
+      words = new_words
+    return tuple(words)

ai_edge_torch/generative/examples/stable_diffusion/util.py CHANGED Viewed

@@ -44,9 +44,15 @@ def get_file_path(filename, url=None):
 def move_channel(image, to):
   if to == "first":
-    return image.permute(0, 3, 1, 2)  # (N, H, W, C) -> (N, C, H, W)
+    if isinstance(image, torch.Tensor):
+      return image.permute(0, 3, 1, 2)  # (N, H, W, C) -> (N, C, H, W)
+    if isinstance(image, np.ndarray):
+      return image.transpose(0, 3, 1, 2)
   elif to == "last":
-    return image.permute(0, 2, 3, 1)  # (N, C, H, W) -> (N, H, W, C)
+    if isinstance(image, torch.Tensor):
+      return image.permute(0, 2, 3, 1)  # (N, C, H, W) -> (N, H, W, C)
+    if isinstance(image, np.ndarray):
+      return image.transpose(0, 2, 3, 1)
   else:
     raise ValueError("to must be one of the following: first, last")
@@ -58,5 +64,8 @@ def rescale(x, old_range, new_range, clamp=False):
   x *= (new_max - new_min) / (old_max - old_min)
   x += new_min
   if clamp:
-    x = x.clamp(new_min, new_max)
+    if isinstance(x, torch.Tensor):
+      x = x.clamp(new_min, new_max)
+    elif isinstance(x, np.ndarray):
+      x = x.clip(new_min, new_max)
   return x

{ai_edge_torch_nightly-0.2.0.dev20240527.dist-info → ai_edge_torch_nightly-0.2.0.dev20240602.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ai-edge-torch-nightly
-Version: 0.2.0.dev20240527
+Version: 0.2.0.dev20240602
 Summary: Supporting PyTorch models with the Google AI Edge TFLite runtime.
 Home-page: https://github.com/google-ai-edge/ai-edge-torch
 Keywords: On-Device ML,AI,Google,TFLite,PyTorch,LLMs,GenAI

{ai_edge_torch_nightly-0.2.0.dev20240527.dist-info → ai_edge_torch_nightly-0.2.0.dev20240602.dist-info}/RECORD RENAMED Viewed

@@ -6,7 +6,7 @@ ai_edge_torch/convert/conversion_utils.py,sha256=NpVm3Ms81_cIW5IYgGsr0BVganJJgBK
 ai_edge_torch/convert/converter.py,sha256=bjj5TV5_g4sGyuSh8ThEDydlNMqhkGSY4SzXK6vwhqI,6927
 ai_edge_torch/convert/fx_passes/__init__.py,sha256=Ll2nNwufjcV5nSruQPXiloq7F1E7pWJ2T5clXmy1lk8,2825
 ai_edge_torch/convert/fx_passes/_pass_base.py,sha256=ijVyDclPnd6a0DWWUJkwR4igj6f82S-cE1-83QGPvgw,1652
-ai_edge_torch/convert/fx_passes/build_aten_composite_pass.py,sha256=quuPsyRtOeumB4SVRYoj2UmSWfrGzJ6Q2ZqjWeG3UPI,6150
+ai_edge_torch/convert/fx_passes/build_aten_composite_pass.py,sha256=wHVWNNMu5h_ya6GnnJn0cNif9xmdSqr8Vm-R7lllxZM,6213
 ai_edge_torch/convert/fx_passes/build_upsample_bilinear2d_composite_pass.py,sha256=76XYoIlFDgrzp5QemoaEalPFcEbfszkEH_PLvO1ASCk,2607
 ai_edge_torch/convert/fx_passes/canonicalize_pass.py,sha256=UX6dJsxCqSkftXXvNBV-i7Bjk6H7qTyqzUnE640Itfg,1673
 ai_edge_torch/convert/fx_passes/inject_mlir_debuginfo_pass.py,sha256=aRT8hTS3n9ie28lgu6mygtFO6Ypwu0qjNb0c81v9HLs,2448
@@ -22,7 +22,7 @@ ai_edge_torch/convert/fx_passes/optimize_layout_transposes_pass/layout_partition
 ai_edge_torch/convert/fx_passes/optimize_layout_transposes_pass/layout_partitioners/min_cut.py,sha256=FlNKt2EhIKnlVEeUWTiv5sz446YKU6Yy1H0Gd6VRgkU,6432
 ai_edge_torch/convert/test/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
 ai_edge_torch/convert/test/test_convert.py,sha256=USduDO6PaO3nlA82jMihTct--mCU_ugILZDin00lcJ8,8092
-ai_edge_torch/convert/test/test_convert_composites.py,sha256=gFUa_lKNUfeYMgtulqJvRAtWIvzy3f3eXptMBiJDbms,6403
+ai_edge_torch/convert/test/test_convert_composites.py,sha256=SrVn_cEMtQhYYCMOUKK0K7M57MQNQX-lOUwieln0HGA,6616
 ai_edge_torch/convert/test/test_convert_multisig.py,sha256=kMaGnHe9ylfyU68qCifYcaGwJqyejKz--QQt9jS2oUA,4537
 ai_edge_torch/debug/__init__.py,sha256=TKvmnjVk3asvYcVh6C-LPr6srgAF_nppSAupWEXqwPY,707
 ai_edge_torch/debug/culprit.py,sha256=vklaxBUfINdo44OsH7csILK70N41gEThCGchGEfbTZw,12789
@@ -40,11 +40,19 @@ ai_edge_torch/generative/examples/phi2/convert_to_tflite.py,sha256=6nOuwx9q3AUlY
 ai_edge_torch/generative/examples/phi2/phi2.py,sha256=VvigzPQ_LJHeADTsMliwFwPe2BcnOhFgKDqr_WZ2JQ8,5540
 ai_edge_torch/generative/examples/stable_diffusion/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
 ai_edge_torch/generative/examples/stable_diffusion/attention.py,sha256=Lo4Dq7a3Kg-lyH56iqGtqCo5UaClQHRCTDdNagXGTo8,3535
-ai_edge_torch/generative/examples/stable_diffusion/clip.py,sha256=8W4X9PKdnMsWGxXbBfm5OX6mX4XhvaMZ2gZw8yCTScY,2410
-ai_edge_torch/generative/examples/stable_diffusion/decoder.py,sha256=beLCtogA32oYT2nlATpyT-1xzkyPF8zi4v3kfHpw6Mc,3239
-ai_edge_torch/generative/examples/stable_diffusion/diffusion.py,sha256=nnsfgjSeL16U3TVdjTkRycaoWA2ChFeitx2RjGLpwyA,16200
-ai_edge_torch/generative/examples/stable_diffusion/encoder.py,sha256=X6ekByU19KNHNh5OaztZEROv-QwcCwVm1xiJjm2SCoo,2251
-ai_edge_torch/generative/examples/stable_diffusion/util.py,sha256=pG_dsV4xIaB7B8MgoRgSXBvLCVqDlF6bNunPN3GIm-s,2046
+ai_edge_torch/generative/examples/stable_diffusion/clip.py,sha256=KR1Ci4rlJeeGfsFRliCxUve9K7RTJLZfTRMgFtfQ4MU,2434
+ai_edge_torch/generative/examples/stable_diffusion/convert_to_tflite.py,sha256=6REAYy1Bv-Iv5zcmA_m_W6fH6jt5a3IS6Vge18jS_Wo,3633
+ai_edge_torch/generative/examples/stable_diffusion/decoder.py,sha256=AgVAdUbSkHXONVUjAyBQEXhIUUlinf9kNljcBpWnj3A,3276
+ai_edge_torch/generative/examples/stable_diffusion/diffusion.py,sha256=nq94VpQ103eOimnmdyg7u3Xk1LH1IxGlmIbr2AttRIk,16224
+ai_edge_torch/generative/examples/stable_diffusion/encoder.py,sha256=L6hLaMQGb8-_BwSvTLIuDnZwfTqn0K4swBUjfPnYWZo,2341
+ai_edge_torch/generative/examples/stable_diffusion/pipeline.py,sha256=FCbnwlkpYYb-tF7KscbSYjNEdg7XnuLju1cDuIRoQv8,8277
+ai_edge_torch/generative/examples/stable_diffusion/tokenizer.py,sha256=r9RqbyNvuvXOGu3ojtl7ZmbC7o4Pt8aUKAhN1yCdtEc,3397
+ai_edge_torch/generative/examples/stable_diffusion/util.py,sha256=NFpOfA4KN0JpShm5QvuYbQYZ844NzexWD8nV3WjMOZM,2397
+ai_edge_torch/generative/examples/stable_diffusion/samplers/__init__.py,sha256=uQWKzCD_49ackNFrt50H04dkDXxfAwUCtMWWQre5SVE,830
+ai_edge_torch/generative/examples/stable_diffusion/samplers/k_euler.py,sha256=w9C2iVFAn4F2SLJiFdjwR9rRPf5wc3OBS1t0GIOEy08,2310
+ai_edge_torch/generative/examples/stable_diffusion/samplers/k_euler_ancestral.py,sha256=24aIPj6AoK_vSPqmpfmYd-IA8-Uvq6wHLwdVS34Pwtc,2513
+ai_edge_torch/generative/examples/stable_diffusion/samplers/k_lms.py,sha256=iPYX9ZSaxwSak2KI44j6TEr_g4pdxS3xpka4u0trjbo,2788
+ai_edge_torch/generative/examples/stable_diffusion/samplers/sampler.py,sha256=5iRfU5MO6GR6K3WrdddIU_9U7ZZGEEb7zGKVY1WFl-8,1340
 ai_edge_torch/generative/examples/t5/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
 ai_edge_torch/generative/examples/t5/convert_to_tflite.py,sha256=bWtwtUacvJOEDUpuYvLTgkP7oTkXKJA-Tf4FPxlD1Cw,4536
 ai_edge_torch/generative/examples/t5/t5.py,sha256=q2gG5RRo7RgNzvHXYC0Juh6Tgt5d_RTMSWFaYvOKiZU,21065
@@ -92,8 +100,8 @@ ai_edge_torch/quantize/quant_config.py,sha256=ExThdTXqnWmGC3-F6sdXbXr8nYzkEe_qCz
 ai_edge_torch/testing/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
 ai_edge_torch/testing/model_coverage/__init__.py,sha256=5P8J6Zk5YYtDvTBucFvB9NGSRI7Gw_24WnrbhXgycEE,765
 ai_edge_torch/testing/model_coverage/model_coverage.py,sha256=EIyKz-HY70DguWuSrJal8LpYXQ5ZSEUf3ZrVl7jikFM,4286
-ai_edge_torch_nightly-0.2.0.dev20240527.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
-ai_edge_torch_nightly-0.2.0.dev20240527.dist-info/METADATA,sha256=nbZoIm0s6CWdrMkaffTrpz-XooKzTR1q0SQ17rs-AKU,1748
-ai_edge_torch_nightly-0.2.0.dev20240527.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-ai_edge_torch_nightly-0.2.0.dev20240527.dist-info/top_level.txt,sha256=5KXRaF2hwkApYxf7Y8y_tVb9aulGTlbOoNdbx1aKRkE,14
-ai_edge_torch_nightly-0.2.0.dev20240527.dist-info/RECORD,,
+ai_edge_torch_nightly-0.2.0.dev20240602.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
+ai_edge_torch_nightly-0.2.0.dev20240602.dist-info/METADATA,sha256=CKuSmz0abh0kBACyWKBRLRf6CWI0frkLFVwerI25tc8,1748
+ai_edge_torch_nightly-0.2.0.dev20240602.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+ai_edge_torch_nightly-0.2.0.dev20240602.dist-info/top_level.txt,sha256=5KXRaF2hwkApYxf7Y8y_tVb9aulGTlbOoNdbx1aKRkE,14
+ai_edge_torch_nightly-0.2.0.dev20240602.dist-info/RECORD,,

{ai_edge_torch_nightly-0.2.0.dev20240527.dist-info → ai_edge_torch_nightly-0.2.0.dev20240602.dist-info}/LICENSE RENAMED Viewed

File without changes

{ai_edge_torch_nightly-0.2.0.dev20240527.dist-info → ai_edge_torch_nightly-0.2.0.dev20240602.dist-info}/WHEEL RENAMED Viewed

File without changes

{ai_edge_torch_nightly-0.2.0.dev20240527.dist-info → ai_edge_torch_nightly-0.2.0.dev20240602.dist-info}/top_level.txt RENAMED Viewed

File without changes

ai-edge-torch-nightly 0.2.0.dev20240527__py3-none-any.whl → 0.2.0.dev20240602__py3-none-any.whl

Potentially problematic release.

ai-edge-torch-nightly 0.2.0.dev20240527__py3-none-any.whl → 0.2.0.dev20240602__py3-none-any.whl