ai-edge-torch-nightly 0.2.0.dev20240714__py3-none-any.whl

This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Note: this version of ai-edge-torch-nightly has been flagged as a potentially problematic release.

Files changed (121)
  1. ai_edge_torch/__init__.py +31 -0
  2. ai_edge_torch/convert/__init__.py +14 -0
  3. ai_edge_torch/convert/conversion.py +117 -0
  4. ai_edge_torch/convert/conversion_utils.py +400 -0
  5. ai_edge_torch/convert/converter.py +202 -0
  6. ai_edge_torch/convert/fx_passes/__init__.py +59 -0
  7. ai_edge_torch/convert/fx_passes/_pass_base.py +49 -0
  8. ai_edge_torch/convert/fx_passes/build_aten_composite_pass.py +225 -0
  9. ai_edge_torch/convert/fx_passes/build_interpolate_composite_pass.py +123 -0
  10. ai_edge_torch/convert/fx_passes/canonicalize_pass.py +37 -0
  11. ai_edge_torch/convert/fx_passes/inject_mlir_debuginfo_pass.py +73 -0
  12. ai_edge_torch/convert/fx_passes/optimize_layout_transposes_pass/__init__.py +16 -0
  13. ai_edge_torch/convert/fx_passes/optimize_layout_transposes_pass/layout_check.py +215 -0
  14. ai_edge_torch/convert/fx_passes/optimize_layout_transposes_pass/layout_mark.py +48 -0
  15. ai_edge_torch/convert/fx_passes/optimize_layout_transposes_pass/layout_partitioners/__init__.py +17 -0
  16. ai_edge_torch/convert/fx_passes/optimize_layout_transposes_pass/layout_partitioners/greedy.py +59 -0
  17. ai_edge_torch/convert/fx_passes/optimize_layout_transposes_pass/layout_partitioners/min_cut.py +215 -0
  18. ai_edge_torch/convert/fx_passes/optimize_layout_transposes_pass/layout_rewrite.py +400 -0
  19. ai_edge_torch/convert/fx_passes/optimize_layout_transposes_pass/op_func_registry.py +30 -0
  20. ai_edge_torch/convert/fx_passes/optimize_layout_transposes_pass/pass_body.py +293 -0
  21. ai_edge_torch/convert/fx_passes/optimize_layout_transposes_pass/utils.py +62 -0
  22. ai_edge_torch/convert/test/__init__.py +14 -0
  23. ai_edge_torch/convert/test/test_convert.py +311 -0
  24. ai_edge_torch/convert/test/test_convert_composites.py +192 -0
  25. ai_edge_torch/convert/test/test_convert_multisig.py +139 -0
  26. ai_edge_torch/convert/test/test_to_channel_last_io.py +96 -0
  27. ai_edge_torch/convert/to_channel_last_io.py +85 -0
  28. ai_edge_torch/debug/__init__.py +17 -0
  29. ai_edge_torch/debug/culprit.py +464 -0
  30. ai_edge_torch/debug/test/__init__.py +14 -0
  31. ai_edge_torch/debug/test/test_culprit.py +133 -0
  32. ai_edge_torch/debug/test/test_search_model.py +50 -0
  33. ai_edge_torch/debug/utils.py +48 -0
  34. ai_edge_torch/experimental/__init__.py +14 -0
  35. ai_edge_torch/generative/__init__.py +14 -0
  36. ai_edge_torch/generative/examples/__init__.py +14 -0
  37. ai_edge_torch/generative/examples/gemma/__init__.py +14 -0
  38. ai_edge_torch/generative/examples/gemma/convert_to_tflite.py +66 -0
  39. ai_edge_torch/generative/examples/gemma/gemma.py +174 -0
  40. ai_edge_torch/generative/examples/phi2/__init__.py +14 -0
  41. ai_edge_torch/generative/examples/phi2/convert_to_tflite.py +64 -0
  42. ai_edge_torch/generative/examples/phi2/phi2.py +164 -0
  43. ai_edge_torch/generative/examples/stable_diffusion/__init__.py +14 -0
  44. ai_edge_torch/generative/examples/stable_diffusion/attention.py +106 -0
  45. ai_edge_torch/generative/examples/stable_diffusion/clip.py +115 -0
  46. ai_edge_torch/generative/examples/stable_diffusion/convert_to_tflite.py +142 -0
  47. ai_edge_torch/generative/examples/stable_diffusion/decoder.py +317 -0
  48. ai_edge_torch/generative/examples/stable_diffusion/diffusion.py +573 -0
  49. ai_edge_torch/generative/examples/stable_diffusion/encoder.py +118 -0
  50. ai_edge_torch/generative/examples/stable_diffusion/pipeline.py +222 -0
  51. ai_edge_torch/generative/examples/stable_diffusion/samplers/__init__.py +19 -0
  52. ai_edge_torch/generative/examples/stable_diffusion/samplers/k_euler.py +61 -0
  53. ai_edge_torch/generative/examples/stable_diffusion/samplers/k_euler_ancestral.py +65 -0
  54. ai_edge_torch/generative/examples/stable_diffusion/samplers/k_lms.py +73 -0
  55. ai_edge_torch/generative/examples/stable_diffusion/samplers/sampler.py +38 -0
  56. ai_edge_torch/generative/examples/stable_diffusion/tokenizer.py +108 -0
  57. ai_edge_torch/generative/examples/stable_diffusion/util.py +71 -0
  58. ai_edge_torch/generative/examples/t5/__init__.py +14 -0
  59. ai_edge_torch/generative/examples/t5/convert_to_tflite.py +135 -0
  60. ai_edge_torch/generative/examples/t5/t5.py +608 -0
  61. ai_edge_torch/generative/examples/t5/t5_attention.py +231 -0
  62. ai_edge_torch/generative/examples/test_models/__init__.py +14 -0
  63. ai_edge_torch/generative/examples/test_models/toy_model.py +122 -0
  64. ai_edge_torch/generative/examples/test_models/toy_model_with_external_kv_cache.py +161 -0
  65. ai_edge_torch/generative/examples/test_models/toy_model_with_kv_cache.py +143 -0
  66. ai_edge_torch/generative/examples/tiny_llama/__init__.py +0 -0
  67. ai_edge_torch/generative/examples/tiny_llama/convert_to_tflite.py +66 -0
  68. ai_edge_torch/generative/examples/tiny_llama/tiny_llama.py +164 -0
  69. ai_edge_torch/generative/fx_passes/__init__.py +31 -0
  70. ai_edge_torch/generative/fx_passes/remove_sdpa_zero_mask_pass.py +47 -0
  71. ai_edge_torch/generative/layers/__init__.py +14 -0
  72. ai_edge_torch/generative/layers/attention.py +354 -0
  73. ai_edge_torch/generative/layers/attention_utils.py +169 -0
  74. ai_edge_torch/generative/layers/builder.py +131 -0
  75. ai_edge_torch/generative/layers/feed_forward.py +95 -0
  76. ai_edge_torch/generative/layers/kv_cache.py +83 -0
  77. ai_edge_torch/generative/layers/model_config.py +158 -0
  78. ai_edge_torch/generative/layers/normalization.py +62 -0
  79. ai_edge_torch/generative/layers/rotary_position_embedding.py +36 -0
  80. ai_edge_torch/generative/layers/scaled_dot_product_attention.py +117 -0
  81. ai_edge_torch/generative/layers/unet/__init__.py +14 -0
  82. ai_edge_torch/generative/layers/unet/blocks_2d.py +711 -0
  83. ai_edge_torch/generative/layers/unet/builder.py +47 -0
  84. ai_edge_torch/generative/layers/unet/model_config.py +269 -0
  85. ai_edge_torch/generative/quantize/__init__.py +14 -0
  86. ai_edge_torch/generative/quantize/ai_edge_quantizer_glue/__init__.py +0 -0
  87. ai_edge_torch/generative/quantize/ai_edge_quantizer_glue/translate_recipe.py +148 -0
  88. ai_edge_torch/generative/quantize/example.py +45 -0
  89. ai_edge_torch/generative/quantize/quant_attrs.py +68 -0
  90. ai_edge_torch/generative/quantize/quant_recipe.py +151 -0
  91. ai_edge_torch/generative/quantize/quant_recipe_utils.py +51 -0
  92. ai_edge_torch/generative/quantize/quant_recipes.py +48 -0
  93. ai_edge_torch/generative/quantize/supported_schemes.py +32 -0
  94. ai_edge_torch/generative/test/__init__.py +14 -0
  95. ai_edge_torch/generative/test/loader_test.py +80 -0
  96. ai_edge_torch/generative/test/test_model_conversion.py +235 -0
  97. ai_edge_torch/generative/test/test_quantize.py +162 -0
  98. ai_edge_torch/generative/utilities/__init__.py +15 -0
  99. ai_edge_torch/generative/utilities/loader.py +328 -0
  100. ai_edge_torch/generative/utilities/stable_diffusion_loader.py +924 -0
  101. ai_edge_torch/generative/utilities/t5_loader.py +483 -0
  102. ai_edge_torch/hlfb/__init__.py +16 -0
  103. ai_edge_torch/hlfb/mark_pattern/__init__.py +139 -0
  104. ai_edge_torch/hlfb/mark_pattern/passes.py +42 -0
  105. ai_edge_torch/hlfb/mark_pattern/pattern.py +273 -0
  106. ai_edge_torch/hlfb/test/__init__.py +14 -0
  107. ai_edge_torch/hlfb/test/test_mark_pattern.py +133 -0
  108. ai_edge_torch/hlfb/test/test_stablehlo_composite_builder.py +270 -0
  109. ai_edge_torch/model.py +142 -0
  110. ai_edge_torch/quantize/__init__.py +16 -0
  111. ai_edge_torch/quantize/pt2e_quantizer.py +438 -0
  112. ai_edge_torch/quantize/pt2e_quantizer_utils.py +1041 -0
  113. ai_edge_torch/quantize/quant_config.py +81 -0
  114. ai_edge_torch/testing/__init__.py +14 -0
  115. ai_edge_torch/testing/model_coverage/__init__.py +16 -0
  116. ai_edge_torch/testing/model_coverage/model_coverage.py +132 -0
  117. ai_edge_torch_nightly-0.2.0.dev20240714.dist-info/LICENSE +202 -0
  118. ai_edge_torch_nightly-0.2.0.dev20240714.dist-info/METADATA +38 -0
  119. ai_edge_torch_nightly-0.2.0.dev20240714.dist-info/RECORD +121 -0
  120. ai_edge_torch_nightly-0.2.0.dev20240714.dist-info/WHEEL +5 -0
  121. ai_edge_torch_nightly-0.2.0.dev20240714.dist-info/top_level.txt +1 -0
ai_edge_torch/generative/layers/unet/builder.py
@@ -0,0 +1,47 @@
+ # Copyright 2024 The AI Edge Torch Authors.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ==============================================================================
+ # Builder utils for individual components.
+
+ from torch import nn
+
+ import ai_edge_torch.generative.layers.unet.model_config as unet_config
+
+
+ def build_upsampling(config: unet_config.UpSamplingConfig):
+   if config.mode == unet_config.SamplingType.NEAREST:
+     return nn.UpsamplingNearest2d(scale_factor=config.scale_factor)
+   elif config.mode == unet_config.SamplingType.BILINEAR:
+     return nn.UpsamplingBilinear2d(scale_factor=config.scale_factor)
+   else:
+     raise ValueError("Unsupported upsampling type.")
+
+
+ def build_downsampling(config: unet_config.DownSamplingConfig):
+   if config.mode == unet_config.SamplingType.AVERAGE:
+     return nn.AvgPool2d(config.kernel_size, config.stride, padding=config.padding)
+   elif config.mode == unet_config.SamplingType.CONVOLUTION:
+     out_channels = (
+         config.in_channels if config.out_channels is None else config.out_channels
+     )
+     padding = (0, 1, 0, 1) if config.padding == 0 else config.padding
+     return nn.Conv2d(
+         config.in_channels,
+         out_channels=out_channels,
+         kernel_size=config.kernel_size,
+         stride=config.stride,
+         padding=padding,
+     )
+   else:
+     raise ValueError("Unsupported downsampling type.")
ai_edge_torch/generative/layers/unet/model_config.py
@@ -0,0 +1,269 @@
+ # Copyright 2024 The AI Edge Torch Authors.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ==============================================================================
+
+ # UNet configuration class.
+ from dataclasses import dataclass
+ from dataclasses import field
+ import enum
+ from typing import List, Optional
+
+ import ai_edge_torch.generative.layers.model_config as layers_cfg
+
+
+ @enum.unique
+ class SamplingType(enum.Enum):
+   NEAREST = enum.auto()
+   BILINEAR = enum.auto()
+   AVERAGE = enum.auto()
+   CONVOLUTION = enum.auto()
+
+
+ @dataclass
+ class UpSamplingConfig:
+   mode: SamplingType
+   scale_factor: float
+
+
+ @dataclass
+ class DownSamplingConfig:
+   mode: SamplingType
+   in_channels: int
+   kernel_size: int
+   stride: int
+   padding: int
+   out_channels: Optional[int] = None
+
+
+ @dataclass
+ class ResidualBlock2DConfig:
+   in_channels: int
+   out_channels: int
+   normalization_config: layers_cfg.NormalizationConfig
+   activation_config: layers_cfg.ActivationConfig
+   # Optional time embedding channels if the residual block takes a time embedding context as input.
+   time_embedding_channels: Optional[int] = None
+
+
+ @dataclass
+ class AttentionBlock2DConfig:
+   dim: int
+   normalization_config: layers_cfg.NormalizationConfig
+   attention_config: layers_cfg.AttentionConfig
+   enable_hlfb: bool = True
+   attention_batch_size: int = 1
+
+
+ @dataclass
+ class CrossAttentionBlock2DConfig:
+   query_dim: int
+   cross_dim: int
+   normalization_config: layers_cfg.NormalizationConfig
+   attention_config: layers_cfg.AttentionConfig
+   enable_hlfb: bool = True
+   attention_batch_size: int = 1
+
+
+ @dataclass
+ class FeedForwardBlock2DConfig:
+   dim: int
+   hidden_dim: int
+   normalization_config: layers_cfg.NormalizationConfig
+   activation_config: layers_cfg.ActivationConfig
+   use_bias: bool
+
+
+ @dataclass
+ class TransformerBlock2DConfig:
+   pre_conv_normalization_config: layers_cfg.NormalizationConfig
+   attention_block_config: AttentionBlock2DConfig
+   cross_attention_block_config: CrossAttentionBlock2DConfig
+   feed_forward_block_config: FeedForwardBlock2DConfig
+
+
+ @dataclass
+ class UpDecoderBlock2DConfig:
+   in_channels: int
+   out_channels: int
+   normalization_config: layers_cfg.NormalizationConfig
+   activation_config: layers_cfg.ActivationConfig
+   num_layers: int
+   # Optional time embedding channels if the residual blocks take a time embedding as input.
+   time_embedding_channels: Optional[int] = None
+   # Whether to add an upsample operation after the residual blocks.
+   add_upsample: bool = True
+   # Whether to add a conv2d layer after upsample.
+   upsample_conv: bool = True
+   # Optional sampling config if add_upsample is True.
+   sampling_config: Optional[UpSamplingConfig] = None
+   # Optional config of transformer blocks interleaved with residual blocks.
+   transformer_block_config: Optional[TransformerBlock2DConfig] = None
+   # Optional dimension of context tensor if a context tensor is given as input.
+   context_dim: Optional[int] = None
+
+
+ @dataclass
+ class SkipUpDecoderBlock2DConfig:
+   in_channels: int
+   out_channels: int
+   # The dimension of output channels of the previously connected block.
+   prev_out_channels: int
+   normalization_config: layers_cfg.NormalizationConfig
+   activation_config: layers_cfg.ActivationConfig
+   num_layers: int
+   # Optional time embedding channels if the residual blocks take a time embedding as input.
+   time_embedding_channels: Optional[int] = None
+   # Whether to add an upsample operation after the residual blocks.
+   add_upsample: bool = True
+   # Whether to add a conv2d layer after upsample.
+   upsample_conv: bool = True
+   # Optional sampling config if add_upsample is True.
+   sampling_config: Optional[UpSamplingConfig] = None
+   # Optional config of transformer blocks interleaved with residual blocks.
+   transformer_block_config: Optional[TransformerBlock2DConfig] = None
+   # Optional dimension of context tensor if a context tensor is given as input.
+   context_dim: Optional[int] = None
+
+
+ @dataclass
+ class DownEncoderBlock2DConfig:
+   in_channels: int
+   out_channels: int
+   normalization_config: layers_cfg.NormalizationConfig
+   activation_config: layers_cfg.ActivationConfig
+   num_layers: int
+   # Padding for the downsampling convolution.
+   padding: int = 1
+   # Optional time embedding channels if the residual blocks take a time embedding as input.
+   time_embedding_channels: Optional[int] = None
+   # Whether to add a downsample operation after the residual blocks.
+   add_downsample: bool = True
+   # Optional sampling config if add_downsample is True.
+   sampling_config: Optional[DownSamplingConfig] = None
+   # Optional config of transformer blocks interleaved with residual blocks.
+   transformer_block_config: Optional[TransformerBlock2DConfig] = None
+   # Optional dimension of context tensor if a context tensor is given as input.
+   context_dim: Optional[int] = None
+
+
+ @dataclass
+ class MidBlock2DConfig:
+   in_channels: int
+   normalization_config: layers_cfg.NormalizationConfig
+   activation_config: layers_cfg.ActivationConfig
+   num_layers: int
+   # Optional time embedding channels if the residual blocks take a time embedding context as input.
+   time_embedding_channels: Optional[int] = None
+   # Optional config of attention blocks interleaved with residual blocks.
+   attention_block_config: Optional[AttentionBlock2DConfig] = None
+   # Optional config of transformer blocks interleaved with residual blocks.
+   transformer_block_config: Optional[TransformerBlock2DConfig] = None
+   # Optional dimension of context tensor if a context tensor is given as input.
+   context_dim: Optional[int] = None
+
+
+ @dataclass
+ class AutoEncoderConfig:
+   """Configurations of encoder/decoder in the autoencoder model."""
+
+   # The activation type of encoder/decoder blocks.
+   activation_config: layers_cfg.ActivationConfig
+
+   # The output channels of each block.
+   block_out_channels: List[int]
+
+   # Number of channels in the input image.
+   in_channels: int
+
+   # Number of channels in the output.
+   out_channels: int
+
+   # Number of channels in the latent space.
+   latent_channels: int
+
+   # The component-wise standard deviation of the trained latent space computed using the first batch of
+   # the training set. This is used to scale the latent space to have unit variance when training the
+   # diffusion model. The latents are scaled with the formula `z = z * scaling_factor` before being passed
+   # to the diffusion model. When decoding, the latents are scaled back to the original scale with the
+   # formula `z = 1 / scaling_factor * z`. For more details, refer to sections 4.3.2 and D.1 of the
+   # [High-Resolution Image Synthesis with Latent Diffusion Models](https://arxiv.org/abs/2112.10752) paper.
+   scaling_factor: float
+
+   # The number of layers in each encoder/decoder block.
+   layers_per_block: int
+
+   # The normalization config.
+   normalization_config: layers_cfg.NormalizationConfig
+
+   # The configuration of the middle block, that is, after the last encoder block and before the first decoder block.
+   mid_block_config: MidBlock2DConfig
+
+
+ @dataclass
+ class DiffusionModelConfig:
+   """Configurations of the diffusion model."""
+
+   # Number of channels in the input tensor.
+   in_channels: int
+
+   # Number of channels in the output tensor.
+   out_channels: int
+
+   # The output channels of each block.
+   block_out_channels: List[int]
+
+   # The number of layers in each block.
+   layers_per_block: int
+
+   # The padding to use for the downsampling.
+   downsample_padding: int
+
+   # Normalization config used in residual blocks.
+   residual_norm_config: layers_cfg.NormalizationConfig
+
+   # Activation config used in residual blocks.
+   residual_activation_type: layers_cfg.ActivationType
+
+   # The batch size used in transformer blocks, for attention layers.
+   transformer_batch_size: int
+
+   # The number of attention heads used in transformer blocks.
+   transformer_num_attention_heads: int
+
+   # The dimension of cross attention used in transformer blocks.
+   transformer_cross_attention_dim: int
+
+   # Normalization config used in the pre-conv layer of transformer blocks.
+   transformer_pre_conv_norm_config: layers_cfg.NormalizationConfig
+
+   # Normalization config used in transformer blocks.
+   transformer_norm_config: layers_cfg.NormalizationConfig
+
+   # Activation type of the feed forward used in transformer blocks.
+   transformer_ff_activation_type: layers_cfg.ActivationType
+
+   # Number of layers in the mid block.
+   mid_block_layers: int
+
+   # Dimension of the time embedding.
+   time_embedding_dim: int
+
+   # Time embedding dimensions for blocks.
+   time_embedding_blocks_dim: int
+
+   # Normalization config used for the final layer.
+   final_norm_config: layers_cfg.NormalizationConfig
+
+   # Activation type used in the final layer.
+   final_activation_type: layers_cfg.ActivationType
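A note on how these configs meet the builders earlier in this diff: for the CONVOLUTION mode, build_downsampling reuses in_channels when out_channels is left as None, and remaps a padding of 0 to the asymmetric (0, 1, 0, 1) tuple. A minimal sketch, with illustrative values:

import ai_edge_torch.generative.layers.unet.model_config as unet_config

# Strided-convolution downsampling; out_channels may be omitted.
cfg = unet_config.DownSamplingConfig(
    mode=unet_config.SamplingType.CONVOLUTION,
    in_channels=64,
    kernel_size=3,
    stride=2,
    padding=1,
)
assert cfg.out_channels is None  # build_downsampling falls back to in_channels (64)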
@@ -0,0 +1,14 @@
+ # Copyright 2024 The AI Edge Torch Authors.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ==============================================================================
ai_edge_torch/generative/quantize/ai_edge_quantizer_glue/translate_recipe.py
@@ -0,0 +1,148 @@
+ # Copyright 2024 The AI Edge Torch Authors.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ==============================================================================
+
+ import json
+
+ from ai_edge_quantizer import quantizer
+
+ from ai_edge_torch.generative.quantize import quant_attrs
+ from ai_edge_torch.generative.quantize import quant_recipe
+
+ _OpExecutionMode = quantizer.qtyping.OpExecutionMode
+ _OpName = quantizer.qtyping.TFLOperationName
+ _TensorQuantConfig = quantizer.qtyping.TensorQuantizationConfig
+ _OpQuantConfig = quantizer.qtyping.OpQuantizationConfig
+
+ _DEFAULT_REGEX_STR = '.*'
+ _SINGULAR_TRANSFORMER_BLOCK_REGEX_STR = 'transformer_block'
+ _IDX_TRANSFORMER_BLOCKS_REGEX_STR = 'transformer_blocks\[{}\]'
+ _ATTENTION_REGEX_STR = 'ai_edge_torch.generative.layers.attention'
+ _FEEDFORWARD_REGEX_STR = 'ai_edge_torch.generative.layers.feed_forward'
+ _EMBEDDING_REGEX_STR = 'Embedding_tok_embedding'
+ _ANY_TWO_DIGITS_REGEX_STR = '\d{1,2}'
+
+
+ def _get_nbits_from_dtype(dtype: quant_attrs.Dtype) -> int:
+   if dtype == quant_attrs.Dtype.FP32:
+     return 32
+   elif dtype == quant_attrs.Dtype.FP16:
+     return 16
+   elif dtype == quant_attrs.Dtype.INT8:
+     return 8
+   raise ValueError('Unimplemented number of bits')
+
+
+ def _get_dtype_from_dtype(dtype: quant_attrs.Dtype) -> quantizer.qtyping.TensorDataType:
+   if dtype == quant_attrs.Dtype.FP32 or dtype == quant_attrs.Dtype.FP16:
+     return quantizer.qtyping.TensorDataType.FLOAT
+   else:
+     return quantizer.qtyping.TensorDataType.INT
+
+
+ def _get_execution_mode_from_mode(mode: quant_attrs.Mode) -> _OpExecutionMode:
+   if mode == quant_attrs.Mode.DYNAMIC_RANGE:
+     return _OpExecutionMode.DRQ
+   elif mode == quant_attrs.Mode.WEIGHT_ONLY:
+     return _OpExecutionMode.WEIGHT_ONLY
+   raise ValueError('Unimplemented execution mode')
+
+
+ def _get_channelwise_from_granularity(granularity: quant_attrs.Granularity) -> bool:
+   if granularity == quant_attrs.Granularity.CHANNELWISE:
+     return True
+   elif granularity == quant_attrs.Granularity.NONE:
+     return False
+   raise ValueError('Unimplemented granularity')
+
+
+ def _get_algorithm_key_from_algorithm(algo: quant_attrs.Algorithm) -> str:
+   if algo == quant_attrs.Algorithm.MIN_MAX:
+     return quantizer.algorithm_manager.AlgorithmName.MIN_MAX_UNIFORM_QUANT
+   elif algo == quant_attrs.Algorithm.FLOAT_CAST:
+     return quantizer.algorithm_manager.AlgorithmName.FLOAT_CASTING
+   raise ValueError('Unimplemented algorithm')
+
+
+ def _set_quant_config(
+     rm: quantizer.recipe_manager.RecipeManager,
+     layer_recipe: quant_recipe.LayerQuantRecipe,
+     regex: str,
+ ):
+   rm.add_quantization_config(
+       regex=regex,
+       operation_name=_OpName.ALL_SUPPORTED,
+       op_config=_OpQuantConfig(
+           weight_tensor_config=_TensorQuantConfig(
+               num_bits=_get_nbits_from_dtype(layer_recipe.weight_dtype),
+               symmetric=True,
+               channel_wise=_get_channelwise_from_granularity(layer_recipe.granularity),
+               dtype=_get_dtype_from_dtype(layer_recipe.weight_dtype),
+           ),
+           execution_mode=_get_execution_mode_from_mode(layer_recipe.mode),
+       ),
+       algorithm_key=_get_algorithm_key_from_algorithm(layer_recipe.algorithm),
+   )
+
+
+ def translate_to_ai_edge_recipe(
+     recipe: quant_recipe.GenerativeQuantRecipe,
+ ) -> quantizer.recipe_manager.ModelQuantizationRecipe:
+   rm = quantizer.recipe_manager.RecipeManager()
+
+   if recipe.default is not None:
+     _set_quant_config(rm, recipe.default, _DEFAULT_REGEX_STR)
+
+   if recipe.embedding is not None:
+     _set_quant_config(rm, recipe.embedding, _EMBEDDING_REGEX_STR)
+
+   if recipe.attention is not None:
+     if isinstance(recipe.attention, dict):
+       for idx, layer in recipe.attention.items():
+         _set_quant_config(
+             rm,
+             layer,
+             f'{_IDX_TRANSFORMER_BLOCKS_REGEX_STR.format(idx)}/{_ATTENTION_REGEX_STR}',
+         )
+     else:
+       _set_quant_config(
+           rm,
+           recipe.attention,
+           f'{_SINGULAR_TRANSFORMER_BLOCK_REGEX_STR}/{_ATTENTION_REGEX_STR}',
+       )
+
+   if recipe.feedforward is not None:
+     if isinstance(recipe.feedforward, dict):
+       for idx, layer in recipe.feedforward.items():
+         _set_quant_config(
+             rm,
+             layer,
+             f'{_IDX_TRANSFORMER_BLOCKS_REGEX_STR.format(idx)}/{_FEEDFORWARD_REGEX_STR}',
+         )
+     else:
+       _set_quant_config(
+           rm,
+           recipe.feedforward,
+           f'{_SINGULAR_TRANSFORMER_BLOCK_REGEX_STR}/{_FEEDFORWARD_REGEX_STR}',
+       )
+
+   return rm.get_quantization_recipe()
+
+
+ def quantize_model(
+     model: bytearray, recipe: quantizer.recipe_manager.ModelQuantizationRecipe
+ ) -> bytearray:
+   qt = quantizer.Quantizer(bytearray(model), recipe)
+   result = qt.quantize()
+   return result.quantized_model
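For orientation, a hedged end-to-end sketch of the glue flow these functions enable. The LayerQuantRecipe constructor arguments below are assumptions inferred from the attributes _set_quant_config reads (weight_dtype, mode, algorithm, granularity); see quant_recipe.py in this diff for the exact signature. The .tflite paths are placeholders.

from ai_edge_torch.generative.quantize import quant_attrs
from ai_edge_torch.generative.quantize import quant_recipe
from ai_edge_torch.generative.quantize.ai_edge_quantizer_glue import translate_recipe

# Assumed LayerQuantRecipe fields; only the four read above are relied on.
layer = quant_recipe.LayerQuantRecipe(
    weight_dtype=quant_attrs.Dtype.INT8,
    mode=quant_attrs.Mode.DYNAMIC_RANGE,
    algorithm=quant_attrs.Algorithm.MIN_MAX,
    granularity=quant_attrs.Granularity.CHANNELWISE,
)
recipe = quant_recipe.GenerativeQuantRecipe(default=layer)

# Translate to an ai_edge_quantizer recipe, then quantize a float TFLite flatbuffer.
ai_edge_recipe = translate_recipe.translate_to_ai_edge_recipe(recipe)
with open('/tmp/model_f32.tflite', 'rb') as f:
    float_model = bytearray(f.read())
quantized = translate_recipe.quantize_model(float_model, ai_edge_recipe)
with open('/tmp/model_int8.tflite', 'wb') as f:
    f.write(quantized)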
ai_edge_torch/generative/quantize/example.py
@@ -0,0 +1,45 @@
+ # Copyright 2024 The AI Edge Torch Authors. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ==============================================================================
+
+ import numpy as np
+ import torch
+
+ import ai_edge_torch
+ from ai_edge_torch.generative.examples.gemma import gemma
+ from ai_edge_torch.generative.quantize import quant_recipes
+
+
+ def main():
+   # Build a PyTorch model as usual
+   config = gemma.get_fake_model_config_2b_for_test()
+   model = gemma.Gemma(config)
+   idx = torch.from_numpy(np.array([[1, 2, 3, 4]]))
+   tokens = torch.full((1, 10), 0, dtype=torch.long, device="cpu")
+   tokens[0, :4] = idx
+   input_pos = torch.arange(0, 10)
+
+   # Create a quantization recipe to be applied to the model
+   quant_config = quant_recipes.full_int8_dynamic_recipe()
+   print(quant_config)
+
+   # Convert with quantization
+   edge_model = ai_edge_torch.convert(
+       model, (tokens, input_pos), quant_config=quant_config
+   )
+   edge_model.export("/tmp/gemma_2b_quantized.tflite")
+
+
+ if __name__ == "__main__":
+   main()
ai_edge_torch/generative/quantize/quant_attrs.py
@@ -0,0 +1,68 @@
+ # Copyright 2024 The AI Edge Torch Authors. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ==============================================================================
+
+ import enum
+
+
+ @enum.unique
+ class Dtype(enum.Enum):
+   """Data types and precision of tensors."""
+
+   FP32 = enum.auto()
+   FP16 = enum.auto()
+   INT8 = enum.auto()
+
+
+ @enum.unique
+ class Algorithm(enum.Enum):
+   """Algorithm used to calculate quantization parameters.
+
+   Attributes:
+     MIN_MAX: Maps the min/max of the floating point space to the min/max of
+       the quantized space and quantizes uniformly.
+     FLOAT_CAST: Casts a float to another float of a different type.
+   """
+
+   MIN_MAX = enum.auto()
+   FLOAT_CAST = enum.auto()
+
+
+ @enum.unique
+ class Mode(enum.Enum):
+   """Mode of quantization.
+
+   Attributes:
+     DYNAMIC_RANGE: Quantizes activations at runtime and weights statically to
+       perform computation in integers.
+     WEIGHT_ONLY: Quantizes weights statically and dequantizes at runtime to
+       perform computation in floating point.
+   """
+
+   DYNAMIC_RANGE = enum.auto()
+   WEIGHT_ONLY = enum.auto()
+
+
+ @enum.unique
+ class Granularity(enum.Enum):
+   """Granularity of quantization parameters.
+
+   Attributes:
+     NONE: Granularity is not applicable to this quantization scheme.
+     CHANNELWISE: Per-channel quantization; each channel of the relevant
+       tensors is quantized independently of the others.
+   """
+
+   NONE = enum.auto()
+   CHANNELWISE = enum.auto()
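Taken together, these enums are the vocabulary that translate_recipe.py (earlier in this diff) maps onto ai_edge_quantizer types. A small sketch of the correspondence, calling that module's helpers directly (they are private, so this is for illustration only):

from ai_edge_torch.generative.quantize import quant_attrs
from ai_edge_torch.generative.quantize.ai_edge_quantizer_glue import translate_recipe

# Dtype -> bit width: FP32 -> 32, FP16 -> 16, INT8 -> 8.
print(translate_recipe._get_nbits_from_dtype(quant_attrs.Dtype.INT8))  # 8

# Mode -> execution mode: DYNAMIC_RANGE -> DRQ, WEIGHT_ONLY -> WEIGHT_ONLY.
print(translate_recipe._get_execution_mode_from_mode(quant_attrs.Mode.DYNAMIC_RANGE))

# Granularity -> channel_wise flag: CHANNELWISE -> True, NONE -> False.
print(translate_recipe._get_channelwise_from_granularity(quant_attrs.Granularity.NONE))  # False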