ai-edge-torch-nightly 0.3.0.dev20241201__py3-none-any.whl → 0.3.0.dev20241205__py3-none-any.whl

Files changed (28)
  1. ai_edge_torch/_convert/test/test_convert.py +48 -0
  2. ai_edge_torch/generative/examples/gemma/convert_gemma1_to_tflite.py +6 -6
  3. ai_edge_torch/generative/examples/gemma/convert_gemma2_to_tflite.py +6 -6
  4. ai_edge_torch/generative/examples/llama/convert_to_tflite.py +6 -6
  5. ai_edge_torch/generative/examples/moonshine/__init__.py +14 -0
  6. ai_edge_torch/generative/examples/{gemma/convert_gemma2_multi_prefills.py → moonshine/convert_moonshine_to_tflite.py} +11 -29
  7. ai_edge_torch/generative/examples/moonshine/moonshine.py +103 -0
  8. ai_edge_torch/generative/examples/openelm/convert_to_tflite.py +9 -6
  9. ai_edge_torch/generative/examples/phi/convert_phi3_to_tflite.py +6 -6
  10. ai_edge_torch/generative/examples/phi/convert_to_tflite.py +6 -6
  11. ai_edge_torch/generative/examples/qwen/convert_to_tflite.py +8 -6
  12. ai_edge_torch/generative/examples/smollm/convert_to_tflite.py +6 -6
  13. ai_edge_torch/generative/examples/tiny_llama/convert_to_tflite.py +8 -6
  14. ai_edge_torch/generative/test/test_quantize.py +5 -0
  15. ai_edge_torch/generative/utilities/moonshine_loader.py +154 -0
  16. ai_edge_torch/odml_torch/export.py +45 -7
  17. ai_edge_torch/odml_torch/export_utils.py +2 -13
  18. ai_edge_torch/odml_torch/jax_bridge/_wrap.py +1 -3
  19. ai_edge_torch/odml_torch/lowerings/__init__.py +1 -0
  20. ai_edge_torch/odml_torch/lowerings/_basic.py +1 -3
  21. ai_edge_torch/odml_torch/lowerings/_quantized_decomposed.py +174 -0
  22. ai_edge_torch/odml_torch/lowerings/utils.py +16 -0
  23. ai_edge_torch/version.py +1 -1
  24. {ai_edge_torch_nightly-0.3.0.dev20241201.dist-info → ai_edge_torch_nightly-0.3.0.dev20241205.dist-info}/METADATA +1 -1
  25. {ai_edge_torch_nightly-0.3.0.dev20241201.dist-info → ai_edge_torch_nightly-0.3.0.dev20241205.dist-info}/RECORD +28 -24
  26. {ai_edge_torch_nightly-0.3.0.dev20241201.dist-info → ai_edge_torch_nightly-0.3.0.dev20241205.dist-info}/LICENSE +0 -0
  27. {ai_edge_torch_nightly-0.3.0.dev20241201.dist-info → ai_edge_torch_nightly-0.3.0.dev20241205.dist-info}/WHEEL +0 -0
  28. {ai_edge_torch_nightly-0.3.0.dev20241201.dist-info → ai_edge_torch_nightly-0.3.0.dev20241205.dist-info}/top_level.txt +0 -0

ai_edge_torch/_convert/test/test_convert.py CHANGED
@@ -21,10 +21,12 @@ from typing import Tuple
  import ai_edge_torch
  from ai_edge_torch import config
  from ai_edge_torch._convert import conversion_utils
+ from ai_edge_torch.quantize import pt2e_quantizer
  from ai_edge_torch.testing import model_coverage
  import numpy as np
  import torch
  from torch import nn
+ from torch.ao.quantization import quantize_pt2e
  import torchvision
 
  from absl.testing import absltest as googletest
@@ -506,6 +508,52 @@ class TestConvert(googletest.TestCase):
  model_coverage.compare_tflite_torch(edge_model, torch_module, args)
  )
 
+ def test_convert_resnet18_pt2e_per_layer(self):
+ # Step 1: export resnet18
+ args = (torch.randn(1, 3, 224, 224),)
+ m = torchvision.models.resnet18().eval()
+ m = torch._export.capture_pre_autograd_graph(m, args)
+
+ # Step 2: Insert observers or fake quantize modules
+ quantizer = pt2e_quantizer.PT2EQuantizer().set_global(
+ pt2e_quantizer.get_symmetric_quantization_config(is_per_channel=False)
+ )
+ m = quantize_pt2e.prepare_pt2e(m, quantizer)
+
+ # Step 3: Quantize the model
+ m = quantize_pt2e.convert_pt2e(m, fold_quantize=False)
+
+ # pylint: disable=broad-except
+ try:
+ ai_edge_torch.convert(m, args)
+ except Exception as err:
+ self.fail(f"PT2E conversion failed: {err}")
+ # pylint: enable=broad-except
+
+ def test_convert_resnet18_pt2e_per_channel(self):
+ # Step 1: export resnet18
+ args = (torch.randn(1, 3, 224, 224),)
+ m = torchvision.models.resnet18().eval()
+ m = torch._export.capture_pre_autograd_graph(m, args)
+
+ # Step 2: Insert observers or fake quantize modules
+ quantizer = pt2e_quantizer.PT2EQuantizer().set_global(
+ pt2e_quantizer.get_symmetric_quantization_config(is_per_channel=True)
+ )
+ m = quantize_pt2e.prepare_pt2e(m, quantizer)
+ # Step 3: Run through example inputs, otherwise per-channel
+ # quant may have scalar scale/zero_point
+ m(*args)
+ # Step 4: Quantize the model
+ m = quantize_pt2e.convert_pt2e(m, fold_quantize=False)
+
+ # pylint: disable=broad-except
+ try:
+ ai_edge_torch.convert(m, args)
+ except Exception as err:
+ self.fail(f"PT2E conversion failed: {err}")
+ # pylint: enable=broad-except
+
 
  if __name__ == "__main__":
  googletest.main()
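
The two new tests cover both PT2E flavors, per-layer and per-channel. Pulled out of the test harness, the same flow looks roughly like this; a minimal sketch assuming a local output path, with the final export call mirroring the converter examples elsewhere in this release:

import ai_edge_torch
from ai_edge_torch.quantize import pt2e_quantizer
import torch
from torch.ao.quantization import quantize_pt2e
import torchvision

# Export, then insert observers with a global symmetric config.
args = (torch.randn(1, 3, 224, 224),)
m = torchvision.models.resnet18().eval()
m = torch._export.capture_pre_autograd_graph(m, args)
quantizer = pt2e_quantizer.PT2EQuantizer().set_global(
    pt2e_quantizer.get_symmetric_quantization_config(is_per_channel=True)
)
m = quantize_pt2e.prepare_pt2e(m, quantizer)
m(*args)  # calibration pass; gives per-channel scales/zero-points real shapes
m = quantize_pt2e.convert_pt2e(m, fold_quantize=False)

# Convert with the Q/DQ ops left in the graph for the new lowerings.
edge_model = ai_edge_torch.convert(m, args)
edge_model.export('/tmp/resnet18_pt2e.tflite')  # path is illustrative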

ai_edge_torch/generative/examples/gemma/convert_gemma1_to_tflite.py CHANGED
@@ -33,10 +33,10 @@ _TFLITE_PATH = flags.DEFINE_string(
  '/tmp/',
  'The tflite file path to export.',
  )
- _PREFILL_SEQ_LEN = flags.DEFINE_integer(
- 'prefill_seq_len',
- 1024,
- 'The maximum size of prefill input tensor.',
+ _PREFILL_SEQ_LENS = flags.DEFINE_multi_integer(
+ 'prefill_seq_lens',
+ (8, 64, 128, 256, 512, 1024),
+ 'List of the maximum sizes of prefill input tensors.',
  )
  _KV_CACHE_MAX_LEN = flags.DEFINE_integer(
  'kv_cache_max_len',
@@ -55,11 +55,11 @@ def main(_):
  _CHECKPOINT_PATH.value, kv_cache_max_len=_KV_CACHE_MAX_LEN.value
  )
  quant_suffix = 'q8' if _QUANTIZE.value else 'f32'
- output_filename = f'gemma_{quant_suffix}_seq{_PREFILL_SEQ_LEN.value}_ekv{_KV_CACHE_MAX_LEN.value}.tflite'
+ output_filename = f'gemma_{quant_suffix}_ekv{_KV_CACHE_MAX_LEN.value}.tflite'
  converter.convert_to_tflite(
  pytorch_model,
  tflite_path=os.path.join(_TFLITE_PATH.value, output_filename),
- prefill_seq_len=_PREFILL_SEQ_LEN.value,
+ prefill_seq_len=_PREFILL_SEQ_LENS.value,
  quantize=_QUANTIZE.value,
  )
 
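This flag change repeats across the LLM converter examples below: DEFINE_integer becomes absl's DEFINE_multi_integer, so one invocation can request several prefill lengths. A hypothetical command line (checkpoint path illustrative; the flag is simply repeated per length):

python convert_gemma1_to_tflite.py \
  --checkpoint_path ~/Downloads/llm_data/gemma-2b \
  --prefill_seq_lens 64 --prefill_seq_lens 1024 \
  --kv_cache_max_len 1280

converter.convert_to_tflite then receives the whole list via prefill_seq_len=_PREFILL_SEQ_LENS.value and is expected to emit one prefill signature per requested length, which is also why the seq{...} component is dropped from the output filename.
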
ai_edge_torch/generative/examples/gemma/convert_gemma2_to_tflite.py CHANGED
@@ -33,10 +33,10 @@ _TFLITE_PATH = flags.DEFINE_string(
  '/tmp/',
  'The tflite file path to export.',
  )
- _PREFILL_SEQ_LEN = flags.DEFINE_integer(
- 'prefill_seq_len',
- 1024,
- 'The maximum size of prefill input tensor.',
+ _PREFILL_SEQ_LENS = flags.DEFINE_multi_integer(
+ 'prefill_seq_lens',
+ (8, 64, 128, 256, 512, 1024),
+ 'List of the maximum sizes of prefill input tensors.',
  )
  _KV_CACHE_MAX_LEN = flags.DEFINE_integer(
  'kv_cache_max_len',
@@ -55,11 +55,11 @@ def main(_):
  _CHECKPOINT_PATH.value, kv_cache_max_len=_KV_CACHE_MAX_LEN.value
  )
  quant_suffix = 'q8' if _QUANTIZE.value else 'f32'
- output_filename = f'gemma2_{quant_suffix}_seq{_PREFILL_SEQ_LEN.value}_ekv{_KV_CACHE_MAX_LEN.value}.tflite'
+ output_filename = f'gemma2_{quant_suffix}_ekv{_KV_CACHE_MAX_LEN.value}.tflite'
  converter.convert_to_tflite(
  pytorch_model,
  tflite_path=os.path.join(_TFLITE_PATH.value, output_filename),
- prefill_seq_len=_PREFILL_SEQ_LEN.value,
+ prefill_seq_len=_PREFILL_SEQ_LENS.value,
  quantize=_QUANTIZE.value,
  )
 
ai_edge_torch/generative/examples/llama/convert_to_tflite.py CHANGED
@@ -39,10 +39,10 @@ _TFLITE_PATH = flags.DEFINE_string(
  '/tmp/',
  'The tflite file path to export.',
  )
- _PREFILL_SEQ_LEN = flags.DEFINE_integer(
- 'prefill_seq_len',
- 1024,
- 'The maximum size of prefill input tensor.',
+ _PREFILL_SEQ_LENS = flags.DEFINE_multi_integer(
+ 'prefill_seq_lens',
+ (8, 64, 128, 256, 512, 1024),
+ 'List of the maximum sizes of prefill input tensors.',
  )
  _KV_CACHE_MAX_LEN = flags.DEFINE_integer(
  'kv_cache_max_len',
@@ -66,11 +66,11 @@ def main(_):
  _CHECKPOINT_PATH.value, kv_cache_max_len=_KV_CACHE_MAX_LEN.value
  )
  quant_suffix = 'q8' if _QUANTIZE.value else 'f32'
- output_filename = f'llama_{_MODEL_SIZE.value}_{quant_suffix}_seq{_PREFILL_SEQ_LEN.value}_ekv{_KV_CACHE_MAX_LEN.value}.tflite'
+ output_filename = f'llama_{_MODEL_SIZE.value}_{quant_suffix}_ekv{_KV_CACHE_MAX_LEN.value}.tflite'
  converter.convert_to_tflite(
  pytorch_model,
  tflite_path=os.path.join(_TFLITE_PATH.value, output_filename),
- prefill_seq_len=_PREFILL_SEQ_LEN.value,
+ prefill_seq_len=_PREFILL_SEQ_LENS.value,
  quantize=_QUANTIZE.value,
  )
 
ai_edge_torch/generative/examples/moonshine/__init__.py ADDED
@@ -0,0 +1,14 @@
+ # Copyright 2024 The AI Edge Torch Authors.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ==============================================================================

ai_edge_torch/generative/examples/{gemma/convert_gemma2_multi_prefills.py → moonshine/convert_moonshine_to_tflite.py} RENAMED
@@ -13,19 +13,21 @@
  # limitations under the License.
  # ==============================================================================
 
- """Example to convert a Gemma2 model to multiple prefill length tflite model."""
+ """Example of converting a Moonshine model to multi-signature tflite model."""
 
  import os
  import pathlib
 
  from absl import app
  from absl import flags
- from ai_edge_torch.generative.examples.gemma import gemma2
+ import ai_edge_torch
+ from ai_edge_torch.generative.examples.moonshine import moonshine
  from ai_edge_torch.generative.utilities import converter
+ import torch
 
  _CHECKPOINT_PATH = flags.DEFINE_string(
  'checkpoint_path',
- os.path.join(pathlib.Path.home(), 'Downloads/llm_data/gemma2-2b'),
+ os.path.join(pathlib.Path.home(), 'Downloads/llm_data/moonshine'),
  'The path to the model checkpoint, or directory holding the checkpoint.',
  )
  _TFLITE_PATH = flags.DEFINE_string(
@@ -33,35 +35,15 @@ _TFLITE_PATH = flags.DEFINE_string(
  '/tmp/',
  'The tflite file path to export.',
  )
- _PREFILL_SEQ_LENS = flags.DEFINE_multi_integer(
- 'prefill_seq_lens',
- (8, 64, 128, 256, 512, 1024),
- 'List of the maximum sizes of prefill input tensors.',
- )
- _KV_CACHE_MAX_LEN = flags.DEFINE_integer(
- 'kv_cache_max_len',
- 1280,
- 'The maximum size of KV cache buffer, including both prefill and decode.',
- )
- _QUANTIZE = flags.DEFINE_bool(
- 'quantize',
- True,
- 'Whether the model should be quantized.',
- )
 
 
  def main(_):
- pytorch_model = gemma2.build_2b_model(
- _CHECKPOINT_PATH.value, kv_cache_max_len=_KV_CACHE_MAX_LEN.value
- )
- quant_suffix = 'q8' if _QUANTIZE.value else 'f32'
- output_filename = f'gemma2_{quant_suffix}_multi-prefill-seq_ekv{_KV_CACHE_MAX_LEN.value}.tflite'
- converter.convert_to_tflite(
- pytorch_model,
- tflite_path=os.path.join(_TFLITE_PATH.value, output_filename),
- prefill_seq_len=_PREFILL_SEQ_LENS.value,
- quantize=_QUANTIZE.value,
- )
+ p_model = moonshine.build_preprocessor(_CHECKPOINT_PATH.value)
+ output_filename = f'moonshine_preprocessor.tflite'
+ _input = torch.randn((1, 1, 159414), dtype=torch.float)
+ edge_model = ai_edge_torch.convert(p_model, (_input,), quant_config=None)
+ tflite_path = os.path.join(_TFLITE_PATH.value, output_filename)
+ edge_model.export(tflite_path)
 
 
  if __name__ == '__main__':
ai_edge_torch/generative/examples/moonshine/moonshine.py ADDED
@@ -0,0 +1,103 @@
+ # Copyright 2024 The AI Edge Torch Authors.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ==============================================================================
+
+ """Example of building the Moonshine model."""
+
+ import os
+ import pathlib
+ from typing import Optional, Tuple
+ from absl import app
+ from ai_edge_torch.generative.layers import attention
+ from ai_edge_torch.generative.layers import builder
+ from ai_edge_torch.generative.layers import kv_cache as kv_utils
+ import ai_edge_torch.generative.layers.attention_utils as attn_utils
+ import ai_edge_torch.generative.layers.model_config as cfg
+ import ai_edge_torch.generative.layers.normalization as normalization
+ import ai_edge_torch.generative.utilities.moonshine_loader as loading_utils
+ import h5py
+ import torch
+ from torch import nn
+ import torch.nn as nn
+
+ TENSOR_NAMES = loading_utils.ModelLoader.TensorNames(
+ conv1D_0="layers/sequential/layers/conv1d/vars",
+ conv1D_1="layers/sequential/layers/conv1d_1/vars",
+ conv1D_2="layers/sequential/layers/conv1d_2/vars",
+ group_norm="layers/sequential/layers/group_normalization/vars",
+ )
+
+
+ class AudioPreprocessor(nn.Module):
+
+ def __init__(self, dim):
+ super(AudioPreprocessor, self).__init__()
+ self.conv1 = nn.Conv1d(
+ in_channels=1, out_channels=dim, kernel_size=127, stride=64, bias=False
+ )
+ self.tanh = nn.Tanh()
+ self.group_norm = normalization.GroupNorm(group_num=1, dim=dim, eps=1e-5)
+ self.conv2 = nn.Conv1d(
+ in_channels=dim,
+ out_channels=2 * dim,
+ kernel_size=7,
+ stride=3,
+ padding=0,  # Equivalent to padding="valid"
+ )
+ self.gelu1 = nn.GELU()
+ self.conv3 = nn.Conv1d(
+ in_channels=2 * dim,
+ out_channels=dim,
+ kernel_size=3,
+ stride=2,
+ padding=0,  # Equivalent to padding="valid"
+ )
+ self.gelu2 = nn.GELU()
+
+ def forward(self, inputs):
+ x = self.conv1(inputs)
+ x = self.tanh(x)
+ x = self.group_norm(x)
+ x = self.conv2(x)
+ x = self.gelu1(x)
+ x = self.conv3(x)
+ x = self.gelu2(x)
+ return x
+
+
+ def build_preprocessor(checkpoint_path: str, **kwargs) -> nn.Module:
+ ap = AudioPreprocessor(dim=416)
+ loader = loading_utils.ModelLoader(checkpoint_path, TENSOR_NAMES)
+ loader.load(ap, strict=True)
+ return ap
+
+
+ def main(_):
+ # TODO(b/375421767) Remove golden checks once full model is implemented.
+ HF_PATH = os.path.join(pathlib.Path.home(), "Downloads/llm_data/moonshine")
+
+ test_data_path = pathlib.Path(__file__).parent.resolve()
+ INPUT_PATH = test_data_path / "data" / "pp_input.pt"
+ GOLDEN_PATH = test_data_path / "data" / "pp_output.pt"
+
+ ap = build_preprocessor(HF_PATH)
+ ap.eval()
+ inputs = torch.load(INPUT_PATH).reshape((1, 1, 159414))
+ out = ap(inputs)
+ golden = torch.load(GOLDEN_PATH).transpose(1, 2)
+ assert torch.allclose(out, golden, atol=1e-4, rtol=1e-4)
+
+
+ if __name__ == "__main__":
+ app.run(main)
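
The preprocessor is three strided, unpadded Conv1d stages, so each output length follows the standard floor((L - kernel) / stride) + 1 rule. A quick shape check for the 159414-sample golden input used above, reusing the AudioPreprocessor class with dim=416 as in build_preprocessor (randomly initialized weights suffice for a shape check):

import torch

ap = AudioPreprocessor(dim=416)
x = torch.randn(1, 1, 159414)
# conv1: (159414 - 127) // 64 + 1 = 2489
# conv2: (2489 - 7) // 3 + 1 = 828
# conv3: (828 - 3) // 2 + 1 = 413
print(ap(x).shape)  # expected: torch.Size([1, 416, 413])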

ai_edge_torch/generative/examples/openelm/convert_to_tflite.py CHANGED
@@ -33,10 +33,10 @@ _TFLITE_PATH = flags.DEFINE_string(
  '/tmp/',
  'The tflite file path to export.',
  )
- _PREFILL_SEQ_LEN = flags.DEFINE_integer(
- 'prefill_seq_len',
- 1024,
- 'The maximum size of prefill input tensor.',
+ _PREFILL_SEQ_LENS = flags.DEFINE_multi_integer(
+ 'prefill_seq_lens',
+ (8, 64, 128, 256, 512, 1024),
+ 'List of the maximum sizes of prefill input tensors.',
  )
  _KV_CACHE_MAX_LEN = flags.DEFINE_integer(
  'kv_cache_max_len',
@@ -55,11 +55,14 @@ def main(_):
  _CHECKPOINT_PATH.value, kv_cache_max_len=_KV_CACHE_MAX_LEN.value
  )
  quant_suffix = 'q8' if _QUANTIZE.value else 'f32'
- output_filename = f'openelm_{quant_suffix}_seq{_PREFILL_SEQ_LEN.value}_ekv{_KV_CACHE_MAX_LEN.value}.tflite'
+ output_filename = (
+ f'openelm_{quant_suffix}_ekv{_KV_CACHE_MAX_LEN.value}.tflite'
+ )
+
  converter.convert_to_tflite(
  pytorch_model,
  tflite_path=os.path.join(_TFLITE_PATH.value, output_filename),
- prefill_seq_len=_PREFILL_SEQ_LEN.value,
+ prefill_seq_len=_PREFILL_SEQ_LENS.value,
  quantize=_QUANTIZE.value,
  )
 
ai_edge_torch/generative/examples/phi/convert_phi3_to_tflite.py CHANGED
@@ -33,10 +33,10 @@ _TFLITE_PATH = flags.DEFINE_string(
  '/tmp/',
  'The tflite file path to export.',
  )
- _PREFILL_SEQ_LEN = flags.DEFINE_integer(
- 'prefill_seq_len',
- 1024,
- 'The maximum size of prefill input tensor.',
+ _PREFILL_SEQ_LENS = flags.DEFINE_multi_integer(
+ 'prefill_seq_lens',
+ (8, 64, 128, 256, 512, 1024),
+ 'List of the maximum sizes of prefill input tensors.',
  )
  _KV_CACHE_MAX_LEN = flags.DEFINE_integer(
  'kv_cache_max_len',
@@ -55,11 +55,11 @@ def main(_):
  _CHECKPOINT_PATH.value, kv_cache_max_len=_KV_CACHE_MAX_LEN.value
  )
  quant_suffix = 'q8' if _QUANTIZE.value else 'f32'
- output_filename = f'phi3_{quant_suffix}_seq{_PREFILL_SEQ_LEN.value}_ekv{_KV_CACHE_MAX_LEN.value}.tflite'
+ output_filename = f'phi3_{quant_suffix}_ekv{_KV_CACHE_MAX_LEN.value}.tflite'
  converter.convert_to_tflite(
  pytorch_model,
  tflite_path=os.path.join(_TFLITE_PATH.value, output_filename),
- prefill_seq_len=_PREFILL_SEQ_LEN.value,
+ prefill_seq_len=_PREFILL_SEQ_LENS.value,
  quantize=_QUANTIZE.value,
  )
 
ai_edge_torch/generative/examples/phi/convert_to_tflite.py CHANGED
@@ -33,10 +33,10 @@ _TFLITE_PATH = flags.DEFINE_string(
  '/tmp/',
  'The tflite file path to export.',
  )
- _PREFILL_SEQ_LEN = flags.DEFINE_integer(
- 'prefill_seq_len',
- 1024,
- 'The maximum size of prefill input tensor.',
+ _PREFILL_SEQ_LENS = flags.DEFINE_multi_integer(
+ 'prefill_seq_lens',
+ (8, 64, 128, 256, 512, 1024),
+ 'List of the maximum sizes of prefill input tensors.',
  )
  _KV_CACHE_MAX_LEN = flags.DEFINE_integer(
  'kv_cache_max_len',
@@ -55,11 +55,11 @@ def main(_):
  _CHECKPOINT_PATH.value, kv_cache_max_len=_KV_CACHE_MAX_LEN.value
  )
  quant_suffix = 'q8' if _QUANTIZE.value else 'f32'
- output_filename = f'phi2_{quant_suffix}_seq{_PREFILL_SEQ_LEN.value}_ekv{_KV_CACHE_MAX_LEN.value}.tflite'
+ output_filename = f'phi2_{quant_suffix}_ekv{_KV_CACHE_MAX_LEN.value}.tflite'
  converter.convert_to_tflite(
  pytorch_model,
  tflite_path=os.path.join(_TFLITE_PATH.value, output_filename),
- prefill_seq_len=_PREFILL_SEQ_LEN.value,
+ prefill_seq_len=_PREFILL_SEQ_LENS.value,
  quantize=_QUANTIZE.value,
  )
 
ai_edge_torch/generative/examples/qwen/convert_to_tflite.py CHANGED
@@ -39,10 +39,10 @@ _TFLITE_PATH = flags.DEFINE_string(
  '/tmp/',
  'The tflite file path to export.',
  )
- _PREFILL_SEQ_LEN = flags.DEFINE_integer(
- 'prefill_seq_len',
- 1024,
- 'The maximum size of prefill input tensor.',
+ _PREFILL_SEQ_LENS = flags.DEFINE_multi_integer(
+ 'prefill_seq_lens',
+ (8, 64, 128, 256, 512, 1024),
+ 'List of the maximum sizes of prefill input tensors.',
  )
  _KV_CACHE_MAX_LEN = flags.DEFINE_integer(
  'kv_cache_max_len',
@@ -68,11 +68,13 @@ def main(_):
  )
  quant_suffix = 'q8' if _QUANTIZE.value else 'f32'
  model_size = _MODEL_SIZE.value.replace('.', '_')
- output_filename = f'qwen_{model_size}_{quant_suffix}_seq{_PREFILL_SEQ_LEN.value}_ekv{_KV_CACHE_MAX_LEN.value}.tflite'
+ output_filename = (
+ f'qwen_{model_size}_{quant_suffix}_ekv{_KV_CACHE_MAX_LEN.value}.tflite'
+ )
  converter.convert_to_tflite(
  pytorch_model,
  tflite_path=os.path.join(_TFLITE_PATH.value, output_filename),
- prefill_seq_len=_PREFILL_SEQ_LEN.value,
+ prefill_seq_len=_PREFILL_SEQ_LENS.value,
  quantize=_QUANTIZE.value,
  )
 
ai_edge_torch/generative/examples/smollm/convert_to_tflite.py CHANGED
@@ -33,10 +33,10 @@ _TFLITE_PATH = flags.DEFINE_string(
  '/tmp/',
  'The tflite file path to export.',
  )
- _PREFILL_SEQ_LEN = flags.DEFINE_integer(
- 'prefill_seq_len',
- 1024,
- 'The maximum size of prefill input tensor.',
+ _PREFILL_SEQ_LENS = flags.DEFINE_multi_integer(
+ 'prefill_seq_lens',
+ (8, 64, 128, 256, 512, 1024),
+ 'List of the maximum sizes of prefill input tensors.',
  )
  _KV_CACHE_MAX_LEN = flags.DEFINE_integer(
  'kv_cache_max_len',
@@ -55,11 +55,11 @@ def main(_):
  _CHECKPOINT_PATH.value, kv_cache_max_len=_KV_CACHE_MAX_LEN.value
  )
  quant_suffix = 'q8' if _QUANTIZE.value else 'f32'
- output_filename = f'smollm_{quant_suffix}_seq{_PREFILL_SEQ_LEN.value}_ekv{_KV_CACHE_MAX_LEN.value}.tflite'
+ output_filename = f'smollm_{quant_suffix}_ekv{_KV_CACHE_MAX_LEN.value}.tflite'
  converter.convert_to_tflite(
  pytorch_model,
  tflite_path=os.path.join(_TFLITE_PATH.value, output_filename),
- prefill_seq_len=_PREFILL_SEQ_LEN.value,
+ prefill_seq_len=_PREFILL_SEQ_LENS.value,
  quantize=_QUANTIZE.value,
  )
 
ai_edge_torch/generative/examples/tiny_llama/convert_to_tflite.py CHANGED
@@ -33,10 +33,10 @@ _TFLITE_PATH = flags.DEFINE_string(
  '/tmp/',
  'The tflite file path to export.',
  )
- _PREFILL_SEQ_LEN = flags.DEFINE_integer(
- 'prefill_seq_len',
- 1024,
- 'The maximum size of prefill input tensor.',
+ _PREFILL_SEQ_LENS = flags.DEFINE_multi_integer(
+ 'prefill_seq_lens',
+ (8, 64, 128, 256, 512, 1024),
+ 'List of the maximum sizes of prefill input tensors.',
  )
  _KV_CACHE_MAX_LEN = flags.DEFINE_integer(
  'kv_cache_max_len',
@@ -55,11 +55,13 @@ def main(_):
  _CHECKPOINT_PATH.value, kv_cache_max_len=_KV_CACHE_MAX_LEN.value
  )
  quant_suffix = 'q8' if _QUANTIZE.value else 'f32'
- output_filename = f'tinyllama_{quant_suffix}_seq{_PREFILL_SEQ_LEN.value}_ekv{_KV_CACHE_MAX_LEN.value}.tflite'
+ output_filename = (
+ f'tinyllama_{quant_suffix}_ekv{_KV_CACHE_MAX_LEN.value}.tflite'
+ )
  converter.convert_to_tflite(
  pytorch_model,
  tflite_path=os.path.join(_TFLITE_PATH.value, output_filename),
- prefill_seq_len=_PREFILL_SEQ_LEN.value,
+ prefill_seq_len=_PREFILL_SEQ_LENS.value,
  quantize=_QUANTIZE.value,
  )
 
ai_edge_torch/generative/test/test_quantize.py CHANGED
@@ -91,6 +91,11 @@ class TestVerifyRecipes(parameterized.TestCase):
  class TestQuantizeConvert(parameterized.TestCase):
  """Test conversion with quantization."""
 
+ def setUp(self):
+ super().setUp()
+ torch.manual_seed(0)
+ torch._dynamo.reset()
+
  def _attention_int8_dynamic_recipe() -> quant_config.QuantConfig:
  return quant_config.QuantConfig(
  generative_recipe=quant_recipe.GenerativeQuantRecipe(
ai_edge_torch/generative/utilities/moonshine_loader.py ADDED
@@ -0,0 +1,154 @@
+ # Copyright 2024 The AI Edge Torch Authors.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ==============================================================================
+ # Common utility functions for data loading etc.
+ from dataclasses import dataclass
+ import glob
+ import os
+ from typing import Callable, Dict
+
+ import h5py
+ import torch
+
+
+ def transpose_if_needed(t):
+ """We assume the file is from Keras, i.e. channel last format."""
+ if len(t.shape) > 2:
+ return t.permute(2, 1, 0)
+ return t
+
+
+ def load_h5_statedict(full_path: str):
+ """Loads the HDF5 DataSets into a single dictionary.
+
+ Args:
+ full_path (string): the HDF5 filename or directory that contains the HDF5
+ files.
+
+ Returns:
+ A state dictionary containing loaded tensors.
+
+ Raises:
+ ValueError: If no tensors are loaded from the provided directory or file.
+ """
+ pattern = (
+ os.path.join(full_path, "*.h5") if os.path.isdir(full_path) else full_path
+ )
+ files = []
+ for file in glob.glob(pattern):
+ files.append(file)
+
+ tensors = {}
+
+ def collect_datasets(name, obj):
+ if isinstance(obj, h5py.Dataset):
+ tensors[name] = transpose_if_needed(torch.from_numpy(obj[:]))
+
+ for file in files:
+ with h5py.File(file) as f:
+ f.visititems(collect_datasets)
+
+ if not tensors:
+ raise ValueError("Failed to load HDF5 file.")
+ return tensors
+
+
+ class ModelLoader:
+ """Utility class for loading and converting checkpoints to ODML transformer layer format."""
+
+ @dataclass
+ class TensorNames:
+ conv1D_0: str = None
+ conv1D_1: str = None
+ conv1D_2: str = None
+ group_norm: str = None
+
+ def __init__(self, file_name: str, names: TensorNames) -> None:
+ """ModelLoader constructor.
+
+ Can be used to load multiple models of the same type.
+
+ Args:
+ file_name (str): Path to the checkpoint. Can be a directory or an exact
+ file.
+ names (TensorNames): An instance of `TensorNames` to determine mappings.
+ """
+ self._file_name = file_name
+ self._names = names
+ self._loader = load_h5_statedict
+
+ def load(
+ self,
+ model: torch.nn.Module,
+ strict: bool = True,
+ ):
+ """Load the model from the checkpoint.
+
+ Args:
+ model (torch.nn.Module): The pytorch model that needs to be loaded.
+ strict (bool, optional): Whether the converted keys are strictly
+ matched. Defaults to True.
+
+ Raises:
+ ValueError: If conversion results in unmapped tensors and strict mode is
+ enabled.
+ """
+ state = self._loader(self._file_name)
+
+ if isinstance(self._names, ModelLoader.TensorNames):
+ converted_state = self._do_load(model, state, self._names)
+ else:
+ raise ValueError(f"Unknown type for names: {type(self._names)}")
+
+ if strict and state:
+ raise ValueError(
+ "Failed to map all tensors. Remaining tensors are:"
+ f" {list(state.keys())}"
+ )
+ model.load_state_dict(converted_state, strict=strict)
+
+ def _do_load(self, model, state, names, additional_prefix=""):
+ """Load the model from the checkpoint.
+
+ Args:
+ model (torch.nn.Module): The pytorch model that needs to be loaded.
+ state (Dict[str, torch.Tensor]): The pytorch state dictionary.
+ names (TensorNames): The TensorNames for the model we are loading.
+
+ Returns:
+ Dict[str, torch.Tensor]: Map of name to tensor for loading.
+ """
+ converted_state = dict()
+ if names.conv1D_0 is not None:
+ converted_state["conv1.weight"] = state.pop(f"{names.conv1D_0}/0")
+ if f"{names.conv1D_0}/1" in state:
+ converted_state["conv1.bias"] = state.pop(f"{names.conv1D_0}/1")
+
+ if names.conv1D_1 is not None:
+ converted_state["conv2.weight"] = state.pop(f"{names.conv1D_1}/0")
+ if f"{names.conv1D_1}/1" in state:
+ converted_state["conv2.bias"] = state.pop(f"{names.conv1D_1}/1")
+
+ if names.conv1D_2 is not None:
+ converted_state["conv3.weight"] = state.pop(f"{names.conv1D_2}/0")
+ if f"{names.conv1D_2}/1" in state:
+ converted_state["conv3.bias"] = state.pop(f"{names.conv1D_2}/1")
+
+ if names.group_norm is not None:
+ group_norm_name = names.group_norm
+ converted_state[f"group_norm.weight"] = state.pop(f"{group_norm_name}/0")
+ if f"{group_norm_name}/1" in state:
+ converted_state["group_norm.bias"] = state.pop(f"{group_norm_name}/1")
+
+ return converted_state
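
The loader flattens every HDF5 dataset into one dict keyed by its in-file path, permuting tensors with more than two dimensions out of Keras channel-last layout. A small sketch of using it directly (checkpoint path hypothetical):

import os
import pathlib

state = load_h5_statedict(
    os.path.join(pathlib.Path.home(), "Downloads/llm_data/moonshine")
)
# Keys mirror the HDF5 hierarchy, e.g.
#   "layers/sequential/layers/conv1d/vars/0"
# Keras Conv1D kernels are stored (kernel, in, out); permute(2, 1, 0)
# turns them into the (out, in, kernel) layout torch.nn.Conv1d expects.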

ai_edge_torch/odml_torch/export.py CHANGED
@@ -35,9 +35,7 @@ from . import lowerings
  LoweringContext = lowerings.context.LoweringContext
 
 
- def _build_flat_inputs(
- ctx: ir.Context, exported_program: torch.export.ExportedProgram
- ):
+ def _build_flat_inputs(exported_program: torch.export.ExportedProgram):
  """Build flattened inputs and metadata from exported program's signature."""
  placeholder_nodes = [
  n for n in exported_program.graph.nodes if n.op == "placeholder"
@@ -49,9 +47,11 @@ def _build_flat_inputs(
  ir_inputs = []
  tensor_metas = []
  for node, arg in zip(placeholder_nodes, export_flat_args):
- tensor_meta = node.meta.get("tensor_meta")
+ tensor_meta = node.meta.get("tensor_meta") or node.meta.get("val")
  if tensor_meta is None:
- raise RuntimeError(f"{type(arg)} (for {node.name}) is not a tensor")
+ raise RuntimeError(
+ f"{type(arg)} (for {node.name}) does not have tensor meta"
+ )
 
  tensor_metas.append(tensor_meta)
  # Assume all dynamic dimensions are unbounded.
@@ -63,7 +63,7 @@
  ir_inputs.append(
  ir.RankedTensorType.get(
  shape,
- export_utils.torch_dtype_to_ir_element_type(ctx, tensor_meta.dtype),
+ export_utils.torch_dtype_to_ir_element_type(tensor_meta.dtype),
  )
  )
  return tuple(ir_inputs), tuple(export_flat_args), tuple(tensor_metas)
@@ -258,6 +258,43 @@ def _convert_i64_to_i32(exported_program: torch.export.ExportedProgram):
  rewrite_arange(node)
 
 
+ # TODO(b/331481564) Make this a ai_edge_torch FX pass.
+ def _convert_q_dq_per_channel_args_to_list(
+ exported_program: torch.export.ExportedProgram,
+ ):
+ """Resolve tensor inputs to Q/DQ ops as static number list for lowering.
+
+ This pass makes the ExportedProgram in a non-executable state. This pass must
+ be run after all run_decompositions calls.
+ """
+ placeholder_nodes = [
+ n for n in exported_program.graph.nodes if n.op == "placeholder"
+ ]
+ export_flat_args = _torch_future.graph_module_flat_inputs(
+ exported_program, *exported_program.example_inputs
+ )
+
+ placeholder_tensor = {
+ n: tensor for n, tensor in zip(placeholder_nodes, export_flat_args)
+ }
+
+ graph_module = exported_program.graph_module
+ for node in graph_module.graph.nodes:
+ if node.target in (
+ torch.ops.quantized_decomposed.quantize_per_channel.default,
+ torch.ops.quantized_decomposed.quantize_per_tensor.tensor,
+ torch.ops.quantized_decomposed.dequantize_per_channel.default,
+ torch.ops.quantized_decomposed.dequantize_per_tensor.tensor,
+ ):
+ input, scale_node, zero_point_node = node.args[:3]
+ scale = placeholder_tensor[scale_node]
+ zero_point = placeholder_tensor[zero_point_node]
+
+ scale = scale.detach().numpy().tolist()
+ zero_point = zero_point.detach().numpy().tolist()
+ node.args = (input, scale, zero_point, *node.args[3:])
+
+
  def exported_program_to_mlir(
  exported_program: torch.export.ExportedProgram,
  ) -> MlirLowered:
@@ -270,6 +307,7 @@ def exported_program_to_mlir(
  exported_program = _torch_future.safe_run_decompositions(
  exported_program, lowerings.decompositions()
  )
+ _convert_q_dq_per_channel_args_to_list(exported_program)
 
  with export_utils.create_ir_context() as context, ir.Location.unknown():
 
@@ -277,7 +315,7 @@
  lctx = LoweringContext(context, module)
  interpreter = LoweringInterpreter(exported_program.graph_module, lctx)
  ir_flat_inputs, export_flat_args, tensor_metas = _build_flat_inputs(
- context, exported_program
+ exported_program
  )
 
  # HACK: OSS MLIR pybinding could mysteriously transform func.func under
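
Conceptually, the new pass turns the tensor-valued scale/zero-point placeholders on Q/DQ nodes into literal lists (a hypothetical before/after for a 2-channel weight quantized on axis 0):

# before: scale/zero_point are graph placeholders backed by example inputs
#   quantized_decomposed.quantize_per_channel(x, scale_p, zp_p, 0, -128, 127, torch.int8)
# after: literals the lowering can bake into the MLIR quant type
#   quantized_decomposed.quantize_per_channel(x, [0.02, 0.05], [0, 0], 0, -128, 127, torch.int8)

This is also why the docstring warns that the ExportedProgram is no longer executable afterwards: the rewritten node args no longer match the op schema.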

ai_edge_torch/odml_torch/export_utils.py CHANGED
@@ -14,9 +14,9 @@
  # ==============================================================================
  """Utilities for ODML Torch export."""
 
- import functools
  import re
  from typing import Sequence, cast
+ from ai_edge_torch.odml_torch.lowerings import utils as lowering_utils
  import jax._src.interpreters.mlir
  from jax._src.lib.mlir import ir
  from jax._src.lib.mlir.dialects import func
@@ -47,7 +47,6 @@ def create_ir_context():
  # TODO(b/362798610) Build MLIR pybinding in ai-edge-torch release.
  context = jax._src.interpreters.mlir.make_ir_context()
  context.allow_unregistered_dialects = True
-
  return context
 
 
@@ -135,17 +134,7 @@ def build_ir_attr(val):
  return ir.StringAttr.get(str(val))
 
 
- def torch_dtype_to_ir_element_type(ctx, dtype):
- ty_get = {
- torch.double: ir.F64Type.get,
- torch.float32: ir.F32Type.get,
- torch.half: ir.F16Type.get,
- torch.long: functools.partial(ir.IntegerType.get_signless, 64),
- torch.int32: functools.partial(ir.IntegerType.get_signless, 32),
- torch.int16: functools.partial(ir.IntegerType.get_signless, 16),
- torch.bool: functools.partial(ir.IntegerType.get_signless, 1),
- }.get(dtype)
- return ty_get(ctx)
+ torch_dtype_to_ir_element_type = lowering_utils.torch_dtype_to_ir_element_type
 
 
  def ir_element_type_to_torch_dtype(ty):
ai_edge_torch/odml_torch/jax_bridge/_wrap.py CHANGED
@@ -163,9 +163,7 @@ def wrap(jaxfn: Callable[Any, Any], ir_input_names: list[str] = None):
  if aval is None:
  return result
 
- target_elty = export_utils.torch_dtype_to_ir_element_type(
- lctx.ir_context, aval.dtype
- )
+ target_elty = export_utils.torch_dtype_to_ir_element_type(aval.dtype)
  if result.type.element_type == target_elty:
  return result
  return stablehlo.convert(
ai_edge_torch/odml_torch/lowerings/__init__.py CHANGED
@@ -17,6 +17,7 @@ from . import _batch_norm
  from . import _convolution
  from . import _jax_lowerings
  from . import _layer_norm
+ from . import _quantized_decomposed
  from . import context
  from . import registry
  from . import utils
ai_edge_torch/odml_torch/lowerings/_basic.py CHANGED
@@ -227,9 +227,7 @@ def _aten_cat(lctx: LoweringContext, tensors, dim=0):
  if not non_empty_tensors:
  return utils.splat(
  0,
- export_utils.torch_dtype_to_ir_element_type(
- lctx.ir_context, out_aval.dtype
- ),
+ export_utils.torch_dtype_to_ir_element_type(out_aval.dtype),
  out_aval.shape,
  )
 
ai_edge_torch/odml_torch/lowerings/_quantized_decomposed.py ADDED
@@ -0,0 +1,174 @@
+ # Copyright 2024 The AI Edge Torch Authors.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ==============================================================================
+ """Lowerings for PT2E torch.ops.quantized_decomposed ops."""
+ from typing import Union, cast
+
+ from ai_edge_torch.odml_torch.lowerings import context
+ from ai_edge_torch.odml_torch.lowerings import utils
+ from jax._src.lib.mlir import ir
+ from jax._src.lib.mlir.dialects import hlo as stablehlo
+ import torch
+ import torch.ao.quantization.fx._decomposed
+ import torch.utils._pytree as pytree
+
+ from . import registry
+
+ lower = registry.lower
+ LoweringContext = context.LoweringContext
+
+
+ def _uniform_quantized_type(
+ stored_type: str | ir.Type,
+ expressed_type: str | ir.Type,
+ *,
+ scale: float | list[float] | tuple[float],
+ zero_point: float | list[float] | tuple[float],
+ storage_type_min: int | None = None,
+ storage_type_max: int | None = None,
+ channel_axis: int | None = None,
+ channel_axis_size: int | None = None,
+ ):
+ """Polyfill for quant.UniformQuantizedType."""
+ if storage_type_min and storage_type_max:
+ storage_min_max = f"<{storage_type_min}:{storage_type_max}>"
+ else:
+ storage_min_max = ""
+
+ if channel_axis is not None:
+ # Per-channel quantization
+ # https://mlir.llvm.org/docs/Dialects/QuantDialect/#per-channel-quantization
+ assert isinstance(scale, (list, tuple))
+ assert isinstance(zero_point, (list, tuple))
+
+ if len(scale) == 1:
+ scale *= channel_axis_size
+ if len(zero_point) == 1:
+ zero_point *= channel_axis_size
+
+ assert len(scale) == len(zero_point) == channel_axis_size
+ scale_zp_strs = []
+ for s, zp in zip(scale, zero_point):
+ scale_zp_strs.append(f"{s}:{zp}")
+ scale_zp = "{" + ",".join(scale_zp_strs) + "}"
+ return ir.Type.parse(
+ f"!quant.uniform<{stored_type}{storage_min_max}:{expressed_type}:{channel_axis},{scale_zp}>"
+ )
+ else:
+ # Per-layer quantization
+ # https://mlir.llvm.org/docs/Dialects/QuantDialect/#per-layer-quantization
+ scale = pytree.tree_flatten([scale])[0][-1]
+ zero_point = pytree.tree_flatten([zero_point])[0][-1]
+ scale_zp = f"{scale}:{zero_point}"
+ return ir.Type.parse(
+ f"!quant.uniform<{stored_type}{storage_min_max}:{expressed_type},{scale_zp}>"
+ )
+
+
+ # Quant dialect is not registered in the Python MLIR pybinding used by
+ # odml-torch. Therefore, stablehlo.uniform_quantize/uniform_dequantize ops and
+ # quant types are represented in stablehlo.custom_call to pass MLIR verification
+ # and VHLO serialization before converter.
+ # TODO(b/362798610) Build MLIR pybinding in ai-edge-torch release.
+
+
+ # Schema:
+ # - quantized_decomposed::quantize_per_tensor(Tensor input, float scale,
+ # int zero_point, int quant_min, int quant_max,
+ # ScalarType dtype) -> Tensor
+ # - quantized_decomposed::quantize_per_tensor.tensor(Tensor input,
+ # Tensor scale, Tensor zero_point, int quant_min, int quant_max,
+ # ScalarType dtype) -> Tensor
+ #
+ # Scale and zero_point in tensors are automatically converted to list before
+ # lowering.
+ @lower(torch.ops.quantized_decomposed.quantize_per_tensor)
+ def _quantize_per_tensor(
+ lctx: LoweringContext,
+ input: ir.Value,
+ scale: Union[float, list[float]],
+ zero_point: Union[float, list[float]],
+ quant_min: int,
+ quant_max: int,
+ dtype: torch.dtype,
+ ):
+ input_ty = cast(ir.RankedTensorType, input.type)
+ qty = _uniform_quantized_type(
+ utils.torch_dtype_to_ir_element_type(dtype),
+ input_ty.element_type,
+ scale=scale,
+ zero_point=zero_point,
+ storage_type_min=quant_min,
+ storage_type_max=quant_max,
+ )
+ return stablehlo.custom_call(
+ call_target_name="odml_torch.uniform_quantize",
+ inputs=[input],
+ result=[input_ty],
+ backend_config=ir.StringAttr.get(
+ str(ir.RankedTensorType.get(input_ty.shape, qty))
+ ),
+ )
+
+
+ # Schema:
+ # - quantized_decomposed::quantize_per_channel(Tensor input, Tensor scales,
+ # Tensor zero_points, int axis, int quant_min, int quant_max,
+ # ScalarType dtype) -> Tensor
+ #
+ # Scale and zero_point in tensors are automatically converted to list before
+ # lowering.
+ @lower(torch.ops.quantized_decomposed.quantize_per_channel)
+ def _quantize_per_channel(
+ lctx: LoweringContext,
+ input: ir.Value,
+ scale: list[float],
+ zero_point: list[float],
+ axis: int,
+ quant_min: int,
+ quant_max: int,
+ dtype: torch.dtype,
+ ):
+ input_ty = cast(ir.RankedTensorType, input.type)
+ qty = _uniform_quantized_type(
+ utils.torch_dtype_to_ir_element_type(dtype),
+ input_ty.element_type,
+ scale=scale,
+ zero_point=zero_point,
+ channel_axis=axis,
+ channel_axis_size=input_ty.shape[axis],
+ storage_type_min=quant_min,
+ storage_type_max=quant_max,
+ )
+ return stablehlo.custom_call(
+ call_target_name="odml_torch.uniform_quantize",
+ inputs=[input],
+ result=[input_ty],
+ backend_config=ir.StringAttr.get(
+ str(ir.RankedTensorType.get(input_ty.shape, qty))
+ ),
+ )
+
+
+ @lower(torch.ops.quantized_decomposed.dequantize_per_tensor)
+ @lower(torch.ops.quantized_decomposed.dequantize_per_channel)
+ def _dequantize(lctx: LoweringContext, input: ir.Value, *args, **kwargs):
+ result_meta = lctx.node.meta.get("tensor_meta")
+ result_elty = utils.torch_dtype_to_ir_element_type(result_meta.dtype)
+
+ return stablehlo.custom_call(
+ call_target_name="odml_torch.uniform_dequantize",
+ inputs=[input],
+ result=[ir.RankedTensorType.get(result_meta.shape, result_elty)],
+ )
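
_uniform_quantized_type only assembles the textual form of the MLIR quant type. Plugging illustrative numbers into the two f-strings above, a per-tensor int8 quantization with scale 0.1 and zero point 0 parses as

!quant.uniform<i8<-128:127>:f32,0.1:0>

and a per-channel variant on axis 0 with two channels as

!quant.uniform<i8<-128:127>:f32:0,{0.1:0,0.2:0}>

Because the quant dialect is unavailable in the bundled pybinding, these types travel as the backend_config string of an odml_torch.uniform_quantize/uniform_dequantize custom_call rather than as real operand types.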

ai_edge_torch/odml_torch/lowerings/utils.py CHANGED
@@ -14,6 +14,7 @@
  # ==============================================================================
  """Utilities for building MLIR lowerings."""
 
+ import functools
  import numbers
  from typing import Any
  from typing import Optional
@@ -21,6 +22,21 @@ from typing import Optional
  from jax._src.lib.mlir import ir
  from jax._src.lib.mlir.dialects import hlo as stablehlo
  import numpy as np
+ import torch
+
+
+ def torch_dtype_to_ir_element_type(dtype):
+ ty_get = {
+ torch.double: ir.F64Type.get,
+ torch.float32: ir.F32Type.get,
+ torch.half: ir.F16Type.get,
+ torch.long: functools.partial(ir.IntegerType.get_signless, 64),
+ torch.int32: functools.partial(ir.IntegerType.get_signless, 32),
+ torch.int16: functools.partial(ir.IntegerType.get_signless, 16),
+ torch.int8: functools.partial(ir.IntegerType.get_signless, 8),
+ torch.bool: functools.partial(ir.IntegerType.get_signless, 1),
+ }[dtype]
+ return ty_get()
 
 
  def splat(val, ty, shape=tuple(), *, loc: Optional[Any] = None):
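
Unlike the removed export_utils version, the relocated helper takes no ctx argument: the ir.*Type.get callables resolve the ambient MLIR context, so callers must hold one open. A minimal sketch (standalone, outside a lowering; assumes the MLIR python bindings' usual context-manager behavior):

import torch
from jax._src.lib.mlir import ir

with ir.Context():
    i8 = torch_dtype_to_ir_element_type(torch.int8)      # signless i8
    f32 = torch_dtype_to_ir_element_type(torch.float32)  # f32

The new torch.int8 entry is what lets the quantized_decomposed lowerings build int8 storage types.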
ai_edge_torch/version.py CHANGED
@@ -13,4 +13,4 @@
  # limitations under the License.
  # ==============================================================================
 
- __version__ = "0.3.0.dev20241201"
+ __version__ = "0.3.0.dev20241205"

{ai_edge_torch_nightly-0.3.0.dev20241201.dist-info → ai_edge_torch_nightly-0.3.0.dev20241205.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: ai-edge-torch-nightly
- Version: 0.3.0.dev20241201
+ Version: 0.3.0.dev20241205
  Summary: Supporting PyTorch models with the Google AI Edge TFLite runtime.
  Home-page: https://github.com/google-ai-edge/ai-edge-torch
  Keywords: On-Device ML,AI,Google,TFLite,PyTorch,LLMs,GenAI

{ai_edge_torch_nightly-0.3.0.dev20241201.dist-info → ai_edge_torch_nightly-0.3.0.dev20241205.dist-info}/RECORD RENAMED
@@ -3,7 +3,7 @@ ai_edge_torch/config.py,sha256=FMWeCH2b7HYILBvaI1iZNnYCO4WAhDOwBZBmIE-xrF0,909
  ai_edge_torch/conftest.py,sha256=r0GTrhMRhlmOGrrkvumHN8hkmyug6WvF60vWq8wRIBI,758
  ai_edge_torch/fx_pass_base.py,sha256=518ziQ0TUxqum2qZXqlD8qr65pHPh8ZNLnwFC6zvK3k,4253
  ai_edge_torch/model.py,sha256=N-pNpTxzhaFGhWhnSGd70lBzb9VlEhTOq5mddU7bvvI,5542
- ai_edge_torch/version.py,sha256=OUqcy-x2l3EVJNsWANXG1NaPkhKDz4-EkU_yVTe0f1Y,706
+ ai_edge_torch/version.py,sha256=UKNQIv9LGNIpDQkZXBrHuhDFIYET3G8pLZ5njXu6KJc,706
  ai_edge_torch/_convert/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
  ai_edge_torch/_convert/conversion.py,sha256=HwzfRx_DX5TLtPqwEH1_NOm38_INvHzHl4_mX67KOdQ,5448
  ai_edge_torch/_convert/conversion_utils.py,sha256=Sr8qXVcTwc-ZnZmK7yxVrIOOp1S_vNrwzC0zUvLTI2o,2160
@@ -26,7 +26,7 @@ ai_edge_torch/_convert/fx_passes/optimize_layout_transposes_pass/layout_partitio
  ai_edge_torch/_convert/fx_passes/optimize_layout_transposes_pass/layout_partitioners/greedy.py,sha256=L_x8BrF7UDah-SYl-pG11I6CIckdU9kBTUHcmwW4cts,2420
  ai_edge_torch/_convert/fx_passes/optimize_layout_transposes_pass/layout_partitioners/min_cut.py,sha256=mzfL9cf0qBnpmxM_OlMQFvQsEZV2B_Mia9yEJV4J7rI,7135
  ai_edge_torch/_convert/test/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
- ai_edge_torch/_convert/test/test_convert.py,sha256=yXfeWDw9u_rTS3B6kvvFPo5E4XNT3zKTSLFSBSAI9Fc,15502
+ ai_edge_torch/_convert/test/test_convert.py,sha256=v6AhfWqRBuHT7uBDueTbntaQtDSMMrvQOqlIDXNUaMA,17250
  ai_edge_torch/_convert/test/test_convert_composites.py,sha256=BCIODgxMI_3MxMLfNWYMGjcz-al-J3z5eDHCiZJXNwY,7992
  ai_edge_torch/_convert/test/test_convert_multisig.py,sha256=6_C2R9--KyNR7_oezZIAfyTSR97tOeEWy4XGcbSxBDE,5778
  ai_edge_torch/_convert/test/test_to_channel_last_io.py,sha256=1o-gUiwzIuO67FNAJ8DeyKv8fVUeZVNNNwofNVDjYeU,3024
@@ -44,20 +44,22 @@ ai_edge_torch/generative/examples/amd_llama_135m/amd_llama_135m.py,sha256=bkq2Zk
  ai_edge_torch/generative/examples/amd_llama_135m/convert_to_tflite.py,sha256=-n79r6yFnCACpms5eMkXNpyQsCn2PYVRdB-jOoIqn14,2227
  ai_edge_torch/generative/examples/amd_llama_135m/verify.py,sha256=-9Nb9D818YSJR3olVtBwoLNeMMD5qE58YBnsA67hlHg,2421
  ai_edge_torch/generative/examples/gemma/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
- ai_edge_torch/generative/examples/gemma/convert_gemma1_to_tflite.py,sha256=evmUj_4yygQthSRU-ke-Xn1qFNDCZKbegqINWfruKwU,2184
- ai_edge_torch/generative/examples/gemma/convert_gemma2_multi_prefills.py,sha256=6d9wG5MnStEys34_gFXwKTMRXUBFLTW1jEzCoWkAtwM,2224
- ai_edge_torch/generative/examples/gemma/convert_gemma2_to_tflite.py,sha256=RZDs6oY-NLYrPNtfuJDweIHzGUL2kzpIc3AW_1p8gGg,2186
+ ai_edge_torch/generative/examples/gemma/convert_gemma1_to_tflite.py,sha256=mrG96_WEGD4NQ4uFEKrHRMAQvVVliOcj1zbI3drGDjI,2199
+ ai_edge_torch/generative/examples/gemma/convert_gemma2_to_tflite.py,sha256=I_tvwCYmtf08D1HqDxYx7dpvj2q5_eaYnuI_3rI6Dlw,2201
  ai_edge_torch/generative/examples/gemma/gemma1.py,sha256=oSbysiPvwp5efMbNYZop3HrxDMGiD15Tmz-HiQuTr2E,3315
  ai_edge_torch/generative/examples/gemma/gemma2.py,sha256=RQFQDMEnIVp8PefcCTr7P0CvllKI7FVoIJLXbPLLIsc,9056
  ai_edge_torch/generative/examples/gemma/verify_gemma1.py,sha256=ip-Gmk4CI5f0GWSdAIdrectxQWJ0t328KCsA4nfHuGg,1736
  ai_edge_torch/generative/examples/gemma/verify_gemma2.py,sha256=IoBhEMwH07-tFm5-U6F2hpCsI8xynglhq1x9tIOdaPQ,1322
  ai_edge_torch/generative/examples/gemma/verify_util.py,sha256=tR8RflXocDZqvuStyw9aFlzuiTllEC8rNnjrxms6_Is,5727
  ai_edge_torch/generative/examples/llama/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
- ai_edge_torch/generative/examples/llama/convert_to_tflite.py,sha256=P0-pByTM5tslE23ILgo7nd0nOGE25ciBRG5wKJj0bBk,2411
+ ai_edge_torch/generative/examples/llama/convert_to_tflite.py,sha256=Brb83sbqBfStUiIZFhfWnYtN7LcNmkKyFn96cZK4sGo,2426
  ai_edge_torch/generative/examples/llama/llama.py,sha256=AMcCbuDBxEfbO-l3KiEXbUaXEJ3RLLwkHii7to7UhVo,6854
  ai_edge_torch/generative/examples/llama/verify.py,sha256=X7oKQi85M789ugBrOlMvzk8eSRR3Kf1Mprfl-U-WIpo,2842
+ ai_edge_torch/generative/examples/moonshine/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
+ ai_edge_torch/generative/examples/moonshine/convert_moonshine_to_tflite.py,sha256=7m3rYRzThRDYb-7pGnpLr3ACi4PWX07Mg20Q98ArPc4,1714
+ ai_edge_torch/generative/examples/moonshine/moonshine.py,sha256=nZ2b8u4TmsB5sgdClgAuH8E78bcTv9RCnF9666HqP2M,3394
  ai_edge_torch/generative/examples/openelm/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
- ai_edge_torch/generative/examples/openelm/convert_to_tflite.py,sha256=85FVEt6cKFP2UzCLC78tAkbwGlGhAArtG7Wa75NxJik,2185
+ ai_edge_torch/generative/examples/openelm/convert_to_tflite.py,sha256=-qDBu3bjUq0jx73SPDMsPIBP0BT1nA_0UgtFKeSuM18,2213
  ai_edge_torch/generative/examples/openelm/openelm.py,sha256=sFakstoPDcOHSak0IGFEEq_HQMBBSMcx-WVCDZqcVDo,4411
  ai_edge_torch/generative/examples/openelm/verify.py,sha256=VkigoqhAr8ew95neb3TifYv-SLOSheaWKv2AH0iKDrc,2441
  ai_edge_torch/generative/examples/paligemma/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
@@ -69,18 +71,18 @@ ai_edge_torch/generative/examples/paligemma/verify.py,sha256=Bkbgy-GFjnMNYjduWUM
  ai_edge_torch/generative/examples/paligemma/verify_decoder.py,sha256=al5wMPWri4IRVWrLmCplPi6uoCzwh0vBHMGnCt-XUqo,2690
  ai_edge_torch/generative/examples/paligemma/verify_image_encoder.py,sha256=pSekf1BybhieQz3cQx_llbRQHxczXbTqool8fOyGj_0,3114
  ai_edge_torch/generative/examples/phi/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
- ai_edge_torch/generative/examples/phi/convert_phi3_to_tflite.py,sha256=rkbTtMaqSVG48cm-NTxR_LDgZmXAEBqayTm9O49oMXc,2171
- ai_edge_torch/generative/examples/phi/convert_to_tflite.py,sha256=3go690yX6PFeXMdpY7y4JZorAwxX0HT_b_pKZieauvk,2169
+ ai_edge_torch/generative/examples/phi/convert_phi3_to_tflite.py,sha256=ruY-LLwpqBqVZ5z9h_sewYj04ukWRG4j804tUAyDdnA,2186
+ ai_edge_torch/generative/examples/phi/convert_to_tflite.py,sha256=UdMk1SSkcWpv8gosUylx3JSCxdOJBjZNhuQQtT4-Ono,2184
  ai_edge_torch/generative/examples/phi/phi2.py,sha256=nbivDwZREd-sypy_ittO59-yaAdPvHv1YEV6Fo5buCo,3341
  ai_edge_torch/generative/examples/phi/phi3.py,sha256=GkHOaYfsFEbHvfZCaLlb3Us_h19ezqPDUakoz_DiG9A,7123
  ai_edge_torch/generative/examples/phi/verify.py,sha256=YPFCdbnfmvq38fbpBNr0kHPfSZo4p3_6WkLJAW3pLPo,2177
  ai_edge_torch/generative/examples/phi/verify_phi3.py,sha256=kVYaBVvddfQng0IyZGxyTJEzhiPO0G4VFJm2WOc2Q94,2360
  ai_edge_torch/generative/examples/qwen/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
- ai_edge_torch/generative/examples/qwen/convert_to_tflite.py,sha256=QAAVoSKDVf2rHAChzumGloVCWIU0Oe5UYKgv3T192Iw,2496
+ ai_edge_torch/generative/examples/qwen/convert_to_tflite.py,sha256=1M3DTkf536TCLYcQB1lu-3TxQ6mV03dFhTdbk0p8i84,2523
  ai_edge_torch/generative/examples/qwen/qwen.py,sha256=oYm9hhALUQ4uOn-PO1bF7fCIGP8EWRNK4zClkx2RQs8,4070
  ai_edge_torch/generative/examples/qwen/verify.py,sha256=9_AyEJTeUfvhhID64Rto2bflFPyXMFokdQLsseLUMiI,2775
  ai_edge_torch/generative/examples/smollm/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
- ai_edge_torch/generative/examples/smollm/convert_to_tflite.py,sha256=zPrDTDeRVWFi9DS32uNi-RLpzOStFOk5MhNla4ixeew,2179
+ ai_edge_torch/generative/examples/smollm/convert_to_tflite.py,sha256=56CzCjyp9xh_2ZtXKN9tlEv6GayeSc4giTIZsi2Q59E,2194
  ai_edge_torch/generative/examples/smollm/smollm.py,sha256=M5qAcSUE5gxOSfq24a8lZku9kgvmlFCyIBar3kF2XEk,2570
  ai_edge_torch/generative/examples/smollm/verify.py,sha256=HXYcCjDJMylVL3Pc9HU-UXqtpjtIU25o1YhPiX30aPU,2361
  ai_edge_torch/generative/examples/stable_diffusion/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
@@ -107,7 +109,7 @@ ai_edge_torch/generative/examples/test_models/convert_toy_model.py,sha256=6-WaNH
  ai_edge_torch/generative/examples/test_models/toy_model.py,sha256=4113jZK-Hu3kYop__WTc8Bq-bG6YzQtADbxHtYPEB4w,5036
  ai_edge_torch/generative/examples/test_models/toy_model_with_kv_cache.py,sha256=rRodLr-hEqAs_-8x06O8qO-hJ_cqr2AfhJZ9DCptvwo,4332
  ai_edge_torch/generative/examples/tiny_llama/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
- ai_edge_torch/generative/examples/tiny_llama/convert_to_tflite.py,sha256=ekxd8efjMgEvauUu3PidWOC-DszPHn5sqU753F7sJIM,2201
+ ai_edge_torch/generative/examples/tiny_llama/convert_to_tflite.py,sha256=WmEshoN9HgNLbV7NTjxdqWz9Olcim6r_vo4R9eYE98I,2228
  ai_edge_torch/generative/examples/tiny_llama/tiny_llama.py,sha256=10X8HwPx4akzclnIMOBNItKQemhRbvxBbTo7nwZtWjM,2650
  ai_edge_torch/generative/examples/tiny_llama/verify.py,sha256=7Bk8z033M-BCXJ299fpQNXYAudBbZoDQp9934xcvg50,2426
  ai_edge_torch/generative/fx_passes/__init__.py,sha256=jrzCB3ZyY_t5jJM1e2Czdt3DjAIL43R0_a-T-I7wOzw,1155
@@ -139,13 +141,14 @@ ai_edge_torch/generative/test/test_kv_cache.py,sha256=W6Bh0gYDzmwb0j9HdD5_D7Z7FP
  ai_edge_torch/generative/test/test_loader.py,sha256=9mQUeeZKOVApOWSWl2cN9c10axZjMKM1-0Zd823CCS4,3449
  ai_edge_torch/generative/test/test_model_conversion.py,sha256=aZFaheg2sq7rEccch1TZM6W4BSfpJZjrM9Gyp4hVGYs,6351
  ai_edge_torch/generative/test/test_model_conversion_large.py,sha256=xWV9O2wuRHc4VNBWuWipiuqXa3AJhiV1nmjewAZHHWM,11177
- ai_edge_torch/generative/test/test_quantize.py,sha256=8geJhKwYBU20m0mdGPD1BUFwQ0lZKNtCB04SOLO18y4,5980
+ ai_edge_torch/generative/test/test_quantize.py,sha256=bEJMhpQ9bIDUZVBXTW888728FcH-i3SyE4JSZZUgU0A,6071
  ai_edge_torch/generative/test/utils.py,sha256=eQ-hjd1eXuHJF3SJK6_CrjgOZVzmG_4VEdH7Z1gH_lA,1897
  ai_edge_torch/generative/utilities/__init__.py,sha256=-_jxnnFnCgnTU4oTm4MnRsvL5lqhomBNdFBbqfmfHPo,720
  ai_edge_torch/generative/utilities/converter.py,sha256=S14STbyxV6A9HKy1BdUo49f2jS6Ij0RL9mVAFUMWYV8,5291
  ai_edge_torch/generative/utilities/dynamic_update_slice.py,sha256=e2mhx-Vp8sUK4EXoPtpZLSx3TViqLAKs67EhKcXBjAQ,2121
  ai_edge_torch/generative/utilities/loader.py,sha256=A3SOjPXp--AsvoP1hqj5QKWE4sgxoFc3H5EBUz_Eogc,13531
  ai_edge_torch/generative/utilities/model_builder.py,sha256=OcHJhEqc3LjI3STli6cyn71m1mdzr7QbzF9fqSNCXrg,5730
+ ai_edge_torch/generative/utilities/moonshine_loader.py,sha256=_RpFabSqtGH5PHiP3_1f6QfO14qMADUxr_HGRlVDFB0,4891
  ai_edge_torch/generative/utilities/stable_diffusion_loader.py,sha256=dqPD9qRXEWtU3ombslOC-BE2l_dMwHoCNu7NsIJhsso,36158
  ai_edge_torch/generative/utilities/t5_loader.py,sha256=tEsfy8-ymzbbjOIc-oesXF3yGyyWtJgFXn2s7VOavt8,16961
  ai_edge_torch/generative/utilities/transformers_verifier.py,sha256=8sp9m_FMcXn7nqOrochtu2jIANkJKhnhIBUmH0ZTDR4,1549
@@ -167,8 +170,8 @@ ai_edge_torch/lowertools/translate_recipe.py,sha256=ymkBpFqAUiupRWqrPOWiVphKcXR1
  ai_edge_torch/odml_torch/__init__.py,sha256=S8jOzE9nLof-6es3XDiGJRN-9H_XTxsVm9dE7lD3RWo,812
  ai_edge_torch/odml_torch/_torch_future.py,sha256=AJ0klpsbu2ZBTfiZlqSOoaYzBVITt40a1fYN8xKkEPw,3044
  ai_edge_torch/odml_torch/_torch_library.py,sha256=Lw1gqL2HWNRspdTwNhIkYAHDyafHedHtkXyKKxn-Wss,805
- ai_edge_torch/odml_torch/export.py,sha256=4xwrsDeOAgzoB9m7EeNsBj6dC5Ajtn5aKDRQkdHxa-o,11584
- ai_edge_torch/odml_torch/export_utils.py,sha256=q84U69ZQ82hLXw-xncJ8IW-K71Xux-NWlzZTs7hdZWA,5127
+ ai_edge_torch/odml_torch/export.py,sha256=dgnNGBVkHBz0brlWALX2hGXpQ4YzCKdwbkF4oAfEu4I,13062
+ ai_edge_torch/odml_torch/export_utils.py,sha256=QeA37Irlty6AiIBuqmHmJgn3lqahBQ5xsh6IKRoKm1g,4774
  ai_edge_torch/odml_torch/tf_integration.py,sha256=lTFJPPEijLPFmn6qq2jbpVTQOo0YaOTK36kK6rCiyIE,5956
  ai_edge_torch/odml_torch/composite/__init__.py,sha256=71GM_gDZxJyo38ZSoYSwhZX3xKA9rknO93JS9kw9w_c,778
  ai_edge_torch/odml_torch/composite/mark_tensor.py,sha256=U--rwl-XkWKgkdXCXDn6yySug8FR66o1YFUAIoSaWW4,3523
@@ -177,17 +180,18 @@ ai_edge_torch/odml_torch/debuginfo/__init__.py,sha256=9ag6-WWRG50rPCtIV7OpIokEKu
  ai_edge_torch/odml_torch/debuginfo/_build.py,sha256=1xCXOs3-9UcsOyLFH0uyQwLu7c06iYFTo0NQ7Ckbl2I,1465
  ai_edge_torch/odml_torch/debuginfo/_op_polyfill.py,sha256=IvOBQyROI9WHS3umHRxsDW-1YElU9BPWzKtJA2eKWOI,1739
  ai_edge_torch/odml_torch/jax_bridge/__init__.py,sha256=Jco5zvejxuyl9xHQxZICAKbkgH7x38qPlwUUpD7S15Q,730
- ai_edge_torch/odml_torch/jax_bridge/_wrap.py,sha256=oQo9nxH08NnEDeZaGoCUk1kRtoEOM_f0DUOyd9nfxjg,6673
+ ai_edge_torch/odml_torch/jax_bridge/_wrap.py,sha256=LqwZ1vCJTSOzgzvH8LUAN-sAkF-l_pGj1AMEIzAqHCA,6638
  ai_edge_torch/odml_torch/jax_bridge/utils.py,sha256=T8isGc896VrHZ6c_L5pYmLpolQ7ibcOlgWfPuVFPzIg,2264
- ai_edge_torch/odml_torch/lowerings/__init__.py,sha256=dE_qzh-OnCNjWzqs1-PHs5PNlRF726qMQKM3tkwAzEs,959
- ai_edge_torch/odml_torch/lowerings/_basic.py,sha256=eH9eJqFO-BI9l4WdXfjsItODPRa18SAR_qSvJ6-7gxc,9987
+ ai_edge_torch/odml_torch/lowerings/__init__.py,sha256=1lMKPoStK3SUA8yYTPZBRhESN33BghGXnfqOOg4oeVk,995
+ ai_edge_torch/odml_torch/lowerings/_basic.py,sha256=ufvnaAh6rM_yfoc8ybI3VErHEVBv5W_p4iOe9slfwKM,9948
  ai_edge_torch/odml_torch/lowerings/_batch_norm.py,sha256=PaLI0BB6pdBW1VyfW8VTOT_Be-ZcqYdNOsyfzKfq8Cg,2064
  ai_edge_torch/odml_torch/lowerings/_convolution.py,sha256=v1VdKmL8YLJv3PR9VgyNghO83A25PpTzY2ZUAJqlq3Q,6847
  ai_edge_torch/odml_torch/lowerings/_jax_lowerings.py,sha256=4UyNyaR2W-vCOvj-P5lywQ1_RfLIxVE7J_GONI6CQvI,10718
  ai_edge_torch/odml_torch/lowerings/_layer_norm.py,sha256=1ePJs7oIdUkVdMddFsXMc53qTkEKqGz0ZhQQoNzBa10,2862
+ ai_edge_torch/odml_torch/lowerings/_quantized_decomposed.py,sha256=rFmzqcdjYrwhcxH8j9zCFStPy21HFF7hkUV_GQ8FPAk,6056
  ai_edge_torch/odml_torch/lowerings/context.py,sha256=jslcCv7r_HtImSRTxJwHAUV_QCu9Jub51lovmoBkmFA,1295
  ai_edge_torch/odml_torch/lowerings/registry.py,sha256=itTt8MLbq2LoHTzRidCF2TTbh0TP7L836u99qCjP3FA,2953
- ai_edge_torch/odml_torch/lowerings/utils.py,sha256=NczqpsSd3Fn7yVcPC3qllemiZxxDAZgcW1T5l8-W9fE,5593
+ ai_edge_torch/odml_torch/lowerings/utils.py,sha256=pqM6mumpviFDHRaabp93CUAngzEZmWcAHl0nTDgyI2g,6167
  ai_edge_torch/odml_torch/passes/__init__.py,sha256=AVwIwUTMx7rXacKjGy4kwrtMd3XB2v_ncdc40KOjUqQ,1245
  ai_edge_torch/quantize/__init__.py,sha256=aB5dXot04bqyUhpsDFvxt9CIi15QAC4euvqOndJ0XLU,714
  ai_edge_torch/quantize/pt2e_quantizer.py,sha256=CKIEhs9jCcna64qj1jFH9zEbMbRdyeGV_TmSqEBPjes,15741
@@ -196,8 +200,8 @@ ai_edge_torch/quantize/quant_config.py,sha256=U0KisSW-uZkoMJcy-ZP9W57p3tsa594fr9
  ai_edge_torch/testing/__init__.py,sha256=hHLluseD2R0Hh4W6XZRIXY_dRQeYudjsrKGf6LZz65g,671
  ai_edge_torch/testing/model_coverage/__init__.py,sha256=5P8J6Zk5YYtDvTBucFvB9NGSRI7Gw_24WnrbhXgycEE,765
  ai_edge_torch/testing/model_coverage/model_coverage.py,sha256=UPB448aMDUyC0HNYVqio2rcJPnDN0tBQMP08J6vPYew,4718
- ai_edge_torch_nightly-0.3.0.dev20241201.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
- ai_edge_torch_nightly-0.3.0.dev20241201.dist-info/METADATA,sha256=Imp9XnPMYxNMskFOdV5J8IzWfU4Ox84qo5-ghCYKDJU,1897
- ai_edge_torch_nightly-0.3.0.dev20241201.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
- ai_edge_torch_nightly-0.3.0.dev20241201.dist-info/top_level.txt,sha256=5KXRaF2hwkApYxf7Y8y_tVb9aulGTlbOoNdbx1aKRkE,14
- ai_edge_torch_nightly-0.3.0.dev20241201.dist-info/RECORD,,
+ ai_edge_torch_nightly-0.3.0.dev20241205.dist-info/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
+ ai_edge_torch_nightly-0.3.0.dev20241205.dist-info/METADATA,sha256=q0YQggf3bWL7q67R2IpsvyUlncZRjjJRfsqL8yLNJ_Y,1897
+ ai_edge_torch_nightly-0.3.0.dev20241205.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+ ai_edge_torch_nightly-0.3.0.dev20241205.dist-info/top_level.txt,sha256=5KXRaF2hwkApYxf7Y8y_tVb9aulGTlbOoNdbx1aKRkE,14
+ ai_edge_torch_nightly-0.3.0.dev20241205.dist-info/RECORD,,