ai-edge-torch-nightly 0.2.0.dev20240718__py3-none-any.whl → 0.2.0.dev20240720__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of ai-edge-torch-nightly might be problematic.
- ai_edge_torch/convert/conversion_utils.py +39 -18
- ai_edge_torch/convert/test/test_convert.py +106 -0
- ai_edge_torch/generative/examples/experimental/__init__.py +14 -0
- ai_edge_torch/generative/examples/experimental/gemma/__init__.py +14 -0
- ai_edge_torch/generative/examples/experimental/gemma/convert_to_tflite.py +87 -0
- ai_edge_torch/generative/examples/experimental/gemma/gemma.py +195 -0
- ai_edge_torch/generative/examples/experimental/phi/__init__.py +14 -0
- ai_edge_torch/generative/examples/experimental/phi/convert_to_tflite.py +84 -0
- ai_edge_torch/generative/examples/experimental/phi/phi2.py +184 -0
- ai_edge_torch/generative/examples/experimental/tiny_llama/__init__.py +14 -0
- ai_edge_torch/generative/examples/experimental/tiny_llama/convert_to_tflite.py +89 -0
- ai_edge_torch/generative/examples/experimental/tiny_llama/tiny_llama.py +185 -0
- ai_edge_torch/generative/examples/gemma/gemma.py +6 -2
- ai_edge_torch/generative/examples/phi2/phi2.py +5 -2
- ai_edge_torch/generative/examples/t5/t5.py +5 -2
- ai_edge_torch/generative/examples/test_models/toy_model_with_external_kv_cache.py +42 -27
- ai_edge_torch/generative/examples/tiny_llama/tiny_llama.py +6 -2
- ai_edge_torch/generative/test/test_experimental_ekv.py +122 -0
- {ai_edge_torch_nightly-0.2.0.dev20240718.dist-info → ai_edge_torch_nightly-0.2.0.dev20240720.dist-info}/METADATA +1 -1
- {ai_edge_torch_nightly-0.2.0.dev20240718.dist-info → ai_edge_torch_nightly-0.2.0.dev20240720.dist-info}/RECORD +23 -12
- {ai_edge_torch_nightly-0.2.0.dev20240718.dist-info → ai_edge_torch_nightly-0.2.0.dev20240720.dist-info}/LICENSE +0 -0
- {ai_edge_torch_nightly-0.2.0.dev20240718.dist-info → ai_edge_torch_nightly-0.2.0.dev20240720.dist-info}/WHEEL +0 -0
- {ai_edge_torch_nightly-0.2.0.dev20240718.dist-info → ai_edge_torch_nightly-0.2.0.dev20240720.dist-info}/top_level.txt +0 -0
ai_edge_torch/convert/conversion_utils.py

@@ -20,7 +20,7 @@ import gc
 import itertools
 import logging
 import tempfile
-from typing import Any, Dict, Optional, Tuple, Union
+from typing import Any, Dict, List, Optional, Tuple, Union

 import torch
 import torch.utils._pytree as pytree
@@ -79,28 +79,49 @@ class Signature:
     for i in range(args_spec.num_leaves):
       names.append(f"args_{i}")

-    kwargs_names = (
-        kwargs_spec.context
-        if kwargs_spec.type is not collections.defaultdict
-        # ignore mismatch of `default_factory` for defaultdict
-        else kwargs_spec.context[1]
+    kwargs_names = self._flat_kwarg_names(
+        kwargs_spec.children_specs, kwargs_spec.context
     )
+    names.extend(kwargs_names)
+    return names

-
-
-
+  def _flat_kwarg_names(self, specs, context) -> List[str]:
+    flat_names = []
+    if context is None:
+      for i, spec in enumerate(specs):
+        if spec.children_specs:
+          flat_names.extend(
+              [
+                  f"{i}_{name}"
+                  for name in self._flat_kwarg_names(spec.children_specs, spec.context)
+              ]
+          )
+        else:
+          flat_names.append(f"{i}")
+    else:
+      flat_ctx = self._flatten_list(context)
+      for prefix, spec in zip(flat_ctx, specs):
+        leaf_flat_names = self._flat_kwarg_names(spec.children_specs, spec.context)
+        if leaf_flat_names:
+          flat_names.extend([f"{prefix}_{name}" for name in leaf_flat_names])
+        else:
+          flat_names.append(prefix)
+
+    return flat_names
+
+  def _flatten_list(self, l: List) -> List:
+    flattened = []
+    for item in l:
+      if isinstance(item, list):
+        flattened.extend(self._flatten_list(item))
       else:
-
-
-        # tensor containers as inputs.
-        # TODO(b/352584188): Decide the behavior of tensor container as input (flatten or reject)
-        for i in range(value_spec.num_leaves):
-          names.append(f"{name}_{i}")
-    return names
+        flattened.append(item)
+    return flattened

   @property
-  def flat_args(self) -> tuple[
-
+  def flat_args(self) -> tuple[Any]:
+    args, kwargs = self._normalized_sample_args_kwargs
+    return tuple([*args, *kwargs.values()])


 def exported_program_to_stablehlo_bundle(
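The new _flat_kwarg_names helper replaces the old per-leaf numbering with pytree-aware names: dict keys and sequence indices are joined with underscores into one flat name per tensor leaf. A standalone sketch of the same naming scheme (plain Python for illustration, not the library code):

def flat_names(prefix, value):
  # Dicts contribute their keys, sequences their indices; a leaf keeps the
  # accumulated underscore-joined prefix.
  if isinstance(value, dict):
    return [n for k, v in value.items() for n in flat_names(f"{prefix}_{k}", v)]
  if isinstance(value, (list, tuple)):
    return [n for i, v in enumerate(value) for n in flat_names(f"{prefix}_{i}", v)]
  return [prefix]

print(flat_names("z", {"data_1": 0, "data_2": (0, 0)}))
# -> ['z_data_1', 'z_data_2_0', 'z_data_2_1']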
ai_edge_torch/convert/test/test_convert.py

@@ -14,10 +14,14 @@
 # ==============================================================================


+from dataclasses import dataclass
 import os
 import tempfile
+from typing import Tuple
 import unittest

+import numpy as np
+import tensorflow as tf
 import torch
 import torchvision

@@ -26,6 +30,15 @@ from ai_edge_torch.convert import conversion_utils as cutils
 from ai_edge_torch.testing import model_coverage


+@dataclass
+class TestContainer1:
+  data_1: torch.Tensor
+  data_2: Tuple[torch.Tensor, torch.Tensor]
+
+
+torch.export.register_dataclass(TestContainer1, serialized_type_name="TestContainer1")
+
+
 class TestConvert(unittest.TestCase):
   """Tests conversion of various modules."""

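The register_dataclass call is what makes TestContainer1 traversable by torch's pytree utilities, which in turn lets the converter flatten its fields into the named inputs checked below. A minimal sketch, assuming the class and registration above are in scope:

import torch
import torch.utils._pytree as pytree

c = TestContainer1(
    data_1=torch.zeros(2),
    data_2=(torch.ones(2), torch.ones(2)),
)
leaves, spec = pytree.tree_flatten(c)
print(len(leaves))  # 3 tensor leaves: data_1 plus the two data_2 entries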
@@ -306,6 +319,99 @@ class TestConvert(unittest.TestCase):
         model_coverage.compare_tflite_torch(edge_model, model, args_gen, kwargs_gen)
     )

+  def test_convert_model_with_args_nested_kwargs_1(self):
+    """
+    Test converting a simple model with both sample_args and nested sample_kwargs.
+    """
+
+    class SampleModel(torch.nn.Module):
+
+      def forward(self, x: torch.Tensor, y: torch.Tensor, z: TestContainer1):
+        return x + y + z.data_1 + z.data_2[0] + z.data_2[1]
+
+    args = (torch.randn(10, 10),)
+    kwargs = dict(
+        y=torch.randn(10, 10),
+        z=TestContainer1(
+            data_1=torch.randn(10, 10),
+            data_2=(torch.randn(10, 10), torch.randn(10, 10)),
+        ),
+    )
+    flat_inputs = {
+        "args_0": args[0].numpy(),
+        "y": kwargs["y"].numpy(),
+        "z_data_1": kwargs["z"].data_1.numpy(),
+        "z_data_2_0": kwargs["z"].data_2[0].numpy(),
+        "z_data_2_1": kwargs["z"].data_2[1].numpy(),
+    }
+    self._compare_tflite_torch_args_kwargs(SampleModel(), args, kwargs, flat_inputs)
+
+  def test_convert_model_with_args_nested_kwargs_2(self):
+    """
+    Test converting a simple model with both sample_args and nested sample_kwargs.
+    """
+
+    class SampleModel(torch.nn.Module):
+
+      def forward(self, x, y, z):
+        return x + y + z.data_1 + z.data_2[0][0] + z.data_2[1]
+
+    args = (torch.randn(10, 10),)
+    kwargs = dict(
+        y=torch.randn(10, 10),
+        z=TestContainer1(
+            data_1=torch.randn(10, 10),
+            data_2=[(torch.randn(10, 10),), torch.randn(10, 10)],
+        ),
+    )
+    flat_inputs = {
+        "args_0": args[0].numpy(),
+        "y": kwargs["y"].numpy(),
+        "z_data_1": kwargs["z"].data_1.numpy(),
+        "z_data_2_0_0": kwargs["z"].data_2[0][0].numpy(),
+        "z_data_2_1": kwargs["z"].data_2[1].numpy(),
+    }
+    self._compare_tflite_torch_args_kwargs(SampleModel(), args, kwargs, flat_inputs)
+
+  def test_convert_model_with_args_nested_kwargs_3(self):
+    """
+    Test converting a simple model with both sample_args and nested sample_kwargs.
+    """
+
+    class SampleModel(torch.nn.Module):
+
+      def forward(self, x, y, z):
+        return x + y + z.data_1 + z.data_2[0]["foo"] + z.data_2[1]
+
+    args = (torch.randn(10, 10),)
+    kwargs = dict(
+        y=torch.randn(10, 10),
+        z=TestContainer1(
+            data_1=torch.randn(10, 10),
+            data_2=(dict(foo=torch.randn(10, 10)), torch.randn(10, 10)),
+        ),
+    )
+    flat_inputs = {
+        "args_0": args[0].numpy(),
+        "y": kwargs["y"].numpy(),
+        "z_data_1": kwargs["z"].data_1.numpy(),
+        "z_data_2_0_foo": kwargs["z"].data_2[0]["foo"].numpy(),
+        "z_data_2_1": kwargs["z"].data_2[1].numpy(),
+    }
+    self._compare_tflite_torch_args_kwargs(SampleModel(), args, kwargs, flat_inputs)
+
+  def _compare_tflite_torch_args_kwargs(self, model, args, kwargs, flat_inputs):
+    model.eval()
+    edge_model = ai_edge_torch.convert(model, args, kwargs)
+    interpreter = tf.lite.Interpreter(model_content=edge_model._tflite_model)
+    runner = interpreter.get_signature_runner("serving_default")
+    input_details = runner.get_input_details()
+    self.assertEqual(input_details.keys(), flat_inputs.keys())
+
+    reference_output = model(*args, **kwargs)
+    tflite_output = edge_model(**flat_inputs)
+    np.testing.assert_almost_equal(reference_output, tflite_output)
+

 if __name__ == "__main__":
   unittest.main()
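The helper above doubles as a recipe for user code: the flattened input names of any converted model can be discovered through the TFLite signature runner. A sketch with an illustrative model path; the expected keys are the ones from the first test:

import tensorflow as tf

interpreter = tf.lite.Interpreter(model_path="/tmp/converted_model.tflite")
runner = interpreter.get_signature_runner("serving_default")
print(sorted(runner.get_input_details().keys()))
# e.g. ['args_0', 'y', 'z_data_1', 'z_data_2_0', 'z_data_2_1']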
ai_edge_torch/generative/examples/experimental/__init__.py (new file)

@@ -0,0 +1,14 @@
+# Copyright 2024 The AI Edge Torch Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
ai_edge_torch/generative/examples/experimental/gemma/__init__.py (new file)

@@ -0,0 +1,14 @@
+# Copyright 2024 The AI Edge Torch Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
ai_edge_torch/generative/examples/experimental/gemma/convert_to_tflite.py (new file)

@@ -0,0 +1,87 @@
+# Copyright 2024 The AI Edge Torch Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+#
+# Note: This is an experimental version of Gemma with external KV cache.
+# Please use with caution.
+
+
+import os
+from pathlib import Path
+
+import torch
+
+import ai_edge_torch
+from ai_edge_torch.generative.examples.experimental.gemma import gemma
+from ai_edge_torch.generative.layers.experimental import ekv_cache as kv_utils
+from ai_edge_torch.generative.quantize import quant_recipes
+
+
+def convert_gemma_to_tflite(
+    checkpoint_path: str,
+    prefill_seq_len: int = 512,
+    kv_cache_max_len: int = 1024,
+    quantize: bool = True,
+):
+  """An example method for converting a Gemma 2B model to multi-signature
+  tflite model.
+
+  Args:
+      checkpoint_path (str): The filepath to the model checkpoint, or directory
+        holding the checkpoint.
+      prefill_seq_len (int, optional): The maximum size of prefill input tensor.
+        Defaults to 512.
+      kv_cache_max_len (int, optional): The maximum size of KV cache buffer,
+        including both prefill and decode. Defaults to 1024.
+      quantize (bool, optional): Whether the model should be quanized.
+        Defaults to True.
+  """
+  pytorch_model = gemma.build_2b_model(
+      checkpoint_path, kv_cache_max_len=kv_cache_max_len
+  )
+  # Tensors used to trace the model graph during conversion.
+  prefill_tokens = torch.full((1, prefill_seq_len), 0, dtype=torch.long)
+  prefill_input_pos = torch.arange(0, prefill_seq_len)
+  decode_token = torch.tensor([[0]], dtype=torch.long)
+  decode_input_pos = torch.tensor([0], dtype=torch.int64)
+  kv = kv_utils.EKVCache.from_model_config(pytorch_model.config)
+
+  quant_config = quant_recipes.full_int8_dynamic_recipe() if quantize else None
+  edge_model = (
+      ai_edge_torch.signature(
+          'prefill',
+          pytorch_model,
+          sample_kwargs={
+              'tokens': prefill_tokens,
+              'input_pos': prefill_input_pos,
+              'kv_cache': kv,
+          },
+      )
+      .signature(
+          'decode',
+          pytorch_model,
+          sample_kwargs={
+              'tokens': decode_token,
+              'input_pos': decode_input_pos,
+              'kv_cache': kv,
+          },
+      )
+      .convert(quant_config=quant_config)
+  )
+  edge_model.export(f'/tmp/gemma_seq{prefill_seq_len}_ekv{kv_cache_max_len}.tflite')
+
+
+if __name__ == '__main__':
+  checkpoint_path = os.path.join(Path.home(), 'Downloads/llm_data/gemma-2b')
+  convert_gemma_to_tflite(checkpoint_path)
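Once exported, both signatures are visible to the stock TFLite interpreter. A minimal sketch (the path is the one written by the export call above; per-signature tensor names are model-dependent):

import tensorflow as tf

interpreter = tf.lite.Interpreter(model_path='/tmp/gemma_seq512_ekv1024.tflite')
print(interpreter.get_signature_list())  # expect 'prefill' and 'decode' entries
prefill = interpreter.get_signature_runner('prefill')
print(sorted(prefill.get_input_details().keys()))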
ai_edge_torch/generative/examples/experimental/gemma/gemma.py (new file)

@@ -0,0 +1,195 @@
+# Copyright 2024 The AI Edge Torch Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+# Example of building a Gemma model.
+#
+# Note: This is an experimental version of Gemma with external KV cache.
+# Please use with caution.
+
+import os
+from pathlib import Path
+from typing import Tuple
+
+import numpy as np
+import torch
+import torch.nn as nn
+
+import ai_edge_torch.generative.layers.attention_utils as attn_utils
+import ai_edge_torch.generative.layers.builder as builder
+from ai_edge_torch.generative.layers.experimental import ekv_cache as kv_utils
+from ai_edge_torch.generative.layers.experimental.attention import TransformerBlock  # NOQA
+import ai_edge_torch.generative.layers.model_config as cfg
+import ai_edge_torch.generative.utilities.loader as loading_utils
+
+TENSOR_NAMES = loading_utils.ModelLoader.TensorNames(
+    ff_up_proj="model.layers.{}.mlp.up_proj",
+    ff_down_proj="model.layers.{}.mlp.down_proj",
+    ff_gate_proj="model.layers.{}.mlp.gate_proj",
+    attn_query_proj="model.layers.{}.self_attn.q_proj",
+    attn_key_proj="model.layers.{}.self_attn.k_proj",
+    attn_value_proj="model.layers.{}.self_attn.v_proj",
+    attn_output_proj="model.layers.{}.self_attn.o_proj",
+    pre_attn_norm="model.layers.{}.input_layernorm",
+    pre_ff_norm="model.layers.{}.post_attention_layernorm",
+    embedding="model.embed_tokens",
+    final_norm="model.norm",
+    lm_head=None,
+)
+
+
+class Gemma(nn.Module):
+
+  def __init__(self, config: cfg.ModelConfig):
+    super().__init__()
+
+    self.config = config
+    # Construct model layers.
+    self.tok_embedding = nn.Embedding(
+        config.vocab_size, config.embedding_dim, padding_idx=0
+    )
+    self.lm_head = nn.Linear(
+        config.embedding_dim,
+        config.vocab_size,
+        bias=config.lm_head_use_bias,
+    )
+    # Gemma re-uses the embedding as the head projection layer.
+    self.lm_head.weight.data = self.tok_embedding.weight.data
+    self.transformer_blocks = nn.ModuleList(
+        TransformerBlock(config) for _ in range(config.num_layers)
+    )
+    self.final_norm = builder.build_norm(
+        config.embedding_dim,
+        config.final_norm_config,
+    )
+    self.rope_cache = attn_utils.build_rope_cache(
+        size=config.kv_cache_max,
+        dim=int(config.attn_config.rotary_percentage * config.head_dim),
+        base=10_000,
+        condense_ratio=1,
+        dtype=torch.float32,
+        device=torch.device("cpu"),
+    )
+    self.mask_cache = attn_utils.build_causal_mask_cache(
+        size=config.kv_cache_max, dtype=torch.float32, device=torch.device("cpu")
+    )
+    self.config = config
+
+  @torch.inference_mode
+  def forward(
+      self,
+      tokens: torch.Tensor,
+      input_pos: torch.Tensor,
+      kv_cache: kv_utils.EKVCache,
+  ) -> Tuple[torch.Tensor, kv_utils.EKVCache]:
+    B, T = tokens.size()
+    assert (
+        self.config.max_seq_len >= T
+    ), f"Cannot forward sequence of length {T}, max seq length is only {self.config.max_seq_len}"
+
+    cos, sin = self.rope_cache
+    cos = cos.index_select(0, input_pos)
+    sin = sin.index_select(0, input_pos)
+    mask = self.mask_cache.index_select(2, input_pos)
+    mask = mask[:, :, :, : self.config.kv_cache_max]
+
+    # token embeddings of shape (b, t, n_embd)
+    x = self.tok_embedding(tokens)
+    x = x * (self.config.embedding_dim**0.5)
+
+    updated_kv_entires = []
+    for i, block in enumerate(self.transformer_blocks):
+      kv_entry = kv_cache.caches[i] if kv_cache else None
+      x, kv_entry = block(x, (cos, sin), mask, input_pos, kv_entry)
+      if kv_entry:
+        updated_kv_entires.append(kv_entry)
+    updated_kv_cache = kv_utils.EKVCache(tuple(updated_kv_entires))
+
+    x = self.final_norm(x)
+    res = self.lm_head(x)  # (b, t, vocab_size)
+    return res, updated_kv_cache
+
+
+def get_model_config_2b(kv_cache_max_len: int = 1024) -> cfg.ModelConfig:
+  attn_config = cfg.AttentionConfig(
+      num_heads=8,
+      num_query_groups=1,
+      rotary_percentage=1.0,
+  )
+  ff_config = cfg.FeedForwardConfig(
+      type=cfg.FeedForwardType.GATED,
+      activation=cfg.ActivationConfig(cfg.ActivationType.GELU_TANH),
+      intermediate_size=16384,
+  )
+  norm_config = cfg.NormalizationConfig(
+      type=cfg.NormalizationType.RMS_NORM,
+      epsilon=1e-6,
+      zero_centered=True,
+  )
+  config = cfg.ModelConfig(
+      vocab_size=256000,
+      num_layers=18,
+      max_seq_len=8192,
+      embedding_dim=2048,
+      kv_cache_max_len=kv_cache_max_len,
+      attn_config=attn_config,
+      ff_config=ff_config,
+      pre_attention_norm_config=norm_config,
+      pre_ff_norm_config=norm_config,
+      final_norm_config=norm_config,
+      parallel_residual=False,
+      lm_head_use_bias=False,
+      enable_hlfb=True,
+  )
+  return config
+
+
+def get_fake_model_config_2b_for_test(**kwargs) -> cfg.ModelConfig:
+  config = get_model_config_2b(**kwargs)
+  config.num_layers = 2
+  return config
+
+
+def build_2b_model(checkpoint_path, test_model=False, **kwargs) -> nn.Module:
+  config = (
+      get_fake_model_config_2b_for_test(**kwargs)
+      if test_model
+      else get_model_config_2b(**kwargs)
+  )
+  model = Gemma(config)
+  if checkpoint_path is not None:
+    loader = loading_utils.ModelLoader(checkpoint_path, TENSOR_NAMES)
+    # since embedding and lm-head use the same weight, we need to set strict
+    # to False.
+    loader.load(model, strict=False)
+  model.eval()
+  return model
+
+
+def define_and_run_2b(checkpoint_path, test_model=False) -> None:
+  kv_cache_max_len = 1024
+  model = build_2b_model(
+      checkpoint_path, test_model=test_model, kv_cache_max_len=kv_cache_max_len
+  )
+  idx = torch.from_numpy(np.array([[1, 2, 3, 4]]))
+  tokens = torch.full((1, kv_cache_max_len), 0, dtype=torch.long, device="cpu")
+  tokens[0, :4] = idx
+  input_pos = torch.arange(0, kv_cache_max_len)
+  kv = kv_utils.EKVCache.from_model_config(model.config)
+  print("running an inference")
+  print(model.forward(tokens, input_pos, kv))
+
+
+if __name__ == "__main__":
+  checkpoint_path = os.path.join(Path.home(), "Downloads/gemma-2b")
+  define_and_run_2b(checkpoint_path)
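Because forward returns the updated cache rather than mutating module buffers, decoding becomes a purely functional loop. A minimal sketch using the fake test config so no checkpoint is needed (greedy argmax is illustrative only):

import torch

from ai_edge_torch.generative.examples.experimental.gemma import gemma
from ai_edge_torch.generative.layers.experimental import ekv_cache as kv_utils

model = gemma.build_2b_model(None, test_model=True, kv_cache_max_len=128)
kv = kv_utils.EKVCache.from_model_config(model.config)
token = torch.tensor([[1]], dtype=torch.long)
for pos in range(4):
  input_pos = torch.tensor([pos], dtype=torch.int64)
  logits, kv = model.forward(token, input_pos, kv)  # fresh cache every step
  token = logits[:, -1:].argmax(dim=-1)             # greedy next token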
ai_edge_torch/generative/examples/experimental/phi/__init__.py (new file)

@@ -0,0 +1,14 @@
+# Copyright 2024 The AI Edge Torch Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
ai_edge_torch/generative/examples/experimental/phi/convert_to_tflite.py (new file)

@@ -0,0 +1,84 @@
+# Copyright 2024 The AI Edge Torch Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+#
+# Note: This is an experimental version of phi2 with external KV cache.
+# Please use with caution.
+
+import os
+from pathlib import Path
+
+import torch
+
+import ai_edge_torch
+from ai_edge_torch.generative.examples.experimental.phi import phi2
+from ai_edge_torch.generative.layers.experimental import ekv_cache
+from ai_edge_torch.generative.quantize import quant_recipes
+
+
+def convert_phi2_to_tflite(
+    checkpoint_path: str,
+    prefill_seq_len: int = 512,
+    kv_cache_max_len: int = 1024,
+    quantize: bool = True,
+):
+  """An example method for converting a Phi-2 model to multi-signature
+  tflite model.
+
+  Args:
+      checkpoint_path (str): The filepath to the model checkpoint, or
+        directory holding the checkpoint.
+      prefill_seq_len (int, optional): The maximum size of prefill input tensor.
+        Defaults to 512.
+      kv_cache_max_len (int, optional): The maximum size of KV cache buffer,
+        including both prefill and decode. Defaults to 1024.
+      quantize (bool, optional): Whether the model should be quanized.
+        Defaults to True.
+  """
+  pytorch_model = phi2.build_model(checkpoint_path, kv_cache_max_len=kv_cache_max_len)
+  # Tensors used to trace the model graph during conversion.
+  prefill_tokens = torch.full((1, prefill_seq_len), 0, dtype=torch.long)
+  prefill_input_pos = torch.arange(0, prefill_seq_len)
+  decode_token = torch.tensor([[0]], dtype=torch.long)
+  decode_input_pos = torch.tensor([0], dtype=torch.int64)
+  kv = ekv_cache.EKVCache.from_model_config(pytorch_model.config)
+
+  quant_config = quant_recipes.full_int8_dynamic_recipe() if quantize else None
+  edge_model = (
+      ai_edge_torch.signature(
+          'prefill',
+          pytorch_model,
+          sample_kwargs={
+              'tokens': prefill_tokens,
+              'input_pos': prefill_input_pos,
+              'kv_cache': kv,
+          },
+      )
+      .signature(
+          'decode',
+          pytorch_model,
+          sample_kwargs={
+              'tokens': decode_token,
+              'input_pos': decode_input_pos,
+              'kv_cache': kv,
+          },
+      )
+      .convert(quant_config=quant_config)
+  )
+  edge_model.export(f'/tmp/phi2_seq{prefill_seq_len}_ekv{kv_cache_max_len}.tflite')
+
+
+if __name__ == '__main__':
+  checkpoint_path = os.path.join(Path.home(), 'Downloads/llm_data/phi2')
+  convert_phi2_to_tflite(checkpoint_path)
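As with the Gemma script, the converter accepts non-default shapes; a hypothetical invocation (the checkpoint path is illustrative):

convert_phi2_to_tflite(
    '/path/to/phi2',       # illustrative checkpoint location
    prefill_seq_len=128,
    kv_cache_max_len=256,
    quantize=False,        # skip the int8 dynamic-range recipe
)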