PyPI - ai-edge-torch-nightly - Versions diffs - 0.2.0.dev20240714__py3-none-any.whl → 0.3.0.dev20240926__py3-none-any.whl - Mend

ai-edge-torch-nightly 0.2.0.dev20240714__py3-none-any.whl → 0.3.0.dev20240926__py3-none-any.whl

Files changed (169) hide show

ai_edge_torch/generative/utilities/t5_loader.py CHANGED Viewed

@@ -18,11 +18,10 @@ import glob
 import os
 from typing import Callable, Dict
+from ai_edge_torch.generative.layers import model_config
 from safetensors import safe_open
 import torch
-from ai_edge_torch.generative.layers import model_config
 def load_safetensors(full_path: str):
   """Loads safetensors into a single state dictionary.
@@ -71,7 +70,11 @@ def load_pytorch_statedict(full_path: str):
   Raises:
     ValueError: If no tensors are loaded from the provided directory or file.
   """
-  pattern = os.path.join(full_path, "*.bin") if os.path.isdir(full_path) else full_path
+  pattern = (
+      os.path.join(full_path, "*.bin")
+      if os.path.isdir(full_path)
+      else full_path
+  )
   files = []
   for file in glob.glob(pattern):
     files.append(file)
@@ -89,9 +92,7 @@ def load_pytorch_statedict(full_path: str):
 class ModelLoader:
-  """A utility class for loading and converting model checkpoints to ODML
-  transformer layer format.
-  """
+  """Utility class for loading and converting checkpoints to ODML transformer layer format."""
   @dataclass
   class TensorNames:
@@ -112,18 +113,19 @@ class ModelLoader:
     pre_attn_norm: str = None
     pre_cross_attn_norm: str = None
-    pre_ff_norm: str = None
+    post_attn_norm: str = None
     embedding: str = None
     final_norm: str = None
     lm_head: str = None
   def __init__(self, file_name: str, names: TensorNames) -> None:
-    """ModelLoader constructor. Can be used to load multiple models of the same
-    type.
+    """ModelLoader constructor.
+    Can be used to load multiple models of the same type.
     Args:
-        file_name (str): Path to the checkpoint. Can be a directory or an
-          exact file.
+        file_name (str): Path to the checkpoint. Can be a directory or an exact
+          file.
         names (TensorNames): An instance of `TensorNames` to determine mappings.
     """
     self._file_name = file_name
@@ -131,7 +133,10 @@ class ModelLoader:
     self._loader = self._get_loader()
   def load(
-      self, model: torch.nn.Module, strict: bool = True, fuse_attention: bool = True
+      self,
+      model: torch.nn.Module,
+      strict: bool = True,
+      fuse_attention: bool = True,
   ):
     """Load the model from the checkpoint
@@ -152,7 +157,7 @@ class ModelLoader:
       )
     elif isinstance(self._names, dict):
       converted_state = {}
-      for additional_prefix, names in self._names.items():
+      for additional_prefix, _ in self._names.items():
         local_converted_state = self._do_load(
             model,
             state,
@@ -166,11 +171,14 @@ class ModelLoader:
     if strict and state:
       raise ValueError(
-          f"Failed to map all tensor. Remaining tensor are: {list(state.keys())}"
+          "Failed to map all tensor. Remaining tensor are:"
+          f" {list(state.keys())}"
       )
     model.load_state_dict(converted_state, strict=strict)
-  def _do_load(self, model, state, names, additional_prefix="", fuse_attention=True):
+  def _do_load(
+      self, model, state, names, additional_prefix="", fuse_attention=True
+  ):
     """Load the model from the checkpoint
     Args:
@@ -183,7 +191,9 @@ class ModelLoader:
     """
     converted_state = dict()
     if names.embedding is not None:
-      converted_state["tok_embedding.weight"] = state.pop(f"{names.embedding}.weight")
+      converted_state["tok_embedding.weight"] = state.pop(
+          f"{names.embedding}.weight"
+      )
     if names.lm_head is not None:
       converted_state["lm_head.weight"] = state.pop(f"{names.lm_head}.weight")
       if model.config.lm_head_use_bias:
@@ -195,17 +205,21 @@ class ModelLoader:
           f"{final_norm_name}.weight"
       )
       if f"{final_norm_name}.bias" in state:
-        converted_state["final_norm.bias"] = state.pop(f"{final_norm_name}.bias")
+        converted_state["final_norm.bias"] = state.pop(
+            f"{final_norm_name}.bias"
+        )
     if names.relative_attn_bias:
       rel_attn_name = names.relative_attn_bias
-      prefix = additional_prefix + f"transformer_blocks.0"
+      prefix = additional_prefix + "transformer_blocks.0"
       converted_state[f"{prefix}.atten_func.relative_attention_bias.weight"] = (
           state.pop(f"{rel_attn_name}.weight")
       )
     for i in range(model.config.num_layers):
-      self._map_norm(i, model.config, state, converted_state, names, additional_prefix)
+      self._map_norm(
+          i, model.config, state, converted_state, names, additional_prefix
+      )
       self._map_feedforward(
           i, model.config, state, converted_state, names, additional_prefix
       )
@@ -251,7 +265,7 @@ class ModelLoader:
     if self._file_name.endswith(".bin"):
       return load_pytorch_statedict
-    raise ValueError(f"File format not supported.")
+    raise ValueError("File format not supported.")
   def _map_feedforward(
       self,
@@ -265,16 +279,23 @@ class ModelLoader:
     prefix = additional_prefix + f"transformer_blocks.{idx}"
     if names.ff_up_proj is None or names.ff_down_proj is None:
       return
-    if config.ff_config.type == model_config.FeedForwardType.SEQUENTIAL:
+    ff_config = config.block_config(idx).ff_config
+    if ff_config.type == model_config.FeedForwardType.SEQUENTIAL:
       ff_up_proj_name = names.ff_up_proj.format(idx)
       ff_down_proj_name = names.ff_down_proj.format(idx)
-      converted_state[f"{prefix}.ff.w1.weight"] = state.pop(f"{ff_up_proj_name}.weight")
+      converted_state[f"{prefix}.ff.w1.weight"] = state.pop(
+          f"{ff_up_proj_name}.weight"
+      )
       converted_state[f"{prefix}.ff.w2.weight"] = state.pop(
           f"{ff_down_proj_name}.weight"
       )
-      if config.ff_config.use_bias:
-        converted_state[f"{prefix}.ff.w1.bias"] = state.pop(f"{ff_up_proj_name}.bias")
-        converted_state[f"{prefix}.ff.w2.bias"] = state.pop(f"{ff_down_proj_name}.bias")
+      if ff_config.use_bias:
+        converted_state[f"{prefix}.ff.w1.bias"] = state.pop(
+            f"{ff_up_proj_name}.bias"
+        )
+        converted_state[f"{prefix}.ff.w2.bias"] = state.pop(
+            f"{ff_down_proj_name}.bias"
+        )
     else:
       if names.ff_gate_proj is not None:
         ff_up_proj_name = names.ff_up_proj.format(idx)
@@ -289,8 +310,10 @@ class ModelLoader:
         converted_state[f"{prefix}.ff.w1.weight"] = state.pop(
             f"{ff_gate_proj_name}.weight"
         )
-        if config.ff_config.use_bias:
-          converted_state[f"{prefix}.ff.w3.bias"] = state.pop(f"{ff_up_proj_name}.bias")
+        if ff_config.use_bias:
+          converted_state[f"{prefix}.ff.w3.bias"] = state.pop(
+              f"{ff_up_proj_name}.bias"
+          )
           converted_state[f"{prefix}.ff.w2.bias"] = state.pop(
               f"{ff_down_proj_name}.bias"
           )
@@ -315,20 +338,21 @@ class ModelLoader:
     ):
       return
     prefix = additional_prefix + f"transformer_blocks.{idx}"
+    attn_config = config.block_config(idx).attn_config
     q_name = names.attn_query_proj.format(idx)
     k_name = names.attn_key_proj.format(idx)
     v_name = names.attn_value_proj.format(idx)
     # model.encoder.transformer_blocks[0].atten_func.q_projection.weight
     if fuse_attention:
       converted_state[f"{prefix}.atten_func.attn.weight"] = self._fuse_qkv(
-          config,
+          attn_config,
           state.pop(f"{q_name}.weight"),
           state.pop(f"{k_name}.weight"),
           state.pop(f"{v_name}.weight"),
       )
-      if config.attn_config.qkv_use_bias:
+      if attn_config.qkv_use_bias:
         converted_state[f"{prefix}.atten_func.attn.bias"] = self._fuse_qkv(
-            config,
+            attn_config,
             state.pop(f"{q_name}.bias"),
             state.pop(f"{k_name}.bias"),
             state.pop(f"{v_name}.bias"),
@@ -343,7 +367,7 @@ class ModelLoader:
       converted_state[f"{prefix}.atten_func.v_projection.weight"] = state.pop(
           f"{v_name}.weight"
       )
-      if config.attn_config.qkv_use_bias:
+      if attn_config.qkv_use_bias:
         converted_state[f"{prefix}.atten_func.q_projection.bias"] = state.pop(
             f"{q_name}.bias"
         )
@@ -355,12 +379,12 @@ class ModelLoader:
         )
     o_name = names.attn_output_proj.format(idx)
-    converted_state[f"{prefix}.atten_func.output_projection.weight"] = state.pop(
-        f"{o_name}.weight"
+    converted_state[f"{prefix}.atten_func.output_projection.weight"] = (
+        state.pop(f"{o_name}.weight")
     )
-    if config.attn_config.output_proj_use_bias:
-      converted_state[f"{prefix}.atten_func.output_projection.bias"] = state.pop(
-          f"{o_name}.bias"
+    if attn_config.output_proj_use_bias:
+      converted_state[f"{prefix}.atten_func.output_projection.bias"] = (
+          state.pop(f"{o_name}.bias")
       )
   def _map_cross_attention(
@@ -380,52 +404,57 @@ class ModelLoader:
     ):
       return
     prefix = additional_prefix + f"transformer_blocks.{idx}"
+    attn_config = config.block_config(idx).attn_config
     q_name = names.cross_attn_query_proj.format(idx)
     k_name = names.cross_attn_key_proj.format(idx)
     v_name = names.cross_attn_value_proj.format(idx)
     if fuse_attention:
-      converted_state[f"{prefix}.cross_atten_func.attn.weight"] = self._fuse_qkv(
-          config,
-          state.pop(f"{q_name}.weight"),
-          state.pop(f"{k_name}.weight"),
-          state.pop(f"{v_name}.weight"),
+      converted_state[f"{prefix}.cross_atten_func.attn.weight"] = (
+          self._fuse_qkv(
+              attn_config,
+              state.pop(f"{q_name}.weight"),
+              state.pop(f"{k_name}.weight"),
+              state.pop(f"{v_name}.weight"),
+          )
       )
-      if config.attn_config.qkv_use_bias:
-        converted_state[f"{prefix}.cross_atten_func.attn.bias"] = self._fuse_qkv(
-            config,
-            state.pop(f"{q_name}.bias"),
-            state.pop(f"{k_name}.bias"),
-            state.pop(f"{v_name}.bias"),
+      if attn_config.qkv_use_bias:
+        converted_state[f"{prefix}.cross_atten_func.attn.bias"] = (
+            self._fuse_qkv(
+                attn_config,
+                state.pop(f"{q_name}.bias"),
+                state.pop(f"{k_name}.bias"),
+                state.pop(f"{v_name}.bias"),
+            )
         )
     else:
-      converted_state[f"{prefix}.cross_atten_func.q_projection.weight"] = state.pop(
-          f"{q_name}.weight"
+      converted_state[f"{prefix}.cross_atten_func.q_projection.weight"] = (
+          state.pop(f"{q_name}.weight")
       )
-      converted_state[f"{prefix}.cross_atten_func.k_projection.weight"] = state.pop(
-          f"{k_name}.weight"
+      converted_state[f"{prefix}.cross_atten_func.k_projection.weight"] = (
+          state.pop(f"{k_name}.weight")
       )
-      converted_state[f"{prefix}.cross_atten_func.v_projection.weight"] = state.pop(
-          f"{v_name}.weight"
+      converted_state[f"{prefix}.cross_atten_func.v_projection.weight"] = (
+          state.pop(f"{v_name}.weight")
       )
-      if config.attn_config.qkv_use_bias:
-        converted_state[f"{prefix}.cross_atten_func.q_projection.bias"] = state.pop(
-            f"{q_name}.bias"
+      if attn_config.qkv_use_bias:
+        converted_state[f"{prefix}.cross_atten_func.q_projection.bias"] = (
+            state.pop(f"{q_name}.bias")
         )
-        converted_state[f"{prefix}.cross_atten_func.k_projection.bias"] = state.pop(
-            f"{k_name}.bias"
+        converted_state[f"{prefix}.cross_atten_func.k_projection.bias"] = (
+            state.pop(f"{k_name}.bias")
         )
-        converted_state[f"{prefix}.cross_atten_func.v_projection.bias"] = state.pop(
-            f"{v_name}.bias"
+        converted_state[f"{prefix}.cross_atten_func.v_projection.bias"] = (
+            state.pop(f"{v_name}.bias")
         )
     o_name = names.cross_attn_output_proj.format(idx)
-    converted_state[f"{prefix}.cross_atten_func.output_projection.weight"] = state.pop(
-        f"{o_name}.weight"
+    converted_state[f"{prefix}.cross_atten_func.output_projection.weight"] = (
+        state.pop(f"{o_name}.weight")
     )
-    if config.attn_config.output_proj_use_bias:
-      converted_state[f"{prefix}.cross_atten_func.output_projection.bias"] = state.pop(
-          f"{o_name}.bias"
+    if attn_config.output_proj_use_bias:
+      converted_state[f"{prefix}.cross_atten_func.output_projection.bias"] = (
+          state.pop(f"{o_name}.bias")
       )
   def _map_norm(
@@ -450,34 +479,34 @@ class ModelLoader:
     if names.pre_cross_attn_norm:
       pre_cross_attn_norm_name = names.pre_cross_attn_norm.format(idx)
-      converted_state[f"{prefix}.cross_atten_func.pre_atten_norm.weight"] = state.pop(
-          f"{pre_cross_attn_norm_name}.weight"
+      converted_state[f"{prefix}.cross_atten_func.pre_atten_norm.weight"] = (
+          state.pop(f"{pre_cross_attn_norm_name}.weight")
       )
       if f"{pre_cross_attn_norm_name}.bias" in state:
-        converted_state[f"{prefix}.cross_atten_func.pre_atten_norm.bias"] = state.pop(
-            f"{pre_cross_attn_norm_name}.bias"
+        converted_state[f"{prefix}.cross_atten_func.pre_atten_norm.bias"] = (
+            state.pop(f"{pre_cross_attn_norm_name}.bias")
         )
-    if names.pre_ff_norm is not None:
-      pre_ff_norm_name = names.pre_ff_norm.format(idx)
-      converted_state[f"{prefix}.pre_ff_norm.weight"] = state.pop(
-          f"{pre_ff_norm_name}.weight"
+    if names.post_attn_norm is not None:
+      post_attn_norm_name = names.post_attn_norm.format(idx)
+      converted_state[f"{prefix}.post_atten_norm.weight"] = state.pop(
+          f"{post_attn_norm_name}.weight"
       )
-      if f"{pre_ff_norm_name}.bias" in state:
-        converted_state[f"{prefix}.pre_ff_norm.bias"] = state.pop(
-            f"{pre_ff_norm_name}.bias"
+      if f"{post_attn_norm_name}.bias" in state:
+        converted_state[f"{prefix}.post_atten_norm.bias"] = state.pop(
+            f"{post_attn_norm_name}.bias"
         )
   def _fuse_qkv(
       self,
-      config: model_config.ModelConfig,
+      attn_config: model_config.AttentionConfig,
       q: torch.Tensor,
       k: torch.Tensor,
       v: torch.Tensor,
   ) -> torch.Tensor:
-    q_per_kv = config.attn_config.num_heads // config.attn_config.num_query_groups
-    qs = torch.split(q, config.head_dim * q_per_kv)
-    ks = torch.split(k, config.head_dim)
-    vs = torch.split(v, config.head_dim)
+    q_per_kv = attn_config.num_heads // attn_config.num_query_groups
+    qs = torch.split(q, attn_config.head_dim * q_per_kv)
+    ks = torch.split(k, attn_config.head_dim)
+    vs = torch.split(v, attn_config.head_dim)
     cycled = [t for group in zip(qs, ks, vs) for t in group]
     return torch.cat(cycled)

ai_edge_torch/generative/utilities/verifier.py ADDED Viewed

@@ -0,0 +1,247 @@
+# Copyright 2024 The AI Edge Torch Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Common utility functions to verify the reauthored models."""
+import logging
+from typing import List, Optional, Union
+from ai_edge_torch.generative.layers import kv_cache as kv_utils
+import torch
+import transformers
+class ModelWrapper(torch.nn.Module):
+  """A wrapper for the model to be verified, this could be a HuggingFace model
+  or a regular PyTorch model.
+  """
+  def __init__(
+      self,
+      model: torch.nn.Module,
+      model_format: str = "huggingface",
+      hf_generation_config: Optional[transformers.GenerationConfig] = None,
+  ):
+    """Initializes the wrapper.
+    Args:
+      model (torch.nn.Module): The original model. This could be a model built
+        from HuggingFace transformers, or a regular PyTorch model.
+      model_format (str): The format of the model. It should be either
+        "huggingface" or "pytorch".
+      hf_generation_config (transformers.GenerationConfig): The HuggingFace
+        generation config. This config will only be used if the underlying model
+        is built from HuggingFace transformers.
+    """
+    super().__init__()
+    self.model = model
+    self.model_format = model_format
+    self.hf_generation_config = hf_generation_config
+  def generate(
+      self, inputs: torch.Tensor
+  ) -> Union[transformers.utils.ModelOutput, torch.LongTensor]:
+    if self.model_format == "huggingface":
+      return self.model.generate(
+          inputs=inputs, generation_config=self.hf_generation_config
+      )
+    else:
+      raise NotImplementedError(
+          "generate() is not implemented for model format: %s"
+          % self.model_format
+      )
+  def forward(
+      self,
+      inputs: torch.Tensor,
+  ):
+    return self.model.forward(inputs)
+def forward(
+    model: torch.nn.Module,
+    tokens: torch.Tensor,
+    kv_cache: kv_utils.KVCache,
+) -> tuple[torch.Tensor, kv_utils.KVCache]:
+  """Forwards the model reauthored with ai_edge_torch Generative API.
+  Args:
+    model (torch.nn.Module): The model to forward. It should be a model built
+      with ai_edge_torch Generative API.
+    tokens (torch.Tensor): The input tokens to forward.
+    kv_cache (KVCache): The KV cache to forward.
+  Returns:
+    The output logits and the updated KV cache.
+  """
+  input_pos = torch.arange(0, tokens.shape[1], dtype=torch.int)
+  output = model.forward(tokens, input_pos, kv_cache)
+  return output["logits"], output["kv_cache"]
+def generate(
+    model: torch.nn.Module, prompts: torch.Tensor, response_len: int
+) -> torch.Tensor:
+  """Generates the response to the prompts.
+  It appends tokens output by the model to the prompts and feeds them back to
+  the model until the sequence reaches response_len tokens.
+  Args:
+    model (torch.nn.Module): The model to generate. It should be a model built
+      with ai_edge_torch Generative API.
+    prompts (torch.Tensor): The prompts to generate.
+    response_len (int): The target total sequence length, prompt tokens
+      included; generation stops once it is reached.
+  Returns:
+    The generated tokens.
+  """
+  input_ids = prompts[0].int().tolist()
+  kv_cache = kv_utils.KVCache.from_model_config(model.config)
+  for _ in range(response_len - len(input_ids)):
+    logits, kv_cache = forward(model, torch.tensor([input_ids]), kv_cache)
+    generated_token = logits[0][-1].argmax().item()
+    input_ids.append(generated_token)
+  return torch.tensor([input_ids])
+def verify_with_input_ids(
+    original_model: ModelWrapper,
+    reauthored_model: torch.nn.Module,
+    input_ids: List[int],
+    kv_cache_max_len: int = 1024,
+    rtol: float = 1e-05,
+    atol: float = 1e-05,
+) -> bool:
+  """Verifies if the model reauthored generates the same output as the original.
+  It compares only one output from the original and the reauthored model.
+  Args:
+    original_model (ModelWrapper): The original model.
+    reauthored_model (torch.nn.Module): The model reauthored with ai_edge_torch
+      Generative API.
+    input_ids (List[int]): The input token IDs to forward with.
+    kv_cache_max_len (int): The maximum sequence length of the KV cache.
+    rtol (float): The relative tolerance for the comparison.
+    atol (float): The absolute tolerance for the comparison.
+  Returns:
+    True if the model reauthored generates the same output of the original.
+  """
+  tokens = torch.full((1, kv_cache_max_len), 0, dtype=torch.int, device="cpu")
+  tokens[0, : len(input_ids)] = torch.tensor([input_ids]).int()
+  logging.info("Forwarding the original model...")
+  outputs_original = original_model.forward(tokens)
+  logits_original = outputs_original.logits[0, len(input_ids) - 1, :]
+  logging.info("logits_original: %s", logits_original)
+  logging.info("Forwarding the reauthored model...")
+  kv_cache = kv_utils.KVCache.from_model_config(reauthored_model.config)
+  outputs_reauthored = forward(reauthored_model, tokens, kv_cache)
+  logits_reauthored = outputs_reauthored[0][0, len(input_ids) - 1, :]
+  logging.info("logits_reauthored: %s", logits_reauthored)
+  return torch.allclose(
+      logits_original, logits_reauthored, rtol=rtol, atol=atol
+  )
+def verify_model_with_prompts(
+    original_model: ModelWrapper,
+    reauthored_model: torch.nn.Module,
+    tokenizer: torch.nn.Module,
+    prompts: str,
+) -> bool:
+  """Verifies if the model reauthored generates the same answer as the original.
+  It compares an answer, i.e. multiple continuous outputs generated by the
+  original and the reauthored model.
+  Args:
+    original_model (ModelWrapper): The original model.
+    reauthored_model (torch.nn.Module): The model reauthored with ai_edge_torch
+      Generative API.
+    tokenizer (torch.nn.Module): The tokenizer.
+    prompts (str): The input prompts to generate answers.
+  Returns:
+    True if the model reauthored generates the same answer of the original.
+  """
+  prompt_tokens = tokenizer.encode(prompts, return_tensors="pt")
+  logging.info("Generating answer with the original model...")
+  outputs_original = original_model.generate(prompt_tokens)
+  response_original = tokenizer.decode(outputs_original[0])
+  logging.info("outputs_from_original_model: [[%s]]", response_original)
+  logging.info("Generating answer with the reauthored model...")
+  generate_len = len(outputs_original[0])
+  outputs_reauthored = generate(reauthored_model, prompt_tokens, generate_len)
+  response_reauthored = tokenizer.decode(outputs_reauthored[0])
+  logging.info("outputs from reauthored model: [[%s]]", response_reauthored)
+  return response_original == response_reauthored
+def verify_reauthored_model(
+    original_model: ModelWrapper,
+    reauthored_model: torch.nn.Module,
+    tokenizer: torch.nn.Module,
+    generate_prompts: List[str],
+    forward_input_ids: List[List[int]] = [[1, 2, 3, 4]],
+    rtol: float = 1e-05,
+    atol: float = 1e-05,
+):
+  """Verifies the reauthored model against the original model.
+  It verifies the reauthored model with two methods:
+  1. It compares the output of the original and the reauthored model with an
+     arbitrary input.
+  2. It compares the answer generated by the original and the reauthored model
+     with a prompt.
+  It logs "PASS" or "FAILED" for each check via logging.info.
+  Args:
+    original_model (ModelWrapper): The original model.
+    reauthored_model (torch.nn.Module): The model reauthored with ai_edge_torch
+      Generative API.
+    tokenizer (torch.nn.Module): The tokenizer.
+    generate_prompts (List[str]): List of the input prompts to generate answers.
+    forward_input_ids (List[List[int]]): List of the input token IDs to
+      forward with.
+    rtol (float): The relative tolerance for the comparison.
+    atol (float): The absolute tolerance for the comparison.
+  """
+  for input_ids in forward_input_ids:
+    logging.info("Verifying the reauthored model with input IDs: %s", input_ids)
+    if verify_with_input_ids(
+        original_model, reauthored_model, input_ids, rtol=rtol, atol=atol
+    ):
+      logging.info("PASS")
+    else:
+      logging.info("FAILED")
+  for prompts in generate_prompts:
+    logging.info("Verifying the reauthored model with prompts:%s", prompts)
+    if verify_model_with_prompts(
+        original_model, reauthored_model, tokenizer, prompts
+    ):
+      logging.info("PASS")
+    else:
+      logging.info("FAILED")

ai_edge_torch/hlfb/__init__.py CHANGED Viewed

@@ -13,4 +13,4 @@
 # limitations under the License.
 # ==============================================================================
-from torch_xla.experimental.mark_pattern_utils import StableHLOCompositeBuilder
+from ai_edge_torch.lowertools import StableHLOCompositeBuilder

ai_edge_torch/hlfb/mark_pattern/__init__.py CHANGED Viewed

@@ -16,11 +16,10 @@ import copy
 from typing import Any
 import uuid
+from ai_edge_torch import lowertools
+from ai_edge_torch.hlfb.mark_pattern import passes
+from ai_edge_torch.hlfb.mark_pattern import pattern as pattern_module
 import torch
-from torch_xla.experimental import xla_marker
-from ai_edge_torch.hlfb.mark_pattern.pattern import Pattern
-from ai_edge_torch.hlfb.mark_pattern.pattern import ScalarAttrTracker  # NOQA
 @torch._dynamo.assume_constant_result
@@ -49,10 +48,10 @@ def _insert_marker(
     is_input: bool,
     attr: dict[str, Any] = None,
 ):
-  attr = xla_marker.serialize_composite_attr(attr) if attr else None
+  attr = lowertools.serialize_composite_attr(attr) if attr else None
   with graph_module.graph.inserting_after(node):
     new_node = graph_module.graph.call_function(
-        torch.ops.xla.mark_tensor,
+        lowertools.mark_tensor_op,
         args=(node,),
         kwargs={
             "name": name,
@@ -69,13 +68,16 @@ def _insert_marker(
 def mark_pattern(
     graph_module: torch.fx.GraphModule,
-    pattern: Pattern,
+    pattern: pattern_module.Pattern,
 ) -> torch.fx.GraphModule:
   """Mark all existences of pattern graph in the GraphModule with fx pattern matching.
   The marked subgraphs will be lowered in StableHLO composite ops.
   Args:
     graph_module (torch.fx.GraphModule): GraphModule to be matched and marked.
     pattern (ai_edge_torch.hlfb.mark_pattern.Pattern): Pattern to match.
   Returns:
     The modified graph_module with additional marker ops in graph.
   """

ai-edge-torch-nightly 0.2.0.dev20240714__py3-none-any.whl → 0.3.0.dev20240926__py3-none-any.whl

ai-edge-torch-nightly 0.2.0.dev20240714__py3-none-any.whl → 0.3.0.dev20240926__py3-none-any.whl