ai-edge-torch-nightly 0.7.0.dev20251007__py3-none-any.whl → 0.8.0.dev20251225__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_edge_torch/_convert/conversion.py +2 -1
- ai_edge_torch/fx_infra/__init__.py +1 -0
- ai_edge_torch/fx_infra/_safe_run_decompositions.py +54 -1
- ai_edge_torch/generative/examples/stable_diffusion/convert_to_tflite.py +1 -3
- ai_edge_torch/generative/examples/stable_diffusion/samplers/k_lms.py +1 -1
- ai_edge_torch/generative/layers/attention.py +25 -2
- ai_edge_torch/generative/layers/attention_test.py +13 -1
- ai_edge_torch/generative/layers/attention_utils.py +62 -1
- ai_edge_torch/generative/layers/attention_utils_test.py +20 -0
- ai_edge_torch/generative/layers/builder.py +4 -2
- ai_edge_torch/generative/layers/model_config.py +5 -0
- ai_edge_torch/generative/layers/normalization.py +8 -2
- ai_edge_torch/generative/layers/scaled_dot_product_attention.py +35 -5
- ai_edge_torch/generative/layers/sdpa_with_kv_update.py +8 -3
- ai_edge_torch/generative/quantize/example.py +1 -1
- ai_edge_torch/generative/quantize/quant_attrs.py +8 -1
- ai_edge_torch/generative/quantize/quant_recipe.py +0 -13
- ai_edge_torch/generative/quantize/quant_recipe_utils.py +12 -19
- ai_edge_torch/generative/quantize/quant_recipes.py +16 -21
- ai_edge_torch/generative/quantize/supported_schemes.py +4 -1
- ai_edge_torch/generative/test/test_kv_cache.py +18 -6
- ai_edge_torch/generative/test/test_quantize.py +17 -26
- ai_edge_torch/generative/utilities/converter.py +97 -22
- ai_edge_torch/generative/utilities/litertlm_builder.py +61 -8
- ai_edge_torch/generative/utilities/loader.py +2 -1
- ai_edge_torch/lowertools/translate_recipe.py +8 -3
- ai_edge_torch/odml_torch/experimental/__init__.py +14 -0
- ai_edge_torch/odml_torch/experimental/torch_tfl/__init__.py +20 -0
- ai_edge_torch/odml_torch/experimental/torch_tfl/_decomps.py +438 -0
- ai_edge_torch/odml_torch/experimental/torch_tfl/_lowerings.py +728 -0
- ai_edge_torch/odml_torch/experimental/torch_tfl/_ops.py +371 -0
- ai_edge_torch/odml_torch/experimental/torch_tfl/torch_library_utils.py +37 -0
- ai_edge_torch/odml_torch/export.py +24 -7
- ai_edge_torch/odml_torch/lowerings/_basic.py +155 -0
- ai_edge_torch/odml_torch/lowerings/_decomp_registry.py +94 -2
- ai_edge_torch/odml_torch/lowerings/_jax_lowerings.py +255 -5
- ai_edge_torch/version.py +1 -1
- {ai_edge_torch_nightly-0.7.0.dev20251007.dist-info → ai_edge_torch_nightly-0.8.0.dev20251225.dist-info}/METADATA +15 -3
- {ai_edge_torch_nightly-0.7.0.dev20251007.dist-info → ai_edge_torch_nightly-0.8.0.dev20251225.dist-info}/RECORD +42 -36
- {ai_edge_torch_nightly-0.7.0.dev20251007.dist-info → ai_edge_torch_nightly-0.8.0.dev20251225.dist-info}/WHEEL +1 -1
- {ai_edge_torch_nightly-0.7.0.dev20251007.dist-info → ai_edge_torch_nightly-0.8.0.dev20251225.dist-info/licenses}/LICENSE +0 -0
- {ai_edge_torch_nightly-0.7.0.dev20251007.dist-info → ai_edge_torch_nightly-0.8.0.dev20251225.dist-info}/top_level.txt +0 -0

ai_edge_torch/_convert/conversion.py
@@ -133,10 +133,11 @@ def convert_signatures(
     exported_program = fx_infra.safe_run_decompositions(
         exported_program,
         fx_infra.decomp.pre_convert_decomp(),
+        can_skip=False,
     )
     return exported_program

-  exported_programs
+  exported_programs = [
       export(
           mod=sig.module,
           args=sig.args,
ai_edge_torch/fx_infra/_safe_run_decompositions.py
@@ -13,6 +13,8 @@
 # limitations under the License.
 # ==============================================================================
 """ExportedProgram.run_decompositions wrapper to handle unexpected export behavior."""
+import operator
+from typing import Any, Callable
 import torch


@@ -26,8 +28,48 @@ _DUMMY_DECOMP_TABLE = {
     torch._ops.OperatorBase(): lambda: None,
 }

+_BUILTIN_OPERATORS = {
+    getattr(operator, name)
+    for name in dir(operator)
+    if not name.startswith("_")
+}
+
+
+def _require_decomp(
+    exported_program: torch.export.ExportedProgram, decomp_table
+):
+  """Checks if the exported program requires decompositions."""
+  for node in exported_program.graph.nodes:
+    if "call_" not in str(node.op):
+      continue
+
+    op = node.target
+    if isinstance(op, torch._ops.OpOverloadPacket):
+      op = op.default
+
+    if op in decomp_table:
+      return True
+
+    if (
+        not isinstance(op, (torch._ops.OpOverload, torch._ops.OperatorBase))
+        and op not in _BUILTIN_OPERATORS
+    ):
+      # Python function that requires to be retraced via run_decompositions.
+      return True
+
+  return False

-def safe_run_decompositions(exported_program, decomp_table=None):
+
+_FORCE_DECOMP_ATTR = "_ai_edge_torch_force_decomp"
+
+
+def annotate_force_decomp(decomp: Callable[..., Any]):
+  """Annotates a decomp to force it to be run (at least shallowly) in safe_run_decompositions."""
+  setattr(decomp, _FORCE_DECOMP_ATTR, _FORCE_DECOMP_ATTR)
+  return decomp
+
+
+def safe_run_decompositions(exported_program, decomp_table=None, can_skip=True):
   """Wrapper for ExportedProgram.run_decompositions to handle unexpected export behavior."""

   if decomp_table is not None and not decomp_table:
@@ -35,6 +77,9 @@ def safe_run_decompositions(exported_program, decomp_table=None):
     # instead for backward compatibility.
     decomp_table = _DUMMY_DECOMP_TABLE

+  if can_skip and not _require_decomp(exported_program, decomp_table):
+    return exported_program
+
   for node in exported_program.graph.nodes:
     if node.target == torch.ops.aten.view.default:
       # Passes or torch.export may generate aten.view nodes not respecting the
@@ -44,6 +89,14 @@ def safe_run_decompositions(exported_program, decomp_table=None):
       # back to one aten.view.
       node.target = lambda self, size: torch.reshape(self.contiguous(), size)

+    # Torch may skip some decompositions even if target is in decomp_table.
+    # The following ensures the target is always run through the decompositions
+    # shallowly if it has _FORCE_DECOMP_ATTR.
+    if decomp_table and node.target in decomp_table:
+      decomp = decomp_table[node.target]
+      if hasattr(decomp, _FORCE_DECOMP_ATTR):
+        node.target = decomp
+
   exported_program = exported_program.run_decompositions(decomp_table)

   if hasattr(torch.ops.aten, "_assert_tensor_metadata"):
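
A minimal usage sketch of the new hooks above, importing them straight from this module; the gelu decomposition and the model are made up for illustration and are not part of the package:

    import torch
    from ai_edge_torch.fx_infra import _safe_run_decompositions as srd


    # Hypothetical decomposition; the decorator only tags it so that
    # safe_run_decompositions routes matching nodes through it even when torch
    # would otherwise skip the table entry.
    @srd.annotate_force_decomp
    def _gelu_decomp(x, approximate="none"):
      del approximate
      return 0.5 * x * (1.0 + torch.erf(x * 0.7071067811865476))


    class TinyModel(torch.nn.Module):
      def forward(self, x):
        return torch.nn.functional.gelu(x)


    ep = torch.export.export(TinyModel(), (torch.randn(4),))
    # can_skip=True (the default) lets the wrapper return early when
    # _require_decomp finds nothing in the graph that needs the table;
    # conversion.py above passes can_skip=False to always run decompositions.
    ep = srd.safe_run_decompositions(
        ep, {torch.ops.aten.gelu.default: _gelu_decomp}, can_skip=True
    )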

ai_edge_torch/generative/examples/stable_diffusion/convert_to_tflite.py
@@ -138,9 +138,7 @@ def convert_stable_diffusion_to_tflite(
   if not os.path.exists(output_dir):
     pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True)

-  quant_config = (
-      quant_recipes.full_int8_weight_only_recipe() if quantize else None
-  )
+  quant_config = quant_recipes.full_weight_only_recipe() if quantize else None

   # TODO(yichunk): convert to multi signature tflite model.
   # CLIP text encoder

ai_edge_torch/generative/layers/attention.py
@@ -18,6 +18,7 @@
 import abc
 from typing import Optional, Tuple, Union

+from ai_edge_torch.generative.layers import attention_utils
 from ai_edge_torch.generative.layers import builder
 from ai_edge_torch.generative.layers import kv_cache as kv_utils
 from ai_edge_torch.generative.layers import lora as lora_utils
@@ -240,13 +241,35 @@ class CausalSelfAttention(CausalSelfAttentionBase):
     k = k.reshape(B, T, -1, self.config.head_dim)
     v = v.reshape(B, T, -1, self.config.head_dim)

-    if rope is not None:
+    alibi_bias = None
+    if self.config.use_alibi:
+      k_size = T
+      if mask is not None:
+        k_size = mask.shape[-1]
+      elif input_pos is not None:
+        # If mask is not present, assume current sequence length is key length.
+        k_size = input_pos[-1].item() + 1
+      alibi_bias = attention_utils.build_alibi_bias(
+          n_heads=self.config.num_heads,
+          k_size=k_size,
+          dtype=x.dtype,
+          device=x.device,
+      )
+    elif rope is not None:
       # Compute rotary positional embedding for query and key.
       cos, sin = rope
       q, k = rotary_pos_emb.apply_rope_inline(q, k, cos, sin)

     sdpa_out, kv_cache = sdpa_with_kv_update.sdpa_with_kv_update(
-        q,
+        q,
+        k,
+        v,
+        kv_cache,
+        input_pos,
+        mask,
+        self.config,
+        self.enable_hlfb,
+        alibi_bias=alibi_bias,
     )

     # Compute the output projection.
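
The key-length selection in the new ALiBi branch can be read as a small standalone rule; this is a paraphrase of the hunk above for illustration, not code from the package:

    from typing import Optional

    import torch


    def alibi_key_length(
        t: int,
        mask: Optional[torch.Tensor] = None,
        input_pos: Optional[torch.Tensor] = None,
    ) -> int:
      """Mirrors how the forward pass above picks k_size for the ALiBi bias."""
      if mask is not None:
        # The mask's last dimension already covers the full key/value length.
        return mask.shape[-1]
      if input_pos is not None:
        # Without a mask, the largest position index bounds the key length.
        return int(input_pos[-1].item()) + 1
      # Fall back to the current sequence length T.
      return t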

ai_edge_torch/generative/layers/attention_test.py
@@ -27,16 +27,27 @@ class AttentionTest(parameterized.TestCase):
       dict(
           testcase_name="local_causal_self_attention",
           attn_type=cfg.AttentionType.LOCAL_SLIDING,
+          use_alibi=False,
           expected_shape=(1, 10, 16),
       ),
       dict(
           testcase_name="global_causal_self_attention",
           attn_type=cfg.AttentionType.GLOBAL,
+          use_alibi=False,
+          expected_shape=(1, 10, 16),
+      ),
+      dict(
+          testcase_name="alibi_attention",
+          attn_type=cfg.AttentionType.GLOBAL,
+          use_alibi=True,
           expected_shape=(1, 10, 16),
       ),
   )
   def test_causal_self_attention(
-      self,
+      self,
+      attn_type: cfg.AttentionType,
+      use_alibi: bool,
+      expected_shape: tuple[int, ...],
   ):
     norm_config = cfg.NormalizationConfig(
         type=cfg.NormalizationType.RMS_NORM,
@@ -56,6 +67,7 @@ class AttentionTest(parameterized.TestCase):
         logit_softcap=None,
         sliding_window_size=16,
         attn_type=attn_type,
+        use_alibi=use_alibi,
     )
     self_atten = attention.CausalSelfAttention(
         dim=16,

ai_edge_torch/generative/layers/attention_utils.py
@@ -15,11 +15,72 @@
 # Common utility functions used with attention module.

 import math
-from typing import Tuple
+from typing import List, Tuple

 import torch


+def _get_alibi_slopes(n_heads: int) -> List[float]:
+  """Returns slopes for ALiBi implementation.
+
+  The slopes are taken from the ALiBi paper
+  [https://arxiv.org/abs/2108.12409].
+  The slopes are later used to calculate the bias which is added to the
+  attention scores.
+
+  Args:
+    n_heads (int): The number of attention heads.
+  """
+
+  def get_slopes_power_of_2(n):
+    start = 2 ** (-(2 ** -(math.log2(n) - 3)))
+    return [start**i for i in range(1, n + 1)]
+
+  if math.log2(n_heads).is_integer():
+    return get_slopes_power_of_2(n_heads)
+  else:
+    closest_power_of_2 = 2 ** math.floor(math.log2(n_heads))
+    return (
+        get_slopes_power_of_2(closest_power_of_2)
+        + _get_alibi_slopes(2 * closest_power_of_2)[0::2][
+            : n_heads - closest_power_of_2
+        ]
+    )
+
+
+def build_alibi_bias(
+    n_heads: int,
+    k_size: int,
+    dtype: torch.dtype = torch.float32,
+    device: torch.device = None,
+) -> torch.Tensor:
+  """Builds ALiBi bias tensor based on key position.
+
+  The bias tensor is added to the attention scores before softmax.
+  Replicates HuggingFace Falcon implementation behavior where bias only depends
+  on key position j, not relative position j-i.
+
+  Args:
+    n_heads (int): The number of attention heads.
+    k_size (int): The key size of the bias tensor.
+    dtype (torch.dtype, optional): Output tensor's data type. Defaults to
+      torch.float32.
+    device (torch.device, optional): Output tensor's data type. Defaults to
+      None in which case "cpu" is used.
+
+  Returns:
+    torch.Tensor: The ALiBi bias tensor of shape (1, n_heads, 1, k_size).
+  """
+  if device is None:
+    device = torch.device('cpu')
+  slopes = torch.tensor(_get_alibi_slopes(n_heads), dtype=dtype, device=device)
+  k_pos = torch.arange(k_size, device=device)
+  # According to HF implementation, bias only depends on key position.
+  # slopes[h] * k_pos[j]
+  alibi_bias = slopes.unsqueeze(-1) * k_pos.unsqueeze(0)  # Shape: H, K
+  return alibi_bias[None, :, None, :].to(dtype)
+
+
 def build_rope_cache(
     size: int,
     dim: int,
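
For a sense of how the (1, n_heads, 1, k_size) bias is consumed, a small illustration with synthetic scores; in the package the bias is additionally scaled and folded into the attention mask (see the scaled_dot_product_attention.py hunks further below):

    import torch
    from ai_edge_torch.generative.layers import attention_utils

    B, H, Q, K = 1, 4, 1, 8            # batch, heads, query length, key length
    scores = torch.zeros(B, H, Q, K)   # stand-in for q @ k^T, synthetic values

    bias = attention_utils.build_alibi_bias(n_heads=H, k_size=K)
    assert bias.shape == (1, H, 1, K)

    # The bias broadcasts over batch and query positions: every query row of a
    # given head receives the same per-key offset slope[h] * j.
    probs = torch.softmax(scores + bias, dim=-1)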

ai_edge_torch/generative/layers/attention_utils_test.py
@@ -21,6 +21,26 @@ from absl.testing import absltest as googletest

 class AttentionUtilsTest(googletest.TestCase):

+  def test_get_alibi_slopes(self):
+    slopes = attention_utils._get_alibi_slopes(1)
+    self.assertSequenceAlmostEqual(slopes, [0.00390625], places=6)
+    slopes = attention_utils._get_alibi_slopes(2)
+    self.assertSequenceAlmostEqual(slopes, [0.0625, 0.00390625], places=6)
+    slopes = attention_utils._get_alibi_slopes(4)
+    self.assertSequenceAlmostEqual(
+        slopes, [0.25, 0.0625, 0.015625, 0.00390625], places=6
+    )
+    slopes = attention_utils._get_alibi_slopes(3)
+    self.assertSequenceAlmostEqual(slopes, [0.0625, 0.00390625, 0.25], places=6)
+
+  def test_build_alibi_bias(self):
+    bias = attention_utils.build_alibi_bias(n_heads=2, k_size=3)
+    self.assertEqual(bias.shape, (1, 2, 1, 3))
+    expected = torch.tensor(
+        [[[[0.0, 0.0625, 0.125]], [[0.0, 0.00390625, 0.0078125]]]]
+    )
+    torch.testing.assert_close(bias, expected)
+
   def test_build_causal_mask_cache(self):
     mask = attention_utils.build_causal_mask_cache(3)
     self.assertEqual(mask.shape, (1, 1, 3, 3))

ai_edge_torch/generative/layers/builder.py
@@ -71,7 +71,7 @@ def build_norm(
   Raises:
     ValueError: If config's `layer_norm_type` is not supported.
   """
-  if config.type == cfg.NormalizationType.NONE:
+  if config is None or config.type == cfg.NormalizationType.NONE:
     return lambda x: x
   elif config.type == cfg.NormalizationType.RMS_NORM:
     return normalization.RMSNorm(
@@ -84,7 +84,9 @@ def build_norm(
         init_fn=init_fn,
     )
   elif config.type == cfg.NormalizationType.LAYER_NORM:
-    return normalization.LayerNorm(
+    return normalization.LayerNorm(
+        dim, config.epsilon, config.use_bias, config.enable_hlfb
+    )
   elif config.type == cfg.NormalizationType.GROUP_NORM:
     return normalization.GroupNorm(
         config.group_num, dim, config.epsilon, config.enable_hlfb

ai_edge_torch/generative/layers/model_config.py
@@ -75,6 +75,8 @@ class NormalizationConfig:
   scale_shift: float = 0.0
   # Number of groups used in group normalization.
   group_num: Optional[float] = None
+  # Whether to use bias in norm.
+  use_bias: bool = True


 # Exprimental feature and may subject to change.
@@ -108,6 +110,8 @@ class AttentionConfig:
   rotary_base: int = 10_000
   # Percentage of Rotary Positional Embedding added Q and K projections.
   rotary_percentage: Optional[float] = None
+  # Whether to use ALiBi positional encoding.
+  use_alibi: bool = False
   # Whether to transpose the query groups of qkv bundled tensor before
   # splitting into separated tensors.
   qkv_transpose_before_split: bool = False
@@ -247,6 +251,7 @@ class ModelConfig:
   lm_head_use_bias: bool = False
   # Whether LLM's HEAD shares the weight of the embedding.
   lm_head_share_weight_with_embedding: bool = True
+  dense_intermediate_size: Optional[int] = None

   # Whether to turn on high-level function boundary.
   enable_hlfb: bool = True
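
A hedged example of how the new fields might be set together; only the field names come from the hunks above, while the surrounding constructor arguments are guesses and may not match the real set of required fields:

    from ai_edge_torch.generative.layers import model_config as cfg

    # Bias-free LayerNorm via the new NormalizationConfig.use_bias flag.
    norm_config = cfg.NormalizationConfig(
        type=cfg.NormalizationType.LAYER_NORM,
        use_bias=False,
    )

    # Attention that relies on ALiBi rather than RoPE via AttentionConfig.use_alibi.
    attn_config = cfg.AttentionConfig(
        num_heads=8,
        head_dim=64,
        num_query_groups=8,
        use_alibi=True,
    )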

ai_edge_torch/generative/layers/normalization.py
@@ -148,6 +148,7 @@ class LayerNorm(torch.nn.Module):
       self,
       dim: int,
       eps: float = 1e-5,
+      use_bias: bool = True,
       enable_hlfb: bool = False,
   ):
     """Initialize the LayerNorm layer.
@@ -156,6 +157,7 @@ class LayerNorm(torch.nn.Module):
       dim (int): dimension of the input tensor.
       eps (float): A small float value to ensure numerical stability (default:
         1e-5).
+      use_bias (bool): Whether to use bias in LayerNorm.
       enable_hlfb (bool): Whether to convert this normalization into a single
         op.
     """
@@ -164,7 +166,11 @@ class LayerNorm(torch.nn.Module):
     self.normalized_shape = (dim,)
     self.eps = eps
     self.weight = torch.nn.Parameter(torch.empty(dim), requires_grad=False)
-    self.bias =
+    self.bias = (
+        torch.nn.Parameter(torch.empty(dim), requires_grad=False)
+        if use_bias
+        else None
+    )

   def forward(self, x):
     """Running the forward pass of LayerNorm layer.
@@ -175,7 +181,7 @@ class LayerNorm(torch.nn.Module):
     Returns:
       torch.Tensor: output tensor after applying LayerNorm.
     """
-    if self.enable_hlfb:
+    if self.enable_hlfb and self.bias is not None:
       return layer_norm_with_hlfb(
           x, self.normalized_shape, self.weight, self.bias, self.eps
       )
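
As a sanity check on what a bias-free layer norm computes, a plain-torch comparison that is independent of the class above:

    import torch
    import torch.nn.functional as F

    dim = 8
    x = torch.randn(2, dim)
    weight = torch.ones(dim)

    # F.layer_norm accepts bias=None for a bias-free normalization.
    y = F.layer_norm(x, (dim,), weight=weight, bias=None, eps=1e-5)

    # Equivalent explicit form: normalize, then scale, with no shift term.
    mean = x.mean(-1, keepdim=True)
    var = x.var(-1, unbiased=False, keepdim=True)
    y_ref = (x - mean) / torch.sqrt(var + 1e-5) * weight
    torch.testing.assert_close(y, y_ref)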

ai_edge_torch/generative/layers/scaled_dot_product_attention.py
@@ -32,6 +32,7 @@ def scaled_dot_product_attention(
     mask: Optional[torch.Tensor] = None,
     scale: Optional[float] = None,
     softcap: Optional[float] = None,
+    alibi_bias: Optional[torch.Tensor] = None,
 ):
   """Scaled dot product attention.

@@ -41,14 +42,23 @@ def scaled_dot_product_attention(
     v (torch.Tensor): Value tensor, with shape [B, T, KV_LEN, H].
     head_size (int): head dimension.
     mask (torch.Tensor): the optional mask tensor.
+    scale (float): the optional scale factor.
+    softcap (float): the optional softcap for the logits.
+    alibi_bias (torch.Tensor): optional alibi bias tensor.

   Returns:
     The output tensor of scaled_dot_product_attention.
   """
-
   if scale is None:
     scale = 1.0 / math.sqrt(head_size)

+  if alibi_bias is not None:
+    alibi_bias = alibi_bias * scale
+    if mask is None:
+      mask = alibi_bias
+    else:
+      mask = mask + alibi_bias
+
   q = q.transpose(1, 2)
   k = k.transpose(1, 2)
   v = v.transpose(1, 2)
@@ -72,7 +82,8 @@ def scaled_dot_product_attention(
     scores = scores / softcap
     scores = torch.tanh(scores)
     scores = scores * softcap
-    scores = scores + mask
+    if mask is not None:
+      scores = scores + mask
     out = F.softmax(scores.float(), dim=-1).type_as(q)
     y = torch.matmul(out, v)

@@ -87,6 +98,7 @@ def scaled_dot_product_attention_with_hlfb(
     mask: Optional[torch.Tensor] = None,
     scale: Optional[float] = None,
     softcap: Optional[float] = None,
+    alibi_bias: Optional[torch.Tensor] = None,
 ):
   """Scaled dot product attention with high-level function boundary enabled.

@@ -96,14 +108,23 @@ def scaled_dot_product_attention_with_hlfb(
     v (torch.Tensor): Value tensor, with shape [B, T, KV_LEN, H].
     head_size (int): head dimension.
     mask (torch.Tensor): the optional mask tensor.
+    scale (float): the optional scale factor.
+    softcap (float): the optional softcap for the logits.
+    alibi_bias (torch.Tensor): optional alibi bias tensor.

   Returns:
     The output tensor of scaled_dot_product_attention.
   """
-
   if scale is None:
     scale = 1.0 / math.sqrt(head_size)

+  if alibi_bias is not None:
+    alibi_bias = alibi_bias * scale
+    if mask is None:
+      mask = alibi_bias
+    else:
+      mask = mask + alibi_bias
+
   attrs = {"scale": scale}

   if softcap is not None:
@@ -137,7 +158,8 @@ def scaled_dot_product_attention_with_hlfb(
     scores = scores / softcap
     scores = torch.tanh(scores)
     scores = scores * softcap
-    scores = scores + mask
+    if mask is not None:
+      scores = scores + mask
     out = F.softmax(scores.float(), dim=-1).type_as(q)
     y = torch.matmul(out, v)

@@ -154,6 +176,7 @@ def scaled_dot_product_attention_transposed(
     mask: Optional[torch.Tensor] = None,
     scale: Optional[float] = None,
     softcap: Optional[float] = None,
+    alibi_bias: Optional[torch.Tensor] = None,
 ):
   """Scaled dot product attention with transposed key and value.

@@ -165,14 +188,21 @@ def scaled_dot_product_attention_transposed(
     mask (torch.Tensor): the optional mask tensor.
     scale (float): the optional scale factor.
     softcap (float): the optional softcap for the logits.
+    alibi_bias (torch.Tensor): optional alibi bias tensor.

   Returns:
     The output tensor of scaled_dot_product_attention_transposed.
   """
-
   if scale is None:
     scale = 1.0 / math.sqrt(head_size)

+  if alibi_bias is not None:
+    alibi_bias = alibi_bias * scale
+    if mask is None:
+      mask = alibi_bias
+    else:
+      mask = mask + alibi_bias
+
   query = query * scale

   assert mask is not None, "Mask should not be None!"
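
The preprocessing repeated in all three functions above (scale the bias, then fold it into the additive mask) reduces to a small helper; this is a sketch for illustration, not package code:

    import math
    from typing import Optional

    import torch


    def fold_alibi_into_mask(
        mask: Optional[torch.Tensor],
        alibi_bias: Optional[torch.Tensor],
        head_size: int,
        scale: Optional[float] = None,
    ) -> Optional[torch.Tensor]:
      """Mirrors the shared ALiBi handling in the three SDPA variants above."""
      if scale is None:
        scale = 1.0 / math.sqrt(head_size)
      if alibi_bias is None:
        return mask
      # Pre-scale the bias by the same factor applied to the attention scores,
      # then merge it into the additive mask so the rest of SDPA is unchanged.
      alibi_bias = alibi_bias * scale
      return alibi_bias if mask is None else mask + alibi_bias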

ai_edge_torch/generative/layers/sdpa_with_kv_update.py
@@ -15,7 +15,7 @@

 """Common utility functions for data loading etc."""

-from typing import Tuple
+from typing import Optional, Tuple

 from ai_edge_torch.generative.layers import kv_cache as kv_utils
 from ai_edge_torch.generative.layers import scaled_dot_product_attention as sdpa
@@ -32,14 +32,15 @@ def sdpa_with_kv_update(
     mask: torch.Tensor,
     config: cfg.AttentionConfig,
     enable_hlfb: bool,
+    alibi_bias: Optional[torch.Tensor] = None,
 ) -> Tuple[torch.Tensor, kv_utils.KVCacheEntry]:
   """Wrapper function for scaled dot product attention with KV cache update."""
   if kv is not None and kv.kv_layout == kv_utils.KV_LAYOUT_TRANSPOSED:
     return _sdpa_with_kv_update_transposed(
-        query, key, value, kv, input_pos, mask, config
+        query, key, value, kv, input_pos, mask, config, alibi_bias
     )
   return _sdpa_with_kv_update_default(
-      query, key, value, kv, input_pos, mask, config, enable_hlfb
+      query, key, value, kv, input_pos, mask, config, enable_hlfb, alibi_bias
   )


@@ -51,6 +52,7 @@ def _sdpa_with_kv_update_transposed(
     input_pos: torch.Tensor,
     mask: torch.Tensor,
     config: cfg.AttentionConfig,
+    alibi_bias: Optional[torch.Tensor] = None,
 ) -> Tuple[torch.Tensor, kv_utils.KVCacheEntry]:
   # Transpose k/v to specific layout for GPU implementation.
   b, seq_len, n, h = query.shape
@@ -77,6 +79,7 @@ def _sdpa_with_kv_update_transposed(
       config.head_dim,
       mask=mask,
       softcap=config.logit_softcap,
+      alibi_bias=alibi_bias,
   )  # 1, bk, gt, h
   sdpa_out = (
       sdpa_out.reshape(b, -1, seq_len, h)
@@ -95,6 +98,7 @@ def _sdpa_with_kv_update_default(
     mask: torch.Tensor,
     config: cfg.AttentionConfig,
     enable_hlfb: bool,
+    alibi_bias: Optional[torch.Tensor] = None,
 ) -> Tuple[torch.Tensor, kv_utils.KVCacheEntry]:
   b, seq_len, _, _ = query.shape
   if kv is not None:
@@ -112,6 +116,7 @@ def _sdpa_with_kv_update_default(
       config.head_dim,
       mask=mask,
       softcap=config.logit_softcap,
+      alibi_bias=alibi_bias,
   )
   sdpa_out = sdpa_out.reshape(b, seq_len, -1)
   return sdpa_out, kv

ai_edge_torch/generative/quantize/example.py
@@ -33,7 +33,7 @@ def main():
   kv = kv_utils.KVCache.from_model_config(config)

   # Create a quantization recipe to be applied to the model
-  quant_config = quant_recipes.
+  quant_config = quant_recipes.full_dynamic_recipe()
   print(quant_config)

   # Convert with quantization

ai_edge_torch/generative/quantize/quant_attrs.py
@@ -63,8 +63,15 @@ class Granularity(enum.Enum):
     NONE: Granularity not applicable to this quantization scheme.
     CHANNELWISE: Or per-channel quantization. Each channel of relevant tensors
       is quantized independently of one another.
+    BLOCKWISE_32: Blockwise quantization with block size 32.
+    BLOCKWISE_64: Blockwise quantization with block size 64.
+    BLOCKWISE_128: Blockwise quantization with block size 128.
+    BLOCKWISE_256: Blockwise quantization with block size 256.
   """

   NONE = enum.auto()
   CHANNELWISE = enum.auto()
-  BLOCKWISE = enum.auto()
+  BLOCKWISE_32 = enum.auto()
+  BLOCKWISE_64 = enum.auto()
+  BLOCKWISE_128 = enum.auto()
+  BLOCKWISE_256 = enum.auto()
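
For intuition about the new BLOCKWISE_* granularities, a generic symmetric int8 blockwise-quantization sketch with block size 32; this is illustrative only and not how the converter's quantizer is implemented:

    import torch


    def quantize_blockwise_int8(w: torch.Tensor, block_size: int = 32):
      """Symmetric int8 quantization with one scale per block of the last axis."""
      out_features, in_features = w.shape
      assert in_features % block_size == 0
      blocks = w.reshape(out_features, in_features // block_size, block_size)
      # One scale per (row, block) rather than per row (channelwise) or per
      # tensor: smaller blocks track the local dynamic range more tightly.
      scales = blocks.abs().amax(dim=-1, keepdim=True).clamp(min=1e-8) / 127.0
      q = torch.clamp(torch.round(blocks / scales), -128, 127).to(torch.int8)
      return q, scales


    w = torch.randn(16, 64)
    q, scales = quantize_blockwise_int8(w, block_size=32)
    w_hat = (q.float() * scales).reshape(w.shape)  # dequantized approximation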

ai_edge_torch/generative/quantize/quant_recipe.py
@@ -39,7 +39,6 @@ class LayerQuantRecipe:
     mode: Type of quantization.
     algorithm: Algorithm for calculating quantization parameters.
     granularity: Granularity of quantization.
-    block_size: Size of the block for blockwise quantization.
   """

   activation_dtype: quant_attrs.Dtype
@@ -47,7 +46,6 @@ class LayerQuantRecipe:
   mode: quant_attrs.Mode
   algorithm: quant_attrs.Algorithm
   granularity: quant_attrs.Granularity
-  block_size: int = 0

   def __str__(self):
     base_str = (
@@ -56,7 +54,6 @@ class LayerQuantRecipe:
         f'{self.mode.name}, '
         f'{self.algorithm.name}, '
         f'{self.granularity.name}, '
-        f'{self.block_size}'
     )
     return f'{base_str})'

@@ -77,16 +74,6 @@ class LayerQuantRecipe:
         and self.algorithm == supported[3]
         and self.granularity == supported[4]
     ):
-      if self.block_size > 0:
-        if (
-            self.block_size % 32 == 0
-            and self.granularity == quant_attrs.Granularity.BLOCKWISE
-        ):
-          is_valid = True
-          break
-        else:
-          is_valid = False
-          break
       is_valid = True
       break
