ai_edge_torch_nightly-0.3.0.dev20250114-py3-none-any.whl

Files changed (213)
  1. ai_edge_torch/__init__.py +32 -0
  2. ai_edge_torch/_config.py +69 -0
  3. ai_edge_torch/_convert/__init__.py +14 -0
  4. ai_edge_torch/_convert/conversion.py +153 -0
  5. ai_edge_torch/_convert/conversion_utils.py +64 -0
  6. ai_edge_torch/_convert/converter.py +270 -0
  7. ai_edge_torch/_convert/fx_passes/__init__.py +23 -0
  8. ai_edge_torch/_convert/fx_passes/build_aten_composite_pass.py +288 -0
  9. ai_edge_torch/_convert/fx_passes/build_interpolate_composite_pass.py +131 -0
  10. ai_edge_torch/_convert/fx_passes/inject_mlir_debuginfo_pass.py +73 -0
  11. ai_edge_torch/_convert/fx_passes/optimize_layout_transposes_pass/__init__.py +16 -0
  12. ai_edge_torch/_convert/fx_passes/optimize_layout_transposes_pass/layout_check.py +258 -0
  13. ai_edge_torch/_convert/fx_passes/optimize_layout_transposes_pass/layout_mark.py +50 -0
  14. ai_edge_torch/_convert/fx_passes/optimize_layout_transposes_pass/layout_partitioners/__init__.py +18 -0
  15. ai_edge_torch/_convert/fx_passes/optimize_layout_transposes_pass/layout_partitioners/greedy.py +68 -0
  16. ai_edge_torch/_convert/fx_passes/optimize_layout_transposes_pass/layout_partitioners/min_cut.py +216 -0
  17. ai_edge_torch/_convert/fx_passes/optimize_layout_transposes_pass/layout_rewrite.py +449 -0
  18. ai_edge_torch/_convert/fx_passes/optimize_layout_transposes_pass/op_func_registry.py +30 -0
  19. ai_edge_torch/_convert/fx_passes/optimize_layout_transposes_pass/pass_body.py +303 -0
  20. ai_edge_torch/_convert/fx_passes/optimize_layout_transposes_pass/utils.py +64 -0
  21. ai_edge_torch/_convert/fx_passes/remove_non_user_outputs_pass.py +52 -0
  22. ai_edge_torch/_convert/signature.py +66 -0
  23. ai_edge_torch/_convert/test/__init__.py +14 -0
  24. ai_edge_torch/_convert/test/test_convert.py +558 -0
  25. ai_edge_torch/_convert/test/test_convert_composites.py +234 -0
  26. ai_edge_torch/_convert/test/test_convert_multisig.py +189 -0
  27. ai_edge_torch/_convert/test/test_to_channel_last_io.py +96 -0
  28. ai_edge_torch/_convert/to_channel_last_io.py +92 -0
  29. ai_edge_torch/conftest.py +20 -0
  30. ai_edge_torch/debug/__init__.py +17 -0
  31. ai_edge_torch/debug/culprit.py +496 -0
  32. ai_edge_torch/debug/test/__init__.py +14 -0
  33. ai_edge_torch/debug/test/test_culprit.py +140 -0
  34. ai_edge_torch/debug/test/test_search_model.py +51 -0
  35. ai_edge_torch/debug/utils.py +59 -0
  36. ai_edge_torch/experimental/__init__.py +14 -0
  37. ai_edge_torch/fx_pass_base.py +110 -0
  38. ai_edge_torch/generative/__init__.py +14 -0
  39. ai_edge_torch/generative/examples/__init__.py +14 -0
  40. ai_edge_torch/generative/examples/amd_llama_135m/__init__.py +14 -0
  41. ai_edge_torch/generative/examples/amd_llama_135m/amd_llama_135m.py +87 -0
  42. ai_edge_torch/generative/examples/amd_llama_135m/convert_to_tflite.py +70 -0
  43. ai_edge_torch/generative/examples/amd_llama_135m/verify.py +72 -0
  44. ai_edge_torch/generative/examples/gemma/__init__.py +14 -0
  45. ai_edge_torch/generative/examples/gemma/convert_gemma1_to_tflite.py +80 -0
  46. ai_edge_torch/generative/examples/gemma/convert_gemma2_to_tflite.py +80 -0
  47. ai_edge_torch/generative/examples/gemma/gemma1.py +107 -0
  48. ai_edge_torch/generative/examples/gemma/gemma2.py +295 -0
  49. ai_edge_torch/generative/examples/gemma/verify_gemma1.py +56 -0
  50. ai_edge_torch/generative/examples/gemma/verify_gemma2.py +43 -0
  51. ai_edge_torch/generative/examples/gemma/verify_util.py +157 -0
  52. ai_edge_torch/generative/examples/llama/__init__.py +14 -0
  53. ai_edge_torch/generative/examples/llama/convert_to_tflite.py +91 -0
  54. ai_edge_torch/generative/examples/llama/llama.py +196 -0
  55. ai_edge_torch/generative/examples/llama/verify.py +88 -0
  56. ai_edge_torch/generative/examples/moonshine/__init__.py +14 -0
  57. ai_edge_torch/generative/examples/moonshine/convert_moonshine_to_tflite.py +50 -0
  58. ai_edge_torch/generative/examples/moonshine/moonshine.py +103 -0
  59. ai_edge_torch/generative/examples/openelm/__init__.py +14 -0
  60. ai_edge_torch/generative/examples/openelm/convert_to_tflite.py +80 -0
  61. ai_edge_torch/generative/examples/openelm/openelm.py +127 -0
  62. ai_edge_torch/generative/examples/openelm/verify.py +71 -0
  63. ai_edge_torch/generative/examples/paligemma/__init__.py +14 -0
  64. ai_edge_torch/generative/examples/paligemma/convert_to_tflite.py +95 -0
  65. ai_edge_torch/generative/examples/paligemma/decoder.py +151 -0
  66. ai_edge_torch/generative/examples/paligemma/decoder2.py +177 -0
  67. ai_edge_torch/generative/examples/paligemma/image_encoder.py +160 -0
  68. ai_edge_torch/generative/examples/paligemma/paligemma.py +179 -0
  69. ai_edge_torch/generative/examples/paligemma/verify.py +161 -0
  70. ai_edge_torch/generative/examples/paligemma/verify_decoder.py +75 -0
  71. ai_edge_torch/generative/examples/paligemma/verify_decoder2.py +72 -0
  72. ai_edge_torch/generative/examples/paligemma/verify_image_encoder.py +99 -0
  73. ai_edge_torch/generative/examples/phi/__init__.py +14 -0
  74. ai_edge_torch/generative/examples/phi/convert_phi3_to_tflite.py +80 -0
  75. ai_edge_torch/generative/examples/phi/convert_to_tflite.py +80 -0
  76. ai_edge_torch/generative/examples/phi/phi2.py +107 -0
  77. ai_edge_torch/generative/examples/phi/phi3.py +219 -0
  78. ai_edge_torch/generative/examples/phi/verify.py +64 -0
  79. ai_edge_torch/generative/examples/phi/verify_phi3.py +69 -0
  80. ai_edge_torch/generative/examples/qwen/__init__.py +14 -0
  81. ai_edge_torch/generative/examples/qwen/convert_to_tflite.py +93 -0
  82. ai_edge_torch/generative/examples/qwen/qwen.py +134 -0
  83. ai_edge_torch/generative/examples/qwen/verify.py +88 -0
  84. ai_edge_torch/generative/examples/smollm/__init__.py +14 -0
  85. ai_edge_torch/generative/examples/smollm/convert_to_tflite.py +80 -0
  86. ai_edge_torch/generative/examples/smollm/convert_v2_to_tflite.py +71 -0
  87. ai_edge_torch/generative/examples/smollm/smollm.py +125 -0
  88. ai_edge_torch/generative/examples/smollm/verify.py +86 -0
  89. ai_edge_torch/generative/examples/stable_diffusion/__init__.py +14 -0
  90. ai_edge_torch/generative/examples/stable_diffusion/attention.py +108 -0
  91. ai_edge_torch/generative/examples/stable_diffusion/clip.py +185 -0
  92. ai_edge_torch/generative/examples/stable_diffusion/convert_to_tflite.py +173 -0
  93. ai_edge_torch/generative/examples/stable_diffusion/decoder.py +398 -0
  94. ai_edge_torch/generative/examples/stable_diffusion/diffusion.py +749 -0
  95. ai_edge_torch/generative/examples/stable_diffusion/encoder.py +119 -0
  96. ai_edge_torch/generative/examples/stable_diffusion/pipeline.py +254 -0
  97. ai_edge_torch/generative/examples/stable_diffusion/samplers/__init__.py +19 -0
  98. ai_edge_torch/generative/examples/stable_diffusion/samplers/k_euler.py +62 -0
  99. ai_edge_torch/generative/examples/stable_diffusion/samplers/k_euler_ancestral.py +66 -0
  100. ai_edge_torch/generative/examples/stable_diffusion/samplers/k_lms.py +74 -0
  101. ai_edge_torch/generative/examples/stable_diffusion/samplers/sampler.py +39 -0
  102. ai_edge_torch/generative/examples/stable_diffusion/tokenizer.py +111 -0
  103. ai_edge_torch/generative/examples/stable_diffusion/util.py +77 -0
  104. ai_edge_torch/generative/examples/t5/__init__.py +14 -0
  105. ai_edge_torch/generative/examples/t5/convert_to_tflite.py +138 -0
  106. ai_edge_torch/generative/examples/t5/t5.py +655 -0
  107. ai_edge_torch/generative/examples/t5/t5_attention.py +246 -0
  108. ai_edge_torch/generative/examples/test_models/__init__.py +14 -0
  109. ai_edge_torch/generative/examples/test_models/convert_toy_model.py +105 -0
  110. ai_edge_torch/generative/examples/test_models/toy_model.py +156 -0
  111. ai_edge_torch/generative/examples/test_models/toy_model_with_kv_cache.py +138 -0
  112. ai_edge_torch/generative/examples/tiny_llama/__init__.py +14 -0
  113. ai_edge_torch/generative/examples/tiny_llama/convert_to_tflite.py +80 -0
  114. ai_edge_torch/generative/examples/tiny_llama/tiny_llama.py +88 -0
  115. ai_edge_torch/generative/examples/tiny_llama/verify.py +72 -0
  116. ai_edge_torch/generative/fx_passes/__init__.py +30 -0
  117. ai_edge_torch/generative/fx_passes/remove_sdpa_zero_mask_pass.py +50 -0
  118. ai_edge_torch/generative/layers/__init__.py +14 -0
  119. ai_edge_torch/generative/layers/attention.py +399 -0
  120. ai_edge_torch/generative/layers/attention_utils.py +210 -0
  121. ai_edge_torch/generative/layers/builder.py +160 -0
  122. ai_edge_torch/generative/layers/feed_forward.py +120 -0
  123. ai_edge_torch/generative/layers/kv_cache.py +204 -0
  124. ai_edge_torch/generative/layers/lora.py +557 -0
  125. ai_edge_torch/generative/layers/model_config.py +238 -0
  126. ai_edge_torch/generative/layers/normalization.py +222 -0
  127. ai_edge_torch/generative/layers/rotary_position_embedding.py +94 -0
  128. ai_edge_torch/generative/layers/scaled_dot_product_attention.py +144 -0
  129. ai_edge_torch/generative/layers/unet/__init__.py +14 -0
  130. ai_edge_torch/generative/layers/unet/blocks_2d.py +806 -0
  131. ai_edge_torch/generative/layers/unet/builder.py +50 -0
  132. ai_edge_torch/generative/layers/unet/model_config.py +282 -0
  133. ai_edge_torch/generative/quantize/__init__.py +14 -0
  134. ai_edge_torch/generative/quantize/example.py +47 -0
  135. ai_edge_torch/generative/quantize/quant_attrs.py +68 -0
  136. ai_edge_torch/generative/quantize/quant_recipe.py +154 -0
  137. ai_edge_torch/generative/quantize/quant_recipe_utils.py +62 -0
  138. ai_edge_torch/generative/quantize/quant_recipes.py +56 -0
  139. ai_edge_torch/generative/quantize/supported_schemes.py +32 -0
  140. ai_edge_torch/generative/test/__init__.py +14 -0
  141. ai_edge_torch/generative/test/test_custom_dus.py +107 -0
  142. ai_edge_torch/generative/test/test_kv_cache.py +120 -0
  143. ai_edge_torch/generative/test/test_loader.py +83 -0
  144. ai_edge_torch/generative/test/test_lora.py +147 -0
  145. ai_edge_torch/generative/test/test_model_conversion.py +191 -0
  146. ai_edge_torch/generative/test/test_model_conversion_large.py +362 -0
  147. ai_edge_torch/generative/test/test_quantize.py +183 -0
  148. ai_edge_torch/generative/test/utils.py +82 -0
  149. ai_edge_torch/generative/utilities/__init__.py +15 -0
  150. ai_edge_torch/generative/utilities/converter.py +215 -0
  151. ai_edge_torch/generative/utilities/dynamic_update_slice.py +56 -0
  152. ai_edge_torch/generative/utilities/loader.py +398 -0
  153. ai_edge_torch/generative/utilities/model_builder.py +180 -0
  154. ai_edge_torch/generative/utilities/moonshine_loader.py +154 -0
  155. ai_edge_torch/generative/utilities/stable_diffusion_loader.py +1032 -0
  156. ai_edge_torch/generative/utilities/t5_loader.py +512 -0
  157. ai_edge_torch/generative/utilities/transformers_verifier.py +42 -0
  158. ai_edge_torch/generative/utilities/verifier.py +335 -0
  159. ai_edge_torch/hlfb/__init__.py +16 -0
  160. ai_edge_torch/hlfb/mark_pattern/__init__.py +153 -0
  161. ai_edge_torch/hlfb/mark_pattern/fx_utils.py +69 -0
  162. ai_edge_torch/hlfb/mark_pattern/pattern.py +288 -0
  163. ai_edge_torch/hlfb/test/__init__.py +14 -0
  164. ai_edge_torch/hlfb/test/test_mark_pattern.py +185 -0
  165. ai_edge_torch/lowertools/__init__.py +18 -0
  166. ai_edge_torch/lowertools/_shim.py +86 -0
  167. ai_edge_torch/lowertools/common_utils.py +142 -0
  168. ai_edge_torch/lowertools/odml_torch_utils.py +260 -0
  169. ai_edge_torch/lowertools/test_utils.py +62 -0
  170. ai_edge_torch/lowertools/torch_xla_utils.py +301 -0
  171. ai_edge_torch/lowertools/translate_recipe.py +163 -0
  172. ai_edge_torch/model.py +177 -0
  173. ai_edge_torch/odml_torch/__init__.py +20 -0
  174. ai_edge_torch/odml_torch/_torch_future.py +88 -0
  175. ai_edge_torch/odml_torch/_torch_library.py +19 -0
  176. ai_edge_torch/odml_torch/composite/__init__.py +16 -0
  177. ai_edge_torch/odml_torch/composite/mark_tensor.py +120 -0
  178. ai_edge_torch/odml_torch/composite/stablehlo_composite_builder.py +106 -0
  179. ai_edge_torch/odml_torch/debuginfo/__init__.py +16 -0
  180. ai_edge_torch/odml_torch/debuginfo/_build.py +43 -0
  181. ai_edge_torch/odml_torch/debuginfo/_op_polyfill.py +55 -0
  182. ai_edge_torch/odml_torch/export.py +403 -0
  183. ai_edge_torch/odml_torch/export_utils.py +157 -0
  184. ai_edge_torch/odml_torch/jax_bridge/__init__.py +18 -0
  185. ai_edge_torch/odml_torch/jax_bridge/_wrap.py +180 -0
  186. ai_edge_torch/odml_torch/jax_bridge/utils.py +75 -0
  187. ai_edge_torch/odml_torch/lowerings/__init__.py +27 -0
  188. ai_edge_torch/odml_torch/lowerings/_basic.py +294 -0
  189. ai_edge_torch/odml_torch/lowerings/_batch_norm.py +65 -0
  190. ai_edge_torch/odml_torch/lowerings/_convolution.py +243 -0
  191. ai_edge_torch/odml_torch/lowerings/_jax_lowerings.py +285 -0
  192. ai_edge_torch/odml_torch/lowerings/_layer_norm.py +87 -0
  193. ai_edge_torch/odml_torch/lowerings/_quantized_decomposed.py +177 -0
  194. ai_edge_torch/odml_torch/lowerings/_rand.py +142 -0
  195. ai_edge_torch/odml_torch/lowerings/context.py +42 -0
  196. ai_edge_torch/odml_torch/lowerings/decomp.py +69 -0
  197. ai_edge_torch/odml_torch/lowerings/registry.py +65 -0
  198. ai_edge_torch/odml_torch/lowerings/utils.py +201 -0
  199. ai_edge_torch/odml_torch/passes/__init__.py +38 -0
  200. ai_edge_torch/odml_torch/tf_integration.py +156 -0
  201. ai_edge_torch/quantize/__init__.py +16 -0
  202. ai_edge_torch/quantize/pt2e_quantizer.py +466 -0
  203. ai_edge_torch/quantize/pt2e_quantizer_utils.py +1061 -0
  204. ai_edge_torch/quantize/quant_config.py +85 -0
  205. ai_edge_torch/testing/__init__.py +14 -0
  206. ai_edge_torch/testing/model_coverage/__init__.py +16 -0
  207. ai_edge_torch/testing/model_coverage/model_coverage.py +145 -0
  208. ai_edge_torch/version.py +16 -0
  209. ai_edge_torch_nightly-0.3.0.dev20250114.dist-info/LICENSE +202 -0
  210. ai_edge_torch_nightly-0.3.0.dev20250114.dist-info/METADATA +44 -0
  211. ai_edge_torch_nightly-0.3.0.dev20250114.dist-info/RECORD +213 -0
  212. ai_edge_torch_nightly-0.3.0.dev20250114.dist-info/WHEEL +5 -0
  213. ai_edge_torch_nightly-0.3.0.dev20250114.dist-info/top_level.txt +1 -0
ai_edge_torch/generative/utilities/t5_loader.py
@@ -0,0 +1,512 @@
+ # Copyright 2024 The AI Edge Torch Authors.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ==============================================================================
+ # Common utility functions for data loading etc.
+ from dataclasses import dataclass
+ import glob
+ import os
+ from typing import Callable, Dict
+
+ from ai_edge_torch.generative.layers import model_config
+ from safetensors import safe_open
+ import torch
+
+
+ def load_safetensors(full_path: str):
+   """Loads safetensors into a single state dictionary.
+
+   Args:
+     full_path (string): the safetensors filename or directory that contains
+       the safetensors files.
+
+   Returns:
+     A state dictionary containing the loaded tensors.
+
+   Raises:
+     ValueError: If no tensors are loaded from the provided directory or file.
+   """
+   pattern = (
+       os.path.join(full_path, "*.safetensors")
+       if os.path.isdir(full_path)
+       else full_path
+   )
+   files = []
+   for file in glob.glob(pattern):
+     files.append(file)
+
+   tensors = {}
+   for file in files:
+     with safe_open(file, framework="pt") as fp:
+       for k in fp.keys():
+         assert k not in tensors
+         tensors[k] = fp.get_tensor(k)
+
+   if not tensors:
+     raise ValueError("Failed to load SafeTensors.")
+   return tensors
+
+
+ def load_pytorch_statedict(full_path: str):
+   """Loads state dictionary binaries into a single state dictionary.
+
+   Args:
+     full_path (string): the bin filename or directory that contains the bin
+       files.
+
+   Returns:
+     A state dictionary containing the loaded tensors.
+
+   Raises:
+     ValueError: If no tensors are loaded from the provided directory or file.
+   """
+   pattern = (
+       os.path.join(full_path, "*.bin")
+       if os.path.isdir(full_path)
+       else full_path
+   )
+   files = []
+   for file in glob.glob(pattern):
+     files.append(file)
+
+   tensors = {}
+   for file in files:
+     this_file_tensors = torch.load(file, map_location=torch.device("cpu"))
+     for k in this_file_tensors:
+       assert k not in tensors
+     tensors.update(this_file_tensors)
+
+   if not tensors:
+     raise ValueError("Failed to load torch bin files.")
+   return tensors
+
+
+ class ModelLoader:
+   """Utility class for loading and converting checkpoints to the ODML transformer layer format."""
+
+   @dataclass
+   class TensorNames:
+     attn_query_proj: str = None
+     attn_key_proj: str = None
+     attn_value_proj: str = None
+     attn_output_proj: str = None
+     relative_attn_bias: str = None
+
+     cross_attn_query_proj: str = None
+     cross_attn_key_proj: str = None
+     cross_attn_value_proj: str = None
+     cross_attn_output_proj: str = None
+
+     ff_up_proj: str = None
+     ff_down_proj: str = None
+     ff_gate_proj: str = None
+
+     pre_attn_norm: str = None
+     pre_cross_attn_norm: str = None
+     post_attn_norm: str = None
+     embedding: str = None
+     final_norm: str = None
+     lm_head: str = None
+
+   def __init__(self, file_name: str, names: TensorNames) -> None:
+     """ModelLoader constructor.
+
+     Can be used to load multiple models of the same type.
+
+     Args:
+       file_name (str): Path to the checkpoint. Can be a directory or an exact
+         file.
+       names (TensorNames): An instance of `TensorNames` to determine mappings.
+     """
+     self._file_name = file_name
+     self._names = names
+     self._loader = self._get_loader()
+
+   def load(
+       self,
+       model: torch.nn.Module,
+       strict: bool = True,
+       fuse_attention: bool = True,
+   ):
+     """Loads the model from the checkpoint.
+
+     Args:
+       model (torch.nn.Module): The PyTorch model that needs to be loaded.
+       strict (bool, optional): Whether the converted keys are strictly
+         matched. Defaults to True.
+       fuse_attention (bool, optional): Whether to fuse the separate Q, K, and
+         V projections into a single attention tensor. Defaults to True.
+
+     Raises:
+       ValueError: If conversion results in unmapped tensors and strict mode is
+         enabled.
+     """
+     state = self._loader(self._file_name)
+
+     if isinstance(self._names, ModelLoader.TensorNames):
+       converted_state = self._do_load(
+           model, state, self._names, fuse_attention=fuse_attention
+       )
+     elif isinstance(self._names, dict):
+       converted_state = {}
+       for additional_prefix, _ in self._names.items():
+         local_converted_state = self._do_load(
+             model,
+             state,
+             self._names[additional_prefix],
+             additional_prefix,
+             fuse_attention=fuse_attention,
+         )
+         converted_state.update(local_converted_state)
+     else:
+       raise ValueError(f"Unknown type for names: {type(self._names)}")
+
+     if strict and state:
+       raise ValueError(
+           "Failed to map all tensors. Remaining tensors are:"
+           f" {list(state.keys())}"
+       )
+     model.load_state_dict(converted_state, strict=strict)
+
+   def _do_load(
+       self, model, state, names, additional_prefix="", fuse_attention=True
+   ):
+     """Loads the model from the checkpoint.
+
+     Args:
+       model (torch.nn.Module): The PyTorch model that needs to be loaded.
+       state (Dict[str, torch.Tensor]): The PyTorch state dictionary.
+       names (TensorNames): The TensorNames for the model we are loading.
+
+     Returns:
+       Dict[str, torch.Tensor]: Map of name to tensor for loading.
+     """
+     converted_state = dict()
+     if names.embedding is not None:
+       converted_state["tok_embedding.weight"] = state.pop(
+           f"{names.embedding}.weight"
+       )
+     if names.lm_head is not None:
+       converted_state["lm_head.weight"] = state.pop(f"{names.lm_head}.weight")
+       if model.config.lm_head_use_bias:
+         converted_state["lm_head.bias"] = state.pop(f"{names.lm_head}.bias")
+     if names.final_norm is not None:
+       final_norm_name = names.final_norm
+       prefix = additional_prefix
+       converted_state[f"{prefix}final_norm.weight"] = state.pop(
+           f"{final_norm_name}.weight"
+       )
+       if f"{final_norm_name}.bias" in state:
+         converted_state["final_norm.bias"] = state.pop(
+             f"{final_norm_name}.bias"
+         )
+
+     if names.relative_attn_bias:
+       rel_attn_name = names.relative_attn_bias
+       prefix = additional_prefix + "transformer_blocks.0"
+       converted_state[f"{prefix}.atten_func.relative_attention_bias.weight"] = (
+           state.pop(f"{rel_attn_name}.weight")
+       )
+
+     for i in range(model.config.num_layers):
+       self._map_norm(
+           i, model.config, state, converted_state, names, additional_prefix
+       )
+       self._map_feedforward(
+           i, model.config, state, converted_state, names, additional_prefix
+       )
+       self._map_attention(
+           i,
+           model.config,
+           state,
+           converted_state,
+           names,
+           additional_prefix,
+           fuse_attention,
+       )
+       self._map_cross_attention(
+           i,
+           model.config,
+           state,
+           converted_state,
+           names,
+           additional_prefix,
+           fuse_attention,
+       )
+
+     return converted_state
+
+   def _get_loader(self) -> Callable[[str], Dict[str, torch.Tensor]]:
+     """A best-effort method for finding an appropriate state loader.
+
+     Raises:
+       ValueError: If it fails to find an appropriate loader.
+
+     Returns:
+       Callable[[str], Dict[str, torch.Tensor]]: State loader to be used.
+     """
+     if os.path.isdir(self._file_name):
+       if glob.glob(os.path.join(self._file_name, "*.safetensors")):
+         return load_safetensors
+       if glob.glob(os.path.join(self._file_name, "*.bin")):
+         return load_pytorch_statedict
+
+     if self._file_name.endswith(".safetensors"):
+       return load_safetensors
+
+     if self._file_name.endswith(".bin"):
+       return load_pytorch_statedict
+
+     raise ValueError("File format not supported.")
+
+   def _map_feedforward(
+       self,
+       idx: int,
+       config: model_config.ModelConfig,
+       state: Dict[str, torch.Tensor],
+       converted_state: Dict[str, torch.Tensor],
+       names: TensorNames,
+       additional_prefix: str = "",
+   ):
+     prefix = additional_prefix + f"transformer_blocks.{idx}"
+     if names.ff_up_proj is None or names.ff_down_proj is None:
+       return
+     ff_config = config.block_config(idx).ff_config
+     if ff_config.type == model_config.FeedForwardType.SEQUENTIAL:
+       ff_up_proj_name = names.ff_up_proj.format(idx)
+       ff_down_proj_name = names.ff_down_proj.format(idx)
+       converted_state[f"{prefix}.ff.w1.weight"] = state.pop(
+           f"{ff_up_proj_name}.weight"
+       )
+       converted_state[f"{prefix}.ff.w2.weight"] = state.pop(
+           f"{ff_down_proj_name}.weight"
+       )
+       if ff_config.use_bias:
+         converted_state[f"{prefix}.ff.w1.bias"] = state.pop(
+             f"{ff_up_proj_name}.bias"
+         )
+         converted_state[f"{prefix}.ff.w2.bias"] = state.pop(
+             f"{ff_down_proj_name}.bias"
+         )
+     else:
+       if names.ff_gate_proj is not None:
+         ff_up_proj_name = names.ff_up_proj.format(idx)
+         ff_down_proj_name = names.ff_down_proj.format(idx)
+         ff_gate_proj_name = names.ff_gate_proj.format(idx)
+         converted_state[f"{prefix}.ff.w3.weight"] = state.pop(
+             f"{ff_up_proj_name}.weight"
+         )
+         converted_state[f"{prefix}.ff.w2.weight"] = state.pop(
+             f"{ff_down_proj_name}.weight"
+         )
+         converted_state[f"{prefix}.ff.w1.weight"] = state.pop(
+             f"{ff_gate_proj_name}.weight"
+         )
+         if ff_config.use_bias:
+           converted_state[f"{prefix}.ff.w3.bias"] = state.pop(
+               f"{ff_up_proj_name}.bias"
+           )
+           converted_state[f"{prefix}.ff.w2.bias"] = state.pop(
+               f"{ff_down_proj_name}.bias"
+           )
+           converted_state[f"{prefix}.ff.w1.bias"] = state.pop(
+               f"{ff_gate_proj_name}.bias"
+           )
+
+   def _map_attention(
+       self,
+       idx: int,
+       config: model_config.ModelConfig,
+       state: Dict[str, torch.Tensor],
+       converted_state: Dict[str, torch.Tensor],
+       names: TensorNames,
+       additional_prefix: str = "",
+       fuse_attention: bool = True,
+   ):
+     if (
+         names.attn_query_proj is None
+         or names.attn_key_proj is None
+         or names.attn_value_proj is None
+     ):
+       return
+     prefix = additional_prefix + f"transformer_blocks.{idx}"
+     attn_config = config.block_config(idx).attn_config
+     q_name = names.attn_query_proj.format(idx)
+     k_name = names.attn_key_proj.format(idx)
+     v_name = names.attn_value_proj.format(idx)
+     # model.encoder.transformer_blocks[0].atten_func.q_projection.weight
+     if fuse_attention:
+       converted_state[f"{prefix}.atten_func.attn.weight"] = self._fuse_qkv(
+           attn_config,
+           state.pop(f"{q_name}.weight"),
+           state.pop(f"{k_name}.weight"),
+           state.pop(f"{v_name}.weight"),
+       )
+       if attn_config.qkv_use_bias:
+         converted_state[f"{prefix}.atten_func.attn.bias"] = self._fuse_qkv(
+             attn_config,
+             state.pop(f"{q_name}.bias"),
+             state.pop(f"{k_name}.bias"),
+             state.pop(f"{v_name}.bias"),
+         )
+     else:
+       converted_state[f"{prefix}.atten_func.q_projection.weight"] = state.pop(
+           f"{q_name}.weight"
+       )
+       converted_state[f"{prefix}.atten_func.k_projection.weight"] = state.pop(
+           f"{k_name}.weight"
+       )
+       converted_state[f"{prefix}.atten_func.v_projection.weight"] = state.pop(
+           f"{v_name}.weight"
+       )
+       if attn_config.qkv_use_bias:
+         converted_state[f"{prefix}.atten_func.q_projection.bias"] = state.pop(
+             f"{q_name}.bias"
+         )
+         converted_state[f"{prefix}.atten_func.k_projection.bias"] = state.pop(
+             f"{k_name}.bias"
+         )
+         converted_state[f"{prefix}.atten_func.v_projection.bias"] = state.pop(
+             f"{v_name}.bias"
+         )
+
+     o_name = names.attn_output_proj.format(idx)
+     converted_state[f"{prefix}.atten_func.output_projection.weight"] = (
+         state.pop(f"{o_name}.weight")
+     )
+     if attn_config.output_proj_use_bias:
+       converted_state[f"{prefix}.atten_func.output_projection.bias"] = (
+           state.pop(f"{o_name}.bias")
+       )
+
+   def _map_cross_attention(
+       self,
+       idx: int,
+       config: model_config.ModelConfig,
+       state: Dict[str, torch.Tensor],
+       converted_state: Dict[str, torch.Tensor],
+       names: TensorNames,
+       additional_prefix: str = "",
+       fuse_attention: bool = True,
+   ):
+     if (
+         names.cross_attn_query_proj is None
+         or names.cross_attn_key_proj is None
+         or names.cross_attn_value_proj is None
+     ):
+       return
+     prefix = additional_prefix + f"transformer_blocks.{idx}"
+     attn_config = config.block_config(idx).attn_config
+     q_name = names.cross_attn_query_proj.format(idx)
+     k_name = names.cross_attn_key_proj.format(idx)
+     v_name = names.cross_attn_value_proj.format(idx)
+
+     if fuse_attention:
+       converted_state[f"{prefix}.cross_atten_func.attn.weight"] = (
+           self._fuse_qkv(
+               attn_config,
+               state.pop(f"{q_name}.weight"),
+               state.pop(f"{k_name}.weight"),
+               state.pop(f"{v_name}.weight"),
+           )
+       )
+       if attn_config.qkv_use_bias:
+         converted_state[f"{prefix}.cross_atten_func.attn.bias"] = (
+             self._fuse_qkv(
+                 attn_config,
+                 state.pop(f"{q_name}.bias"),
+                 state.pop(f"{k_name}.bias"),
+                 state.pop(f"{v_name}.bias"),
+             )
+         )
+     else:
+       converted_state[f"{prefix}.cross_atten_func.q_projection.weight"] = (
+           state.pop(f"{q_name}.weight")
+       )
+       converted_state[f"{prefix}.cross_atten_func.k_projection.weight"] = (
+           state.pop(f"{k_name}.weight")
+       )
+       converted_state[f"{prefix}.cross_atten_func.v_projection.weight"] = (
+           state.pop(f"{v_name}.weight")
+       )
+       if attn_config.qkv_use_bias:
+         converted_state[f"{prefix}.cross_atten_func.q_projection.bias"] = (
+             state.pop(f"{q_name}.bias")
+         )
+         converted_state[f"{prefix}.cross_atten_func.k_projection.bias"] = (
+             state.pop(f"{k_name}.bias")
+         )
+         converted_state[f"{prefix}.cross_atten_func.v_projection.bias"] = (
+             state.pop(f"{v_name}.bias")
+         )
+
+     o_name = names.cross_attn_output_proj.format(idx)
+     converted_state[f"{prefix}.cross_atten_func.output_projection.weight"] = (
+         state.pop(f"{o_name}.weight")
+     )
+     if attn_config.output_proj_use_bias:
+       converted_state[f"{prefix}.cross_atten_func.output_projection.bias"] = (
+           state.pop(f"{o_name}.bias")
+       )
+
+   def _map_norm(
+       self,
+       idx: int,
+       config: model_config.ModelConfig,
+       state: Dict[str, torch.Tensor],
+       converted_state: Dict[str, torch.Tensor],
+       names: TensorNames,
+       additional_prefix: str = "",
+   ):
+     prefix = additional_prefix + f"transformer_blocks.{idx}"
+     if names.pre_attn_norm is not None:
+       pre_attn_norm_name = names.pre_attn_norm.format(idx)
+       converted_state[f"{prefix}.atten_func.pre_atten_norm.weight"] = state.pop(
+           f"{pre_attn_norm_name}.weight"
+       )
+       if f"{pre_attn_norm_name}.bias" in state:
+         converted_state[f"{prefix}.atten_func.pre_atten_norm.bias"] = state.pop(
+             f"{pre_attn_norm_name}.bias"
+         )
+
+     if names.pre_cross_attn_norm:
+       pre_cross_attn_norm_name = names.pre_cross_attn_norm.format(idx)
+       converted_state[f"{prefix}.cross_atten_func.pre_atten_norm.weight"] = (
+           state.pop(f"{pre_cross_attn_norm_name}.weight")
+       )
+       if f"{pre_cross_attn_norm_name}.bias" in state:
+         converted_state[f"{prefix}.cross_atten_func.pre_atten_norm.bias"] = (
+             state.pop(f"{pre_cross_attn_norm_name}.bias")
+         )
+
+     if names.post_attn_norm is not None:
+       post_attn_norm_name = names.post_attn_norm.format(idx)
+       converted_state[f"{prefix}.post_atten_norm.weight"] = state.pop(
+           f"{post_attn_norm_name}.weight"
+       )
+       if f"{post_attn_norm_name}.bias" in state:
+         converted_state[f"{prefix}.post_atten_norm.bias"] = state.pop(
+             f"{post_attn_norm_name}.bias"
+         )
+
+   def _fuse_qkv(
+       self,
+       attn_config: model_config.AttentionConfig,
+       q: torch.Tensor,
+       k: torch.Tensor,
+       v: torch.Tensor,
+   ) -> torch.Tensor:
+     # Split q into one chunk per query group, and k/v into one head-sized
+     # chunk per group, then interleave them as [q_g0, k_0, v_0, q_g1, ...].
+     q_per_kv = attn_config.num_heads // attn_config.num_query_groups
+     qs = torch.split(q, attn_config.head_dim * q_per_kv)
+     ks = torch.split(k, attn_config.head_dim)
+     vs = torch.split(v, attn_config.head_dim)
+     cycled = [t for group in zip(qs, ks, vs) for t in group]
+     return torch.cat(cycled)
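
For context, a minimal usage sketch of the loader above (not part of the diff). The tensor-name templates below are hypothetical, written in the style of a HuggingFace T5 checkpoint; the `{}` placeholders are filled with the block index via `str.format` inside the `_map_*` helpers, and the real mappings ship with each model definition under `ai_edge_torch/generative/examples`.

    from ai_edge_torch.generative.utilities import t5_loader

    # Hypothetical tensor-name templates; "{}" becomes the block index.
    TENSOR_NAMES = t5_loader.ModelLoader.TensorNames(
        attn_query_proj="encoder.block.{}.layer.0.SelfAttention.q",
        attn_key_proj="encoder.block.{}.layer.0.SelfAttention.k",
        attn_value_proj="encoder.block.{}.layer.0.SelfAttention.v",
        attn_output_proj="encoder.block.{}.layer.0.SelfAttention.o",
        pre_attn_norm="encoder.block.{}.layer.0.layer_norm",
        ff_up_proj="encoder.block.{}.layer.1.DenseReluDense.wi",
        ff_down_proj="encoder.block.{}.layer.1.DenseReluDense.wo",
        embedding="shared",
        final_norm="encoder.final_layer_norm",
    )

    # The path is a placeholder; it may be a directory of *.safetensors or
    # *.bin files, or a single checkpoint file.
    loader = t5_loader.ModelLoader("/path/to/checkpoint", TENSOR_NAMES)
    loader.load(model, strict=False)  # model: a torch.nn.Module built from model_config

With the default `fuse_attention=True`, each block's separate Q/K/V tensors are combined by `_fuse_qkv` into one fused projection, interleaved per query group.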
ai_edge_torch/generative/utilities/transformers_verifier.py
@@ -0,0 +1,42 @@
+ # Copyright 2024 The AI Edge Torch Authors.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ==============================================================================
+
+ """Utilities for the models predefined in HuggingFace transformers."""
+
+ from typing import cast
+
+ from ai_edge_torch.generative.utilities import verifier
+ import torch
+ import transformers
+
+
+ class TransformersModelWrapper(verifier.ModelWrapper):
+   """A wrapper for a model predefined in HuggingFace transformers.
+
+   The verifier expects forward() to return logits, while Transformers models
+   return an object with a `logits` field.
+
+   Transformers models take the `max_new_tokens` setting for generate() via a
+   GenerationConfig.
+   """
+
+   def forward(self, tokens: torch.Tensor) -> torch.Tensor:
+     return self.model.forward(tokens).logits
+
+   def generate(
+       self, inputs: torch.Tensor, max_new_tokens: int
+   ) -> torch.IntTensor:
+     gen_config = transformers.GenerationConfig(max_new_tokens=max_new_tokens)
+     return self.model.generate(inputs=inputs, generation_config=gen_config)
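
A minimal usage sketch of the wrapper (not part of the diff). The checkpoint id is a placeholder, and `verifier.ModelWrapper` is assumed to take the wrapped model in its constructor and expose it as `self.model`, as the two methods above imply.

    import torch
    import transformers

    from ai_edge_torch.generative.utilities import transformers_verifier

    # Placeholder checkpoint id; any HuggingFace causal LM should work here.
    hf_model = transformers.AutoModelForCausalLM.from_pretrained("org/model-name")
    wrapped = transformers_verifier.TransformersModelWrapper(hf_model)

    tokens = torch.tensor([[1, 2, 3]], dtype=torch.int)
    logits = wrapped.forward(tokens)  # a plain logits tensor, not a ModelOutput
    output_ids = wrapped.generate(tokens, max_new_tokens=8)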