ai-edge-torch-nightly 0.3.0.dev20250114__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (213) hide show
  1. ai_edge_torch/__init__.py +32 -0
  2. ai_edge_torch/_config.py +69 -0
  3. ai_edge_torch/_convert/__init__.py +14 -0
  4. ai_edge_torch/_convert/conversion.py +153 -0
  5. ai_edge_torch/_convert/conversion_utils.py +64 -0
  6. ai_edge_torch/_convert/converter.py +270 -0
  7. ai_edge_torch/_convert/fx_passes/__init__.py +23 -0
  8. ai_edge_torch/_convert/fx_passes/build_aten_composite_pass.py +288 -0
  9. ai_edge_torch/_convert/fx_passes/build_interpolate_composite_pass.py +131 -0
  10. ai_edge_torch/_convert/fx_passes/inject_mlir_debuginfo_pass.py +73 -0
  11. ai_edge_torch/_convert/fx_passes/optimize_layout_transposes_pass/__init__.py +16 -0
  12. ai_edge_torch/_convert/fx_passes/optimize_layout_transposes_pass/layout_check.py +258 -0
  13. ai_edge_torch/_convert/fx_passes/optimize_layout_transposes_pass/layout_mark.py +50 -0
  14. ai_edge_torch/_convert/fx_passes/optimize_layout_transposes_pass/layout_partitioners/__init__.py +18 -0
  15. ai_edge_torch/_convert/fx_passes/optimize_layout_transposes_pass/layout_partitioners/greedy.py +68 -0
  16. ai_edge_torch/_convert/fx_passes/optimize_layout_transposes_pass/layout_partitioners/min_cut.py +216 -0
  17. ai_edge_torch/_convert/fx_passes/optimize_layout_transposes_pass/layout_rewrite.py +449 -0
  18. ai_edge_torch/_convert/fx_passes/optimize_layout_transposes_pass/op_func_registry.py +30 -0
  19. ai_edge_torch/_convert/fx_passes/optimize_layout_transposes_pass/pass_body.py +303 -0
  20. ai_edge_torch/_convert/fx_passes/optimize_layout_transposes_pass/utils.py +64 -0
  21. ai_edge_torch/_convert/fx_passes/remove_non_user_outputs_pass.py +52 -0
  22. ai_edge_torch/_convert/signature.py +66 -0
  23. ai_edge_torch/_convert/test/__init__.py +14 -0
  24. ai_edge_torch/_convert/test/test_convert.py +558 -0
  25. ai_edge_torch/_convert/test/test_convert_composites.py +234 -0
  26. ai_edge_torch/_convert/test/test_convert_multisig.py +189 -0
  27. ai_edge_torch/_convert/test/test_to_channel_last_io.py +96 -0
  28. ai_edge_torch/_convert/to_channel_last_io.py +92 -0
  29. ai_edge_torch/conftest.py +20 -0
  30. ai_edge_torch/debug/__init__.py +17 -0
  31. ai_edge_torch/debug/culprit.py +496 -0
  32. ai_edge_torch/debug/test/__init__.py +14 -0
  33. ai_edge_torch/debug/test/test_culprit.py +140 -0
  34. ai_edge_torch/debug/test/test_search_model.py +51 -0
  35. ai_edge_torch/debug/utils.py +59 -0
  36. ai_edge_torch/experimental/__init__.py +14 -0
  37. ai_edge_torch/fx_pass_base.py +110 -0
  38. ai_edge_torch/generative/__init__.py +14 -0
  39. ai_edge_torch/generative/examples/__init__.py +14 -0
  40. ai_edge_torch/generative/examples/amd_llama_135m/__init__.py +14 -0
  41. ai_edge_torch/generative/examples/amd_llama_135m/amd_llama_135m.py +87 -0
  42. ai_edge_torch/generative/examples/amd_llama_135m/convert_to_tflite.py +70 -0
  43. ai_edge_torch/generative/examples/amd_llama_135m/verify.py +72 -0
  44. ai_edge_torch/generative/examples/gemma/__init__.py +14 -0
  45. ai_edge_torch/generative/examples/gemma/convert_gemma1_to_tflite.py +80 -0
  46. ai_edge_torch/generative/examples/gemma/convert_gemma2_to_tflite.py +80 -0
  47. ai_edge_torch/generative/examples/gemma/gemma1.py +107 -0
  48. ai_edge_torch/generative/examples/gemma/gemma2.py +295 -0
  49. ai_edge_torch/generative/examples/gemma/verify_gemma1.py +56 -0
  50. ai_edge_torch/generative/examples/gemma/verify_gemma2.py +43 -0
  51. ai_edge_torch/generative/examples/gemma/verify_util.py +157 -0
  52. ai_edge_torch/generative/examples/llama/__init__.py +14 -0
  53. ai_edge_torch/generative/examples/llama/convert_to_tflite.py +91 -0
  54. ai_edge_torch/generative/examples/llama/llama.py +196 -0
  55. ai_edge_torch/generative/examples/llama/verify.py +88 -0
  56. ai_edge_torch/generative/examples/moonshine/__init__.py +14 -0
  57. ai_edge_torch/generative/examples/moonshine/convert_moonshine_to_tflite.py +50 -0
  58. ai_edge_torch/generative/examples/moonshine/moonshine.py +103 -0
  59. ai_edge_torch/generative/examples/openelm/__init__.py +14 -0
  60. ai_edge_torch/generative/examples/openelm/convert_to_tflite.py +80 -0
  61. ai_edge_torch/generative/examples/openelm/openelm.py +127 -0
  62. ai_edge_torch/generative/examples/openelm/verify.py +71 -0
  63. ai_edge_torch/generative/examples/paligemma/__init__.py +14 -0
  64. ai_edge_torch/generative/examples/paligemma/convert_to_tflite.py +95 -0
  65. ai_edge_torch/generative/examples/paligemma/decoder.py +151 -0
  66. ai_edge_torch/generative/examples/paligemma/decoder2.py +177 -0
  67. ai_edge_torch/generative/examples/paligemma/image_encoder.py +160 -0
  68. ai_edge_torch/generative/examples/paligemma/paligemma.py +179 -0
  69. ai_edge_torch/generative/examples/paligemma/verify.py +161 -0
  70. ai_edge_torch/generative/examples/paligemma/verify_decoder.py +75 -0
  71. ai_edge_torch/generative/examples/paligemma/verify_decoder2.py +72 -0
  72. ai_edge_torch/generative/examples/paligemma/verify_image_encoder.py +99 -0
  73. ai_edge_torch/generative/examples/phi/__init__.py +14 -0
  74. ai_edge_torch/generative/examples/phi/convert_phi3_to_tflite.py +80 -0
  75. ai_edge_torch/generative/examples/phi/convert_to_tflite.py +80 -0
  76. ai_edge_torch/generative/examples/phi/phi2.py +107 -0
  77. ai_edge_torch/generative/examples/phi/phi3.py +219 -0
  78. ai_edge_torch/generative/examples/phi/verify.py +64 -0
  79. ai_edge_torch/generative/examples/phi/verify_phi3.py +69 -0
  80. ai_edge_torch/generative/examples/qwen/__init__.py +14 -0
  81. ai_edge_torch/generative/examples/qwen/convert_to_tflite.py +93 -0
  82. ai_edge_torch/generative/examples/qwen/qwen.py +134 -0
  83. ai_edge_torch/generative/examples/qwen/verify.py +88 -0
  84. ai_edge_torch/generative/examples/smollm/__init__.py +14 -0
  85. ai_edge_torch/generative/examples/smollm/convert_to_tflite.py +80 -0
  86. ai_edge_torch/generative/examples/smollm/convert_v2_to_tflite.py +71 -0
  87. ai_edge_torch/generative/examples/smollm/smollm.py +125 -0
  88. ai_edge_torch/generative/examples/smollm/verify.py +86 -0
  89. ai_edge_torch/generative/examples/stable_diffusion/__init__.py +14 -0
  90. ai_edge_torch/generative/examples/stable_diffusion/attention.py +108 -0
  91. ai_edge_torch/generative/examples/stable_diffusion/clip.py +185 -0
  92. ai_edge_torch/generative/examples/stable_diffusion/convert_to_tflite.py +173 -0
  93. ai_edge_torch/generative/examples/stable_diffusion/decoder.py +398 -0
  94. ai_edge_torch/generative/examples/stable_diffusion/diffusion.py +749 -0
  95. ai_edge_torch/generative/examples/stable_diffusion/encoder.py +119 -0
  96. ai_edge_torch/generative/examples/stable_diffusion/pipeline.py +254 -0
  97. ai_edge_torch/generative/examples/stable_diffusion/samplers/__init__.py +19 -0
  98. ai_edge_torch/generative/examples/stable_diffusion/samplers/k_euler.py +62 -0
  99. ai_edge_torch/generative/examples/stable_diffusion/samplers/k_euler_ancestral.py +66 -0
  100. ai_edge_torch/generative/examples/stable_diffusion/samplers/k_lms.py +74 -0
  101. ai_edge_torch/generative/examples/stable_diffusion/samplers/sampler.py +39 -0
  102. ai_edge_torch/generative/examples/stable_diffusion/tokenizer.py +111 -0
  103. ai_edge_torch/generative/examples/stable_diffusion/util.py +77 -0
  104. ai_edge_torch/generative/examples/t5/__init__.py +14 -0
  105. ai_edge_torch/generative/examples/t5/convert_to_tflite.py +138 -0
  106. ai_edge_torch/generative/examples/t5/t5.py +655 -0
  107. ai_edge_torch/generative/examples/t5/t5_attention.py +246 -0
  108. ai_edge_torch/generative/examples/test_models/__init__.py +14 -0
  109. ai_edge_torch/generative/examples/test_models/convert_toy_model.py +105 -0
  110. ai_edge_torch/generative/examples/test_models/toy_model.py +156 -0
  111. ai_edge_torch/generative/examples/test_models/toy_model_with_kv_cache.py +138 -0
  112. ai_edge_torch/generative/examples/tiny_llama/__init__.py +14 -0
  113. ai_edge_torch/generative/examples/tiny_llama/convert_to_tflite.py +80 -0
  114. ai_edge_torch/generative/examples/tiny_llama/tiny_llama.py +88 -0
  115. ai_edge_torch/generative/examples/tiny_llama/verify.py +72 -0
  116. ai_edge_torch/generative/fx_passes/__init__.py +30 -0
  117. ai_edge_torch/generative/fx_passes/remove_sdpa_zero_mask_pass.py +50 -0
  118. ai_edge_torch/generative/layers/__init__.py +14 -0
  119. ai_edge_torch/generative/layers/attention.py +399 -0
  120. ai_edge_torch/generative/layers/attention_utils.py +210 -0
  121. ai_edge_torch/generative/layers/builder.py +160 -0
  122. ai_edge_torch/generative/layers/feed_forward.py +120 -0
  123. ai_edge_torch/generative/layers/kv_cache.py +204 -0
  124. ai_edge_torch/generative/layers/lora.py +557 -0
  125. ai_edge_torch/generative/layers/model_config.py +238 -0
  126. ai_edge_torch/generative/layers/normalization.py +222 -0
  127. ai_edge_torch/generative/layers/rotary_position_embedding.py +94 -0
  128. ai_edge_torch/generative/layers/scaled_dot_product_attention.py +144 -0
  129. ai_edge_torch/generative/layers/unet/__init__.py +14 -0
  130. ai_edge_torch/generative/layers/unet/blocks_2d.py +806 -0
  131. ai_edge_torch/generative/layers/unet/builder.py +50 -0
  132. ai_edge_torch/generative/layers/unet/model_config.py +282 -0
  133. ai_edge_torch/generative/quantize/__init__.py +14 -0
  134. ai_edge_torch/generative/quantize/example.py +47 -0
  135. ai_edge_torch/generative/quantize/quant_attrs.py +68 -0
  136. ai_edge_torch/generative/quantize/quant_recipe.py +154 -0
  137. ai_edge_torch/generative/quantize/quant_recipe_utils.py +62 -0
  138. ai_edge_torch/generative/quantize/quant_recipes.py +56 -0
  139. ai_edge_torch/generative/quantize/supported_schemes.py +32 -0
  140. ai_edge_torch/generative/test/__init__.py +14 -0
  141. ai_edge_torch/generative/test/test_custom_dus.py +107 -0
  142. ai_edge_torch/generative/test/test_kv_cache.py +120 -0
  143. ai_edge_torch/generative/test/test_loader.py +83 -0
  144. ai_edge_torch/generative/test/test_lora.py +147 -0
  145. ai_edge_torch/generative/test/test_model_conversion.py +191 -0
  146. ai_edge_torch/generative/test/test_model_conversion_large.py +362 -0
  147. ai_edge_torch/generative/test/test_quantize.py +183 -0
  148. ai_edge_torch/generative/test/utils.py +82 -0
  149. ai_edge_torch/generative/utilities/__init__.py +15 -0
  150. ai_edge_torch/generative/utilities/converter.py +215 -0
  151. ai_edge_torch/generative/utilities/dynamic_update_slice.py +56 -0
  152. ai_edge_torch/generative/utilities/loader.py +398 -0
  153. ai_edge_torch/generative/utilities/model_builder.py +180 -0
  154. ai_edge_torch/generative/utilities/moonshine_loader.py +154 -0
  155. ai_edge_torch/generative/utilities/stable_diffusion_loader.py +1032 -0
  156. ai_edge_torch/generative/utilities/t5_loader.py +512 -0
  157. ai_edge_torch/generative/utilities/transformers_verifier.py +42 -0
  158. ai_edge_torch/generative/utilities/verifier.py +335 -0
  159. ai_edge_torch/hlfb/__init__.py +16 -0
  160. ai_edge_torch/hlfb/mark_pattern/__init__.py +153 -0
  161. ai_edge_torch/hlfb/mark_pattern/fx_utils.py +69 -0
  162. ai_edge_torch/hlfb/mark_pattern/pattern.py +288 -0
  163. ai_edge_torch/hlfb/test/__init__.py +14 -0
  164. ai_edge_torch/hlfb/test/test_mark_pattern.py +185 -0
  165. ai_edge_torch/lowertools/__init__.py +18 -0
  166. ai_edge_torch/lowertools/_shim.py +86 -0
  167. ai_edge_torch/lowertools/common_utils.py +142 -0
  168. ai_edge_torch/lowertools/odml_torch_utils.py +260 -0
  169. ai_edge_torch/lowertools/test_utils.py +62 -0
  170. ai_edge_torch/lowertools/torch_xla_utils.py +301 -0
  171. ai_edge_torch/lowertools/translate_recipe.py +163 -0
  172. ai_edge_torch/model.py +177 -0
  173. ai_edge_torch/odml_torch/__init__.py +20 -0
  174. ai_edge_torch/odml_torch/_torch_future.py +88 -0
  175. ai_edge_torch/odml_torch/_torch_library.py +19 -0
  176. ai_edge_torch/odml_torch/composite/__init__.py +16 -0
  177. ai_edge_torch/odml_torch/composite/mark_tensor.py +120 -0
  178. ai_edge_torch/odml_torch/composite/stablehlo_composite_builder.py +106 -0
  179. ai_edge_torch/odml_torch/debuginfo/__init__.py +16 -0
  180. ai_edge_torch/odml_torch/debuginfo/_build.py +43 -0
  181. ai_edge_torch/odml_torch/debuginfo/_op_polyfill.py +55 -0
  182. ai_edge_torch/odml_torch/export.py +403 -0
  183. ai_edge_torch/odml_torch/export_utils.py +157 -0
  184. ai_edge_torch/odml_torch/jax_bridge/__init__.py +18 -0
  185. ai_edge_torch/odml_torch/jax_bridge/_wrap.py +180 -0
  186. ai_edge_torch/odml_torch/jax_bridge/utils.py +75 -0
  187. ai_edge_torch/odml_torch/lowerings/__init__.py +27 -0
  188. ai_edge_torch/odml_torch/lowerings/_basic.py +294 -0
  189. ai_edge_torch/odml_torch/lowerings/_batch_norm.py +65 -0
  190. ai_edge_torch/odml_torch/lowerings/_convolution.py +243 -0
  191. ai_edge_torch/odml_torch/lowerings/_jax_lowerings.py +285 -0
  192. ai_edge_torch/odml_torch/lowerings/_layer_norm.py +87 -0
  193. ai_edge_torch/odml_torch/lowerings/_quantized_decomposed.py +177 -0
  194. ai_edge_torch/odml_torch/lowerings/_rand.py +142 -0
  195. ai_edge_torch/odml_torch/lowerings/context.py +42 -0
  196. ai_edge_torch/odml_torch/lowerings/decomp.py +69 -0
  197. ai_edge_torch/odml_torch/lowerings/registry.py +65 -0
  198. ai_edge_torch/odml_torch/lowerings/utils.py +201 -0
  199. ai_edge_torch/odml_torch/passes/__init__.py +38 -0
  200. ai_edge_torch/odml_torch/tf_integration.py +156 -0
  201. ai_edge_torch/quantize/__init__.py +16 -0
  202. ai_edge_torch/quantize/pt2e_quantizer.py +466 -0
  203. ai_edge_torch/quantize/pt2e_quantizer_utils.py +1061 -0
  204. ai_edge_torch/quantize/quant_config.py +85 -0
  205. ai_edge_torch/testing/__init__.py +14 -0
  206. ai_edge_torch/testing/model_coverage/__init__.py +16 -0
  207. ai_edge_torch/testing/model_coverage/model_coverage.py +145 -0
  208. ai_edge_torch/version.py +16 -0
  209. ai_edge_torch_nightly-0.3.0.dev20250114.dist-info/LICENSE +202 -0
  210. ai_edge_torch_nightly-0.3.0.dev20250114.dist-info/METADATA +44 -0
  211. ai_edge_torch_nightly-0.3.0.dev20250114.dist-info/RECORD +213 -0
  212. ai_edge_torch_nightly-0.3.0.dev20250114.dist-info/WHEEL +5 -0
  213. ai_edge_torch_nightly-0.3.0.dev20250114.dist-info/top_level.txt +1 -0
@@ -0,0 +1,301 @@
1
+ # Copyright 2024 The AI Edge Torch Authors.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+
16
+ import copy
17
+ import dataclasses
18
+ from dataclasses import dataclass
19
+ import gc
20
+ import itertools
21
+ import logging
22
+ import os
23
+ import tempfile
24
+ from typing import Any, Dict, Optional, Tuple, Union
25
+
26
+ if "PJRT_DEVICE" not in os.environ:
27
+ # https://github.com/google-ai-edge/ai-edge-torch/issues/326
28
+ os.environ["PJRT_DEVICE"] = "CPU"
29
+
30
+ os.environ["EXPERIMENTAL_XLA_UNBOUNDED_DYNAMISM"] = "1"
31
+
32
+
33
+ from ai_edge_torch import model
34
+ from ai_edge_torch._convert import conversion_utils
35
+ from ai_edge_torch._convert import signature as signature_module
36
+ from ai_edge_torch.lowertools import common_utils
37
+ from ai_edge_torch.lowertools import translate_recipe
38
+ from ai_edge_torch.quantize import quant_config as qcfg
39
+ import torch
40
+ from torch_xla import stablehlo
41
+
42
+ try:
43
+ import tensorflow as tf
44
+
45
+ from tensorflow.compiler.tf2xla.python import xla as tfxla
46
+
47
+ from tensorflow.lite.python import conversion_metadata_schema_py_generated as conversion_metadata_fb # isort:skip
48
+ except ImportError:
49
+ logging.error(
50
+ "This module needs tensorflow with xla support.\n"
51
+ "Please install tensorflow with `pip install tf-nightly`.\n"
52
+ )
53
+ raise
54
+
55
+ MlirBundle = stablehlo.StableHLOModelBundle
56
+
57
+
58
@dataclass
class MergedBundle:
  """Groups a merged StableHLO bundle with the data needed to lower it.

  Instances are created by `merge_mlir_bundles` and consumed by
  `merged_bundle_to_tfl_model`.
  """

  # StableHLO bundle holding the functions and constants of all signatures.
  bundle: stablehlo.StableHLOModelBundle
  # Exported programs; the consumer zips them with `bundle.stablehlo_funcs`.
  exported_programs: list[torch.export.ExportedProgram]
  # Second value returned by `common_utils.gather_state_dict`.
  deduped_tf_vars: list[tf.Variable]
64
+
65
+
66
def exported_program_to_mlir(
    exported_program: torch.export.ExportedProgram,
    sample_args: tuple[torch.Tensor],
) -> stablehlo.StableHLOModelBundle:
  """Lowers an ExportedProgram to a StableHLO model bundle.

  Args:
    exported_program: The program to lower.
    sample_args: Forwarded to torch_xla as `override_tracing_arguments`.

  Returns:
    The `_bundle` attribute of the object returned by
    `stablehlo.exported_program_to_stablehlo`.
  """
  # export_weights=False keeps pytorch/xla from copying the weights into a
  # numpy array, which would lead to memory bloat. As a consequence the
  # state_dict in the returned bundle is going to be empty.
  export_options = stablehlo.StableHLOExportOptions(
      override_tracing_arguments=sample_args, export_weights=False
  )
  shlo_module = stablehlo.exported_program_to_stablehlo(
      exported_program, export_options
  )
  return shlo_module._bundle
79
+
80
+
81
def merge_mlir_bundles(
    bundles: list[stablehlo.StableHLOModelBundle],
    signatures: list[signature_module.Signature],
    exported_programs: list[torch.export.ExportedProgram],
) -> MergedBundle:
  """Merges per-signature StableHLO bundles into a single `MergedBundle`.

  Note that the functions of the input `bundles` are modified in place: their
  names, constant positions and parameter names are rewritten, and the same
  function objects are appended to the merged bundle.

  Args:
    bundles: One bundle per signature, paired with `signatures` by position.
    signatures: Signatures whose `name` prefixes function and parameter names.
    exported_programs: Exported programs passed to
      `common_utils.gather_state_dict` and stored on the result.

  Returns:
    A `MergedBundle` holding the merged bundle, `exported_programs` and the
    deduplicated TF variables returned by `common_utils.gather_state_dict`.
  """
  state_dict, deduped_tf_vars = common_utils.gather_state_dict(
      exported_programs, signatures
  )

  new_shlo_model_bundle = stablehlo.StableHLOModelBundle(
      state_dict=state_dict, additional_constants=[], stablehlo_funcs=[]
  )

  for bundle, signature in zip(bundles, signatures):
    # Constants of this bundle land after the ones already merged, so every
    # constant position has to be shifted by the current count.
    const_offset = len(new_shlo_model_bundle.additional_constants)
    for func in bundle.stablehlo_funcs:
      func.meta.name = signature.name + "_" + func.meta.name
      for loc in func.meta.input_locations:
        if loc.type_ == stablehlo.VariableType.CONSTANT:
          loc.position += const_offset
        elif loc.type_ == stablehlo.VariableType.PARAMETER:
          loc.name = signature.name + "_" + loc.name
      new_shlo_model_bundle.stablehlo_funcs.append(func)
    new_shlo_model_bundle.additional_constants.extend(
        bundle.additional_constants
    )
  return MergedBundle(
      bundle=new_shlo_model_bundle,
      exported_programs=exported_programs,
      deduped_tf_vars=deduped_tf_vars,
  )
112
+
113
+
114
def _get_shape_with_dynamic(signature: stablehlo.VariableSignature):
  """Returns a copy of `signature.shape` with dynamic dims set to None.

  The shape stored on `signature` is shallow-copied first, so it is left
  untouched.
  """
  dims = copy.copy(signature.shape)
  for axis in signature.dynamic_dims:
    dims[axis] = None
  return dims
119
+
120
+
121
def _wrap_as_tf_func(
    func: stablehlo.StableHLOFunc,
    bundle: stablehlo.StableHLOModelBundle,
    exported_program: torch.export.ExportedProgram,
):
  """Builds a Python callable that runs `func` via `tfxla.call_module`.

  Args:
    func: StableHLO function providing the bytecode and the output signature.
    bundle: Bundle passed to `stablehlo._extract_call_parameters`.
    exported_program: Program whose `call_spec.out_spec` decides whether the
      outputs are returned flat or as a name-to-value dict.

  Returns:
    A function taking the positional inputs of `func`.
  """

  def inner(*args):
    output_sigs = func.meta.output_signature
    out_dtypes = [sig.dtype for sig in output_sigs]
    out_shapes = [_get_shape_with_dynamic(sig) for sig in output_sigs]
    call_args = stablehlo._extract_call_parameters(args, func.meta, bundle)
    outputs = tfxla.call_module(
        tuple(call_args),
        version=5,
        Tout=out_dtypes,
        Sout=out_shapes,
        function_list=[],
        module=func.bytecode,
    )
    out_spec = exported_program.call_spec.out_spec

    if out_spec.context:
      # The output spec carries a context: return outputs keyed by name.
      flat_names = common_utils.flat_dict_names(
          out_spec.children_specs, out_spec.context
      )
      return dict(zip(flat_names, outputs))

    # The module returning a flat array.
    return outputs

  return inner
150
+
151
+
152
def _make_tf_signature(
    meta: stablehlo.StableHLOFunctionMeta,
    signature: signature_module.Signature,
) -> list[tf.TensorSpec]:
  """Builds the TF input signature of a StableHLO function.

  Args:
    meta: Function metadata providing input locations and signatures,
      including the entries in `unused_inputs`.
    signature: Signature whose `flat_arg_names` name the inputs, by position.

  Returns:
    One `tf.TensorSpec` per name in `signature.flat_arg_names`.
  """
  input_names = signature.flat_arg_names

  # Used inputs and unused inputs are both iterated as (location, spec) pairs.
  located_specs = itertools.chain(
      zip(meta.input_locations, meta.input_signature), meta.unused_inputs
  )
  input_pos_to_spec = {}
  for loc, spec in located_specs:
    if loc.type_ == stablehlo.VariableType.INPUT_ARG:
      input_pos_to_spec[loc.position] = spec
  assert len(input_pos_to_spec) == len(input_names)

  # Dtypes listed here are replaced by the mapped name; any other dtype is
  # passed to tf.TensorSpec unchanged.
  primitive_type_to_tf_type = {"int": "int32", "float": "float32"}
  tensor_specs: list[tf.TensorSpec] = []
  for position, name in enumerate(input_names):
    spec = input_pos_to_spec[position]
    tensor_specs.append(
        tf.TensorSpec(
            shape=_get_shape_with_dynamic(spec),
            dtype=primitive_type_to_tf_type.get(spec.dtype, spec.dtype),
            name=name,
        )
    )
  return tensor_specs
181
+
182
+
183
def exported_program_to_mlir_text(
    exported_program: torch.export.ExportedProgram,
) -> str:
  """Returns the StableHLO text torch_xla produces for `exported_program`."""
  shlo_module = stablehlo.exported_program_to_stablehlo(exported_program)
  return shlo_module.get_stablehlo_text()
190
+
191
+
192
def merged_bundle_to_tfl_model(
    merged_bundle: MergedBundle,
    signatures: list[signature_module.Signature],
    *,
    quant_config: Optional[qcfg.QuantConfig] = None,
    _tfl_converter_flags: Optional[dict] = None,
    _saved_model_dir: Optional[str] = None,
) -> Union[bytes, bytearray]:
  """Converts a merged StableHLO bundle to a tflite model.

  Args:
    merged_bundle: The merged bundle to convert. The `additional_constants`
      of its `bundle` are replaced in place by `tf.Variable`s.
    signatures: List of signatures from which names of the signatures is
      extracted.
    quant_config: User-defined quantization method and scheme of the model.
    _tfl_converter_flags: A nested dictionary allowing setting flags for the
      underlying tflite converter. `None` (the default) means no flags.
    _saved_model_dir: Directory for the intermediate saved model. If not
      specified, a random temporary directory would be used.

  Returns:
    The converted model: the result of `converter.convert()`, or of
    `translate_recipe.quantize_model` when `quant_config` selects the
    AI_EDGE_QUANTIZER mode.
  """
  # A fresh dict per call; avoids a mutable default shared across calls.
  if _tfl_converter_flags is None:
    _tfl_converter_flags = {}

  tf_module = tf.Module()

  shlo_bundle = merged_bundle.bundle

  shlo_bundle.additional_constants = [
      tf.Variable(v, trainable=False) for v in shlo_bundle.additional_constants
  ]
  tf_signatures: list[list[tf.TensorSpec]] = [
      _make_tf_signature(func.meta, sig)
      for func, sig in zip(shlo_bundle.stablehlo_funcs, signatures)
  ]

  tf_functions = [
      _wrap_as_tf_func(func, shlo_bundle, ep)
      for func, ep in zip(
          shlo_bundle.stablehlo_funcs, merged_bundle.exported_programs
      )
  ]

  tf_module.f = []
  for tf_sig, func in zip(tf_signatures, tf_functions):
    tf_module.f.append(
        tf.function(
            func,
            input_signature=tf_sig,
        )
    )

  tf_module._variables = (
      merged_bundle.deduped_tf_vars + shlo_bundle.additional_constants
  )
  del shlo_bundle
  gc.collect()

  tf_concrete_funcs = [
      func.get_concrete_function(*tf_sig)
      for func, tf_sig in zip(tf_module.f, tf_signatures)
  ]

  # We need to temporarily save since TFLite's from_concrete_functions does not
  # allow providing names for each of the concrete functions.
  with tempfile.TemporaryDirectory() as temp_dir_path:
    if _saved_model_dir is not None:
      temp_dir_path = _saved_model_dir

    tf.saved_model.save(
        tf_module,
        temp_dir_path,
        signatures={
            sig.name: tf_concrete_funcs[idx]
            for idx, sig in enumerate(signatures)
        },
    )
    # Clean up intermediate memory early.
    del tf_functions
    del tf_module
    del tf_concrete_funcs
    gc.collect()

    converter = tf.lite.TFLiteConverter.from_saved_model(temp_dir_path)
    converter._set_original_model_type(conversion_metadata_fb.ModelType.PYTORCH)
    converter._experimental_enable_composite_direct_lowering = True

    conversion_utils.set_tfl_converter_quant_flags(converter, quant_config)

    # Evaluated once so that the recipe translation before conversion and the
    # quantization after it are always taken together.
    use_ai_edge_quantizer = (
        quant_config is not None
        and quant_config._quantizer_mode
        == quant_config._QuantizerMode.AI_EDGE_QUANTIZER
    )
    translated_recipe = None
    if use_ai_edge_quantizer:
      translated_recipe = translate_recipe.translate_to_ai_edge_recipe(
          quant_config.generative_recipe
      )

    conversion_utils.apply_tfl_converter_flags(converter, _tfl_converter_flags)

    tflite_model = converter.convert()
    del converter
    gc.collect()

    if use_ai_edge_quantizer:
      tflite_model = translate_recipe.quantize_model(
          tflite_model, translated_recipe
      )

  return tflite_model
@@ -0,0 +1,163 @@
1
+ # Copyright 2024 The AI Edge Torch Authors.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+
16
+ from ai_edge_quantizer import quantizer
17
+ from ai_edge_torch.generative.quantize import quant_attrs
18
+ from ai_edge_torch.generative.quantize import quant_recipe
19
+
20
+ _ComputePrecision = quantizer.qtyping.ComputePrecision
21
+ _QuantGranularity = quantizer.qtyping.QuantGranularity
22
+ _OpName = quantizer.qtyping.TFLOperationName
23
+ _TensorQuantConfig = quantizer.qtyping.TensorQuantizationConfig
24
+ _OpQuantConfig = quantizer.qtyping.OpQuantizationConfig
25
+
26
# Regex fragments used to build the `regex` argument passed to
# `RecipeManager.add_quantization_config`. Patterns containing a backslash are
# raw strings so the backslash is not parsed as an invalid string escape.
_DEFAULT_REGEX_STR = '.*'
_SINGULAR_TRANSFORMER_BLOCK_REGEX_STR = 'transformer_block'
# `{}` is filled with the block index through `str.format`.
_IDX_TRANSFORMER_BLOCKS_REGEX_STR = r'transformer_blocks\[{}\]'
_ATTENTION_REGEX_STR = 'ai_edge_torch.generative.layers.attention'
_FEEDFORWARD_REGEX_STR = 'ai_edge_torch.generative.layers.feed_forward'
_EMBEDDING_REGEX_STR = 'Embedding_tok_embedding'
_ANY_TWO_DIGITS_REGEX_STR = r'\d{1,2}'
33
+
34
+
35
def _get_nbits_from_dtype(dtype: quant_attrs.Dtype) -> int:
  """Returns the bit width of `dtype`.

  Raises:
    ValueError: If `dtype` is not FP32, FP16 or INT8.
  """
  known_widths = (
      (quant_attrs.Dtype.FP32, 32),
      (quant_attrs.Dtype.FP16, 16),
      (quant_attrs.Dtype.INT8, 8),
  )
  for candidate, nbits in known_widths:
    if dtype == candidate:
      return nbits
  raise ValueError('Unimplemented number of bits')
43
+
44
+
45
def _get_dtype_from_dtype(
    dtype: quant_attrs.Dtype,
) -> quantizer.qtyping.TensorDataType:
  """Maps a recipe dtype to the quantizer's tensor data type.

  FP32 and FP16 map to FLOAT; every other dtype maps to INT.
  """
  tensor_types = quantizer.qtyping.TensorDataType
  if dtype in (quant_attrs.Dtype.FP32, quant_attrs.Dtype.FP16):
    return tensor_types.FLOAT
  return tensor_types.INT
52
+
53
+
54
def _get_compute_precision_from_mode(
    mode: quant_attrs.Mode,
) -> _ComputePrecision:
  """Returns the compute precision that corresponds to `mode`.

  DYNAMIC_RANGE maps to INTEGER and WEIGHT_ONLY maps to FLOAT.

  Raises:
    ValueError: If `mode` is neither of the two.
  """
  precision_by_mode = (
      (quant_attrs.Mode.DYNAMIC_RANGE, _ComputePrecision.INTEGER),
      (quant_attrs.Mode.WEIGHT_ONLY, _ComputePrecision.FLOAT),
  )
  for candidate, precision in precision_by_mode:
    if mode == candidate:
      return precision
  raise ValueError('Unimplemented execution mode')
62
+
63
+
64
def _get_explicit_dequant_from_mode(mode: quant_attrs.Mode) -> bool:
  """Returns the `explicit_dequantize` setting for `mode`.

  False for DYNAMIC_RANGE, True for WEIGHT_ONLY.

  Raises:
    ValueError: If `mode` is neither of the two.
  """
  if mode == quant_attrs.Mode.DYNAMIC_RANGE:
    return False
  if mode == quant_attrs.Mode.WEIGHT_ONLY:
    return True
  raise ValueError('Unimplemented execution mode')
70
+
71
+
72
def _get_granularity(
    granularity: quant_attrs.Granularity,
) -> _QuantGranularity:
  """Maps a recipe granularity to the quantizer's granularity.

  CHANNELWISE maps to CHANNELWISE and NONE maps to TENSORWISE.

  Args:
    granularity: Granularity taken from a layer quantization recipe.

  Returns:
    The matching `_QuantGranularity` member.

  Raises:
    ValueError: If `granularity` is neither CHANNELWISE nor NONE.
  """
  if granularity == quant_attrs.Granularity.CHANNELWISE:
    return _QuantGranularity.CHANNELWISE
  if granularity == quant_attrs.Granularity.NONE:
    return _QuantGranularity.TENSORWISE
  raise ValueError('Unimplemented granularity')
80
+
81
+
82
def _get_algorithm_key_from_algorithm(algo: quant_attrs.Algorithm) -> str:
  """Returns the quantizer algorithm name that corresponds to `algo`.

  MIN_MAX maps to MIN_MAX_UNIFORM_QUANT and FLOAT_CAST maps to FLOAT_CASTING.

  Raises:
    ValueError: If `algo` is neither of the two.
  """
  algorithm_names = quantizer.algorithm_manager.AlgorithmName
  if algo == quant_attrs.Algorithm.MIN_MAX:
    return algorithm_names.MIN_MAX_UNIFORM_QUANT
  if algo == quant_attrs.Algorithm.FLOAT_CAST:
    return algorithm_names.FLOAT_CASTING
  raise ValueError('Unimplemented algorithm')
88
+
89
+
90
def _set_quant_config(
    rm: quantizer.recipe_manager.RecipeManager,
    layer_recipe: quant_recipe.LayerQuantRecipe,
    regex: str,
):
  """Registers `layer_recipe` on `rm` for everything matching `regex`.

  Args:
    rm: Recipe manager that receives the quantization config.
    layer_recipe: Provides the weight dtype, granularity, mode and algorithm.
    regex: Scope of the config, forwarded as the `regex` argument.
  """
  weight_config = _TensorQuantConfig(
      num_bits=_get_nbits_from_dtype(layer_recipe.weight_dtype),
      symmetric=True,
      granularity=_get_granularity(layer_recipe.granularity),
      dtype=_get_dtype_from_dtype(layer_recipe.weight_dtype),
  )
  op_config = _OpQuantConfig(
      weight_tensor_config=weight_config,
      compute_precision=_get_compute_precision_from_mode(layer_recipe.mode),
      explicit_dequantize=_get_explicit_dequant_from_mode(layer_recipe.mode),
  )
  algorithm_key = _get_algorithm_key_from_algorithm(layer_recipe.algorithm)

  rm.add_quantization_config(
      regex=regex,
      operation_name=_OpName.ALL_SUPPORTED,
      op_config=op_config,
      algorithm_key=algorithm_key,
  )
112
+
113
+
114
def translate_to_ai_edge_recipe(
    recipe: quant_recipe.GenerativeQuantRecipe,
) -> quantizer.recipe_manager.ModelQuantizationRecipe:
  """Translates a generative quantization recipe into a quantizer recipe.

  Configs are registered in this order: default, embedding, attention,
  feedforward. Fields that are None are skipped.

  Args:
    recipe: Recipe with optional `default`, `embedding`, `attention` and
      `feedforward` entries. The latter two may be a single layer recipe or a
      dict keyed by transformer block index.

  Returns:
    The recipe produced by `RecipeManager.get_quantization_recipe()`.
  """
  rm = quantizer.recipe_manager.RecipeManager()

  def _add_block_level(layer_recipe, module_regex):
    # Registers either one recipe per indexed block or one for all blocks.
    if layer_recipe is None:
      return
    if not isinstance(layer_recipe, dict):
      _set_quant_config(
          rm,
          layer_recipe,
          f'{_SINGULAR_TRANSFORMER_BLOCK_REGEX_STR}/{module_regex}',
      )
      return
    for idx, layer in layer_recipe.items():
      _set_quant_config(
          rm,
          layer,
          f'{_IDX_TRANSFORMER_BLOCKS_REGEX_STR.format(idx)}/{module_regex}',
      )

  if recipe.default is not None:
    _set_quant_config(rm, recipe.default, _DEFAULT_REGEX_STR)

  if recipe.embedding is not None:
    _set_quant_config(rm, recipe.embedding, _EMBEDDING_REGEX_STR)

  _add_block_level(recipe.attention, _ATTENTION_REGEX_STR)
  _add_block_level(recipe.feedforward, _FEEDFORWARD_REGEX_STR)

  return rm.get_quantization_recipe()
156
+
157
+
158
def quantize_model(
    model: bytes, recipe: quantizer.recipe_manager.ModelQuantizationRecipe
) -> bytearray:
  """Quantizes `model` with `recipe` and returns the quantized model.

  Args:
    model: The model handed to `quantizer.Quantizer`.
    recipe: Quantization recipe to apply.

  Returns:
    The `quantized_model` attribute of the quantization result.
  """
  return quantizer.Quantizer(model, recipe).quantize().quantized_model
ai_edge_torch/model.py ADDED
@@ -0,0 +1,177 @@
1
+ # Copyright 2024 The AI Edge Torch Authors.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+
16
+ """Represents an ai_edge_torch model.
17
+
18
+ PyTorch models can be converted to this representation through
19
+ `ai_edge_torch.convert`.
20
+ """
21
+ from __future__ import annotations
22
+
23
+ import abc
24
+ import re
25
+ from typing import Callable
26
+
27
+ import numpy.typing as npt
28
+ import tensorflow as tf
29
+
30
+ from ai_edge_litert import interpreter as tfl_interpreter # pylint: disable=g-direct-tensorflow-import
31
+
32
+ DEFAULT_SIGNATURE_NAME = tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY
33
+
34
+
35
class Model(abc.ABC):
  """Abstract base class for an edge model.

  Subclasses implement inference (`__call__`) and serialization (`export`).
  `Model.load` is the entry point for reading a serialized model from disk.
  """

  @abc.abstractmethod
  def __call__(
      self,
      *args: npt.ArrayLike,
      signature_name: str = DEFAULT_SIGNATURE_NAME,
      **kwargs,
  ) -> npt.ArrayLike | tuple[npt.ArrayLike]:
    """Runs inference; must be implemented by subclasses.

    Args:
      *args: Positional inputs for the model.
      signature_name: Name of the signature to run; defaults to
        `DEFAULT_SIGNATURE_NAME`.
      **kwargs: Named inputs for the model.
    """
    raise NotImplementedError()

  @abc.abstractmethod
  def export(self, path: str):
    """Serializes the model to `path`; must be implemented by subclasses."""
    raise NotImplementedError()

  @staticmethod
  def load(path: str) -> TfLiteModel:
    """Loads a serialized edge model from disk.

    Args:
      path: The path of the file to deserialize.

    Returns:
      The model returned by `TfLiteModel.load`.

    Raises:
      ValueError: If `TfLiteModel.load` yields no model for the file.
    """
    loaded_model = TfLiteModel.load(path)
    if not loaded_model:
      raise ValueError(f'File format in {path} cannot be deserialized.')

    return loaded_model
58
+
59
+
60
class TfLiteModel(Model):
  """An edge model which uses tflite under-the-hood."""

  def __init__(self, tflite_model):
    """Initializes the TfLiteModel instance using a TFLite serialized object.

    Args:
      tflite_model: A TFlite serialized object.
    """
    self._tflite_model = tflite_model
    # The default builder creates an interpreter over the wrapped model
    # content; it can be replaced through `set_interpreter_builder`.
    self._interpreter_builder = lambda: tfl_interpreter.Interpreter(
        model_content=self._tflite_model,
        experimental_default_delegate_latest_features=True,
    )

  def tflite_model(self) -> bytes:
    """Returns the wrapped tflite model."""
    return self._tflite_model

  def set_interpreter_builder(
      self, builder: Callable[[], tfl_interpreter.Interpreter]
  ) -> None:
    """Sets a custom interpreter builder.

    Args:
      builder: A function that returns a `tfl_interpreter.Interpreter` or its
        subclass.
    """
    self._interpreter_builder = builder

  def __call__(
      self,
      *args: npt.ArrayLike,
      signature_name: str = DEFAULT_SIGNATURE_NAME,
      **kwargs,
  ) -> npt.ArrayLike | tuple[npt.ArrayLike]:
    """Runs inference on the edge model using the provided arguments.

    A new interpreter is built through the interpreter builder on every call.

    Args:
      *args: The arguments to be passed to the model for inference. They are
        bound to the signature inputs named `args_0`, `args_1`, ...
      signature_name: The name of the signature to be used for inference. The
        default signature is used if not provided.
      **kwargs: The arguments with specific names to be passed to the model for
        inference.

    Returns:
      If the outputs are named exactly `output_0` ... `output_{n-1}`: the
      single output when there is one, otherwise a list of the outputs in
      index order. In every other case the output dictionary is returned
      unchanged.

    Raises:
      ValueError: If `signature_name` is not a signature of the model, or if
        the number of provided arguments (positional plus keyword) differs
        from the number of inputs of the signature.
    """
    interpreter = self._interpreter_builder()
    interpreter.allocate_tensors()

    signature_list = interpreter.get_signature_list()
    if signature_name not in signature_list:
      raise ValueError(
          'Invalid signature name provided. Available signatures:'
          f' {", ".join(signature_list.keys())}'
      )

    try:
      runner = interpreter.get_signature_runner(signature_name)
    except ValueError as exception:
      if 'Invalid signature_key provided.' in str(exception):
        # Keep the interpreter error attached as the cause for debugging.
        raise ValueError(
            'Invalid signature key provided. Available signatures:'
            f' {list(signature_list.keys())}'
        ) from exception
      raise

    required_count = len(signature_list[signature_name]['inputs'])
    provided_count = len(args) + len(kwargs)
    if required_count != provided_count:
      # Report the same total that the check compares (positional + keyword).
      raise ValueError(
          'The model requires'
          f' {required_count} arguments but'
          f' {provided_count} was provided.'
      )

    # Gather the input dictionary based on the signature.
    inputs = {f'args_{idx}': arg for idx, arg in enumerate(args)}
    inputs.update(kwargs)
    outputs = runner(**inputs)

    # When attempting to run a model, check if the output tensors are named
    # exactly output_0 ... output_{n-1}. If so, assume the pytorch model
    # returned a tuple and not a dictionary. Comparing against the exact key
    # set (instead of a substring regex match) guarantees that every key
    # looked up below exists.
    positional_keys = [f'output_{idx}' for idx in range(len(outputs))]
    if set(outputs.keys()) == set(positional_keys):
      if len(outputs) == 1:
        return outputs['output_0']
      return [outputs[key] for key in positional_keys]

    return outputs

  def export(self, path: str) -> None:
    """Serializes the edge model to disk.

    Args:
      path: The path to file to which the model is serialized.
    """
    with open(path, 'wb') as file_handle:
      file_handle.write(self._tflite_model)

  @staticmethod
  def load(path: str) -> TfLiteModel | None:
    """Returns an edge (tflite) model by reading it from the disk.

    Args:
      path: The path to the model.

    Returns:
      A `TfLiteModel` wrapping the file content, or None if an interpreter
      cannot be created from the content or its signature list cannot be read.
    """
    with open(path, 'rb') as file_handle:
      model_content = file_handle.read()

    # Check if this is indeed a tflite model. Only `Exception` is caught so
    # that KeyboardInterrupt / SystemExit still propagate.
    try:
      interpreter = tfl_interpreter.Interpreter(model_content=model_content)
      interpreter.get_signature_list()
    except Exception:  # pylint: disable=broad-exception-caught
      return None

    return TfLiteModel(model_content)
@@ -0,0 +1,20 @@
1
+ # Copyright 2024 The AI Edge Torch Authors.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+ from . import composite
16
+ from . import debuginfo
17
+ from . import export
18
+ from . import export_utils
19
+ from . import lowerings
20
+ from . import passes