PyNerva 0.0.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nervapy/__init__.py +50 -0
- nervapy/abi.py +91 -0
- nervapy/arm/__init__.py +124 -0
- nervapy/arm/__main__.py +0 -0
- nervapy/arm/abi.py +138 -0
- nervapy/arm/formats.py +49 -0
- nervapy/arm/function.py +2465 -0
- nervapy/arm/generic.py +10796 -0
- nervapy/arm/instructions.py +519 -0
- nervapy/arm/isa.py +409 -0
- nervapy/arm/literal_pool.py +331 -0
- nervapy/arm/microarchitecture.py +211 -0
- nervapy/arm/pseudo.py +652 -0
- nervapy/arm/registers.py +1458 -0
- nervapy/arm/vfpneon.py +4092 -0
- nervapy/arm.py +13 -0
- nervapy/c/__init__.py +1 -0
- nervapy/c/types.py +436 -0
- nervapy/codegen.py +99 -0
- nervapy/common/__init__.py +4 -0
- nervapy/common/function.py +5 -0
- nervapy/common/regalloc.py +121 -0
- nervapy/constant_data.py +282 -0
- nervapy/encoder.py +246 -0
- nervapy/formats/__init__.py +2 -0
- nervapy/formats/elf/__init__.py +4 -0
- nervapy/formats/elf/file.py +178 -0
- nervapy/formats/elf/image.py +106 -0
- nervapy/formats/elf/section.py +422 -0
- nervapy/formats/elf/symbol.py +281 -0
- nervapy/formats/macho/__init__.py +2 -0
- nervapy/formats/macho/file.py +123 -0
- nervapy/formats/macho/image.py +143 -0
- nervapy/formats/macho/section.py +322 -0
- nervapy/formats/macho/symbol.py +158 -0
- nervapy/formats/mscoff/__init__.py +8 -0
- nervapy/formats/mscoff/image.py +132 -0
- nervapy/formats/mscoff/section.py +181 -0
- nervapy/formats/mscoff/symbol.py +148 -0
- nervapy/function.py +136 -0
- nervapy/literal.py +731 -0
- nervapy/loader.py +188 -0
- nervapy/name.py +159 -0
- nervapy/parse.py +52 -0
- nervapy/stream.py +58 -0
- nervapy/util.py +126 -0
- nervapy/writer.py +518 -0
- nervapy/x86_64/__init__.py +324 -0
- nervapy/x86_64/__main__.py +407 -0
- nervapy/x86_64/abi.py +517 -0
- nervapy/x86_64/amd.py +6464 -0
- nervapy/x86_64/avx.py +102029 -0
- nervapy/x86_64/crypto.py +1533 -0
- nervapy/x86_64/encoding.py +424 -0
- nervapy/x86_64/fma.py +19138 -0
- nervapy/x86_64/function.py +2707 -0
- nervapy/x86_64/generic.py +23384 -0
- nervapy/x86_64/instructions.py +500 -0
- nervapy/x86_64/isa.py +476 -0
- nervapy/x86_64/lower.py +126 -0
- nervapy/x86_64/mask.py +2593 -0
- nervapy/x86_64/meta.py +143 -0
- nervapy/x86_64/mmxsse.py +17265 -0
- nervapy/x86_64/nacl.py +327 -0
- nervapy/x86_64/operand.py +1204 -0
- nervapy/x86_64/options.py +21 -0
- nervapy/x86_64/pseudo.py +686 -0
- nervapy/x86_64/registers.py +1225 -0
- nervapy/x86_64/types.py +17 -0
- nervapy/x86_64/uarch.py +580 -0
- pynerva-0.0.7.dist-info/METADATA +310 -0
- pynerva-0.0.7.dist-info/RECORD +74 -0
- pynerva-0.0.7.dist-info/WHEEL +4 -0
- pynerva-0.0.7.dist-info/licenses/LICENSE.rst +15 -0
nervapy/arm/function.py
ADDED
|
@@ -0,0 +1,2465 @@
|
|
|
1
|
+
# This file is part of PeachPy package and is licensed under the Simplified BSD license.
|
|
2
|
+
# See license.rst for the full text of the license.
|
|
3
|
+
|
|
4
|
+
from __future__ import print_function
|
|
5
|
+
|
|
6
|
+
import time
|
|
7
|
+
|
|
8
|
+
import nervapy.arm.instructions
|
|
9
|
+
import nervapy.arm.registers
|
|
10
|
+
from nervapy.arm.microarchitecture import Microarchitecture
|
|
11
|
+
|
|
12
|
+
# The Function currently entered as a context manager: set by Function.__enter__,
# reset to None by Function.detach. None means no function is being generated.
active_function = None
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class Function(object):
|
|
16
|
+
def __init__(
    self,
    name,
    arguments,
    return_type=None,
    target=Microarchitecture.Default,
    abi=None,
    assembly_format=None,
    high_register_strategy=None,
    collect_origin=False,
    dump_intermediate_assembly=False,
    report_generation=True,
    report_live_registers=False,
    is_thumb=False,
    alignment=0,
    validate_stack_alignment=True,
    preserve8=False,
):
    """Describe an ARM function and assign a location to each argument.

    :param name: symbol name used for the function label.
    :param arguments: argument objects; each one gets ``register`` and
        ``stack_offset`` attributes assigned here.
    :param return_type: C type of the return value, or None.
    :param target: target microarchitecture.
    :param abi: calling convention; must compare equal to ``arm_gnueabi``
        or ``arm_gnueabihf``.
    :param assembly_format: output format; defaults to ``AssemblyFormat.GAS``.
    :param high_register_strategy: defaults to ``HighRegisterStrategy.AUTO``.
    :param collect_origin: stored as-is on the instance.
    :param dump_intermediate_assembly: dump a ``<symbol>.S`` listing on exit.
    :param report_generation: print progress/timing while generating.
    :param report_live_registers: re-run liveness analysis before the prologue.
    :param is_thumb: emit Thumb directives.
    :param alignment: function alignment; 0 emits no alignment directive.
    :param validate_stack_alignment: run the stack alignment check on exit.
    :param preserve8: emit PRESERVE8 in ARMCC output.
    :raises ValueError: if the ABI is unsupported or an argument is neither
        at most 4 bytes nor exactly 8 bytes.
    """
    from nervapy.arm.abi import arm_gnueabi, arm_gnueabihf
    from nervapy.arm.registers import r0, r1, r2, r3

    # Validate the ABI before anything dereferences it: the pointer-size
    # fix-up below reads abi.pointer_size, which would otherwise turn a
    # missing/unsupported ABI into an AttributeError.
    if not (abi == arm_gnueabi or abi == arm_gnueabihf):
        raise ValueError("Unsupported assembler ABI %s" % abi)

    self.name = name
    self.arguments = arguments
    self.return_type = return_type
    self.is_thumb = is_thumb
    self.alignment = alignment
    self.validate_stack_alignment = validate_stack_alignment
    self.preserve8 = preserve8

    # Set default assembly format to GAS if not specified
    if assembly_format is None:
        from nervapy.arm.formats import AssemblyFormat

        assembly_format = AssemblyFormat.GAS
    self.assembly_format = assembly_format

    # Set default high register strategy if not specified
    if high_register_strategy is None:
        from nervapy.arm.formats import HighRegisterStrategy

        high_register_strategy = HighRegisterStrategy.AUTO
    self.high_register_strategy = high_register_strategy

    for argument in self.arguments:
        argument.stack_offset = None
        argument.register = None
        # Pointer-sized types take their size from the ABI.
        if (
            argument.is_size_integer
            or argument.is_pointer_integer
            or argument.is_pointer
        ):
            argument.c_type.size = abi.pointer_size
        assert argument.size

    self.target = target
    self.abi = abi
    self.collect_origin = collect_origin
    self.dump_intermediate_assembly = dump_intermediate_assembly
    self.report_generation = report_generation
    self.report_live_registers = report_live_registers
    self.ticks = None

    # Assign argument locations.
    # Up to 4 first arguments are passed in registers, others passed through stack.
    # Arguments smaller than 4 bytes are extended to 4 bytes (both when passed on
    # stack or in a register).
    # 8-byte arguments occupy 2 general-purpose registers or 8 bytes on stack. When
    # they are passed in registers, the index of the first register must be even
    # (i.e. they are passed in (r0, r1) or (r2, r3), but not in (r1, r2)). When
    # 8-byte arguments are passed on stack, their location is aligned on 8 bytes,
    # skipping 4 bytes if necessary.
    argument_registers = (r0, r1, r2, r3)
    register_offset = 0
    stack_offset = 0
    for argument in self.arguments:
        if argument.size <= 4:
            if register_offset < 4:
                argument.register = argument_registers[register_offset]
                register_offset += 1
            else:
                argument.stack_offset = stack_offset
                stack_offset += 4
        elif argument.size == 8:
            # First register index must be even
            if register_offset % 2 == 1:
                register_offset += 1
            if register_offset < 4:
                argument.register = (
                    argument_registers[register_offset],
                    argument_registers[register_offset + 1],
                )
                register_offset += 2
            else:
                if stack_offset % 8 == 4:
                    stack_offset += 4
                argument.stack_offset = stack_offset
                stack_offset += 8
        else:
            raise ValueError(
                "Unsupported argument size {0}".format(argument.size)
            )

    self.instructions = list()
    self.constants = list()
    self.external_functions = set()  # Track external function imports
    self.stack_frame = StackFrame(self.abi)
    self.local_variables_count = 0
    self.virtual_registers_count = 0x40
    self.conflicting_registers = dict()
    self.allocation_options = dict()
    self.unallocated_registers = list()
    self._live_register_markers = []  # List of (instruction_index, label) tuples
    self._register_names = {}  # Map from register number to variable name
|
|
129
|
+
|
|
130
|
+
def __enter__(self):
    """Make this function the active function and instruction stream.

    :raises ValueError: if another function is still active or an
        alternative instruction stream is installed.
    :returns: ``self``.
    """
    import nervapy.stream

    global active_function

    previous = active_function
    if previous is not None:
        raise ValueError(
            "Function {0} was not detached".format(previous.name)
        )
    if nervapy.stream.active_stream is not None:
        raise ValueError("Alternative instruction stream is active")

    active_function = self
    nervapy.stream.active_stream = self

    if self.report_generation:
        banner = "Generating function {Function} for microarchitecture {Microarchitecture} and ABI {ABI}"
        print(
            banner.format(
                Function=self.name, Microarchitecture=self.target, ABI=self.abi
            )
        )
        print("\tParsing source", end="")
    # NOTE(review): nesting was reconstructed from a flattened listing; the
    # timer reset is assumed to be unconditional -- confirm.
    self.ticks = time.time()
    return self
|
|
152
|
+
|
|
153
|
+
def __exit__(self, exc_type, exc_value, traceback):
    """Leave the function context and, on a clean exit, run code generation.

    When the ``with`` body raised, nothing is generated. In every case the
    function is detached afterwards. Exceptions are never suppressed.
    """
    import nervapy.stream
    from nervapy.arm.instructions import Instruction

    def joined(registers):
        # Sorted, comma-separated textual form of a register collection.
        return ", ".join(sorted(map(str, list(registers))))

    def close_stage(next_stage=None):
        # Report the time spent in the stage that just ended, announce the
        # next one (if any), and restart the timer.
        # NOTE(review): nesting was reconstructed from a flattened listing;
        # the timer reset is assumed to be unconditional -- confirm.
        if self.report_generation:
            elapsed = time.time() - self.ticks
            print(" (%2.2f secs)" % elapsed)
            if next_stage is not None:
                print(next_stage, end="")
        self.ticks = time.time()

    nervapy.stream.active_stream = None
    try:
        if exc_type is None:
            self.generate_labels()
            self.decompose_instructions()
            self.reserve_registers()
            close_stage("\tRunning liveness analysis")

            self.determine_available_registers()
            self.determine_live_registers(exclude_parameter_loads=True)

            # Report live registers at marked points
            if self._live_register_markers:
                self._report_live_registers_at_markers()

            if self.dump_intermediate_assembly:
                with open("%s.S" % self.symbol_name, "w") as listing:
                    for instruction in self.instructions:
                        if not isinstance(instruction, Instruction):
                            listing.write(str(instruction) + "\n")
                            continue
                        consumed = joined(instruction.get_input_registers_list())
                        produced = joined(instruction.get_output_registers_list())
                        # Formatted as in the original code, but not written out.
                        _available = joined(instruction.available_registers)
                        live = joined(instruction.live_registers)
                        listing.write(str(instruction) + "\n")
                        listing.write("\tConsumed registers: " + consumed + "\n")
                        listing.write("\tProduced registers: " + produced + "\n")
                        listing.write("\tLive registers: " + live + "\n")
                        if instruction.line_number:
                            listing.write(
                                "\tLine: " + str(instruction.line_number) + "\n"
                            )
                        if instruction.source_code:
                            listing.write(
                                "\tCode: " + instruction.source_code + "\n"
                            )

            close_stage("\tRunning register allocation")
            self.check_live_registers()
            self.determine_register_relations()
            self.allocate_registers()

            close_stage("\tGenerating code")
            self.remove_assume_statements()
            self.update_stack_frame()
            self.generate_parameter_loads()
            if self.report_live_registers:
                self.determine_live_registers()
            self.generate_prolog_and_epilog()
            if self.validate_stack_alignment:
                self.validate_stack_alignment_check()

            self.generate_constant_loads()
            self.optimize_instructions()
            close_stage()
    finally:
        # Detach whether generation succeeded, failed, or was skipped.
        self.detach()
|
|
267
|
+
|
|
268
|
+
def find_argument(self, argument_target):
    """Look up one of this function's arguments.

    :param argument_target: an ``Argument`` object or an argument name.
    :returns: the matching argument from ``self.arguments``, or None when
        there is no match.
    """
    from nervapy import Argument

    assert isinstance(
        argument_target, (Argument, str)
    ), "Either Argument object or argument name expected"

    if not isinstance(argument_target, Argument):
        # Lookup by name: first argument whose name matches.
        for candidate in self.arguments:
            if candidate.name == argument_target:
                return candidate
        return None

    # Lookup by object: only arguments that belong to this function count.
    return argument_target if argument_target in self.arguments else None
|
|
288
|
+
|
|
289
|
+
def detach(self):
    """Clear the module-level active function and the active stream.

    :raises ValueError: if no function is currently active.
    :returns: ``self``.
    """
    import nervapy.stream

    global active_function

    nothing_active = active_function is None
    if nothing_active:
        raise ValueError("Trying to detach a function while no function is active")

    active_function = None
    nervapy.stream.active_stream = None
    return self
|
|
298
|
+
|
|
299
|
+
@property
def assembly(self):
    """Assembly text for this function in its configured format.

    ARMCC output is produced when ``assembly_format`` is
    ``AssemblyFormat.ARMCC``; any other value yields GAS output.
    """
    from nervapy.arm.formats import AssemblyFormat

    wants_armcc = self.assembly_format == AssemblyFormat.ARMCC
    generator = (
        self._generate_armcc_assembly if wants_armcc else self._generate_gas_assembly
    )
    return generator()
|
|
308
|
+
|
|
309
|
+
def _generate_constant_data_section(self):
    """Generate the ``.data`` section for this function's ConstantData objects.

    :returns: the section text, or an empty string when the
        ``nervapy.constant_data`` module cannot be imported or the function
        has no constants.
    """
    # Only the import is optional. Keeping the try this narrow means an
    # ImportError raised while rendering constants is not mistaken for
    # "module unavailable" and silently turned into an empty section.
    try:
        from nervapy.constant_data import ConstantData
    except ImportError:
        return ""

    import os

    constants = ConstantData.get_function_constants(self)
    if not constants:
        return ""

    lines = ["", "\t.data", "\t.align 4"]
    lines.extend(const.generate_data_section() for const in constants)
    return os.linesep.join(lines)
|
|
327
|
+
|
|
328
|
+
|
|
329
|
+
@property
def global_asm(self):
    """Rust ``global_asm!()`` invocation wrapping this function's GAS assembly.

    The GAS text has its braces doubled and is placed in a raw string
    literal. Intended use in a Rust source file::

        use core::arch::global_asm;
        include!("generated_kernels.rs");  // or paste directly

    with the matching declaration written on the Rust side, e.g.::

        unsafe extern "C" { fn my_func(a: u32) -> u32; }
    """
    escaped = self._escape_rust_asm_template(self._generate_gas_assembly())
    template = 'core::arch::global_asm!(r#"\n{asm}"#);\n'
    return template.format(asm=escaped)
|
|
344
|
+
|
|
345
|
+
@staticmethod
|
|
346
|
+
def _escape_rust_asm_template(asm):
|
|
347
|
+
return asm.replace("{", "{{").replace("}", "}}")
|
|
348
|
+
|
|
349
|
+
def _rust_ffi_type(self, c_type):
|
|
350
|
+
if c_type.is_pointer:
|
|
351
|
+
pointee = c_type.base
|
|
352
|
+
if pointee is None:
|
|
353
|
+
pointee_type = "core::ffi::c_void"
|
|
354
|
+
is_const_pointee = c_type.is_const
|
|
355
|
+
else:
|
|
356
|
+
pointee_type = self._rust_ffi_type(pointee)
|
|
357
|
+
is_const_pointee = pointee.is_const
|
|
358
|
+
return "{0} {1}".format(
|
|
359
|
+
"*const" if is_const_pointee else "*mut", pointee_type
|
|
360
|
+
)
|
|
361
|
+
|
|
362
|
+
if c_type.is_size_integer:
|
|
363
|
+
return "usize" if c_type.is_unsigned_integer else "isize"
|
|
364
|
+
if c_type.is_pointer_integer:
|
|
365
|
+
return "usize" if c_type.is_unsigned_integer else "isize"
|
|
366
|
+
if c_type.is_bool:
|
|
367
|
+
return "bool"
|
|
368
|
+
if c_type.is_char:
|
|
369
|
+
return "core::ffi::c_char"
|
|
370
|
+
if c_type.is_wchar:
|
|
371
|
+
wchar_size = c_type.get_size(self.abi)
|
|
372
|
+
return {2: "u16", 4: "u32"}[wchar_size]
|
|
373
|
+
if c_type.is_floating_point:
|
|
374
|
+
return {2: "u16", 4: "f32", 8: "f64"}[c_type.get_size(self.abi)]
|
|
375
|
+
if c_type.is_signed_integer:
|
|
376
|
+
return {1: "i8", 2: "i16", 4: "i32", 8: "i64"}[c_type.get_size(self.abi)]
|
|
377
|
+
if c_type.is_unsigned_integer:
|
|
378
|
+
return {1: "u8", 2: "u16", 4: "u32", 8: "u64"}[c_type.get_size(self.abi)]
|
|
379
|
+
|
|
380
|
+
raise ValueError("Unsupported Rust FFI type for {0}".format(c_type))
|
|
381
|
+
|
|
382
|
+
@property
|
|
383
|
+
def rust_extern_declaration(self):
|
|
384
|
+
args = ", ".join(
|
|
385
|
+
"{0}: {1}".format(argument.name, self._rust_ffi_type(argument.c_type))
|
|
386
|
+
for argument in self.arguments
|
|
387
|
+
)
|
|
388
|
+
signature = "pub fn {0}({1})".format(self.name, args)
|
|
389
|
+
if self.return_type is not None:
|
|
390
|
+
signature += " -> {0}".format(self._rust_ffi_type(self.return_type))
|
|
391
|
+
return signature + ";"
|
|
392
|
+
|
|
393
|
+
@property
|
|
394
|
+
def rust_extern(self):
|
|
395
|
+
return "unsafe extern \"C\" {{\n {0}\n}}\n".format(
|
|
396
|
+
self.rust_extern_declaration
|
|
397
|
+
)
|
|
398
|
+
|
|
399
|
+
@property
def rust_module(self):
    """The ``global_asm`` text followed by the ``rust_extern`` block, separated by a newline."""
    pieces = (self.global_asm, self.rust_extern)
    return "\n".join(pieces)
|
|
402
|
+
|
|
403
|
+
def _generate_gas_assembly(self):
    """Render this function as GNU Assembler (GAS) text.

    Output order: syntax/arch directives, optional ConstantData section,
    optional constants block, ``.extern`` lines, the ``.text`` header with
    the function label, the instructions, and an optional literal pool.
    """
    import os

    from nervapy.arm.generic import BranchInstruction
    from nervapy.arm.instructions import Instruction
    from nervapy.arm.pseudo import LabelQuasiInstruction

    eol = os.linesep
    function_label = self.name
    constants_label = self.name + "_constants"
    chunks = []
    emit = chunks.append

    emit("\t.syntax unified" + eol)
    if self.is_thumb:
        emit("\t.thumb" + eol)
    emit("\t" + self.gnu_arch_spec + eol)

    # Generate .data section for ConstantData if present
    constant_data_section = self._generate_constant_data_section()
    if constant_data_section:
        emit(constant_data_section + eol)

    if len(self.constants) > 0:
        # NOTE(review): the section header, ALIGN and DB/DW/DD/DQ/DO
        # declarations below look like NASM syntax rather than GAS --
        # confirm before relying on this path.
        emit(
            "section .rodata.{Microarchitecture} progbits alloc noexec nowrite align={Alignment}".format(
                Microarchitecture=self.target.id, Alignment=32
            )
            + eol
        )
        emit(constants_label + ":" + eol)
        data_declaration_map = {8: "DB", 16: "DW", 32: "DD", 64: "DQ", 128: "DO"}
        need_alignment = False
        for bucket in self.constants:
            if need_alignment:
                emit("\tALIGN {Alignment}".format(Alignment=bucket.capacity) + eol)
            for constant in bucket.constants:
                emit(
                    "\t.{Label}: {Declaration} {Value}".format(
                        Label=constant.label,
                        Declaration=data_declaration_map[constant.size],
                        Value=", ".join([str(constant)] * constant.repeats),
                    )
                    + eol
                )
            # A bucket that is not full forces an ALIGN before the next one.
            need_alignment = not bucket.is_full()
        emit(eol)

    if hasattr(self, "external_functions") and len(self.external_functions) > 0:
        for func_name in sorted(self.external_functions):
            emit(".extern {0}".format(func_name) + eol)
        emit(eol)

    emit("\n\t.text\n" + eol)
    emit(".global {Function}".format(Function=function_label) + eol)
    emit(".type {Function}, %function".format(Function=function_label) + eol)
    if self.alignment > 0:
        emit(".align {Alignment}".format(Alignment=self.alignment) + eol)
    emit(function_label + ":" + eol)
    if self.gnu_fpu_spec:
        emit("\t" + self.gnu_fpu_spec + eol)

    for instruction in self.instructions:
        if isinstance(instruction, BranchInstruction):
            # Branch targets are function-local labels: L<function>.<label>
            emit(
                "\t"
                + "{0} L{1}.{2}".format(
                    instruction.name, self.name, instruction.operands[0].label
                )
                + eol
            )
        elif isinstance(instruction, Instruction):
            constant = instruction.get_constant()
            if constant is not None:
                # Must be set before the instruction is converted to text.
                constant.prefix = constants_label
            emit("\t" + str(instruction) + eol)
        elif isinstance(instruction, LabelQuasiInstruction):
            emit("L{0}.{1}:".format(self.name, instruction.name) + eol)
        else:
            emit("\t" + str(instruction) + eol)

    # Generate literal pool if present
    if hasattr(self, 'literal_pool') and self.literal_pool.entries:
        emit(eol)
        emit(self.literal_pool.generate_assembly(format='gas') + eol)

    emit(eol)
    return "".join(chunks)
|
|
495
|
+
|
|
496
|
+
def _generate_armcc_assembly(self):
    """Render this function as ARM Compiler (ARMCC) assembly text.

    Output order: optional THUMB directive, optional constants AREA, the
    code AREA header (with ALIGN / PRESERVE8 / FPU requirement as
    configured), IMPORT lines, the PROC body, an optional literal pool,
    then ENDP / END.
    """
    import os

    from nervapy.arm.generic import BranchInstruction
    from nervapy.arm.instructions import Instruction
    from nervapy.arm.pseudo import LabelQuasiInstruction

    eol = os.linesep
    function_label = self.name
    constants_label = self.name + "_constants"
    chunks = []
    emit = chunks.append

    if self.is_thumb:
        emit(" THUMB" + eol)

    # ARMCC constants section
    if len(self.constants) > 0:
        emit(" AREA ||.constdata||, DATA, READONLY" + eol)
        emit(constants_label + eol)
        data_declaration_map = {
            8: "DCB",
            16: "DCW",
            32: "DCD",
            64: "DCDU",
            128: "DCDU",
        }
        for bucket in self.constants:
            for constant in bucket.constants:
                emit(
                    "{Label} {Declaration} {Value}".format(
                        Label=constant.label,
                        Declaration=data_declaration_map[constant.size],
                        Value=", ".join([str(constant)] * constant.repeats),
                    )
                    + eol
                )
        emit(eol)

    # ARMCC code section; the AREA line is finished only after the optional
    # ALIGN attribute has been appended to it.
    emit(" AREA ||.text||, CODE, READONLY")
    if self.alignment > 0:
        emit(", ALIGN={0}".format(self.alignment))
    emit(eol)
    if self.preserve8:
        emit(" PRESERVE8" + eol)
    if self.armcc_fpu_spec:
        emit(" " + self.armcc_fpu_spec + eol)
    emit(eol)

    # Add IMPORT statements for external functions
    if hasattr(self, "external_functions") and len(self.external_functions) > 0:
        for func_name in sorted(self.external_functions):
            emit(" IMPORT " + func_name + eol)
        emit(eol)

    emit(function_label + " PROC" + eol)
    emit(" EXPORT " + function_label + eol)

    for instruction in self.instructions:
        if isinstance(instruction, BranchInstruction):
            # Branch targets are function-local labels: <function>_<label>
            emit(
                " "
                + "{0} {1}_{2}".format(
                    instruction.name, self.name, instruction.operands[0].label
                )
                + eol
            )
        elif isinstance(instruction, Instruction):
            constant = instruction.get_constant()
            if constant is not None:
                # Must be set before the instruction is converted to text.
                constant.prefix = constants_label
            emit(" " + str(instruction) + eol)
        elif isinstance(instruction, LabelQuasiInstruction):
            emit("{0}_{1}".format(self.name, instruction.name) + eol)
        else:
            emit(" " + str(instruction) + eol)

    # Generate literal pool if present
    if hasattr(self, 'literal_pool') and self.literal_pool.entries:
        emit(eol)
        emit(self.literal_pool.generate_assembly(format='armcc') + eol)

    emit(" ENDP" + eol)
    emit(" END" + eol)
    return "".join(chunks)
|
|
581
|
+
|
|
582
|
+
@property
def gnu_arch_spec(self):
    """GAS ``.arch`` / ``.cpu`` directive chosen from ``self.isa_extensions``.

    Candidates are tried in priority order and the first extension present
    wins; ``.arch armv5t`` is the fallback.
    """
    from nervapy.arm.isa import Extension

    isa_extensions = self.isa_extensions
    # (Extension attribute name, directive), highest priority first. Names
    # are resolved lazily so only the extensions actually tested are read.
    priority = (
        ("V8_1MMain", ".arch armv8.1-m.main"),
        ("V8MMain", ".arch armv8-m.main"),
        ("Div", ".cpu cortex-a15"),
        ("V7MP", ".cpu cortex-a9"),
        ("V7M", ".arch armv7-m"),
        ("V8MBase", ".arch armv8-m.base"),
        ("V7", ".arch armv7-a"),
        ("V6K", ".arch armv6zk"),
        ("V6", ".arch armv6"),
        ("V5E", ".arch armv5te"),
    )
    for extension_name, directive in priority:
        if getattr(Extension, extension_name) in isa_extensions:
            return directive
    return ".arch armv5t"
|
|
609
|
+
|
|
610
|
+
@property
def gnu_fpu_spec(self):
    """GAS ``.fpu`` directive chosen from ``self.isa_extensions``.

    :returns: the directive string, or None when no floating-point or
        SIMD extension is in use.
    """
    from nervapy.arm.isa import Extension

    isa_extensions = self.isa_extensions

    # ARMv8-M and ARMv8.1-M profiles share the same selection:
    # MVE first, then the FPv5 single-precision FPU.
    if (
        Extension.V8MMain in isa_extensions
        or Extension.V8MBase in isa_extensions
        or Extension.V8_1MMain in isa_extensions
    ):
        if Extension.MVE in isa_extensions:
            return ".fpu mve"
        if Extension.VFP4 in isa_extensions or Extension.VFP3 in isa_extensions:
            return ".fpu fpv5-sp-d16"
        return None

    if Extension.NEON2 in isa_extensions or (
        Extension.VFP4 in isa_extensions and Extension.NEON in isa_extensions
    ):
        return ".fpu neon-vfpv4"
    # Parentheses spell out the existing precedence: `and` binds before `or`.
    if Extension.NEONHP in isa_extensions or (
        Extension.VFPHP in isa_extensions and Extension.NEON in isa_extensions
    ):
        return ".fpu neon-fp16"
    if Extension.NEON in isa_extensions:
        return ".fpu neon"
    if Extension.VFPHP in isa_extensions:
        if Extension.VFPd32 in isa_extensions:
            return ".fpu vfpv3-fp16"
        return ".fpu vfpv3-d16-fp16"
    if Extension.VFP3 in isa_extensions:
        if Extension.VFPd32 in isa_extensions:
            return ".fpu vfpv3"
        return ".fpu vfpv3-d16"
    if Extension.VFP in isa_extensions or Extension.VFP2 in isa_extensions:
        # Plain VFP / VFPv2 code needs an FPU directive too; without one the
        # function would be emitted with no `.fpu` line at all.
        return ".fpu vfp"
    return None
|
|
658
|
+
|
|
659
|
+
@property
|
|
660
|
+
def armcc_arch_spec(self):
|
|
661
|
+
"""Generate ARMCC-compatible architecture specification."""
|
|
662
|
+
from nervapy.arm.isa import Extension
|
|
663
|
+
|
|
664
|
+
isa_extensions = self.isa_extensions
|
|
665
|
+
if Extension.V7M in isa_extensions:
|
|
666
|
+
return "ARM"
|
|
667
|
+
elif Extension.V7MP in isa_extensions:
|
|
668
|
+
return "ARM"
|
|
669
|
+
elif Extension.V7 in isa_extensions:
|
|
670
|
+
return "ARM"
|
|
671
|
+
elif Extension.V6K in isa_extensions:
|
|
672
|
+
return "ARM"
|
|
673
|
+
elif Extension.V6 in isa_extensions:
|
|
674
|
+
return "ARM"
|
|
675
|
+
elif Extension.V5E in isa_extensions:
|
|
676
|
+
return "ARM"
|
|
677
|
+
else:
|
|
678
|
+
return "ARM"
|
|
679
|
+
|
|
680
|
+
@property
def armcc_fpu_spec(self):
    """ARMCC ``REQUIRE`` FPU specification chosen from ``self.isa_extensions``.

    :returns: the requirement string, or None when no VFP/NEON extension
        is in use.
    """
    from nervapy.arm.isa import Extension

    isa_extensions = self.isa_extensions

    if Extension.NEON2 in isa_extensions or Extension.VFP4 in isa_extensions:
        return "REQUIRE VFPv4"
    # `and` binds tighter than `or`: NEONHP alone, or VFPHP together with NEON.
    if Extension.NEONHP in isa_extensions or (
        Extension.VFPHP in isa_extensions and Extension.NEON in isa_extensions
    ):
        return "REQUIRE VFPv3_FP16"
    if Extension.NEON in isa_extensions:
        return "REQUIRE VFPv3"
    if Extension.VFPHP in isa_extensions:
        return "REQUIRE VFPv3_FP16"
    if Extension.VFP3 in isa_extensions:
        return "REQUIRE VFPv3"
    if Extension.VFP in isa_extensions or Extension.VFP2 in isa_extensions:
        return "REQUIRE VFPv2"
    return None
|
|
704
|
+
|
|
705
|
+
def add_instruction(self, instruction):
    """Append *instruction* to the function; None is ignored.

    For real ``Instruction`` objects, every required ISA extension must be
    offered by the target, any referenced local variable is registered with
    the stack frame, and the output registers are handed to the stack frame
    for preservation.

    :raises ValueError: if the instruction needs an extension the target
        microarchitecture does not support.
    """
    from nervapy.arm.instructions import Instruction

    if instruction is None:
        return

    if isinstance(instruction, Instruction):
        for required in instruction.isa_extensions:
            if required in self.target.extensions:
                continue
            raise ValueError(
                "{0} is not supported on the target microarchitecture".format(
                    required
                )
            )

        variable = instruction.get_local_variable()
        if variable is not None:
            self.stack_frame.add_variable(variable.get_root())

        outputs = instruction.get_output_registers_list()
        self.stack_frame.preserve_registers(outputs)

    self.instructions.append(instruction)
|
|
723
|
+
|
|
724
|
+
def add_instructions(self, instructions):
|
|
725
|
+
for instruction in instructions:
|
|
726
|
+
self.add_instruction(instruction)
|
|
727
|
+
|
|
728
|
+
def preserve(self, *registers):
    """Force extra registers to be saved by the function prologue/epilogue.

    Intended for registers that the automatic analysis would not pick up
    (for example registers touched only through inline logic or explicit
    control flow).  Every register is handed to
    ``self.stack_frame.force_preserve_register``.

    Each positional argument may be a single register or a tuple/list of
    registers, so both calling styles work::

        with Function("my_func", ...) as f:
            f.preserve(r8, r9)      # varargs style
            f.preserve((r8, r9))    # tuple style (like PUSH)
            f.preserve(lr)
    """
    for entry in registers:
        # Normalise a lone register to a one-element group.
        group = entry if isinstance(entry, (tuple, list)) else (entry,)
        for register in group:
            self.stack_frame.force_preserve_register(register)
|
|
750
|
+
|
|
751
|
+
def decompose_instructions(self):
    """Replace every ``ReturnInstruction`` by the list it expands to.

    All other entries of ``self.instructions`` are kept unchanged and in
    their original order.
    """
    from nervapy.arm.pseudo import ReturnInstruction

    def expand(item):
        # A return pseudo-instruction contributes its expansion; anything
        # else contributes itself.
        if isinstance(item, ReturnInstruction):
            return item.to_instruction_list()
        return (item,)

    self.instructions = [
        piece for item in self.instructions for piece in expand(item)
    ]
|
|
761
|
+
|
|
762
|
+
def generate_prolog_and_epilog(self):
    """Splice the stack-frame prologue and epilogue into the instruction list.

    The prologue is inserted right after the ``ENTRY`` label, and the
    epilogue right before every ``BranchExchangeInstruction``.
    """
    from nervapy.arm.generic import BranchExchangeInstruction
    from nervapy.arm.pseudo import LabelQuasiInstruction

    prologue = self.stack_frame.generate_prologue()
    epilogue = self.stack_frame.generate_epilogue()

    rebuilt = []
    for item in self.instructions:
        if isinstance(item, LabelQuasiInstruction):
            rebuilt.append(item)
            if item.name == "ENTRY":
                rebuilt.extend(prologue)
            continue
        if isinstance(item, BranchExchangeInstruction):
            rebuilt.extend(epilogue)
        rebuilt.append(item)
    self.instructions = rebuilt
|
|
780
|
+
|
|
781
|
+
def generate_labels(self):
    """Ensure the instruction list contains an ``ENTRY`` label.

    When no label named ``ENTRY`` exists, one is inserted at position 0.
    """
    from nervapy.arm.instructions import Operand
    from nervapy.arm.pseudo import LabelQuasiInstruction

    has_entry = any(
        isinstance(item, LabelQuasiInstruction) and item.name == "ENTRY"
        for item in self.instructions
    )
    if not has_entry:
        self.instructions.insert(0, LabelQuasiInstruction(Operand("ENTRY")))
|
|
791
|
+
|
|
792
|
+
def get_label_table(self):
    """Return a dict mapping each label name to its index in ``self.instructions``.

    If a name occurs more than once, the last occurrence wins.
    """
    from nervapy.arm.pseudo import LabelQuasiInstruction

    return {
        item.name: position
        for position, item in enumerate(self.instructions)
        if isinstance(item, LabelQuasiInstruction)
    }
|
|
800
|
+
|
|
801
|
+
def find_entry_label(self):
    """Return the index of the first ``ENTRY`` label in ``self.instructions``.

    Raises:
        ValueError: if there is no label named ``ENTRY``.
    """
    from nervapy.arm.pseudo import LabelQuasiInstruction

    for position, item in enumerate(self.instructions):
        if isinstance(item, LabelQuasiInstruction) and item.name == "ENTRY":
            return position
    raise ValueError("Instruction stream does not contain the ENTRY label")
|
|
809
|
+
|
|
810
|
+
def find_exit_points(self):
    """Return the indices of all ``BranchExchangeInstruction`` entries, ascending."""
    from nervapy.arm.generic import BranchExchangeInstruction

    return [
        position
        for position, item in enumerate(self.instructions)
        if isinstance(item, BranchExchangeInstruction)
    ]
|
|
818
|
+
|
|
819
|
+
def determine_branches(self):
    """Record, on every label, the indices of the branches that target it.

    Each label's ``input_branches`` is reset to an empty set and then
    filled with the positions of the ``BranchInstruction`` entries whose
    first operand names that label.

    Raises:
        KeyError: if a branch targets a label that is not in the label table.
    """
    from nervapy.arm.generic import BranchInstruction
    from nervapy.arm.pseudo import LabelQuasiInstruction

    labels = self.get_label_table()

    for item in self.instructions:
        if isinstance(item, LabelQuasiInstruction):
            item.input_branches = set()

    for position, item in enumerate(self.instructions):
        if not isinstance(item, BranchInstruction):
            continue
        destination = labels[item.operands[0].label]
        self.instructions[destination].input_branches.add(position)
|
|
833
|
+
|
|
834
|
+
def reserve_registers(self):
    """Do nothing; this step currently has no implementation."""
    return None
|
|
836
|
+
|
|
837
|
+
def determine_available_registers(self):
    """Annotate each instruction with the registers written before it.

    Starting at the ``ENTRY`` label, the instruction list is walked
    forward.  Every ``Instruction`` receives ``available_registers``: a
    copy of the set of output registers accumulated on the path so far.
    Each branch is followed into its target once (with the set accumulated
    up to the branch), and a walk stops after an unconditional branch.
    """
    from nervapy.arm.generic import BranchInstruction
    from nervapy.arm.instructions import Instruction

    followed_branches = set()
    labels = self.get_label_table()

    def propagate(stream, first, inherited):
        available = set(inherited)
        for position in range(first, len(stream)):
            item = stream[position]
            if not isinstance(item, Instruction):
                continue
            item.available_registers = set(available)
            if not isinstance(item, BranchInstruction):
                # Ordinary instruction: its outputs become available below.
                available |= set(item.get_output_registers_list())
                continue
            if position not in followed_branches:
                destination = labels[item.operands[0].label]
                followed_branches.add(position)
                propagate(stream, destination, available)
            if not item.is_conditional():
                # Nothing falls through an unconditional branch.
                return

    entry_index = self.find_entry_label()
    propagate(self.instructions, entry_index, set())
|
|
867
|
+
|
|
868
|
+
def determine_live_registers(self, exclude_parameter_loads=False):
    """Compute ``live_registers`` for the instructions by backward walks.

    A walk starts at every exit point (see ``find_exit_points``) and moves
    towards the start of the instruction list.  At each ``Instruction`` the
    registers it writes are removed from the live set, the registers it
    reads are added, the result is merged with whatever the instruction
    already carried in ``live_registers``, and the merged set is stored
    back.  At a label the walk is additionally continued from every branch
    that targets the label (each branch is entered once).  A walk stops
    when it reaches an unconditional branch other than the one it started
    from, because control never falls through such a branch.

    Args:
        exclude_parameter_loads: when true, the input registers of
            ``LoadArgumentPseudoInstruction`` are not marked live.
    """
    from nervapy.arm.generic import BranchInstruction
    from nervapy.arm.instructions import Instruction
    from nervapy.arm.pseudo import (LabelQuasiInstruction,
                                    LoadArgumentPseudoInstruction)
    from nervapy.arm.registers import Register

    self.determine_branches()
    # NOTE(review): per-instruction ``live_registers`` is not reset here, so a
    # repeated call merges with the result of the previous one -- confirm
    # this is intended.
    for instruction in self.instructions:
        if isinstance(instruction, Instruction) and isinstance(
            instruction, BranchInstruction
        ):
            instruction.is_visited = False

    def mark_live_registers(instructions, exit_point, initial_live_registers):
        # Maps register id -> bit mask of the parts that are live.
        live_registers = dict(initial_live_registers)
        # Walk from the bottom to the top of the linear block.
        for i in range(exit_point, -1, -1):
            instruction = instructions[i]
            if (
                i != exit_point
                and isinstance(instruction, BranchInstruction)
                # ``is_conditional`` is a method; testing the bound method
                # itself is always truthy and would never stop the walk.
                and not instruction.is_conditional()
            ):
                return
            if isinstance(instruction, Instruction):
                # First mark registers written by this instruction as
                # non-live, then mark registers read by it as live.
                for output_register in instruction.get_output_registers_list():
                    register_id = output_register.id
                    if register_id in live_registers:
                        live_registers[register_id] &= ~output_register.mask
                        if live_registers[register_id] == 0:
                            del live_registers[register_id]

                if not (
                    exclude_parameter_loads
                    and isinstance(instruction, LoadArgumentPseudoInstruction)
                ):
                    for input_register in instruction.get_input_registers_list():
                        live_registers[input_register.id] = (
                            live_registers.get(input_register.id, 0)
                            | input_register.mask
                        )

                # Merge with registers previously determined as live here.
                for known_register in instruction.live_registers:
                    live_registers[known_register.id] = (
                        live_registers.get(known_register.id, 0)
                        | known_register.mask
                    )

                instruction.live_registers = {
                    Register.from_parts(register_id, mask, expand=True)
                    for register_id, mask in live_registers.items()
                }
            elif isinstance(instruction, LabelQuasiInstruction):
                for entry_point in instruction.input_branches:
                    if not instructions[entry_point].is_visited:
                        instructions[entry_point].is_visited = True
                        mark_live_registers(
                            instructions, entry_point, live_registers
                        )

    for exit_point in self.find_exit_points():
        mark_live_registers(self.instructions, exit_point, {})
|
|
944
|
+
|
|
945
|
+
def check_live_registers(self):
    """Do nothing; this check currently has no implementation."""
    return None
|
|
947
|
+
|
|
948
|
+
def print_live_registers(self, label=""):
    """Remember the current position for a later live-register report.

    The most recently added ``Instruction`` (or ``None`` when there is none
    yet) is stored together with *label* in ``self._live_register_markers``.
    The instruction object is stored rather than its index, because
    indices can change when the instruction list is rewritten.

    Args:
        label: Optional text identifying this location in the report.
    """
    from nervapy.arm.instructions import Instruction

    last_instruction = next(
        (
            item
            for item in reversed(self.instructions)
            if isinstance(item, Instruction)
        ),
        None,
    )
    self._live_register_markers.append((last_instruction, label))
|
|
968
|
+
|
|
969
|
+
def _report_live_registers_at_markers(self):
    """Print the live registers stored at every recorded marker.

    Reads ``live_registers`` from each marked instruction, so it is only
    meaningful once liveness analysis has filled that attribute in.
    Registers are grouped by class (GP, S, D, Q) and sorted by
    ``(id, mask)`` within each group.
    """
    from nervapy.arm.instructions import Instruction
    from nervapy.arm.registers import (DRegister, GeneralPurposeRegister,
                                       QRegister, SRegister)

    def describe(register, reg_type):
        """Format a register, using its registered name if one exists."""
        if not register.is_virtual:
            return str(register)
        vreg_id = (register.number - 0x40000) >> 12
        name = self._register_names.get(register.number, None)
        prefix = reg_type.lower()
        if name:
            return f"{prefix}-vreg<{vreg_id}, {name}>"
        return f"{prefix}-vreg<{vreg_id}>"

    # (printed heading, prefix for virtual registers, register class)
    categories = (
        (" GP", "gp", GeneralPurposeRegister),
        (" S", "s", SRegister),
        (" D", "d", DRegister),
        (" Q", "q", QRegister),
    )

    for instr_obj, label in self._live_register_markers:
        if instr_obj is None:
            print(f"Live registers {label}: No instructions yet")
            continue

        live_regs = getattr(instr_obj, "live_registers", set())
        if not live_regs:
            print(f"Live registers {label}: None")
            continue

        print(f"Live registers {label}:")
        for heading, reg_type, register_class in categories:
            members = [r for r in live_regs if isinstance(r, register_class)]
            if not members:
                continue
            ordered = sorted(members, key=lambda x: (x.id, x.mask))
            listing = ", ".join(describe(r, reg_type) for r in ordered)
            print(f"{heading} ({len(members)}): {listing}")
|
|
1016
|
+
|
|
1017
|
+
# all_registers = self.abi.volatile_registers + list(reversed(self.abi.argument_registers)) + self.abi.callee_save_registers
|
|
1018
|
+
# available_registers = { Register.GPType: list(), Register.WMMXType: list(), Register.VFPType: list() }
|
|
1019
|
+
# for register in all_registers:
|
|
1020
|
+
# if register not in available_registers[register.regtype]:
|
|
1021
|
+
# available_registers[register.regtype].append(register)
|
|
1022
|
+
# for instruction in self.instructions:
|
|
1023
|
+
# live_registers = { Register.GPType: set(), Register.WMMXType: set(), Register.VFPType: set() }
|
|
1024
|
+
# if isinstance(instruction, Instruction):
|
|
1025
|
+
# for live_register in instruction.live_registers:
|
|
1026
|
+
# live_registers[live_register.regtype].add(live_register)
|
|
1027
|
+
# for register_type in live_registers.keys():
|
|
1028
|
+
# if len(live_registers[register_type]) > len(available_registers[register_type]):
|
|
1029
|
+
# raise ValueError("Not enough available registers to allocate live registers at instruction {0}".format(instruction))
|
|
1030
|
+
|
|
1031
|
+
def determine_register_relations(self):
    """Prepare the tables the register allocator works from.

    Using the per-instruction ``live_registers`` sets this method fills:

    * ``self.allocation_options`` -- for each virtual register id, the
      list of physical-register bitboards it may still be assigned to;
    * ``self.unallocated_registers`` -- ``(id, type)`` entries for the
      virtual registers seen;
    * ``self.conflicting_registers`` -- for each virtual register id, the
      ids of same-type virtual registers seen at the same instruction.

    Candidates that overlap a physical register live at the same
    instruction are removed.  Finally, the registers of every
    multi-register NEON load/store or VFP load/store-multiple list are
    grouped: their individual entries are replaced by a single entry keyed
    by the tuple of ids, whose options are tuples of bitboards that place
    the registers in consecutive physical registers.

    Raises:
        RegisterAllocationError: if a register list has no consecutive
            placement among the remaining candidates.
    """
    from nervapy import RegisterAllocationError
    from nervapy.arm.instructions import Instruction
    from nervapy.arm.registers import DRegister, Register, SRegister
    from nervapy.arm.vfpneon import (NeonLoadStoreInstruction,
                                     VFPLoadStoreMultipleInstruction)

    # Maps tuple of register ids -> tuple of admissible bitboard tuples.
    constraints = dict()

    def sequential_options(
        register_list, register_id_list, positions, unit_mask, stride, get_id
    ):
        """List placements of register_list in consecutive physical slots."""
        found = list()
        for position in positions:
            register_bitboards = [
                register.extend_bitboard(unit_mask << (position + stride * i))
                for i, register in enumerate(register_list)
            ]
            pairs = list(zip(register_list, register_bitboards))
            # Every register must still be allowed to take its slot.
            if any(
                bitboard not in self.allocation_options[get_id(register)]
                for register, bitboard in pairs
            ):
                continue
            # Registers with the same id must land on the same bitboard.
            register_id_map = dict()
            consistent = True
            for register, bitboard in pairs:
                if register_id_map.setdefault(get_id(register), bitboard) != bitboard:
                    consistent = False
                    break
            if not consistent:
                continue
            # Bitboards of different ids must not overlap.
            allocation_bitboard = 0
            overlapping = False
            for bitboard in register_id_map.values():
                if (allocation_bitboard & bitboard) != 0:
                    overlapping = True
                    break
                allocation_bitboard |= bitboard
            if overlapping:
                continue
            found.append(
                tuple(register_id_map[register_id] for register_id in register_id_list)
            )
        return found

    def record_constraint(register_id_list, options, instruction):
        """Store options for a register group, intersecting with earlier ones."""
        if not options:
            raise RegisterAllocationError(
                "Impossible virtual register combination in instruction %s"
                % instruction
            )
        if len(register_id_list) > 1:
            if register_id_list in constraints:
                constraints[register_id_list] = tuple(
                    option
                    for option in constraints[register_id_list]
                    if option in options
                )
            else:
                constraints[register_id_list] = tuple(options)

    def id_by_method(register):
        return register.get_id()

    def id_by_attribute(register):
        return register.id

    all_registers = (
        self.abi.volatile_registers
        + list(reversed(self.abi.argument_registers))
        + self.abi.callee_save_registers
    )
    available_registers = {
        Register.GPType: list(),
        Register.WMMXType: list(),
        Register.VFPType: list(),
    }
    for register in all_registers:
        if register.type == Register.GPType or register.type == Register.WMMXType:
            register_bitboard = 0x1 << register.get_physical_number()
            if register_bitboard not in available_registers[register.type]:
                available_registers[register.type].append(register_bitboard)

    for instruction in self.instructions:
        if isinstance(instruction, Instruction):
            # Track all virtual registers used in the instruction (both
            # live and outputs).
            virtual_live_registers = [
                register
                for register in instruction.live_registers
                if register.is_virtual
            ]
            # Also include output registers that may not be in
            # live_registers (e.g. outputs that are written but never read).
            for output_reg in instruction.get_output_registers_list():
                if output_reg.is_virtual and output_reg not in virtual_live_registers:
                    virtual_live_registers.append(output_reg)

            for registerX in virtual_live_registers:
                if registerX.type == Register.VFPType:
                    # Work on the enclosing register when there is one.
                    if isinstance(registerX, SRegister) and registerX.parent:
                        registerX = registerX.parent
                    if isinstance(registerX, DRegister) and registerX.parent:
                        registerX = registerX.parent
                    if registerX.get_id() not in self.allocation_options:
                        if isinstance(registerX, SRegister):
                            self.allocation_options[registerX.id] = [
                                (0x1 << n) for n in range(32)
                            ]
                        elif isinstance(registerX, DRegister):
                            if self.target.has_vfpd32:
                                self.allocation_options[registerX.id] = [
                                    (0x3 << n) for n in range(0, 64, 2)
                                ]
                            else:
                                self.allocation_options[registerX.id] = [
                                    (0x3 << n) for n in range(0, 32, 2)
                                ]
                        else:
                            self.allocation_options[registerX.id] = [
                                (0xF << n) for n in range(0, 64, 4)
                            ]
                else:
                    if registerX.id not in self.allocation_options:
                        self.allocation_options[registerX.id] = list(
                            available_registers[registerX.type]
                        )

                self.unallocated_registers.append((registerX.id, registerX.type))

                # Setup the set of conflicting registers for each virtual
                # register.
                if registerX.id not in self.conflicting_registers:
                    self.conflicting_registers[registerX.id] = set()
                for registerY in virtual_live_registers:
                    # VFP registers conflict even if they differ in size.
                    if (
                        registerX.id != registerY.id
                        and registerX.type == registerY.type
                    ):
                        self.conflicting_registers[registerX.id].add(registerY.id)

    # Mark available physical registers for each virtual register.
    for instruction in self.instructions:
        if isinstance(instruction, Instruction):
            virtual_live_registers = [
                register
                for register in instruction.live_registers
                if register.is_virtual
            ]
            # If a physical register is live at some point, it can not be
            # allocated for a virtual register live at the same point.
            physical_live_registers = [
                register
                for register in instruction.live_registers
                if not register.is_virtual
            ]
            for virtual_register in virtual_live_registers:
                for physical_register in physical_live_registers:
                    if virtual_register.type == physical_register.type:
                        virtual_register_id = virtual_register.id
                        physical_register_bitboard = physical_register.bitboard
                        # Slice assignment keeps the same list object.
                        self.allocation_options[virtual_register_id][:] = [
                            candidate
                            for candidate in self.allocation_options[
                                virtual_register_id
                            ]
                            if (candidate & physical_register_bitboard) == 0
                        ]

    # Detect group constraints.
    for instruction in self.instructions:
        if isinstance(
            instruction, (NeonLoadStoreInstruction, VFPLoadStoreMultipleInstruction)
        ):
            if isinstance(instruction, NeonLoadStoreInstruction):
                register_list = instruction.operands[0].get_registers_list()
                physical_registers_count = 32
            else:
                register_list = instruction.operands[1].get_registers_list()
                physical_registers_count = 32 if self.target.has_vfpd32 else 16
            if len(register_list) > 1:
                if all(isinstance(register, DRegister) for register in register_list):
                    # For VLD1/VST1 instructions all registers must be
                    # allocated to sequential physical registers.
                    get_id = id_by_method
                    unit_mask, stride = 0x3, 2
                    positions = range(
                        0,
                        2 * physical_registers_count - 2 * len(register_list) + 2,
                        2,
                    )
                elif all(
                    isinstance(register, SRegister) for register in register_list
                ) and isinstance(instruction, VFPLoadStoreMultipleInstruction):
                    # For VLDM/VSTM instructions all registers must be
                    # allocated to sequential physical registers.
                    get_id = id_by_attribute
                    unit_mask, stride = 0x1, 1
                    positions = range(0, 32 - len(register_list) + 1)
                else:
                    assert False
                # Ids in order of first appearance, without repeats.
                register_id_list = tuple(
                    dict.fromkeys(get_id(register) for register in register_list)
                )
                options = sequential_options(
                    register_list,
                    register_id_list,
                    positions,
                    unit_mask,
                    stride,
                    get_id,
                )
                record_constraint(register_id_list, options, instruction)

    report_register_constraints = False
    if report_register_constraints:
        for register_list, options in constraints.items():
            print("REGISTER CONSTRAINTS: ", list(map(str, register_list)))
            for option in options:
                print("\t", ["%016X" % bitboard for bitboard in option])

    # Merging of different groups sharing a register is not implemented,
    # so each register id may appear in one group only.
    constrained_register_id_list = [
        register_id
        for register_id_list in constraints.keys()
        for register_id in register_id_list
    ]
    assert len(constrained_register_id_list) == len(
        set(constrained_register_id_list)
    )

    # Remove individual registers from the list of unallocated registers
    # and add the register groups instead.
    grouped_entries = {
        (register_id, Register.VFPType)
        for register_id in constrained_register_id_list
    }
    self.unallocated_registers[:] = [
        entry for entry in self.unallocated_registers if entry not in grouped_entries
    ]
    for register_id_list in constraints.keys():
        self.unallocated_registers.append((register_id_list, Register.VFPType))

    # NOTE: ``self.conflicting_registers`` stays keyed by individual
    # register ids; it is not regrouped here.

    # Remove individual registers from the allocation options and add the
    # register groups instead, each with its own accumulated options.
    for constrained_register_id in constrained_register_id_list:
        del self.allocation_options[constrained_register_id]
    for register_id_list, constrained_options in constraints.items():
        self.allocation_options[register_id_list] = list(constrained_options)
|
|
1368
|
+
|
|
1369
|
+
def _get_register_type_name(self, register_type):
    """Return a human-readable name for *register_type*."""
    from nervapy.arm.registers import Register

    known_names = (
        (Register.GPType, "general-purpose"),
        (Register.VFPType, "VFP/NEON"),
        (Register.WMMXType, "WMMX"),
    )
    for candidate, label in known_names:
        if register_type == candidate:
            return label
    return "unknown type %d" % register_type
|
|
1381
|
+
|
|
1382
|
+
def _get_available_registers_info(self, register_type):
    """Return a fixed descriptive string of the registers of *register_type*.

    The text is static; it is not derived from the ABI or the target.
    """
    from nervapy.arm.registers import Register

    descriptions = (
        # r13 (sp), r14 (lr) and r15 (pc) are special and left out.
        (Register.GPType, "r0-r12 (13 registers available for allocation)"),
        (
            Register.VFPType,
            "s0-s31 or d0-d31 or q0-q15 (depending on instruction)",
        ),
        (Register.WMMXType, "wr0-wr15 (16 registers)"),
    )
    for candidate, text in descriptions:
        if register_type == candidate:
            return text
    return "unknown"
|
|
1396
|
+
|
|
1397
|
+
def _count_virtual_registers_by_type(self, register_type):
|
|
1398
|
+
"""Count how many virtual registers of a given type are actually being allocated (after optimization)."""
|
|
1399
|
+
from nervapy.arm.registers import Register
|
|
1400
|
+
|
|
1401
|
+
# Count unique registers in unallocated_registers that match the given type
|
|
1402
|
+
# This list has already been filtered by liveness analysis
|
|
1403
|
+
# Use a set to avoid counting duplicates
|
|
1404
|
+
unique_ids = set()
|
|
1405
|
+
for virtual_register_id, virtual_register_type in self.unallocated_registers:
|
|
1406
|
+
if isinstance(virtual_register_id, tuple):
|
|
1407
|
+
# Register list - all registers in the list should be the same type
|
|
1408
|
+
if virtual_register_type == register_type:
|
|
1409
|
+
unique_ids.update(virtual_register_id)
|
|
1410
|
+
else:
|
|
1411
|
+
# Single register
|
|
1412
|
+
if virtual_register_type == register_type:
|
|
1413
|
+
unique_ids.add(virtual_register_id)
|
|
1414
|
+
return len(unique_ids)
|
|
1415
|
+
|
|
1416
|
+
def _get_max_physical_registers(self, register_type):
    """Return a fixed upper bound on physical registers of *register_type*.

    The numbers are constants (13 general-purpose, 32 VFP/NEON, 16 WMMX,
    0 for anything else); they are not derived from the ABI or the target.
    """
    from nervapy.arm.registers import Register

    limits = (
        # r0-r12; the real number can vary with ABI and function constraints.
        (Register.GPType, 13),
        # A simplification: counted as single-precision registers.
        (Register.VFPType, 32),
        (Register.WMMXType, 16),
    )
    for candidate, count in limits:
        if register_type == candidate:
            return count
    return 0
|
|
1432
|
+
|
|
1433
|
+
def allocate_registers(self):
    """Greedily bind every virtual register to a physical register.

    Works on ``self.unallocated_registers`` (``(id, type)`` pairs, where
    ``id`` may be a tuple for a register list), ``self.allocation_options``
    (remaining candidate bitboards per id) and ``self.conflicting_registers``
    in three passes:

    1. destinations of register-passed arguments take the argument's own
       register when it is still among their options;
    2. register lists take their first remaining option;
    3. all other registers take their first remaining option.

    Every binding removes overlapping candidates from the options of the
    conflicting virtual registers. Afterwards all virtual register operands
    of real instructions are bound to the chosen physical registers.
    ``self.unallocated_registers`` is emptied and
    ``self._vr_counts_by_type`` is (re)populated as a side effect.

    Raises:
        RuntimeError: if some virtual register (or register list) has no
            candidate left, or if an instruction uses a virtual register
            that was never tracked for allocation.
    """
    from nervapy.arm.instructions import Instruction
    from nervapy.arm.pseudo import LoadArgumentPseudoInstruction
    from nervapy.arm.registers import Register

    # Snapshot the per-type counts now: the work list is consumed below and
    # these numbers are only needed for error messages.
    self._vr_counts_by_type = {}
    for reg_type in [Register.GPType, Register.VFPType, Register.WMMXType]:
        self._vr_counts_by_type[reg_type] = self._count_virtual_registers_by_type(reg_type)

    # Map from virtual register id to physical register
    register_allocation = dict()
    for virtual_register_id, virtual_register_type in self.unallocated_registers:
        register_allocation[virtual_register_id] = None

    def bind_register(virtual_register_id, physical_register):
        # Remove option to allocate any conflicting virtual register to the
        # same physical register or its enclosing register.
        physical_register_bitboard = physical_register.bitboard
        for conflicting_register_id in self.conflicting_registers[virtual_register_id]:
            if conflicting_register_id in self.allocation_options:
                options = self.allocation_options[conflicting_register_id]
                # Filter in place via slice assignment. Calling list.remove()
                # while iterating the same list skips the element following
                # each removal, which left overlapping candidates behind.
                options[:] = [
                    bitboard
                    for bitboard in options
                    if (bitboard & physical_register_bitboard) == 0
                ]
        register_allocation[virtual_register_id] = physical_register

    def bind_registers(virtual_register_id_list, physical_register_id_list):
        # Same pruning as bind_register, applied element-wise to a register
        # list. NOTE(review): this path reads get_bitboard() while
        # bind_register reads .bitboard; kept as in the original.
        physical_register_bitboard_list = [
            physical_register.get_bitboard()
            for physical_register in physical_register_id_list
        ]
        for virtual_register_id, physical_register_bitboard in zip(
            virtual_register_id_list, physical_register_bitboard_list
        ):
            for conflicting_register_id in self.conflicting_registers[virtual_register_id]:
                for allocation_key, allocation_option in self.allocation_options.items():
                    if isinstance(allocation_key, tuple):
                        if conflicting_register_id in allocation_key:
                            conflicting_register_index = allocation_key.index(
                                conflicting_register_id
                            )
                            # In-place filter (see bind_register).
                            allocation_option[:] = [
                                bitboard_list
                                for bitboard_list in allocation_option
                                if (
                                    bitboard_list[conflicting_register_index]
                                    & physical_register_bitboard
                                ) == 0
                            ]
                    elif conflicting_register_id == allocation_key:
                        allocation_option[:] = [
                            bitboard
                            for bitboard in allocation_option
                            if (bitboard & physical_register_bitboard) == 0
                        ]

        for virtual_register_id, physical_register_id in zip(
            virtual_register_id_list, physical_register_id_list
        ):
            register_allocation[virtual_register_id] = physical_register_id

    def is_allocated(virtual_register_id):
        return bool(register_allocation[virtual_register_id])

    def find_peak_pressure(register_type):
        # Locate the instruction with the most simultaneously live virtual
        # registers of the given type (first one wins on ties).
        max_live = 0
        max_live_instr = None
        max_live_idx = None
        max_live_regs = []
        for idx, instruction in enumerate(self.instructions):
            if hasattr(instruction, 'live_registers'):
                live_regs = [r for r in instruction.live_registers
                             if hasattr(r, 'type') and r.type == register_type and r.is_virtual]
                if len(live_regs) > max_live:
                    max_live = len(live_regs)
                    max_live_instr = instruction
                    max_live_idx = idx
                    # Include variable names in the register representation
                    max_live_regs = []
                    for r in live_regs:
                        reg_str = str(r)
                        var_name = self._register_names.get(r.number, None)
                        if var_name:
                            reg_str = f"{reg_str}, {var_name}"
                        max_live_regs.append(reg_str)
        return max_live, max_live_instr, max_live_idx, max_live_regs

    def describe_location(instr, idx):
        # Prefer source file/line when the instruction carries them,
        # otherwise fall back to its index in the instruction stream.
        location_info = []
        if hasattr(instr, 'source_file') and instr.source_file:
            location_info.append(f"File: {instr.source_file}")
        if hasattr(instr, 'line_number') and instr.line_number:
            location_info.append(f"Line: {instr.line_number}")
        if location_info:
            return f"Max register pressure at {', '.join(location_info)}\n"
        if idx is not None:
            return (
                f"Max register pressure at instruction #{idx} (index in generated code)\n"
                "Hint: Enable collect_origin=True in Function() to see Python source line numbers.\n"
            )
        return ""

    def build_debug_message(register_type, max_phys):
        # Explain an allocation failure in terms of peak register pressure.
        max_live, max_live_instr, max_live_idx, max_live_regs = find_peak_pressure(register_type)
        within_budget = max_live <= max_phys
        if within_budget:
            debug_msg = (f"\n\nPhysical registers available: {max_phys}\n"
                         f"The register pressure ({max_live}/{max_phys}) should be manageable, but the allocator\n"
                         "couldn't find a valid allocation due to conflicting constraints.\n")
        else:
            debug_msg = (f"\n\nThis exceeds the {max_phys} physical registers available.\n"
                         "You need to reduce the number of live registers at once.\n")
        if max_live_instr is not None:
            debug_msg += describe_location(max_live_instr, max_live_idx)
            if within_budget:
                debug_msg += f"Instruction with max pressure: {max_live_instr}\n"
            else:
                debug_msg += f"Instruction: {max_live_instr}\n"
            # Show the source code if available
            if hasattr(max_live_instr, 'source_code') and max_live_instr.source_code:
                debug_msg += f"Source code: {max_live_instr.source_code}\n"
        if within_budget and max_live_regs:
            debug_msg += f"Live virtual registers: {', '.join(sorted(max_live_regs))}\n"
            debug_msg += "\nThis suggests the greedy allocator made suboptimal early choices.\n"
            debug_msg += "Try reordering your code or reducing temporary register usage.\n"
        return debug_msg

    # First allocate parameters
    for instruction in self.instructions:
        if not isinstance(instruction, LoadArgumentPseudoInstruction):
            continue
        if not instruction.argument.register:
            continue
        if not instruction.destination.register.is_virtual:
            continue
        if is_allocated(instruction.destination.register.id):
            continue
        # Keeping the argument in its incoming register avoids a move.
        if (
            instruction.argument.register.bitboard
            in self.allocation_options[instruction.destination.register.id]
        ):
            bind_register(
                instruction.destination.register.id,
                instruction.argument.register,
            )

    # Now allocate registers with special restrictions (register lists)
    for virtual_register_id_list, virtual_register_type in self.unallocated_registers:
        if not isinstance(virtual_register_id_list, tuple):
            continue
        if not self.allocation_options[virtual_register_id_list]:
            # Use saved count from before allocation started
            vr_count = self._vr_counts_by_type.get(virtual_register_type, 0)
            max_phys = self._get_max_physical_registers(virtual_register_type)
            raise RuntimeError(
                "Register allocation failed: No available physical registers for virtual register list %s (type: %s).\n"
                "Your code uses %d virtual %s registers, but only ~%d physical registers are available.\n"
                "To fix: reduce the number of registers used in your Python code."
                % (virtual_register_id_list, self._get_register_type_name(virtual_register_type),
                   vr_count, self._get_register_type_name(virtual_register_type), max_phys)
            )
        physical_register_bitboard_list = self.allocation_options[
            virtual_register_id_list
        ][0]
        physical_registers_list = [
            Register.from_bitboard(physical_register_bitboard, virtual_register_type)
            for physical_register_bitboard in physical_register_bitboard_list
        ]
        bind_registers(virtual_register_id_list, physical_registers_list)

    # Now allocate all other registers (this drains the work list)
    while self.unallocated_registers:
        virtual_register_id, virtual_register_type = self.unallocated_registers.pop(0)
        if isinstance(virtual_register_id, tuple):
            continue
        if is_allocated(virtual_register_id):
            continue
        if not self.allocation_options[virtual_register_id]:
            # Use saved count from before allocation started
            vr_count = self._vr_counts_by_type.get(virtual_register_type, 0)
            max_phys = self._get_max_physical_registers(virtual_register_type)
            debug_msg = build_debug_message(virtual_register_type, max_phys)
            raise RuntimeError(
                "Register allocation failed: No available physical registers for virtual register #%d (type: %s).\n"
                "Your code uses %d virtual %s registers, but only ~%d physical registers are available.\n"
                "Available %s registers: %s\n"
                "To fix: reduce the number of registers used in your Python code.%s"
                % (
                    virtual_register_id,
                    self._get_register_type_name(virtual_register_type),
                    vr_count,
                    self._get_register_type_name(virtual_register_type),
                    max_phys,
                    self._get_register_type_name(virtual_register_type),
                    self._get_available_registers_info(virtual_register_type),
                    debug_msg,
                )
            )
        physical_register_bitboard = self.allocation_options[virtual_register_id][0]
        physical_register = Register.from_bitboard(
            physical_register_bitboard, virtual_register_type
        )
        bind_register(virtual_register_id, physical_register)

    # Verify all virtual registers used in instructions are tracked
    untracked_registers = set()
    for instruction in self.instructions:
        if isinstance(instruction, Instruction):
            for input_register in instruction.get_input_registers_list():
                if input_register.is_virtual:
                    if input_register.id not in register_allocation:
                        untracked_registers.add(input_register.id)
            for output_register in instruction.get_output_registers_list():
                if output_register.is_virtual:
                    if output_register.id not in register_allocation:
                        untracked_registers.add(output_register.id)

    if untracked_registers:
        raise RuntimeError(
            f"Internal error: Virtual registers {sorted(untracked_registers)} used in instructions "
            f"but were not tracked for allocation. This indicates a bug where registers were created "
            f"after liveness analysis or were not properly added to live_registers."
        )

    # Rewrite the operands of every real instruction with the allocation.
    for instruction in self.instructions:
        if isinstance(instruction, Instruction):
            for input_register in instruction.get_input_registers_list():
                if input_register.is_virtual:
                    input_register.bind(register_allocation[input_register.id])
            for output_register in instruction.get_output_registers_list():
                if output_register.is_virtual:
                    output_register.bind(register_allocation[output_register.id])
|
|
1693
|
+
|
|
1694
|
+
# Updates information about registers to be saved/restored in the function prologue/epilogue
|
|
1695
|
+
def update_stack_frame(self):
    """Report the output registers of every real instruction to the stack frame.

    Each ``Instruction`` in ``self.instructions`` has its output register
    list passed to ``self.stack_frame.preserve_registers``; other entries
    in the stream are ignored.
    """
    from nervapy.arm.instructions import Instruction

    for entry in self.instructions:
        if not isinstance(entry, Instruction):
            continue
        written_registers = entry.get_output_registers_list()
        self.stack_frame.preserve_registers(written_registers)
|
|
1703
|
+
|
|
1704
|
+
def remove_assume_statements(self):
    """Drop every ``AssumeInitializedPseudoInstruction`` from the stream.

    ``self.instructions`` is replaced by a new list that keeps all other
    entries in their original order.
    """
    from nervapy.arm.pseudo import AssumeInitializedPseudoInstruction

    self.instructions = [
        entry
        for entry in self.instructions
        if not isinstance(entry, AssumeInitializedPseudoInstruction)
    ]
|
|
1714
|
+
|
|
1715
|
+
def generate_parameter_loads(self):
    """Lower ``LoadArgumentPseudoInstruction`` entries to real instructions.

    * Argument passed in a register different from the destination: ``MOV``.
    * Argument already in the destination register: nothing is emitted.
    * Argument on the stack: ``LDR`` from ``sp`` plus the frame's
      parameters offset plus the argument's stack offset.

    Generated instructions inherit ``live_registers`` and
    ``available_registers`` from the pseudo-instruction they replace. All
    other entries are kept unchanged; ``self.instructions`` is replaced by
    the new list.
    """
    from nervapy.arm.generic import LDR, MOV
    from nervapy.arm.pseudo import LoadArgumentPseudoInstruction
    from nervapy.arm.registers import sp

    lowered = list()
    for entry in self.instructions:
        if not isinstance(entry, LoadArgumentPseudoInstruction):
            lowered.append(entry)
            continue

        parameter = entry.argument
        if parameter.register:
            if entry.destination.register != parameter.register:
                # Parameter lives in another register: register-register move.
                replacement = MOV(entry.destination.register, parameter.register)
            else:
                # Destination already holds the parameter; a MOV would be a no-op.
                continue
        else:
            # Parameter was passed on the stack: load it relative to sp.
            parameter_address = (
                self.stack_frame.get_parameters_offset() + parameter.stack_offset
            )
            replacement = LDR(entry.destination.register, [sp, parameter_address])

        replacement.live_registers = entry.live_registers
        replacement.available_registers = entry.available_registers
        lowered.append(replacement)
    self.instructions = lowered
|
|
1755
|
+
|
|
1756
|
+
def generate_constant_loads(self):
    """Label the constants used by instructions and group them into buckets.

    Every distinct constant returned by ``Instruction.get_constant()`` gets
    a label ``c0``, ``c1``, ... (equal constants share one label) and is
    added to a ``ConstantBucket``. Buckets are tracked per constant
    alignment and appended to ``self.constants`` when created; a bucket
    stops receiving constants once it reports ``is_full()``.

    Raises:
        NotImplementedError: if the stream contains a
            ``LoadConstantPseudoInstruction`` (lowering is not implemented).
    """
    from nervapy import ConstantBucket
    from nervapy.arm.instructions import Instruction
    from nervapy.arm.pseudo import LoadConstantPseudoInstruction

    # Largest alignment among all constants; it sizes every bucket.
    max_alignment = 0
    for instruction in self.instructions:
        if isinstance(instruction, Instruction):
            constant = instruction.get_constant()
            if constant is not None:
                max_alignment = max(max_alignment, constant.get_alignment())

    constant_id = 0
    constant_label_map = dict()
    constant_buckets = dict()  # alignment -> bucket that still has room
    for instruction in self.instructions:
        if not isinstance(instruction, Instruction):
            continue
        constant = instruction.get_constant()
        if constant is None:
            continue
        if constant in constant_label_map:
            constant.label = constant_label_map[constant]
            continue

        constant.label = "c" + str(constant_id)
        constant_id += 1
        constant_label_map[constant] = constant.label
        constant_alignment = constant.get_alignment()
        if constant_alignment in constant_buckets:
            constant_buckets[constant_alignment].add(constant)
            if constant_buckets[constant_alignment].is_full():
                del constant_buckets[constant_alignment]
        else:
            # Floor division keeps the capacity an int; "/" yields a float
            # on Python 3. Presumably alignment is in bits and capacity in
            # bytes -- TODO confirm against ConstantBucket.
            constant_bucket = ConstantBucket(max_alignment // 8)
            constant_bucket.add(constant)
            self.constants.append(constant_bucket)
            if not constant_bucket.is_full():
                constant_buckets[constant_alignment] = constant_bucket

    new_instructions = list()
    for instruction in self.instructions:
        if isinstance(instruction, LoadConstantPseudoInstruction):
            raise NotImplementedError()
        new_instructions.append(instruction)
    self.instructions = new_instructions
|
|
1803
|
+
|
|
1804
|
+
def validate_stack_alignment_check(self):
    """
    Validate that stack is 8-byte aligned before BL/BLX instructions.

    The check only runs when the target's extensions include ``V7M`` or
    ``V8MBase``. It walks the instruction stream, keeps a running byte
    offset of the stack relative to function entry (PUSH/POP, STM/LDM with
    writeback on sp, ``SUB``/``ADD sp, sp, #imm``) and requires that offset
    to be a multiple of 8 at every BL/BLX.

    Prologue instructions are not tracked: up to
    ``len(self.stack_frame.generate_prologue())`` instructions that look
    like prologue code (PUSH, VPUSH, STM with writeback, ``SUB sp, sp``)
    are skipped before tracking applies to them.

    Raises:
        ValueError: if the tracked offset is not 8-byte aligned at a BL/BLX.
    """
    from nervapy.arm.generic import (ArithmeticInstruction,
                                     BranchLinkExchangeInstruction,
                                     BranchWithLinkInstruction,
                                     PushPopInstruction,
                                     StoreMultipleInstruction)
    from nervapy.arm.instructions import Instruction
    from nervapy.arm.isa import Extension
    from nervapy.arm.registers import sp

    # Enforce only for targets with the V7M or V8MBase extension.
    if (
        Extension.V7M not in self.target.extensions
        and Extension.V8MBase not in self.target.extensions
    ):
        return

    def is_sp(operand):
        # True for an operand wrapping the stack pointer register.
        return hasattr(operand, "register") and operand.register == sp

    def looks_like_prologue(candidate):
        # Shape test for the register saves / padding the prologue emits.
        if isinstance(candidate, PushPopInstruction) and candidate.name in (
            "PUSH",
            "PUSH.W",
        ):
            return True
        # VPUSH (VFP register saves), matched by class name.
        if candidate.__class__.__name__ == "VfpNeonPushPopInstruction":
            return True
        # STMDB (used with high registers)
        if isinstance(candidate, StoreMultipleInstruction):
            return candidate.writeback and candidate.name.startswith("STM")
        # SUB sp, sp, #imm used for alignment padding
        if isinstance(candidate, ArithmeticInstruction):
            if len(candidate.operands) >= 3:
                return (
                    is_sp(candidate.operands[0])
                    and is_sp(candidate.operands[1])
                    and candidate.name.startswith("SUB")
                )
        return False

    # The prologue is assumed to keep the stack 8-byte aligned, so its
    # instructions are skipped rather than tracked.
    prologue_size = len(self.stack_frame.generate_prologue())
    prologue_instructions_seen = 0
    stack_offset = 0

    for instruction in self.instructions:
        if not isinstance(instruction, Instruction):
            continue

        if prologue_instructions_seen < prologue_size and looks_like_prologue(instruction):
            prologue_instructions_seen += 1
            continue

        if isinstance(instruction, PushPopInstruction):
            # Each pushed/popped register moves sp by 4 bytes.
            if instruction.name in ("PUSH", "PUSH.W"):
                stack_offset += len(instruction.operands[0].get_registers_list()) * 4
            elif instruction.name in ("POP", "POP.W"):
                stack_offset -= len(instruction.operands[0].get_registers_list()) * 4

        elif isinstance(instruction, StoreMultipleInstruction):
            # STM/LDM only move sp when writeback targets sp itself.
            if instruction.writeback and is_sp(instruction.operands[0]):
                num_registers = len(instruction.operands[1].get_registers_list())
                if instruction.name.startswith("STM"):
                    stack_offset += num_registers * 4
                elif instruction.name.startswith("LDM"):
                    stack_offset -= num_registers * 4

        elif isinstance(instruction, ArithmeticInstruction):
            if len(instruction.operands) >= 3:
                dest = instruction.operands[0]
                src1 = instruction.operands[1]
                src2 = instruction.operands[2]
                if is_sp(dest) and is_sp(src1):
                    # The immediate may be exposed as 'immediate' or 'value'.
                    imm_value = None
                    if hasattr(src2, "immediate"):
                        imm_value = src2.immediate
                    elif hasattr(src2, "value"):
                        imm_value = src2.value

                    if imm_value is not None:
                        if instruction.name.startswith("SUB"):
                            # SUB sp, sp, #imm - allocates stack space
                            stack_offset += imm_value
                        elif instruction.name.startswith("ADD"):
                            # ADD sp, sp, #imm - deallocates stack space
                            stack_offset -= imm_value

        elif isinstance(
            instruction, (BranchWithLinkInstruction, BranchLinkExchangeInstruction)
        ):
            misalignment = stack_offset % 8
            if misalignment != 0:
                raise ValueError(
                    "Stack is not 8-byte aligned before {0} instruction.\n"
                    "Current stack offset: {1} bytes (misaligned by {2} bytes).\n"
                    "ARMv7-M/ARMv8-M requires 8-byte stack alignment at function calls (AAPCS requirement).\n"
                    "Add registers in pairs to PUSH instructions or adjust the stack manually to maintain alignment.".format(
                        instruction.name, stack_offset, misalignment
                    )
                )
|
|
1944
|
+
|
|
1945
|
+
def optimize_instructions(self):
    """Remove VFP/NEON moves whose two operands compare equal.

    Only ``VfpNeonMovInstruction`` entries are examined; every other entry
    is kept. ``self.instructions`` is replaced by the filtered list.
    """
    from nervapy.arm.generic import MovInstruction
    from nervapy.arm.vfpneon import VfpNeonMovInstruction

    # A move is kept unless it is a VFP/NEON move from a register to itself.
    self.instructions = [
        entry
        for entry in self.instructions
        if not isinstance(entry, VfpNeonMovInstruction)
        or entry.operands[0] != entry.operands[1]
    ]
|
|
1959
|
+
|
|
1960
|
+
def get_target(self):
    """Return the ``target`` attribute of this function."""
    target = self.target
    return target
|
|
1962
|
+
|
|
1963
|
+
@property
def isa_extensions(self):
    """Extensions of the target plus those required by the instructions.

    Starts from ``self.target.extensions`` and adds the ``isa_extensions``
    of every real instruction. ``Extension.VFPd32`` is added for any
    instruction that touches a ``QRegister``, or a ``DRegister`` whose
    ``is_extended`` flag is set.
    """
    from nervapy.arm.instructions import Instruction
    from nervapy.arm.isa import Extension, Extensions
    from nervapy.arm.registers import DRegister, QRegister

    # Start with the target microarchitecture's extensions
    collected = Extensions(*self.target.extensions)
    for entry in self.instructions:
        if not isinstance(entry, Instruction):
            continue
        for extension in entry.isa_extensions:
            collected += extension
        # "and" binds tighter than "or": any Q register qualifies, a D
        # register only when it is extended.
        needs_d32 = any(
            isinstance(register, QRegister)
            or (isinstance(register, DRegister) and register.is_extended)
            for register in entry.get_registers_list()
        )
        if needs_d32:
            collected += Extension.VFPd32
    return collected
|
|
1983
|
+
|
|
1984
|
+
def get_yeppp_isa_extensions(self):
    """Translate the function's ISA extensions into Yeppp! feature names.

    Each non-``None`` name returned by ``self.get_isa_extensions()`` is
    classified as an ISA, SIMD or system feature and prefixed with
    ``YepARMIsaFeature``, ``YepARMSimdFeature`` or ``YepARMSystemFeature``.
    A category with no features is reported as its single
    ``Yep...FeaturesDefault`` name.

    Returns:
        tuple[list[str], list[str], list[str]]: ISA, SIMD and system
        feature names. The order inside each list is unspecified.

    Raises:
        KeyError: if an extension name is not in the translation table.
    """
    # extension name -> (isa feature, simd feature, system feature)
    isa_extensions_map = {
        "V4": ("V4", None, None),
        "V5": ("V5", None, None),
        "V5E": ("V5E", None, None),
        "V6": ("V6", None, None),
        "V6K": ("V6K", None, None),
        "V7": ("V7", None, None),
        "V7MP": ("V7MP", None, None),
        "Div": ("Div", None, None),
        "Thumb": ("Thumb", None, None),
        "Thumb2": ("Thumb2", None, None),
        "VFP": ("VFP", None, None),
        "VFP2": ("VFP2", None, None),
        "VFP3": ("VFP3", None, None),
        "VFPd32": ("VFPd32", None, None),
        "VFP3HP": ("VFP3HP", None, None),
        "VFP4": ("VFP4", None, None),
        "VFPVectorMode": (None, None, "VFPVectorMode"),
        "XScale": (None, "XScale", None),
        "WMMX": (None, "WMMX", None),
        "WMMX2": (None, "WMMX2", None),
        "NEON": (None, "NEON", None),
        "NEONHP": (None, "NEONHP", None),
        "NEON2": (None, "NEON2", None),
    }
    isa_features, simd_features, system_features = set(), set(), set()
    for extension_name in self.get_isa_extensions():
        if extension_name is None:
            continue
        isa_feature, simd_feature, system_feature = isa_extensions_map[extension_name]
        if isa_feature is not None:
            isa_features.add(isa_feature)
        if simd_feature is not None:
            simd_features.add(simd_feature)
        if system_feature is not None:
            system_features.add(system_feature)

    # Build real lists: a Python 3 map object is always truthy, so the
    # "empty -> default" fallbacks below would never trigger with map().
    isa_extensions = ["YepARMIsaFeature" + name for name in isa_features]
    if not isa_extensions:
        isa_extensions = ["YepIsaFeaturesDefault"]
    simd_extensions = ["YepARMSimdFeature" + name for name in simd_features]
    if not simd_extensions:
        simd_extensions = ["YepSimdFeaturesDefault"]
    system_extensions = ["YepARMSystemFeature" + name for name in system_features]
    if not system_extensions:
        system_extensions = ["YepSystemFeaturesDefault"]
    return (isa_extensions, simd_extensions, system_extensions)
|
|
2034
|
+
|
|
2035
|
+
def allocate_local_variable(self):
    """Increment ``local_variables_count`` and return the new value as an id."""
    next_id = self.local_variables_count + 1
    self.local_variables_count = next_id
    return next_id
|
|
2038
|
+
|
|
2039
|
+
def allocate_q_register(self):
    """Reserve a new virtual register number tagged ``0x0F0``.

    Increments ``self.virtual_registers_count`` and returns
    ``(count << 12) | 0x0F0``. As a best-effort debugging aid, the source
    line two frames above this call is inspected; if it contains both
    ``=`` and ``QRegister``, the text left of the first ``=`` is stored in
    ``self._register_names`` under the new number.

    Returns:
        int: the new virtual register number.
    """
    self.virtual_registers_count += 1
    register_number = (self.virtual_registers_count << 12) | 0x0F0

    # Try to capture variable name from caller's frame
    try:
        import inspect
        import linecache

        frame = inspect.currentframe().f_back.f_back
        if frame:
            line = linecache.getline(frame.f_code.co_filename, frame.f_lineno).strip()
            if '=' in line and 'QRegister' in line:
                var_name = line.split('=')[0].strip()
                if var_name and not var_name.startswith('#'):
                    self._register_names[register_number] = var_name
    except Exception:
        # Name capture is optional; never let it break allocation. Unlike a
        # bare "except:", this lets KeyboardInterrupt/SystemExit propagate.
        pass

    return register_number
|
|
2058
|
+
|
|
2059
|
+
def allocate_d_register(self):
    """Reserve a new virtual register number tagged ``0x300``.

    Increments ``self.virtual_registers_count`` and returns
    ``(count << 12) | 0x300``.
    """
    sequence = self.virtual_registers_count + 1
    self.virtual_registers_count = sequence
    return (sequence << 12) | 0x300
|
|
2062
|
+
|
|
2063
|
+
def allocate_s_register(self):
    """Reserve a new virtual register number tagged ``0x400``.

    Increments ``self.virtual_registers_count`` and returns
    ``(count << 12) | 0x400``.
    """
    sequence = self.virtual_registers_count + 1
    self.virtual_registers_count = sequence
    return (sequence << 12) | 0x400
|
|
2066
|
+
|
|
2067
|
+
def allocate_wmmx_register(self):
    """Reserve a new virtual register number tagged ``0x002``.

    Increments ``self.virtual_registers_count`` and returns
    ``(count << 12) | 0x002``.
    """
    sequence = self.virtual_registers_count + 1
    self.virtual_registers_count = sequence
    return (sequence << 12) | 0x002
|
|
2070
|
+
|
|
2071
|
+
def allocate_general_purpose_register(self):
    """Reserve a new virtual register number tagged ``0x001``.

    Increments ``self.virtual_registers_count`` and returns
    ``(count << 12) | 0x001``. As a best-effort debugging aid, the source
    line two frames above this call is inspected; if it contains both
    ``=`` and ``GeneralPurposeRegister``, the text left of the first ``=``
    is stored in ``self._register_names`` under the new number.

    Returns:
        int: the new virtual register number.
    """
    self.virtual_registers_count += 1
    register_number = (self.virtual_registers_count << 12) | 0x001

    # Try to capture variable name from caller's frame
    try:
        import inspect
        import linecache

        # Go up 2 frames: this -> __init__ -> caller
        frame = inspect.currentframe().f_back.f_back
        if frame:
            # Get the line of code being executed
            line = linecache.getline(frame.f_code.co_filename, frame.f_lineno).strip()
            # Simple pattern matching for "varname = GeneralPurposeRegister()"
            if '=' in line and 'GeneralPurposeRegister' in line:
                var_name = line.split('=')[0].strip()
                if var_name and not var_name.startswith('#'):
                    self._register_names[register_number] = var_name
    except Exception:
        # Name capture is optional; never let it break allocation. Unlike a
        # bare "except:", this lets KeyboardInterrupt/SystemExit propagate.
        pass

    return register_number
|
|
2092
|
+
|
|
2093
|
+
def allocate_p_register(self):
    """Allocate a new virtual P register number.

    Advances ``self.virtual_registers_count`` by one and returns the new
    count shifted left by 12 bits and OR-ed with ``0x001``.
    """
    # NOTE(review): 0x001 is the same low-bit value used by
    # allocate_general_purpose_register -- confirm this is intended.
    index = self.virtual_registers_count + 1
    self.virtual_registers_count = index
    return (index << 12) | 0x001
|
|
2096
|
+
|
|
2097
|
+
|
|
2098
|
+
class LocalVariable(object):
    """A local variable of the function currently being generated.

    A variable has a byte ``size``, a unique ``id`` obtained from
    ``active_function.allocate_local_variable()``, and an ``address`` that
    stays ``None`` until something assigns it.  A *sub-variable* (created by
    :meth:`get_low` / :meth:`get_high`) refers to half of its ``parent`` and
    derives its address from the parent's address plus its own ``offset``.
    """

    def __init__(self, register_type):
        """Create a variable sized for *register_type*.

        Args:
            register_type: Either an ``int`` giving the size in bytes, or one
                of the register classes ``GeneralPurposeRegister`` (4),
                ``WMMXRegister`` (8), ``SRegister`` (4), ``DRegister`` (8) or
                ``QRegister`` (16).

        Raises:
            ValueError: If *register_type* is none of the above.
        """
        super(LocalVariable, self).__init__()
        if isinstance(register_type, int):
            self.size = register_type
        else:
            # Only the class-based path needs the register classes, so the
            # import is deferred until it is actually required.
            from nervapy.arm.registers import (DRegister, GeneralPurposeRegister,
                                               QRegister, SRegister, WMMXRegister)

            if register_type == GeneralPurposeRegister:
                self.size = 4
            elif register_type == WMMXRegister:
                self.size = 8
            elif register_type == SRegister:
                self.size = 4
            elif register_type == DRegister:
                self.size = 8
            elif register_type == QRegister:
                self.size = 16
            else:
                raise ValueError("Unsupported register type {0}".format(register_type))
        self.id = active_function.allocate_local_variable()
        self.address = None
        self.offset = 0
        self.parent = None

    def __eq__(self, other):
        """Compare by ``id``; objects without an ``id`` are never equal."""
        try:
            return self.id == other.id
        except AttributeError:
            # Let Python fall back to its default comparison instead of
            # crashing on e.g. ``variable == None``.
            return NotImplemented

    def __hash__(self):
        return hash(self.id)

    def __str__(self):
        """Return ``[address]`` once an address is known, else a placeholder."""
        address = self.get_address()
        if address is not None:
            return "[{0}]".format(address)
        else:
            return "local-variable<{0}>".format(self.id)

    def is_subvariable(self):
        """Return True when this variable is a part of another variable."""
        return self.parent is not None

    def get_parent(self):
        """Return the enclosing variable, or ``None`` for a root variable."""
        return self.parent

    def get_root(self):
        """Return the outermost variable this one is (transitively) part of."""
        if self.is_subvariable():
            return self.get_parent().get_root()
        else:
            return self

    def get_address(self):
        """Return the variable's address, or ``None`` if not yet assigned.

        For a sub-variable this is the parent's address plus ``offset``; when
        the parent has no address yet the result is ``None`` rather than an
        arithmetic error.
        """
        if self.is_subvariable():
            base = self.parent.get_address()
            if base is None:
                return None
            return base + self.offset
        else:
            return self.address

    def get_size(self):
        """Return the size of the variable in bytes."""
        return self.size

    def get_low(self):
        """Return a new sub-variable covering the first half of this one."""
        assert self.get_size() % 2 == 0
        # Floor division keeps the size an int; true division would produce a
        # float that __init__ rejects as an unsupported register type.
        child = LocalVariable(self.get_size() // 2)
        child.parent = self
        child.offset = 0
        return child

    def get_high(self):
        """Return a new sub-variable covering the second half of this one."""
        assert self.get_size() % 2 == 0
        half = self.get_size() // 2
        child = LocalVariable(half)
        child.parent = self
        child.offset = half
        return child
|
|
2175
|
+
|
|
2176
|
+
|
|
2177
|
+
class StackFrame(object):
    """Registers to save and variables to place on the stack for one function.

    The frame records which general-purpose and D registers must be saved,
    keeps lists of stack variables grouped by size, and emits the matching
    prologue and epilogue instruction sequences.
    """

    def __init__(self, abi):
        """Create an empty frame for *abi*.

        Args:
            abi: Object whose ``callee_save_registers`` collection decides
                which registers :meth:`preserve_register` records.
        """
        super(StackFrame, self).__init__()
        self.abi = abi
        self.general_purpose_registers = list()
        self.d_registers = list()
        self.s_variables = list()
        self.d_variables = list()
        self.q_variables = list()

    def preserve_registers(self, registers):
        """Call :meth:`preserve_register` for every register in *registers*."""
        for register in registers:
            self.preserve_register(register)

    def preserve_register(self, register):
        """Record *register* for saving if the ABI lists it as callee-saved.

        Raises:
            TypeError: If *register* is not a general-purpose, S, D or Q
                register.
        """
        self._record_register(register, respect_abi=True)

    def force_preserve_register(self, register):
        """Add *register* to the preservation list unconditionally (no ABI check)."""
        self._record_register(register, respect_abi=False)

    def _record_register(self, register, respect_abi):
        """Shared implementation of the two ``preserve`` entry points.

        General-purpose registers go to ``general_purpose_registers``.  A
        non-virtual S register is replaced by its parent before being added
        to ``d_registers``; a Q register contributes its low and high parts.
        Nothing is added twice.
        """
        from nervapy.arm.registers import (DRegister, GeneralPurposeRegister,
                                           QRegister, SRegister)

        def permitted(candidate):
            return (not respect_abi) or candidate in self.abi.callee_save_registers

        if isinstance(register, GeneralPurposeRegister):
            if register not in self.general_purpose_registers and permitted(register):
                self.general_purpose_registers.append(register)
        elif isinstance(register, SRegister):
            if not register.is_virtual():
                register = register.get_parent()
            if register not in self.d_registers and permitted(register):
                self.d_registers.append(register)
        elif isinstance(register, DRegister):
            if register not in self.d_registers and permitted(register):
                self.d_registers.append(register)
        elif isinstance(register, QRegister):
            d_low = register.get_low_part()
            d_high = register.get_high_part()
            # NOTE(review): the ABI check is made on the Q register itself,
            # not on its D halves -- confirm callee_save_registers matches
            # Q registers as intended.
            if d_low not in self.d_registers and permitted(register):
                self.d_registers.append(d_low)
            if d_high not in self.d_registers and permitted(register):
                self.d_registers.append(d_high)
        else:
            raise TypeError("Unsupported register type {0}".format(type(register)))

    def add_variable(self, variable):
        """Register a stack variable in the list matching its size.

        Variables of 4, 8 and 16 bytes go to ``s_variables``, ``d_variables``
        and ``q_variables`` respectively; a variable already present is not
        added again.

        Raises:
            TypeError: If the variable's size is not 4, 8 or 16 bytes.
        """
        size = variable.get_size()
        if size == 4:
            bucket = self.s_variables
        elif size == 8:
            bucket = self.d_variables
        elif size == 16:
            bucket = self.q_variables
        else:
            raise TypeError("Unsupported variable type {0}".format(type(variable)))
        if variable not in bucket:
            bucket.append(variable)

    def get_parameters_offset(self):
        """Return the number of bytes occupied by the saved registers.

        Four bytes per saved general-purpose register, rounded up to a
        multiple of eight (matching the padding word the prologue emits for
        an odd register count), plus eight bytes per saved D register.
        """
        parameters_offset = len(self.general_purpose_registers) * 4
        if parameters_offset % 8 == 4:
            parameters_offset += 4
        return parameters_offset + len(self.d_registers) * 8

    @staticmethod
    def _sorted_by_physical_number(registers):
        """Return *registers* as a tuple ordered by physical register number."""
        return tuple(sorted(registers, key=lambda reg: reg.get_physical_number()))

    @staticmethod
    def _needs_wide_transfer(function, registers):
        """Tell whether the save/restore must use the high-register forms.

        True only when *function* targets ARMv7-M (``Extension.V7M``) and at
        least one register has a physical number above 7.
        """
        from nervapy.arm.isa import Extension

        is_armv7m = function and Extension.V7M in function.target.extensions
        if not is_armv7m:
            return False
        return any(reg.get_physical_number() > 7 for reg in registers)

    @staticmethod
    def _prefers_multiple_transfer(function):
        """Tell whether STMDB/LDMIA should be used instead of PUSH.W/POP.W.

        That is the case when the function's high-register strategy is
        ``STMDB``, or when it is ``AUTO`` and the assembly format is named
        ``"ARMCC"``.
        """
        from nervapy.arm.formats import HighRegisterStrategy

        strategy = function.high_register_strategy
        return strategy == HighRegisterStrategy.STMDB or (
            strategy == HighRegisterStrategy.AUTO
            and function.assembly_format.name == "ARMCC"
        )

    def generate_prologue(self):
        """Build the instructions that save the recorded registers.

        General-purpose registers are saved first with one instruction: on
        ARMv7-M with any register above r7 this is ``STMDB`` or ``PUSH_W``
        (see :meth:`_prefers_multiple_transfer`), otherwise ``PUSH``.  An odd
        register count is followed by ``SUB sp, sp, 4`` (presumably to keep
        the stack 8-byte aligned).  D registers are then saved with ``VPUSH``.

        Returns:
            The generated instructions as a list.
        """
        from nervapy.arm.generic import PUSH, PUSH_W, STMDB, SUB
        from nervapy.arm.registers import sp
        from nervapy.arm.vfpneon import VPUSH
        from nervapy.stream import InstructionStream

        with InstructionStream() as instructions:
            if self.general_purpose_registers:
                function = self.get_function()
                registers = self._sorted_by_physical_number(self.general_purpose_registers)

                if self._needs_wide_transfer(function, registers):
                    # Low and high registers are saved by a single
                    # PUSH.W / STMDB covering all of them.
                    # NOTE(review): STMDB is passed plain ``sp``; whether it
                    # writes back to sp is decided elsewhere -- confirm.
                    if self._prefers_multiple_transfer(function):
                        STMDB(sp, registers)
                    else:
                        PUSH_W(registers)
                else:
                    # ARMv7-M with low registers only, or any other target.
                    PUSH(registers)

                if len(registers) % 2 == 1:
                    SUB(sp, sp, 4)

            if self.d_registers:
                VPUSH(self._sorted_by_physical_number(self.d_registers))
        return list(iter(instructions))

    def generate_epilogue(self):
        """Build the instructions that restore the recorded registers.

        Mirrors :meth:`generate_prologue` in reverse: D registers are
        restored with ``VPOP``, the padding word (odd general-purpose count)
        is released with ``ADD sp, sp, 4``, and the general-purpose registers
        are restored with ``LDMIA``/``POP_W`` (ARMv7-M with a register above
        r7) or ``POP``.

        Returns:
            The generated instructions as a list.
        """
        from nervapy.arm.generic import ADD, LDMIA, POP, POP_W
        from nervapy.arm.registers import sp
        from nervapy.arm.vfpneon import VPOP
        from nervapy.stream import InstructionStream

        with InstructionStream() as instructions:
            if self.d_registers:
                VPOP(self._sorted_by_physical_number(self.d_registers))

            if self.general_purpose_registers:
                function = self.get_function()
                registers = self._sorted_by_physical_number(self.general_purpose_registers)
                wide = self._needs_wide_transfer(function, registers)
                multiple = wide and self._prefers_multiple_transfer(function)

                if len(registers) % 2 == 1:
                    ADD(sp, sp, 4)

                if wide:
                    # NOTE(review): LDMIA is passed plain ``sp``; whether it
                    # writes back to sp is decided elsewhere -- confirm.
                    if multiple:
                        LDMIA(sp, registers)
                    else:
                        POP_W(registers)
                else:
                    POP(registers)
        return list(iter(instructions))

    def get_function(self):
        """Get the active function that owns this stack frame."""
        from nervapy.arm.function import active_function

        return active_function
|
|
2440
|
+
|
|
2441
|
+
|
|
2442
|
+
def print_live_registers(label=""):
    """Report the registers that are live at this point of code generation.

    Intended to be called inside a ``Function`` context.  The call is
    forwarded to ``active_function.print_live_registers(label)``; when no
    function is active, a one-line notice is printed instead.

    Note: live register information is computed during function compilation,
    so the output is an approximation based on the instructions emitted so
    far.

    Args:
        label: Optional label to identify the location in code.

    Example:
        with Function("my_func", args, ...):
            t0 = GeneralPurposeRegister()
            ADD(t0, r0, r1)
            print_live_registers("after ADD")  # Shows which regs are live
    """
    current = active_function
    if current is not None:
        current.print_live_registers(label)
    else:
        print(f"Live registers {label}: No active function")
|