PyNerva 0.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. nervapy/__init__.py +50 -0
  2. nervapy/abi.py +91 -0
  3. nervapy/arm/__init__.py +124 -0
  4. nervapy/arm/__main__.py +0 -0
  5. nervapy/arm/abi.py +138 -0
  6. nervapy/arm/formats.py +49 -0
  7. nervapy/arm/function.py +2465 -0
  8. nervapy/arm/generic.py +10796 -0
  9. nervapy/arm/instructions.py +519 -0
  10. nervapy/arm/isa.py +409 -0
  11. nervapy/arm/literal_pool.py +331 -0
  12. nervapy/arm/microarchitecture.py +211 -0
  13. nervapy/arm/pseudo.py +652 -0
  14. nervapy/arm/registers.py +1458 -0
  15. nervapy/arm/vfpneon.py +4092 -0
  16. nervapy/arm.py +13 -0
  17. nervapy/c/__init__.py +1 -0
  18. nervapy/c/types.py +436 -0
  19. nervapy/codegen.py +99 -0
  20. nervapy/common/__init__.py +4 -0
  21. nervapy/common/function.py +5 -0
  22. nervapy/common/regalloc.py +121 -0
  23. nervapy/constant_data.py +282 -0
  24. nervapy/encoder.py +246 -0
  25. nervapy/formats/__init__.py +2 -0
  26. nervapy/formats/elf/__init__.py +4 -0
  27. nervapy/formats/elf/file.py +178 -0
  28. nervapy/formats/elf/image.py +106 -0
  29. nervapy/formats/elf/section.py +422 -0
  30. nervapy/formats/elf/symbol.py +281 -0
  31. nervapy/formats/macho/__init__.py +2 -0
  32. nervapy/formats/macho/file.py +123 -0
  33. nervapy/formats/macho/image.py +143 -0
  34. nervapy/formats/macho/section.py +322 -0
  35. nervapy/formats/macho/symbol.py +158 -0
  36. nervapy/formats/mscoff/__init__.py +8 -0
  37. nervapy/formats/mscoff/image.py +132 -0
  38. nervapy/formats/mscoff/section.py +181 -0
  39. nervapy/formats/mscoff/symbol.py +148 -0
  40. nervapy/function.py +136 -0
  41. nervapy/literal.py +731 -0
  42. nervapy/loader.py +188 -0
  43. nervapy/name.py +159 -0
  44. nervapy/parse.py +52 -0
  45. nervapy/stream.py +58 -0
  46. nervapy/util.py +126 -0
  47. nervapy/writer.py +518 -0
  48. nervapy/x86_64/__init__.py +324 -0
  49. nervapy/x86_64/__main__.py +407 -0
  50. nervapy/x86_64/abi.py +517 -0
  51. nervapy/x86_64/amd.py +6464 -0
  52. nervapy/x86_64/avx.py +102029 -0
  53. nervapy/x86_64/crypto.py +1533 -0
  54. nervapy/x86_64/encoding.py +424 -0
  55. nervapy/x86_64/fma.py +19138 -0
  56. nervapy/x86_64/function.py +2707 -0
  57. nervapy/x86_64/generic.py +23384 -0
  58. nervapy/x86_64/instructions.py +500 -0
  59. nervapy/x86_64/isa.py +476 -0
  60. nervapy/x86_64/lower.py +126 -0
  61. nervapy/x86_64/mask.py +2593 -0
  62. nervapy/x86_64/meta.py +143 -0
  63. nervapy/x86_64/mmxsse.py +17265 -0
  64. nervapy/x86_64/nacl.py +327 -0
  65. nervapy/x86_64/operand.py +1204 -0
  66. nervapy/x86_64/options.py +21 -0
  67. nervapy/x86_64/pseudo.py +686 -0
  68. nervapy/x86_64/registers.py +1225 -0
  69. nervapy/x86_64/types.py +17 -0
  70. nervapy/x86_64/uarch.py +580 -0
  71. pynerva-0.0.7.dist-info/METADATA +310 -0
  72. pynerva-0.0.7.dist-info/RECORD +74 -0
  73. pynerva-0.0.7.dist-info/WHEEL +4 -0
  74. pynerva-0.0.7.dist-info/licenses/LICENSE.rst +15 -0
@@ -0,0 +1,2465 @@
1
+ # This file is part of PeachPy package and is licensed under the Simplified BSD license.
2
+ # See license.rst for the full text of the license.
3
+
4
+ from __future__ import print_function
5
+
6
+ import time
7
+
8
+ import nervapy.arm.instructions
9
+ import nervapy.arm.registers
10
+ from nervapy.arm.microarchitecture import Microarchitecture
11
+
12
# Module-level handle to the Function currently being generated.
# Function.__enter__ stores the function here and Function.detach resets it to None.
active_function = None
13
+
14
+
15
+ class Function(object):
16
def __init__(
    self,
    name,
    arguments,
    return_type=None,
    target=Microarchitecture.Default,
    abi=None,
    assembly_format=None,
    high_register_strategy=None,
    collect_origin=False,
    dump_intermediate_assembly=False,
    report_generation=True,
    report_live_registers=False,
    is_thumb=False,
    alignment=0,
    validate_stack_alignment=True,
    preserve8=False,
):
    """Describe an ARM function and assign a location to every argument.

    The configuration flags are stored on the instance unchanged.
    ``assembly_format`` defaults to ``AssemblyFormat.GAS`` and
    ``high_register_strategy`` to ``HighRegisterStrategy.AUTO`` when omitted.
    Each argument receives either ``register`` (one of r0-r3, or a pair of
    them for 8-byte arguments) or ``stack_offset``; the other stays ``None``.

    Raises:
        ValueError: if ``abi`` is not ``arm_gnueabi`` or ``arm_gnueabihf``
            (including the default ``None``), or if an argument is larger
            than 4 bytes and not exactly 8 bytes.
    """
    from nervapy.arm.abi import arm_gnueabi, arm_gnueabihf
    from nervapy.arm.formats import AssemblyFormat, HighRegisterStrategy
    from nervapy.arm.registers import r0, r1, r2, r3

    # Validate the ABI before it is dereferenced below: with abi=None the
    # pointer-size fix-up would otherwise fail with an AttributeError
    # instead of the intended ValueError.
    if abi is None or not (abi == arm_gnueabi or abi == arm_gnueabihf):
        raise ValueError("Unsupported assembler ABI %s" % abi)

    self.name = name
    self.arguments = arguments
    self.return_type = return_type
    self.is_thumb = is_thumb
    self.alignment = alignment
    self.validate_stack_alignment = validate_stack_alignment
    self.preserve8 = preserve8

    # Set default assembly format to GAS if not specified
    if assembly_format is None:
        assembly_format = AssemblyFormat.GAS
    self.assembly_format = assembly_format

    # Set default high register strategy if not specified
    if high_register_strategy is None:
        high_register_strategy = HighRegisterStrategy.AUTO
    self.high_register_strategy = high_register_strategy

    for argument in self.arguments:
        argument.stack_offset = None
        argument.register = None
        if (
            argument.is_size_integer
            or argument.is_pointer_integer
            or argument.is_pointer
        ):
            # Pointer-sized types get their size from the ABI.
            # Note that this writes to the argument's c_type object.
            argument.c_type.size = abi.pointer_size
        assert argument.size

    self.target = target
    self.abi = abi
    self.collect_origin = collect_origin
    self.dump_intermediate_assembly = dump_intermediate_assembly
    self.report_generation = report_generation
    self.report_live_registers = report_live_registers
    self.ticks = None

    # Assign argument locations.
    # Up to 4 first arguments are passed in registers, others passed through stack.
    # Arguments smaller than 4 bytes are extended to 4 bytes (both when passed on stack or in a register).
    # 8-byte arguments occupy 2 general-purpose registers or 8 bytes on stack. When they are passed in
    # registers, the index of the first register must be even (i.e. they are passed in (r0, r1) or (r2, r3),
    # but not in (r1, r2)). When 8-byte arguments are passed on stack, their location is aligned on 8 bytes,
    # skipping 4 bytes if necessary.
    argument_registers = (r0, r1, r2, r3)
    register_offset = 0
    stack_offset = 0
    for argument in self.arguments:
        if argument.size <= 4:
            if register_offset < 4:
                argument.register = argument_registers[register_offset]
                register_offset += 1
            else:
                argument.stack_offset = stack_offset
                stack_offset += 4
        elif argument.size == 8:
            # First register index must be even
            if register_offset % 2 == 1:
                register_offset += 1
            if register_offset < 4:
                argument.register = (
                    argument_registers[register_offset],
                    argument_registers[register_offset + 1],
                )
                register_offset += 2
            else:
                if stack_offset % 8 == 4:
                    stack_offset += 4
                argument.stack_offset = stack_offset
                stack_offset += 8
        else:
            raise ValueError(
                "Unsupported argument size {0}".format(argument.size)
            )

    self.instructions = list()
    self.constants = list()
    self.external_functions = set()  # Track external function imports
    self.stack_frame = StackFrame(self.abi)
    self.local_variables_count = 0
    self.virtual_registers_count = 0x40
    self.conflicting_registers = dict()
    self.allocation_options = dict()
    self.unallocated_registers = list()
    self._live_register_markers = []  # List of (instruction_index, label) tuples
    self._register_names = {}  # Map from register number to variable name
129
+
130
def __enter__(self):
    """Make this function the active function and instruction stream.

    Raises:
        ValueError: if another function is still active, or if an
            alternative instruction stream is active.

    Returns:
        This function object.
    """
    global active_function

    import nervapy.stream

    if active_function is not None:
        raise ValueError(
            "Function {0} was not detached".format(active_function.name)
        )
    if nervapy.stream.active_stream is not None:
        raise ValueError("Alternative instruction stream is active")

    active_function = nervapy.stream.active_stream = self

    if not self.report_generation:
        return self

    banner = "Generating function {Function} for microarchitecture {Microarchitecture} and ABI {ABI}"
    print(
        banner.format(
            Function=self.name, Microarchitecture=self.target, ABI=self.abi
        )
    )
    print("\tParsing source", end="")
    # Start of the "parsing" phase; __exit__ reads this to report the elapsed time.
    self.ticks = time.time()
    return self
152
+
153
def __exit__(self, exc_type, exc_value, traceback):
    """Run the code-generation pipeline and detach the function.

    The active instruction stream is cleared first. If the ``with`` body
    raised, the function is only detached. Otherwise the passes run in a
    fixed order (labels, decomposition, liveness analysis, register
    allocation, code generation), optionally printing per-phase timings and
    dumping an annotated listing. ``detach()`` is called in every case.

    Returns:
        None, so an exception raised inside the ``with`` body propagates.
    """
    import nervapy.stream
    from nervapy.arm.instructions import Instruction

    def next_phase(title=None):
        # Print the time spent in the phase that just ended, announce the
        # next one (if any) and restart the timer.
        if not self.report_generation:
            return
        elapsed = time.time() - self.ticks
        print(" (%2.2f secs)" % elapsed)
        if title is not None:
            print("\t" + title, end="")
        self.ticks = time.time()

    def describe(registers):
        # Sorted, comma-separated textual form of a register collection.
        return ", ".join(sorted(map(str, registers)))

    nervapy.stream.active_stream = None
    try:
        if exc_type is not None:
            return None

        self.generate_labels()
        self.decompose_instructions()
        self.reserve_registers()

        next_phase("Running liveness analysis")
        self.determine_available_registers()
        self.determine_live_registers(exclude_parameter_loads=True)

        # Report live registers at marked points
        if self._live_register_markers:
            self._report_live_registers_at_markers()

        if self.dump_intermediate_assembly:
            # NOTE(review): symbol_name is not defined in the part of the class
            # visible here; confirm it exists elsewhere in the file.
            with open("%s.S" % self.symbol_name, "w") as listing:
                for instruction in self.instructions:
                    if not isinstance(instruction, Instruction):
                        listing.write(str(instruction) + "\n")
                        continue
                    consumed_registers = describe(
                        instruction.get_input_registers_list()
                    )
                    produced_registers = describe(
                        instruction.get_output_registers_list()
                    )
                    live_registers = describe(instruction.live_registers)
                    listing.write(str(instruction) + "\n")
                    listing.write(
                        "\tConsumed registers: " + consumed_registers + "\n"
                    )
                    listing.write(
                        "\tProduced registers: " + produced_registers + "\n"
                    )
                    listing.write("\tLive registers: " + live_registers + "\n")
                    if instruction.line_number:
                        listing.write(
                            "\tLine: " + str(instruction.line_number) + "\n"
                        )
                    if instruction.source_code:
                        listing.write(
                            "\tCode: " + instruction.source_code + "\n"
                        )

        next_phase("Running register allocation")
        self.check_live_registers()
        self.determine_register_relations()
        self.allocate_registers()

        next_phase("Generating code")
        self.remove_assume_statements()
        self.update_stack_frame()
        self.generate_parameter_loads()
        if self.report_live_registers:
            self.determine_live_registers()
        self.generate_prolog_and_epilog()
        if self.validate_stack_alignment:
            self.validate_stack_alignment_check()

        self.generate_constant_loads()
        self.optimize_instructions()
        next_phase()
    finally:
        # Detach on success, on a pipeline failure and when the body raised.
        self.detach()
267
+
268
def find_argument(self, argument_target):
    """Look up one of this function's arguments.

    Args:
        argument_target: an ``Argument`` object or an argument name.

    Returns:
        The matching argument from ``self.arguments``, or ``None`` when
        there is no match.
    """
    from nervapy import Argument

    assert isinstance(
        argument_target, (Argument, str)
    ), "Either Argument object or argument name expected"

    if not isinstance(argument_target, Argument):
        # Lookup by name: the first argument with that name wins.
        for candidate in self.arguments:
            if candidate.name == argument_target:
                return candidate
        return None

    return argument_target if argument_target in self.arguments else None
288
+
289
def detach(self):
    """Clear the active function and the active instruction stream.

    Raises:
        ValueError: if no function is currently active.

    Returns:
        This function object.
    """
    global active_function

    import nervapy.stream

    if active_function is None:
        raise ValueError("Trying to detach a function while no function is active")
    active_function = nervapy.stream.active_stream = None
    return self
298
+
299
@property
def assembly(self):
    """Assembly text for this function in the configured ``assembly_format``.

    ARMCC output is produced for ``AssemblyFormat.ARMCC``; every other value
    falls back to GAS output.
    """
    from nervapy.arm.formats import AssemblyFormat

    generate = (
        self._generate_armcc_assembly
        if self.assembly_format == AssemblyFormat.ARMCC
        else self._generate_gas_assembly
    )
    return generate()
308
+
309
def _generate_constant_data_section(self):
    """Build the ``.data`` section text for this function's ConstantData objects.

    Returns:
        The section text joined with ``os.linesep``, or an empty string when
        the ``nervapy.constant_data`` module cannot be imported or the
        function has no constants.
    """
    import os

    # Only a missing optional module is tolerated. An ImportError raised
    # while collecting or rendering the constants must not be swallowed.
    try:
        from nervapy.constant_data import ConstantData
    except ImportError:
        return ""

    constants = ConstantData.get_function_constants(self)
    if not constants:
        return ""

    lines = ["", "\t.data", "\t.align 4"]
    lines.extend(const.generate_data_section() for const in constants)
    return os.linesep.join(lines)
327
+
328
+
329
@property
def global_asm(self):
    """Rust ``global_asm!()`` invocation that embeds the GAS assembly.

    The GAS output is brace-escaped and wrapped in a raw string literal.

    Usage in a Rust source file:
        use core::arch::global_asm;
        include!("generated_kernels.rs");  // or paste directly

    The extern declaration goes in your Rust code:
        unsafe extern "C" { fn my_func(a: u32) -> u32; }
    """
    escaped = self._escape_rust_asm_template(self._generate_gas_assembly())
    return 'core::arch::global_asm!(r#"\n' + escaped + '"#);\n'
344
+
345
@staticmethod
def _escape_rust_asm_template(asm):
    """Return ``asm`` with every ``{`` and ``}`` doubled."""
    return asm.translate({ord("{"): "{{", ord("}"): "}}"})
348
+
349
def _rust_ffi_type(self, c_type):
    """Translate a C type description into Rust FFI type text.

    Pointers are handled recursively through ``c_type.base``; a pointer
    without a base becomes a ``core::ffi::c_void`` pointer. Sized types are
    looked up by ``c_type.get_size(self.abi)``.

    Raises:
        ValueError: if none of the type predicates matches.
        KeyError: if the size has no entry in the table for that type kind.
    """
    if c_type.is_pointer:
        pointee = c_type.base
        if pointee is None:
            target, read_only = "core::ffi::c_void", c_type.is_const
        else:
            target, read_only = self._rust_ffi_type(pointee), pointee.is_const
        qualifier = "*const" if read_only else "*mut"
        return qualifier + " " + target

    if c_type.is_size_integer or c_type.is_pointer_integer:
        return "usize" if c_type.is_unsigned_integer else "isize"
    if c_type.is_bool:
        return "bool"
    if c_type.is_char:
        return "core::ffi::c_char"

    # The remaining kinds all map byte size -> Rust type name.
    if c_type.is_wchar:
        names_by_size = {2: "u16", 4: "u32"}
    elif c_type.is_floating_point:
        names_by_size = {2: "u16", 4: "f32", 8: "f64"}
    elif c_type.is_signed_integer:
        names_by_size = {1: "i8", 2: "i16", 4: "i32", 8: "i64"}
    elif c_type.is_unsigned_integer:
        names_by_size = {1: "u8", 2: "u16", 4: "u32", 8: "u64"}
    else:
        raise ValueError("Unsupported Rust FFI type for {0}".format(c_type))
    return names_by_size[c_type.get_size(self.abi)]
381
+
382
@property
def rust_extern_declaration(self):
    """Rust ``pub fn`` declaration line for this function, ending in ``;``.

    Argument and return types are rendered through ``_rust_ffi_type``; the
    return arrow is omitted when ``return_type`` is ``None``.
    """
    parameters = [
        "{0}: {1}".format(argument.name, self._rust_ffi_type(argument.c_type))
        for argument in self.arguments
    ]
    declaration = "pub fn {0}({1})".format(self.name, ", ".join(parameters))
    if self.return_type is None:
        return declaration + ";"
    return declaration + " -> {0}".format(self._rust_ffi_type(self.return_type)) + ";"
392
+
393
@property
def rust_extern(self):
    """Rust ``unsafe extern "C"`` block wrapping ``rust_extern_declaration``."""
    declaration = "{0}".format(self.rust_extern_declaration)
    return 'unsafe extern "C" {\n ' + declaration + "\n}\n"
398
+
399
@property
def rust_module(self):
    """``global_asm`` followed by ``rust_extern``, separated by one newline."""
    sections = [self.global_asm, self.rust_extern]
    return "\n".join(sections)
402
+
403
def _generate_gas_assembly(self):
    """Render the function as GNU Assembler (GAS) text.

    Emits, in order: the syntax/arch header, the optional ConstantData
    section, the constants table, ``.extern`` lines, the ``.text`` header
    with the function label, every instruction, and the literal pool when
    one is present. Lines are terminated with ``os.linesep``.
    """
    import os

    from nervapy.arm.generic import BranchInstruction
    from nervapy.arm.instructions import Instruction
    from nervapy.arm.pseudo import LabelQuasiInstruction

    eol = os.linesep
    function_label = self.name
    constants_label = self.name + "_constants"
    chunks = []

    def emit(text=""):
        # Append one output line, terminated with the platform line separator.
        chunks.append(text + eol)

    emit("\t.syntax unified")
    if self.is_thumb:
        emit("\t.thumb")
    emit("\t" + self.gnu_arch_spec)

    # Generate .data section for ConstantData if present
    constant_data_section = self._generate_constant_data_section()
    if constant_data_section:
        emit(constant_data_section)

    if len(self.constants) > 0:
        # NOTE(review): the section header and the DB/DW/DD/DQ/DO and ALIGN
        # directives below look like NASM syntax rather than GAS; confirm
        # whether this path is ever exercised for ARM output.
        emit(
            "section .rodata.{Microarchitecture} progbits alloc noexec nowrite align={Alignment}".format(
                Microarchitecture=self.target.id, Alignment=32
            )
        )
        emit(constants_label + ":")
        data_declaration_map = {8: "DB", 16: "DW", 32: "DD", 64: "DQ", 128: "DO"}
        need_alignment = False
        for bucket in self.constants:
            if need_alignment:
                emit("\tALIGN {Alignment}".format(Alignment=bucket.capacity))
            for constant in bucket.constants:
                emit(
                    "\t.{Label}: {Declaration} {Value}".format(
                        Label=constant.label,
                        Declaration=data_declaration_map[constant.size],
                        Value=", ".join([str(constant)] * constant.repeats),
                    )
                )
            # Re-align before the next bucket only if this one was not filled.
            need_alignment = not bucket.is_full()
        emit()

    if hasattr(self, "external_functions") and len(self.external_functions) > 0:
        for func_name in sorted(self.external_functions):
            emit(".extern {0}".format(func_name))
        emit()

    emit("\n\t.text\n")
    emit(".global {Function}".format(Function=function_label))
    emit(".type {Function}, %function".format(Function=function_label))
    if self.alignment > 0:
        emit(".align {Alignment}".format(Alignment=self.alignment))
    emit(function_label + ":")
    if self.gnu_fpu_spec:
        emit("\t" + self.gnu_fpu_spec)

    for instruction in self.instructions:
        if isinstance(instruction, BranchInstruction):
            # Branch targets are rendered as function-local labels.
            emit(
                "\t"
                + "{0} L{1}.{2}".format(
                    instruction.name, self.name, instruction.operands[0].label
                )
            )
        elif isinstance(instruction, Instruction):
            constant = instruction.get_constant()
            if constant is not None:
                # Set before str(instruction) is taken on the next line.
                constant.prefix = constants_label
            emit("\t" + str(instruction))
        elif isinstance(instruction, LabelQuasiInstruction):
            emit("L{0}.{1}:".format(self.name, instruction.name))
        else:
            emit("\t" + str(instruction))

    # Generate literal pool if present
    if hasattr(self, "literal_pool") and self.literal_pool.entries:
        emit()
        emit(self.literal_pool.generate_assembly(format="gas"))

    emit()
    return "".join(chunks)
495
+
496
def _generate_armcc_assembly(self):
    """Render the function as ARM Compiler (ARMCC) assembly text.

    Emits, in order: the optional THUMB directive, the constants area, the
    code area header, IMPORT lines, the PROC/EXPORT header, every
    instruction, the literal pool when one is present, and ENDP/END.
    Lines are terminated with ``os.linesep``.
    """
    import os

    from nervapy.arm.generic import BranchInstruction
    from nervapy.arm.instructions import Instruction
    from nervapy.arm.pseudo import LabelQuasiInstruction

    eol = os.linesep
    function_label = self.name
    constants_label = self.name + "_constants"
    chunks = []

    def emit(text=""):
        # Append one output line, terminated with the platform line separator.
        chunks.append(text + eol)

    if self.is_thumb:
        emit(" THUMB")

    # ARMCC constants section
    if len(self.constants) > 0:
        emit(" AREA ||.constdata||, DATA, READONLY")
        emit(constants_label)
        # NOTE(review): 64- and 128-bit constants both map to DCDU here;
        # confirm that matches how str(constant) renders their values.
        data_declaration_map = {
            8: "DCB",
            16: "DCW",
            32: "DCD",
            64: "DCDU",
            128: "DCDU",
        }
        for bucket in self.constants:
            for constant in bucket.constants:
                emit(
                    "{Label} {Declaration} {Value}".format(
                        Label=constant.label,
                        Declaration=data_declaration_map[constant.size],
                        Value=", ".join([str(constant)] * constant.repeats),
                    )
                )
        emit()

    # ARMCC code section
    area_header = " AREA ||.text||, CODE, READONLY"
    if self.alignment > 0:
        area_header += ", ALIGN={0}".format(self.alignment)
    emit(area_header)
    if self.preserve8:
        emit(" PRESERVE8")
    if self.armcc_fpu_spec:
        emit(" " + self.armcc_fpu_spec)
    emit()

    # Add IMPORT statements for external functions
    if hasattr(self, "external_functions") and len(self.external_functions) > 0:
        for func_name in sorted(self.external_functions):
            emit(" IMPORT " + func_name)
        emit()

    emit(function_label + " PROC")
    emit(" EXPORT " + function_label)

    for instruction in self.instructions:
        if isinstance(instruction, BranchInstruction):
            # Branch targets are rendered as "<function>_<label>".
            emit(
                " "
                + "{0} {1}_{2}".format(
                    instruction.name, self.name, instruction.operands[0].label
                )
            )
        elif isinstance(instruction, Instruction):
            constant = instruction.get_constant()
            if constant is not None:
                # Set before str(instruction) is taken on the next line.
                constant.prefix = constants_label
            emit(" " + str(instruction))
        elif isinstance(instruction, LabelQuasiInstruction):
            emit("{0}_{1}".format(self.name, instruction.name))
        else:
            emit(" " + str(instruction))

    # Generate literal pool if present
    if hasattr(self, "literal_pool") and self.literal_pool.entries:
        emit()
        emit(self.literal_pool.generate_assembly(format="armcc"))

    emit(" ENDP")
    emit(" END")
    return "".join(chunks)
581
+
582
@property
def gnu_arch_spec(self):
    """GAS ``.arch`` / ``.cpu`` directive chosen from ``self.isa_extensions``.

    Candidates are checked in the listed order and the first extension that
    is present decides the directive; ``.arch armv5t`` is the fallback.
    """
    from nervapy.arm.isa import Extension

    isa_extensions = self.isa_extensions
    # (Extension attribute name, directive), in priority order.
    candidates = (
        ("V8_1MMain", ".arch armv8.1-m.main"),
        ("V8MMain", ".arch armv8-m.main"),
        ("Div", ".cpu cortex-a15"),
        ("V7MP", ".cpu cortex-a9"),
        ("V7M", ".arch armv7-m"),
        ("V8MBase", ".arch armv8-m.base"),
        ("V7", ".arch armv7-a"),
        ("V6K", ".arch armv6zk"),
        ("V6", ".arch armv6"),
        ("V5E", ".arch armv5te"),
    )
    for extension_name, directive in candidates:
        if getattr(Extension, extension_name) in isa_extensions:
            return directive
    return ".arch armv5t"
609
+
610
@property
def gnu_fpu_spec(self):
    """GAS ``.fpu`` directive chosen from ``self.isa_extensions``.

    Returns:
        The directive text, or ``None`` when no floating-point or SIMD
        extension is in use.
    """
    from nervapy.arm.isa import Extension

    isa_extensions = self.isa_extensions

    def has(extension):
        return extension in isa_extensions

    # ARMv8-M (Cortex-M33, M35P, etc.) and ARMv8.1-M (Cortex-M55, etc.)
    # share the same selection: Helium MVE first, then the FPv5
    # single-precision FPU.
    if has(Extension.V8MMain) or has(Extension.V8MBase) or has(Extension.V8_1MMain):
        if has(Extension.MVE):
            return ".fpu mve"
        if has(Extension.VFP4) or has(Extension.VFP3):
            return ".fpu fpv5-sp-d16"
        return None

    if has(Extension.NEON2) or (has(Extension.VFP4) and has(Extension.NEON)):
        return ".fpu neon-vfpv4"
    # Parenthesised to spell out the precedence the original relied on:
    # NEONHP alone is enough, VFPHP additionally needs NEON.
    if has(Extension.NEONHP) or (has(Extension.VFPHP) and has(Extension.NEON)):
        return ".fpu neon-fp16"
    if has(Extension.NEON):
        return ".fpu neon"
    if has(Extension.VFPHP):
        return ".fpu vfpv3-fp16" if has(Extension.VFPd32) else ".fpu vfpv3-d16-fp16"
    if has(Extension.VFP3):
        return ".fpu vfpv3" if has(Extension.VFPd32) else ".fpu vfpv3-d16"
    if has(Extension.VFP) or has(Extension.VFP2):
        # Previously this branch returned None and the ".fpu vfp" result sat
        # in an unreachable second VFP3 branch, so VFP/VFP2 code was emitted
        # without any .fpu directive.
        return ".fpu vfp"
    return None
658
+
659
@property
def armcc_arch_spec(self):
    """ARMCC architecture specification; currently always ``"ARM"``.

    The previous implementation tested several ISA extensions, but every
    branch returned the same value, so the chain was dead code.
    """
    return "ARM"
679
+
680
@property
def armcc_fpu_spec(self):
    """ARMCC ``REQUIRE`` directive chosen from ``self.isa_extensions``.

    Returns:
        The directive text, or ``None`` when none of the tested
        floating-point or SIMD extensions is present.
    """
    from nervapy.arm.isa import Extension

    available = self.isa_extensions

    def has(extension):
        return extension in available

    if has(Extension.NEON2) or has(Extension.VFP4):
        return "REQUIRE VFPv4"
    # NEONHP alone is enough; VFPHP additionally needs NEON at this point.
    if has(Extension.NEONHP) or (has(Extension.VFPHP) and has(Extension.NEON)):
        return "REQUIRE VFPv3_FP16"
    if has(Extension.NEON):
        return "REQUIRE VFPv3"
    if has(Extension.VFPHP):
        return "REQUIRE VFPv3_FP16"
    if has(Extension.VFP3):
        return "REQUIRE VFPv3"
    if has(Extension.VFP) or has(Extension.VFP2):
        return "REQUIRE VFPv2"
    return None
704
+
705
def add_instruction(self, instruction):
    """Append one instruction to the function; ``None`` is ignored.

    For real ``Instruction`` objects the ISA extensions are checked against
    the target, a referenced local variable is registered with the stack
    frame, and the output registers are reported to the stack frame.

    Raises:
        ValueError: if the instruction needs an extension that the target
            microarchitecture does not list.
    """
    from nervapy.arm.instructions import Instruction

    if instruction is None:
        return
    if not isinstance(instruction, Instruction):
        # Anything that is not an Instruction is appended without checks.
        self.instructions.append(instruction)
        return

    for extension in instruction.isa_extensions:
        if extension in self.target.extensions:
            continue
        raise ValueError(
            "{0} is not supported on the target microarchitecture".format(
                extension
            )
        )

    local_variable = instruction.get_local_variable()
    if local_variable is not None:
        self.stack_frame.add_variable(local_variable.get_root())
    self.stack_frame.preserve_registers(instruction.get_output_registers_list())
    self.instructions.append(instruction)
723
+
724
def add_instructions(self, instructions):
    """Pass every item of ``instructions`` to ``add_instruction``, in order."""
    append_one = self.add_instruction
    for item in instructions:
        append_one(item)
727
+
728
def preserve(self, *registers):
    """Force additional registers into the function prologue/epilogue.

    Every register given is handed to the stack frame's
    ``force_preserve_register``. Use this for registers that the automatic
    analysis would not pick up on its own.

    Registers may be passed individually or grouped in a tuple/list,
    mirroring the calling convention of PUSH::

        with Function("my_func", ...) as f:
            f.preserve(r8, r9)        # varargs style
            f.preserve((r8, r9))      # tuple style (like PUSH)
            f.preserve(lr)
    """
    for item in registers:
        # Treat a lone register as a one-element group.
        group = item if isinstance(item, (tuple, list)) else (item,)
        for register in group:
            self.stack_frame.force_preserve_register(register)
750
+
751
def decompose_instructions(self):
    """Replace every ``ReturnInstruction`` with its ``to_instruction_list()``.

    All other instructions are kept as they are, in their original order.
    """
    from nervapy.arm.pseudo import ReturnInstruction

    expanded = []
    for instruction in self.instructions:
        if not isinstance(instruction, ReturnInstruction):
            expanded.append(instruction)
            continue
        expanded.extend(instruction.to_instruction_list())
    self.instructions = expanded
761
+
762
def generate_prolog_and_epilog(self):
    """Insert the stack frame's prologue and epilogue into the instructions.

    The prologue goes right after the ``ENTRY`` label; the epilogue goes
    right before every ``BranchExchangeInstruction``.
    """
    from nervapy.arm.generic import BranchExchangeInstruction
    from nervapy.arm.pseudo import LabelQuasiInstruction

    prologue = self.stack_frame.generate_prologue()
    epilogue = self.stack_frame.generate_epilogue()

    rebuilt = []
    for instruction in self.instructions:
        is_label = isinstance(instruction, LabelQuasiInstruction)
        if not is_label and isinstance(instruction, BranchExchangeInstruction):
            rebuilt.extend(epilogue)
        rebuilt.append(instruction)
        if is_label and instruction.name == "ENTRY":
            rebuilt.extend(prologue)
    self.instructions = rebuilt
780
+
781
def generate_labels(self):
    """Insert an ``ENTRY`` label at index 0 unless one already exists."""
    from nervapy.arm.instructions import Operand
    from nervapy.arm.pseudo import LabelQuasiInstruction

    has_entry = any(
        isinstance(instruction, LabelQuasiInstruction)
        and instruction.name == "ENTRY"
        for instruction in self.instructions
    )
    if not has_entry:
        self.instructions.insert(0, LabelQuasiInstruction(Operand("ENTRY")))
791
+
792
def get_label_table(self):
    """Return a dict mapping each label name to its instruction index.

    When a name occurs more than once, the last occurrence wins.
    """
    from nervapy.arm.pseudo import LabelQuasiInstruction

    return {
        instruction.name: position
        for position, instruction in enumerate(self.instructions)
        if isinstance(instruction, LabelQuasiInstruction)
    }
800
+
801
def find_entry_label(self):
    """Return the index of the first ``ENTRY`` label.

    Raises:
        ValueError: if the instruction stream has no ``ENTRY`` label.
    """
    from nervapy.arm.pseudo import LabelQuasiInstruction

    entry_index = next(
        (
            position
            for position, instruction in enumerate(self.instructions)
            if isinstance(instruction, LabelQuasiInstruction)
            and instruction.name == "ENTRY"
        ),
        None,
    )
    if entry_index is None:
        raise ValueError("Instruction stream does not contain the ENTRY label")
    return entry_index
809
+
810
def find_exit_points(self):
    """Return the indices of all ``BranchExchangeInstruction`` instructions."""
    from nervapy.arm.generic import BranchExchangeInstruction

    return [
        position
        for position, instruction in enumerate(self.instructions)
        if isinstance(instruction, BranchExchangeInstruction)
    ]
818
+
819
def determine_branches(self):
    """Record on every label the indices of the branches that target it.

    Each label's ``input_branches`` is reset to an empty set first. A branch
    whose target label is not in the label table raises ``KeyError``.
    """
    from nervapy.arm.generic import BranchInstruction
    from nervapy.arm.pseudo import LabelQuasiInstruction

    label_table = self.get_label_table()

    for instruction in self.instructions:
        if isinstance(instruction, LabelQuasiInstruction):
            instruction.input_branches = set()

    for branch_index, instruction in enumerate(self.instructions):
        if not isinstance(instruction, BranchInstruction):
            continue
        target_index = label_table[instruction.operands[0].label]
        self.instructions[target_index].input_branches.add(branch_index)
833
+
834
def reserve_registers(self):
    """Do nothing; called from ``__exit__`` after instruction decomposition."""
    return None
836
+
837
def determine_available_registers(self):
    """Annotate each instruction with the registers written before it.

    Starting at the ENTRY label, the instruction stream is walked forward.
    Every ``Instruction`` receives an ``available_registers`` set holding
    the output registers of the non-branch instructions seen earlier on
    the path being walked. Each branch is followed into its target once
    (tracked by index); an unconditional branch ends the current walk
    because execution does not fall through it.

    Raises:
        ValueError: if the stream has no ENTRY label.
        KeyError: if a branch targets an unknown label.
    """
    from nervapy.arm.generic import BranchInstruction
    from nervapy.arm.instructions import Instruction

    followed_branches = set()
    positions_by_label = self.get_label_table()

    def propagate(stream, first, inherited):
        # Work on a private copy so sibling paths do not see our additions.
        written_so_far = set(inherited)
        for position in range(first, len(stream)):
            current = stream[position]
            if not isinstance(current, Instruction):
                continue
            current.available_registers = set(written_so_far)
            if not isinstance(current, BranchInstruction):
                written_so_far |= set(current.get_output_registers_list())
                continue
            if position not in followed_branches:
                destination = positions_by_label[current.operands[0].label]
                # Mark before recursing so loops in the control flow terminate.
                followed_branches.add(position)
                propagate(stream, destination, written_so_far)
            if not current.is_conditional():
                # No fall-through after an unconditional branch.
                return

    entry_position = self.find_entry_label()
    propagate(self.instructions, entry_position, set())
867
+
868
def determine_live_registers(self, exclude_parameter_loads=False):
    """Compute, for every instruction, the set of registers live at it.

    The stream is walked backwards from each exit point (see
    ``find_exit_points``). Registers written by an instruction are removed
    from the running live set, registers read by it are added, and the
    result (merged with whatever the instruction already carried in
    ``live_registers``) is stored back on the instruction. When a label is
    reached, the walk is continued recursively from every branch that
    targets the label; each branch is entered this way at most once.

    Args:
        exclude_parameter_loads: when true, the input registers of
            ``LoadArgumentPseudoInstruction`` are not marked live.
    """
    from nervapy.arm.generic import BranchInstruction
    from nervapy.arm.instructions import Instruction
    from nervapy.arm.pseudo import (LabelQuasiInstruction,
                                    LoadArgumentPseudoInstruction)
    from nervapy.arm.registers import Register

    self.determine_branches()
    # NOTE(review): the original loop also executed `live_registers = set()`
    # for every Instruction, which only rebound an unused local. It may have
    # been meant to reset `instruction.live_registers`; because no reset
    # happens, results of earlier runs keep accumulating through the merge
    # step below. Behaviour is left unchanged here - confirm the intent.
    for instruction in self.instructions:
        if isinstance(instruction, BranchInstruction):
            instruction.is_visited = False

    def mark_live_registers(instructions, exit_point, initial_live_registers):
        # Maps register id -> bit mask of the live parts of that register.
        live_registers = dict(initial_live_registers)
        # Walk from the bottom to top of the linear block
        for i in range(exit_point, -1, -1):
            instruction = instructions[i]
            if (
                isinstance(instruction, BranchInstruction)
                # is_conditional is a method (it is called as such in
                # determine_available_registers); testing the bound method
                # itself was always truthy and disabled this early return.
                and not instruction.is_conditional()
                and i != exit_point
            ):
                # Code above an unconditional branch does not fall through
                # into the block being walked.
                return
            elif isinstance(instruction, Instruction):
                # First mark registers which are written to by this instruction as non-live
                # Then mark registers which are read by this instruction as live
                for output_register in instruction.get_output_registers_list():
                    register_id = output_register.id
                    if register_id in live_registers:
                        live_registers[register_id] &= ~output_register.mask
                        if live_registers[register_id] == 0:
                            del live_registers[register_id]

                if not (
                    exclude_parameter_loads
                    and isinstance(instruction, LoadArgumentPseudoInstruction)
                ):
                    for input_register in instruction.get_input_registers_list():
                        live_registers[input_register.id] = (
                            live_registers.get(input_register.id, 0)
                            | input_register.mask
                        )

                # Merge with registers previously determined to be live here
                for known_live_register in instruction.live_registers:
                    live_registers[known_live_register.id] = (
                        live_registers.get(known_live_register.id, 0)
                        | known_live_register.mask
                    )

                instruction.live_registers = {
                    Register.from_parts(register_id, register_mask, expand=True)
                    for register_id, register_mask in live_registers.items()
                }
            elif isinstance(instruction, LabelQuasiInstruction):
                for entry_point in instruction.input_branches:
                    if not instructions[entry_point].is_visited:
                        # Mark before recursing so loops terminate.
                        instructions[entry_point].is_visited = True
                        mark_live_registers(
                            instructions, entry_point, live_registers
                        )

    for exit_point in self.find_exit_points():
        mark_live_registers(self.instructions, exit_point, set())
944
+
945
def check_live_registers(self):
    """Placeholder hook; currently performs no check and returns ``None``."""
    return None
947
+
948
def print_live_registers(self, label=""):
    """Remember the current position for a later live-register report.

    Nothing is printed here. The most recently emitted ``Instruction``
    (or ``None`` when there is none yet) is appended, together with
    ``label``, to ``self._live_register_markers``; the report itself is
    produced by ``_report_live_registers_at_markers``.

    Args:
        label: Optional text identifying this location in the report.
    """
    from nervapy.arm.instructions import Instruction

    # Keep the instruction object rather than its index: indices can shift
    # when instructions are inserted later.
    latest_instruction = next(
        (
            entry
            for entry in reversed(self.instructions)
            if isinstance(entry, Instruction)
        ),
        None,
    )
    self._live_register_markers.append((latest_instruction, label))
968
+
969
def _report_live_registers_at_markers(self):
    """Print the live registers recorded at every marker.

    For each ``(instruction, label)`` pair in
    ``self._live_register_markers`` the instruction's ``live_registers``
    set is printed, grouped into GP, S, D and Q registers and sorted by
    ``(id, mask)``. Virtual registers are shown with their variable name
    from ``self._register_names`` when one is known.
    """
    from nervapy.arm.instructions import Instruction
    from nervapy.arm.registers import (DRegister, GeneralPurposeRegister,
                                       QRegister, SRegister)

    # (heading, prefix for virtual registers, register class), in print order.
    categories = (
        ("GP", "gp", GeneralPurposeRegister),
        ("S", "s", SRegister),
        ("D", "d", DRegister),
        ("Q", "q", QRegister),
    )

    def format_reg(r, reg_type):
        """Format a register with its name if available."""
        if not r.is_virtual:
            return str(r)
        vreg_id = (r.number - 0x40000) >> 12
        name = self._register_names.get(r.number, None)
        prefix = reg_type.lower()
        if name:
            return f"{prefix}-vreg<{vreg_id}, {name}>"
        return f"{prefix}-vreg<{vreg_id}>"

    for instr_obj, label in self._live_register_markers:
        if instr_obj is None:
            print(f"Live registers {label}: No instructions yet")
            continue

        # Live registers are filled in by determine_live_registers.
        live_regs = getattr(instr_obj, 'live_registers', set())
        if not live_regs:
            print(f"Live registers {label}: None")
            continue

        print(f"Live registers {label}:")
        for heading, prefix, register_class in categories:
            members = [r for r in live_regs if isinstance(r, register_class)]
            if not members:
                continue
            ordered = sorted(members, key=lambda x: (x.id, x.mask))
            joined = ', '.join(format_reg(r, prefix) for r in ordered)
            print(f" {heading} ({len(members)}): {joined}")
1016
+
1017
+ # all_registers = self.abi.volatile_registers + list(reversed(self.abi.argument_registers)) + self.abi.callee_save_registers
1018
+ # available_registers = { Register.GPType: list(), Register.WMMXType: list(), Register.VFPType: list() }
1019
+ # for register in all_registers:
1020
+ # if register not in available_registers[register.regtype]:
1021
+ # available_registers[register.regtype].append(register)
1022
+ # for instruction in self.instructions:
1023
+ # live_registers = { Register.GPType: set(), Register.WMMXType: set(), Register.VFPType: set() }
1024
+ # if isinstance(instruction, Instruction):
1025
+ # for live_register in instruction.live_registers:
1026
+ # live_registers[live_register.regtype].add(live_register)
1027
+ # for register_type in live_registers.keys():
1028
+ # if len(live_registers[register_type]) > len(available_registers[register_type]):
1029
+ # raise ValueError("Not enough available registers to allocate live registers at instruction {0}".format(instruction))
1030
+
1031
def determine_register_relations(self):
    """Prepare the data structures consumed by ``allocate_registers``.

    The method works in three stages over ``self.instructions``:

    1. For every virtual register that is live at, or written by, an
       instruction, create its list of candidate physical locations
       (bitboards) in ``self.allocation_options``, queue it in
       ``self.unallocated_registers`` and record in
       ``self.conflicting_registers`` the other virtual registers of the
       same type present at the same instruction.
    2. Remove every candidate that overlaps a physical register which is
       live at the same instruction as the virtual register.
    3. For NEON load/store and VFP load/store-multiple instructions with
       more than one register in their list, compute the allocations that
       place the list in consecutive physical registers. Such registers
       are then handled as a group: their individual entries in
       ``self.unallocated_registers`` and ``self.allocation_options`` are
       replaced by one entry keyed by the tuple of register ids.

    Raises:
        RegisterAllocationError: if a register list has no feasible
            consecutive allocation.
    """
    from nervapy import RegisterAllocationError
    from nervapy.arm.instructions import Instruction
    from nervapy.arm.registers import (DRegister, QRegister, Register,
                                       SRegister)
    from nervapy.arm.vfpneon import (NeonLoadStoreInstruction,
                                     VFPLoadStoreMultipleInstruction)

    all_registers = (
        self.abi.volatile_registers
        + list(reversed(self.abi.argument_registers))
        + self.abi.callee_save_registers
    )
    available_registers = {
        Register.GPType: list(),
        Register.WMMXType: list(),
        Register.VFPType: list(),
    }
    # Candidate bitboards for GP/WMMX registers, in ABI preference order.
    for register in all_registers:
        if register.type == Register.GPType or register.type == Register.WMMXType:
            register_bitboard = 0x1 << register.get_physical_number()
            if register_bitboard not in available_registers[register.type]:
                available_registers[register.type].append(register_bitboard)

    # Stage 1: allocation options and conflicts for each virtual register
    for instruction in self.instructions:
        if isinstance(instruction, Instruction):
            # Track all virtual registers used in the instruction (both live and outputs)
            virtual_live_registers = [
                register
                for register in instruction.live_registers
                if register.is_virtual
            ]
            # Also include output registers that may not be in live_registers
            # (e.g., dead code outputs that are written but never read)
            for output_reg in instruction.get_output_registers_list():
                if output_reg.is_virtual and output_reg not in virtual_live_registers:
                    virtual_live_registers.append(output_reg)

            for registerX in virtual_live_registers:
                if registerX.type == Register.VFPType:
                    # Work with the widest enclosing register (S -> D -> Q)
                    if isinstance(registerX, SRegister) and registerX.parent:
                        registerX = registerX.parent
                    if isinstance(registerX, DRegister) and registerX.parent:
                        registerX = registerX.parent
                    if registerX.get_id() not in self.allocation_options:
                        if isinstance(registerX, SRegister):
                            self.allocation_options[registerX.id] = [
                                (0x1 << n) for n in range(32)
                            ]
                        elif isinstance(registerX, DRegister):
                            if self.target.has_vfpd32:
                                self.allocation_options[registerX.id] = [
                                    (0x3 << n) for n in range(0, 64, 2)
                                ]
                            else:
                                self.allocation_options[registerX.id] = [
                                    (0x3 << n) for n in range(0, 32, 2)
                                ]
                        else:
                            self.allocation_options[registerX.id] = [
                                (0xF << n) for n in range(0, 64, 4)
                            ]
                else:
                    if registerX.id not in self.allocation_options:
                        self.allocation_options[registerX.id] = list(
                            available_registers[registerX.type]
                        )

                self.unallocated_registers.append((registerX.id, registerX.type))

                # Setup the list of conflicting registers for each virtual register
                if registerX.id not in self.conflicting_registers:
                    self.conflicting_registers[registerX.id] = set()
                for registerY in virtual_live_registers:
                    # VFP registers conflict even if they are of different size
                    if (
                        registerX.id != registerY.id
                        and registerX.type == registerY.type
                    ):
                        self.conflicting_registers[registerX.id].add(registerY.id)

    # Stage 2: mark available physical registers for each virtual register
    for instruction in self.instructions:
        if isinstance(instruction, Instruction):
            virtual_live_registers = [
                register
                for register in instruction.live_registers
                if register.is_virtual
            ]
            # If a physical register is live at some point, it can not be allocated for a virtual register
            physical_live_registers = [
                register
                for register in instruction.live_registers
                if not register.is_virtual
            ]
            for virtual_register in virtual_live_registers:
                for physical_register in physical_live_registers:
                    if virtual_register.type == physical_register.type:
                        virtual_register_id = virtual_register.id
                        physical_register_bitboard = physical_register.bitboard
                        # Filter in place so other references to the list see the change.
                        self.allocation_options[virtual_register_id][:] = [
                            possible_register_bitboard
                            for possible_register_bitboard in self.allocation_options[
                                virtual_register_id
                            ]
                            if (
                                possible_register_bitboard
                                & physical_register_bitboard
                            )
                            == 0
                        ]

    # Stage 3: detect group constraints
    constraints = dict()
    for instruction in self.instructions:
        if isinstance(instruction, NeonLoadStoreInstruction) or isinstance(
            instruction, VFPLoadStoreMultipleInstruction
        ):
            if isinstance(instruction, NeonLoadStoreInstruction):
                register_list = instruction.operands[0].get_registers_list()
                physical_registers_count = 32
            else:
                register_list = instruction.operands[1].get_registers_list()
                physical_registers_count = 32 if self.target.has_vfpd32 else 16
            if len(register_list) > 1:
                if all(
                    isinstance(register, DRegister) for register in register_list
                ):
                    # Unique register ids, in order of first appearance
                    register_id_list = list()
                    for register in register_list:
                        register_id = register.get_id()
                        if register_id not in register_id_list:
                            register_id_list.append(register_id)
                    register_id_list = tuple(register_id_list)
                    # Iterate possible allocations for this register list
                    # For VLD1/VST1 instructions all registers must be allocated to sequential physical registers
                    options = list()
                    for sequence_bitboard_position in range(
                        0,
                        2 * physical_registers_count - 2 * len(register_list) + 2,
                        2,
                    ):
                        register_bitboards = [
                            0x3 << (sequence_bitboard_position + 2 * i)
                            for i in range(len(register_list))
                        ]
                        for i, (bitboard, register) in enumerate(
                            zip(register_bitboards, register_list)
                        ):
                            register_bitboards[i] = register.extend_bitboard(
                                bitboard
                            )
                        # Check that bitboard is available for allocation
                        for register, bitboard in zip(
                            register_list, register_bitboards
                        ):
                            if (
                                bitboard
                                not in self.allocation_options[register.get_id()]
                            ):
                                break
                        else:
                            # Check that registers with the same id use the same bitboard in this allocation
                            register_id_map = dict()
                            for register, bitboard in zip(
                                register_list, register_bitboards
                            ):
                                register_id = register.get_id()
                                if register_id in register_id_map:
                                    if register_id_map[register_id] != bitboard:
                                        break
                                else:
                                    register_id_map[register_id] = bitboard
                            else:
                                # Check that allocation bitboards do not overlap:
                                allocation_bitboard = 0
                                for bitboard in register_id_map.values():
                                    if (allocation_bitboard & bitboard) == 0:
                                        allocation_bitboard |= bitboard
                                    else:
                                        break
                                else:
                                    ordered_bitboard_list = [
                                        register_id_map[register_id]
                                        for register_id in register_id_list
                                    ]
                                    options.append(tuple(ordered_bitboard_list))
                    if options:
                        if len(register_id_list) > 1:
                            if register_id_list in constraints:
                                # Same group seen before: keep only options valid for both uses
                                constraints[register_id_list] = tuple(
                                    [
                                        option
                                        for option in constraints[register_id_list]
                                        if option in options
                                    ]
                                )
                            else:
                                constraints[register_id_list] = tuple(options)
                    else:
                        raise RegisterAllocationError(
                            "Impossible virtual register combination in instruction %s"
                            % instruction
                        )
                elif all(
                    isinstance(register, SRegister) for register in register_list
                ) and isinstance(instruction, VFPLoadStoreMultipleInstruction):
                    # Unique register ids, in order of first appearance
                    register_id_list = list()
                    for register in register_list:
                        register_id = register.id
                        if register_id not in register_id_list:
                            register_id_list.append(register_id)
                    register_id_list = tuple(register_id_list)
                    # Iterate possible allocations for this register list
                    # For VLDM/VSTM instructions all registers must be allocated to sequential physical registers
                    options = list()
                    for sequence_bitboard_position in range(
                        0, 32 - len(register_list) + 1
                    ):
                        register_bitboards = [
                            0x1 << (sequence_bitboard_position + i)
                            for i in range(len(register_list))
                        ]
                        for i, (bitboard, register) in enumerate(
                            zip(register_bitboards, register_list)
                        ):
                            register_bitboards[i] = register.extend_bitboard(
                                bitboard
                            )
                        # Check that bitboard is available for allocation
                        for register, bitboard in zip(
                            register_list, register_bitboards
                        ):
                            if bitboard not in self.allocation_options[register.id]:
                                break
                        else:
                            # Check that registers with the same id use the same bitboard in this allocation
                            register_id_map = dict()
                            for register, bitboard in zip(
                                register_list, register_bitboards
                            ):
                                register_id = register.id
                                if register_id in register_id_map:
                                    if register_id_map[register_id] != bitboard:
                                        break
                                else:
                                    register_id_map[register_id] = bitboard
                            else:
                                # Check that allocation bitboards do not overlap:
                                allocation_bitboard = 0
                                for bitboard in register_id_map.values():
                                    if (allocation_bitboard & bitboard) == 0:
                                        allocation_bitboard |= bitboard
                                    else:
                                        break
                                else:
                                    ordered_bitboard_list = [
                                        register_id_map[register_id]
                                        for register_id in register_id_list
                                    ]
                                    options.append(tuple(ordered_bitboard_list))
                    if options:
                        if len(register_id_list) > 1:
                            if register_id_list in constraints:
                                # Same group seen before: keep only options valid for both uses
                                constraints[register_id_list] = tuple(
                                    [
                                        option
                                        for option in constraints[register_id_list]
                                        if option in options
                                    ]
                                )
                            else:
                                constraints[register_id_list] = tuple(options)
                    else:
                        raise RegisterAllocationError(
                            "Impossible virtual register combination in instruction %s"
                            % instruction
                        )
                else:
                    # Mixed or unsupported register lists are not handled
                    assert False

    # Debug aid, disabled by default
    report_register_constraints = False
    if report_register_constraints:
        for group_ids, group_options in constraints.items():
            print("REGISTER CONSTRAINTS: ", list(map(str, group_ids)))
            for group_option in group_options:
                print("\t", list(map(lambda t: "%016X" % t, group_option)))

    # Merging of different groups sharing a register will be implemented here sometime

    # Check that each register id appears only once
    constrained_register_id_list = [
        register_id
        for register_id_list in constraints.keys()
        for register_id in register_id_list
    ]
    assert len(constrained_register_id_list) == len(
        set(constrained_register_id_list)
    )

    # Create a map from constrained register to constrained register group
    # constrained_register_map = dict()
    # for register_id_list in constraints.keys():
    #     for register_id in register_id_list:
    #         constrained_register_map[register_id] = register_id_list

    # Remove individual registers from the set of unallocated registers and add the register group instead
    for constrained_register_id in constrained_register_id_list:
        # A register is queued once per instruction it appears at, so drop every copy
        while (
            constrained_register_id,
            Register.VFPType,
        ) in self.unallocated_registers:
            self.unallocated_registers.remove(
                (constrained_register_id, Register.VFPType)
            )
    for register_id_list in constraints.keys():
        self.unallocated_registers.append((register_id_list, Register.VFPType))

    # print "UNALLOCATED REGISTERS:"
    # print "\t", self.unallocated_registers

    # Remove individual registers from the sets of conflicting registers and add the register group instead
    # for register_id_list in constraints.keys():
    #     self.conflicting_registers[register_id_list] = set()
    # for constrained_register_id in constrained_register_id_list:
    #     self.conflicting_registers[constrained_register_map[constrained_register_id]].update(self.conflicting_registers[constrained_register_id])
    #     del self.conflicting_registers[constrained_register_id]
    # for conflicting_registers_set in self.conflicting_registers.values():
    #     for constrained_register_id in constrained_register_id_list:
    #         if constrained_register_id in conflicting_registers_set:
    #             conflicting_registers_set.remove(constrained_register_id)
    #             conflicting_registers_set.add(constrained_register_map[constrained_register_id])

    # Remove individual registers from the lists of allocation options and add the register group instead
    for constrained_register_id in constrained_register_id_list:
        del self.allocation_options[constrained_register_id]
    for register_id_list, constrained_options in constraints.items():
        # Each group gets its own options; the previous code stored `options`,
        # a leftover from the last instruction processed in stage 3, for all groups.
        self.allocation_options[register_id_list] = list(constrained_options)
1368
+
1369
def _get_register_type_name(self, register_type):
    """Return the display name used in messages for a register type.

    Args:
        register_type: one of the ``Register.*Type`` constants.

    Returns:
        str: ``"general-purpose"``, ``"VFP/NEON"`` or ``"WMMX"``; any other
        value yields ``"unknown type <n>"``.
    """
    from nervapy.arm.registers import Register

    display_names = (
        (Register.GPType, "general-purpose"),
        (Register.VFPType, "VFP/NEON"),
        (Register.WMMXType, "WMMX"),
    )
    for known_type, display_name in display_names:
        if register_type == known_type:
            return display_name
    return "unknown type %d" % register_type
1381
+
1382
def _get_available_registers_info(self, register_type):
    """Return a fixed, human-readable summary of a type's physical registers.

    Args:
        register_type: one of the ``Register.*Type`` constants.

    Returns:
        str: descriptive text for the type, or ``"unknown"`` for any
        other value. The text is static; it is not derived from the ABI.
    """
    from nervapy.arm.registers import Register

    summaries = (
        # r13 (sp), r14 (lr) and r15 (pc) are left out of the GP summary.
        (Register.GPType, "r0-r12 (13 registers available for allocation)"),
        (
            Register.VFPType,
            "s0-s31 or d0-d31 or q0-q15 (depending on instruction)",
        ),
        (Register.WMMXType, "wr0-wr15 (16 registers)"),
    )
    for known_type, summary in summaries:
        if register_type == known_type:
            return summary
    return "unknown"
1396
+
1397
+ def _count_virtual_registers_by_type(self, register_type):
1398
+ """Count how many virtual registers of a given type are actually being allocated (after optimization)."""
1399
+ from nervapy.arm.registers import Register
1400
+
1401
+ # Count unique registers in unallocated_registers that match the given type
1402
+ # This list has already been filtered by liveness analysis
1403
+ # Use a set to avoid counting duplicates
1404
+ unique_ids = set()
1405
+ for virtual_register_id, virtual_register_type in self.unallocated_registers:
1406
+ if isinstance(virtual_register_id, tuple):
1407
+ # Register list - all registers in the list should be the same type
1408
+ if virtual_register_type == register_type:
1409
+ unique_ids.update(virtual_register_id)
1410
+ else:
1411
+ # Single register
1412
+ if virtual_register_type == register_type:
1413
+ unique_ids.add(virtual_register_id)
1414
+ return len(unique_ids)
1415
+
1416
def _get_max_physical_registers(self, register_type):
    """Return the nominal number of physical registers for a type.

    The figures are fixed approximations used in error messages; they are
    not derived from the ABI or the target.

    Args:
        register_type: one of the ``Register.*Type`` constants.

    Returns:
        int: 13 for general-purpose, 32 for VFP/NEON, 16 for WMMX and 0
        for any other value.
    """
    from nervapy.arm.registers import Register

    nominal_counts = (
        # r0-r12; the real number can vary with ABI and function constraints
        (Register.GPType, 13),
        # Counted as single-precision registers; a simplification
        (Register.VFPType, 32),
        (Register.WMMXType, 16),
    )
    for known_type, count in nominal_counts:
        if register_type == known_type:
            return count
    return 0
1432
+
1433
+ def allocate_registers(self):
1434
+ from nervapy.arm.instructions import Instruction
1435
+ from nervapy.arm.pseudo import LoadArgumentPseudoInstruction
1436
+ from nervapy.arm.registers import Register
1437
+
1438
+ # Save counts before allocation starts (after liveness analysis has eliminated dead code)
1439
+ # This gives us accurate counts for error messages
1440
+ self._vr_counts_by_type = {}
1441
+ for reg_type in [Register.GPType, Register.VFPType, Register.WMMXType]:
1442
+ self._vr_counts_by_type[reg_type] = self._count_virtual_registers_by_type(reg_type)
1443
+
1444
+ # Map from virtual register id to physical register
1445
+ register_allocation = dict()
1446
+ for virtual_register_id, virtual_register_type in self.unallocated_registers:
1447
+ register_allocation[virtual_register_id] = None
1448
+
1449
+ def bind_register(virtual_register_id, physical_register):
1450
+ # Remove option to allocate any conflicting virtual register to the same physical register or its enclosing register
1451
+ physical_register_bitboard = physical_register.bitboard
1452
+ for conflicting_register_id in self.conflicting_registers[
1453
+ virtual_register_id
1454
+ ]:
1455
+ if conflicting_register_id in self.allocation_options:
1456
+ for allocation_bitboard in self.allocation_options[
1457
+ conflicting_register_id
1458
+ ]:
1459
+ if (allocation_bitboard & physical_register_bitboard) != 0:
1460
+ self.allocation_options[conflicting_register_id].remove(
1461
+ allocation_bitboard
1462
+ )
1463
+ register_allocation[virtual_register_id] = physical_register
1464
+
1465
+ def bind_registers(virtual_register_id_list, physical_register_id_list):
1466
+ # Remove option to allocate any conflicting virtual register to the same physical register or its enclosing register
1467
+ physical_register_bitboard_list = [
1468
+ physical_register.get_bitboard()
1469
+ for physical_register in physical_register_id_list
1470
+ ]
1471
+ for virtual_register_id, physical_register_bitboard in zip(
1472
+ virtual_register_id_list, physical_register_bitboard_list
1473
+ ):
1474
+ for conflicting_register_id in self.conflicting_registers[
1475
+ virtual_register_id
1476
+ ]:
1477
+ for (
1478
+ allocation_key,
1479
+ allocation_option,
1480
+ ) in self.allocation_options.items():
1481
+ if isinstance(allocation_key, tuple):
1482
+ if conflicting_register_id in allocation_key:
1483
+ conflicting_register_index = allocation_key.index(
1484
+ conflicting_register_id
1485
+ )
1486
+ for bitboard_list in allocation_option:
1487
+ if (
1488
+ bitboard_list[conflicting_register_index]
1489
+ & physical_register_bitboard
1490
+ ) != 0:
1491
+ allocation_option.remove(bitboard_list)
1492
+ else:
1493
+ if conflicting_register_id == allocation_key:
1494
+ for bitboard in allocation_option:
1495
+ if (bitboard & physical_register_bitboard) != 0:
1496
+ allocation_option.remove(bitboard)
1497
+
1498
+ for virtual_register_id, physical_register_id in zip(
1499
+ virtual_register_id_list, physical_register_id_list
1500
+ ):
1501
+ register_allocation[virtual_register_id] = physical_register_id
1502
+
1503
+ def is_allocated(virtual_register_id):
1504
+ return bool(register_allocation[virtual_register_id])
1505
+
1506
+ # First allocate parameters
1507
+ for instruction in self.instructions:
1508
+ if isinstance(instruction, LoadArgumentPseudoInstruction):
1509
+ if instruction.argument.register:
1510
+ if instruction.destination.register.is_virtual:
1511
+ if not is_allocated(instruction.destination.register.id):
1512
+ if (
1513
+ instruction.argument.register.bitboard
1514
+ in self.allocation_options[
1515
+ instruction.destination.register.id
1516
+ ]
1517
+ ):
1518
+ bind_register(
1519
+ instruction.destination.register.id,
1520
+ instruction.argument.register,
1521
+ )
1522
+
1523
+ # Now allocate registers with special restrictions
1524
+ for (
1525
+ virtual_register_id_list,
1526
+ virtual_register_type,
1527
+ ) in self.unallocated_registers:
1528
+ if isinstance(virtual_register_id_list, tuple):
1529
+ # print "REGLIST: ", map(str, virtual_register_id_list)
1530
+ if not self.allocation_options[virtual_register_id_list]:
1531
+ # Use saved count from before allocation started
1532
+ vr_count = self._vr_counts_by_type.get(virtual_register_type, 0)
1533
+ max_phys = self._get_max_physical_registers(virtual_register_type)
1534
+ raise RuntimeError(
1535
+ "Register allocation failed: No available physical registers for virtual register list %s (type: %s).\n"
1536
+ "Your code uses %d virtual %s registers, but only ~%d physical registers are available.\n"
1537
+ "To fix: reduce the number of registers used in your Python code."
1538
+ % (virtual_register_id_list, self._get_register_type_name(virtual_register_type),
1539
+ vr_count, self._get_register_type_name(virtual_register_type), max_phys)
1540
+ )
1541
+ physical_register_bitboard_list = self.allocation_options[
1542
+ virtual_register_id_list
1543
+ ][0]
1544
+ physcial_registers_list = [
1545
+ Register.from_bitboard(
1546
+ physical_register_bitboard, virtual_register_type
1547
+ )
1548
+ for physical_register_bitboard in physical_register_bitboard_list
1549
+ ]
1550
+ bind_registers(virtual_register_id_list, physcial_registers_list)
1551
+
1552
+ # Now allocate all other registers
1553
+ while self.unallocated_registers:
1554
+ virtual_register_id, virtual_register_type = self.unallocated_registers.pop(
1555
+ 0
1556
+ )
1557
+ if not isinstance(virtual_register_id, tuple):
1558
+ if not is_allocated(virtual_register_id):
1559
+ if not self.allocation_options[virtual_register_id]:
1560
+ # Use saved count from before allocation started
1561
+ vr_count = self._vr_counts_by_type.get(virtual_register_type, 0)
1562
+ max_phys = self._get_max_physical_registers(virtual_register_type)
1563
+
1564
+ # Debug: find max simultaneous live registers
1565
+ max_live = 0
1566
+ max_live_instr = None
1567
+ max_live_line = None
1568
+ max_live_idx = None
1569
+ max_live_regs = []
1570
+ for idx, instruction in enumerate(self.instructions):
1571
+ if hasattr(instruction, 'live_registers'):
1572
+ live_regs = [r for r in instruction.live_registers
1573
+ if hasattr(r, 'type') and r.type == virtual_register_type and r.is_virtual]
1574
+ live_count = len(live_regs)
1575
+ if live_count > max_live:
1576
+ max_live = live_count
1577
+ max_live_instr = instruction
1578
+ max_live_idx = idx
1579
+ max_live_line = getattr(instruction, 'line_number', None)
1580
+ # Include variable names in the register representation
1581
+ max_live_regs = []
1582
+ for r in live_regs:
1583
+ reg_str = str(r)
1584
+ var_name = self._register_names.get(r.number, None)
1585
+ if var_name:
1586
+ reg_str = f"{reg_str}, {var_name}"
1587
+ max_live_regs.append(reg_str)
1588
+
1589
+ debug_msg = ""
1590
+ if max_live <= max_phys:
1591
+ debug_msg = (f"\n\nPhysical registers available: {max_phys}\n"
1592
+ f"The register pressure ({max_live}/{max_phys}) should be manageable, but the allocator\n"
1593
+ f"couldn't find a valid allocation due to conflicting constraints.\n")
1594
+ if max_live_instr is not None:
1595
+ # Show instruction location info
1596
+ location_info = []
1597
+ if hasattr(max_live_instr, 'source_file') and max_live_instr.source_file:
1598
+ location_info.append(f"File: {max_live_instr.source_file}")
1599
+ if hasattr(max_live_instr, 'line_number') and max_live_instr.line_number:
1600
+ location_info.append(f"Line: {max_live_instr.line_number}")
1601
+ if location_info:
1602
+ debug_msg += f"Max register pressure at {', '.join(location_info)}\n"
1603
+ elif max_live_idx is not None:
1604
+ debug_msg += f"Max register pressure at instruction #{max_live_idx} (index in generated code)\n"
1605
+ debug_msg += f"Hint: Enable collect_origin=True in Function() to see Python source line numbers.\n"
1606
+
1607
+ debug_msg += f"Instruction with max pressure: {max_live_instr}\n"
1608
+
1609
+ # Show the source code if available
1610
+ if hasattr(max_live_instr, 'source_code') and max_live_instr.source_code:
1611
+ debug_msg += f"Source code: {max_live_instr.source_code}\n"
1612
+
1613
+ if max_live_regs:
1614
+ debug_msg += f"Live virtual registers: {', '.join(sorted(max_live_regs))}\n"
1615
+ debug_msg += f"\nThis suggests the greedy allocator made suboptimal early choices.\n"
1616
+ debug_msg += f"Try reordering your code or reducing temporary register usage.\n"
1617
+ else:
1618
+ debug_msg = (f"\n\nThis exceeds the {max_phys} physical registers available.\n"
1619
+ f"You need to reduce the number of live registers at once.\n")
1620
+ if max_live_instr is not None:
1621
+ # Show instruction location info
1622
+ location_info = []
1623
+ if hasattr(max_live_instr, 'source_file') and max_live_instr.source_file:
1624
+ location_info.append(f"File: {max_live_instr.source_file}")
1625
+ if hasattr(max_live_instr, 'line_number') and max_live_instr.line_number:
1626
+ location_info.append(f"Line: {max_live_instr.line_number}")
1627
+ if location_info:
1628
+ debug_msg += f"Max register pressure at {', '.join(location_info)}\n"
1629
+ elif max_live_idx is not None:
1630
+ debug_msg += f"Max register pressure at instruction #{max_live_idx} (index in generated code)\n"
1631
+ debug_msg += f"Hint: Enable collect_origin=True in Function() to see Python source line numbers.\n"
1632
+
1633
+ debug_msg += f"Instruction: {max_live_instr}\n"
1634
+
1635
+ # Show the source code if available
1636
+ if hasattr(max_live_instr, 'source_code') and max_live_instr.source_code:
1637
+ debug_msg += f"Source code: {max_live_instr.source_code}\n"
1638
+
1639
+ raise RuntimeError(
1640
+ "Register allocation failed: No available physical registers for virtual register #%d (type: %s).\n"
1641
+ "Your code uses %d virtual %s registers, but only ~%d physical registers are available.\n"
1642
+ "Available %s registers: %s\n"
1643
+ "To fix: reduce the number of registers used in your Python code.%s"
1644
+ % (
1645
+ virtual_register_id,
1646
+ self._get_register_type_name(virtual_register_type),
1647
+ vr_count,
1648
+ self._get_register_type_name(virtual_register_type),
1649
+ max_phys,
1650
+ self._get_register_type_name(virtual_register_type),
1651
+ self._get_available_registers_info(virtual_register_type),
1652
+ debug_msg
1653
+ )
1654
+ )
1655
+ physical_register_bitboard = self.allocation_options[
1656
+ virtual_register_id
1657
+ ][0]
1658
+ physical_register = Register.from_bitboard(
1659
+ physical_register_bitboard, virtual_register_type
1660
+ )
1661
+ bind_register(virtual_register_id, physical_register)
1662
+
1663
+ # Verify all virtual registers used in instructions are tracked
1664
+ untracked_registers = set()
1665
+ for instruction in self.instructions:
1666
+ if isinstance(instruction, Instruction):
1667
+ for input_register in instruction.get_input_registers_list():
1668
+ if input_register.is_virtual:
1669
+ if input_register.id not in register_allocation:
1670
+ untracked_registers.add(input_register.id)
1671
+ for output_register in instruction.get_output_registers_list():
1672
+ if output_register.is_virtual:
1673
+ if output_register.id not in register_allocation:
1674
+ untracked_registers.add(output_register.id)
1675
+
1676
+ if untracked_registers:
1677
+ raise RuntimeError(
1678
+ f"Internal error: Virtual registers {sorted(untracked_registers)} used in instructions "
1679
+ f"but were not tracked for allocation. This indicates a bug where registers were created "
1680
+ f"after liveness analysis or were not properly added to live_registers."
1681
+ )
1682
+
1683
+ for instruction in self.instructions:
1684
+ if isinstance(instruction, Instruction):
1685
+ for input_register in instruction.get_input_registers_list():
1686
+ if input_register.is_virtual:
1687
+ input_register.bind(register_allocation[input_register.id])
1688
+ for output_register in instruction.get_output_registers_list():
1689
+ if output_register.is_virtual:
1690
+ output_register.bind(
1691
+ register_allocation[output_register.id]
1692
+ )
1693
+
1694
+ # Updates information about registers to be saved/restored in the function prologue/epilogue
1695
def update_stack_frame(self):
    """Tell the stack frame about every register written by the function body.

    The output registers of each real ``Instruction`` are passed to
    ``self.stack_frame.preserve_registers``, which decides which of them
    have to be saved/restored in the prologue/epilogue.
    """
    from nervapy.arm.instructions import Instruction

    real_instructions = (
        entry for entry in self.instructions if isinstance(entry, Instruction)
    )
    for entry in real_instructions:
        self.stack_frame.preserve_registers(entry.get_output_registers_list())
+
1704
def remove_assume_statements(self):
    """Replace ``self.instructions`` with a copy that omits every
    ``AssumeInitializedPseudoInstruction``; all other entries keep their order.
    """
    from nervapy.arm.pseudo import AssumeInitializedPseudoInstruction

    self.instructions = [
        entry
        for entry in self.instructions
        if not isinstance(entry, AssumeInitializedPseudoInstruction)
    ]
+
1715
def generate_parameter_loads(self):
    """Lower ``LoadArgumentPseudoInstruction`` entries to real instructions.

    * Argument held in a register different from the destination: ``MOV``.
    * Argument already in the destination register: nothing is emitted.
    * Argument on the stack: ``LDR`` from ``[sp, offset]``, where the offset
      is the stack frame's parameters offset plus the argument's own
      stack offset.

    The liveness information of the pseudo-instruction is carried over to
    its replacement. All other instructions are kept unchanged.
    """
    from nervapy.arm.generic import LDR, MOV
    from nervapy.arm.pseudo import LoadArgumentPseudoInstruction
    from nervapy.arm.registers import sp

    lowered = []
    for pseudo in self.instructions:
        if not isinstance(pseudo, LoadArgumentPseudoInstruction):
            lowered.append(pseudo)
            continue

        argument = pseudo.argument
        if argument.register:
            if pseudo.destination.register != argument.register:
                replacement = MOV(pseudo.destination.register, argument.register)
            else:
                # Source and destination coincide: the move would be a no-op.
                continue
        else:
            stack_address = (
                self.stack_frame.get_parameters_offset() + argument.stack_offset
            )
            replacement = LDR(pseudo.destination.register, [sp, stack_address])

        replacement.live_registers = pseudo.live_registers
        replacement.available_registers = pseudo.available_registers
        lowered.append(replacement)
    self.instructions = lowered
+
1756
def generate_constant_loads(self):
    """Label the constants referenced by instructions and group them in buckets.

    Every distinct constant gets a label ``c<N>`` (a constant seen again
    reuses the label assigned first) and is added to a ``ConstantBucket``.
    Buckets that are not yet full are kept open per constant alignment so
    that later constants with the same alignment can share them; every
    newly created bucket is appended to ``self.constants``.

    Raises:
        NotImplementedError: if the instruction list still contains a
            ``LoadConstantPseudoInstruction`` (lowering is not implemented).
    """
    from nervapy import ConstantBucket
    from nervapy.arm.instructions import Instruction
    from nervapy.arm.pseudo import LoadConstantPseudoInstruction

    # First pass: the largest alignment determines the capacity of all buckets.
    max_alignment = 0
    for instruction in self.instructions:
        if isinstance(instruction, Instruction):
            constant = instruction.get_constant()
            if constant is not None:
                max_alignment = max(max_alignment, constant.get_alignment())

    # Second pass: assign labels and distribute constants over buckets.
    constant_id = 0
    constant_label_map = dict()
    open_buckets = dict()
    for instruction in self.instructions:
        if not isinstance(instruction, Instruction):
            continue
        constant = instruction.get_constant()
        if constant is None:
            continue
        if constant in constant_label_map:
            constant.label = constant_label_map[constant]
            continue

        constant.label = "c" + str(constant_id)
        constant_id += 1
        constant_label_map[constant] = constant.label

        alignment = constant.get_alignment()
        if alignment in open_buckets:
            bucket = open_buckets[alignment]
            bucket.add(constant)
            if bucket.is_full():
                del open_buckets[alignment]
        else:
            # Floor division keeps the capacity an integer on Python 3
            # (plain "/" would hand a float to ConstantBucket).
            # NOTE(review): assumes max_alignment is a multiple of 8 - confirm.
            bucket = ConstantBucket(max_alignment // 8)
            bucket.add(constant)
            self.constants.append(bucket)
            if not bucket.is_full():
                open_buckets[alignment] = bucket

    # Lowering of constant-load pseudo-instructions is not implemented.
    for instruction in self.instructions:
        if isinstance(instruction, LoadConstantPseudoInstruction):
            raise NotImplementedError()
    self.instructions = list(self.instructions)
+
1804
def validate_stack_alignment_check(self):
    """
    Validate that stack is 8-byte aligned before BL/BLX instructions.

    For ARMv7-M architecture (Cortex-M), the AAPCS requires that the stack
    pointer must be 8-byte aligned at any public interface (function calls).
    The method walks the instruction list, keeps a running byte offset of
    the stack pointer (PUSH/POP, SP-based store/load-multiple with
    writeback, ``SUB``/``ADD sp, sp, #imm``) and raises ``ValueError`` when
    a BL or BLX is reached while that offset is not a multiple of 8.

    Nothing is checked unless the target has the V7M or V8MBase extension.
    """
    from nervapy.arm.generic import (ArithmeticInstruction,
                                     BranchLinkExchangeInstruction,
                                     BranchWithLinkInstruction,
                                     PushPopInstruction,
                                     StoreMultipleInstruction)
    from nervapy.arm.instructions import Instruction
    from nervapy.arm.isa import Extension
    from nervapy.arm.registers import sp

    # Enforce for ARMv7-M and ARMv8-M architectures (V8MMain implies V7M via prerequisites)
    extensions = self.target.extensions
    if Extension.V7M not in extensions and Extension.V8MBase not in extensions:
        return

    push_names = ("PUSH", "PUSH.W")
    pop_names = ("POP", "POP.W")

    def both_are_sp(first, second):
        # True when both operands wrap the stack pointer register.
        return (
            hasattr(first, "register")
            and first.register == sp
            and hasattr(second, "register")
            and second.register == sp
        )

    # The prologue emitted by the stack frame is taken to keep the stack
    # 8-byte aligned, so that many leading PUSH/VPUSH/STM/SUB-sp
    # instructions are skipped instead of being tracked.
    expected_prologue = len(self.stack_frame.generate_prologue())
    skipped = 0
    depth = 0  # bytes the stack has grown since the aligned starting point

    for insn in self.instructions:
        if not isinstance(insn, Instruction):
            continue

        if skipped < expected_prologue:
            if isinstance(insn, PushPopInstruction) and insn.name in push_names:
                is_prologue = True
            elif insn.__class__.__name__ == "VfpNeonPushPopInstruction":
                # VPUSH (VFP register saves)
                is_prologue = True
            elif isinstance(insn, StoreMultipleInstruction):
                # STMDB (used with high registers)
                is_prologue = insn.writeback and insn.name.startswith("STM")
            elif isinstance(insn, ArithmeticInstruction):
                # SUB sp, sp, #imm used for alignment padding
                is_prologue = (
                    len(insn.operands) >= 3
                    and both_are_sp(insn.operands[0], insn.operands[1])
                    and insn.name.startswith("SUB")
                )
            else:
                is_prologue = False
            if is_prologue:
                skipped += 1
                continue

        if isinstance(insn, PushPopInstruction):
            # Each pushed/popped register moves SP by 4 bytes.
            if insn.name in push_names:
                depth += 4 * len(insn.operands[0].get_registers_list())
            elif insn.name in pop_names:
                depth -= 4 * len(insn.operands[0].get_registers_list())

        elif isinstance(insn, StoreMultipleInstruction):
            # Only multi-register transfers that write back to SP matter.
            if insn.writeback:
                base = insn.operands[0]
                if hasattr(base, "register") and base.register == sp:
                    moved_bytes = 4 * len(insn.operands[1].get_registers_list())
                    if insn.name.startswith("STM"):
                        depth += moved_bytes
                    elif insn.name.startswith("LDM"):
                        depth -= moved_bytes

        elif isinstance(insn, ArithmeticInstruction):
            if len(insn.operands) >= 3:
                destination, first_source, second_source = insn.operands[:3]
                if both_are_sp(destination, first_source):
                    # The immediate may be exposed as 'immediate' or 'value'.
                    amount = None
                    if hasattr(second_source, "immediate"):
                        amount = second_source.immediate
                    elif hasattr(second_source, "value"):
                        amount = second_source.value

                    if amount is not None:
                        if insn.name.startswith("SUB"):
                            # SUB sp, sp, #imm - allocates stack space
                            depth += amount
                        elif insn.name.startswith("ADD"):
                            # ADD sp, sp, #imm - deallocates stack space
                            depth -= amount

        elif isinstance(
            insn, (BranchWithLinkInstruction, BranchLinkExchangeInstruction)
        ):
            if depth % 8 != 0:
                message = (
                    "Stack is not 8-byte aligned before {0} instruction.\n"
                    "Current stack offset: {1} bytes (misaligned by {2} bytes).\n"
                    "ARMv7-M/ARMv8-M requires 8-byte stack alignment at function calls (AAPCS requirement).\n"
                    "Add registers in pairs to PUSH instructions or adjust the stack manually to maintain alignment."
                ).format(insn.name, depth, depth % 8)
                raise ValueError(message)
+
1945
def optimize_instructions(self):
    """Drop VFP/NEON moves whose first two operands compare equal.

    Every other instruction (including general-purpose moves) is kept.
    """
    from nervapy.arm.generic import MovInstruction
    from nervapy.arm.vfpneon import VfpNeonMovInstruction

    def is_redundant_move(entry):
        # A VFP/NEON move from an operand to itself has no effect.
        return (
            isinstance(entry, VfpNeonMovInstruction)
            and not (entry.operands[0] != entry.operands[1])
        )

    self.instructions = [
        entry for entry in self.instructions if not is_redundant_move(entry)
    ]
+
1960
def get_target(self):
    """Return the ``target`` attribute of this function object."""
    target = self.target
    return target
+
1963
@property
def isa_extensions(self):
    """Extensions of the target plus those required by the instructions.

    Starts from ``self.target.extensions`` and adds the extensions reported
    by every real instruction. ``Extension.VFPd32`` is added for an
    instruction that touches any Q register, or a D register whose
    ``is_extended`` is true.
    """
    from nervapy.arm.instructions import Instruction
    from nervapy.arm.isa import Extension, Extensions
    from nervapy.arm.registers import DRegister, QRegister

    collected = Extensions(*self.target.extensions)
    for entry in self.instructions:
        if not isinstance(entry, Instruction):
            continue
        for extension in entry.isa_extensions:
            collected += extension
        for register in entry.get_registers_list():
            if isinstance(register, QRegister) or (
                isinstance(register, DRegister) and register.is_extended
            ):
                collected += Extension.VFPd32
                # One matching register is enough for this instruction.
                break
    return collected
+
1984
def get_yeppp_isa_extensions(self):
    """Translate the function's ISA extensions into Yeppp! feature names.

    Each extension name returned by ``self.get_isa_extensions()`` is
    classified as an ISA, SIMD or system feature and prefixed with
    ``YepARMIsaFeature``, ``YepARMSimdFeature`` or ``YepARMSystemFeature``.
    A category without any entry is reported as the matching
    ``Yep...FeaturesDefault`` name.

    Returns:
        A tuple ``(isa, simd, system)`` of lists of feature names, each
        sorted so that the result is deterministic.

    Raises:
        KeyError: if an extension name is not in the translation table.
    """
    # name -> (ISA feature, SIMD feature, system feature)
    isa_extensions_map = {
        "V4": ("V4", None, None),
        "V5": ("V5", None, None),
        "V5E": ("V5E", None, None),
        "V6": ("V6", None, None),
        "V6K": ("V6K", None, None),
        "V7": ("V7", None, None),
        "V7MP": ("V7MP", None, None),
        "Div": ("Div", None, None),
        "Thumb": ("Thumb", None, None),
        "Thumb2": ("Thumb2", None, None),
        "VFP": ("VFP", None, None),
        "VFP2": ("VFP2", None, None),
        "VFP3": ("VFP3", None, None),
        "VFPd32": ("VFPd32", None, None),
        "VFP3HP": ("VFP3HP", None, None),
        "VFP4": ("VFP4", None, None),
        "VFPVectorMode": (None, None, "VFPVectorMode"),
        "XScale": (None, "XScale", None),
        "WMMX": (None, "WMMX", None),
        "WMMX2": (None, "WMMX2", None),
        "NEON": (None, "NEON", None),
        "NEONHP": (None, "NEONHP", None),
        "NEON2": (None, "NEON2", None),
    }
    isa_features, simd_features, system_features = set(), set(), set()
    for extension in self.get_isa_extensions():
        if extension is None:
            continue
        isa_feature, simd_feature, system_feature = isa_extensions_map[extension]
        if isa_feature is not None:
            isa_features.add(isa_feature)
        if simd_feature is not None:
            simd_features.add(simd_feature)
        if system_feature is not None:
            system_features.add(system_feature)

    # Lists (not lazy map objects) so that an empty category is falsy and
    # the default name below is actually used on Python 3.
    isa_names = ["YepARMIsaFeature" + name for name in sorted(isa_features)]
    simd_names = ["YepARMSimdFeature" + name for name in sorted(simd_features)]
    system_names = [
        "YepARMSystemFeature" + name for name in sorted(system_features)
    ]
    return (
        isa_names or ["YepIsaFeaturesDefault"],
        simd_names or ["YepSimdFeaturesDefault"],
        system_names or ["YepSystemFeaturesDefault"],
    )
+
2035
def allocate_local_variable(self):
    """Increment ``local_variables_count`` and return the new count as the id."""
    next_id = self.local_variables_count + 1
    self.local_variables_count = next_id
    return next_id
+
2039
def allocate_q_register(self):
    """Allocate a new virtual Q register number.

    The number is ``(virtual_registers_count << 12) | 0x0F0`` after the
    counter has been incremented.

    As a debugging aid, the source line two frames up the call stack is
    inspected; if it contains ``=`` and ``QRegister``, the text left of the
    first ``=`` is stored in ``self._register_names`` under the new number.
    NOTE(review): two frames up is presumably the user code constructing the
    register (this -> register __init__ -> caller) - confirm.

    Returns:
        The new register number.
    """
    import inspect
    import linecache

    self.virtual_registers_count += 1
    register_number = (self.virtual_registers_count << 12) | 0x0F0

    frame = None
    try:
        frame = inspect.currentframe().f_back.f_back
        if frame:
            line = linecache.getline(
                frame.f_code.co_filename, frame.f_lineno
            ).strip()
            if '=' in line and 'QRegister' in line:
                var_name = line.split('=')[0].strip()
                if var_name and not var_name.startswith('#'):
                    self._register_names[register_number] = var_name
    except Exception:
        # Name capture is best-effort only; KeyboardInterrupt and SystemExit
        # are deliberately no longer swallowed.
        pass
    finally:
        # Drop the frame reference to avoid a reference cycle.
        del frame

    return register_number
+
2059
def allocate_d_register(self):
    """Allocate a new virtual D register number.

    Increments ``virtual_registers_count`` and returns that count shifted
    left by 12 bits and OR-ed with the tag ``0x300``.
    """
    serial = self.virtual_registers_count + 1
    self.virtual_registers_count = serial
    return (serial << 12) | 0x300
+
2063
def allocate_s_register(self):
    """Allocate a new virtual S register number.

    Increments ``virtual_registers_count`` and returns that count shifted
    left by 12 bits and OR-ed with the tag ``0x400``.
    """
    serial = self.virtual_registers_count + 1
    self.virtual_registers_count = serial
    return (serial << 12) | 0x400
+
2067
def allocate_wmmx_register(self):
    """Allocate a new virtual WMMX register number.

    Increments ``virtual_registers_count`` and returns that count shifted
    left by 12 bits and OR-ed with the tag ``0x002``.
    """
    serial = self.virtual_registers_count + 1
    self.virtual_registers_count = serial
    return (serial << 12) | 0x002
+
2071
def allocate_general_purpose_register(self):
    """Allocate a new virtual general-purpose register number.

    The number is ``(virtual_registers_count << 12) | 0x001`` after the
    counter has been incremented.

    As a debugging aid, the source line two frames up the call stack
    (this -> register ``__init__`` -> caller) is inspected; if it looks like
    ``varname = GeneralPurposeRegister()``, the text left of the first ``=``
    is stored in ``self._register_names`` under the new number.

    Returns:
        The new register number.
    """
    import inspect
    import linecache

    self.virtual_registers_count += 1
    register_number = (self.virtual_registers_count << 12) | 0x001

    frame = None
    try:
        frame = inspect.currentframe().f_back.f_back
        if frame:
            # Source text of the line being executed in the caller.
            line = linecache.getline(
                frame.f_code.co_filename, frame.f_lineno
            ).strip()
            # Simple pattern matching for "varname = GeneralPurposeRegister()"
            if '=' in line and 'GeneralPurposeRegister' in line:
                var_name = line.split('=')[0].strip()
                if var_name and not var_name.startswith('#'):
                    self._register_names[register_number] = var_name
    except Exception:
        # Name capture is best-effort only; KeyboardInterrupt and SystemExit
        # are deliberately no longer swallowed.
        pass
    finally:
        # Drop the frame reference to avoid a reference cycle.
        del frame

    return register_number
+
2093
def allocate_p_register(self):
    """Allocate a new virtual P register number.

    Increments ``virtual_registers_count`` and returns that count shifted
    left by 12 bits and OR-ed with the tag ``0x001``.
    """
    # NOTE(review): 0x001 is the same tag the general-purpose allocator
    # uses - confirm this is intended.
    serial = self.virtual_registers_count + 1
    self.virtual_registers_count = serial
    return (serial << 12) | 0x001
+
2097
+
2098
class LocalVariable(object):
    """A local variable of the function being generated.

    A variable is either a root variable with its own ``address`` (``None``
    until one is assigned) or a sub-variable that covers part of its
    ``parent`` at a byte ``offset``.
    """

    def __init__(self, register_type):
        """Create a variable and register it with the active function.

        Args:
            register_type: either the size in bytes (an ``int``) or one of
                the register classes GeneralPurposeRegister / SRegister
                (4 bytes), WMMXRegister / DRegister (8 bytes) or
                QRegister (16 bytes).

        Raises:
            ValueError: if ``register_type`` is none of the above.
        """
        from nervapy.arm.registers import (DRegister, GeneralPurposeRegister,
                                           QRegister, SRegister, WMMXRegister)

        super(LocalVariable, self).__init__()
        if isinstance(register_type, int):
            self.size = register_type
        elif register_type == GeneralPurposeRegister:
            self.size = 4
        elif register_type == WMMXRegister:
            self.size = 8
        elif register_type == SRegister:
            self.size = 4
        elif register_type == DRegister:
            self.size = 8
        elif register_type == QRegister:
            self.size = 16
        else:
            raise ValueError("Unsupported register type {0}".format(register_type))
        self.id = active_function.allocate_local_variable()
        self.address = None
        self.offset = 0
        self.parent = None

    def __eq__(self, other):
        # Variables are identified by id; defer to the other operand for
        # foreign objects instead of failing on a missing ``id`` attribute.
        if not isinstance(other, LocalVariable):
            return NotImplemented
        return self.id == other.id

    def __hash__(self):
        return hash(self.id)

    def __str__(self):
        address = self.get_address()
        if address is not None:
            return "[{0}]".format(address)
        else:
            return "local-variable<{0}>".format(self.id)

    def is_subvariable(self):
        """Return True if this variable is a part of another variable."""
        return self.parent is not None

    def get_parent(self):
        """Return the enclosing variable, or None for a root variable."""
        return self.parent

    def get_root(self):
        """Return the outermost variable this one belongs to."""
        if self.is_subvariable():
            return self.get_parent().get_root()
        else:
            return self

    def get_address(self):
        """Return the address of the variable, or None if not assigned yet.

        For a sub-variable this is the parent's address plus ``offset``.
        """
        if self.is_subvariable():
            parent_address = self.parent.get_address()
            if parent_address is None:
                # The root has no address yet, so neither has any part of it.
                return None
            return parent_address + self.offset
        else:
            return self.address

    def get_size(self):
        """Return the size of the variable in bytes."""
        return self.size

    def get_low(self):
        """Return a new sub-variable covering the lower half of this one."""
        assert self.get_size() % 2 == 0
        # Floor division: a float size would be rejected by __init__.
        child = LocalVariable(self.get_size() // 2)
        child.parent = self
        child.offset = 0
        return child

    def get_high(self):
        """Return a new sub-variable covering the upper half of this one."""
        assert self.get_size() % 2 == 0
        half_size = self.get_size() // 2
        child = LocalVariable(half_size)
        child.parent = self
        child.offset = half_size
        return child
+
2176
+
2177
class StackFrame(object):
    """Book-keeping for the registers a function must save and restore.

    Collects the general-purpose and D registers that have to be preserved
    and generates the matching prologue and epilogue instruction sequences.
    """

    def __init__(self, abi):
        super(StackFrame, self).__init__()
        self.abi = abi
        self.general_purpose_registers = list()
        self.d_registers = list()
        self.s_variables = list()
        self.d_variables = list()
        self.q_variables = list()

    @staticmethod
    def _is_virtual(register):
        # ``is_virtual`` is read as a plain attribute elsewhere in this file;
        # accept both an attribute/property and a method.
        flag = register.is_virtual
        return flag() if callable(flag) else flag

    @staticmethod
    def _sorted_by_number(registers):
        # Register lists are emitted in ascending physical register order.
        return tuple(sorted(registers, key=lambda reg: reg.get_physical_number()))

    @staticmethod
    def _uses_multiple_transfer(function):
        # True when STMDB/LDMIA are used instead of PUSH.W/POP.W.
        from nervapy.arm.formats import HighRegisterStrategy

        strategy = function.high_register_strategy
        return strategy == HighRegisterStrategy.STMDB or (
            strategy == HighRegisterStrategy.AUTO
            and function.assembly_format.name == "ARMCC"
        )

    def _needs_wide_transfer(self):
        # True when targeting ARMv7-M and a register above r7 must be saved,
        # in which case the 16-bit PUSH/POP forms are not used.
        from nervapy.arm.isa import Extension

        function = self.get_function()
        if not (function and Extension.V7M in function.target.extensions):
            return False
        return any(
            reg.get_physical_number() > 7 for reg in self.general_purpose_registers
        )

    def _record(self, register, respect_abi):
        # Shared implementation of preserve_register / force_preserve_register.
        from nervapy.arm.registers import (DRegister, GeneralPurposeRegister,
                                           QRegister, SRegister)

        if isinstance(register, GeneralPurposeRegister):
            saved, candidates = self.general_purpose_registers, (register,)
        elif isinstance(register, SRegister):
            if not self._is_virtual(register):
                register = register.get_parent()
            saved, candidates = self.d_registers, (register,)
        elif isinstance(register, DRegister):
            saved, candidates = self.d_registers, (register,)
        elif isinstance(register, QRegister):
            # A Q register is saved as its two D halves; the ABI check is
            # made against the Q register itself.
            saved = self.d_registers
            candidates = (register.get_low_part(), register.get_high_part())
        else:
            raise TypeError("Unsupported register type {0}".format(type(register)))

        for candidate in candidates:
            if candidate in saved:
                continue
            if respect_abi and register not in self.abi.callee_save_registers:
                continue
            saved.append(candidate)

    def preserve_registers(self, registers):
        """Call :meth:`preserve_register` for every register in *registers*."""
        for register in registers:
            self.preserve_register(register)

    def preserve_register(self, register):
        """Mark *register* for saving if the ABI lists it as callee-saved.

        Raises:
            TypeError: if the register is not a general-purpose, S, D or
                Q register.
        """
        self._record(register, respect_abi=True)

    def force_preserve_register(self, register):
        """Add *register* to the preservation list unconditionally (no ABI check)."""
        self._record(register, respect_abi=False)

    def add_variable(self, variable):
        """Register a local variable in the list matching its size.

        Variables of 4, 8 and 16 bytes go to ``s_variables``,
        ``d_variables`` and ``q_variables`` respectively; a variable that
        is already registered is not added twice.

        Raises:
            TypeError: for any other variable size.
        """
        size = variable.get_size()
        if size == 4:
            bucket = self.s_variables
        elif size == 8:
            bucket = self.d_variables
        elif size == 16:
            bucket = self.q_variables
        else:
            raise TypeError("Unsupported variable type {0}".format(type(variable)))
        if variable not in bucket:
            bucket.append(variable)

    def get_parameters_offset(self):
        """Return the number of bytes the prologue places on the stack.

        General-purpose registers take 4 bytes each, rounded up to a
        multiple of 8 (the prologue pads an odd count with ``SUB sp, sp, 4``);
        D registers take 8 bytes each.
        """
        parameters_offset = len(self.general_purpose_registers) * 4
        if parameters_offset % 8 == 4:
            parameters_offset += 4
        return parameters_offset + len(self.d_registers) * 8

    def generate_prologue(self):
        """Return the list of instructions that save the preserved registers.

        General-purpose registers are stored with a single PUSH (or, on
        ARMv7-M when a register above r7 is involved, PUSH.W / STMDB
        depending on the function's high-register strategy), followed by
        ``SUB sp, sp, 4`` when their count is odd. D registers are stored
        with VPUSH afterwards.
        """
        from nervapy.arm.generic import PUSH, PUSH_W, STMDB, SUB
        from nervapy.arm.registers import sp
        from nervapy.arm.vfpneon import VPUSH
        from nervapy.stream import InstructionStream

        with InstructionStream() as instructions:
            if self.general_purpose_registers:
                registers = self._sorted_by_number(self.general_purpose_registers)
                if self._needs_wide_transfer():
                    if self._uses_multiple_transfer(self.get_function()):
                        STMDB(sp, registers)
                    else:
                        PUSH_W(registers)
                else:
                    PUSH(registers)
                if len(registers) % 2 == 1:
                    # Pad to keep the stack 8-byte aligned.
                    SUB(sp, sp, 4)

            if self.d_registers:
                VPUSH(self._sorted_by_number(self.d_registers))
        return list(iter(instructions))

    def generate_epilogue(self):
        """Return the list of instructions that restore the preserved registers.

        Mirrors :meth:`generate_prologue` in reverse order: VPOP for the
        D registers, ``ADD sp, sp, 4`` to drop the padding of an odd
        register count, then POP / POP.W / LDMIA for the general-purpose
        registers.
        """
        from nervapy.arm.generic import ADD, LDMIA, POP, POP_W
        from nervapy.arm.registers import sp
        from nervapy.arm.vfpneon import VPOP
        from nervapy.stream import InstructionStream

        with InstructionStream() as instructions:
            if self.d_registers:
                VPOP(self._sorted_by_number(self.d_registers))

            if self.general_purpose_registers:
                registers = self._sorted_by_number(self.general_purpose_registers)
                if len(registers) % 2 == 1:
                    ADD(sp, sp, 4)
                if self._needs_wide_transfer():
                    if self._uses_multiple_transfer(self.get_function()):
                        LDMIA(sp, registers)
                    else:
                        POP_W(registers)
                else:
                    POP(registers)
        return list(iter(instructions))

    def get_function(self):
        """Get the active function that owns this stack frame."""
        from nervapy.arm.function import active_function

        return active_function
+
2441
+
2442
def print_live_registers(label=""):
    """Print the registers that are live at this point of code generation.

    Intended to be called inside a Function context to see which registers
    still hold values that will be used later. The call is forwarded to the
    active function's ``print_live_registers``; when no function is active,
    a short notice is printed instead.

    Note: liveness is computed when the function is compiled, so the output
    is an approximation based on the instructions emitted so far.

    Args:
        label: Optional label to identify the location in code

    Example:
        with Function("my_func", args, ...):
            t0 = GeneralPurposeRegister()
            ADD(t0, r0, r1)
            print_live_registers("after ADD")  # Shows which regs are live
    """
    function = active_function
    if function is not None:
        function.print_live_registers(label)
        return

    print(f"Live registers {label}: No active function")