PyNerva 0.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of PyNerva might be problematic. Click here for more details.

Files changed (74) hide show
  1. nervapy/__init__.py +50 -0
  2. nervapy/abi.py +91 -0
  3. nervapy/arm/__init__.py +124 -0
  4. nervapy/arm/__main__.py +0 -0
  5. nervapy/arm/abi.py +138 -0
  6. nervapy/arm/formats.py +49 -0
  7. nervapy/arm/function.py +2405 -0
  8. nervapy/arm/generic.py +10797 -0
  9. nervapy/arm/instructions.py +519 -0
  10. nervapy/arm/isa.py +409 -0
  11. nervapy/arm/literal_pool.py +331 -0
  12. nervapy/arm/microarchitecture.py +211 -0
  13. nervapy/arm/pseudo.py +652 -0
  14. nervapy/arm/registers.py +1458 -0
  15. nervapy/arm/vfpneon.py +4092 -0
  16. nervapy/arm.py +13 -0
  17. nervapy/c/__init__.py +1 -0
  18. nervapy/c/types.py +436 -0
  19. nervapy/codegen.py +99 -0
  20. nervapy/common/__init__.py +4 -0
  21. nervapy/common/function.py +5 -0
  22. nervapy/common/regalloc.py +121 -0
  23. nervapy/constant_data.py +282 -0
  24. nervapy/encoder.py +246 -0
  25. nervapy/formats/__init__.py +2 -0
  26. nervapy/formats/elf/__init__.py +4 -0
  27. nervapy/formats/elf/file.py +178 -0
  28. nervapy/formats/elf/image.py +106 -0
  29. nervapy/formats/elf/section.py +422 -0
  30. nervapy/formats/elf/symbol.py +281 -0
  31. nervapy/formats/macho/__init__.py +2 -0
  32. nervapy/formats/macho/file.py +123 -0
  33. nervapy/formats/macho/image.py +143 -0
  34. nervapy/formats/macho/section.py +322 -0
  35. nervapy/formats/macho/symbol.py +158 -0
  36. nervapy/formats/mscoff/__init__.py +8 -0
  37. nervapy/formats/mscoff/image.py +132 -0
  38. nervapy/formats/mscoff/section.py +181 -0
  39. nervapy/formats/mscoff/symbol.py +148 -0
  40. nervapy/function.py +136 -0
  41. nervapy/literal.py +731 -0
  42. nervapy/loader.py +188 -0
  43. nervapy/name.py +159 -0
  44. nervapy/parse.py +52 -0
  45. nervapy/stream.py +58 -0
  46. nervapy/util.py +126 -0
  47. nervapy/writer.py +518 -0
  48. nervapy/x86_64/__init__.py +324 -0
  49. nervapy/x86_64/__main__.py +407 -0
  50. nervapy/x86_64/abi.py +517 -0
  51. nervapy/x86_64/amd.py +6464 -0
  52. nervapy/x86_64/avx.py +102029 -0
  53. nervapy/x86_64/crypto.py +1533 -0
  54. nervapy/x86_64/encoding.py +424 -0
  55. nervapy/x86_64/fma.py +19138 -0
  56. nervapy/x86_64/function.py +2707 -0
  57. nervapy/x86_64/generic.py +23384 -0
  58. nervapy/x86_64/instructions.py +500 -0
  59. nervapy/x86_64/isa.py +476 -0
  60. nervapy/x86_64/lower.py +126 -0
  61. nervapy/x86_64/mask.py +2593 -0
  62. nervapy/x86_64/meta.py +143 -0
  63. nervapy/x86_64/mmxsse.py +17265 -0
  64. nervapy/x86_64/nacl.py +327 -0
  65. nervapy/x86_64/operand.py +1204 -0
  66. nervapy/x86_64/options.py +21 -0
  67. nervapy/x86_64/pseudo.py +686 -0
  68. nervapy/x86_64/registers.py +1225 -0
  69. nervapy/x86_64/types.py +17 -0
  70. nervapy/x86_64/uarch.py +580 -0
  71. pynerva-0.0.5.dist-info/METADATA +310 -0
  72. pynerva-0.0.5.dist-info/RECORD +74 -0
  73. pynerva-0.0.5.dist-info/WHEEL +4 -0
  74. pynerva-0.0.5.dist-info/licenses/LICENSE.rst +15 -0
@@ -0,0 +1,2405 @@
1
+ # This file is part of PeachPy package and is licensed under the Simplified BSD license.
2
+ # See license.rst for the full text of the license.
3
+
4
+ from __future__ import print_function
5
+
6
+ import time
7
+
8
+ import nervapy.arm.instructions
9
+ import nervapy.arm.registers
10
+ from nervapy.arm.microarchitecture import Microarchitecture
11
+
12
# Module-level handle on the Function currently being built.
# Function.__enter__ stores the function here (and refuses to start while
# another one is still set); Function.detach() resets it to None.
active_function = None
13
+
14
+
15
+ class Function(object):
16
def __init__(
    self,
    name,
    arguments,
    return_type=None,
    target=Microarchitecture.Default,
    abi=None,
    assembly_format=None,
    high_register_strategy=None,
    collect_origin=False,
    dump_intermediate_assembly=False,
    report_generation=True,
    report_live_registers=False,
    is_thumb=False,
    alignment=0,
    validate_stack_alignment=True,
    preserve8=False,
):
    """Describe an ARM function and assign a location to each argument.

    The configuration values are stored on the instance unchanged. Every
    argument has its ``register`` and ``stack_offset`` reset; arguments
    flagged as size integers, pointer integers or pointers get their
    ``c_type.size`` set to ``abi.pointer_size``. Arguments are then placed
    in r0-r3 or on the stack (see the comments in the body).

    :param name: function name, used for the assembly labels.
    :param arguments: iterable of argument objects; it is iterated twice.
    :param abi: must compare equal to ``arm_gnueabi`` or ``arm_gnueabihf``.
    :param assembly_format: defaults to ``AssemblyFormat.GAS`` when None.
    :param high_register_strategy: defaults to ``HighRegisterStrategy.AUTO``
        when None.
    :raises ValueError: if ``abi`` is not one of the supported ABIs, or an
        argument's size is neither <= 4 nor exactly 8 bytes.
    """
    from nervapy.arm.abi import arm_gnueabi, arm_gnueabihf
    from nervapy.arm.registers import r0, r1, r2, r3

    # Validate the ABI before it is dereferenced below. With the default
    # abi=None a pointer argument used to fail with an opaque AttributeError
    # on `abi.pointer_size` instead of this explicit error.
    if not (abi == arm_gnueabi or abi == arm_gnueabihf):
        raise ValueError("Unsupported assembler ABI %s" % abi)

    self.name = name
    self.arguments = arguments
    self.return_type = return_type
    self.is_thumb = is_thumb
    self.alignment = alignment
    self.validate_stack_alignment = validate_stack_alignment
    self.preserve8 = preserve8

    # Set default assembly format to GAS if not specified
    if assembly_format is None:
        from nervapy.arm.formats import AssemblyFormat

        assembly_format = AssemblyFormat.GAS
    self.assembly_format = assembly_format

    # Set default high register strategy if not specified
    if high_register_strategy is None:
        from nervapy.arm.formats import HighRegisterStrategy

        high_register_strategy = HighRegisterStrategy.AUTO
    self.high_register_strategy = high_register_strategy

    for argument in self.arguments:
        argument.stack_offset = None
        argument.register = None
        if (
            argument.is_size_integer
            or argument.is_pointer_integer
            or argument.is_pointer
        ):
            # Pointer-sized types take their width from the ABI.
            argument.c_type.size = abi.pointer_size
        assert argument.size
    self.target = target
    self.abi = abi
    self.collect_origin = collect_origin
    self.dump_intermediate_assembly = dump_intermediate_assembly
    self.report_generation = report_generation
    self.report_live_registers = report_live_registers
    self.ticks = None

    # Assign argument locations.
    # Up to 4 first arguments are passed in registers, others passed through stack
    # Arguments smaller than 4 bytes are extended to 4 bytes (both when passed on stack or in a register).
    # 8-byte arguments occupy 2 general-purpose registers or 8 bytes on stack. When they are passed in
    # registers, the index of the first register must be even (i.e. they are passed in (r0, r1) or (r2, r3),
    # but not in (r1, r2). When 8-byte arguments are passed on stack, their location is aligned on 8 bytes,
    # skipping 4 bytes if necessary.
    argument_registers = (r0, r1, r2, r3)
    register_offset = 0
    stack_offset = 0
    for argument in self.arguments:
        if argument.size <= 4:
            if register_offset < 4:
                argument.register = argument_registers[register_offset]
                register_offset += 1
            else:
                argument.stack_offset = stack_offset
                stack_offset += 4
        elif argument.size == 8:
            # First register index must be even
            if register_offset % 2 == 1:
                register_offset += 1
            if register_offset < 4:
                argument.register = (
                    argument_registers[register_offset],
                    argument_registers[register_offset + 1],
                )
                register_offset += 2
            else:
                if stack_offset % 8 == 4:
                    stack_offset += 4
                argument.stack_offset = stack_offset
                stack_offset += 8
        else:
            raise ValueError(
                "Unsupported argument size {0}".format(argument.size)
            )

    self.instructions = list()
    self.constants = list()
    self.external_functions = set()  # Track external function imports
    self.stack_frame = StackFrame(self.abi)
    self.local_variables_count = 0
    self.virtual_registers_count = 0x40
    self.conflicting_registers = dict()
    self.allocation_options = dict()
    self.unallocated_registers = list()
    self._live_register_markers = []  # List of (instruction_index, label) tuples
    self._register_names = {}  # Map from register number to variable name
129
+
130
def __enter__(self):
    """Activate this function as the current instruction stream.

    Raises ValueError if another function is still active or if an
    alternative instruction stream is active. When ``report_generation``
    is set, prints a progress banner and starts the phase timer.
    Returns ``self``.
    """
    import nervapy.stream

    global active_function

    if active_function is not None:
        message = "Function {0} was not detached".format(active_function.name)
        raise ValueError(message)
    if nervapy.stream.active_stream is not None:
        raise ValueError("Alternative instruction stream is active")

    # Register the function first, then the stream (same order as before).
    active_function = self
    nervapy.stream.active_stream = self

    if not self.report_generation:
        return self

    banner = "Generating function {Function} for microarchitecture {Microarchitecture} and ABI {ABI}"
    print(
        banner.format(
            Function=self.name, Microarchitecture=self.target, ABI=self.abi
        )
    )
    print("\tParsing source", end="")
    self.ticks = time.time()
    return self
152
+
153
def __exit__(self, exc_type, exc_value, traceback):
    """Finish building the function when the ``with`` block ends.

    The active instruction stream is always cleared. If the block exited
    without an exception, the post-processing passes run in order (label
    generation, decomposition, liveness analysis, register allocation,
    code generation) and ``detach()`` is called afterwards, even when one
    of the passes raises. If the block raised, only ``detach()`` is called.

    Nothing is returned, so an exception raised inside the ``with`` block
    is not suppressed.
    """
    import nervapy.stream
    from nervapy.arm.instructions import Instruction

    nervapy.stream.active_stream = None
    if exc_type is None:
        try:
            self.generate_labels()
            self.decompose_instructions()
            self.reserve_registers()
            if self.report_generation:
                # Close the "Parsing source" progress line started in __enter__.
                elapsed = time.time() - self.ticks
                print(" (%2.2f secs)" % elapsed)
                print("\tRunning liveness analysis", end="")
                self.ticks = time.time()
            self.determine_available_registers()
            self.determine_live_registers(exclude_parameter_loads=True)

            # Report live registers at marked points
            if self._live_register_markers:
                self._report_live_registers_at_markers()

            if self.dump_intermediate_assembly:
                # Debug dump of every instruction with its register usage.
                # NOTE(review): `symbol_name` is not defined in the visible part
                # of this class; presumably provided elsewhere - confirm.
                with open(
                    "%s.S" % self.symbol_name, "w"
                ) as intermediate_assembly_file:
                    for instruction in self.instructions:
                        if isinstance(instruction, Instruction):
                            consumed_registers = ", ".join(
                                sorted(
                                    map(
                                        str,
                                        list(
                                            instruction.get_input_registers_list()
                                        ),
                                    )
                                )
                            )
                            produced_registers = ", ".join(
                                sorted(
                                    map(
                                        str,
                                        list(
                                            instruction.get_output_registers_list()
                                        ),
                                    )
                                )
                            )
                            # NOTE(review): available_registers is computed but never
                            # written to the file - confirm whether an
                            # "Available registers" line was intended.
                            available_registers = ", ".join(
                                sorted(
                                    map(str, list(instruction.available_registers))
                                )
                            )
                            live_registers = ", ".join(
                                sorted(map(str, list(instruction.live_registers)))
                            )
                            intermediate_assembly_file.write(
                                str(instruction) + "\n"
                            )
                            intermediate_assembly_file.write(
                                "\tConsumed registers: " + consumed_registers + "\n"
                            )
                            intermediate_assembly_file.write(
                                "\tProduced registers: " + produced_registers + "\n"
                            )
                            intermediate_assembly_file.write(
                                "\tLive registers: " + live_registers + "\n"
                            )
                            if instruction.line_number:
                                intermediate_assembly_file.write(
                                    "\tLine: " + str(instruction.line_number) + "\n"
                                )
                            if instruction.source_code:
                                intermediate_assembly_file.write(
                                    "\tCode: " + instruction.source_code + "\n"
                                )
                        else:
                            # Labels and other non-instruction entries are dumped verbatim.
                            intermediate_assembly_file.write(
                                str(instruction) + "\n"
                            )

            if self.report_generation:
                elapsed = time.time() - self.ticks
                print(" (%2.2f secs)" % elapsed)
                print("\tRunning register allocation", end="")
                self.ticks = time.time()
            self.check_live_registers()
            self.determine_register_relations()
            self.allocate_registers()

            if self.report_generation:
                elapsed = time.time() - self.ticks
                print(" (%2.2f secs)" % elapsed)
                print("\tGenerating code", end="")
                self.ticks = time.time()
            self.remove_assume_statements()
            self.update_stack_frame()
            self.generate_parameter_loads()
            if self.report_live_registers:
                # Recompute liveness, this time without excluding parameter loads.
                self.determine_live_registers()
            self.generate_prolog_and_epilog()
            if self.validate_stack_alignment:
                self.validate_stack_alignment_check()

            self.generate_constant_loads()
            self.optimize_instructions()
            if self.report_generation:
                elapsed = time.time() - self.ticks
                print(" (%2.2f secs)" % elapsed)
                self.ticks = time.time()
        finally:
            # Always release the global active-function slot, even if a pass failed.
            self.detach()
    else:
        self.detach()
267
+
268
def find_argument(self, argument_target):
    """Look up one of this function's arguments.

    ``argument_target`` is either an ``Argument`` object (returned as-is
    when it belongs to ``self.arguments``) or an argument name (the first
    argument with that ``name`` is returned). Returns None if nothing
    matches.
    """
    from nervapy import Argument

    assert isinstance(
        argument_target, (Argument, str)
    ), "Either Argument object or argument name expected"

    if not isinstance(argument_target, Argument):
        # Lookup by name: first match wins.
        for candidate in self.arguments:
            if candidate.name == argument_target:
                return candidate
        return None

    return argument_target if argument_target in self.arguments else None
288
+
289
def detach(self):
    """Release the global active-function slot and the active stream.

    Raises ValueError when no function is currently active.
    Returns ``self``.
    """
    global active_function

    import nervapy.stream

    if active_function is None:
        raise ValueError("Trying to detach a function while no function is active")

    # Clear the function slot first, then the stream (same order as before).
    active_function = nervapy.stream.active_stream = None
    return self
298
+
299
@property
def assembly(self):
    """Return the assembly text in the format chosen by ``assembly_format``.

    ARMCC output is produced for ``AssemblyFormat.ARMCC``; every other
    value falls back to GAS output.
    """
    from nervapy.arm.formats import AssemblyFormat

    generate = (
        self._generate_armcc_assembly
        if self.assembly_format == AssemblyFormat.ARMCC
        else self._generate_gas_assembly  # Default to GAS format
    )
    return generate()
308
+
309
+ def _generate_constant_data_section(self):
310
+ """Generate .data section for ConstantData objects"""
311
+ try:
312
+ from nervapy.constant_data import ConstantData
313
+ constants = ConstantData.get_function_constants(self)
314
+ if not constants:
315
+ return ""
316
+
317
+ import os
318
+ lines = []
319
+ lines.append("")
320
+ lines.append("\t.data")
321
+ lines.append("\t.align 4")
322
+ for const in constants:
323
+ lines.append(const.generate_data_section())
324
+ return os.linesep.join(lines)
325
+ except ImportError:
326
+ return ""
327
+
328
+
329
@property
def global_asm(self):
    """Wrap the GAS assembly in a Rust ``global_asm!()`` macro invocation.

    The GAS text is embedded in a Rust raw string literal.

    Usage in a Rust source file:
        use core::arch::global_asm;
        include!("generated_kernels.rs");  // or paste directly

    The extern declaration goes in your Rust code:
        unsafe extern "C" { fn my_func(a: u32) -> u32; }
    """
    template = 'core::arch::global_asm!(r#"\n{asm}"#);\n'
    return template.format(asm=self._generate_gas_assembly())
342
+
343
def _generate_gas_assembly(self):
    """Generate assembly code in GNU Assembler (GAS) format.

    The text is built in this order: syntax/thumb/arch directives, the
    optional ConstantData section, the constants table, ``.extern``
    declarations, the ``.text`` header with the function label, the
    instructions, and finally the literal pool if one is present.
    Returns the assembly as a single string using ``os.linesep``.
    """
    import os

    from nervapy.arm.generic import BranchInstruction
    from nervapy.arm.instructions import Instruction
    from nervapy.arm.pseudo import LabelQuasiInstruction

    function_label = self.name
    constants_label = self.name + "_constants"
    assembly = ""
    assembly += "\t.syntax unified" + os.linesep
    if self.is_thumb:
        assembly += "\t.thumb" + os.linesep
    assembly += "\t" + self.gnu_arch_spec + os.linesep

    # Generate .data section for ConstantData if present
    constant_data_section = self._generate_constant_data_section()
    if constant_data_section:
        assembly += constant_data_section + os.linesep

    if len(self.constants) > 0:
        # NOTE(review): the section header, the DB/DW/DD/DQ/DO declarations and
        # the ALIGN directive below look like NASM-style syntax rather than GAS
        # directives - confirm this path is exercised and assembles.
        assembly += (
            "section .rodata.{Microarchitecture} progbits alloc noexec nowrite align={Alignment}".format(
                Microarchitecture=self.target.id, Alignment=32
            )
            + os.linesep
        )
        assembly += constants_label + ":" + os.linesep
        # Maps a constant's size to its data declaration keyword.
        data_declaration_map = {8: "DB", 16: "DW", 32: "DD", 64: "DQ", 128: "DO"}
        need_alignment = False
        for constant_bucket in self.constants:
            # Re-align only when the previous bucket was not completely filled.
            if need_alignment:
                assembly += (
                    "\tALIGN {Alignment}".format(Alignment=constant_bucket.capacity)
                    + os.linesep
                )
            for constant in constant_bucket.constants:
                assembly += (
                    "\t.{Label}: {Declaration} {Value}".format(
                        Label=constant.label,
                        Declaration=data_declaration_map[constant.size],
                        Value=", ".join([str(constant)] * constant.repeats),
                    )
                    + os.linesep
                )
            need_alignment = not constant_bucket.is_full()
        assembly += os.linesep

    # Declare external functions in sorted order for stable output.
    if hasattr(self, "external_functions") and len(self.external_functions) > 0:
        for func_name in sorted(self.external_functions):
            assembly += ".extern {0}".format(func_name) + os.linesep
        assembly += os.linesep

    assembly += "\n\t.text\n" + os.linesep
    assembly += ".global {Function}".format(Function=function_label) + os.linesep
    assembly += (
        ".type {Function}, %function".format(Function=function_label) + os.linesep
    )
    if self.alignment > 0:
        assembly += (
            ".align {Alignment}".format(Alignment=self.alignment) + os.linesep
        )
    assembly += function_label + ":" + os.linesep
    if self.gnu_fpu_spec:
        assembly += "\t" + self.gnu_fpu_spec + os.linesep
    for instruction in self.instructions:
        if isinstance(instruction, BranchInstruction):
            # Branch targets are local labels named L<function>.<label>.
            assembly += (
                "\t"
                + "{0} L{1}.{2}".format(
                    instruction.name, self.name, instruction.operands[0].label
                )
                + os.linesep
            )
        elif isinstance(instruction, Instruction):
            constant = instruction.get_constant()
            if constant is not None:
                # Set the prefix before str(instruction) is evaluated below.
                constant.prefix = constants_label
            assembly += "\t" + str(instruction) + os.linesep
        elif isinstance(instruction, LabelQuasiInstruction):
            assembly += "L{0}.{1}:".format(self.name, instruction.name) + os.linesep
        else:
            assembly += "\t" + str(instruction) + os.linesep

    # Generate literal pool if present
    if hasattr(self, 'literal_pool') and self.literal_pool.entries:
        assembly += os.linesep
        assembly += self.literal_pool.generate_assembly(format='gas') + os.linesep

    assembly += os.linesep
    return assembly
435
+
436
def _generate_armcc_assembly(self):
    """Generate assembly code in ARM Compiler (ARMCC) format.

    The text is built in this order: optional THUMB directive, the
    constants AREA, the code AREA header (with PRESERVE8 / FPU
    requirements), IMPORT declarations, the PROC/EXPORT header, the
    instructions, the literal pool if one is present, and the ENDP/END
    trailer. Returns the assembly as a single string using ``os.linesep``.
    """
    import os

    from nervapy.arm.generic import BranchInstruction
    from nervapy.arm.instructions import Instruction
    from nervapy.arm.pseudo import LabelQuasiInstruction

    function_label = self.name
    constants_label = self.name + "_constants"
    assembly = ""

    if self.is_thumb:
        assembly += " THUMB" + os.linesep

    # ARMCC constants section
    if len(self.constants) > 0:
        assembly += " AREA ||.constdata||, DATA, READONLY" + os.linesep
        assembly += constants_label + os.linesep
        # Maps a constant's size to its data declaration keyword.
        # NOTE(review): 64 and 128 both map to DCDU - confirm that the values
        # produced by str(constant) fit this directive.
        data_declaration_map = {
            8: "DCB",
            16: "DCW",
            32: "DCD",
            64: "DCDU",
            128: "DCDU",
        }
        for constant_bucket in self.constants:
            for constant in constant_bucket.constants:
                assembly += (
                    "{Label} {Declaration} {Value}".format(
                        Label=constant.label,
                        Declaration=data_declaration_map[constant.size],
                        Value=", ".join([str(constant)] * constant.repeats),
                    )
                    + os.linesep
                )
        assembly += os.linesep

    # ARMCC code section
    assembly += " AREA ||.text||, CODE, READONLY"
    if self.alignment > 0:
        # The ALIGN attribute is appended to the same AREA line.
        assembly += ", ALIGN={0}".format(self.alignment)
    assembly += os.linesep
    if self.preserve8:
        assembly += " PRESERVE8" + os.linesep
    if self.armcc_fpu_spec:
        assembly += " " + self.armcc_fpu_spec + os.linesep
    assembly += os.linesep

    # Add IMPORT statements for external functions
    if hasattr(self, "external_functions") and len(self.external_functions) > 0:
        for func_name in sorted(self.external_functions):
            assembly += " IMPORT " + func_name + os.linesep
        assembly += os.linesep

    assembly += function_label + " PROC" + os.linesep
    assembly += " EXPORT " + function_label + os.linesep

    for instruction in self.instructions:
        if isinstance(instruction, BranchInstruction):
            # Branch targets are labels named <function>_<label>.
            assembly += (
                " "
                + "{0} {1}_{2}".format(
                    instruction.name, self.name, instruction.operands[0].label
                )
                + os.linesep
            )
        elif isinstance(instruction, Instruction):
            constant = instruction.get_constant()
            if constant is not None:
                # Set the prefix before str(instruction) is evaluated below.
                constant.prefix = constants_label
            assembly += " " + str(instruction) + os.linesep
        elif isinstance(instruction, LabelQuasiInstruction):
            assembly += "{0}_{1}".format(self.name, instruction.name) + os.linesep
        else:
            assembly += " " + str(instruction) + os.linesep

    # Generate literal pool if present
    if hasattr(self, 'literal_pool') and self.literal_pool.entries:
        assembly += os.linesep
        assembly += self.literal_pool.generate_assembly(format='armcc') + os.linesep

    assembly += " ENDP" + os.linesep
    assembly += " END" + os.linesep
    return assembly
521
+
522
@property
def gnu_arch_spec(self):
    """Return the GAS ``.arch`` / ``.cpu`` directive for ``self.isa_extensions``.

    The candidates are checked in priority order and the first extension
    found decides the directive; ``.arch armv5t`` is the fallback.
    """
    from nervapy.arm.isa import Extension

    present = self.isa_extensions
    # Order matters: earlier entries win over later ones.
    priority = (
        (Extension.V8_1MMain, ".arch armv8.1-m.main"),
        (Extension.V8MMain, ".arch armv8-m.main"),
        (Extension.Div, ".cpu cortex-a15"),
        (Extension.V7MP, ".cpu cortex-a9"),
        (Extension.V7M, ".arch armv7-m"),
        (Extension.V8MBase, ".arch armv8-m.base"),
        (Extension.V7, ".arch armv7-a"),
        (Extension.V6K, ".arch armv6zk"),
        (Extension.V6, ".arch armv6"),
        (Extension.V5E, ".arch armv5te"),
    )
    for extension, directive in priority:
        if extension in present:
            return directive
    return ".arch armv5t"
549
+
550
@property
def gnu_fpu_spec(self):
    """Return the GAS ``.fpu`` directive for ``self.isa_extensions``, or None.

    None means no ``.fpu`` line should be emitted (the caller skips falsy
    values). The checks run from the most capable FPU/SIMD configuration
    down to plain VFP.
    """
    from nervapy.arm.isa import Extension

    isa_extensions = self.isa_extensions
    has_neon = Extension.NEON in isa_extensions

    # ARMv8-M (Cortex-M33, M35P, etc.) and ARMv8.1-M (Cortex-M55, etc.)
    # share the same selection: Helium MVE if present, otherwise the
    # FPv5 single-precision FPU, otherwise no FPU directive at all.
    if (
        Extension.V8MMain in isa_extensions
        or Extension.V8MBase in isa_extensions
        or Extension.V8_1MMain in isa_extensions
    ):
        if Extension.MVE in isa_extensions:
            return ".fpu mve"
        if Extension.VFP4 in isa_extensions or Extension.VFP3 in isa_extensions:
            return ".fpu fpv5-sp-d16"
        return None

    if Extension.NEON2 in isa_extensions or (
        Extension.VFP4 in isa_extensions and has_neon
    ):
        return ".fpu neon-vfpv4"
    # `and` binds tighter than `or`; the parentheses only make the original
    # grouping explicit.
    if Extension.NEONHP in isa_extensions or (
        Extension.VFPHP in isa_extensions and has_neon
    ):
        return ".fpu neon-fp16"
    if has_neon:
        return ".fpu neon"
    if Extension.VFPHP in isa_extensions:
        if Extension.VFPd32 in isa_extensions:
            return ".fpu vfpv3-fp16"
        return ".fpu vfpv3-d16-fp16"
    if Extension.VFP3 in isa_extensions:
        if Extension.VFPd32 in isa_extensions:
            return ".fpu vfpv3"
        return ".fpu vfpv3-d16"
    if Extension.VFP in isa_extensions or Extension.VFP2 in isa_extensions:
        # Previously this branch returned None and ".fpu vfp" sat in an
        # unreachable duplicate VFP3 branch, so VFP/VFP2 code got no .fpu
        # directive.
        return ".fpu vfp"
    return None
598
+
599
@property
def armcc_arch_spec(self):
    """Generate ARMCC-compatible architecture specification.

    Always returns ``"ARM"``. The previous implementation tested each ISA
    extension in turn but every branch, including the fallback, returned
    the same value, so the chain is collapsed into a single return.
    """
    return "ARM"
619
+
620
@property
def armcc_fpu_spec(self):
    """Return the ARMCC ``REQUIRE ...`` FPU line for ``self.isa_extensions``.

    Returns None when none of the checked FPU/SIMD extensions is present.
    """
    from nervapy.arm.isa import Extension

    used = self.isa_extensions

    if Extension.NEON2 in used or Extension.VFP4 in used:
        return "REQUIRE VFPv4"
    # `and` binds tighter than `or`; the parentheses keep the original grouping.
    if Extension.NEONHP in used or (
        Extension.VFPHP in used and Extension.NEON in used
    ):
        return "REQUIRE VFPv3_FP16"
    if Extension.NEON in used:
        return "REQUIRE VFPv3"
    if Extension.VFPHP in used:
        return "REQUIRE VFPv3_FP16"
    if Extension.VFP3 in used:
        return "REQUIRE VFPv3"
    if Extension.VFP in used or Extension.VFP2 in used:
        return "REQUIRE VFPv2"
    return None
644
+
645
def add_instruction(self, instruction):
    """Append ``instruction`` to the function's instruction list.

    ``None`` is ignored. For real ``Instruction`` objects the required ISA
    extensions are checked against the target first, the instruction's
    local variable (if any) is added to the stack frame, and its output
    registers are passed to the stack frame's ``preserve_registers``.
    Other entries are appended without any checks.

    Raises ValueError if a required extension is not in
    ``self.target.extensions``.
    """
    from nervapy.arm.instructions import Instruction

    if instruction is None:
        return

    if not isinstance(instruction, Instruction):
        self.instructions.append(instruction)
        return

    for required in instruction.isa_extensions:
        if required in self.target.extensions:
            continue
        raise ValueError(
            "{0} is not supported on the target microarchitecture".format(
                required
            )
        )

    stack_variable = instruction.get_local_variable()
    if stack_variable is not None:
        self.stack_frame.add_variable(stack_variable.get_root())
    self.stack_frame.preserve_registers(instruction.get_output_registers_list())
    self.instructions.append(instruction)
663
+
664
def add_instructions(self, instructions):
    """Pass every item of ``instructions``, in order, to ``add_instruction``."""
    for item in instructions:
        self.add_instruction(item)
667
+
668
def preserve(self, *registers):
    """Force extra registers into the function prologue/epilogue.

    Intended for registers that the automatic analysis would not pick up.
    Each register is handed to the stack frame's
    ``force_preserve_register``.

    Registers may be given one by one or grouped in a tuple/list, mirroring
    the calling convention of PUSH::

        with Function("my_func", ...) as f:
            f.preserve(r8, r9)      # varargs style
            f.preserve((r8, r9))    # tuple style (like PUSH)
            f.preserve(lr)
    """
    for entry in registers:
        # Treat a bare register as a one-element group.
        group = entry if isinstance(entry, (tuple, list)) else (entry,)
        for register in group:
            self.stack_frame.force_preserve_register(register)
690
+
691
def decompose_instructions(self):
    """Replace every ``ReturnInstruction`` with its ``to_instruction_list()``.

    All other entries are kept in place; ``self.instructions`` is rebound
    to the new list.
    """
    from nervapy.arm.pseudo import ReturnInstruction

    self.instructions = [
        lowered
        for item in self.instructions
        for lowered in (
            item.to_instruction_list()
            if isinstance(item, ReturnInstruction)
            else (item,)
        )
    ]
701
+
702
def generate_prolog_and_epilog(self):
    """Insert the stack frame's prologue and epilogue into the stream.

    The prologue is placed right after the ``ENTRY`` label and the epilogue
    right before every ``BranchExchangeInstruction``.
    """
    from nervapy.arm.generic import BranchExchangeInstruction
    from nervapy.arm.pseudo import LabelQuasiInstruction

    prologue = self.stack_frame.generate_prologue()
    epilogue = self.stack_frame.generate_epilogue()

    rebuilt = []
    for item in self.instructions:
        if isinstance(item, LabelQuasiInstruction):
            rebuilt.append(item)
            if item.name == "ENTRY":
                rebuilt.extend(prologue)
            continue
        if isinstance(item, BranchExchangeInstruction):
            rebuilt.extend(epilogue)
        rebuilt.append(item)
    self.instructions = rebuilt
720
+
721
def generate_labels(self):
    """Make sure the stream has an ``ENTRY`` label.

    If no label named ``ENTRY`` exists, one is inserted at position 0.
    """
    from nervapy.arm.instructions import Operand
    from nervapy.arm.pseudo import LabelQuasiInstruction

    has_entry = any(
        isinstance(item, LabelQuasiInstruction) and item.name == "ENTRY"
        for item in self.instructions
    )
    if not has_entry:
        self.instructions.insert(0, LabelQuasiInstruction(Operand("ENTRY")))
731
+
732
def get_label_table(self):
    """Return a dict mapping each label name to its index in the stream.

    If a name occurs more than once, the last occurrence wins.
    """
    from nervapy.arm.pseudo import LabelQuasiInstruction

    return {
        item.name: position
        for position, item in enumerate(self.instructions)
        if isinstance(item, LabelQuasiInstruction)
    }
740
+
741
def find_entry_label(self):
    """Return the index of the first label named ``ENTRY``.

    Raises ValueError if the stream has no such label.
    """
    from nervapy.arm.pseudo import LabelQuasiInstruction

    for position, item in enumerate(self.instructions):
        if isinstance(item, LabelQuasiInstruction) and item.name == "ENTRY":
            return position
    raise ValueError("Instruction stream does not contain the ENTRY label")
749
+
750
def find_exit_points(self):
    """Return the indices of all ``BranchExchangeInstruction`` entries, in order."""
    from nervapy.arm.generic import BranchExchangeInstruction

    return [
        position
        for position, item in enumerate(self.instructions)
        if isinstance(item, BranchExchangeInstruction)
    ]
758
+
759
def determine_branches(self):
    """Record, on every label, the indices of the branches that target it.

    Each label's ``input_branches`` is reset to an empty set and then
    filled with the index of every ``BranchInstruction`` whose first
    operand names that label. A branch to an unknown label raises KeyError
    from the label-table lookup.
    """
    from nervapy.arm.generic import BranchInstruction
    from nervapy.arm.pseudo import LabelQuasiInstruction

    label_positions = self.get_label_table()

    for item in self.instructions:
        if isinstance(item, LabelQuasiInstruction):
            item.input_branches = set()

    for position, item in enumerate(self.instructions):
        if not isinstance(item, BranchInstruction):
            continue
        destination = label_positions[item.operands[0].label]
        self.instructions[destination].input_branches.add(position)
773
+
774
def reserve_registers(self):
    """Do nothing; called from ``__exit__`` after instruction decomposition."""
    pass
776
+
777
def determine_available_registers(self):
    """Annotate each reachable instruction with its ``available_registers``.

    Starting at the ``ENTRY`` label, the stream is walked forward while the
    output registers of the instructions seen so far are accumulated. Every
    ``Instruction`` receives a copy of the set as it stood just before it
    executed. Each branch is followed (recursively) once to its target;
    an unconditional branch ends the current walk.
    """
    from nervapy.arm.generic import BranchInstruction
    from nervapy.arm.instructions import Instruction

    visited_branches = set()
    label_positions = self.get_label_table()

    def propagate(stream, first, seed):
        defined = set(seed)
        for position in range(first, len(stream)):
            current = stream[position]
            if not isinstance(current, Instruction):
                continue
            current.available_registers = set(defined)
            if not isinstance(current, BranchInstruction):
                # Ordinary instruction: its outputs become available afterwards.
                defined |= set(current.get_output_registers_list())
                continue
            if position not in visited_branches:
                destination = label_positions[current.operands[0].label]
                visited_branches.add(position)
                propagate(stream, destination, defined)
            if not current.is_conditional():
                # Nothing falls through an unconditional branch.
                return

    propagate(self.instructions, self.find_entry_label(), set())
807
+
808
def determine_live_registers(self, exclude_parameter_loads=False):
    """Compute the ``live_registers`` set of every instruction.

    After ``self.determine_branches()`` has run, the instruction list is
    walked backwards from every index returned by
    ``self.find_exit_points()``.  For each ``Instruction`` the registers it
    writes are removed from the running live map, the registers it reads are
    added, the result is merged with whatever ``live_registers`` the
    instruction already carried, and the merged set is stored back on the
    instruction.  When a label is reached, the walk is also continued from
    every not-yet-visited branch listed in the label's ``input_branches``.

    Args:
        exclude_parameter_loads: when true, the input registers of
            ``LoadArgumentPseudoInstruction`` are not marked live.
    """
    from nervapy.arm.generic import BranchInstruction
    from nervapy.arm.instructions import Instruction
    from nervapy.arm.pseudo import (LabelQuasiInstruction,
                                    LoadArgumentPseudoInstruction)
    from nervapy.arm.registers import Register

    self.determine_branches()
    # Reset the visit flag so every branch is followed once per analysis.
    for instruction in self.instructions:
        if isinstance(instruction, BranchInstruction):
            instruction.is_visited = False

    def branch_is_conditional(branch):
        # ``is_conditional`` is invoked as a method elsewhere in this file.
        # Testing the bare attribute of a method is always truthy, which
        # made the "stop at unconditional branch" check below dead code.
        # Accept both a method and a plain attribute/property.
        flag = branch.is_conditional
        return flag() if callable(flag) else flag

    def mark_live_registers(instructions, exit_point, initial_live_registers):
        # Maps register id -> bit mask of its live parts.
        live_registers = dict(initial_live_registers)
        # Walk from the bottom to the top of the linear block.
        for i in range(exit_point, -1, -1):
            instruction = instructions[i]
            if (
                isinstance(instruction, BranchInstruction)
                and i != exit_point
                and not branch_is_conditional(instruction)
            ):
                # Control never falls through an unconditional branch, so
                # liveness from below must not leak into the code above it.
                return
            if isinstance(instruction, Instruction):
                # First mark registers written by this instruction as
                # non-live, then mark registers read by it as live.
                for output_register in instruction.get_output_registers_list():
                    register_id = output_register.id
                    if register_id in live_registers:
                        live_registers[register_id] &= ~output_register.mask
                        if live_registers[register_id] == 0:
                            del live_registers[register_id]

                if not (
                    exclude_parameter_loads
                    and isinstance(instruction, LoadArgumentPseudoInstruction)
                ):
                    for input_register in instruction.get_input_registers_list():
                        register_id = input_register.id
                        live_registers[register_id] = (
                            live_registers.get(register_id, 0)
                            | input_register.mask
                        )

                # Merge with registers found live by earlier walks.
                for known_register in instruction.live_registers:
                    live_registers[known_register.id] = (
                        live_registers.get(known_register.id, 0)
                        | known_register.mask
                    )

                instruction.live_registers = {
                    Register.from_parts(register_id, mask, expand=True)
                    for register_id, mask in live_registers.items()
                }
            elif isinstance(instruction, LabelQuasiInstruction):
                # Continue the analysis through every branch that jumps here.
                for entry_point in instruction.input_branches:
                    if not instructions[entry_point].is_visited:
                        instructions[entry_point].is_visited = True
                        mark_live_registers(
                            instructions, entry_point, live_registers
                        )

    for exit_point in self.find_exit_points():
        mark_live_registers(self.instructions, exit_point, {})
884
+
885
def check_live_registers(self):
    """Placeholder hook: performs no check and returns None."""
887
+
888
def print_live_registers(self, label=""):
    """Record a marker for a later live-register report.

    The most recently added ``Instruction`` in ``self.instructions`` (or
    ``None`` when there is none yet) is stored together with ``label`` in
    ``self._live_register_markers``.  Nothing is printed here.

    Args:
        label: Optional label to identify the location in code
    """
    from nervapy.arm.instructions import Instruction

    # Keep the instruction object itself rather than its index, because
    # indices can change as the instruction list is edited.
    latest_instruction = next(
        (
            candidate
            for candidate in reversed(self.instructions)
            if isinstance(candidate, Instruction)
        ),
        None,
    )
    self._live_register_markers.append((latest_instruction, label))
908
+
909
def _report_live_registers_at_markers(self):
    """Print the live registers stored on every recorded marker.

    Iterates ``self._live_register_markers`` and, for each marked
    instruction, prints its ``live_registers`` grouped by register class
    (GP, S, D, Q) and sorted by ``(id, mask)``.  Intended to run after
    liveness analysis has filled in ``live_registers``.
    """
    from nervapy.arm.instructions import Instruction
    from nervapy.arm.registers import (DRegister, GeneralPurposeRegister,
                                       QRegister, SRegister)

    # Printed group title -> register class, in output order.
    register_groups = (
        ("GP", GeneralPurposeRegister),
        ("S", SRegister),
        ("D", DRegister),
        ("Q", QRegister),
    )

    def describe(register, title):
        """Format a register, adding its recorded name when virtual."""
        if not register.is_virtual:
            return str(register)
        vreg_id = (register.number - 0x40000) >> 12
        name = self._register_names.get(register.number, None)
        prefix = title.lower()
        if name:
            return f"{prefix}-vreg<{vreg_id}, {name}>"
        return f"{prefix}-vreg<{vreg_id}>"

    for marked_instruction, label in self._live_register_markers:
        if marked_instruction is None:
            print(f"Live registers {label}: No instructions yet")
            continue

        # Falls back to an empty set when the attribute is missing.
        live = getattr(marked_instruction, "live_registers", set())
        if not live:
            print(f"Live registers {label}: None")
            continue

        print(f"Live registers {label}:")
        for title, register_class in register_groups:
            members = [r for r in live if isinstance(r, register_class)]
            if not members:
                continue
            ordered = sorted(members, key=lambda x: (x.id, x.mask))
            listing = ", ".join(describe(r, title) for r in ordered)
            print(f" {title} ({len(members)}): {listing}")
956
+
957
+ # all_registers = self.abi.volatile_registers + list(reversed(self.abi.argument_registers)) + self.abi.callee_save_registers
958
+ # available_registers = { Register.GPType: list(), Register.WMMXType: list(), Register.VFPType: list() }
959
+ # for register in all_registers:
960
+ # if register not in available_registers[register.regtype]:
961
+ # available_registers[register.regtype].append(register)
962
+ # for instruction in self.instructions:
963
+ # live_registers = { Register.GPType: set(), Register.WMMXType: set(), Register.VFPType: set() }
964
+ # if isinstance(instruction, Instruction):
965
+ # for live_register in instruction.live_registers:
966
+ # live_registers[live_register.regtype].add(live_register)
967
+ # for register_type in live_registers.keys():
968
+ # if len(live_registers[register_type]) > len(available_registers[register_type]):
969
+ # raise ValueError("Not enough available registers to allocate live registers at instruction {0}".format(instruction))
970
+
971
def determine_register_relations(self):
    """Build the register-allocation problem from liveness information.

    Fills three attributes of ``self``:

    * ``allocation_options``: maps a virtual register id (or a tuple of ids
      for a register group) to the list of physical bitboards (or tuples of
      bitboards) it may still be bound to.
    * ``unallocated_registers``: list of ``(id_or_id_tuple, type)`` entries
      awaiting allocation.
    * ``conflicting_registers``: maps a virtual register id to the ids of
      same-type virtual registers that are live together with it.

    Register lists of NEON load/store and VFP load/store-multiple
    instructions are turned into group constraints: all registers of the
    list must be bound to consecutive physical registers.

    Raises:
        RegisterAllocationError: if a register list has no consecutive
            placement compatible with the current allocation options.
    """
    from nervapy import RegisterAllocationError
    from nervapy.arm.instructions import Instruction
    from nervapy.arm.registers import DRegister, Register, SRegister
    from nervapy.arm.vfpneon import (NeonLoadStoreInstruction,
                                     VFPLoadStoreMultipleInstruction)

    all_registers = (
        self.abi.volatile_registers
        + list(reversed(self.abi.argument_registers))
        + self.abi.callee_save_registers
    )
    # Physical bitboards per register type, in allocation preference order.
    # VFP options are generated per virtual register below, so that list
    # stays empty here.
    available_registers = {
        Register.GPType: [],
        Register.WMMXType: [],
        Register.VFPType: [],
    }
    for register in all_registers:
        if register.type == Register.GPType or register.type == Register.WMMXType:
            register_bitboard = 0x1 << register.get_physical_number()
            if register_bitboard not in available_registers[register.type]:
                available_registers[register.type].append(register_bitboard)

    for instruction in self.instructions:
        if not isinstance(instruction, Instruction):
            continue
        # Track all virtual registers used by the instruction: the live ones
        # plus outputs that are written but never read (dead outputs), which
        # would otherwise be missing from live_registers.
        virtual_live_registers = [
            register
            for register in instruction.live_registers
            if register.is_virtual
        ]
        for output_reg in instruction.get_output_registers_list():
            if output_reg.is_virtual and output_reg not in virtual_live_registers:
                virtual_live_registers.append(output_reg)

        for registerX in virtual_live_registers:
            if registerX.type == Register.VFPType:
                # Allocate the widest enclosing register (S -> D -> Q).
                if isinstance(registerX, SRegister) and registerX.parent:
                    registerX = registerX.parent
                if isinstance(registerX, DRegister) and registerX.parent:
                    registerX = registerX.parent
                # NOTE(review): get_id() and .id are mixed here as in the
                # original code; presumably they are equivalent - confirm.
                if registerX.get_id() not in self.allocation_options:
                    if isinstance(registerX, SRegister):
                        self.allocation_options[registerX.id] = [
                            (0x1 << n) for n in range(32)
                        ]
                    elif isinstance(registerX, DRegister):
                        if self.target.has_vfpd32:
                            self.allocation_options[registerX.id] = [
                                (0x3 << n) for n in range(0, 64, 2)
                            ]
                        else:
                            self.allocation_options[registerX.id] = [
                                (0x3 << n) for n in range(0, 32, 2)
                            ]
                    else:
                        self.allocation_options[registerX.id] = [
                            (0xF << n) for n in range(0, 64, 4)
                        ]
            else:
                if registerX.id not in self.allocation_options:
                    self.allocation_options[registerX.id] = list(
                        available_registers[registerX.type]
                    )

            self.unallocated_registers.append((registerX.id, registerX.type))

            # Set up the set of conflicting registers for this register.
            if registerX.id not in self.conflicting_registers:
                self.conflicting_registers[registerX.id] = set()
            for registerY in virtual_live_registers:
                # VFP registers conflict even if they are of different size.
                if (
                    registerX.id != registerY.id
                    and registerX.type == registerY.type
                ):
                    self.conflicting_registers[registerX.id].add(registerY.id)

    # If a physical register is live at some point, it cannot be allocated
    # for a virtual register of the same type that is live at that point.
    for instruction in self.instructions:
        if not isinstance(instruction, Instruction):
            continue
        virtual_live_registers = [
            register
            for register in instruction.live_registers
            if register.is_virtual
        ]
        physical_live_registers = [
            register
            for register in instruction.live_registers
            if not register.is_virtual
        ]
        for virtual_register in virtual_live_registers:
            for physical_register in physical_live_registers:
                if virtual_register.type == physical_register.type:
                    physical_register_bitboard = physical_register.bitboard
                    candidates = self.allocation_options[virtual_register.id]
                    # Filter in place so other references see the change.
                    candidates[:] = [
                        bitboard
                        for bitboard in candidates
                        if (bitboard & physical_register_bitboard) == 0
                    ]

    # Maps a tuple of register ids to a tuple of allowed bitboard tuples.
    constraints = {}

    def sequential_options(register_list, register_id_list, id_of,
                           unit_mask, unit_width, unit_count):
        """List the placements of register_list on consecutive registers.

        ``unit_mask``/``unit_width`` describe one physical register in
        bitboard terms and ``unit_count`` is how many of them exist.
        Each result is a tuple of bitboards ordered like register_id_list.
        """
        options = []
        for first_unit in range(unit_count - len(register_list) + 1):
            base = first_unit * unit_width
            bitboards = [
                register.extend_bitboard(unit_mask << (base + unit_width * i))
                for i, register in enumerate(register_list)
            ]
            # Every register must still be allowed to take its slot.
            if any(
                bitboard not in self.allocation_options[id_of(register)]
                for register, bitboard in zip(register_list, bitboards)
            ):
                continue
            # A register that appears twice must get the same bitboard.
            register_id_map = {}
            consistent = True
            for register, bitboard in zip(register_list, bitboards):
                register_id = id_of(register)
                if register_id in register_id_map:
                    if register_id_map[register_id] != bitboard:
                        consistent = False
                        break
                else:
                    register_id_map[register_id] = bitboard
            if not consistent:
                continue
            # Bitboards of distinct registers must not overlap.
            occupied = 0
            overlapping = False
            for bitboard in register_id_map.values():
                if (occupied & bitboard) != 0:
                    overlapping = True
                    break
                occupied |= bitboard
            if overlapping:
                continue
            options.append(
                tuple(register_id_map[register_id]
                      for register_id in register_id_list)
            )
        return options

    def record_constraint(register_id_list, options, instruction):
        """Store options for a group, intersecting with earlier ones."""
        if not options:
            raise RegisterAllocationError(
                "Impossible virtual register combination in instruction %s"
                % instruction
            )
        if len(register_id_list) > 1:
            if register_id_list in constraints:
                constraints[register_id_list] = tuple(
                    option
                    for option in constraints[register_id_list]
                    if option in options
                )
            else:
                constraints[register_id_list] = tuple(options)

    # Detect group constraints.
    for instruction in self.instructions:
        if isinstance(instruction, NeonLoadStoreInstruction):
            register_list = instruction.operands[0].get_registers_list()
            physical_registers_count = 32
        elif isinstance(instruction, VFPLoadStoreMultipleInstruction):
            register_list = instruction.operands[1].get_registers_list()
            physical_registers_count = 32 if self.target.has_vfpd32 else 16
        else:
            continue
        if len(register_list) <= 1:
            continue

        if all(isinstance(register, DRegister) for register in register_list):
            # Unique ids in first-occurrence order.
            register_id_list = tuple(
                dict.fromkeys(register.get_id() for register in register_list)
            )
            # For VLD1/VST1 instructions all registers must be allocated to
            # sequential physical registers (a D register is 2 bits wide).
            options = sequential_options(
                register_list,
                register_id_list,
                lambda register: register.get_id(),
                0x3,
                2,
                physical_registers_count,
            )
            record_constraint(register_id_list, options, instruction)
        elif all(
            isinstance(register, SRegister) for register in register_list
        ) and isinstance(instruction, VFPLoadStoreMultipleInstruction):
            register_id_list = tuple(
                dict.fromkeys(register.id for register in register_list)
            )
            # For VLDM/VSTM instructions all registers must be allocated to
            # sequential physical registers (an S register is 1 bit wide).
            options = sequential_options(
                register_list,
                register_id_list,
                lambda register: register.id,
                0x1,
                1,
                32,
            )
            record_constraint(register_id_list, options, instruction)
        else:
            assert False, "unsupported register list in %s" % instruction

    # Flip to True to dump the detected constraints while debugging.
    report_register_constraints = False
    if report_register_constraints:
        for group_ids, group_options in constraints.items():
            print("REGISTER CONSTRAINTS: ", [str(i) for i in group_ids])
            for group_option in group_options:
                print("\t", ["%016X" % bitboard for bitboard in group_option])

    # TODO: merge different groups that share a register; until then each
    # register id may appear in one group only.
    constrained_register_id_list = [
        register_id
        for register_id_list in constraints
        for register_id in register_id_list
    ]
    assert len(constrained_register_id_list) == len(
        set(constrained_register_id_list)
    )

    # Remove individual registers from the list of unallocated registers and
    # add the register groups instead (in place, keeping the order).
    grouped_entries = {
        (constrained_register_id, Register.VFPType)
        for constrained_register_id in constrained_register_id_list
    }
    self.unallocated_registers[:] = [
        entry
        for entry in self.unallocated_registers
        if entry not in grouped_entries
    ]
    for register_id_list in constraints:
        self.unallocated_registers.append((register_id_list, Register.VFPType))

    # conflicting_registers intentionally stays keyed by individual ids.

    # Remove individual registers from the allocation options and add the
    # register groups instead.
    for constrained_register_id in constrained_register_id_list:
        del self.allocation_options[constrained_register_id]
    for register_id_list, constrained_options in constraints.items():
        # Each group gets its OWN options (previously a stale loop variable
        # from the last processed instruction was stored for every group).
        self.allocation_options[register_id_list] = list(constrained_options)
1308
+
1309
def _get_register_type_name(self, register_type):
    """Return a human-readable name for a register type constant.

    Unrecognised values yield ``"unknown type <n>"``.
    """
    from nervapy.arm.registers import Register

    known_names = (
        (Register.GPType, "general-purpose"),
        (Register.VFPType, "VFP/NEON"),
        (Register.WMMXType, "WMMX"),
    )
    for candidate, readable_name in known_names:
        if register_type == candidate:
            return readable_name
    return "unknown type %d" % register_type
1321
+
1322
def _get_available_registers_info(self, register_type):
    """Return a short description of the registers offered for a type.

    The text is meant for error messages; unrecognised types give
    ``"unknown"``.
    """
    from nervapy.arm.registers import Register

    descriptions = (
        # r13 (sp), r14 (lr) and r15 (pc) are special and typically not
        # used for general allocation, hence r0-r12 only.
        (Register.GPType, "r0-r12 (13 registers available for allocation)"),
        (Register.VFPType, "s0-s31 or d0-d31 or q0-q15 (depending on instruction)"),
        (Register.WMMXType, "wr0-wr15 (16 registers)"),
    )
    for candidate, description in descriptions:
        if register_type == candidate:
            return description
    return "unknown"
1336
+
1337
def _count_virtual_registers_by_type(self, register_type):
    """Count distinct virtual registers of one type awaiting allocation.

    Reads ``self.unallocated_registers``, a list of ``(id, type)`` entries
    where ``id`` is either a single register id or a tuple of ids (a
    register group).  Duplicate ids are counted once.

    Args:
        register_type: the register type constant to count.

    Returns:
        Number of unique register ids whose entry has type ``register_type``.
    """
    unique_ids = set()
    for virtual_register_id, virtual_register_type in self.unallocated_registers:
        if virtual_register_type == register_type:
            if isinstance(virtual_register_id, tuple):
                # Register group: every member counts individually.
                unique_ids.update(virtual_register_id)
            else:
                unique_ids.add(virtual_register_id)
    return len(unique_ids)
1355
+
1356
def _get_max_physical_registers(self, register_type):
    """Return the nominal number of physical registers for a type.

    The figures are fixed approximations used in diagnostics; unrecognised
    types give 0.
    """
    from nervapy.arm.registers import Register

    limits = (
        # Typically r0-r12 can be allocated, but this can vary based on
        # ABI and function constraints.
        (Register.GPType, 13),
        # Simplification: counted in single-precision registers; the
        # actual count depends on usage.
        (Register.VFPType, 32),
        (Register.WMMXType, 16),
    )
    for candidate, limit in limits:
        if register_type == candidate:
            return limit
    return 0
1372
+
1373
+ def allocate_registers(self):
1374
+ from nervapy.arm.instructions import Instruction
1375
+ from nervapy.arm.pseudo import LoadArgumentPseudoInstruction
1376
+ from nervapy.arm.registers import Register
1377
+
1378
+ # Save counts before allocation starts (after liveness analysis has eliminated dead code)
1379
+ # This gives us accurate counts for error messages
1380
+ self._vr_counts_by_type = {}
1381
+ for reg_type in [Register.GPType, Register.VFPType, Register.WMMXType]:
1382
+ self._vr_counts_by_type[reg_type] = self._count_virtual_registers_by_type(reg_type)
1383
+
1384
+ # Map from virtual register id to physical register
1385
+ register_allocation = dict()
1386
+ for virtual_register_id, virtual_register_type in self.unallocated_registers:
1387
+ register_allocation[virtual_register_id] = None
1388
+
1389
+ def bind_register(virtual_register_id, physical_register):
1390
+ # Remove option to allocate any conflicting virtual register to the same physical register or its enclosing register
1391
+ physical_register_bitboard = physical_register.bitboard
1392
+ for conflicting_register_id in self.conflicting_registers[
1393
+ virtual_register_id
1394
+ ]:
1395
+ if conflicting_register_id in self.allocation_options:
1396
+ for allocation_bitboard in self.allocation_options[
1397
+ conflicting_register_id
1398
+ ]:
1399
+ if (allocation_bitboard & physical_register_bitboard) != 0:
1400
+ self.allocation_options[conflicting_register_id].remove(
1401
+ allocation_bitboard
1402
+ )
1403
+ register_allocation[virtual_register_id] = physical_register
1404
+
1405
+ def bind_registers(virtual_register_id_list, physical_register_id_list):
1406
+ # Remove option to allocate any conflicting virtual register to the same physical register or its enclosing register
1407
+ physical_register_bitboard_list = [
1408
+ physical_register.get_bitboard()
1409
+ for physical_register in physical_register_id_list
1410
+ ]
1411
+ for virtual_register_id, physical_register_bitboard in zip(
1412
+ virtual_register_id_list, physical_register_bitboard_list
1413
+ ):
1414
+ for conflicting_register_id in self.conflicting_registers[
1415
+ virtual_register_id
1416
+ ]:
1417
+ for (
1418
+ allocation_key,
1419
+ allocation_option,
1420
+ ) in self.allocation_options.items():
1421
+ if isinstance(allocation_key, tuple):
1422
+ if conflicting_register_id in allocation_key:
1423
+ conflicting_register_index = allocation_key.index(
1424
+ conflicting_register_id
1425
+ )
1426
+ for bitboard_list in allocation_option:
1427
+ if (
1428
+ bitboard_list[conflicting_register_index]
1429
+ & physical_register_bitboard
1430
+ ) != 0:
1431
+ allocation_option.remove(bitboard_list)
1432
+ else:
1433
+ if conflicting_register_id == allocation_key:
1434
+ for bitboard in allocation_option:
1435
+ if (bitboard & physical_register_bitboard) != 0:
1436
+ allocation_option.remove(bitboard)
1437
+
1438
+ for virtual_register_id, physical_register_id in zip(
1439
+ virtual_register_id_list, physical_register_id_list
1440
+ ):
1441
+ register_allocation[virtual_register_id] = physical_register_id
1442
+
1443
+ def is_allocated(virtual_register_id):
1444
+ return bool(register_allocation[virtual_register_id])
1445
+
1446
+ # First allocate parameters
1447
+ for instruction in self.instructions:
1448
+ if isinstance(instruction, LoadArgumentPseudoInstruction):
1449
+ if instruction.argument.register:
1450
+ if instruction.destination.register.is_virtual:
1451
+ if not is_allocated(instruction.destination.register.id):
1452
+ if (
1453
+ instruction.argument.register.bitboard
1454
+ in self.allocation_options[
1455
+ instruction.destination.register.id
1456
+ ]
1457
+ ):
1458
+ bind_register(
1459
+ instruction.destination.register.id,
1460
+ instruction.argument.register,
1461
+ )
1462
+
1463
+ # Now allocate registers with special restrictions
1464
+ for (
1465
+ virtual_register_id_list,
1466
+ virtual_register_type,
1467
+ ) in self.unallocated_registers:
1468
+ if isinstance(virtual_register_id_list, tuple):
1469
+ # print "REGLIST: ", map(str, virtual_register_id_list)
1470
+ if not self.allocation_options[virtual_register_id_list]:
1471
+ # Use saved count from before allocation started
1472
+ vr_count = self._vr_counts_by_type.get(virtual_register_type, 0)
1473
+ max_phys = self._get_max_physical_registers(virtual_register_type)
1474
+ raise RuntimeError(
1475
+ "Register allocation failed: No available physical registers for virtual register list %s (type: %s).\n"
1476
+ "Your code uses %d virtual %s registers, but only ~%d physical registers are available.\n"
1477
+ "To fix: reduce the number of registers used in your Python code."
1478
+ % (virtual_register_id_list, self._get_register_type_name(virtual_register_type),
1479
+ vr_count, self._get_register_type_name(virtual_register_type), max_phys)
1480
+ )
1481
+ physical_register_bitboard_list = self.allocation_options[
1482
+ virtual_register_id_list
1483
+ ][0]
1484
+ physcial_registers_list = [
1485
+ Register.from_bitboard(
1486
+ physical_register_bitboard, virtual_register_type
1487
+ )
1488
+ for physical_register_bitboard in physical_register_bitboard_list
1489
+ ]
1490
+ bind_registers(virtual_register_id_list, physcial_registers_list)
1491
+
1492
+ # Now allocate all other registers
1493
+ while self.unallocated_registers:
1494
+ virtual_register_id, virtual_register_type = self.unallocated_registers.pop(
1495
+ 0
1496
+ )
1497
+ if not isinstance(virtual_register_id, tuple):
1498
+ if not is_allocated(virtual_register_id):
1499
+ if not self.allocation_options[virtual_register_id]:
1500
+ # Use saved count from before allocation started
1501
+ vr_count = self._vr_counts_by_type.get(virtual_register_type, 0)
1502
+ max_phys = self._get_max_physical_registers(virtual_register_type)
1503
+
1504
+ # Debug: find max simultaneous live registers
1505
+ max_live = 0
1506
+ max_live_instr = None
1507
+ max_live_line = None
1508
+ max_live_idx = None
1509
+ max_live_regs = []
1510
+ for idx, instruction in enumerate(self.instructions):
1511
+ if hasattr(instruction, 'live_registers'):
1512
+ live_regs = [r for r in instruction.live_registers
1513
+ if hasattr(r, 'type') and r.type == virtual_register_type and r.is_virtual]
1514
+ live_count = len(live_regs)
1515
+ if live_count > max_live:
1516
+ max_live = live_count
1517
+ max_live_instr = instruction
1518
+ max_live_idx = idx
1519
+ max_live_line = getattr(instruction, 'line_number', None)
1520
+ # Include variable names in the register representation
1521
+ max_live_regs = []
1522
+ for r in live_regs:
1523
+ reg_str = str(r)
1524
+ var_name = self._register_names.get(r.number, None)
1525
+ if var_name:
1526
+ reg_str = f"{reg_str}, {var_name}"
1527
+ max_live_regs.append(reg_str)
1528
+
1529
+ debug_msg = ""
1530
+ if max_live <= max_phys:
1531
+ debug_msg = (f"\n\nPhysical registers available: {max_phys}\n"
1532
+ f"The register pressure ({max_live}/{max_phys}) should be manageable, but the allocator\n"
1533
+ f"couldn't find a valid allocation due to conflicting constraints.\n")
1534
+ if max_live_instr is not None:
1535
+ # Show instruction location info
1536
+ location_info = []
1537
+ if hasattr(max_live_instr, 'source_file') and max_live_instr.source_file:
1538
+ location_info.append(f"File: {max_live_instr.source_file}")
1539
+ if hasattr(max_live_instr, 'line_number') and max_live_instr.line_number:
1540
+ location_info.append(f"Line: {max_live_instr.line_number}")
1541
+ if location_info:
1542
+ debug_msg += f"Max register pressure at {', '.join(location_info)}\n"
1543
+ elif max_live_idx is not None:
1544
+ debug_msg += f"Max register pressure at instruction #{max_live_idx} (index in generated code)\n"
1545
+ debug_msg += f"Hint: Enable collect_origin=True in Function() to see Python source line numbers.\n"
1546
+
1547
+ debug_msg += f"Instruction with max pressure: {max_live_instr}\n"
1548
+
1549
+ # Show the source code if available
1550
+ if hasattr(max_live_instr, 'source_code') and max_live_instr.source_code:
1551
+ debug_msg += f"Source code: {max_live_instr.source_code}\n"
1552
+
1553
+ if max_live_regs:
1554
+ debug_msg += f"Live virtual registers: {', '.join(sorted(max_live_regs))}\n"
1555
+ debug_msg += f"\nThis suggests the greedy allocator made suboptimal early choices.\n"
1556
+ debug_msg += f"Try reordering your code or reducing temporary register usage.\n"
1557
+ else:
1558
+ debug_msg = (f"\n\nThis exceeds the {max_phys} physical registers available.\n"
1559
+ f"You need to reduce the number of live registers at once.\n")
1560
+ if max_live_instr is not None:
1561
+ # Show instruction location info
1562
+ location_info = []
1563
+ if hasattr(max_live_instr, 'source_file') and max_live_instr.source_file:
1564
+ location_info.append(f"File: {max_live_instr.source_file}")
1565
+ if hasattr(max_live_instr, 'line_number') and max_live_instr.line_number:
1566
+ location_info.append(f"Line: {max_live_instr.line_number}")
1567
+ if location_info:
1568
+ debug_msg += f"Max register pressure at {', '.join(location_info)}\n"
1569
+ elif max_live_idx is not None:
1570
+ debug_msg += f"Max register pressure at instruction #{max_live_idx} (index in generated code)\n"
1571
+ debug_msg += f"Hint: Enable collect_origin=True in Function() to see Python source line numbers.\n"
1572
+
1573
+ debug_msg += f"Instruction: {max_live_instr}\n"
1574
+
1575
+ # Show the source code if available
1576
+ if hasattr(max_live_instr, 'source_code') and max_live_instr.source_code:
1577
+ debug_msg += f"Source code: {max_live_instr.source_code}\n"
1578
+
1579
+ raise RuntimeError(
1580
+ "Register allocation failed: No available physical registers for virtual register #%d (type: %s).\n"
1581
+ "Your code uses %d virtual %s registers, but only ~%d physical registers are available.\n"
1582
+ "Available %s registers: %s\n"
1583
+ "To fix: reduce the number of registers used in your Python code.%s"
1584
+ % (
1585
+ virtual_register_id,
1586
+ self._get_register_type_name(virtual_register_type),
1587
+ vr_count,
1588
+ self._get_register_type_name(virtual_register_type),
1589
+ max_phys,
1590
+ self._get_register_type_name(virtual_register_type),
1591
+ self._get_available_registers_info(virtual_register_type),
1592
+ debug_msg
1593
+ )
1594
+ )
1595
+ physical_register_bitboard = self.allocation_options[
1596
+ virtual_register_id
1597
+ ][0]
1598
+ physical_register = Register.from_bitboard(
1599
+ physical_register_bitboard, virtual_register_type
1600
+ )
1601
+ bind_register(virtual_register_id, physical_register)
1602
+
1603
+ # Verify all virtual registers used in instructions are tracked
1604
+ untracked_registers = set()
1605
+ for instruction in self.instructions:
1606
+ if isinstance(instruction, Instruction):
1607
+ for input_register in instruction.get_input_registers_list():
1608
+ if input_register.is_virtual:
1609
+ if input_register.id not in register_allocation:
1610
+ untracked_registers.add(input_register.id)
1611
+ for output_register in instruction.get_output_registers_list():
1612
+ if output_register.is_virtual:
1613
+ if output_register.id not in register_allocation:
1614
+ untracked_registers.add(output_register.id)
1615
+
1616
+ if untracked_registers:
1617
+ raise RuntimeError(
1618
+ f"Internal error: Virtual registers {sorted(untracked_registers)} used in instructions "
1619
+ f"but were not tracked for allocation. This indicates a bug where registers were created "
1620
+ f"after liveness analysis or were not properly added to live_registers."
1621
+ )
1622
+
1623
+ for instruction in self.instructions:
1624
+ if isinstance(instruction, Instruction):
1625
+ for input_register in instruction.get_input_registers_list():
1626
+ if input_register.is_virtual:
1627
+ input_register.bind(register_allocation[input_register.id])
1628
+ for output_register in instruction.get_output_registers_list():
1629
+ if output_register.is_virtual:
1630
+ output_register.bind(
1631
+ register_allocation[output_register.id]
1632
+ )
1633
+
1634
+ # Updates information about registers to be saved/restored in the function prologue/epilogue
1635
def update_stack_frame(self):
    """Tell the stack frame which registers the function body writes.

    Every real ``Instruction`` in ``self.instructions`` contributes its
    output registers; ``self.stack_frame.preserve_registers`` decides which
    of them actually have to be saved by the prologue/epilogue.
    """
    from nervapy.arm.instructions import Instruction

    real_instructions = (
        candidate
        for candidate in self.instructions
        if isinstance(candidate, Instruction)
    )
    for real_instruction in real_instructions:
        written_registers = real_instruction.get_output_registers_list()
        self.stack_frame.preserve_registers(written_registers)
1643
+
1644
def remove_assume_statements(self):
    """Strip every ``AssumeInitializedPseudoInstruction`` from the function.

    ``self.instructions`` is replaced by a new list holding the remaining
    entries in their original order.
    """
    from nervapy.arm.pseudo import AssumeInitializedPseudoInstruction

    self.instructions = [
        kept
        for kept in self.instructions
        if not isinstance(kept, AssumeInitializedPseudoInstruction)
    ]
1654
+
1655
def generate_parameter_loads(self):
    """Replace argument-load pseudo-instructions with real MOV/LDR instructions.

    For each ``LoadArgumentPseudoInstruction``:

    * argument held in a register different from the destination -> ``MOV``;
    * argument held in the destination register already -> nothing emitted;
    * argument passed on the stack -> ``LDR`` from ``[sp, offset]`` where the
      offset is the stack frame's parameters offset plus the argument's own
      stack offset.

    The replacement inherits ``live_registers`` and ``available_registers``
    from the pseudo-instruction. All other instructions are kept unchanged.
    """
    from nervapy.arm.generic import LDR, MOV
    from nervapy.arm.pseudo import LoadArgumentPseudoInstruction
    from nervapy.arm.registers import sp

    lowered = []
    for instruction in self.instructions:
        if not isinstance(instruction, LoadArgumentPseudoInstruction):
            lowered.append(instruction)
            continue

        argument = instruction.argument
        if argument.register:
            if instruction.destination.register != argument.register:
                replacement = MOV(
                    instruction.destination.register, argument.register
                )
            else:
                # Source and destination coincide: the move would be a no-op,
                # so the pseudo-instruction simply disappears.
                continue
        else:
            stack_address = (
                self.stack_frame.get_parameters_offset() + argument.stack_offset
            )
            replacement = LDR(
                instruction.destination.register, [sp, stack_address]
            )

        # Carry the liveness data computed for the pseudo-instruction over to
        # the instruction that takes its place.
        replacement.live_registers = instruction.live_registers
        replacement.available_registers = instruction.available_registers
        lowered.append(replacement)
    self.instructions = lowered
1695
+
1696
def generate_constant_loads(self):
    """Label the constants used by instructions and group them into buckets.

    Three passes over ``self.instructions``:

    1. find the largest alignment reported by any instruction constant;
    2. give each distinct constant a label ``c0``, ``c1``, ... and add it to
       a ``ConstantBucket`` (new buckets are appended to ``self.constants``);
    3. rebuild the instruction list, raising ``NotImplementedError`` on any
       ``LoadConstantPseudoInstruction``.
    """
    from nervapy import ConstantBucket
    from nervapy.arm.instructions import Instruction
    from nervapy.arm.pseudo import LoadConstantPseudoInstruction

    # Pass 1: largest alignment over all constants.
    widest_alignment = 0
    for instruction in self.instructions:
        if not isinstance(instruction, Instruction):
            continue
        constant = instruction.get_constant()
        if constant is None:
            continue
        alignment = constant.get_alignment()
        # Computed by the original code as well; the value itself is unused.
        constant_size = constant.size * constant.repeats
        widest_alignment = max(widest_alignment, alignment)

    # Pass 2: labels and buckets. A constant seen before reuses its label and
    # is not added to a bucket a second time.
    next_label_index = 0
    labels_by_constant = {}
    open_buckets = {}
    for instruction in self.instructions:
        if not isinstance(instruction, Instruction):
            continue
        constant = instruction.get_constant()
        if constant is None:
            continue
        if constant in labels_by_constant:
            constant.label = labels_by_constant[constant]
            continue

        constant.label = "c" + str(next_label_index)
        next_label_index += 1
        labels_by_constant[constant] = constant.label
        alignment = constant.get_alignment()
        constant_size = constant.size * constant.repeats
        if alignment in open_buckets:
            open_buckets[alignment].add(constant)
            # A bucket that filled up is no longer offered to later constants.
            if open_buckets[alignment].is_full():
                del open_buckets[alignment]
        else:
            # NOTE(review): "/" is true division, so the bucket receives a
            # float here; confirm ConstantBucket accepts that.
            fresh_bucket = ConstantBucket(widest_alignment / 8)
            fresh_bucket.add(constant)
            self.constants.append(fresh_bucket)
            if not fresh_bucket.is_full():
                open_buckets[alignment] = fresh_bucket

    # Pass 3: constant-load pseudo-instructions are not supported.
    passthrough = []
    for instruction in self.instructions:
        if isinstance(instruction, LoadConstantPseudoInstruction):
            raise NotImplementedError()
        passthrough.append(instruction)
    self.instructions = passthrough
1743
+
1744
def validate_stack_alignment_check(self):
    """
    Validate that stack is 8-byte aligned before BL/BLX instructions.

    Only runs when the target has the V7M or V8MBase extension. Walks the
    instruction list, skips the prologue instructions, accumulates how many
    bytes PUSH/POP, SP-based STM*/LDM* with writeback, and
    ``SUB``/``ADD sp, sp, #imm`` have moved the stack pointer, and raises
    ``ValueError`` when a BL/BLX is reached while that total is not a
    multiple of 8.
    """
    from nervapy.arm.generic import (ArithmeticInstruction,
                                     BranchLinkExchangeInstruction,
                                     BranchWithLinkInstruction,
                                     PushPopInstruction,
                                     StoreMultipleInstruction)
    from nervapy.arm.instructions import Instruction
    from nervapy.arm.isa import Extension
    from nervapy.arm.registers import sp

    target_extensions = self.target.extensions
    if (
        Extension.V7M not in target_extensions
        and Extension.V8MBase not in target_extensions
    ):
        return

    def refers_to_sp(operand):
        # True when the operand wraps the stack pointer register.
        return hasattr(operand, "register") and operand.register == sp

    def looks_like_prologue(candidate):
        # PUSH / VPUSH / STM* with writeback / SUB sp, sp, ... are the shapes
        # this check treats as prologue instructions.
        if isinstance(candidate, PushPopInstruction) and candidate.name in (
            "PUSH",
            "PUSH.W",
        ):
            return True
        if candidate.__class__.__name__ == "VfpNeonPushPopInstruction":
            return True
        if isinstance(candidate, StoreMultipleInstruction):
            return bool(
                candidate.writeback and candidate.name.startswith("STM")
            )
        if isinstance(candidate, ArithmeticInstruction):
            if len(candidate.operands) < 3:
                return False
            return bool(
                refers_to_sp(candidate.operands[0])
                and refers_to_sp(candidate.operands[1])
                and candidate.name.startswith("SUB")
            )
        return False

    # The prologue length is taken from the stack frame; that many matching
    # instructions are ignored before tracking starts.
    expected_prologue_length = len(self.stack_frame.generate_prologue())
    skipped_prologue = 0
    tracked_bytes = 0

    for instruction in self.instructions:
        if not isinstance(instruction, Instruction):
            continue

        if skipped_prologue < expected_prologue_length and looks_like_prologue(
            instruction
        ):
            skipped_prologue += 1
            continue

        if isinstance(instruction, PushPopInstruction):
            # Each register in the list moves SP by 4 bytes.
            if instruction.name in ("PUSH", "PUSH.W"):
                register_count = len(instruction.operands[0].get_registers_list())
                tracked_bytes += register_count * 4
            elif instruction.name in ("POP", "POP.W"):
                register_count = len(instruction.operands[0].get_registers_list())
                tracked_bytes -= register_count * 4

        elif isinstance(instruction, StoreMultipleInstruction):
            # Only writeback forms based on SP change the stack pointer.
            if instruction.writeback:
                base_operand = instruction.operands[0]
                if refers_to_sp(base_operand):
                    register_count = len(
                        instruction.operands[1].get_registers_list()
                    )
                    if instruction.name.startswith("STM"):
                        tracked_bytes += register_count * 4
                    elif instruction.name.startswith("LDM"):
                        tracked_bytes -= register_count * 4

        elif isinstance(instruction, ArithmeticInstruction):
            if len(instruction.operands) >= 3:
                destination = instruction.operands[0]
                first_source = instruction.operands[1]
                second_source = instruction.operands[2]

                if refers_to_sp(destination) and refers_to_sp(first_source):
                    # The immediate may be exposed as `immediate` or `value`.
                    amount = None
                    if hasattr(second_source, "immediate"):
                        amount = second_source.immediate
                    elif hasattr(second_source, "value"):
                        amount = second_source.value

                    if amount is not None:
                        if instruction.name.startswith("SUB"):
                            # SUB sp, sp, #imm allocates stack space.
                            tracked_bytes += amount
                        elif instruction.name.startswith("ADD"):
                            # ADD sp, sp, #imm releases stack space.
                            tracked_bytes -= amount

        elif isinstance(
            instruction, (BranchWithLinkInstruction, BranchLinkExchangeInstruction)
        ):
            if tracked_bytes % 8 != 0:
                raise ValueError(
                    "Stack is not 8-byte aligned before {0} instruction.\n"
                    "Current stack offset: {1} bytes (misaligned by {2} bytes).\n"
                    "ARMv7-M/ARMv8-M requires 8-byte stack alignment at function calls (AAPCS requirement).\n"
                    "Add registers in pairs to PUSH instructions or adjust the stack manually to maintain alignment.".format(
                        instruction.name, tracked_bytes, tracked_bytes % 8
                    )
                )
1884
+
1885
def optimize_instructions(self):
    """Drop VFP/NEON moves whose two operands compare equal.

    Only ``VfpNeonMovInstruction`` is examined; every other instruction is
    kept as is.
    """
    # MovInstruction is imported by the original code but not used here.
    from nervapy.arm.generic import MovInstruction
    from nervapy.arm.vfpneon import VfpNeonMovInstruction

    def is_self_move(candidate):
        if not isinstance(candidate, VfpNeonMovInstruction):
            return False
        return not (candidate.operands[0] != candidate.operands[1])

    self.instructions = [
        kept for kept in self.instructions if not is_self_move(kept)
    ]
1899
+
1900
def get_target(self):
    """Return the target stored on this function (``self.target``)."""
    target = self.target
    return target
1902
+
1903
@property
def isa_extensions(self):
    """Collect the ISA extensions needed by this function.

    Starts from the target's extensions, adds the ``isa_extensions`` of every
    real instruction, and adds ``Extension.VFPd32`` for each instruction that
    touches a ``QRegister`` or a ``DRegister`` whose ``is_extended`` is truthy.
    """
    from nervapy.arm.instructions import Instruction
    from nervapy.arm.isa import Extension, Extensions
    from nervapy.arm.registers import DRegister, QRegister

    collected = Extensions(*self.target.extensions)
    for instruction in self.instructions:
        if not isinstance(instruction, Instruction):
            continue
        for extension in instruction.isa_extensions:
            collected += extension
        for register in instruction.get_registers_list():
            # Precedence made explicit: any Q register, or an extended D one.
            if isinstance(register, QRegister) or (
                isinstance(register, DRegister) and register.is_extended
            ):
                collected += Extension.VFPd32
                break
    return collected
1923
+
1924
def get_yeppp_isa_extensions(self):
    """Translate this function's ISA extensions into Yeppp! feature names.

    Each extension yielded by ``self.get_isa_extensions()`` (``None`` entries
    are ignored) is classified as an ISA, SIMD or system feature and prefixed
    with ``YepARMIsaFeature``, ``YepARMSimdFeature`` or
    ``YepARMSystemFeature``.

    Returns:
        A 3-tuple ``(isa, simd, system)`` of lists of names, each sorted
        alphabetically. A category without any feature holds its default
        name instead (``YepIsaFeaturesDefault``, ``YepSimdFeaturesDefault``
        or ``YepSystemFeaturesDefault``).

    Raises:
        KeyError: if an extension is not present in the mapping table.
    """
    # NOTE(review): relies on a get_isa_extensions() method defined elsewhere
    # in the class; confirm it still exists alongside the isa_extensions
    # property.
    # extension -> (isa feature, simd feature, system feature)
    isa_extensions_map = {
        "V4": ("V4", None, None),
        "V5": ("V5", None, None),
        "V5E": ("V5E", None, None),
        "V6": ("V6", None, None),
        "V6K": ("V6K", None, None),
        "V7": ("V7", None, None),
        "V7MP": ("V7MP", None, None),
        "Div": ("Div", None, None),
        "Thumb": ("Thumb", None, None),
        "Thumb2": ("Thumb2", None, None),
        "VFP": ("VFP", None, None),
        "VFP2": ("VFP2", None, None),
        "VFP3": ("VFP3", None, None),
        "VFPd32": ("VFPd32", None, None),
        "VFP3HP": ("VFP3HP", None, None),
        "VFP4": ("VFP4", None, None),
        "VFPVectorMode": (None, None, "VFPVectorMode"),
        "XScale": (None, "XScale", None),
        "WMMX": (None, "WMMX", None),
        "WMMX2": (None, "WMMX2", None),
        "NEON": (None, "NEON", None),
        "NEONHP": (None, "NEONHP", None),
        "NEON2": (None, "NEON2", None),
    }

    isa_names, simd_names, system_names = set(), set(), set()
    for extension in self.get_isa_extensions():
        if extension is None:
            continue
        isa_name, simd_name, system_name = isa_extensions_map[extension]
        if isa_name is not None:
            isa_names.add(isa_name)
        if simd_name is not None:
            simd_names.add(simd_name)
        if system_name is not None:
            system_names.add(system_name)

    def qualified(prefix, names, default):
        # Lists are built eagerly: a lazy map() object is always truthy in
        # Python 3, which made the default fallback unreachable.
        if not names:
            return [default]
        return [prefix + name for name in sorted(names)]

    return (
        qualified("YepARMIsaFeature", isa_names, "YepIsaFeaturesDefault"),
        qualified("YepARMSimdFeature", simd_names, "YepSimdFeaturesDefault"),
        qualified("YepARMSystemFeature", system_names, "YepSystemFeaturesDefault"),
    )
1974
+
1975
def allocate_local_variable(self):
    """Increment ``local_variables_count`` and return the new count as an id."""
    variable_id = self.local_variables_count + 1
    self.local_variables_count = variable_id
    return variable_id
1978
+
1979
def allocate_q_register(self):
    """Allocate a new virtual Q register number.

    Increments ``virtual_registers_count`` and returns
    ``(count << 12) | 0x0F0``.

    As a debugging aid, the source line two call frames up is inspected; if
    it looks like ``name = ...QRegister...`` the text left of the first ``=``
    is stored in ``self._register_names`` under the new register number.
    This is best-effort: any ordinary failure while capturing the name is
    ignored.
    """
    self.virtual_registers_count += 1
    register_number = (self.virtual_registers_count << 12) | 0x0F0

    try:
        import inspect
        import linecache

        # Two frames up: this method -> register constructor -> user code.
        frame = inspect.currentframe().f_back.f_back
        if frame:
            line = linecache.getline(
                frame.f_code.co_filename, frame.f_lineno
            ).strip()
            if "=" in line and "QRegister" in line:
                var_name = line.split("=")[0].strip()
                if var_name and not var_name.startswith("#"):
                    self._register_names[register_number] = var_name
    except Exception:
        # Name capture is optional. Unlike a bare `except:`, this lets
        # KeyboardInterrupt and SystemExit propagate.
        pass

    return register_number
1998
+
1999
def allocate_d_register(self):
    """Allocate a new virtual D register number: ``(count << 12) | 0x300``."""
    sequence_number = self.virtual_registers_count + 1
    self.virtual_registers_count = sequence_number
    return (sequence_number << 12) | 0x300
2002
+
2003
def allocate_s_register(self):
    """Allocate a new virtual S register number: ``(count << 12) | 0x400``."""
    sequence_number = self.virtual_registers_count + 1
    self.virtual_registers_count = sequence_number
    return (sequence_number << 12) | 0x400
2006
+
2007
def allocate_wmmx_register(self):
    """Allocate a new virtual WMMX register number: ``(count << 12) | 0x002``."""
    sequence_number = self.virtual_registers_count + 1
    self.virtual_registers_count = sequence_number
    return (sequence_number << 12) | 0x002
2010
+
2011
def allocate_general_purpose_register(self):
    """Allocate a new virtual general-purpose register number.

    Increments ``virtual_registers_count`` and returns
    ``(count << 12) | 0x001``.

    As a debugging aid, the source line two call frames up is inspected; if
    it looks like ``name = ...GeneralPurposeRegister...`` the text left of
    the first ``=`` is stored in ``self._register_names`` under the new
    register number. This is best-effort: any ordinary failure while
    capturing the name is ignored.
    """
    self.virtual_registers_count += 1
    register_number = (self.virtual_registers_count << 12) | 0x001

    try:
        import inspect
        import linecache

        # Go up 2 frames: this -> __init__ -> caller
        frame = inspect.currentframe().f_back.f_back
        if frame:
            # Source text of the line currently executing in the caller.
            line = linecache.getline(
                frame.f_code.co_filename, frame.f_lineno
            ).strip()
            # Simple pattern matching for "varname = GeneralPurposeRegister()"
            if "=" in line and "GeneralPurposeRegister" in line:
                var_name = line.split("=")[0].strip()
                if var_name and not var_name.startswith("#"):
                    self._register_names[register_number] = var_name
    except Exception:
        # Name capture is optional. Unlike a bare `except:`, this lets
        # KeyboardInterrupt and SystemExit propagate.
        pass

    return register_number
2032
+
2033
def allocate_p_register(self):
    """Allocate a new virtual P register number: ``(count << 12) | 0x001``."""
    # NOTE(review): 0x001 is also the tag allocate_general_purpose_register
    # uses; confirm the two kinds are meant to share it.
    sequence_number = self.virtual_registers_count + 1
    self.virtual_registers_count = sequence_number
    return (sequence_number << 12) | 0x001
2036
+
2037
+
2038
class LocalVariable(object):
    """A local variable of the active function, optionally a slice of another.

    A variable is either a root (``parent is None``) with its own ``address``
    or a sub-variable located ``offset`` bytes into its parent.
    """

    def __init__(self, register_type):
        """Create a variable sized for ``register_type``.

        Args:
            register_type: a size in bytes (``int``) or one of the register
                classes GeneralPurposeRegister / SRegister (4 bytes),
                WMMXRegister / DRegister (8 bytes) or QRegister (16 bytes).

        Raises:
            ValueError: for any other ``register_type``.
        """
        from nervapy.arm.registers import (DRegister, GeneralPurposeRegister,
                                           QRegister, SRegister, WMMXRegister)

        super(LocalVariable, self).__init__()
        if isinstance(register_type, int):
            self.size = register_type
        elif register_type == GeneralPurposeRegister:
            self.size = 4
        elif register_type == WMMXRegister:
            self.size = 8
        elif register_type == SRegister:
            self.size = 4
        elif register_type == DRegister:
            self.size = 8
        elif register_type == QRegister:
            self.size = 16
        else:
            raise ValueError("Unsupported register type {0}".format(register_type))
        # The id comes from the module-level active function.
        self.id = active_function.allocate_local_variable()
        self.address = None
        self.offset = 0
        self.parent = None

    def __eq__(self, other):
        """Two variables are equal when their ids match."""
        if not isinstance(other, LocalVariable):
            # Let Python fall back to its default handling instead of
            # failing on a missing `id` attribute.
            return NotImplemented
        return self.id == other.id

    def __hash__(self):
        return hash(self.id)

    def __str__(self):
        """Return ``[address]`` once an address is known, else a placeholder."""
        address = self.get_address()
        if address is not None:
            return "[{0}]".format(address)
        return "local-variable<{0}>".format(self.id)

    def is_subvariable(self):
        """Return True when this variable is a slice of a parent variable."""
        return self.parent is not None

    def get_parent(self):
        return self.parent

    def get_root(self):
        """Return the outermost variable this one is (transitively) part of."""
        if self.is_subvariable():
            return self.get_parent().get_root()
        return self

    def get_address(self):
        """Return the variable's address, or ``None`` if not yet assigned.

        For a sub-variable the address is the parent's address plus
        ``offset``; it is ``None`` while the parent has no address.
        """
        if not self.is_subvariable():
            return self.address
        parent_address = self.parent.get_address()
        if parent_address is None:
            return None
        return parent_address + self.offset

    def get_size(self):
        return self.size

    def get_low(self):
        """Return a new sub-variable covering the first half of this one."""
        assert self.get_size() % 2 == 0
        # Floor division keeps the size an int; "/" would produce a float,
        # which __init__ rejects.
        child = LocalVariable(self.get_size() // 2)
        child.parent = self
        child.offset = 0
        return child

    def get_high(self):
        """Return a new sub-variable covering the second half of this one."""
        assert self.get_size() % 2 == 0
        half_size = self.get_size() // 2
        child = LocalVariable(half_size)
        child.parent = self
        child.offset = half_size
        return child
2115
+
2116
+
2117
class StackFrame(object):
    """Tracks what a function's prologue and epilogue must save and restore.

    Holds the general-purpose and D registers to preserve, plus local
    variables grouped by size, and emits the matching prologue/epilogue
    instruction sequences.
    """

    def __init__(self, abi):
        super(StackFrame, self).__init__()
        self.abi = abi
        # Registers saved by the prologue and restored by the epilogue.
        self.general_purpose_registers = list()
        self.d_registers = list()
        # Local variables of 4, 8 and 16 bytes respectively.
        self.s_variables = list()
        self.d_variables = list()
        self.q_variables = list()

    @staticmethod
    def _sorted_by_physical_number(registers):
        """Return ``registers`` as a tuple ordered by physical register number."""
        return tuple(sorted(registers, key=lambda reg: reg.get_physical_number()))

    def _uses_wide_save(self, function, registers):
        """True on a V7M target when any register is numbered above 7."""
        from nervapy.arm.isa import Extension

        is_armv7m = function and Extension.V7M in function.target.extensions
        if not is_armv7m:
            return False
        return any(reg.get_physical_number() > 7 for reg in registers)

    @staticmethod
    def _uses_multiple_form(function):
        """True when STMDB/LDMIA is selected over PUSH.W/POP.W."""
        from nervapy.arm.formats import HighRegisterStrategy

        strategy = function.high_register_strategy
        return strategy == HighRegisterStrategy.STMDB or (
            strategy == HighRegisterStrategy.AUTO
            and function.assembly_format.name == "ARMCC"
        )

    def preserve_registers(self, registers):
        """Call :meth:`preserve_register` for each register in ``registers``."""
        for register in registers:
            self.preserve_register(register)

    def preserve_register(self, register):
        """Record ``register`` for saving if the ABI lists it as callee-saved.

        S registers are recorded through ``get_parent()`` and Q registers
        through their low and high D halves.

        Raises:
            TypeError: if ``register`` is not a GP, S, D or Q register.
        """
        from nervapy.arm.registers import (DRegister, GeneralPurposeRegister,
                                           QRegister, SRegister)

        if isinstance(register, GeneralPurposeRegister):
            if register not in self.general_purpose_registers:
                if register in self.abi.callee_save_registers:
                    self.general_purpose_registers.append(register)
        elif isinstance(register, SRegister):
            # NOTE(review): is_virtual is called as a method here but read as
            # an attribute elsewhere in this file; confirm which is correct.
            if not register.is_virtual():
                register = register.get_parent()
            if register not in self.d_registers:
                if register in self.abi.callee_save_registers:
                    self.d_registers.append(register)
        elif isinstance(register, DRegister):
            if register not in self.d_registers:
                if register in self.abi.callee_save_registers:
                    self.d_registers.append(register)
        elif isinstance(register, QRegister):
            d_low = register.get_low_part()
            d_high = register.get_high_part()
            # NOTE(review): the ABI membership test uses the Q register, not
            # its D halves; confirm callee_save_registers contains Q registers.
            if d_low not in self.d_registers:
                if register in self.abi.callee_save_registers:
                    self.d_registers.append(d_low)
            if d_high not in self.d_registers:
                if register in self.abi.callee_save_registers:
                    self.d_registers.append(d_high)
        else:
            raise TypeError("Unsupported register type {0}".format(type(register)))

    def force_preserve_register(self, register):
        """Add *register* to the preservation list unconditionally (no ABI check)."""
        from nervapy.arm.registers import (DRegister, GeneralPurposeRegister,
                                           QRegister, SRegister)

        if isinstance(register, GeneralPurposeRegister):
            if register not in self.general_purpose_registers:
                self.general_purpose_registers.append(register)
        elif isinstance(register, SRegister):
            if not register.is_virtual():
                register = register.get_parent()
            if register not in self.d_registers:
                self.d_registers.append(register)
        elif isinstance(register, DRegister):
            if register not in self.d_registers:
                self.d_registers.append(register)
        elif isinstance(register, QRegister):
            d_low = register.get_low_part()
            d_high = register.get_high_part()
            if d_low not in self.d_registers:
                self.d_registers.append(d_low)
            if d_high not in self.d_registers:
                self.d_registers.append(d_high)
        else:
            raise TypeError("Unsupported register type {0}".format(type(register)))

    def add_variable(self, variable):
        """Register a local variable in the list matching its size.

        4-byte variables go to ``s_variables``, 8-byte ones to
        ``d_variables`` and 16-byte ones to ``q_variables``; a variable
        already present is not added twice.

        Raises:
            TypeError: if the variable's size is not 4, 8 or 16 bytes.
        """
        size = variable.get_size()
        if size == 4:
            bucket = self.s_variables
        elif size == 8:
            bucket = self.d_variables
        elif size == 16:
            bucket = self.q_variables
        else:
            raise TypeError("Unsupported variable size {0}".format(size))
        if variable not in bucket:
            bucket.append(variable)

    def get_parameters_offset(self):
        """Return the number of bytes the prologue pushes.

        Four bytes per general-purpose register, rounded up to a multiple of
        8 (matching the padding the prologue inserts), plus eight bytes per
        D register.
        """
        parameters_offset = len(self.general_purpose_registers) * 4
        if parameters_offset % 8 == 4:
            parameters_offset += 4
        return parameters_offset + len(self.d_registers) * 8

    def generate_prologue(self):
        """Build the register-saving instruction sequence.

        General-purpose registers are pushed in physical-number order with a
        single instruction: STMDB or PUSH.W on a V7M target when a register
        above r7 is involved, plain PUSH otherwise. An odd register count is
        followed by ``SUB sp, sp, 4`` as padding. D registers are saved
        afterwards with VPUSH.

        Returns:
            list of the generated instructions.
        """
        from nervapy.arm.generic import PUSH, PUSH_W, STMDB, SUB
        from nervapy.arm.registers import sp
        from nervapy.arm.vfpneon import VPUSH
        from nervapy.stream import InstructionStream

        with InstructionStream() as instructions:
            if self.general_purpose_registers:
                saved = self._sorted_by_physical_number(
                    self.general_purpose_registers
                )
                function = self.get_function()
                if self._uses_wide_save(function, saved):
                    if self._uses_multiple_form(function):
                        STMDB(sp, saved)
                    else:
                        PUSH_W(saved)
                else:
                    PUSH(saved)
                if len(saved) % 2 == 1:
                    SUB(sp, sp, 4)

            if self.d_registers:
                VPUSH(self._sorted_by_physical_number(self.d_registers))
        return list(iter(instructions))

    def generate_epilogue(self):
        """Build the register-restoring sequence, mirroring the prologue.

        D registers are restored first with VPOP. For general-purpose
        registers the padding is released with ``ADD sp, sp, 4`` when their
        count is odd, then they are restored with LDMIA / POP.W / POP chosen
        by the same rules as in the prologue.

        Returns:
            list of the generated instructions.
        """
        from nervapy.arm.generic import ADD, LDMIA, POP, POP_W
        from nervapy.arm.registers import sp
        from nervapy.arm.vfpneon import VPOP
        from nervapy.stream import InstructionStream

        with InstructionStream() as instructions:
            if self.d_registers:
                VPOP(self._sorted_by_physical_number(self.d_registers))

            if self.general_purpose_registers:
                saved = self._sorted_by_physical_number(
                    self.general_purpose_registers
                )
                function = self.get_function()
                wide = self._uses_wide_save(function, saved)
                multiple_form = wide and self._uses_multiple_form(function)
                if len(saved) % 2 == 1:
                    ADD(sp, sp, 4)
                if multiple_form:
                    LDMIA(sp, saved)
                elif wide:
                    POP_W(saved)
                else:
                    POP(saved)
        return list(iter(instructions))

    def get_function(self):
        """Get the active function that owns this stack frame."""
        from nervapy.arm.function import active_function

        return active_function
2380
+
2381
+
2382
def print_live_registers(label=""):
    """Print the registers that are live at this point of code generation.

    Intended to be called inside a ``Function`` context. Delegates to the
    active function's ``print_live_registers``; when there is no active
    function a short notice is printed instead.

    Note: liveness is computed during function compilation, so the output is
    an approximation based on the instructions emitted so far.

    Args:
        label: Optional text identifying the location in the generated code.

    Example:
        with Function("my_func", args, ...):
            t0 = GeneralPurposeRegister()
            ADD(t0, r0, r1)
            print_live_registers("after ADD")
    """
    current_function = active_function
    if current_function is not None:
        current_function.print_live_registers(label)
        return
    print(f"Live registers {label}: No active function")