PyNerva 0.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of PyNerva might be problematic. Click here for more details.

Files changed (74) hide show
  1. nervapy/__init__.py +50 -0
  2. nervapy/abi.py +91 -0
  3. nervapy/arm/__init__.py +124 -0
  4. nervapy/arm/__main__.py +0 -0
  5. nervapy/arm/abi.py +138 -0
  6. nervapy/arm/formats.py +49 -0
  7. nervapy/arm/function.py +2405 -0
  8. nervapy/arm/generic.py +10797 -0
  9. nervapy/arm/instructions.py +519 -0
  10. nervapy/arm/isa.py +409 -0
  11. nervapy/arm/literal_pool.py +331 -0
  12. nervapy/arm/microarchitecture.py +211 -0
  13. nervapy/arm/pseudo.py +652 -0
  14. nervapy/arm/registers.py +1458 -0
  15. nervapy/arm/vfpneon.py +4092 -0
  16. nervapy/arm.py +13 -0
  17. nervapy/c/__init__.py +1 -0
  18. nervapy/c/types.py +436 -0
  19. nervapy/codegen.py +99 -0
  20. nervapy/common/__init__.py +4 -0
  21. nervapy/common/function.py +5 -0
  22. nervapy/common/regalloc.py +121 -0
  23. nervapy/constant_data.py +282 -0
  24. nervapy/encoder.py +246 -0
  25. nervapy/formats/__init__.py +2 -0
  26. nervapy/formats/elf/__init__.py +4 -0
  27. nervapy/formats/elf/file.py +178 -0
  28. nervapy/formats/elf/image.py +106 -0
  29. nervapy/formats/elf/section.py +422 -0
  30. nervapy/formats/elf/symbol.py +281 -0
  31. nervapy/formats/macho/__init__.py +2 -0
  32. nervapy/formats/macho/file.py +123 -0
  33. nervapy/formats/macho/image.py +143 -0
  34. nervapy/formats/macho/section.py +322 -0
  35. nervapy/formats/macho/symbol.py +158 -0
  36. nervapy/formats/mscoff/__init__.py +8 -0
  37. nervapy/formats/mscoff/image.py +132 -0
  38. nervapy/formats/mscoff/section.py +181 -0
  39. nervapy/formats/mscoff/symbol.py +148 -0
  40. nervapy/function.py +136 -0
  41. nervapy/literal.py +731 -0
  42. nervapy/loader.py +188 -0
  43. nervapy/name.py +159 -0
  44. nervapy/parse.py +52 -0
  45. nervapy/stream.py +58 -0
  46. nervapy/util.py +126 -0
  47. nervapy/writer.py +518 -0
  48. nervapy/x86_64/__init__.py +324 -0
  49. nervapy/x86_64/__main__.py +407 -0
  50. nervapy/x86_64/abi.py +517 -0
  51. nervapy/x86_64/amd.py +6464 -0
  52. nervapy/x86_64/avx.py +102029 -0
  53. nervapy/x86_64/crypto.py +1533 -0
  54. nervapy/x86_64/encoding.py +424 -0
  55. nervapy/x86_64/fma.py +19138 -0
  56. nervapy/x86_64/function.py +2707 -0
  57. nervapy/x86_64/generic.py +23384 -0
  58. nervapy/x86_64/instructions.py +500 -0
  59. nervapy/x86_64/isa.py +476 -0
  60. nervapy/x86_64/lower.py +126 -0
  61. nervapy/x86_64/mask.py +2593 -0
  62. nervapy/x86_64/meta.py +143 -0
  63. nervapy/x86_64/mmxsse.py +17265 -0
  64. nervapy/x86_64/nacl.py +327 -0
  65. nervapy/x86_64/operand.py +1204 -0
  66. nervapy/x86_64/options.py +21 -0
  67. nervapy/x86_64/pseudo.py +686 -0
  68. nervapy/x86_64/registers.py +1225 -0
  69. nervapy/x86_64/types.py +17 -0
  70. nervapy/x86_64/uarch.py +580 -0
  71. pynerva-0.0.5.dist-info/METADATA +310 -0
  72. pynerva-0.0.5.dist-info/RECORD +74 -0
  73. pynerva-0.0.5.dist-info/WHEEL +4 -0
  74. pynerva-0.0.5.dist-info/licenses/LICENSE.rst +15 -0
@@ -0,0 +1,2707 @@
1
+ # This file is part of PeachPy package and is licensed under the Simplified BSD license.
2
+ # See license.rst for the full text of the license.
3
+
4
+ from __future__ import print_function
5
+
6
+ import bisect
7
+ import collections
8
+ import operator
9
+ import os
10
+
11
+ import six
12
+
13
+ import nervapy
14
+ import nervapy.name
15
+ import nervapy.writer
16
+ import nervapy.x86_64.avx
17
+ import nervapy.x86_64.instructions
18
+ import nervapy.x86_64.meta
19
+ import nervapy.x86_64.options
20
+ import nervapy.x86_64.registers
21
+
22
+
23
+ class Function:
24
+ """Generalized x86-64 assembly function.
25
+
26
+ A function consists of C signature and a list of instructions.
27
+
28
+ On this level the function is supposed to be compatible with multiple ABIs. In particular, instructions may have
29
+ virtual registers, and instruction stream may contain pseudo-instructions, such as LOAD.ARGUMENT or RETURN.
30
+ """
31
+
32
def __init__(
    self,
    name,
    arguments,
    result_type=None,
    package=None,
    target=None,
    debug_level=None,
):
    """
    :param str name: name of the function without mangling (as in C language).
    :param tuple arguments: a tuple of :class:`nervapy.Argument` objects.
    :param Type result_type: the return type of the function. None if the function returns no value (void function).
    :param str package: the name of the Go package containing this function. When None, the value of
        ``nervapy.x86_64.options.package`` is used.
    :param Microarchitecture target: the target microarchitecture for this function. When None, falls back to
        ``nervapy.x86_64.options.target`` and then to ``nervapy.x86_64.uarch.default``.
    :param int debug_level: the verbosity level for debug information collected for instructions. 0 means no
        debug information, 1 and above enables information about the lines of Python code that originated an
        instruction. Collecting debug information increases processing time by several times. When None, the
        value of ``nervapy.x86_64.options.debug_level`` is used.
    :raises TypeError: if the resolved target is not a Microarchitecture object.

    :ivar Label entry: a label that marks the entry point of the function. A user can place the entry point in any
        place in the function by defining this label with LABEL pseudo-instruction. If this label is not defined
        by the user, it will be placed automatically before the first instruction of the function.

    :ivar int _indent_level: the level of indentation for this instruction in assembly listings. Indentation level
        is changed by Loop statements.
    :ivar list _instructions: the list of :class:`Instruction` objects that comprise the function code.

    :ivar set _label_names: a set of names of LABEL quasi-instructions in the function. The set is populated
        as instructions are added and is used to verify that every referenced label is defined.
    """
    self.name = name
    self.arguments = arguments
    self.result_type = result_type
    # Resolve the default BEFORE storing the attribute; previously the fallback value
    # was immediately overwritten by the (None) argument.
    if package is None:
        package = nervapy.x86_64.options.package
    self.package = package
    if target is None:
        target = nervapy.x86_64.options.target
        if target is None:
            target = nervapy.x86_64.uarch.default
    if not isinstance(target, nervapy.x86_64.uarch.Microarchitecture):
        raise TypeError("%s is not an valid CPU target" % str(target))
    self.target = target
    if debug_level is None:
        self.debug_level = nervapy.x86_64.options.debug_level
    else:
        self.debug_level = int(debug_level)

    from nervapy.name import Name
    from nervapy.x86_64.pseudo import Label

    self.entry = Label((Name("__entry__", None),))

    self._indent_level = 1

    self._instructions = list()

    # This set is only used to ensure that all labels referenced in branches are defined
    self._label_names = set()
    # Map from id of Name objects to their copies.
    # This ensures that Name objects without name can be compared for equality using id
    self._names_memo = dict()
    self._scope = nervapy.name.Namespace(None)

    self._local_variables_count = 0
    self._virtual_general_purpose_registers_count = 0
    self._virtual_mmx_registers_count = 0
    self._virtual_xmm_registers_count = 0
    self._virtual_mask_registers_count = 0

    from nervapy.x86_64 import m256, m256d, m256i

    # The function is in an "AVX environment" when any argument or the result is a 256-bit vector type
    avx_types = [m256, m256d, m256i]
    self.avx_environment = (
        any(arg.c_type in avx_types for arg in self.arguments)
        or self.result_type in avx_types
    )
    self._avx_prolog = None

    from nervapy.common import RegisterAllocator
    from nervapy.x86_64.registers import (GeneralPurposeRegister,
                                          KRegister, MMXRegister,
                                          XMMRegister)

    # One independent allocator per register kind
    self._register_allocators = {
        GeneralPurposeRegister._kind: RegisterAllocator(),
        MMXRegister._kind: RegisterAllocator(),
        XMMRegister._kind: RegisterAllocator(),
        KRegister._kind: RegisterAllocator(),
    }
124
+
125
@property
def c_signature(self):
    """C prototype of the function: return type, name and the parameter list.

    A function without a result type is rendered with a ``void`` return type.
    """
    if self.result_type is None:
        return_type = "void"
    else:
        return_type = str(self.result_type)
    parameters = ", ".join(str(argument) for argument in self.arguments)
    return return_type + " " + self.name + "(" + parameters + ")"
133
+
134
@property
def go_signature(self):
    """Go signature (including parameter names) for the function.

    None if any function argument type or the return type has no Go equivalent.
    """

    def c_to_go_type(c_type):
        # Returns the Go spelling of a C type, or None when there is no Go analog
        assert isinstance(c_type, nervapy.Type)
        if c_type.is_pointer:
            if c_type.base is None:
                return "uintptr"
            go_base_type = c_to_go_type(c_type.base)
            # A pointer to a type without a Go analog has no Go analog either
            return None if go_base_type is None else "*" + go_base_type
        elif c_type.is_bool:
            # Go's predeclared boolean type is spelled "bool"
            return "bool"
        elif c_type.is_size_integer:
            return "int" if c_type.is_signed_integer else "uint"
        elif c_type.is_signed_integer:
            # .get: an unsupported size means "no Go analog" rather than a KeyError
            return {1: "int8", 2: "int16", 4: "int32", 8: "int64"}.get(c_type.size)
        elif c_type.is_unsigned_integer:
            return {1: "uint8", 2: "uint16", 4: "uint32", 8: "uint64"}.get(c_type.size)
        elif c_type.is_floating_point:
            return {4: "float32", 8: "float64"}.get(c_type.size)
        else:
            return None

    go_argument_types = [c_to_go_type(argument.c_type) for argument in self.arguments]
    # Some of the C types do not have a Go analog
    if not all(go_argument_types):
        return None

    go_arguments = ", ".join(
        " ".join((argument.name, go_type))
        for argument, go_type in zip(self.arguments, go_argument_types)
    )
    if self.result_type is None:
        return "func %s(%s)" % (self.name, go_arguments)

    go_result_type = c_to_go_type(self.result_type)
    if go_result_type is None:
        return None
    return "func %s(%s) %s" % (self.name, go_arguments, go_result_type)
183
+
184
@property
def isa_extensions(self):
    """ISA extensions used by the instructions currently in the function.

    The per-instruction ``isa_extensions`` collections are merged into one set and
    wrapped into an ``Extensions`` object.
    """
    from nervapy.x86_64.isa import Extensions

    required_extensions = set().union(
        *(instruction.isa_extensions for instruction in self._instructions)
    )
    return Extensions(*required_extensions)
192
+
193
def __enter__(self):
    """Enter the ``with`` block: attach the function and return it as the managed object."""
    self.attach()
    return self
196
+
197
def __exit__(self, exc_type, exc_value, traceback):
    """Leave the ``with`` block: detach the function and, on success, post-process it.

    The function is always detached first. If the block completed without an exception,
    labels are normalized, the instruction stream is analyzed, registers are checked,
    pre-allocated and bound, scoped names are assigned, and - when a default ABI is
    configured in ``nervapy.x86_64.options.abi`` - the function is finalized for that ABI
    and handed to every active writer.

    :returns: None on success; False when an exception is in flight, so that the
        ``with`` statement propagates the original exception.
    """
    self.detach()
    if exc_type is not None:
        # The context-manager protocol asks __exit__ not to re-raise the passed-in
        # exception: returning a false value makes the caller propagate it. A bare
        # ``raise`` here also fails when __exit__ is invoked outside an except handler.
        return False

    self._add_default_labels()
    self._check_undefined_labels()
    self._remove_unused_labels()
    self._analize()
    if nervapy.x86_64.options.rtl_dump_file:
        nervapy.x86_64.options.rtl_dump_file.write(self.format_instructions())
    self._check_live_registers()
    self._preallocate_registers()
    self._bind_registers()
    self._scope.assign_names()
    if nervapy.x86_64.options.abi is not None:
        abi_function = self.finalize(nervapy.x86_64.options.abi)

        for writer in nervapy.writer.active_writers:
            writer.add_function(abi_function)
217
+
218
def attach(self):
    """Make this function the active function and the active instruction stream.

    While the instruction stream is active, generated instructions are added to this function.

    While the function is active, generated instructions are checked for compatibility with the function target.

    :raises ValueError: if another function or another instruction stream is already active.
    :returns: this function object.
    """
    import nervapy.common.function
    import nervapy.stream

    current_function = nervapy.common.function.active_function
    if current_function is not None:
        raise ValueError(
            "Can not attach the function: alternative function %s is active"
            % current_function.name
        )
    if nervapy.stream.active_stream is not None:
        raise ValueError(
            "Can not attach the function instruction stream: alternative instruction stream is active"
        )

    # Register this object both as the active function and as the active stream
    nervapy.stream.active_stream = self
    nervapy.common.function.active_function = self
    return self
240
+
241
def detach(self):
    """Deactivate the function and its associated instruction stream.

    The function must be the currently active function when this method is called.

    :raises ValueError: if no function is active, or a different function is active.
    :returns: this function object.
    """
    import nervapy.common.function
    import nervapy.stream

    current_function = nervapy.common.function.active_function
    if current_function is None:
        raise ValueError("Can not detach the function: no function is active")
    if current_function is not self:
        raise ValueError(
            "Can not detach the function: a different function is active"
        )

    # Clear both global slots
    nervapy.common.function.active_function = None
    nervapy.stream.active_stream = None
    return self
258
+
259
@staticmethod
def _check_arguments(args):
    """Validate a collection of function arguments.

    :param args: a list or tuple of :class:`nervapy.Argument` objects.
    :raises TypeError: if args is not a list/tuple or contains a non-Argument element.
    :raises ValueError: if two arguments share the same explicit name.
    """
    # Check types
    if not isinstance(args, (list, tuple)):
        raise TypeError(
            "Invalid arguments types (%s): a tuple or list of function arguments expected"
            % str(args)
        )

    # Argument is not bound at module level in this file; import it locally,
    # the same way _find_argument does.
    from nervapy import Argument

    for i, arg in enumerate(args):
        if not isinstance(arg, Argument):
            raise TypeError(
                "Invalid argument object for argument #%d (%s): nervapy.Argument expected"
                % (i, str(arg))
            )

    # mapping from argument name to argument number
    names = dict()

    # Check explicitly named arguments for duplicates (unnamed arguments are skipped)
    for i, arg in enumerate(args):
        if arg.name:
            if arg.name in names:
                raise ValueError(
                    "Argument #%d (%s) has the same name as argument #%d (%s)"
                    % (i, str(arg), names[arg.name], args[names[arg.name]])
                )
            names[arg.name] = i
285
+
286
def _find_argument(self, argument_target):
    """Look up one of the function's arguments.

    :param argument_target: either an Argument object or an argument name (str).
    :returns: the matching argument from ``self.arguments``, or None if there is no match.
    """
    from nervapy import Argument

    assert isinstance(
        argument_target, (Argument, str)
    ), "Either Argument object or argument name expected"

    if isinstance(argument_target, Argument):
        # Lookup by object: succeed only if the object belongs to this function
        return argument_target if argument_target in self.arguments else None

    # Lookup by name: the first argument with a matching name wins
    for candidate in self.arguments:
        if candidate.name == argument_target:
            return candidate
    return None
306
+
307
def add_instruction(self, instruction):
    """Append an instruction to the function's instruction list.

    None is silently ignored. LABEL identifiers and constant names are registered in
    the function scope, and each ISA extension of the instruction is checked against
    the target (when a target is set).

    :raises TypeError: if instruction is neither None nor an Instruction object.
    :raises ValueError: if the instruction needs an ISA extension the target lacks.
    """
    # Nothing to add
    if instruction is None:
        return

    from nervapy.x86_64.instructions import Instruction

    if not isinstance(instruction, Instruction):
        raise TypeError("Instruction object expected")

    from nervapy.x86_64.pseudo import LABEL

    # Register the label name in the scope and remember it as defined
    if isinstance(instruction, LABEL):
        label_name = instruction.identifier
        self._scope.add_scoped_name(label_name)
        self._label_names.add(label_name)

    # Register the name of the literal constant, if the instruction has one
    constant = instruction.constant
    if constant is not None:
        self._scope.add_scoped_name(constant.name)

    # Every ISA extension used by the instruction must be available on the target
    for extension in instruction.isa_extensions:
        if self.target is None:
            continue
        if extension not in self.target.extensions:
            raise ValueError(
                "{0} is not supported on the target microarchitecture".format(
                    extension
                )
            )

    instruction._indent_level = self._indent_level
    self._instructions.append(instruction)
339
+
340
def add_instructions(self, instructions):
    """Add every instruction of an iterable, in order, via :meth:`add_instruction`."""
    for item in instructions:
        self.add_instruction(item)
343
+
344
def finalize(self, abi):
    """Wrap this function into an ABIFunction for the given ABI.

    :param ABI abi: the ABI to finalize the function for.
    :raises TypeError: if abi is not an ABI object.
    :returns: a new ABIFunction constructed from this function and the ABI.
    """
    from nervapy.x86_64.abi import ABI

    if isinstance(abi, ABI):
        return ABIFunction(self, abi)
    raise TypeError("%s is not an ABI object" % str(abi))
350
+
351
def _add_default_labels(self):
    """Insert the entry label at the start of the function unless it is already in scope."""

    from nervapy.x86_64.pseudo import LABEL

    # The entry label name is already registered in the scope: nothing to do
    if self.entry.name in self._scope.names:
        return

    # Put the entry label in front of everything and register its name
    self._instructions.insert(0, LABEL(self.entry))
    self._scope.add_scoped_name(self.entry.name)
    self._label_names.add(self.entry.name)
360
+
361
def _check_undefined_labels(self):
    """Ensure every label named by a branch instruction is defined in the function.

    :raises ValueError: listing the referenced labels missing from ``_label_names``.
    """

    from nervapy.x86_64.instructions import BranchInstruction

    # Collect the label names used as branch targets
    branch_targets = {
        instruction.label_name
        for instruction in self._instructions
        if isinstance(instruction, BranchInstruction) and instruction.label_name
    }

    missing_label_names = branch_targets.difference(self._label_names)
    if missing_label_names:
        # A label name is a sequence of parts; render it as a dotted path
        rendered_names = (
            ".".join(map(str, label_name)) for label_name in missing_label_names
        )
        raise ValueError("Undefined labels found: " + ", ".join(rendered_names))
378
+
379
def _remove_unused_labels(self):
    """Drop LABEL pseudo-instructions whose names no branch instruction refers to.

    The entry label is kept when it is not the very first instruction of the function.
    """

    from nervapy.x86_64.instructions import BranchInstruction
    from nervapy.x86_64.pseudo import LABEL

    # Names of labels used as branch targets
    branch_targets = {
        instruction.label_name
        for instruction in self._instructions
        if isinstance(instruction, BranchInstruction) and instruction.label_name
    }
    removable_names = self._label_names.difference(branch_targets)

    # Do not remove entry label if it is in the middle of the function
    if self.entry.name in removable_names:
        first_instruction = self._instructions[0]
        entry_is_elsewhere = (
            not isinstance(first_instruction, LABEL)
            or first_instruction.identifier != self.entry.name
        )
        if entry_is_elsewhere:
            removable_names.remove(self.entry.name)

    # Rebuild the instruction list without the removable LABEL pseudo-instructions
    kept_instructions = []
    for instruction in self._instructions:
        if isinstance(instruction, LABEL) and instruction.identifier in removable_names:
            continue
        kept_instructions.append(instruction)
    self._instructions = kept_instructions

    self._label_names.difference_update(removable_names)
405
+
406
+ def _analize(self):
407
+ from nervapy.x86_64.generic import RET
408
+ from nervapy.x86_64.instructions import BranchInstruction
409
+ from nervapy.x86_64.pseudo import LABEL, RETURN
410
+
411
+ # Query input/output registers for each instruction
412
+ input_registers = []
413
+ output_registers = []
414
+ for instruction in self._instructions:
415
+ input_registers.append(instruction.input_registers_masks)
416
+ output_registers.append(instruction.output_registers_masks)
417
+
418
+ # Map from label name to its quasi-instruction number in the stream
419
+ labels = {
420
+ instruction.identifier: i
421
+ for (i, instruction) in enumerate(self._instructions)
422
+ if isinstance(instruction, LABEL)
423
+ }
424
+ entry_position = 0
425
+ if self.entry.name in self._label_names:
426
+ entry_position = labels[self.entry.name]
427
+ branch_instructions = [
428
+ (i, instruction)
429
+ for (i, instruction) in enumerate(self._instructions)
430
+ if isinstance(instruction, BranchInstruction) and instruction.label_name
431
+ ]
432
+ # Basic blocks start at function entry position or on branch target
433
+ basic_block_starts = {entry_position}
434
+ for i, branch_instruction in branch_instructions:
435
+ basic_block_starts.add(labels[branch_instruction.label_name])
436
+ if branch_instruction.is_conditional:
437
+ basic_block_starts.add(i + 1)
438
+ basic_block_starts = sorted(basic_block_starts)
439
+ # Basic block ends on a referenced label instruction or right after return/branch instructions
440
+ basic_block_ends = [
441
+ i + int(not isinstance(instruction, LABEL))
442
+ for (i, instruction) in enumerate(self._instructions)
443
+ if isinstance(instruction, (BranchInstruction, RETURN, RET, LABEL))
444
+ ]
445
+ # TODO: check that the last block with an unconditional branch/return instruction
446
+ basic_block_bounds = [
447
+ (start, basic_block_ends[bisect.bisect_right(basic_block_ends, start)])
448
+ for start in basic_block_starts
449
+ ]
450
+
451
+ class BasicBlock:
452
+ def __init__(
453
+ self,
454
+ start_position,
455
+ end_position,
456
+ input_registers_list,
457
+ output_registers_list,
458
+ ):
459
+ self.start_position = start_position
460
+ self.end_position = end_position
461
+ self.input_registers_list = input_registers_list
462
+ self.output_registers_list = output_registers_list
463
+
464
+ self.consumed_register_masks = collections.defaultdict(int)
465
+ self.produced_register_masks = collections.defaultdict(int)
466
+
467
+ self.live_register_masks = collections.defaultdict(int)
468
+ self.available_register_masks = collections.defaultdict(int)
469
+
470
+ self.is_reachable = False
471
+
472
+ self.liveness_analysis_passes = 0
473
+ self.availability_analysis_passes = 0
474
+
475
+ self.input_blocks = list()
476
+ self.output_blocks = list()
477
+
478
+ self.processed_input_blocks = set()
479
+ self.processed_output_blocks = set()
480
+
481
+ # Mark available and consumed registers:
482
+ # - If a register is consumed by an instruction but not produced by preceding instructions of the basic
483
+ # block, the register is consumed by the basic block
484
+ # - If a register is produced by an instruction, it becomes available for the subsequent instructions
485
+ # of the basic block and counts as produced by the basic block
486
+ for input_registers, output_registers in zip(
487
+ input_registers_list, output_registers_list
488
+ ):
489
+ for input_register_id, input_register_mask in six.iteritems(
490
+ input_registers
491
+ ):
492
+ consumed_mask = (
493
+ input_register_mask
494
+ & ~self.produced_register_masks[input_register_id]
495
+ )
496
+ if consumed_mask != 0:
497
+ self.consumed_register_masks[
498
+ input_register_id
499
+ ] |= consumed_mask
500
+ for output_register_id, output_register_mask in six.iteritems(
501
+ output_registers
502
+ ):
503
+ self.produced_register_masks[
504
+ output_register_id
505
+ ] |= output_register_mask
506
+
507
+ def reset_processed_blocks(self):
508
+ self.processed_input_blocks = set()
509
+ self.processed_output_blocks = set()
510
+
511
+ @property
512
+ def available_registers_list(self):
513
+ from nervapy.x86_64.registers import Register
514
+
515
+ available_registers_list = []
516
+ available_registers_masks = self.available_register_masks.copy()
517
+ for output_registers in self.output_registers_list:
518
+ # Record available registers for current instruction
519
+ available_registers_list.append(available_registers_masks.copy())
520
+ # Update with output registers for current instruction
521
+ for output_register_id, output_register_mask in six.iteritems(
522
+ output_registers
523
+ ):
524
+ available_registers_masks[output_register_id] = (
525
+ available_registers_masks.get(output_register_id, 0)
526
+ | output_register_mask
527
+ )
528
+ return available_registers_list
529
+
530
+ @property
531
+ def live_registers_list(self):
532
+ from nervapy.x86_64.registers import Register
533
+
534
+ live_registers_list = []
535
+ live_registers_masks = self.live_register_masks.copy()
536
+ for input_registers, output_registers in reversed(
537
+ list(zip(self.input_registers_list, self.output_registers_list))
538
+ ):
539
+ # Mark register written by the instruction as non-live
540
+ for output_register_id, output_register_mask in six.iteritems(
541
+ output_registers
542
+ ):
543
+ if output_register_id in live_registers_masks:
544
+ new_live_register_mask = (
545
+ live_registers_masks[output_register_id]
546
+ & ~output_register_mask
547
+ )
548
+ if new_live_register_mask != 0:
549
+ live_registers_masks[output_register_id] = (
550
+ new_live_register_mask
551
+ )
552
+ else:
553
+ del live_registers_masks[output_register_id]
554
+ # Mark registers read by the instruction as live
555
+ for input_register_id, input_register_mask in six.iteritems(
556
+ input_registers
557
+ ):
558
+ live_registers_masks[input_register_id] = (
559
+ live_registers_masks.get(input_register_id, 0)
560
+ | input_register_mask
561
+ )
562
+ # Record available registers for current instruction
563
+ live_registers_list.append(live_registers_masks.copy())
564
+ live_registers_list.reverse()
565
+ return live_registers_list
566
+
567
+ def __str__(self):
568
+ return "[%d, %d)" % (self.start_position, self.end_position)
569
+
570
+ def __repr__(self):
571
+ return str(self)
572
+
573
+ def analyze_availability(self, extra_available_registers):
574
+ self.availability_analysis_passes += 1
575
+
576
+ if self.availability_analysis_passes == 1:
577
+ # First pass: add registers produced by the block and propagate further
578
+ self.available_register_masks.update(extra_available_registers)
579
+ if self.output_blocks:
580
+ # Add registers produced by this block
581
+ for produced_reg_id, produced_reg_mask in six.iteritems(
582
+ self.produced_register_masks
583
+ ):
584
+ extra_available_registers[produced_reg_id] = (
585
+ extra_available_registers.get(produced_reg_id, 0)
586
+ | produced_reg_mask
587
+ )
588
+ else:
589
+ # Subsequent passes: compute and propagate only the input registers that were not processed before
590
+ for reg_id, extra_reg_mask in list(
591
+ six.iteritems(extra_available_registers)
592
+ ):
593
+ old_reg_mask = self.available_register_masks[reg_id]
594
+ update_reg_mask = extra_reg_mask & ~old_reg_mask
595
+ if update_reg_mask != 0:
596
+ self.available_register_masks[reg_id] |= extra_reg_mask
597
+ if self.output_blocks:
598
+ if update_reg_mask != 0:
599
+ extra_available_registers[reg_id] = update_reg_mask
600
+ else:
601
+ del extra_available_registers[reg_id]
602
+
603
+ if self.output_blocks and (
604
+ extra_available_registers or self.availability_analysis_passes == 1
605
+ ):
606
+ for output_block in self.output_blocks[1:]:
607
+ # The dict needs to be copied because output blocks can change it
608
+ output_block.analyze_availability(
609
+ extra_available_registers.copy()
610
+ )
611
+ # Optimization: do not create a copy of the dict
612
+ self.output_blocks[0].analyze_availability(
613
+ extra_available_registers
614
+ )
615
+
616
+ def analyze_liveness(self, extra_live_registers):
617
+ # Update in liveness analysis consists of three steps:
618
+ # 1. Update live registers for this basic block.
619
+ # 2. Mark registers which are produced to by the basic block as non-live.
620
+ # 3. Mark registers which are consumed by the basic block as live (only on first pass).
621
+
622
+ self.liveness_analysis_passes += 1
623
+
624
+ # Steps 1 and 2
625
+ for reg_id, extra_reg_mask in list(six.iteritems(extra_live_registers)):
626
+ old_reg_mask = self.live_register_masks[reg_id]
627
+ update_reg_mask = extra_reg_mask & ~old_reg_mask
628
+ if update_reg_mask != 0:
629
+ self.live_register_masks[reg_id] |= extra_reg_mask
630
+ if self.input_blocks:
631
+ # On the first pass do not modify the extra live registers masks that are passed to input blocks
632
+ # On subsequent passes only the novel live registers need to be passed further
633
+ if self.liveness_analysis_passes == 1:
634
+ update_reg_mask = extra_reg_mask
635
+ update_reg_mask &= ~self.produced_register_masks.get(reg_id, 0)
636
+ if update_reg_mask != 0:
637
+ extra_live_registers[reg_id] = update_reg_mask
638
+ else:
639
+ del extra_live_registers[reg_id]
640
+
641
+ # Step 3
642
+ if self.input_blocks:
643
+ if self.liveness_analysis_passes == 1:
644
+ for consumed_reg_id, consumed_reg_mask in six.iteritems(
645
+ self.consumed_register_masks
646
+ ):
647
+ extra_live_registers[consumed_reg_id] = (
648
+ extra_live_registers.get(consumed_reg_id, 0)
649
+ | consumed_reg_mask
650
+ )
651
+
652
+ if self.input_blocks and (
653
+ extra_live_registers or self.liveness_analysis_passes == 1
654
+ ):
655
+ for input_block in self.input_blocks[1:]:
656
+ # The dict needs to be copied because input blocks can change it
657
+ input_block.analyze_liveness(extra_live_registers.copy())
658
+ # Optimization: do not create a copy of the dict
659
+ self.input_blocks[0].analyze_liveness(extra_live_registers)
660
+
661
+ def analyze_reachability(self):
662
+ if not self.is_reachable:
663
+ self.is_reachable = True
664
+ for output_block in self.output_blocks:
665
+ output_block.analyze_reachability()
666
+
667
+ def forward_pass(self, processing_function, instructions, input_state):
668
+ output_state = processing_function(self, instructions, input_state)
669
+ for output_block in self.output_blocks:
670
+ if output_block.start_position not in self.processed_output_blocks:
671
+ self.processed_output_blocks.add(output_block.start_position)
672
+ output_block.forward_pass(
673
+ processing_function, instructions, output_state
674
+ )
675
+
676
+ def backward_pass(self, processing_function, instructions, input_state):
677
+ output_state = processing_function(self, instructions, input_state)
678
+ for input_block in self.input_blocks:
679
+ if input_block.start_position not in self.processed_input_blocks:
680
+ self.processed_input_blocks.add(input_block.start_position)
681
+ input_block.backward_pass(
682
+ processing_function, instructions, output_state
683
+ )
684
+
685
+ def propogate_sse_avx_state_forward(self, instructions, is_avx_environment):
686
+ from nervapy.x86_64.avx import VZEROALL, VZEROUPPER
687
+ from nervapy.x86_64.pseudo import LOAD, STORE
688
+
689
+ avx_state = True if is_avx_environment else None
690
+
691
+ def propogate_forward(block, instructions, avx_state):
692
+ for instruction in instructions[
693
+ block.start_position : block.end_position
694
+ ]:
695
+ if isinstance(instruction, (VZEROUPPER, VZEROALL)):
696
+ avx_state = None
697
+ elif instruction.avx_mode is None:
698
+ # Instruction without a mode
699
+ if isinstance(
700
+ instruction,
701
+ (LOAD.ARGUMENT, STORE.RESULT, RETURN, RET, LABEL),
702
+ ):
703
+ # Some pseudo-instructions need AVX/SSE mode for lowering
704
+ instruction.avx_mode = avx_state
705
+ elif instruction.avx_mode:
706
+ # AVX-mode instruction
707
+ avx_state = True
708
+ else:
709
+ # SSE-mode instruction
710
+ if avx_state:
711
+ raise TypeError(
712
+ "AVX-mode instruction {0} follows an SSE-mode instruction".format(
713
+ instruction
714
+ )
715
+ )
716
+ avx_state = False
717
+ return avx_state
718
+
719
+ self.forward_pass(propogate_forward, instructions, avx_state)
720
+
721
+ def propogate_sse_state_backward(self, instructions, is_avx_environment):
722
+ from nervapy.x86_64.pseudo import LOAD, STORE
723
+
724
+ avx_state = True if is_avx_environment else None
725
+
726
+ def propogate_sse_backward(block, instructions, avx_state):
727
+ for instruction in reversed(
728
+ instructions[block.start_position : block.end_position]
729
+ ):
730
+ if instruction.avx_mode is not None:
731
+ avx_state = instruction.avx_mode
732
+ elif avx_state is not None and not avx_state:
733
+ if isinstance(
734
+ instruction,
735
+ (LOAD.ARGUMENT, STORE.RESULT, RETURN, RET, LABEL),
736
+ ):
737
+ instruction.avx_mode = avx_state
738
+ return avx_state
739
+
740
+ self.backward_pass(propogate_sse_backward, instructions, avx_state)
741
+
742
def propogate_avx_state_backward(self, instructions, is_avx_environment):
    """Propagate AVX mode backward through this block and its input blocks.

    Walking each block from its last instruction to its first, the AVX state follows
    every instruction that has an explicit ``avx_mode``; while the state is truthy,
    mode-less LOAD.ARGUMENT / STORE.RESULT / RETURN / RET / LABEL instructions are
    marked with it.

    :param instructions: the full instruction list indexed by block positions.
    :param is_avx_environment: when truthy the initial state is True, otherwise None.
    """
    from nervapy.x86_64.pseudo import LOAD, STORE

    avx_state = True if is_avx_environment else None

    def propogate_avx_backward(block, instructions, avx_state):
        for instruction in reversed(
            instructions[block.start_position : block.end_position]
        ):
            if instruction.avx_mode is not None:
                avx_state = instruction.avx_mode
            elif avx_state:
                if isinstance(
                    instruction,
                    (LOAD.ARGUMENT, STORE.RESULT, RETURN, RET, LABEL),
                ):
                    instruction.avx_mode = avx_state
        # backward_pass feeds this return value to the input blocks; without it every
        # predecessor block would start from None and the state would never cross a
        # block boundary (the SSE counterpart already returns its state).
        return avx_state

    self.backward_pass(propogate_avx_backward, instructions, avx_state)
761
+
762
+ basic_blocks = list(
763
+ map(
764
+ lambda start_end: BasicBlock(
765
+ start_end[0],
766
+ start_end[1],
767
+ input_registers[start_end[0] : start_end[1]],
768
+ output_registers[start_end[0] : start_end[1]],
769
+ ),
770
+ basic_block_bounds,
771
+ )
772
+ )
773
+ # Map from block start position to BasicBlock object
774
+ basic_blocks_map = {
775
+ basic_block_start: basic_block
776
+ for (basic_block_start, basic_block) in zip(
777
+ basic_block_starts, basic_blocks
778
+ )
779
+ }
780
+ # Set output basic blocks for each basic block object
781
+ for i, basic_block in enumerate(basic_blocks):
782
+ # Consider last instruction of the basic block
783
+ last_instruction = self._instructions[basic_block.end_position - 1]
784
+ if isinstance(last_instruction, (RET, RETURN)):
785
+ # Basic block that ends with a return instruction has no output blocks
786
+ pass
787
+ elif (
788
+ isinstance(last_instruction, BranchInstruction)
789
+ and last_instruction.label_name
790
+ ):
791
+ # Basic block that ends with a branch instruction can jump to the block at branch label
792
+ target_position = labels[last_instruction.label_name]
793
+ basic_block.output_blocks = [basic_blocks_map[target_position]]
794
+ if last_instruction.is_conditional:
795
+ # Basic blocks that end with a conditional branch instruction can fall through to the next block
796
+ basic_block.output_blocks.append(basic_blocks[i + 1])
797
+ else:
798
+ # Basic block ends before a label and continues to the next basic block
799
+ basic_block.output_blocks = [basic_blocks[i + 1]]
800
+ # Set input basic blocks for each basic block object
801
+ for basic_block in basic_blocks:
802
+ basic_block.input_blocks = list(
803
+ filter(lambda bb: basic_block in bb.output_blocks, basic_blocks)
804
+ )
805
+
806
+ # Analyze which blocks can be reached from the entry point
807
+ basic_blocks_map[entry_position].analyze_reachability()
808
+ exit_positions = [
809
+ block.start_position for block in basic_blocks if not block.output_blocks
810
+ ]
811
+
812
+ # Analyze register lifetime
813
+ basic_blocks_map[entry_position].analyze_availability(dict())
814
+ for exit_position in exit_positions:
815
+ basic_blocks_map[exit_position].analyze_liveness(dict())
816
+
817
+ # Analyze SSE/AVX mode
818
+ basic_blocks_map[entry_position].propogate_sse_avx_state_forward(
819
+ self._instructions, self.avx_environment
820
+ )
821
+ for exit_position in exit_positions:
822
+ basic_blocks_map[exit_position].propogate_sse_state_backward(
823
+ self._instructions, self.avx_environment
824
+ )
825
+ for basic_block in basic_blocks:
826
+ basic_block.reset_processed_blocks()
827
+ for exit_position in exit_positions:
828
+ basic_blocks_map[exit_position].propogate_avx_state_backward(
829
+ self._instructions, self.avx_environment
830
+ )
831
+ self._avx_prolog = self._instructions[entry_position].avx_mode
832
+
833
+ # Reconstruct live and available registers for the whole instruction sequence
834
+ for basic_block in basic_blocks:
835
+ for instruction, available_registers, live_registers in zip(
836
+ self._instructions[
837
+ basic_block.start_position : basic_block.end_position
838
+ ],
839
+ basic_block.available_registers_list,
840
+ basic_block.live_registers_list,
841
+ ):
842
+ instruction._live_registers = live_registers
843
+ instruction._available_registers = available_registers
844
+ # Remove referenced to input/output blocks to avoid memory leaks due to cycles in ref graph
845
+ basic_block.input_blocks = None
846
+ basic_block.output_blocks = None
847
+
848
+ # Analyze conflicting registers
849
+ output_registers = set()
850
+ for instruction in self._instructions:
851
+ instruction_registers = instruction.input_registers
852
+ instruction_registers.update(output_registers)
853
+ for instruction_register in instruction_registers:
854
+ if instruction_register.is_virtual:
855
+ conflict_internal_ids = [
856
+ reg_id
857
+ for (reg_id, reg_mask) in six.iteritems(
858
+ instruction._live_registers
859
+ )
860
+ if reg_mask & instruction_register.mask != 0
861
+ ]
862
+ self._register_allocators[instruction_register.kind].add_conflicts(
863
+ instruction_register.virtual_id, conflict_internal_ids
864
+ )
865
+ physical_registers = [r for r in instruction_registers if not r.is_virtual]
866
+ if physical_registers:
867
+ from nervapy.x86_64.registers import Register
868
+
869
+ live_virtual_registers = Register._reconstruct_multiple(
870
+ {
871
+ reg_id: reg_mask
872
+ for (reg_id, reg_mask) in six.iteritems(
873
+ instruction._live_registers
874
+ )
875
+ if reg_id < 0
876
+ }
877
+ )
878
+ for live_virtual_register in live_virtual_registers:
879
+ conflict_internal_ids = [
880
+ reg._internal_id
881
+ for reg in physical_registers
882
+ if reg.mask & live_virtual_register.mask != 0
883
+ ]
884
+ self._register_allocators[live_virtual_register.kind].add_conflicts(
885
+ live_virtual_register.virtual_id, conflict_internal_ids
886
+ )
887
+ output_registers = instruction.output_registers
888
+
889
def _check_live_registers(self):
    """Verify that no instruction has more live registers of a kind than the per-kind limit.

    :raises nervapy.RegisterAllocationError: if, at some instruction, the live
        registers of any kind outnumber the limit configured for that kind. The
        message includes the source location when the instruction carries one.
    """
    from nervapy.x86_64.registers import (
        GeneralPurposeRegister,
        KRegister,
        MMXRegister,
        XMMRegister,
    )

    # Per-kind limit on simultaneously live registers
    register_budget = {
        GeneralPurposeRegister._kind: 15,
        MMXRegister._kind: 8,
        XMMRegister._kind: 16,
        KRegister._kind: 8,
    }
    for instruction in self._instructions:
        remaining = dict(register_budget)
        for live_register in instruction.live_registers:
            remaining[live_register.kind] = remaining[live_register.kind] - 1
        if min(six.itervalues(remaining)) >= 0:
            # Every kind still fits into its budget
            continue

        location_unknown = (
            instruction.source_file is None or instruction.line_number is None
        )
        if location_unknown:
            message = (
                "The number of live virtual registers exceeds physical constraints %s"
                % str(instruction)
            )
        else:
            message = (
                "The number of live virtual registers exceeds physical constraints %s at %s:%d"
                % (
                    str(instruction),
                    instruction.source_file,
                    instruction.line_number,
                )
            )
        raise nervapy.RegisterAllocationError(message)
925
+
926
def _preallocate_registers(self):
    """Allocates registers that can be bound only to a single physical register.

    Several instructions accept only a fixed register as one of their operands.
    If a virtual register is supplied as such an operand, it must be bound to the
    fixed register required by the instruction encoding.

    These instructions are:

    - BLENDVPS xmm, xmm/m128, xmm0
    - BLENDVPD xmm, xmm/m128, xmm0
    - PBLENDVB xmm, xmm/m128, xmm0
    - SHA256RNDS2 xmm, xmm/m128, xmm0
    - SHR r/m, cl
    - SAR r/m, cl
    - SAL r/m, cl
    - SHL r/m, cl
    - ROR r/m, cl
    - ROL r/m, cl
    - RCR r/m, cl
    - RCL r/m, cl
    - SHRD r/m, r, cl
    - SHLD r/m, r, cl

    :raises RegisterAllocationError: if the fixed register is live at an
        instruction that needs it, or if two conflicting virtual registers both
        have to be bound to the same fixed register.
    """

    from nervapy import RegisterAllocationError
    from nervapy.x86_64.registers import (
        GeneralPurposeRegister,
        GeneralPurposeRegister8,
        XMMRegister,
        cl,
        xmm0,
    )

    # Instruction name -> zero-based position of the shift/rotate count operand
    count_operand_positions = {
        "SAL": 1,
        "SAR": 1,
        "SHL": 1,
        "SHR": 1,
        "ROL": 1,
        "ROR": 1,
        "RCL": 1,
        "RCR": 1,
        "SHLD": 2,
        "SHRD": 2,
    }

    cl_binded_registers = set()
    xmm0_binded_registers = set()
    for instruction in self._instructions:
        if instruction.name in {"BLENDVPD", "BLENDVPS", "PBLENDVB", "SHA256RNDS2"}:
            assert (
                len(instruction.operands) == 3
            ), "expected 3 operands, got %d (%s)" % (
                len(instruction.operands),
                ", ".join(map(str, instruction.operands)),
            )
            xmm0_operand = instruction.operands[2]
            assert isinstance(
                xmm0_operand, XMMRegister
            ), "expected xmm registers in the 3rd operand, got %s" % str(
                xmm0_operand
            )
            if xmm0_operand.is_virtual:
                # Check that xmm0 is not live at this instruction
                if (
                    instruction._live_registers.get(xmm0._internal_id, 0)
                    & XMMRegister._mask
                    != 0
                ):
                    raise RegisterAllocationError(
                        (
                            "Instruction %s requires operand 3 to be allocated to xmm0 register, "
                            + "but xmm0 is a live register"
                        )
                        % str(instruction.name)
                    )
                xmm0_binded_registers.add(xmm0_operand._internal_id)
        elif instruction.name in count_operand_positions:
            count_position = count_operand_positions[instruction.name]
            operand_count = count_position + 1
            assert (
                len(instruction.operands) == operand_count
            ), "expected %d operands, got %d (%s)" % (
                operand_count,
                len(instruction.operands),
                ", ".join(map(str, instruction.operands)),
            )
            count_operand = instruction.operands[count_position]
            # The count operand can be cl or imm8
            if (
                isinstance(count_operand, GeneralPurposeRegister8)
                and count_operand.is_virtual
            ):
                # Check that cl is not live at this instruction
                if (
                    instruction._live_registers.get(cl._internal_id, 0)
                    & GeneralPurposeRegister8._mask
                    != 0
                ):
                    # The whole concatenated template is parenthesised before
                    # formatting: "%" binds tighter than "+", so formatting only
                    # the second half would raise TypeError instead of this error.
                    raise RegisterAllocationError(
                        (
                            "Instruction %s requires operand %d to be allocated to cl register, "
                            + "but cl is a live register"
                        )
                        % (instruction.name, operand_count)
                    )

                cl_binded_registers.add(count_operand._internal_id)

    # Check that cl-bound registers are not mutually conflicting
    # NOTE(review): index 1 is presumably the general-purpose register kind - confirm
    for cl_register in cl_binded_registers:
        conflicting_registers = self._conflicting_registers[1][cl_register]
        if any(
            other_register != cl_register
            and other_register in conflicting_registers
            for other_register in cl_binded_registers
        ):
            raise RegisterAllocationError(
                "Two conflicting virtual registers are required to bind to cl"
            )

    # Check that xmm0-bound registers are not mutually conflicting
    # NOTE(review): index 3 is presumably the XMM register kind - confirm
    for xmm0_register in xmm0_binded_registers:
        conflicting_registers = self._conflicting_registers[3][xmm0_register]
        if any(
            other_register != xmm0_register
            and other_register in conflicting_registers
            for other_register in xmm0_binded_registers
        ):
            raise RegisterAllocationError(
                "Two conflicting virtual registers are required to bind to xmm0"
            )

    # Commit register allocations
    for cl_register in cl_binded_registers:
        self._register_allocations[GeneralPurposeRegister._kind][
            cl_register
        ] = cl.physical_id
    for xmm0_register in xmm0_binded_registers:
        self._register_allocations[XMMRegister._kind][
            xmm0_register
        ] = xmm0.physical_id
1085
+
1086
+ def _bind_registers(self):
1087
+ """Iterates through the list of instructions and assigns physical IDs to allocated registers"""
1088
+
1089
+ for instruction in self._instructions:
1090
+ for register in instruction.register_objects:
1091
+ if register.is_virtual:
1092
+ register.physical_id = self._register_allocators[
1093
+ register.kind
1094
+ ].register_allocations.get(
1095
+ register._internal_id, register.physical_id
1096
+ )
1097
+
1098
+ def _allocate_local_variable(self):
1099
+ """Returns a new unique ID for a local variable"""
1100
+ self._local_variables_count += 1
1101
+ return self._local_variables_count
1102
+
1103
+ def _allocate_mask_register_id(self):
1104
+ """Returns a new unique ID for a virtual mask (k) register"""
1105
+ self._virtual_mask_registers_count += 1
1106
+ return self._virtual_mask_registers_count
1107
+
1108
+ def _allocate_xmm_register_id(self):
1109
+ """Returns a new unique ID for a virtual SSE/AVX/AVX-512 (xmm/ymm/zmm) register"""
1110
+ self._virtual_xmm_registers_count += 1
1111
+ return self._virtual_xmm_registers_count
1112
+
1113
+ def _allocate_mmx_register_id(self):
1114
+ """Returns a new unique ID for a virtual MMX (mm) register"""
1115
+ self._virtual_mmx_registers_count += 1
1116
+ return self._virtual_mmx_registers_count
1117
+
1118
+ def _allocate_general_purpose_register_id(self):
1119
+ """Returns a new unique ID for a virtual general-purpose register"""
1120
+ self._virtual_general_purpose_registers_count += 1
1121
+ return self._virtual_general_purpose_registers_count
1122
+
1123
+ def __str__(self):
1124
+ """Returns string representation of the function signature and instructions"""
1125
+
1126
+ return self.format()
1127
+
1128
def format_instructions(self, line_separator=os.linesep):
    """Formats instruction listing including data on input, output, available and live registers

    :param line_separator: string used to join the generated lines. When None,
        the list of lines is returned instead of a joined string.
    :returns: a list of lines if line_separator is None, otherwise one string.
    """

    from nervapy.x86_64.pseudo import ALIGN, LABEL

    code = []
    tab = " " * 4
    for instruction in self._instructions:
        # The instruction itself, formatted without indentation
        code.append(instruction.format("peachpy", indent=False))
        # LABEL and ALIGN pseudo-instructions get no register annotations
        if not isinstance(instruction, (LABEL, ALIGN)):
            # Register names are sorted so that the listing is deterministic
            code.append(
                tab
                + "In regs: "
                + ", ".join(sorted(map(str, instruction.input_registers)))
            )
            code.append(
                tab
                + "Out regs: "
                + ", ".join(sorted(map(str, instruction.output_registers)))
            )
            code.append(
                tab
                + "Live regs: "
                + ", ".join(sorted(map(str, instruction.live_registers)))
            )
            code.append(
                tab
                + "Avail regs: "
                + ", ".join(sorted(map(str, instruction.available_registers)))
            )
            # Blank line separating this annotated instruction from the next one
            code.append("")
    if line_separator is None:
        return code
    else:
        return str(line_separator).join(code)
1163
+
1164
def format(self, line_separator=os.linesep):
    """Build the assembly listing: the C signature followed by every indented instruction.

    :param line_separator: string used to join the lines. When None, the list of
        lines is returned instead of a joined string.
    :returns: a list of lines if line_separator is None, otherwise one string.
    """
    listing = [self.c_signature]
    listing.extend(
        instruction.format("peachpy", indent=True)
        for instruction in self._instructions
    )
    if line_separator is not None:
        return str(line_separator).join(listing)
    return listing
1174
+
1175
+
1176
class Argument(nervapy.Argument):
    def __init__(self, argument, abi):
        """Wraps a generic Argument with the x86-64 attributes needed to build a stack frame.

        The C type of the generic argument is deep-copied, and its size is resolved
        against the ABI when it is not already known.

        :ivar nervapy.x86_64.registers.Register register: the register in which the argument is passed to the
            function, or None if the argument is passed on stack.
        :ivar int stack_offset: offset from the end of the return address on the stack to the location of the
            argument on stack, or None if the argument is passed in a register and has no stack location. Note that
            in Microsoft X64 ABI the first four arguments are passed in registers but have stack space reserved for
            their storage. For these arguments both register and stack_offset are non-null.
        :ivar nervapy.x86_64.operand.MemoryAddress address: address of the argument on stack, relative to rsp or
            rbp. The value of this attribute is None until after register allocation. In Golang ABIs this attribute
            is never initialized because to load arguments from stack Golang uses its own pseudo-register FP, which
            is not representable in PeachPy (LOAD.ARGUMENT pseudo-instructions use stack_offset instead when
            formatted as Golang assembly).
        """
        assert isinstance(
            argument, nervapy.Argument
        ), "Architecture-specific argument must be constructed from generic Argument object"
        from nervapy.x86_64.abi import ABI

        assert isinstance(abi, ABI), "ABI object expected"
        from copy import deepcopy

        # Work on a private copy of the type so that resolving its size below
        # does not modify the type object of the generic argument
        private_type = deepcopy(argument.c_type)
        super(Argument, self).__init__(private_type, argument.name)
        if self.c_type.size is None:
            self.c_type.size = self.c_type.get_size(abi)

        self.abi = abi
        # Placement details start out unset
        self.register = None
        self.address = None
        self.stack_offset = None
        self.save_on_stack = False

    @property
    def passed_on_stack(self):
        """True when no register has been assigned to the argument."""
        return self.register is None
1212
+
1213
+
1214
+ class ABIFunction:
1215
+ """ABI-specific x86-64 assembly function.
1216
+
1217
+ A function consists of C signature, ABI, and a list of instructions without virtual registers.
1218
+ """
1219
+
1220
def __init__(self, function, abi):
    """Specializes an ABI-agnostic function for the given ABI.

    Copies the signature-level attributes of ``function``, deep-copies its
    instructions and register allocators, and then runs the processing steps
    below in a fixed order.

    :param function: the Function object to specialize.
    :param abi: the ABI object to specialize for.
    :raises ValueError: if ``abi`` is not one of the ABIs handled below.
    """
    from copy import deepcopy

    from nervapy.x86_64.abi import (ABI, goasm_amd64_abi,
                                    goasm_amd64p32_abi, gosyso_amd64_abi,
                                    gosyso_amd64p32_abi, linux_x32_abi,
                                    microsoft_x64_abi,
                                    native_client_x86_64_abi,
                                    system_v_x86_64_abi)

    assert isinstance(function, Function), "Function object expected"
    assert isinstance(abi, ABI), "ABI object expected"
    self.name = function.name
    # Each generic argument is wrapped into an ABI-aware Argument object
    self.arguments = [Argument(argument, abi) for argument in function.arguments]
    self.result_type = function.result_type
    # Stays None unless _setup_golang_arguments assigns it
    self.result_offset = None
    self.package = function.package
    self.target = function.target
    self.isa_extensions = function.isa_extensions
    self.abi = abi
    self.c_signature = function.c_signature
    self.go_signature = function.go_signature

    self.avx_environment = function.avx_environment
    self._avx_prolog = function._avx_prolog

    from nervapy.x86_64.registers import rsp

    # Initial stack frame description; later steps may update these values
    self._stack_base = rsp
    self._stack_frame_size = 0
    self._stack_frame_alignment = self.abi.stack_alignment
    self._local_variables_size = 0

    # Deep copies: the steps below rewrite instructions and allocator state
    # without touching the objects owned by the source function
    self._instructions = deepcopy(function._instructions)
    self._register_allocators = deepcopy(function._register_allocators)

    # Select the argument passing scheme that matches the ABI
    if abi == microsoft_x64_abi:
        self._setup_windows_arguments()
    elif abi in {system_v_x86_64_abi, linux_x32_abi, native_client_x86_64_abi}:
        self._setup_unix_arguments()
    elif abi in {
        gosyso_amd64_abi,
        gosyso_amd64p32_abi,
        goasm_amd64_abi,
        goasm_amd64p32_abi,
    }:
        self._setup_golang_arguments()
    else:
        raise ValueError("Unsupported ABI: %s" % str(abi))

    # Point LOAD.ARGUMENT instructions at the ABI-aware argument objects
    self._update_argument_loads(function.arguments)

    self._layout_local_variables()

    self._allocate_registers()
    self._bind_registers()

    self._clobbered_registers = self._analyze_clobbered_registers()
    self._update_stack_frame()
    self._update_argument_addresses()

    self._lower_argument_loads()
    self._lower_pseudoinstructions()
    self._filter_instruction_encodings()

    self.mangled_name = self.mangle_name()
1286
+
1287
def _update_argument_loads(self, arguments):
    """Rebind LOAD.ARGUMENT instructions from generic arguments to this function's arguments.

    The second operand of each LOAD.ARGUMENT is looked up by position in
    ``arguments`` and replaced by the entry of ``self.arguments`` at the same
    index. The replacement is flagged ``save_on_stack`` when its register is among
    the instruction's available registers.

    :param arguments: the generic argument list that the instructions refer to.
    """
    from nervapy.x86_64.pseudo import LOAD

    for instruction in self._instructions:
        if not isinstance(instruction, LOAD.ARGUMENT):
            continue
        destination = instruction.operands[0]
        position = arguments.index(instruction.operands[1])
        instruction.operands = (destination, self.arguments[position])
        bound_argument = instruction.operands[1]
        if bound_argument.register in instruction.available_registers:
            instruction.operands[1].save_on_stack = True
1298
+
1299
def _setup_windows_arguments(self):
    """Assigns registers and stack offsets to arguments according to the Microsoft x64 ABI.

    For each of the first four arguments a register is chosen by position: an
    XMM register for floating-point arguments, otherwise the general-purpose
    register of the matching index (narrowed to the argument size for integer,
    pointer, codeunit, mask and m64 arguments). Every argument gets an 8-byte
    stack slot at ``index * 8``.
    """
    from nervapy.x86_64.abi import microsoft_x64_abi

    assert (
        self.abi == microsoft_x64_abi
    ), "This function must only be used with Microsoft x64 ABI"
    # The first 4 arguments are passed in registers, others are on stack.
    # 8 bytes on stack is reserved for each parameter (regardless of their size).
    # On-stack space is also reserved, but not initialized, for parameters passed in registers.
    # Arguments are NOT extended to 8 bytes, and high bytes of registers/stack cells may contain garbage.
    from nervapy.x86_64 import m64
    from nervapy.x86_64.registers import (r8, r9, rcx, rdx, xmm0, xmm1,
                                          xmm2, xmm3)

    floating_point_argument_registers = (xmm0, xmm1, xmm2, xmm3)
    integer_argument_registers = (rcx, rdx, r8, r9)
    for index, argument in enumerate(self.arguments):
        # Compare the argument's C type (not the Argument object itself) with
        # m64, matching the register selection below: an m64 argument travels
        # by value in a general-purpose register, other vectors by reference.
        argument.passed_by_reference = (
            argument.is_vector and argument.c_type != m64
        )
        if index < 4:
            if argument.is_floating_point:
                argument.register = floating_point_argument_registers[index]
            elif (
                argument.is_integer
                or argument.is_pointer
                or argument.is_codeunit
                or argument.is_mask
                or argument.c_type == m64
            ):
                argument_register = integer_argument_registers[index]
                # Use the sub-register that matches the argument size
                argument.register = {
                    1: argument_register.as_low_byte,
                    2: argument_register.as_word,
                    4: argument_register.as_dword,
                    8: argument_register,
                }[argument.size]
            elif argument.is_vector:
                # The full-width register is used as-is (presumably it carries
                # the address of the by-reference vector - TODO confirm)
                argument.register = integer_argument_registers[index]
            else:
                assert False
        # Stack offset does not include return address
        argument.stack_offset = index * 8
1340
+
1341
def _setup_unix_arguments(self):
    """Assigns registers or stack offsets to arguments for the System V-style ABIs.

    Floating-point and vector arguments draw from xmm0-xmm7 (widened to ymm/zmm
    for 32- and 64-byte arguments); integer, pointer and codeunit arguments draw
    from rdi, rsi, rdx, rcx, r8, r9. Arguments that get no register receive
    consecutive 8-byte stack slots.
    """
    from nervapy.x86_64.abi import (linux_x32_abi,
                                    native_client_x86_64_abi,
                                    system_v_x86_64_abi)

    assert self.abi in {
        system_v_x86_64_abi,
        linux_x32_abi,
        native_client_x86_64_abi,
    }, "This function must only be used with System V x86-64, Linux x32 or Native Client x86-64 SFI ABI"

    from nervapy.x86_64.registers import (r8, r9, rcx, rdi, rdx, rsi, xmm0,
                                          xmm1, xmm2, xmm3, xmm4, xmm5,
                                          xmm6, xmm7)

    # The first 6 integer/pointer arguments are passed in general-purpose registers.
    # The first 8 floating-point arguments are passed in SSE registers.
    # For all integer arguments in excess of 6 and floating-point arguments in excess the caller reserves
    # 8 bytes on stack.
    # Arguments smaller than 4 bytes are extended (with sign-extension, if needed) to 4 bytes.
    # For 4-byte and smaller arguments passed on stack the high 4 bytes are not initialized.
    # For 4-byte and smaller arguments passed in registers the high 4 bytes are zero-initialized.
    # X32 and Native Client ABIs were not much tested, but they seem similar
    vector_register_pool = [xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7]
    integer_register_pool = [rdi, rsi, rdx, rcx, r8, r9]

    # Stack offset does not include return address
    next_stack_offset = 0
    for argument in self.arguments:
        if (argument.is_floating_point or argument.is_vector) and vector_register_pool:
            argument.register = vector_register_pool.pop(0)
            if argument.size == 32:
                argument.register = argument.register.as_ymm
            elif argument.size == 64:
                argument.register = argument.register.as_zmm
            else:
                # Only 4-, 8- and 16-byte arguments stay in the plain xmm register
                assert argument.size in {4, 8, 16}
        elif (
            argument.is_integer or argument.is_pointer or argument.is_codeunit
        ) and integer_register_pool:
            full_register = integer_register_pool.pop(0)
            # 1-, 2- and 4-byte arguments all use the 32-bit sub-register
            register_by_size = dict.fromkeys((1, 2, 4), full_register.as_dword)
            register_by_size[8] = full_register
            argument.register = register_by_size[argument.size]
        elif argument.is_vector or argument.is_mask:
            assert False
        else:
            argument.stack_offset = next_stack_offset
            next_stack_offset += 8
1406
+
1407
def _setup_golang_arguments(self):
    """Lays out all arguments, and the result if any, on the stack for the Golang ABIs.

    Each argument is placed at the next offset rounded up to its own size. When
    the function has a result type, ``result_offset`` is the offset past the last
    argument rounded up to the result size.
    """
    from nervapy.x86_64.abi import (goasm_amd64_abi, goasm_amd64p32_abi,
                                    gosyso_amd64_abi, gosyso_amd64p32_abi)

    assert self.abi in {
        gosyso_amd64_abi,
        gosyso_amd64p32_abi,
        goasm_amd64_abi,
        goasm_amd64p32_abi,
    }, "This function must only be used with Golang AMD64 or AMD64p32 ABI"

    from nervapy.util import roundup

    # All arguments are passed on stack
    # Stack offset does not include the return address
    next_free_offset = 0
    for argument in self.arguments:
        # Arguments are aligned on stack
        aligned_offset = roundup(next_free_offset, argument.size)
        argument.stack_offset = aligned_offset
        next_free_offset = aligned_offset + argument.size
    if self.result_type is not None:
        self.result_offset = roundup(next_free_offset, self.result_type.size)
1430
+
1431
def _layout_local_variables(self):
    """Assigns addresses to local variables and patches the instructions that use them.

    The root variables referenced by the instructions are collected once each,
    ordered by size, and laid out back to back with each one rounded up to its own
    alignment. The stack frame alignment becomes the largest variable alignment,
    and the displacement of every referencing memory operand is set to the
    variable's address.
    """
    from nervapy.x86_64.registers import rsp

    # Collect distinct root variables in order of first use
    seen_roots = set()
    roots = []
    for instruction in self._instructions:
        variable = instruction.local_variable
        if variable is None:
            continue
        root = variable.root
        if root in seen_roots:
            continue
        seen_roots.add(root)
        roots.append(root)

    if not roots:
        # No instruction references a local variable: nothing to lay out
        return

    roots.sort(key=lambda var: var.size)
    # NOTE(review): this replaces the previous alignment even when the ABI
    # stack alignment was larger - confirm that this is intended
    self._stack_frame_alignment = max(var.alignment for var in roots)

    from nervapy.util import roundup

    next_address = 0
    for root in roots:
        next_address = roundup(next_address, root.alignment)
        root._address = next_address
        next_address += root.size
    self._local_variables_size = next_address

    # Write the final addresses into the memory operands
    for instruction in self._instructions:
        variable = instruction.local_variable
        if variable is None:
            continue
        assert variable.address is not None
        memory_address = instruction.memory_address
        assert memory_address is not None
        assert memory_address.base == rsp
        instruction.memory_address.displacement = variable.address
1469
+
1470
def _allocate_registers(self):
    """Configures the register allocators for the ABI and runs the allocation.

    Before the general allocation, each virtual destination register of a
    LOAD.ARGUMENT whose argument arrives in a register is offered that same
    physical register via ``try_allocate_register``.
    """
    for kind, allocator in six.iteritems(self._register_allocators):
        allocator.set_allocation_options(self.abi, kind)

    from nervapy.x86_64.pseudo import LOAD
    from nervapy.x86_64.registers import (GeneralPurposeRegister,
                                          KRegister, MMXRegister, Register,
                                          XMMRegister)

    for instruction in self._instructions:
        if not isinstance(instruction, LOAD.ARGUMENT):
            continue
        destination = instruction.operands[0]
        source = instruction.operands[1]
        assert isinstance(destination, Register)
        assert isinstance(source, Argument)
        if not destination.is_virtual or source.register is None:
            # Nothing to hint: fixed destination or stack-passed argument
            continue
        allocator = self._register_allocators[destination.kind]
        allocator.try_allocate_register(
            destination.virtual_id, source.register.physical_id
        )

    for allocator in six.itervalues(self._register_allocators):
        allocator.allocate_registers()
1494
+
1495
def _lower_argument_loads(self):
    """Replaces LOAD.ARGUMENT pseudo-instructions with the result of load_register/load_memory.

    Nothing is changed for the goasm ABIs. Otherwise an argument passed in a
    register is lowered with ``load_register`` (and dropped when that returns
    None), while an argument passed on stack is lowered with ``load_memory``. All
    other instructions are kept as they are.
    """
    from nervapy.x86_64.abi import goasm_amd64_abi, goasm_amd64p32_abi
    from nervapy.x86_64.lower import load_memory, load_register
    from nervapy.x86_64.pseudo import LOAD
    from nervapy.x86_64.registers import (GeneralPurposeRegister,
                                          MMXRegister, XMMRegister,
                                          YMMRegister)

    targets_go_assembler = (
        self.abi == goasm_amd64_abi or self.abi == goasm_amd64p32_abi
    )
    if targets_go_assembler:
        # Like PeachPy, Go assembler uses pseudo-instructions for argument loads
        return

    result = []
    for instruction in self._instructions:
        if not isinstance(instruction, LOAD.ARGUMENT):
            result.append(instruction)
            continue
        assert isinstance(
            instruction.operands[0],
            (GeneralPurposeRegister, MMXRegister, XMMRegister, YMMRegister),
        ), "Lowering LOAD.ARGUMENT is supported only for general-purpose, mmx, xmm, and ymm target registers"
        if instruction.operands[1].register is None:
            # The argument is passed to function on stack
            result.append(
                load_memory(
                    instruction.operands[0],
                    instruction.operands[1].address,
                    instruction.operands[1].c_type,
                    prototype=instruction,
                )
            )
            continue
        # The argument is passed to function in a register
        register_load = load_register(
            instruction.operands[0],
            instruction.operands[1].register,
            instruction.operands[1].c_type,
            prototype=instruction,
        )
        if register_load is not None:
            result.append(register_load)
    self._instructions = result
1535
+
1536
+ def _lower_pseudoinstructions(self):
1537
+ from nervapy.stream import InstructionStream
1538
+ from nervapy.util import is_int, is_sint32, is_uint32
1539
+ from nervapy.x86_64.abi import (goasm_amd64_abi, goasm_amd64p32_abi,
1540
+ gosyso_amd64_abi, gosyso_amd64p32_abi,
1541
+ native_client_x86_64_abi)
1542
+ from nervapy.x86_64.avx import VMOVAPS, VZEROUPPER
1543
+ from nervapy.x86_64.generic import (ADD, AND, LEA, MOV, POP, PUSH, RET,
1544
+ SUB, XOR)
1545
+ from nervapy.x86_64.lower import load_register
1546
+ from nervapy.x86_64.mmxsse import MOVAPS
1547
+ from nervapy.x86_64.nacl import (NACLASP, NACLJMP, NACLRESTBP,
1548
+ NACLRESTSP, NACLSSP)
1549
+ from nervapy.x86_64.pseudo import RETURN, STORE
1550
+ from nervapy.x86_64.registers import (GeneralPurposeRegister64,
1551
+ XMMRegister, eax, rbp, rsp)
1552
+
1553
+ # The new list with lowered instructions
1554
+ instructions = list()
1555
+ # Generate prologue
1556
+ cloberred_xmm_registers = list()
1557
+ cloberred_general_purpose_registers = list()
1558
+ with InstructionStream() as prolog_stream:
1559
+ # 1. Save clobbered general-purpose registers with PUSH instruction
1560
+ # 2. If there are clobbered XMM registers, allocate space for them on stack (subtract stack pointer)
1561
+ # 3. Save clobbered XMM registers on stack with (V)MOVAPS instruction
1562
+ for reg in self._clobbered_registers:
1563
+ assert isinstance(
1564
+ reg, (GeneralPurposeRegister64, XMMRegister)
1565
+ ), "Internal error: unexpected register %s in clobber list" % str(reg)
1566
+ if isinstance(reg, GeneralPurposeRegister64):
1567
+ cloberred_general_purpose_registers.append(reg)
1568
+ PUSH(reg)
1569
+ else:
1570
+ cloberred_xmm_registers.append(reg)
1571
+ # If stack needs to be realigned
1572
+ if self._stack_frame_alignment > self.abi.stack_alignment:
1573
+ cloberred_general_purpose_registers.append(rbp)
1574
+ PUSH(rbp)
1575
+ MOV(rbp, rsp)
1576
+ if cloberred_xmm_registers or self._local_variables_size != 0:
1577
+ # Total size of the stack frame less what is already adjusted with PUSH instructions
1578
+ stack_adjustment = (
1579
+ self._stack_frame_size
1580
+ - len(cloberred_general_purpose_registers)
1581
+ * GeneralPurposeRegister64.size
1582
+ )
1583
+ if self.abi != native_client_x86_64_abi:
1584
+ SUB(rsp, stack_adjustment + self._local_variables_size)
1585
+ if self._stack_frame_alignment > self.abi.stack_alignment:
1586
+ AND(rsp, -self._stack_frame_alignment)
1587
+ else:
1588
+ if self._stack_frame_alignment > self.abi.stack_alignment:
1589
+ # Note: do not modify rcx/rdx/r8/r9 as they may contain function arguments
1590
+ LEA(
1591
+ eax, [rsp - (stack_adjustment + self._local_variables_size)]
1592
+ )
1593
+ AND(eax, -self._stack_frame_alignment)
1594
+ NACLRESTSP(eax)
1595
+ else:
1596
+ NACLSSP(stack_adjustment + self._local_variables_size)
1597
+ for i, xmm_reg in enumerate(cloberred_xmm_registers):
1598
+ movaps = VMOVAPS if self._avx_prolog else MOVAPS
1599
+ movaps(
1600
+ [rsp + self._local_variables_size + i * XMMRegister.size], xmm_reg
1601
+ )
1602
+
1603
+ # TODO: handle situations when entry point is in the middle of a function
1604
+ instructions.extend(prolog_stream.instructions)
1605
+
1606
+ for instruction in self._instructions:
1607
+ if isinstance(instruction, RETURN):
1608
+ from nervapy.x86_64.registers import (GeneralPurposeRegister,
1609
+ MMXRegister, XMMRegister,
1610
+ YMMRegister, al, ax, eax,
1611
+ ecx, mm0, rax, rcx, xmm0,
1612
+ ymm0)
1613
+
1614
+ is_goasm_abi = self.abi in {goasm_amd64_abi, goasm_amd64p32_abi}
1615
+ is_gosyso_abi = self.abi in {gosyso_amd64_abi, gosyso_amd64p32_abi}
1616
+ with InstructionStream() as epilog_stream:
1617
+ # Save return value
1618
+ if instruction.operands:
1619
+ assert len(instruction.operands) == 1
1620
+ if is_int(instruction.operands[0]):
1621
+ assert (
1622
+ self.result_type.is_integer
1623
+ or self.result_type.is_pointer
1624
+ )
1625
+ # Return immediate constant
1626
+ if is_goasm_abi:
1627
+ # Return value must be saved on stack with STORE.RESULT pseudo-instruction
1628
+ if self.result_type.size <= 4 or is_sint32(
1629
+ instruction.operands[0]
1630
+ ):
1631
+ # STORE.RESULT will assemble to one of the forms:
1632
+ # - MOV m8, imm8
1633
+ # - MOV m16, imm16
1634
+ # - MOV m32, imm32
1635
+ # - MOV m64, imm32
1636
+ STORE.RESULT(
1637
+ instruction.operands[0],
1638
+ prototype=instruction,
1639
+ target_function=self,
1640
+ )
1641
+ else:
1642
+ # STORE.RESULT can't be used directly (MOV m64, imm64 doesn't exist), instead use
1643
+ # MOV rax, imm64 + MOV m64, rax (STORE.RESULT)
1644
+ MOV(
1645
+ eax,
1646
+ instruction.operands[0],
1647
+ prototype=instruction,
1648
+ )
1649
+ STORE.RESULT(
1650
+ eax, prototype=instruction, target_function=self
1651
+ )
1652
+ else:
1653
+ # Return value is returned in:
1654
+ # - eax register if result type is not greater than 4 bytes
1655
+ # - rax register if result type is greater than 8 bytes
1656
+ if instruction.operands[0] == 0:
1657
+ # - Zero eax register (high 32 bits of rax register clear automatically)
1658
+ XOR(eax, eax, prototype=instruction)
1659
+ elif self.result_type.size <= 4 or is_uint32(
1660
+ instruction.operands[0]
1661
+ ):
1662
+ # - If the result type is not greater than 4 bytes, directly mov it to eax register
1663
+ # - If the result type is greater than 4 bytes, but the result value is
1664
+ # representable as unsigned 32-bit literal, mov it to eax register and the high
1665
+ # 32 bits of rax will be cleared automatically
1666
+ MOV(
1667
+ eax,
1668
+ instruction.operands[0],
1669
+ prototype=instruction,
1670
+ )
1671
+ else:
1672
+ # - Either negative 32-bit constant (would use MOV rax, imm32 form)
1673
+ # - Or large 64-bit constant (would use MOV rax, imm64 form)
1674
+ MOV(
1675
+ rax,
1676
+ instruction.operands[0],
1677
+ prototype=instruction,
1678
+ )
1679
+ elif isinstance(
1680
+ instruction.operands[0], GeneralPurposeRegister
1681
+ ):
1682
+ if (
1683
+ is_goasm_abi
1684
+ and instruction.operands[0].size
1685
+ == self.result_type.size
1686
+ ):
1687
+ STORE.RESULT(
1688
+ instruction.operands[0],
1689
+ prototype=instruction,
1690
+ target_function=self,
1691
+ )
1692
+ else:
1693
+ result_reg = eax if self.result_type.size <= 4 else rax
1694
+ epilog_stream.add_instruction(
1695
+ load_register(
1696
+ result_reg,
1697
+ instruction.operands[0],
1698
+ self.result_type,
1699
+ prototype=instruction,
1700
+ )
1701
+ )
1702
+ if is_goasm_abi:
1703
+ result_subreg = {1: al, 2: ax, 4: eax, 8: rax}[
1704
+ self.result_type.size
1705
+ ]
1706
+ STORE.RESULT(
1707
+ result_subreg,
1708
+ prototype=instruction,
1709
+ target_function=self,
1710
+ )
1711
+ elif isinstance(instruction.operands[0], MMXRegister):
1712
+ epilog_stream.add_instruction(
1713
+ load_register(
1714
+ mm0,
1715
+ instruction.operands[0],
1716
+ self.result_type,
1717
+ prototype=instruction,
1718
+ )
1719
+ )
1720
+ elif isinstance(instruction.operands[0], XMMRegister):
1721
+ if self.result_type.is_floating_point and is_goasm_abi:
1722
+ assert self.result_type.size in {4, 8}
1723
+ STORE.RESULT(
1724
+ instruction.operands[0],
1725
+ prototype=instruction,
1726
+ target_function=self,
1727
+ )
1728
+ else:
1729
+ epilog_stream.add_instruction(
1730
+ load_register(
1731
+ xmm0,
1732
+ instruction.operands[0],
1733
+ self.result_type,
1734
+ prototype=instruction,
1735
+ )
1736
+ )
1737
+ elif isinstance(instruction.operands[0], YMMRegister):
1738
+ epilog_stream.add_instruction(
1739
+ load_register(
1740
+ ymm0,
1741
+ instruction.operands[0],
1742
+ self.result_type,
1743
+ prototype=instruction,
1744
+ )
1745
+ )
1746
+ else:
1747
+ assert False
1748
+ if instruction.avx_mode and not self.avx_environment:
1749
+ VZEROUPPER(prototype=instruction)
1750
+ # Generate epilog
1751
+ # 1. Restore clobbered XMM registers on stack with (V)MOVAPS instruction
1752
+ # 2. If there are clobbered XMM registers, release their space on stack (increment stack pointer)
1753
+ # 3. Restore clobbered general-purpose registers with PUSH instruction
1754
+ for i, xmm_reg in enumerate(cloberred_xmm_registers):
1755
+ movaps = VMOVAPS if self.avx_environment else MOVAPS
1756
+ movaps(
1757
+ xmm_reg,
1758
+ [rsp + self._local_variables_size + i * XMMRegister.size],
1759
+ )
1760
+ if self._stack_frame_alignment > self.abi.stack_alignment:
1761
+ # Restore rsp value from rbp
1762
+ MOV(rsp, rbp)
1763
+ elif cloberred_xmm_registers or self._local_variables_size != 0:
1764
+ # Total size of the stack frame less what will be adjusted with POP instructions
1765
+ stack_adjustment = (
1766
+ self._stack_frame_size
1767
+ - len(cloberred_general_purpose_registers)
1768
+ * GeneralPurposeRegister64.size
1769
+ )
1770
+ if self.abi != native_client_x86_64_abi:
1771
+ ADD(rsp, stack_adjustment + self._local_variables_size)
1772
+ else:
1773
+ NACLASP(stack_adjustment + self._local_variables_size)
1774
+ # Important: registers must be POPed in reverse order
1775
+ for reg in reversed(cloberred_general_purpose_registers):
1776
+ if reg == rbp and self.abi == native_client_x86_64_abi:
1777
+ POP(rcx)
1778
+ NACLRESTBP(ecx)
1779
+ else:
1780
+ POP(reg)
1781
+ # Return from the function
1782
+ if self.abi == native_client_x86_64_abi:
1783
+ POP(rcx, prototype=instruction)
1784
+ NACLJMP(ecx)
1785
+ else:
1786
+ RET(prototype=instruction)
1787
+ instructions.extend(epilog_stream.instructions)
1788
+ elif isinstance(instruction, STORE.RESULT):
1789
+ instruction.destination_offset = self.result_offset
1790
+ instructions.append(instruction)
1791
+ else:
1792
+ if self.abi == native_client_x86_64_abi and instruction.name != "LEA":
1793
+ from nervapy.x86_64.operand import is_m
1794
+
1795
+ memory_operands = list(
1796
+ filter(lambda op: is_m(op), instruction.operands)
1797
+ )
1798
+ if memory_operands:
1799
+ assert (
1800
+ len(memory_operands) == 1
1801
+ ), "x86-64 instructions can not have more than 1 explicit memory operand"
1802
+ memory_address = memory_operands[0].address
1803
+ from nervapy.x86_64.operand import MemoryAddress
1804
+
1805
+ if isinstance(memory_address, MemoryAddress):
1806
+ if memory_address.index is not None:
1807
+ raise ValueError("NaCl does not allow index addressing")
1808
+ from nervapy.x86_64.registers import r15, rbp, rsp
1809
+
1810
+ if (
1811
+ memory_address.base is not None
1812
+ and memory_address.base not in {rbp, rsp, r15}
1813
+ ):
1814
+ # Base register is not a restricted register: needs transformation
1815
+ memory_address.index = memory_address.base
1816
+ memory_address.scale = 1
1817
+ memory_address.base = r15
1818
+ instructions.append(instruction)
1819
+ self._instructions = instructions
1820
+
1821
+ def _filter_instruction_encodings(self):
1822
+ for instruction in self._instructions:
1823
+ instruction.encodings = instruction._filter_encodings()
1824
+
1825
+ def _update_argument_addresses(self):
1826
+ for argument in self.arguments:
1827
+ if argument.stack_offset is not None:
1828
+ argument.address = self._argument_stack_base + argument.stack_offset
1829
+
1830
def _analyze_clobbered_registers(self):
    """Return the sorted list of callee-saved registers written by the function.

    Output registers of all instructions are collected, general-purpose
    registers are widened to their 64-bit form and XMM/YMM/ZMM registers are
    mapped to their XMM form, and only registers listed in
    ``self.abi.callee_save_registers`` are kept.
    """
    from nervapy.x86_64.registers import (GeneralPurposeRegister,
                                          XMMRegister, YMMRegister,
                                          ZMMRegister)

    written = set()
    for instr in self._instructions:
        written.update(instr.output_registers)

    widened = set()
    for reg in written:
        if isinstance(reg, GeneralPurposeRegister):
            widened.add(reg.as_qword)
        elif isinstance(reg, (XMMRegister, YMMRegister, ZMMRegister)):
            widened.add(reg.as_xmm)
        # Other register types are volatile registers for all x86-64 ABIs

    callee_saved = [
        reg for reg in widened if reg in self.abi.callee_save_registers
    ]
    callee_saved.sort()
    return callee_saved
1852
+
1853
def _update_stack_frame(self):
    """Recompute ``_stack_frame_size`` and ``_argument_stack_base``.

    The frame size covers the saved general-purpose registers (8 bytes each,
    plus rbp when the stack is realigned) and the saved XMM registers, with
    8 bytes of padding added when needed to keep the stack 16-byte aligned.
    """
    from nervapy.x86_64.registers import (GeneralPurposeRegister64,
                                          XMMRegister, rbp, rsp)

    gp_count = 0
    xmm_count = 0
    for reg in self._clobbered_registers:
        assert isinstance(
            reg, (GeneralPurposeRegister64, XMMRegister)
        ), "Internal error: unexpected register %s in clobber list" % str(reg)
        if isinstance(reg, GeneralPurposeRegister64):
            gp_count += 1
        else:
            xmm_count += 1

    needs_realignment = self._stack_frame_alignment > self.abi.stack_alignment
    if needs_realignment:
        # rbp is saved as well so rsp can be restored from it
        gp_count += 1

    frame_size = (
        gp_count * GeneralPurposeRegister64.size + xmm_count * XMMRegister.size
    )
    # On entry the stack is misaligned by 8 (the return address was pushed).
    # Every pushed general-purpose register moves it by another 8 bytes, so
    # after an EVEN number of pushes it is still misaligned by 8. If anything
    # else is going to live on the stack (XMM saves or local variables), add
    # 8 bytes of padding to restore 16-byte alignment.
    uses_stack_storage = xmm_count != 0 or self._local_variables_size != 0
    if uses_stack_storage and gp_count % 2 == 0:
        frame_size += 8
    self._stack_frame_size = frame_size

    # Set stack_argument_base
    return_address_size = 8
    if needs_realignment:
        # rsp is realigned, argument addressing uses rbp
        saved_rbp_size = 8
        self._argument_stack_base = rbp + return_address_size + saved_rbp_size
    else:
        # argument addressing uses rsp
        self._argument_stack_base = (
            rsp
            + return_address_size
            + self._stack_frame_size
            + self._local_variables_size
        )
1899
+
1900
+ def _bind_registers(self):
1901
+ """Iterates through the list of instructions and assigns physical IDs to allocated registers"""
1902
+
1903
+ for instruction in self._instructions:
1904
+ for register in instruction.register_objects:
1905
+ if register.is_virtual:
1906
+ register.physical_id = self._register_allocators[
1907
+ register.kind
1908
+ ].register_allocations[register.virtual_id]
1909
+
1910
def format_code(
    self,
    assembly_format="peachpy",
    line_separator=os.linesep,
    indent=True,
    line_number=1,
):
    """Return the code of the assembly instructions comprising the function.

    :param assembly_format: name of the output syntax, passed through to each
        instruction's ``format`` method.
    :param line_separator: string used to join the lines; if None, the list
        of lines is returned instead of a single string.
    :param indent: passed through to each instruction's ``format`` method.
    :param line_number: line number given to the first instruction; each
        following instruction gets the next number.
    :returns: a list of lines when ``line_separator`` is None, otherwise the
        lines joined with ``str(line_separator)``.
    """
    if assembly_format == "gas":
        # Pre-assign line number to labels
        from nervapy.x86_64.pseudo import LABEL

        for i, instruction in enumerate(self._instructions):
            if isinstance(instruction, LABEL):
                instruction.operands[0].line_number = line_number + i

    code = [
        instruction.format(
            assembly_format=assembly_format,
            indent=indent,
            line_number=line_number + i,
        )
        for i, instruction in enumerate(self._instructions)
    ]
    if line_separator is None:
        return code
    return str(line_separator).join(code)
1950
+
1951
def format(
    self, assembly_format="peachpy", line_separator=os.linesep, line_number=1
):
    """Format the assembly listing of the function.

    For the "go" format a ``TEXT`` directive (optionally preceded by the Go
    signature as a comment) is emitted first; for "gas" the listing is wrapped
    in section/alignment/symbol directives. Any other format produces only the
    instruction lines from ``format_code``.

    :returns: a list of lines when ``line_separator`` is None, otherwise the
        lines joined with ``str(line_separator)``.
    """
    is_gas = assembly_format == "gas"

    if assembly_format == "go":
        # Arguments for TEXT directive in Go assembler
        package_string = "" if self.package is None else self.package
        if six.PY2:
            symbol = package_string + "\xc2\xb7" + self.mangled_name + "(SB)"
        else:
            symbol = package_string + "\u00b7" + self.mangled_name + "(SB)"

        stack_size = sum(map(operator.attrgetter("size"), self.arguments))
        if self.result_offset is not None:
            stack_size = self.result_offset + self.result_type.size
        frame_argument = "$0" if stack_size == 0 else "$0-%d" % stack_size

        code = ["TEXT " + ",".join([symbol, "4", frame_argument])]
        if self.go_signature is not None:
            code.insert(0, "// " + self.go_signature)
    elif is_gas:
        from nervapy.util import ilog2

        code_alignment = 16
        log2_alignment = ilog2(code_alignment)
        symbol = self.mangled_name
        code = [
            "#ifdef __APPLE__",
            ".section __TEXT,__text,regular,pure_instructions",
            ".globl _%s" % (symbol,),
            ".p2align %s, 0x90" % (log2_alignment,),
            "_%s:" % (symbol,),
            "#else /* !__APPLE__ */",
            ".text",
            ".p2align %s,,%s" % (log2_alignment, code_alignment - 1),
            ".globl " + symbol,
            ".type %s, @function" % (symbol,),
            "%s:" % (symbol,),
            "#endif /* !__APPLE */",
        ]
    else:
        code = []

    # Instruction line numbers continue after the header lines
    code += self.format_code(
        assembly_format,
        line_separator=None,
        indent=True,
        line_number=line_number + len(code),
    )
    if is_gas:
        code += [
            "#ifndef __APPLE__",
            ".size %s, .-%s" % (self.mangled_name, self.mangled_name),
            "#endif /* !__APPLE__ */",
        ]

    if assembly_format in ["go", "gas"]:
        # Add trailing line or assembler will refuse to compile
        code.append("")
    if line_separator is None:
        return code
    return str(line_separator).join(code)
2028
+
2029
def encode(self):
    """Build and return an EncodedFunction from this function."""
    encoded_function = EncodedFunction(self)
    return encoded_function
2031
+
2032
@property
def metadata(self):
    """Ordered description of the function: names, signature, and target.

    The result is an OrderedDict with the keys "entry", "name", "symbol",
    "return", "arguments", "arch", "abi", "uarch" and "isa", in that order.
    """
    info = collections.OrderedDict()
    info["entry"] = "function"
    info["name"] = self.name
    info["symbol"] = self.mangled_name
    if self.result_type is None:
        info["return"] = "void"
    else:
        info["return"] = str(self.result_type)

    argument_entries = []
    for argument in self.arguments:
        entry = collections.OrderedDict()
        entry["name"] = argument.name
        entry["type"] = str(argument.c_type)
        argument_entries.append(entry)
    info["arguments"] = argument_entries

    info["arch"] = "x86-64"
    info["abi"] = str(self.abi)
    info["uarch"] = self.target.name
    info["isa"] = [str(extension) for extension in self.isa_extensions.minify()]
    return info
2059
+
2060
def mangle_name(self):
    """Return the function name produced by the configured mangling template.

    The template ``nervapy.x86_64.options.name_mangling`` is expanded by
    replacing, in order, the placeholders ``${Name}``/``${name}``/``${NAME}``
    (function name as-is, lower-cased, upper-cased),
    ``${uArch}``/``${uarch}``/``${UARCH}`` (target id in the same three
    forms), and ``${ISA}``/``${isa}`` (underscore-joined safe names of the
    minified ISA extensions, as-is and lower-cased).
    """
    import nervapy.x86_64.options

    # Minify the extension set once; both ISA placeholders use the same names
    isa_names = [extension.safe_name for extension in self.isa_extensions.minify()]
    substitutions = (
        ("${Name}", self.name),
        ("${name}", self.name.lower()),
        ("${NAME}", self.name.upper()),
        ("${uArch}", self.target.id),
        ("${uarch}", self.target.id.lower()),
        ("${UARCH}", self.target.id.upper()),
        ("${ISA}", "_".join(isa_names)),
        ("${isa}", "_".join([isa_name.lower() for isa_name in isa_names])),
    )

    name = nervapy.x86_64.options.name_mangling
    for placeholder, value in substitutions:
        name = name.replace(placeholder, value)
    return name
2089
+
2090
+
2091
class InstructionBundle:
    """Fixed-capacity run of encoded instructions starting at a given address.

    Instructions are appended with ``add`` / ``add_label_branch``; both raise
    ``BufferError`` when the encoded bytes do not fit, leaving the bundle
    unchanged. ``size`` tracks the number of bytes used so far.
    """

    def __init__(self, capacity, address):
        """Create an empty bundle.

        :param capacity: bundle size in bytes; must be 16, 32, or 64.
        :param address: code address of the first byte of the bundle.
        :raises ValueError: if ``capacity`` is not one of the allowed sizes.
        """
        if capacity not in {16, 32, 64}:
            raise ValueError("Bundle capacity must be 16, 32, or 64")
        self.capacity = capacity
        self.address = address
        self.size = 0
        self._instructions = []
        # Map from instruction position to tuple (label address, long encoding, short range)
        self.branch_info_map = dict()

    @property
    def padding(self):
        """Number of bytes still unused in the bundle."""
        return self.capacity - self.size

    def add(self, instructions):
        """Append a group of instructions to the bundle, all or nothing.

        :param instructions: list of Instruction objects that must stay together.
        :raises BufferError: if their combined encoding does not fit in the
            remaining space; the bundle and the instructions are not modified.
        """
        from nervapy.x86_64.instructions import Instruction

        assert isinstance(instructions, list)
        assert all(
            isinstance(instruction, Instruction) for instruction in instructions
        ), "Instruction instance expected"
        # Encode each instruction exactly once and reuse the result both for
        # the size check and as the stored bytecode
        encoded = [instruction.encode() for instruction in instructions]
        total_length = sum(len(bytecode) for bytecode in encoded)
        if self.size + total_length > self.capacity:
            raise BufferError()
        self.size += total_length
        for instruction, bytecode in zip(instructions, encoded):
            instruction.bytecode = bytecode
            self._instructions.append(instruction)

    def add_label_branch(self, instruction, label_address=None, long_encoding=False):
        """Append a branch-to-label instruction and record its branch info.

        :param instruction: BranchInstruction to add.
        :param label_address: address of the target label, or None if unknown.
        :param long_encoding: requested encoding form, forwarded to
            ``instruction._encode_label_branch``, whose returned flag is the
            one that gets recorded.
        :raises BufferError: if the encoded branch does not fit.
        """
        from nervapy.x86_64.instructions import BranchInstruction

        assert isinstance(
            instruction, BranchInstruction
        ), "BranchInstruction instance expected"
        long_encoding, bytecode = instruction._encode_label_branch(
            self.address + self.size, label_address, long_encoding
        )
        # NOTE(review): this check is strict, so a branch that would exactly
        # fill the bundle is rejected, whereas add() accepts an exact fit.
        # Confirm whether that is intended.
        if self.capacity - self.size <= len(bytecode):
            raise BufferError()
        self.size += len(bytecode)
        if not long_encoding and label_address is not None:
            # offset = label_address - self.end_address
            # -> self.end_address = label_address - offset
            # -> branch_address = label_address - len(bytecode) - offset
            # -> branch_position = label_address - self.start_address - len(bytecode) - offset
            #
            # -> branch_pos >= label_address - self.start_address - len(bytecode) - 127
            # -> branch_pos <= label_address - self.start_address - len(bytecode) + 128
            branch_pos = label_address - self.address - len(bytecode)
            branch_pos_max = min(branch_pos + 128, self.capacity)
        else:
            branch_pos_max = self.capacity
        instruction.bytecode = bytecode
        self.branch_info_map[len(self._instructions)] = (
            label_address,
            long_encoding,
            branch_pos_max,
        )
        self._instructions.append(instruction)

    def optimize(self):
        """Use up padding by switching instructions to longer encodings.

        Does nothing if the bundle contains any branch or label. Otherwise,
        while there is padding left, the instruction with the shortest current
        bytecode that has a longer encoding fitting into the padding is
        re-encoded with the shortest such encoding.
        """
        from nervapy.x86_64.instructions import BranchInstruction
        from nervapy.x86_64.pseudo import LABEL

        if any(
            isinstance(instruction, (BranchInstruction, LABEL))
            for instruction in self._instructions
        ):
            return

        def suitable_encodings(instruction):
            # (encoding, length) pairs strictly longer than the current
            # bytecode, growing it by no more than the remaining padding
            return [
                (encoding, length)
                for (length, encoding) in six.iteritems(
                    instruction.encode_length_options()
                )
                if 0 < length - len(instruction.bytecode) <= self.padding
            ]

        while self.size < self.capacity:
            suitable_instructions = [
                instr for instr in self._instructions if any(suitable_encodings(instr))
            ]
            if not suitable_instructions:
                break

            shortest_suitable_instruction = min(
                suitable_instructions, key=lambda instr: len(instr.bytecode)
            )
            new_encoding, new_length = min(
                suitable_encodings(shortest_suitable_instruction),
                key=operator.itemgetter(1),
            )
            self.size += new_length - len(shortest_suitable_instruction.bytecode)
            assert self.size <= self.capacity
            shortest_suitable_instruction.bytecode = new_encoding

    def finalize(self):
        """Fill the remaining space of the bundle with NOP instructions."""
        from nervapy.x86_64.generic import NOP

        while self.capacity > self.size:
            self.add([NOP()])
        self.size = self.capacity

    @property
    def label_address_map(self):
        """Map from label identifier to the code address of that label.

        Addresses are computed by walking the instructions from the bundle
        address and advancing by the bytecode length of every non-label.
        """
        from nervapy.x86_64.pseudo import LABEL

        label_address_map = dict()
        code_address = self.address
        for instruction in self._instructions:
            if isinstance(instruction, LABEL):
                label_address_map[instruction.identifier] = code_address
            else:
                code_address += len(instruction.bytecode)
        return label_address_map

    def __len__(self):
        """Number of bytes currently used in the bundle."""
        return self.size
2216
+
2217
+
2218
+ class EncodedFunction:
2219
+ """ABI-specific x86-64 assembly function.
2220
+
2221
+ A function consists of C signature, ABI, and a list of instructions without virtual registers.
2222
+ """
2223
+
2224
def __init__(self, function):
    """Encode an ABIFunction into code and constant-data sections.

    Copies the identifying attributes of ``function``, creates the code and
    const sections, deep-copies the instruction list, and then lays out the
    literal constants and encodes the instructions.
    """
    from copy import copy, deepcopy

    assert isinstance(function, ABIFunction), "ABIFunction object expected"
    self.name = function.name
    self.mangled_name = function.mangled_name
    self.arguments = [copy(argument) for argument in function.arguments]
    self.result_type = function.result_type
    self.target = function.target
    self.abi = function.abi

    from nervapy.x86_64.abi import native_client_x86_64_abi
    from nervapy.x86_64.meta import Section, SectionType

    if self.abi == native_client_x86_64_abi:
        # Align with HLT instruction
        fill_byte, code_alignment = 0xF4, 32
    else:
        # Align with INT 3 instruction
        fill_byte, code_alignment = 0xCC, 16
    self.code_section = Section(SectionType.code, alignment_byte=fill_byte)
    self.code_section.alignment = code_alignment

    self.const_section = Section(SectionType.const_data)

    # Work on a private copy so encoding never mutates the source function
    self._instructions = deepcopy(function._instructions)

    self._constant_symbol_map = dict()
    self._layout_literal_constants()
    self._encode()
2254
+
2255
def _layout_literal_constants(self):
    """Place the literal constants used by the instructions into the const section.

    Each distinct encoded constant value is appended to ``self.const_section``
    once; a symbol is created for each distinct constant name and recorded in
    ``self._constant_symbol_map``. All constants must share the same size and
    alignment (enforced by assertions).
    """
    from nervapy.encoder import Encoder
    from nervapy.x86_64.meta import Symbol, SymbolType

    encoder = Encoder(self.abi.endianness)

    # Collect the constants referenced by instructions (duplicates included)
    constants = list()
    for instruction in self._instructions:
        constant = instruction.constant
        if constant is not None:
            constants.append(constant)

    max_constant_size = 0
    max_constant_alignment = 0
    if constants:
        max_constant_size = max(constant.size for constant in constants)
        max_constant_alignment = max(constant.alignment for constant in constants)
        self.const_section.alignment = max_constant_alignment

    # Unsorted list of Symbol objects for constants
    constant_symbols = list()
    # This set is used to ensure that each constant is added only once
    constant_names_set = set()
    if max_constant_size != 0:
        # Map from constant value (as bytes) to address in the const data section
        constants_address_map = dict()

        for instruction in self._instructions:
            constant = instruction.constant
            if constant is not None:
                constant_value = bytes(constant.encode(encoder))
                if constant_value not in constants_address_map:
                    # Add the new constant to the section
                    assert (
                        constant.size == max_constant_size
                    ), "Handling of functions with constant literals of different size is not implemented"
                    assert (
                        constant.alignment == max_constant_alignment
                    ), "Handling of functions with constant literals of different alignment is not implemented"
                    # The new value starts at the current end of the section
                    constants_address_map[constant_value] = len(self.const_section)
                    self.const_section.content += constant_value
                if constant.name not in constant_names_set:
                    # Differently named constants with equal bytes share one
                    # address but each gets its own symbol
                    constant_names_set.add(constant.name)
                    const_symbol = Symbol(
                        constants_address_map[constant_value],
                        SymbolType.literal_constant,
                        name=constant.name,
                        size=constant.size,
                    )
                    constant_symbols.append(const_symbol)
                    self._constant_symbol_map[constant.name] = const_symbol
    # Register symbols ordered by offset, larger symbols first at equal offsets
    for constant_symbol in sorted(
        constant_symbols, key=lambda sym: (sym.offset, -sym.size)
    ):
        self.const_section.add_symbol(constant_symbol)
2310
+
2311
def _encode(self):
    """Encode all instructions into ``self.code_section``.

    Branches to labels are resolved iteratively: the instruction list is
    re-encoded until no label is unresolved and no branch was switched to the
    long encoding during the pass. For the Native Client ABI the instructions
    are additionally packed into 32-byte bundles. Instructions that reference
    a literal constant get their relocation registered with the code section.
    """
    from nervapy.x86_64.instructions import BranchInstruction
    from nervapy.x86_64.pseudo import LABEL

    # Label identifier -> code address, carried over between passes
    label_address_map = dict()
    # Positions (in self._instructions) of branches that need the long encoding
    long_branches = set()

    # Special post-processing for Native Client SFI
    from nervapy.x86_64.abi import native_client_x86_64_abi

    if self.abi == native_client_x86_64_abi:
        has_updated_branches = True
        has_unresolved_labels = True
        bundles = list()
        while has_updated_branches or has_unresolved_labels:
            # Every pass rebuilds the bundles from scratch
            code_address = 0
            has_updated_branches = False
            has_unresolved_labels = False
            bundles = list()
            current_bundle = InstructionBundle(32, code_address)
            for i, instruction in enumerate(self._instructions):
                if isinstance(instruction, LABEL):
                    label_address_map[instruction.identifier] = code_address
                    current_bundle.add([instruction])
                elif (
                    isinstance(instruction, BranchInstruction)
                    and instruction.label_name
                ):
                    label_address = label_address_map.get(instruction.label_name)
                    if label_address is None:
                        # Forward reference: another pass is needed
                        has_unresolved_labels = True
                    was_long = i in long_branches
                    is_long, instruction.bytecode = (
                        instruction._encode_label_branch(
                            code_address, label_address, long_encoding=was_long
                        )
                    )
                    if is_long and not was_long:
                        # Addresses after this branch shift: another pass is needed
                        long_branches.add(i)
                        has_updated_branches = True
                    try:
                        current_bundle.add_label_branch(
                            instruction, label_address, is_long
                        )
                    except BufferError:
                        # Does not fit: start a new bundle right after this one
                        bundles.append(current_bundle)
                        current_bundle = InstructionBundle(
                            32, current_bundle.address + current_bundle.capacity
                        )
                        current_bundle.add_label_branch(
                            instruction, label_address, is_long
                        )
                else:
                    instruction_group = [instruction]

                    memory_address = instruction.memory_address
                    from nervapy.x86_64.operand import MemoryAddress

                    if (
                        isinstance(memory_address, MemoryAddress)
                        and memory_address.index is not None
                    ):
                        from nervapy.stream import NullStream

                        # Prepend a 32-bit self-move of the index register; it
                        # is kept in the same bundle as the memory access.
                        # NOTE(review): presumably this zero-extends the index
                        # for NaCl sandboxing — confirm.
                        with NullStream():
                            from nervapy.x86_64.generic import MOV

                            instruction_group.insert(
                                0,
                                MOV(
                                    memory_address.index.as_dword,
                                    memory_address.index.as_dword,
                                ),
                            )
                    try:
                        current_bundle.add(instruction_group)
                    except BufferError:
                        # Does not fit: start a new bundle right after this one
                        bundles.append(current_bundle)
                        current_bundle = InstructionBundle(
                            32, current_bundle.address + current_bundle.capacity
                        )
                        current_bundle.add(instruction_group)
                code_address = current_bundle.address + current_bundle.size
            bundles.append(current_bundle)
        self._instructions = list()
        for bundle in bundles:
            bundle.optimize()
            for instruction in bundle._instructions:
                constant = instruction.constant
                if constant:
                    relocation = instruction.relocation
                    # Zero the 4 bytes that the relocation will patch
                    for index in range(relocation.offset, relocation.offset + 4):
                        instruction.bytecode[index] = 0
                    # Rebase instruction-relative offsets to section offsets
                    relocation.offset += len(self.code_section)
                    relocation.program_counter += len(self.code_section)
                    relocation.symbol = self._constant_symbol_map[
                        instruction.constant.name
                    ]
                    self.code_section.add_relocation(relocation)

                if instruction.bytecode:
                    self.code_section.content += instruction.bytecode
            if bundle.size < bundle.capacity:
                # Pad inner bundles with NOPs and the last bundle with abort bytes
                if bundle is not bundles[-1]:
                    self.code_section.content += self._encode_nops(
                        bundle.capacity - bundle.size
                    )
                else:
                    self.code_section.content += self._encode_abort(
                        bundle.capacity - bundle.size
                    )
    else:
        has_updated_branches = True
        has_unresolved_labels = True
        while has_updated_branches or has_unresolved_labels:
            code_address = 0
            has_updated_branches = False
            has_unresolved_labels = False
            for i, instruction in enumerate(self._instructions):
                if isinstance(instruction, LABEL):
                    label_address_map[instruction.identifier] = code_address
                elif (
                    isinstance(instruction, BranchInstruction)
                    and instruction.label_name
                ):
                    label_address = label_address_map.get(instruction.label_name)
                    if label_address is None:
                        # Forward reference: another pass is needed
                        has_unresolved_labels = True
                    was_long = i in long_branches
                    is_long, instruction.bytecode = (
                        instruction._encode_label_branch(
                            code_address, label_address, long_encoding=was_long
                        )
                    )
                    if is_long and not was_long:
                        # Addresses after this branch shift: another pass is needed
                        long_branches.add(i)
                        has_updated_branches = True
                else:
                    instruction.bytecode = instruction.encode()
                if instruction.bytecode:
                    code_address += len(instruction.bytecode)

        for instruction in self._instructions:
            constant = instruction.constant
            if constant:
                relocation = instruction.relocation
                # Zero the 4 bytes that the relocation will patch
                for index in range(relocation.offset, relocation.offset + 4):
                    instruction.bytecode[index] = 0
                # Rebase instruction-relative offsets to section offsets
                relocation.offset += len(self.code_section)
                relocation.program_counter += len(self.code_section)
                relocation.symbol = self._constant_symbol_map[
                    instruction.constant.name
                ]
                self.code_section.add_relocation(relocation)

            if instruction.bytecode:
                self.code_section.content += instruction.bytecode
2468
+
2469
def _encode_nops(self, length):
    """Return NOP padding that is exactly ``length`` bytes long (1 to 31).

    Lengths up to 15 use a single ``nop`` call, lengths up to 30 are split
    into two halves, and 31 is built as 8 + 8 + 15.
    """
    assert 1 <= length <= 31
    from nervapy.x86_64.encoding import nop

    if length > 30:
        return nop(8) + nop(8) + nop(15)
    if length > 15:
        first_half = length // 2
        return nop(first_half) + nop(length - first_half)
    return nop(length)
2479
+
2480
def _encode_abort(self, length):
    """Return filler bytes that trap if executed.

    :param length: number of bytes to fill.
    :returns: ``length`` HLT bytes for the Native Client ABI, a single INT 3
        byte for the Go assembler ABIs, and ``length`` INT 3 bytes otherwise.
    """
    from nervapy.x86_64.abi import (goasm_amd64_abi, goasm_amd64p32_abi,
                                    native_client_x86_64_abi)

    hlt_opcode = 0xF4
    # INT 3 is the one-byte opcode 0xCC. 0xCD is the first byte of the
    # two-byte "INT imm8" instruction and must not be used as filler.
    int3_opcode = 0xCC

    if self.abi == native_client_x86_64_abi:
        # Use HLT instructions
        return bytearray([hlt_opcode] * length)
    elif self.abi in {goasm_amd64_abi, goasm_amd64p32_abi}:
        # Use a single INT 3 instruction as alignment is not supported anyway
        return bytearray([int3_opcode])
    else:
        # Use INT 3 instructions
        return bytearray([int3_opcode] * length)
2493
+
2494
def format_code(
    self, assembly_format="peachpy", line_separator=os.linesep, indent=True
):
    """Return the encoded instructions as text, encoding line before listing line.

    For every instruction the result of ``format_encoding`` is followed by
    the result of ``format``. With ``line_separator=None`` the raw list is
    returned (it may contain None entries); otherwise None entries are
    dropped and the rest is joined with ``str(line_separator)``.
    """
    lines = []
    for instr in self._instructions:
        lines += [
            instr.format_encoding(indent=indent),
            instr.format(assembly_format=assembly_format, indent=indent),
        ]
    if line_separator is None:
        return lines
    return str(line_separator).join(line for line in lines if line is not None)
2509
+
2510
def format(self, assembly_format="peachpy", line_separator=os.linesep):
    """Format the assembly listing of the function.

    For the ``"go"`` format the listing starts with a ``TEXT`` directive
    naming ``<package_name>·<name>(SB)`` followed by the arguments ``4`` and
    ``$0``, and ends with an empty line (the assembler refuses to compile
    without it).  For any other format only the instruction listing from
    ``format_code`` is produced.

    :param assembly_format: name of the output format; forwarded to
        ``format_code``.
    :param line_separator: when ``None`` the list of lines is returned as is;
        otherwise empty/``None`` instruction lines are dropped and the rest
        is joined with ``str(line_separator)``.
    :return: a list of lines or a single string, see ``line_separator``.
    """
    is_go = assembly_format == "go"

    code = []
    if is_go:
        # Arguments for TEXT directive in Go assembler.
        # The package/symbol separator is the middle dot U+00B7; spelling it
        # as "\xc2\xb7" (its UTF-8 bytes) produces two characters in a
        # Python 3 str.
        text_arguments = [
            "%s\u00b7%s(SB)" % (self.package_name, self.name),
            "4",
            "$0",
        ]
        code.append("TEXT " + ",".join(text_arguments))

    code.extend(self.format_code(assembly_format, line_separator=None, indent=True))

    if line_separator is None:
        if is_go:
            # Add trailing line or assembler will refuse to compile
            code.append("")
        return code

    lines = [line for line in code if line]
    if is_go:
        # Appended after filtering so the trailing line is not discarded
        # together with the empty instruction lines.
        lines.append("")
    return str(line_separator).join(lines)
2532
+
2533
def load(self):
    """Wrap this encoded function in an ``ExecutableFuntion`` and return it."""
    executable = ExecutableFuntion(self)
    return executable
2535
+
2536
+
2537
class ExecutableFuntion:
    """Callable wrapper around an ``EncodedFunction`` loaded into this process.

    Construction copies the function's code and constant sections, resolves
    the code relocations against the addresses reported by
    ``nervapy.loader.Loader``, hands both segments to the loader and builds a
    ``ctypes`` function pointer at the loader's code address.  Calling the
    instance forwards the arguments to that function pointer.
    """

    def __init__(self, function):
        """Load ``function`` and prepare it for calling.

        :param function: the ``EncodedFunction`` to load.
        :raises ValueError: if the ABI of ``function`` differs from the ABI
            detected for the running process.
        """
        assert isinstance(function, EncodedFunction), "EncodedFunction object expected"
        import nervapy.x86_64.abi

        process_abi = nervapy.x86_64.abi.detect()
        if process_abi != function.abi:
            raise ValueError(
                "Function ABI (%s) does not match process ABI (%s)"
                % (str(function.abi), str(process_abi))
            )

        # Private copies: relocations below are patched into code_segment
        self.code_segment = bytearray(function.code_section.content)
        self.const_segment = bytearray(function.const_section.content)

        import nervapy.loader

        self.loader = nervapy.loader.Loader(
            len(self.code_segment), len(self.const_segment)
        )

        # Apply relocations
        from nervapy.util import is_sint32
        from nervapy.x86_64.meta import RelocationType

        for relocation in function.code_section.relocations:
            assert relocation.type == RelocationType.rip_disp32
            assert relocation.symbol in function.const_section.symbols
            self._apply_rip_disp32(relocation, is_sint32)
        assert not function.const_section.relocations

        self.loader.copy_code(self.code_segment)
        self.loader.copy_data(self.const_segment)

        import ctypes

        result_type = (
            None
            if function.result_type is None
            else function.result_type.as_ctypes_type
        )
        argument_types = [arg.c_type.as_ctypes_type for arg in function.arguments]
        self.function_type = ctypes.CFUNCTYPE(result_type, *argument_types)
        self.function_pointer = self.function_type(self.loader.code_address)

    def _apply_rip_disp32(self, relocation, is_sint32):
        """Patch one 4-byte little-endian displacement in ``code_segment``.

        The new displacement is the existing addend plus the distance from
        the relocation's program counter (relative to the loader's code
        address) to the symbol (relative to the loader's data address).
        """
        field = range(relocation.offset, relocation.offset + 4)

        old_value = 0
        for shift, index in enumerate(field):
            old_value |= self.code_segment[index] << (8 * shift)
        if old_value & 0x80000000:
            # A disp32 field is signed: sign-extend the stored addend
            old_value -= 1 << 32

        new_value = (
            old_value
            + (self.loader.data_address + relocation.symbol.offset)
            - (self.loader.code_address + relocation.program_counter)
        )
        assert is_sint32(new_value)

        for shift, index in enumerate(field):
            self.code_segment[index] = (new_value >> (8 * shift)) & 0xFF

    def __call__(self, *args):
        """Invoke the loaded function with ``args`` and return its result."""
        return self.function_pointer(*args)

    def __del__(self):
        """Drop the references to the loader and to the function pointer."""
        # __del__ also runs when __init__ raised before these attributes were
        # assigned (e.g. on an ABI mismatch), so plain assignment is used
        # instead of `del`, which would fail on a missing attribute.
        self.loader = None
        self.function_pointer = None
2604
+
2605
+
2606
class LocalVariable:
    """A local variable of a function, or a sub-range of another one.

    A variable has a ``size`` and an ``alignment``.  Sub-variables created
    through ``lo``/``hi`` point to their ``parent`` and carry an ``_offset``
    relative to it; ``root``, ``offset`` and ``address`` are derived by
    walking the parent chain.
    """

    def __init__(self, size_option, alignment=None):
        """Create a variable.

        :param size_option: a positive integer size, or a register whose
            ``size`` attribute is used as the size.
        :param alignment: a positive integer; defaults to the size.
        :raises TypeError: if ``alignment`` is not an integer or
            ``size_option`` is neither an integer nor a register.
        :raises ValueError: if ``alignment`` or an integer size is not
            positive.
        """
        from nervapy.util import is_int

        if alignment is not None and not is_int(alignment):
            raise TypeError("alignment %s is not an integer" % str(alignment))
        if alignment is not None and alignment <= 0:
            raise ValueError("alignment %d is not a positive integer" % alignment)
        self.alignment = alignment
        if is_int(size_option):
            if size_option <= 0:
                raise ValueError("size %d is not a positive integer" % size_option)
            self.size = size_option
        elif isinstance(size_option, nervapy.x86_64.registers.Register):
            self.size = size_option.size
        else:
            # Wrap in a 1-tuple so that a tuple argument is shown as is
            # instead of being unpacked as the format arguments.
            raise TypeError(
                "Unsupported size specification %s: register or integer expected"
                % (size_option,)
            )
        if self.alignment is None:
            self.alignment = self.size
        self._address = None
        self._offset = 0
        self.parent = None

    def __eq__(self, other):
        """Equal when both denote the same byte range of the same root."""
        return (
            isinstance(other, LocalVariable)
            and self.root is other.root
            and self.size == other.size
            and self.offset == other.offset
        )

    def __ne__(self, other):
        return (
            not isinstance(other, LocalVariable)
            or self.root is not other.root
            or self.size != other.size
            or self.offset != other.offset
        )

    def __hash__(self):
        # Built from the same three components that __eq__ compares
        return id(self.root) ^ hash(self.size) ^ hash(self.offset)

    def __str__(self):
        if self.address is not None:
            return "[" + str(self.address) + "]"
        else:
            return "local-variable<%d[%d:%d]>" % (
                id(self.root),
                self.offset,
                self.offset + self.size,
            )

    def __repr__(self):
        return str(self)

    @property
    def is_subvariable(self):
        """True when this variable was derived from a parent variable."""
        return self.parent is not None

    @property
    def root(self):
        """The top-most ancestor (the variable itself if it has no parent)."""
        root = self
        while root.parent is not None:
            root = root.parent
        return root

    @property
    def offset(self):
        """Offset from the start of the root variable."""
        node = self
        offset = 0
        while node.parent is not None:
            offset += node._offset
            node = node.parent
        return offset

    @property
    def address(self):
        """Address of the variable, or ``None`` while it is not assigned.

        For a sub-variable this is the root's address plus ``offset``.
        """
        if self.is_subvariable:
            base_address = self.root.address
            if base_address is not None:
                return base_address + self.offset
            return None
        else:
            return self._address

    def _half(self, offset):
        """Create a sub-variable covering half of this one at ``offset``."""
        assert self.size % 2 == 0
        half_size = self.size // 2
        child = LocalVariable(half_size, min(half_size, self.alignment))
        child.parent = self
        child._offset = offset
        return child

    @property
    def lo(self):
        """A new sub-variable for the first half of this variable."""
        return self._half(0)

    @property
    def hi(self):
        """A new sub-variable for the second half of this variable."""
        return self._half(self.size // 2)