PyNerva 0.0.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nervapy/__init__.py +50 -0
- nervapy/abi.py +91 -0
- nervapy/arm/__init__.py +124 -0
- nervapy/arm/__main__.py +0 -0
- nervapy/arm/abi.py +138 -0
- nervapy/arm/formats.py +49 -0
- nervapy/arm/function.py +2465 -0
- nervapy/arm/generic.py +10796 -0
- nervapy/arm/instructions.py +519 -0
- nervapy/arm/isa.py +409 -0
- nervapy/arm/literal_pool.py +331 -0
- nervapy/arm/microarchitecture.py +211 -0
- nervapy/arm/pseudo.py +652 -0
- nervapy/arm/registers.py +1458 -0
- nervapy/arm/vfpneon.py +4092 -0
- nervapy/arm.py +13 -0
- nervapy/c/__init__.py +1 -0
- nervapy/c/types.py +436 -0
- nervapy/codegen.py +99 -0
- nervapy/common/__init__.py +4 -0
- nervapy/common/function.py +5 -0
- nervapy/common/regalloc.py +121 -0
- nervapy/constant_data.py +282 -0
- nervapy/encoder.py +246 -0
- nervapy/formats/__init__.py +2 -0
- nervapy/formats/elf/__init__.py +4 -0
- nervapy/formats/elf/file.py +178 -0
- nervapy/formats/elf/image.py +106 -0
- nervapy/formats/elf/section.py +422 -0
- nervapy/formats/elf/symbol.py +281 -0
- nervapy/formats/macho/__init__.py +2 -0
- nervapy/formats/macho/file.py +123 -0
- nervapy/formats/macho/image.py +143 -0
- nervapy/formats/macho/section.py +322 -0
- nervapy/formats/macho/symbol.py +158 -0
- nervapy/formats/mscoff/__init__.py +8 -0
- nervapy/formats/mscoff/image.py +132 -0
- nervapy/formats/mscoff/section.py +181 -0
- nervapy/formats/mscoff/symbol.py +148 -0
- nervapy/function.py +136 -0
- nervapy/literal.py +731 -0
- nervapy/loader.py +188 -0
- nervapy/name.py +159 -0
- nervapy/parse.py +52 -0
- nervapy/stream.py +58 -0
- nervapy/util.py +126 -0
- nervapy/writer.py +518 -0
- nervapy/x86_64/__init__.py +324 -0
- nervapy/x86_64/__main__.py +407 -0
- nervapy/x86_64/abi.py +517 -0
- nervapy/x86_64/amd.py +6464 -0
- nervapy/x86_64/avx.py +102029 -0
- nervapy/x86_64/crypto.py +1533 -0
- nervapy/x86_64/encoding.py +424 -0
- nervapy/x86_64/fma.py +19138 -0
- nervapy/x86_64/function.py +2707 -0
- nervapy/x86_64/generic.py +23384 -0
- nervapy/x86_64/instructions.py +500 -0
- nervapy/x86_64/isa.py +476 -0
- nervapy/x86_64/lower.py +126 -0
- nervapy/x86_64/mask.py +2593 -0
- nervapy/x86_64/meta.py +143 -0
- nervapy/x86_64/mmxsse.py +17265 -0
- nervapy/x86_64/nacl.py +327 -0
- nervapy/x86_64/operand.py +1204 -0
- nervapy/x86_64/options.py +21 -0
- nervapy/x86_64/pseudo.py +686 -0
- nervapy/x86_64/registers.py +1225 -0
- nervapy/x86_64/types.py +17 -0
- nervapy/x86_64/uarch.py +580 -0
- pynerva-0.0.7.dist-info/METADATA +310 -0
- pynerva-0.0.7.dist-info/RECORD +74 -0
- pynerva-0.0.7.dist-info/WHEEL +4 -0
- pynerva-0.0.7.dist-info/licenses/LICENSE.rst +15 -0
nervapy/x86_64/isa.py
ADDED
|
@@ -0,0 +1,476 @@
|
|
|
1
|
+
# This file is part of PeachPy package and is licensed under the Simplified BSD license.
|
|
2
|
+
# See license.rst for the full text of the license.
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class Extension:
    """A named x86-64 instruction-set extension.

    Instances are identified by ``name``: equality and hashing use the name
    only.  ``safe_name`` is an alternative spelling stored alongside the name
    and defaults to ``name`` when not given.

    The ``prerequisites`` and ``ancestors`` tables refer to the module-level
    ``Extension`` objects (``mmx``, ``sse``, ...).  Those names are looked up
    when the property is read, not when the class is defined.
    """

    def __init__(self, name, safe_name=None):
        assert isinstance(name, str), "name must be a string"
        self.name = name
        if safe_name is None:
            self.safe_name = self.name
        else:
            self.safe_name = safe_name

    def __hash__(self):
        return hash(self.name)

    def __eq__(self, other):
        # Comparing with a foreign object used to raise AttributeError on
        # ``other.name``; returning NotImplemented lets Python fall back to
        # its default (unequal) answer instead.
        if not isinstance(other, Extension):
            return NotImplemented
        return self.name == other.name

    def __ne__(self, other):
        if not isinstance(other, Extension):
            return NotImplemented
        return self.name != other.name

    def __gt__(self, other):
        """Return True when ``other`` is listed in this extension's prerequisites."""
        return other in self.prerequisites

    def __lt__(self, other):
        """Return True when this extension is listed in ``other``'s prerequisites."""
        return self in other.prerequisites

    @property
    def prerequisites(self):
        """Tuple of extensions registered as prerequisites for this name.

        The tuple includes the extension itself.  Raises ``KeyError`` when
        ``self.name`` is not one of the names in the table.
        """
        # Shared prefixes of the tuples below.
        sse_chain = (mmx, mmx_plus, sse)
        sse2_chain = sse_chain + (sse2,)
        sse3_chain = sse2_chain + (sse3,)
        ssse3_chain = sse3_chain + (ssse3,)
        sse4_1_chain = ssse3_chain + (sse4_1,)
        sse4_2_chain = sse4_1_chain + (sse4_2,)
        avx_chain = sse4_2_chain + (avx,)
        # The AVX2 and AVX-512 tuples have always listed sse4_2 twice; the
        # duplicate is retained so the returned tuples are unchanged.
        avx2_chain = sse4_2_chain + (sse4_2, avx, f16c, fma3, avx2)
        avx512f_chain = avx2_chain + (avx512f,)
        return {
            "RDTSC": (rdtsc,),
            "RDTSCP": (rdtsc, rdtscp),
            "CPUID": (cpuid,),
            "MMX": (mmx,),
            "MMX+": (mmx, mmx_plus),
            "3dnow!": (mmx, three_d_now, prefetch, prefetchw),
            "3dnow!+": (mmx, three_d_now, three_d_now_plus, prefetch, prefetchw),
            "FEMMS": (mmx, femms),
            "SSE": sse_chain,
            "SSE2": sse2_chain,
            "SSE3": sse3_chain,
            "SSSE3": ssse3_chain,
            "SSE4A": sse3_chain + (sse4a,),
            "SSE4.1": sse4_1_chain,
            "SSE4.2": sse4_2_chain,
            "AES": sse4_2_chain + (aes,),
            "PCLMULQDQ": sse4_2_chain + (pclmulqdq,),
            "RDRAND": (rdrand,),
            "RDSEED": (rdrand, rdseed),
            "SHA": (sha,),
            "AVX": avx_chain,
            "F16C": avx_chain + (f16c,),
            "AVX2": avx2_chain,
            "XOP": avx_chain + (xop,),
            "FMA3": avx_chain + (fma3,),
            "FMA4": avx_chain + (fma4,),
            "AVX512F": avx512f_chain,
            "AVX512BW": avx512f_chain + (avx512bw,),
            "AVX512DQ": avx512f_chain + (avx512dq,),
            "AVX512VL": avx512f_chain + (avx512vl,),
            "AVX512CD": avx512f_chain + (avx512cd,),
            "AVX512PF": avx512f_chain + (avx512pf,),
            "AVX512ER": avx512f_chain + (avx512er,),
            "AVX512VBMI": avx512f_chain + (avx512vbmi,),
            "AVX512IFMA": avx512f_chain + (avx512ifma,),
            "AVX512VPOPCNTDQ": avx512f_chain + (avx512vpopcntdq,),
            "AVX512_4VNNIW": avx512f_chain + (avx512_4vnniw,),
            "AVX512_4FMAPS": avx512f_chain + (avx512_4fmaps,),
            "PREFETCH": (prefetch,),
            "PREFETCHW": (prefetchw,),
            "PREFETCHWT1": (prefetchwt1,),
            "CLFLUSH": (clflush,),
            "CLFLUSHOPT": (clflush, clflushopt),
            "CLWB": (clwb,),
            "CLZERO": (clzero,),
            "CMOV": (cmov,),
            "POPCNT": (popcnt,),
            "LZCNT": (lzcnt,),
            "MOVBE": (movbe,),
            "BMI": (bmi,),
            "BMI2": (bmi, bmi2),
            "TBM": (tbm,),
            "ADX": (adx,),
        }[self.name]

    @property
    def ancestors(self):
        """Tuple of extensions registered as ancestors for this name.

        The tuple includes the extension itself.  Raises ``KeyError`` when
        ``self.name`` is not one of the names in the table.
        """
        return {
            "RDTSC": (rdtsc,),
            "RDTSCP": (rdtsc, rdtscp),
            "CPUID": (cpuid,),
            "MMX": (mmx,),
            "MMX+": (mmx, mmx_plus),
            "3dnow!": (mmx, three_d_now),
            "3dnow!+": (mmx, three_d_now, three_d_now_plus),
            "FEMMS": (femms,),
            "SSE": (sse,),
            "SSE2": (sse, sse2),
            "SSE3": (sse, sse2, sse3),
            "SSSE3": (sse, sse2, sse3, ssse3),
            "SSE4A": (sse, sse2, sse3, sse4a),
            "SSE4.1": (sse, sse2, sse3, ssse3, sse4_1),
            "SSE4.2": (sse, sse2, sse3, ssse3, sse4_1, sse4_2),
            "AES": (aes,),
            "PCLMULQDQ": (pclmulqdq,),
            "RDRAND": (rdrand,),
            "RDSEED": (rdrand, rdseed),
            "SHA": (sha,),
            "AVX": (avx,),
            "F16C": (f16c,),
            "AVX2": (avx, avx2),
            "XOP": (xop,),
            "FMA3": (fma3,),
            "FMA4": (fma4,),
            "AVX512F": (avx, fma3, f16c, avx2, avx512f),
            "AVX512BW": (avx, fma3, f16c, avx2, avx512f, avx512bw),
            "AVX512DQ": (avx, fma3, f16c, avx2, avx512f, avx512dq),
            "AVX512VL": (avx, fma3, f16c, avx2, avx512f, avx512vl),
            "AVX512ER": (avx, fma3, f16c, avx2, avx512f, avx512er),
            "AVX512PF": (avx, fma3, f16c, avx2, avx512f, avx512pf),
            "AVX512CD": (avx, fma3, f16c, avx2, avx512f, avx512cd),
            "AVX512VBMI": (avx, fma3, f16c, avx2, avx512f, avx512vbmi),
            "AVX512IFMA": (avx, fma3, f16c, avx2, avx512f, avx512ifma),
            "AVX512VPOPCNTDQ": (avx, f16c, fma3, avx2, avx512f, avx512vpopcntdq),
            "AVX512_4VNNIW": (avx, f16c, fma3, avx2, avx512f, avx512_4vnniw),
            "AVX512_4FMAPS": (avx, f16c, fma3, avx2, avx512f, avx512_4fmaps),
            "PREFETCH": (prefetch,),
            "PREFETCHW": (prefetchw,),
            "PREFETCHWT1": (prefetchwt1,),
            "CLFLUSH": (clflush,),
            "CLFLUSHOPT": (clflush, clflushopt),
            "CLWB": (clwb,),
            "CLZERO": (clzero,),
            "CMOV": (cmov,),
            "POPCNT": (popcnt,),
            "LZCNT": (lzcnt,),
            "MOVBE": (movbe,),
            "BMI": (bmi,),
            "BMI2": (bmi, bmi2),
            "TBM": (tbm,),
            "ADX": (adx,),
        }[self.name]

    def __add__(self, extension):
        """Combine this extension with another into an ``Extensions`` collection."""
        return Extensions(self, extension)

    def __str__(self):
        return self.name

    def __repr__(self):
        return str(self)
|
|
374
|
+
|
|
375
|
+
|
|
376
|
+
# Module-level Extension objects, one per table key used by
# Extension.prerequisites / Extension.ancestors.  safe_name is supplied
# wherever the display name contains "+", "!" or ".".

# Timestamp counter and CPU identification
rdtsc = Extension("RDTSC")
rdtscp = Extension("RDTSCP")
cpuid = Extension("CPUID")

# MMX / 3dnow! family
mmx = Extension("MMX")
mmx_plus = Extension("MMX+", safe_name="MMXPlus")
three_d_now = Extension("3dnow!", safe_name="3dnow")
three_d_now_plus = Extension("3dnow!+", safe_name="3dnowPlus")
femms = Extension("FEMMS")

# SSE family
sse = Extension("SSE")
sse2 = Extension("SSE2")
sse3 = Extension("SSE3")
ssse3 = Extension("SSSE3")
sse4a = Extension("SSE4A")
sse4_1 = Extension("SSE4.1", safe_name="SSE4_1")
sse4_2 = Extension("SSE4.2", safe_name="SSE4_2")

# AES, carry-less multiply, random numbers, SHA
aes = Extension("AES")
pclmulqdq = Extension("PCLMULQDQ")
rdrand = Extension("RDRAND")
rdseed = Extension("RDSEED")
sha = Extension("SHA")

# AVX family
avx = Extension("AVX")
avx2 = Extension("AVX2")
avx512f = Extension("AVX512F")
avx512pf = Extension("AVX512PF")
avx512cd = Extension("AVX512CD")
avx512er = Extension("AVX512ER")
avx512dq = Extension("AVX512DQ")
avx512bw = Extension("AVX512BW")
avx512vl = Extension("AVX512VL")
avx512ifma = Extension("AVX512IFMA")
avx512vbmi = Extension("AVX512VBMI")
avx512vpopcntdq = Extension("AVX512VPOPCNTDQ")
avx512_4vnniw = Extension("AVX512_4VNNIW")
avx512_4fmaps = Extension("AVX512_4FMAPS")

# Prefetch and cache-line control
prefetch = Extension("PREFETCH")
prefetchw = Extension("PREFETCHW")
prefetchwt1 = Extension("PREFETCHWT1")
clflush = Extension("CLFLUSH")
clflushopt = Extension("CLFLUSHOPT")
clwb = Extension("CLWB")
clzero = Extension("CLZERO")

# XOP, half-precision conversion, fused multiply-add
xop = Extension("XOP")
f16c = Extension("F16C")
fma3 = Extension("FMA3")
fma4 = Extension("FMA4")

# General-purpose integer extensions
cmov = Extension("CMOV")
popcnt = Extension("POPCNT")
lzcnt = Extension("LZCNT")
movbe = Extension("MOVBE")
bmi = Extension("BMI")
bmi2 = Extension("BMI2")
tbm = Extension("TBM")
adx = Extension("ADX")

# Tuple of extensions exported under the name "default".
default = (cpuid, rdtsc, cmov, mmx, mmx_plus, sse, sse2)
|
|
430
|
+
|
|
431
|
+
|
|
432
|
+
class Extensions:
    """A set-like collection of ``Extension`` objects.

    The constructor accepts any mix of ``Extension`` objects, other
    ``Extensions`` collections (whose members are merged in) and ``None``
    (ignored).  Members are stored in the ``extensions`` set.
    """

    def __init__(self, *args):
        self.extensions = set()
        for extension in args:
            if extension is None:
                continue
            if isinstance(extension, Extensions):
                # Merge the members.  set.add() would try to insert the
                # whole (unhashable) set as one element and raise TypeError.
                self.extensions.update(extension.extensions)
            elif isinstance(extension, Extension):
                self.extensions.add(extension)
            else:
                assert False, "Each argument must be an Extension or Extensions object"

    def minify(self):
        """Return the members that are not an ancestor of another member.

        The result is a list in reverse sorted order (sorting relies on
        ``Extension.__lt__``).
        """
        ordered = list(reversed(sorted(self.extensions)))
        # Collect every redundant ancestor first and filter afterwards:
        # removing items from ``ordered`` while looping over it can skip
        # elements.
        redundant = set()
        for extension in ordered:
            for ancestor in extension.ancestors:
                if ancestor != extension:
                    redundant.add(ancestor)
        return [extension for extension in ordered if extension not in redundant]

    def __add__(self, extension):
        """Return a new collection holding these members plus ``extension``."""
        return Extensions(extension, *self.extensions)

    def __sub__(self, extension):
        """Return a new collection without ``extension``.

        Raises ``KeyError`` when ``extension`` is not a member.
        """
        if extension not in self.extensions:
            raise KeyError("Extension set does not contain {0}".format(extension))
        remaining = set(self.extensions)
        # Sets do not support ``del s[x]``; remove() is the set operation.
        remaining.remove(extension)
        return Extensions(*remaining)

    def __str__(self):
        return ", ".join(sorted(map(str, self.minify())))

    def __contains__(self, extension):
        return extension in self.extensions

    def __len__(self):
        return len(self.extensions)

    def __bool__(self):
        # Explicit truthiness hook; agrees with __len__.
        return bool(self.extensions)

    def __not__(self):
        # Not a Python special method (``not obj`` uses __bool__/__len__);
        # kept so existing explicit calls keep working.
        return not self.extensions

    def __iter__(self):
        return iter(self.extensions)
|
nervapy/x86_64/lower.py
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
from nervapy import Type
|
|
2
|
+
from nervapy.stream import NullStream
|
|
3
|
+
from nervapy.x86_64 import m128, m128d, m128i, m256, m256d, m256i
|
|
4
|
+
from nervapy.x86_64.avx import VMOVAPD, VMOVAPS, VMOVDQA, VMOVSD, VMOVSS
|
|
5
|
+
from nervapy.x86_64.generic import MOV, MOVSX, MOVSXD, MOVZX
|
|
6
|
+
from nervapy.x86_64.mmxsse import MOVAPD, MOVAPS, MOVDQA, MOVQ, MOVSD, MOVSS
|
|
7
|
+
from nervapy.x86_64.operand import byte, dword, word
|
|
8
|
+
from nervapy.x86_64.registers import (GeneralPurposeRegister, MMXRegister,
|
|
9
|
+
XMMRegister, YMMRegister)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def load_register(dst_reg, src_reg, data_type, prototype):
    """Build the move instruction that copies ``src_reg`` into ``dst_reg``.

    :param dst_reg: destination register; its ``size`` must be at least
        ``src_reg.size``.
    :param src_reg: source register.
    :param data_type: ``Type`` of the value held in ``src_reg``; selects
        sign- or zero-extension for general-purpose registers and the move
        flavour for XMM/YMM registers.
    :param prototype: forwarded to the instruction as the ``prototype``
        keyword; its ``avx_mode`` attribute picks between the VEX and legacy
        SSE forms for XMM moves.
    :returns: the constructed instruction, or ``None`` on the paths where
        source and destination are the same register and no move is built.
    :raises AssertionError: if a precondition fails or ``dst_reg`` is not a
        general-purpose, MMX, XMM or YMM register.
    """
    assert dst_reg.size >= src_reg.size
    assert isinstance(data_type, Type)
    # NOTE(review): instructions are built inside NullStream(), presumably so
    # that constructing them does not append to an active instruction stream
    # -- confirm against nervapy.stream.
    with NullStream():
        if isinstance(dst_reg, GeneralPurposeRegister):
            if dst_reg.size == src_reg.size:
                # A same-register MOV is still built for 4-byte registers
                # (presumably to clear the upper half of the 64-bit
                # register -- TODO confirm).
                if dst_reg != src_reg or dst_reg.size == 4:
                    return MOV(dst_reg, src_reg, prototype=prototype)
            elif (dst_reg.size, src_reg.size) == (8, 4):
                if data_type.is_signed_integer:
                    return MOVSXD(dst_reg, src_reg, prototype=prototype)
                return MOV(dst_reg.as_dword, src_reg, prototype=prototype)
            else:
                if data_type.is_signed_integer:
                    return MOVSX(dst_reg, src_reg, prototype=prototype)
                if dst_reg.size == 8:
                    return MOVZX(dst_reg.as_dword, src_reg, prototype=prototype)
                return MOVZX(dst_reg, src_reg, prototype=prototype)
        elif isinstance(dst_reg, MMXRegister):
            if dst_reg != src_reg:
                return MOVQ(dst_reg, src_reg, prototype=prototype)
        elif isinstance(dst_reg, XMMRegister):
            if dst_reg != src_reg:
                use_avx = bool(prototype.avx_mode)
                if data_type.is_floating_point:
                    assert data_type.size in [4, 8]
                    xmm_fp_mov = {
                        (4, True): VMOVAPS,
                        (4, False): MOVSS,
                        (8, True): VMOVAPD,
                        (8, False): MOVSD,
                    }[(data_type.size, use_avx)]
                    return xmm_fp_mov(dst_reg, src_reg, prototype=prototype)
                assert data_type in [m128, m128d, m128i]
                xmm_mov = {
                    (m128, True): VMOVAPS,
                    (m128, False): MOVAPS,
                    (m128d, True): VMOVAPD,
                    (m128d, False): MOVAPD,
                    (m128i, True): VMOVDQA,
                    (m128i, False): MOVDQA,
                }[(data_type, use_avx)]
                return xmm_mov(dst_reg, src_reg, prototype=prototype)
        elif isinstance(dst_reg, YMMRegister):
            if dst_reg != src_reg:
                ymm_mov = {m256: VMOVAPS, m256d: VMOVAPD, m256i: VMOVDQA}[data_type]
                return ymm_mov(dst_reg, src_reg, prototype=prototype)
        else:
            # The message used to concatenate a str with the class object,
            # which raised TypeError instead of the intended AssertionError.
            assert False, "Unexpected type: " + type(dst_reg).__name__
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def load_memory(dst_reg, src_address, src_type, prototype):
    """Build the instruction that loads a ``src_type`` value from memory.

    :param dst_reg: destination register; its ``size`` must be at least
        ``src_type.size``.
    :param src_address: address expression used as the memory operand.
    :param src_type: ``Type`` of the value in memory; selects sign- or
        zero-extension for general-purpose registers and the move flavour
        for XMM registers.
    :param prototype: forwarded to the instruction as the ``prototype``
        keyword; its ``avx_mode`` attribute picks between the VEX and legacy
        SSE forms for XMM loads.
    :returns: the constructed instruction.  NOTE(review): there is no branch
        for YMM or other register classes, so those fall through and return
        ``None`` -- confirm whether that is intended.
    :raises AssertionError: if a precondition fails.
    """
    assert dst_reg.size >= src_type.size
    assert isinstance(src_type, Type)
    # NOTE(review): instructions are built inside NullStream(), presumably so
    # that constructing them does not append to an active instruction stream
    # -- confirm against nervapy.stream.
    with NullStream():
        if isinstance(dst_reg, GeneralPurposeRegister):
            if dst_reg.size == src_type.size:
                return MOV(dst_reg, [src_address], prototype=prototype)
            elif (dst_reg.size, src_type.size) == (8, 4):
                if src_type.is_signed_integer:
                    return MOVSXD(dst_reg, dword[src_address], prototype=prototype)
                return MOV(dst_reg.as_dword, dword[src_address], prototype=prototype)
            else:
                # Narrow source: the operand needs an explicit size specifier.
                size_spec = {1: byte, 2: word, 4: dword}[src_type.size]
                if src_type.is_signed_integer:
                    return MOVSX(dst_reg, size_spec[src_address], prototype=prototype)
                if dst_reg.size == 8:
                    return MOVZX(
                        dst_reg.as_dword, size_spec[src_address], prototype=prototype
                    )
                return MOVZX(dst_reg, size_spec[src_address], prototype=prototype)
        elif isinstance(dst_reg, MMXRegister):
            # prototype is passed by keyword, consistent with every other
            # instruction call in this module (it was positional here).
            return MOVQ(dst_reg, [src_address], prototype=prototype)
        elif isinstance(dst_reg, XMMRegister):
            if src_type.is_floating_point:
                assert src_type.size in [4, 8]
                if src_type.size == 4:
                    scalar_mov = VMOVSS if prototype.avx_mode else MOVSS
                else:
                    scalar_mov = VMOVSD if prototype.avx_mode else MOVSD
                return scalar_mov(dst_reg, [src_address], prototype=prototype)
            assert src_type in [m128, m128d, m128i]
            if src_type == m128:
                vector_mov = VMOVAPS if prototype.avx_mode else MOVAPS
            elif src_type == m128d:
                vector_mov = VMOVAPD if prototype.avx_mode else MOVAPD
            else:
                vector_mov = VMOVDQA if prototype.avx_mode else MOVDQA
            return vector_mov(dst_reg, [src_address], prototype=prototype)
|