ring-native 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (261) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +9 -0
  3. data/Gemfile +3 -0
  4. data/README.md +22 -0
  5. data/Rakefile +1 -0
  6. data/ext/ring/extconf.rb +29 -0
  7. data/lib/ring/native.rb +8 -0
  8. data/lib/ring/native/version.rb +5 -0
  9. data/ring-native.gemspec +25 -0
  10. data/vendor/ring/BUILDING.md +40 -0
  11. data/vendor/ring/Cargo.toml +43 -0
  12. data/vendor/ring/LICENSE +185 -0
  13. data/vendor/ring/Makefile +35 -0
  14. data/vendor/ring/PORTING.md +163 -0
  15. data/vendor/ring/README.md +113 -0
  16. data/vendor/ring/STYLE.md +197 -0
  17. data/vendor/ring/appveyor.yml +27 -0
  18. data/vendor/ring/build.rs +108 -0
  19. data/vendor/ring/crypto/aes/aes.c +1142 -0
  20. data/vendor/ring/crypto/aes/aes_test.Windows.vcxproj +25 -0
  21. data/vendor/ring/crypto/aes/aes_test.cc +93 -0
  22. data/vendor/ring/crypto/aes/asm/aes-586.pl +2368 -0
  23. data/vendor/ring/crypto/aes/asm/aes-armv4.pl +1249 -0
  24. data/vendor/ring/crypto/aes/asm/aes-x86_64.pl +2246 -0
  25. data/vendor/ring/crypto/aes/asm/aesni-x86.pl +1318 -0
  26. data/vendor/ring/crypto/aes/asm/aesni-x86_64.pl +2084 -0
  27. data/vendor/ring/crypto/aes/asm/aesv8-armx.pl +675 -0
  28. data/vendor/ring/crypto/aes/asm/bsaes-armv7.pl +1364 -0
  29. data/vendor/ring/crypto/aes/asm/bsaes-x86_64.pl +1565 -0
  30. data/vendor/ring/crypto/aes/asm/vpaes-x86.pl +841 -0
  31. data/vendor/ring/crypto/aes/asm/vpaes-x86_64.pl +1116 -0
  32. data/vendor/ring/crypto/aes/internal.h +87 -0
  33. data/vendor/ring/crypto/aes/mode_wrappers.c +61 -0
  34. data/vendor/ring/crypto/bn/add.c +394 -0
  35. data/vendor/ring/crypto/bn/asm/armv4-mont.pl +694 -0
  36. data/vendor/ring/crypto/bn/asm/armv8-mont.pl +1503 -0
  37. data/vendor/ring/crypto/bn/asm/bn-586.pl +774 -0
  38. data/vendor/ring/crypto/bn/asm/co-586.pl +287 -0
  39. data/vendor/ring/crypto/bn/asm/rsaz-avx2.pl +1882 -0
  40. data/vendor/ring/crypto/bn/asm/x86-mont.pl +592 -0
  41. data/vendor/ring/crypto/bn/asm/x86_64-gcc.c +599 -0
  42. data/vendor/ring/crypto/bn/asm/x86_64-mont.pl +1393 -0
  43. data/vendor/ring/crypto/bn/asm/x86_64-mont5.pl +3507 -0
  44. data/vendor/ring/crypto/bn/bn.c +352 -0
  45. data/vendor/ring/crypto/bn/bn_asn1.c +74 -0
  46. data/vendor/ring/crypto/bn/bn_test.Windows.vcxproj +25 -0
  47. data/vendor/ring/crypto/bn/bn_test.cc +1696 -0
  48. data/vendor/ring/crypto/bn/cmp.c +200 -0
  49. data/vendor/ring/crypto/bn/convert.c +433 -0
  50. data/vendor/ring/crypto/bn/ctx.c +311 -0
  51. data/vendor/ring/crypto/bn/div.c +594 -0
  52. data/vendor/ring/crypto/bn/exponentiation.c +1335 -0
  53. data/vendor/ring/crypto/bn/gcd.c +711 -0
  54. data/vendor/ring/crypto/bn/generic.c +1019 -0
  55. data/vendor/ring/crypto/bn/internal.h +316 -0
  56. data/vendor/ring/crypto/bn/montgomery.c +516 -0
  57. data/vendor/ring/crypto/bn/mul.c +888 -0
  58. data/vendor/ring/crypto/bn/prime.c +829 -0
  59. data/vendor/ring/crypto/bn/random.c +334 -0
  60. data/vendor/ring/crypto/bn/rsaz_exp.c +262 -0
  61. data/vendor/ring/crypto/bn/rsaz_exp.h +53 -0
  62. data/vendor/ring/crypto/bn/shift.c +276 -0
  63. data/vendor/ring/crypto/bytestring/bytestring_test.Windows.vcxproj +25 -0
  64. data/vendor/ring/crypto/bytestring/bytestring_test.cc +421 -0
  65. data/vendor/ring/crypto/bytestring/cbb.c +399 -0
  66. data/vendor/ring/crypto/bytestring/cbs.c +227 -0
  67. data/vendor/ring/crypto/bytestring/internal.h +46 -0
  68. data/vendor/ring/crypto/chacha/chacha_generic.c +140 -0
  69. data/vendor/ring/crypto/chacha/chacha_vec.c +323 -0
  70. data/vendor/ring/crypto/chacha/chacha_vec_arm.S +1447 -0
  71. data/vendor/ring/crypto/chacha/chacha_vec_arm_generate.go +153 -0
  72. data/vendor/ring/crypto/cipher/cipher_test.Windows.vcxproj +25 -0
  73. data/vendor/ring/crypto/cipher/e_aes.c +390 -0
  74. data/vendor/ring/crypto/cipher/e_chacha20poly1305.c +208 -0
  75. data/vendor/ring/crypto/cipher/internal.h +173 -0
  76. data/vendor/ring/crypto/cipher/test/aes_128_gcm_tests.txt +543 -0
  77. data/vendor/ring/crypto/cipher/test/aes_128_key_wrap_tests.txt +9 -0
  78. data/vendor/ring/crypto/cipher/test/aes_256_gcm_tests.txt +475 -0
  79. data/vendor/ring/crypto/cipher/test/aes_256_key_wrap_tests.txt +23 -0
  80. data/vendor/ring/crypto/cipher/test/chacha20_poly1305_old_tests.txt +422 -0
  81. data/vendor/ring/crypto/cipher/test/chacha20_poly1305_tests.txt +484 -0
  82. data/vendor/ring/crypto/cipher/test/cipher_test.txt +100 -0
  83. data/vendor/ring/crypto/constant_time_test.Windows.vcxproj +25 -0
  84. data/vendor/ring/crypto/constant_time_test.c +304 -0
  85. data/vendor/ring/crypto/cpu-arm-asm.S +32 -0
  86. data/vendor/ring/crypto/cpu-arm.c +199 -0
  87. data/vendor/ring/crypto/cpu-intel.c +261 -0
  88. data/vendor/ring/crypto/crypto.c +151 -0
  89. data/vendor/ring/crypto/curve25519/asm/x25519-arm.S +2118 -0
  90. data/vendor/ring/crypto/curve25519/curve25519.c +4888 -0
  91. data/vendor/ring/crypto/curve25519/x25519_test.cc +128 -0
  92. data/vendor/ring/crypto/digest/md32_common.h +181 -0
  93. data/vendor/ring/crypto/ec/asm/p256-x86_64-asm.pl +2725 -0
  94. data/vendor/ring/crypto/ec/ec.c +193 -0
  95. data/vendor/ring/crypto/ec/ec_curves.c +61 -0
  96. data/vendor/ring/crypto/ec/ec_key.c +228 -0
  97. data/vendor/ring/crypto/ec/ec_montgomery.c +114 -0
  98. data/vendor/ring/crypto/ec/example_mul.Windows.vcxproj +25 -0
  99. data/vendor/ring/crypto/ec/internal.h +243 -0
  100. data/vendor/ring/crypto/ec/oct.c +253 -0
  101. data/vendor/ring/crypto/ec/p256-64.c +1794 -0
  102. data/vendor/ring/crypto/ec/p256-x86_64-table.h +9548 -0
  103. data/vendor/ring/crypto/ec/p256-x86_64.c +509 -0
  104. data/vendor/ring/crypto/ec/simple.c +1007 -0
  105. data/vendor/ring/crypto/ec/util-64.c +183 -0
  106. data/vendor/ring/crypto/ec/wnaf.c +508 -0
  107. data/vendor/ring/crypto/ecdh/ecdh.c +155 -0
  108. data/vendor/ring/crypto/ecdsa/ecdsa.c +304 -0
  109. data/vendor/ring/crypto/ecdsa/ecdsa_asn1.c +193 -0
  110. data/vendor/ring/crypto/ecdsa/ecdsa_test.Windows.vcxproj +25 -0
  111. data/vendor/ring/crypto/ecdsa/ecdsa_test.cc +327 -0
  112. data/vendor/ring/crypto/header_removed.h +17 -0
  113. data/vendor/ring/crypto/internal.h +495 -0
  114. data/vendor/ring/crypto/libring.Windows.vcxproj +101 -0
  115. data/vendor/ring/crypto/mem.c +98 -0
  116. data/vendor/ring/crypto/modes/asm/aesni-gcm-x86_64.pl +1045 -0
  117. data/vendor/ring/crypto/modes/asm/ghash-armv4.pl +517 -0
  118. data/vendor/ring/crypto/modes/asm/ghash-x86.pl +1393 -0
  119. data/vendor/ring/crypto/modes/asm/ghash-x86_64.pl +1741 -0
  120. data/vendor/ring/crypto/modes/asm/ghashv8-armx.pl +422 -0
  121. data/vendor/ring/crypto/modes/ctr.c +226 -0
  122. data/vendor/ring/crypto/modes/gcm.c +1206 -0
  123. data/vendor/ring/crypto/modes/gcm_test.Windows.vcxproj +25 -0
  124. data/vendor/ring/crypto/modes/gcm_test.c +348 -0
  125. data/vendor/ring/crypto/modes/internal.h +299 -0
  126. data/vendor/ring/crypto/perlasm/arm-xlate.pl +170 -0
  127. data/vendor/ring/crypto/perlasm/readme +100 -0
  128. data/vendor/ring/crypto/perlasm/x86_64-xlate.pl +1164 -0
  129. data/vendor/ring/crypto/perlasm/x86asm.pl +292 -0
  130. data/vendor/ring/crypto/perlasm/x86gas.pl +263 -0
  131. data/vendor/ring/crypto/perlasm/x86masm.pl +200 -0
  132. data/vendor/ring/crypto/perlasm/x86nasm.pl +187 -0
  133. data/vendor/ring/crypto/poly1305/poly1305.c +331 -0
  134. data/vendor/ring/crypto/poly1305/poly1305_arm.c +301 -0
  135. data/vendor/ring/crypto/poly1305/poly1305_arm_asm.S +2015 -0
  136. data/vendor/ring/crypto/poly1305/poly1305_test.Windows.vcxproj +25 -0
  137. data/vendor/ring/crypto/poly1305/poly1305_test.cc +80 -0
  138. data/vendor/ring/crypto/poly1305/poly1305_test.txt +52 -0
  139. data/vendor/ring/crypto/poly1305/poly1305_vec.c +892 -0
  140. data/vendor/ring/crypto/rand/asm/rdrand-x86_64.pl +75 -0
  141. data/vendor/ring/crypto/rand/internal.h +32 -0
  142. data/vendor/ring/crypto/rand/rand.c +189 -0
  143. data/vendor/ring/crypto/rand/urandom.c +219 -0
  144. data/vendor/ring/crypto/rand/windows.c +56 -0
  145. data/vendor/ring/crypto/refcount_c11.c +66 -0
  146. data/vendor/ring/crypto/refcount_lock.c +53 -0
  147. data/vendor/ring/crypto/refcount_test.Windows.vcxproj +25 -0
  148. data/vendor/ring/crypto/refcount_test.c +58 -0
  149. data/vendor/ring/crypto/rsa/blinding.c +462 -0
  150. data/vendor/ring/crypto/rsa/internal.h +108 -0
  151. data/vendor/ring/crypto/rsa/padding.c +300 -0
  152. data/vendor/ring/crypto/rsa/rsa.c +450 -0
  153. data/vendor/ring/crypto/rsa/rsa_asn1.c +261 -0
  154. data/vendor/ring/crypto/rsa/rsa_impl.c +944 -0
  155. data/vendor/ring/crypto/rsa/rsa_test.Windows.vcxproj +25 -0
  156. data/vendor/ring/crypto/rsa/rsa_test.cc +437 -0
  157. data/vendor/ring/crypto/sha/asm/sha-armv8.pl +436 -0
  158. data/vendor/ring/crypto/sha/asm/sha-x86_64.pl +2390 -0
  159. data/vendor/ring/crypto/sha/asm/sha256-586.pl +1275 -0
  160. data/vendor/ring/crypto/sha/asm/sha256-armv4.pl +735 -0
  161. data/vendor/ring/crypto/sha/asm/sha256-armv8.pl +14 -0
  162. data/vendor/ring/crypto/sha/asm/sha256-x86_64.pl +14 -0
  163. data/vendor/ring/crypto/sha/asm/sha512-586.pl +911 -0
  164. data/vendor/ring/crypto/sha/asm/sha512-armv4.pl +666 -0
  165. data/vendor/ring/crypto/sha/asm/sha512-armv8.pl +14 -0
  166. data/vendor/ring/crypto/sha/asm/sha512-x86_64.pl +14 -0
  167. data/vendor/ring/crypto/sha/sha1.c +271 -0
  168. data/vendor/ring/crypto/sha/sha256.c +204 -0
  169. data/vendor/ring/crypto/sha/sha512.c +355 -0
  170. data/vendor/ring/crypto/test/file_test.cc +326 -0
  171. data/vendor/ring/crypto/test/file_test.h +181 -0
  172. data/vendor/ring/crypto/test/malloc.cc +150 -0
  173. data/vendor/ring/crypto/test/scoped_types.h +95 -0
  174. data/vendor/ring/crypto/test/test.Windows.vcxproj +35 -0
  175. data/vendor/ring/crypto/test/test_util.cc +46 -0
  176. data/vendor/ring/crypto/test/test_util.h +41 -0
  177. data/vendor/ring/crypto/thread_none.c +55 -0
  178. data/vendor/ring/crypto/thread_pthread.c +165 -0
  179. data/vendor/ring/crypto/thread_test.Windows.vcxproj +25 -0
  180. data/vendor/ring/crypto/thread_test.c +200 -0
  181. data/vendor/ring/crypto/thread_win.c +282 -0
  182. data/vendor/ring/examples/checkdigest.rs +103 -0
  183. data/vendor/ring/include/openssl/aes.h +121 -0
  184. data/vendor/ring/include/openssl/arm_arch.h +129 -0
  185. data/vendor/ring/include/openssl/base.h +156 -0
  186. data/vendor/ring/include/openssl/bn.h +794 -0
  187. data/vendor/ring/include/openssl/buffer.h +18 -0
  188. data/vendor/ring/include/openssl/bytestring.h +235 -0
  189. data/vendor/ring/include/openssl/chacha.h +37 -0
  190. data/vendor/ring/include/openssl/cmac.h +76 -0
  191. data/vendor/ring/include/openssl/cpu.h +184 -0
  192. data/vendor/ring/include/openssl/crypto.h +43 -0
  193. data/vendor/ring/include/openssl/curve25519.h +88 -0
  194. data/vendor/ring/include/openssl/ec.h +225 -0
  195. data/vendor/ring/include/openssl/ec_key.h +129 -0
  196. data/vendor/ring/include/openssl/ecdh.h +110 -0
  197. data/vendor/ring/include/openssl/ecdsa.h +156 -0
  198. data/vendor/ring/include/openssl/err.h +201 -0
  199. data/vendor/ring/include/openssl/mem.h +101 -0
  200. data/vendor/ring/include/openssl/obj_mac.h +71 -0
  201. data/vendor/ring/include/openssl/opensslfeatures.h +68 -0
  202. data/vendor/ring/include/openssl/opensslv.h +18 -0
  203. data/vendor/ring/include/openssl/ossl_typ.h +18 -0
  204. data/vendor/ring/include/openssl/poly1305.h +51 -0
  205. data/vendor/ring/include/openssl/rand.h +70 -0
  206. data/vendor/ring/include/openssl/rsa.h +399 -0
  207. data/vendor/ring/include/openssl/thread.h +133 -0
  208. data/vendor/ring/include/openssl/type_check.h +71 -0
  209. data/vendor/ring/mk/Common.props +63 -0
  210. data/vendor/ring/mk/Windows.props +42 -0
  211. data/vendor/ring/mk/WindowsTest.props +18 -0
  212. data/vendor/ring/mk/appveyor.bat +62 -0
  213. data/vendor/ring/mk/bottom_of_makefile.mk +54 -0
  214. data/vendor/ring/mk/ring.mk +266 -0
  215. data/vendor/ring/mk/top_of_makefile.mk +214 -0
  216. data/vendor/ring/mk/travis.sh +40 -0
  217. data/vendor/ring/mk/update-travis-yml.py +229 -0
  218. data/vendor/ring/ring.sln +153 -0
  219. data/vendor/ring/src/aead.rs +682 -0
  220. data/vendor/ring/src/agreement.rs +248 -0
  221. data/vendor/ring/src/c.rs +129 -0
  222. data/vendor/ring/src/constant_time.rs +37 -0
  223. data/vendor/ring/src/der.rs +96 -0
  224. data/vendor/ring/src/digest.rs +690 -0
  225. data/vendor/ring/src/digest_tests.txt +57 -0
  226. data/vendor/ring/src/ecc.rs +28 -0
  227. data/vendor/ring/src/ecc_build.rs +279 -0
  228. data/vendor/ring/src/ecc_curves.rs +117 -0
  229. data/vendor/ring/src/ed25519_tests.txt +2579 -0
  230. data/vendor/ring/src/exe_tests.rs +46 -0
  231. data/vendor/ring/src/ffi.rs +29 -0
  232. data/vendor/ring/src/file_test.rs +187 -0
  233. data/vendor/ring/src/hkdf.rs +153 -0
  234. data/vendor/ring/src/hkdf_tests.txt +59 -0
  235. data/vendor/ring/src/hmac.rs +414 -0
  236. data/vendor/ring/src/hmac_tests.txt +97 -0
  237. data/vendor/ring/src/input.rs +312 -0
  238. data/vendor/ring/src/lib.rs +41 -0
  239. data/vendor/ring/src/pbkdf2.rs +265 -0
  240. data/vendor/ring/src/pbkdf2_tests.txt +113 -0
  241. data/vendor/ring/src/polyfill.rs +57 -0
  242. data/vendor/ring/src/rand.rs +28 -0
  243. data/vendor/ring/src/signature.rs +314 -0
  244. data/vendor/ring/third-party/NIST/README.md +9 -0
  245. data/vendor/ring/third-party/NIST/SHAVS/SHA1LongMsg.rsp +263 -0
  246. data/vendor/ring/third-party/NIST/SHAVS/SHA1Monte.rsp +309 -0
  247. data/vendor/ring/third-party/NIST/SHAVS/SHA1ShortMsg.rsp +267 -0
  248. data/vendor/ring/third-party/NIST/SHAVS/SHA224LongMsg.rsp +263 -0
  249. data/vendor/ring/third-party/NIST/SHAVS/SHA224Monte.rsp +309 -0
  250. data/vendor/ring/third-party/NIST/SHAVS/SHA224ShortMsg.rsp +267 -0
  251. data/vendor/ring/third-party/NIST/SHAVS/SHA256LongMsg.rsp +263 -0
  252. data/vendor/ring/third-party/NIST/SHAVS/SHA256Monte.rsp +309 -0
  253. data/vendor/ring/third-party/NIST/SHAVS/SHA256ShortMsg.rsp +267 -0
  254. data/vendor/ring/third-party/NIST/SHAVS/SHA384LongMsg.rsp +519 -0
  255. data/vendor/ring/third-party/NIST/SHAVS/SHA384Monte.rsp +309 -0
  256. data/vendor/ring/third-party/NIST/SHAVS/SHA384ShortMsg.rsp +523 -0
  257. data/vendor/ring/third-party/NIST/SHAVS/SHA512LongMsg.rsp +519 -0
  258. data/vendor/ring/third-party/NIST/SHAVS/SHA512Monte.rsp +309 -0
  259. data/vendor/ring/third-party/NIST/SHAVS/SHA512ShortMsg.rsp +523 -0
  260. data/vendor/ring/third-party/NIST/sha256sums.txt +1 -0
  261. metadata +333 -0
@@ -0,0 +1,2246 @@
1
+ #!/usr/bin/env perl
2
+ #
3
+ # ====================================================================
4
+ # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5
+ # project. The module is, however, dual licensed under OpenSSL and
6
+ # CRYPTOGAMS licenses depending on where you obtain it. For further
7
+ # details see http://www.openssl.org/~appro/cryptogams/.
8
+ # ====================================================================
9
+ #
10
+ # Version 2.1.
11
+ #
12
+ # aes-*-cbc benchmarks are improved by >70% [compared to gcc 3.3.2 on
13
+ # Opteron 240 CPU] plus all the bells-n-whistles from 32-bit version
14
+ # [you'll notice a lot of resemblance], such as compressed S-boxes
15
+ # in little-endian byte order, prefetch of these tables in CBC mode,
16
+ # as well as avoiding L1 cache aliasing between stack frame and key
17
+ # schedule and already mentioned tables, compressed Td4...
18
+ #
19
+ # Performance in number of cycles per processed byte for 128-bit key:
20
+ #
21
+ # ECB encrypt ECB decrypt CBC large chunk
22
+ # AMD64 33 43 13.0
23
+ # EM64T 38 56 18.6(*)
24
+ # Core 2 30 42 14.5(*)
25
+ # Atom 65 86 32.1(*)
26
+ #
27
+ # (*) with hyper-threading off
28
+
29
# Command line: [flavour] output.  A first argument that contains a dot is
# taken to be the output file name, which leaves the flavour undefined.
$flavour = shift;
$output = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

# Win64 is selected by a nasm/masm/mingw64 flavour or by a ".asm" output name.
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

# Look for the x86_64-xlate.pl translator next to this script first, then
# in ../../perlasm/ relative to it.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# From here on everything printed to STDOUT is piped through the translator.
# The translator path is quoted so that a directory name containing spaces
# survives the shell, and a failure to start the pipe is fatal instead of
# being silently ignored.
open OUT,"| \"$^X\" \"$xlate\" $flavour $output"
    or die "can't call $xlate: $!";
*STDOUT=*OUT;
42
+
43
# Selects the "vertical" round generators (encvert/enclastvert) over the
# per-column ones (encstep/enclast) when the round code is emitted.
$verticalspin=1; # unlike 32-bit version $verticalspin performs
                 # ~15% better on both AMD and Intel cores
$speed_limit=512; # see aes-586.pl for details

# $code accumulates the generated assembly text; it starts in .text.
$code=".text\n";

# Register map shared by the generators in this file.
# $s0..$s3: the four 32-bit words being transformed (asm_AES_encrypt loads
# the input vector into them and stores them back as the output vector).
$s0="%eax";
$s1="%ebx";
$s2="%ecx";
$s3="%edx";
# $acc0..$acc2: 32-bit scratch/index registers.  $mask80, $maskfe and
# $mask1b name the 64-bit views of the same three registers; their users
# are not in this part of the file.
$acc0="%esi"; $mask80="%rsi";
$acc1="%edi"; $maskfe="%rdi";
$acc2="%ebp"; $mask1b="%rbp";
# NOTE: several generators use %r8d/%r9d as extra temporaries, which
# overwrites $inp/$out (see the "zaps $inp!" remarks below).
$inp="%r8";
$out="%r9";
# $t0..$t2: 32-bit temporaries that collect the new state words.
$t0="%r10d";
$t1="%r11d";
$t2="%r12d";
$rnds="%r13d"; # round count, loaded from 240($key)
$sbox="%r14"; # base address of the lookup table in use
$key="%r15"; # current position in the key schedule
64
+
65
# hi(REG): rewrite the first %e?x / %r?x register name (a, b, c or d) found
# in REG to its high-byte form, e.g. "%eax" -> "%ah".  A string with no such
# register is returned unchanged.  Invoke as &hi(...); the & form bypasses
# the empty prototype.
# The captured letter is written as ${1}: a backslash reference on the
# replacement side of s/// draws a warning under -w.
sub hi() { my $r=shift; $r =~ s/%[er]([a-d])x/%${1}h/; $r; }
66
# lo(REG): rewrite a register name to its low-byte form.  Three rewrites are
# tried in order:
#   %eax/%rax style (a, b, c, d)  -> %al, %bl, %cl, %dl
#   %esi/%rsi, %edi/%rdi          -> %sil, %dil
#   %rN or %rNd                   -> %rNb
# Invoke as &lo(...); the & form bypasses the empty prototype.
# Captures are written as ${1}: a backslash reference on the replacement
# side of s/// draws a warning under -w.
sub lo()
{
    my $r = shift;
    $r =~ s/%[er]([a-d])x/%${1}l/;
    $r =~ s/%[er]([sd]i)/%${1}l/;
    $r =~ s/%(r[0-9]+)[d]?/%${1}b/;
    $r;
}
69
# LO(REG): rewrite a 64-bit register name to its 32-bit form:
#   %r<letters> -> %e<letters>   (e.g. "%rsi" -> "%esi")
#   %r<digits>  -> %r<digits>d   (e.g. "%r10" -> "%r10d")
# Invoke as &LO(...); the & form bypasses the empty prototype.
# Captures are written as ${1}: a backslash reference on the replacement
# side of s/// draws a warning under -w.
sub LO()
{
    my $r = shift;
    $r =~ s/%r([a-z]+)/%e${1}/;
    $r =~ s/%r([0-9]+)/%r${1}d/;
    $r;
}
71
# _data_word(LIST): for every value in LIST, up to the first undefined one,
# append a ".long" line to $code that carries the value twice.
sub _data_word()
{
    while (@_) {
        my $word = shift;
        last unless defined($word);
        $code .= sprintf(".long\t0x%08x,0x%08x\n", $word, $word);
    }
}
75
# data_word(LIST): append one ".long" line to $code holding every value of
# LIST as a comma-separated 32-bit hex constant.  The last argument is set
# aside first and always closes the line; the leading values are emitted
# up to the first undefined one.
sub data_word()
{
    my $final = pop(@_);
    my $line  = ".long\t";
    while (@_) {
        my $word = shift;
        last unless defined($word);
        $line .= sprintf("0x%08x,", $word);
    }
    $code .= $line . sprintf("0x%08x\n", $final);
}
82
+
83
# data_byte(LIST): append one ".byte" line to $code holding every value of
# LIST, each reduced to its low 8 bits, as comma-separated hex constants.
# The last argument is set aside first and always closes the line; the
# leading values are emitted up to the first undefined one.
sub data_byte()
{
    my $final = pop(@_);
    my $line  = ".byte\t";
    while (@_) {
        my $byte = shift;
        last unless defined($byte);
        $line .= sprintf("0x%02x,", $byte & 0xff);
    }
    $code .= $line . sprintf("0x%02x\n", $final & 0xff);
}
90
+
91
# encvert(): append to $code the assembly for one full table-driven
# encryption round over all four state words ("vertical" layout: the
# lookups for the four columns are interleaved).
#
# Each byte of $s0..$s3 is zero-extended into an $acc register and used to
# index the table at $sbox with an 8-byte stride; byte offsets 0, 3, 2 and 1
# into the entry are read for the four byte positions.  The results are
# collected in $t0..$t3, $key is advanced by 16, and the next four round-key
# words are loaded into $s0..$s3 and xor-ed with $t0..$t3.
#
# The `&lo(...)`/`&hi(...)` backtick expressions are left in the text
# as-is here; presumably they are expanded when $code is post-processed at
# the end of the script (not visible in this part of the file).
sub encvert()
{ my $t3="%r8d"; # zaps $inp!  (%r8d is the low half of $inp's %r8)

$code.=<<___;
# favor 3-way issue Opteron pipeline...
movzb `&lo("$s0")`,$acc0
movzb `&lo("$s1")`,$acc1
movzb `&lo("$s2")`,$acc2
mov 0($sbox,$acc0,8),$t0
mov 0($sbox,$acc1,8),$t1
mov 0($sbox,$acc2,8),$t2

movzb `&hi("$s1")`,$acc0
movzb `&hi("$s2")`,$acc1
movzb `&lo("$s3")`,$acc2
xor 3($sbox,$acc0,8),$t0
xor 3($sbox,$acc1,8),$t1
mov 0($sbox,$acc2,8),$t3

movzb `&hi("$s3")`,$acc0
shr \$16,$s2
movzb `&hi("$s0")`,$acc2
xor 3($sbox,$acc0,8),$t2
shr \$16,$s3
xor 3($sbox,$acc2,8),$t3

shr \$16,$s1
lea 16($key),$key
shr \$16,$s0

movzb `&lo("$s2")`,$acc0
movzb `&lo("$s3")`,$acc1
movzb `&lo("$s0")`,$acc2
xor 2($sbox,$acc0,8),$t0
xor 2($sbox,$acc1,8),$t1
xor 2($sbox,$acc2,8),$t2

movzb `&hi("$s3")`,$acc0
movzb `&hi("$s0")`,$acc1
movzb `&lo("$s1")`,$acc2
xor 1($sbox,$acc0,8),$t0
xor 1($sbox,$acc1,8),$t1
xor 2($sbox,$acc2,8),$t3

mov 12($key),$s3
movzb `&hi("$s1")`,$acc1
movzb `&hi("$s2")`,$acc2
mov 0($key),$s0
xor 1($sbox,$acc1,8),$t2
xor 1($sbox,$acc2,8),$t3

mov 4($key),$s1
mov 8($key),$s2
xor $t0,$s0
xor $t1,$s1
xor $t2,$s2
xor $t3,$s3
___
}
150
+
151
# enclastvert(): append to $code the assembly for the final encryption
# round in the "vertical" layout.
#
# Unlike encvert(), every table read contributes exactly one byte: the low
# byte comes from a movzb at offset 2 of the entry, and the other three are
# full-word loads (offset 0 or 2) masked with 0x0000ff00, 0x00ff0000 or
# 0xff000000 before being xor-ed into $t0..$t3.  $key is not advanced here;
# the round key is read from 16+N($key) and xor-ed with $t0..$t3 into
# $s0..$s3.
#
# The `&lo(...)`/`&hi(...)` backtick expressions are left in the text
# as-is here; presumably they are expanded when $code is post-processed at
# the end of the script (not visible in this part of the file).
sub enclastvert()
{ my $t3="%r8d"; # zaps $inp!  (%r8d is the low half of $inp's %r8)

$code.=<<___;
movzb `&lo("$s0")`,$acc0
movzb `&lo("$s1")`,$acc1
movzb `&lo("$s2")`,$acc2
movzb 2($sbox,$acc0,8),$t0
movzb 2($sbox,$acc1,8),$t1
movzb 2($sbox,$acc2,8),$t2

movzb `&lo("$s3")`,$acc0
movzb `&hi("$s1")`,$acc1
movzb `&hi("$s2")`,$acc2
movzb 2($sbox,$acc0,8),$t3
mov 0($sbox,$acc1,8),$acc1 #$t0
mov 0($sbox,$acc2,8),$acc2 #$t1

and \$0x0000ff00,$acc1
and \$0x0000ff00,$acc2

xor $acc1,$t0
xor $acc2,$t1
shr \$16,$s2

movzb `&hi("$s3")`,$acc0
movzb `&hi("$s0")`,$acc1
shr \$16,$s3
mov 0($sbox,$acc0,8),$acc0 #$t2
mov 0($sbox,$acc1,8),$acc1 #$t3

and \$0x0000ff00,$acc0
and \$0x0000ff00,$acc1
shr \$16,$s1
xor $acc0,$t2
xor $acc1,$t3
shr \$16,$s0

movzb `&lo("$s2")`,$acc0
movzb `&lo("$s3")`,$acc1
movzb `&lo("$s0")`,$acc2
mov 0($sbox,$acc0,8),$acc0 #$t0
mov 0($sbox,$acc1,8),$acc1 #$t1
mov 0($sbox,$acc2,8),$acc2 #$t2

and \$0x00ff0000,$acc0
and \$0x00ff0000,$acc1
and \$0x00ff0000,$acc2

xor $acc0,$t0
xor $acc1,$t1
xor $acc2,$t2

movzb `&lo("$s1")`,$acc0
movzb `&hi("$s3")`,$acc1
movzb `&hi("$s0")`,$acc2
mov 0($sbox,$acc0,8),$acc0 #$t3
mov 2($sbox,$acc1,8),$acc1 #$t0
mov 2($sbox,$acc2,8),$acc2 #$t1

and \$0x00ff0000,$acc0
and \$0xff000000,$acc1
and \$0xff000000,$acc2

xor $acc0,$t3
xor $acc1,$t0
xor $acc2,$t1

movzb `&hi("$s1")`,$acc0
movzb `&hi("$s2")`,$acc1
mov 16+12($key),$s3
mov 2($sbox,$acc0,8),$acc0 #$t2
mov 2($sbox,$acc1,8),$acc1 #$t3
mov 16+0($key),$s0

and \$0xff000000,$acc0
and \$0xff000000,$acc1

xor $acc0,$t2
xor $acc1,$t3

mov 16+4($key),$s1
mov 16+8($key),$s2
xor $t0,$s0
xor $t1,$s1
xor $t2,$s2
xor $t3,$s3
___
}
240
+
241
# encstep(COLUMN, S0, S1, S2, S3): append to $code the assembly that
# computes one output word of a table-driven round.
#
# COLUMN is 0..3 and S0..S3 are the four source registers, rotated by the
# caller so that S0 is this column's first input.  The word is built from
# four lookups in the table at $sbox (entry offsets 0, 3, 2, 1) and xor-ed
# with the round-key word at 4*COLUMN($key).  Column 0 also advances $key
# by 16 first.
sub encstep()
{
    my ($col, @state) = @_;
    my $wrap = ($col == 3);

    # Columns 0..2 collect their result in $t0..$t2 and use the $acc
    # registers as scratch.  Column 3 writes straight into its own first
    # source register and uses the other three sources as scratch.
    my $dst = ($t0, $t1, $t2, $state[0])[$col];
    my ($idx_a, $idx_b, $idx_c) = $wrap ? ($state[1], $state[2], $state[3])
                                        : ($acc0, $acc1, $acc2);

    my @asm;
    push @asm, " movzb " . &lo($state[0]) . ",$dst";
    push @asm, " mov $state[2],$idx_b" unless $wrap;
    push @asm, " lea 16($key),$key" if $col == 0;

    push @asm, " movzb " . &hi($state[1]) . ",$idx_a";
    push @asm, " mov 0($sbox,$dst,8),$dst";

    push @asm, " shr \$16,$idx_b";
    push @asm, " mov $state[3],$idx_c" unless $wrap;
    push @asm, " xor 3($sbox,$idx_a,8),$dst";

    push @asm, " movzb " . &lo($idx_b) . ",$idx_b";
    push @asm, " shr \$24,$idx_c";
    push @asm, " xor 4*$col($key),$dst";

    push @asm, " xor 2($sbox,$idx_b,8),$dst";
    push @asm, " xor 1($sbox,$idx_c,8),$dst";

    # After the last column, move the three parked words back into place.
    if ($wrap) {
        push @asm, " mov $t0,$state[1]";
        push @asm, " mov $t1,$state[2]";
        push @asm, " mov $t2,$state[3]";
    }

    # One instruction per line, followed by a blank separator line.
    $code .= join("\n", @asm) . "\n\n";
}
276
+
277
# enclast(COLUMN, S0, S1, S2, S3): append to $code the assembly that
# computes one output word of the final round, without the round-key xor
# (the caller emits that separately).
#
# COLUMN is 0..3 and S0..S3 are the four source registers, rotated by the
# caller so that S0 is this column's first input.  Each of the four table
# reads is masked down to a single byte lane (0x000000ff, 0x0000ff00,
# 0x00ff0000, 0xff000000) before being combined.
sub enclast()
{
    my ($col, @state) = @_;
    my $wrap = ($col == 3);

    # Columns 0..2 collect their result in $t0..$t2 and use the $acc
    # registers as scratch.  Column 3 writes straight into its own first
    # source register and uses the other three sources as scratch.
    my $dst = ($t0, $t1, $t2, $state[0])[$col];
    my ($idx_a, $idx_b, $idx_c) = $wrap ? ($state[1], $state[2], $state[3])
                                        : ($acc0, $acc1, $acc2);

    my @asm;
    push @asm, " movzb " . &lo($state[0]) . ",$dst";
    push @asm, " mov $state[2],$idx_b" unless $wrap;

    push @asm, " mov 2($sbox,$dst,8),$dst";
    push @asm, " shr \$16,$idx_b";
    push @asm, " mov $state[3],$idx_c" unless $wrap;

    push @asm, " and \$0x000000ff,$dst";
    push @asm, " movzb " . &hi($state[1]) . ",$idx_a";
    push @asm, " movzb " . &lo($idx_b) . ",$idx_b";
    push @asm, " shr \$24,$idx_c";

    push @asm, " mov 0($sbox,$idx_a,8),$idx_a";
    push @asm, " mov 0($sbox,$idx_b,8),$idx_b";
    push @asm, " mov 2($sbox,$idx_c,8),$idx_c";

    push @asm, " and \$0x0000ff00,$idx_a";
    push @asm, " and \$0x00ff0000,$idx_b";
    push @asm, " and \$0xff000000,$idx_c";

    # For the last column the moves that put $t0..$t2 back into the state
    # registers are interleaved with the final xors.
    push @asm, " xor $idx_a,$dst";
    push @asm, " mov $t0,$state[1]" if $wrap;
    push @asm, " xor $idx_b,$dst";
    push @asm, " mov $t1,$state[2]" if $wrap;
    push @asm, " xor $idx_c,$dst";
    push @asm, " mov $t2,$state[3]" if $wrap;

    # One instruction per line, followed by a blank separator line.
    $code .= join("\n", @asm) . "\n\n";
}
317
+
318
# Emit the internal routine _x86_64_AES_encrypt.
#
# Generated code: xor the state in $s0..$s3 with the first four key words,
# load the round count from 240($key) and subtract one, run the .Lenc_loop
# body until the counter reaches zero, then emit the final round and return
# with "rep ret".
$code.=<<___;
.type _x86_64_AES_encrypt,\@abi-omnipotent
.align 16
_x86_64_AES_encrypt:
xor 0($key),$s0 # xor with key
xor 4($key),$s1
xor 8($key),$s2
xor 12($key),$s3

mov 240($key),$rnds # load key->rounds
sub \$1,$rnds
jmp .Lenc_loop
.align 16
.Lenc_loop:
___
# Loop body: either the interleaved generator or one encstep() per column,
# with the state registers rotated by one position for each column.
if ($verticalspin) { &encvert(); }
else {
    &encstep(0,$s0,$s1,$s2,$s3);
    &encstep(1,$s1,$s2,$s3,$s0);
    &encstep(2,$s2,$s3,$s0,$s1);
    &encstep(3,$s3,$s0,$s1,$s2);
}
$code.=<<___;
sub \$1,$rnds
jnz .Lenc_loop
___
# Final round.  enclastvert() applies the last round key itself; the
# per-column enclast() does not, so that xor is emitted here.
if ($verticalspin) { &enclastvert(); }
else {
    &enclast(0,$s0,$s1,$s2,$s3);
    &enclast(1,$s1,$s2,$s3,$s0);
    &enclast(2,$s2,$s3,$s0,$s1);
    &enclast(3,$s3,$s0,$s1,$s2);
    $code.=<<___;
xor 16+0($key),$s0 # xor with key
xor 16+4($key),$s1
xor 16+8($key),$s2
xor 16+12($key),$s3
___
}
$code.=<<___;
.byte 0xf3,0xc3 # rep ret
.size _x86_64_AES_encrypt,.-_x86_64_AES_encrypt
___
359
+
360
+ # it's possible to implement this by shifting tN by 8, filling least
361
+ # significant byte with byte load and finally bswap-ing at the end,
362
+ # but such partial register load kills Core 2...
363
# enccompactvert(): append to $code the byte-substitution part of one round
# of the "compact" encryptor.
#
# All sixteen lookups are single-byte reads (movzb) from the table at $sbox
# with a stride of 1.  The bytes are shifted into position with shl 8/16/24
# and xor-ed together; the result ends up in $s0..$s3.
#
# The extra temporaries overlap registers that have other names in this
# file: $t3 is %r8d ($inp), $t4 is %r9d ($out) and $t5 is %r13d ($rnds),
# so those values do not survive this code.
#
# The `&lo(...)`/`&hi(...)` backtick expressions are left in the text
# as-is here; presumably they are expanded when $code is post-processed at
# the end of the script (not visible in this part of the file).
sub enccompactvert()
{ my ($t3,$t4,$t5)=("%r8d","%r9d","%r13d");

$code.=<<___;
movzb `&lo("$s0")`,$t0
movzb `&lo("$s1")`,$t1
movzb `&lo("$s2")`,$t2
movzb `&lo("$s3")`,$t3
movzb `&hi("$s1")`,$acc0
movzb `&hi("$s2")`,$acc1
shr \$16,$s2
movzb `&hi("$s3")`,$acc2
movzb ($sbox,$t0,1),$t0
movzb ($sbox,$t1,1),$t1
movzb ($sbox,$t2,1),$t2
movzb ($sbox,$t3,1),$t3

movzb ($sbox,$acc0,1),$t4 #$t0
movzb `&hi("$s0")`,$acc0
movzb ($sbox,$acc1,1),$t5 #$t1
movzb `&lo("$s2")`,$acc1
movzb ($sbox,$acc2,1),$acc2 #$t2
movzb ($sbox,$acc0,1),$acc0 #$t3

shl \$8,$t4
shr \$16,$s3
shl \$8,$t5
xor $t4,$t0
shr \$16,$s0
movzb `&lo("$s3")`,$t4
shr \$16,$s1
xor $t5,$t1
shl \$8,$acc2
movzb `&lo("$s0")`,$t5
movzb ($sbox,$acc1,1),$acc1 #$t0
xor $acc2,$t2

shl \$8,$acc0
movzb `&lo("$s1")`,$acc2
shl \$16,$acc1
xor $acc0,$t3
movzb ($sbox,$t4,1),$t4 #$t1
movzb `&hi("$s3")`,$acc0
movzb ($sbox,$t5,1),$t5 #$t2
xor $acc1,$t0

shr \$8,$s2
movzb `&hi("$s0")`,$acc1
shl \$16,$t4
shr \$8,$s1
shl \$16,$t5
xor $t4,$t1
movzb ($sbox,$acc2,1),$acc2 #$t3
movzb ($sbox,$acc0,1),$acc0 #$t0
movzb ($sbox,$acc1,1),$acc1 #$t1
movzb ($sbox,$s2,1),$s3 #$t3
movzb ($sbox,$s1,1),$s2 #$t2

shl \$16,$acc2
xor $t5,$t2
shl \$24,$acc0
xor $acc2,$t3
shl \$24,$acc1
xor $acc0,$t0
shl \$24,$s3
xor $acc1,$t1
shl \$24,$s2
mov $t0,$s0
mov $t1,$s1
xor $t2,$s2
xor $t3,$s3
___
}
436
+
437
# enctransform_ref(SN): append to $code a straightforward, one-word version
# of the column transform, operating in place on the register named SN.
#
# Generated code: $r2 is formed from SN doubled (lea (SN,SN)) and masked
# with 0xfefefefe, then xor-ed with 0x1b1b1b1b in exactly those bytes whose
# top bit (0x80808080) was set in SN.  SN is then combined with $r2 and
# with rotated copies of its original value (rol 24, ror 16, ror 8).
#
# The temporaries are %r8d, %r9d and %r13d, which overlap $inp, $out and
# $rnds as named at the top of the file.
sub enctransform_ref()
{ my $sn = shift;
my ($acc,$r2,$tmp)=("%r8d","%r9d","%r13d");

$code.=<<___;
mov $sn,$acc
and \$0x80808080,$acc
mov $acc,$tmp
shr \$7,$tmp
lea ($sn,$sn),$r2
sub $tmp,$acc
and \$0xfefefefe,$r2
and \$0x1b1b1b1b,$acc
mov $sn,$tmp
xor $acc,$r2

xor $r2,$sn
rol \$24,$sn
xor $r2,$sn
ror \$16,$tmp
xor $tmp,$sn
ror \$8,$tmp
xor $tmp,$sn
___
}
462
+
463
+ # unlike decrypt case it does not pay off to parallelize enctransform
464
# enctransform(): append to $code the column transform for all four state
# words $s0..$s3, processed as two interleaved pairs ($s0/$s1, then
# $s2/$s3).
#
# Per word the instruction sequence is the one in enctransform_ref(): the
# 0x80808080 / 0xfefefefe / 0x1b1b1b1b masks produce the doubled value in
# $r20/$r21, which is then combined with rotations of the original word.
# The tail also issues loads from 0, 64, 128 and 192($sbox) into registers
# that are no longer needed ("prefetch Te4").
#
# $t3 is $acc2 here, and $r20/$r21 are %r8d/%r9d, which overlap $inp/$out
# as named at the top of the file.
sub enctransform()
{ my ($t3,$r20,$r21)=($acc2,"%r8d","%r9d");

$code.=<<___;
mov \$0x80808080,$t0
mov \$0x80808080,$t1
and $s0,$t0
and $s1,$t1
mov $t0,$acc0
mov $t1,$acc1
shr \$7,$t0
lea ($s0,$s0),$r20
shr \$7,$t1
lea ($s1,$s1),$r21
sub $t0,$acc0
sub $t1,$acc1
and \$0xfefefefe,$r20
and \$0xfefefefe,$r21
and \$0x1b1b1b1b,$acc0
and \$0x1b1b1b1b,$acc1
mov $s0,$t0
mov $s1,$t1
xor $acc0,$r20
xor $acc1,$r21

xor $r20,$s0
xor $r21,$s1
mov \$0x80808080,$t2
rol \$24,$s0
mov \$0x80808080,$t3
rol \$24,$s1
and $s2,$t2
and $s3,$t3
xor $r20,$s0
xor $r21,$s1
mov $t2,$acc0
ror \$16,$t0
mov $t3,$acc1
ror \$16,$t1
lea ($s2,$s2),$r20
shr \$7,$t2
xor $t0,$s0
shr \$7,$t3
xor $t1,$s1
ror \$8,$t0
lea ($s3,$s3),$r21
ror \$8,$t1
sub $t2,$acc0
sub $t3,$acc1
xor $t0,$s0
xor $t1,$s1

and \$0xfefefefe,$r20
and \$0xfefefefe,$r21
and \$0x1b1b1b1b,$acc0
and \$0x1b1b1b1b,$acc1
mov $s2,$t2
mov $s3,$t3
xor $acc0,$r20
xor $acc1,$r21

ror \$16,$t2
xor $r20,$s2
ror \$16,$t3
xor $r21,$s3
rol \$24,$s2
mov 0($sbox),$acc0 # prefetch Te4
rol \$24,$s3
xor $r20,$s2
mov 64($sbox),$acc1
xor $r21,$s3
mov 128($sbox),$r20
xor $t2,$s2
ror \$8,$t2
xor $t3,$s3
ror \$8,$t3
xor $t2,$s2
mov 192($sbox),$r21
xor $t3,$s3
___
}
545
+
546
# Emit the internal routine _x86_64_AES_encrypt_compact.
#
# Generated code: touch the 256-byte table at $sbox in eight 32-byte steps
# (the loaded values are discarded), then loop: xor the state with the
# current round key, advance $key by 16, run enccompactvert(), and either
# leave the loop when $key equals the pointer at 16(%rsp) or run
# enctransform() and repeat.  After the loop the last round key is xor-ed
# in and the routine returns with "rep ret".
#
# 16(%rsp) is the "end of key schedule" slot: asm_AES_encrypt stores it at
# 8(%rsp) and the call into this routine pushes an 8-byte return address.
$code.=<<___;
.type _x86_64_AES_encrypt_compact,\@abi-omnipotent
.align 16
_x86_64_AES_encrypt_compact:
lea 128($sbox),$inp # size optimization
mov 0-128($inp),$acc1 # prefetch Te4
mov 32-128($inp),$acc2
mov 64-128($inp),$t0
mov 96-128($inp),$t1
mov 128-128($inp),$acc1
mov 160-128($inp),$acc2
mov 192-128($inp),$t0
mov 224-128($inp),$t1
jmp .Lenc_loop_compact
.align 16
.Lenc_loop_compact:
xor 0($key),$s0 # xor with key
xor 4($key),$s1
xor 8($key),$s2
xor 12($key),$s3
lea 16($key),$key
___
&enccompactvert();
$code.=<<___;
cmp 16(%rsp),$key
je .Lenc_compact_done
___
&enctransform();
$code.=<<___;
jmp .Lenc_loop_compact
.align 16
.Lenc_compact_done:
xor 0($key),$s0
xor 4($key),$s1
xor 8($key),$s2
xor 12($key),$s3
.byte 0xf3,0xc3 # rep ret
.size _x86_64_AES_encrypt_compact,.-_x86_64_AES_encrypt_compact
___
585
+
586
+ # void asm_AES_encrypt (const void *inp,void *out,const AES_KEY *key);
587
# Emit the exported (but .hidden) entry point asm_AES_encrypt, declared to
# the translator as a 3-argument function.  Arguments are read from
# %rdi (input block), %rsi (output block) and %rdx (key).
#
# Generated code, in order:
#   - push %rbx, %rbp, %r12..%r15;
#   - keep the incoming %rsp in %r10, align %rsp down to 64 bytes, move it
#     further down by an amount derived from the key address (masked with
#     0x3c0), and reserve 32 bytes;
#   - frame slots: (%rsp) key schedule, 8(%rsp) end of key schedule
#     (key + 16*rounds), 16(%rsp) output pointer, 24(%rsp) saved %rsp;
#   - load the four input words into $s0..$s3;
#   - point $sbox at .LAES_Te+2048 plus an offset (multiple of 0x100,
#     at most 0x300) computed from the stack address;
#   - call _x86_64_AES_encrypt_compact and store $s0..$s3 to the output;
#   - reload the six pushed registers through the saved stack pointer and
#     return.
# .Lenc_prologue and .Lenc_epilogue are only defined here; any users are
# outside this part of the file.
$code.=<<___;
.align 16
.globl asm_AES_encrypt
.type asm_AES_encrypt,\@function,3
.hidden asm_AES_encrypt
asm_AES_encrypt:
push %rbx
push %rbp
push %r12
push %r13
push %r14
push %r15

# allocate frame "above" key schedule
mov %rsp,%r10
lea -63(%rdx),%rcx # %rdx is key argument
and \$-64,%rsp
sub %rsp,%rcx
neg %rcx
and \$0x3c0,%rcx
sub %rcx,%rsp
sub \$32,%rsp

mov %rsi,16(%rsp) # save out
mov %r10,24(%rsp) # save real stack pointer
.Lenc_prologue:

mov %rdx,$key
mov 240($key),$rnds # load rounds

mov 0(%rdi),$s0 # load input vector
mov 4(%rdi),$s1
mov 8(%rdi),$s2
mov 12(%rdi),$s3

shl \$4,$rnds
lea ($key,$rnds),%rbp
mov $key,(%rsp) # key schedule
mov %rbp,8(%rsp) # end of key schedule

# pick Te4 copy which can't "overlap" with stack frame or key schedule
lea .LAES_Te+2048(%rip),$sbox
lea 768(%rsp),%rbp
sub $sbox,%rbp
and \$0x300,%rbp
lea ($sbox,%rbp),$sbox

call _x86_64_AES_encrypt_compact

mov 16(%rsp),$out # restore out
mov 24(%rsp),%rsi # restore saved stack pointer
mov $s0,0($out) # write output vector
mov $s1,4($out)
mov $s2,8($out)
mov $s3,12($out)

mov (%rsi),%r15
mov 8(%rsi),%r14
mov 16(%rsi),%r13
mov 24(%rsi),%r12
mov 32(%rsi),%rbp
mov 40(%rsi),%rbx
lea 48(%rsi),%rsp
.Lenc_epilogue:
ret
.size asm_AES_encrypt,.-asm_AES_encrypt
___
654
+
655
+ #------------------------------------------------------------------#
656
+
657
# decvert(): append to $code the assembly for one full table-driven
# decryption round over all four state words ("vertical" layout).
#
# The structure mirrors encvert(): bytes of $s0..$s3 index the table at
# $sbox with an 8-byte stride (entry offsets 0, 3, 2, 1), the results are
# collected in $t0..$t3, $key is advanced by 16, and the next four
# round-key words are loaded into $s0..$s3 and xor-ed with $t0..$t3.  What
# differs from encvert() is which source word supplies each byte.
#
# The `&lo(...)`/`&hi(...)` backtick expressions are left in the text
# as-is here; presumably they are expanded when $code is post-processed at
# the end of the script (not visible in this part of the file).
sub decvert()
{ my $t3="%r8d"; # zaps $inp!  (%r8d is the low half of $inp's %r8)

$code.=<<___;
# favor 3-way issue Opteron pipeline...
movzb `&lo("$s0")`,$acc0
movzb `&lo("$s1")`,$acc1
movzb `&lo("$s2")`,$acc2
mov 0($sbox,$acc0,8),$t0
mov 0($sbox,$acc1,8),$t1
mov 0($sbox,$acc2,8),$t2

movzb `&hi("$s3")`,$acc0
movzb `&hi("$s0")`,$acc1
movzb `&lo("$s3")`,$acc2
xor 3($sbox,$acc0,8),$t0
xor 3($sbox,$acc1,8),$t1
mov 0($sbox,$acc2,8),$t3

movzb `&hi("$s1")`,$acc0
shr \$16,$s0
movzb `&hi("$s2")`,$acc2
xor 3($sbox,$acc0,8),$t2
shr \$16,$s3
xor 3($sbox,$acc2,8),$t3

shr \$16,$s1
lea 16($key),$key
shr \$16,$s2

movzb `&lo("$s2")`,$acc0
movzb `&lo("$s3")`,$acc1
movzb `&lo("$s0")`,$acc2
xor 2($sbox,$acc0,8),$t0
xor 2($sbox,$acc1,8),$t1
xor 2($sbox,$acc2,8),$t2

movzb `&hi("$s1")`,$acc0
movzb `&hi("$s2")`,$acc1
movzb `&lo("$s1")`,$acc2
xor 1($sbox,$acc0,8),$t0
xor 1($sbox,$acc1,8),$t1
xor 2($sbox,$acc2,8),$t3

movzb `&hi("$s3")`,$acc0
mov 12($key),$s3
movzb `&hi("$s0")`,$acc2
xor 1($sbox,$acc0,8),$t2
mov 0($key),$s0
xor 1($sbox,$acc2,8),$t3

xor $t0,$s0
mov 4($key),$s1
mov 8($key),$s2
xor $t2,$s2
xor $t1,$s1
xor $t3,$s3
___
}
716
+
717
+ sub declastvert()
718
+ { my $t3="%r8d"; # zaps $inp!
719
+ # Appends the last decryption round to $code: byte lookups (scale 1) in the table 2048 bytes past $sbox, reassembled with shifts of 8, 16 and 24, then XORed with the round key at 16($key). $sbox is restored before the end.
720
+ $code.=<<___;
721
+ lea 2048($sbox),$sbox # size optimization
722
+ movzb `&lo("$s0")`,$acc0
723
+ movzb `&lo("$s1")`,$acc1
724
+ movzb `&lo("$s2")`,$acc2
725
+ movzb ($sbox,$acc0,1),$t0
726
+ movzb ($sbox,$acc1,1),$t1
727
+ movzb ($sbox,$acc2,1),$t2
728
+
729
+ movzb `&lo("$s3")`,$acc0
730
+ movzb `&hi("$s3")`,$acc1
731
+ movzb `&hi("$s0")`,$acc2
732
+ movzb ($sbox,$acc0,1),$t3
733
+ movzb ($sbox,$acc1,1),$acc1 #$t0
734
+ movzb ($sbox,$acc2,1),$acc2 #$t1
735
+
736
+ shl \$8,$acc1
737
+ shl \$8,$acc2
738
+
739
+ xor $acc1,$t0
740
+ xor $acc2,$t1
741
+ shr \$16,$s3
742
+
743
+ movzb `&hi("$s1")`,$acc0
744
+ movzb `&hi("$s2")`,$acc1
745
+ shr \$16,$s0
746
+ movzb ($sbox,$acc0,1),$acc0 #$t2
747
+ movzb ($sbox,$acc1,1),$acc1 #$t3
748
+
749
+ shl \$8,$acc0
750
+ shl \$8,$acc1
751
+ shr \$16,$s1
752
+ xor $acc0,$t2
753
+ xor $acc1,$t3
754
+ shr \$16,$s2
755
+ # all four state words have now been shifted right by 16
756
+ movzb `&lo("$s2")`,$acc0
757
+ movzb `&lo("$s3")`,$acc1
758
+ movzb `&lo("$s0")`,$acc2
759
+ movzb ($sbox,$acc0,1),$acc0 #$t0
760
+ movzb ($sbox,$acc1,1),$acc1 #$t1
761
+ movzb ($sbox,$acc2,1),$acc2 #$t2
762
+
763
+ shl \$16,$acc0
764
+ shl \$16,$acc1
765
+ shl \$16,$acc2
766
+
767
+ xor $acc0,$t0
768
+ xor $acc1,$t1
769
+ xor $acc2,$t2
770
+
771
+ movzb `&lo("$s1")`,$acc0
772
+ movzb `&hi("$s1")`,$acc1
773
+ movzb `&hi("$s2")`,$acc2
774
+ movzb ($sbox,$acc0,1),$acc0 #$t3
775
+ movzb ($sbox,$acc1,1),$acc1 #$t0
776
+ movzb ($sbox,$acc2,1),$acc2 #$t1
777
+
778
+ shl \$16,$acc0
779
+ shl \$24,$acc1
780
+ shl \$24,$acc2
781
+
782
+ xor $acc0,$t3
783
+ xor $acc1,$t0
784
+ xor $acc2,$t1
785
+
786
+ movzb `&hi("$s3")`,$acc0
787
+ movzb `&hi("$s0")`,$acc1
788
+ mov 16+12($key),$s3 # round key is read at +16; the key pointer is not advanced here
789
+ movzb ($sbox,$acc0,1),$acc0 #$t2
790
+ movzb ($sbox,$acc1,1),$acc1 #$t3
791
+ mov 16+0($key),$s0
792
+
793
+ shl \$24,$acc0
794
+ shl \$24,$acc1
795
+
796
+ xor $acc0,$t2
797
+ xor $acc1,$t3
798
+
799
+ mov 16+4($key),$s1
800
+ mov 16+8($key),$s2
801
+ lea -2048($sbox),$sbox # undo the +2048 bias applied at the top
802
+ xor $t0,$s0
803
+ xor $t1,$s1
804
+ xor $t2,$s2
805
+ xor $t3,$s3
806
+ ___
807
+ }
808
+
809
+ sub decstep()
810
+ { my ($i,@s) = @_; # $i: output word index 0..3; @s: the four state registers, $s[0] first
811
+ my $tmp0=$acc0;
812
+ my $tmp1=$acc1;
813
+ my $tmp2=$acc2;
814
+ my $out=($t0,$t1,$t2,$s[0])[$i]; # words 0..2 are parked in $t0..$t2, word 3 is built in place
815
+ # Emits word $i of a round: 0(T)[$s[0] & 0xFF] ^ 3(T)[&hi($s[1])] ^ 2(T)[($s[2]>>16) & 0xFF] ^ 1(T)[$s[3]>>24], where T is the 8-byte-stride table at $sbox.
816
+ $code.=" mov $s[0],$out\n" if ($i!=3);
817
+ $tmp1=$s[2] if ($i==3);
818
+ $code.=" mov $s[2],$tmp1\n" if ($i!=3);
819
+ $code.=" and \$0xFF,$out\n";
820
+
821
+ $code.=" mov 0($sbox,$out,8),$out\n";
822
+ $code.=" shr \$16,$tmp1\n";
823
+ $tmp2=$s[3] if ($i==3);
824
+ $code.=" mov $s[3],$tmp2\n" if ($i!=3);
825
+ # for $i==3 the source registers themselves serve as scratch; they are overwritten below anyway
826
+ $tmp0=$s[1] if ($i==3);
827
+ $code.=" movzb ".&hi($s[1]).",$tmp0\n";
828
+ $code.=" and \$0xFF,$tmp1\n";
829
+ $code.=" shr \$24,$tmp2\n";
830
+
831
+ $code.=" xor 3($sbox,$tmp0,8),$out\n";
832
+ $code.=" xor 2($sbox,$tmp1,8),$out\n";
833
+ $code.=" xor 1($sbox,$tmp2,8),$out\n";
834
+ # after the fourth word, move the three parked results back into the state registers
835
+ $code.=" mov $t2,$s[1]\n" if ($i==3);
836
+ $code.=" mov $t1,$s[2]\n" if ($i==3);
837
+ $code.=" mov $t0,$s[3]\n" if ($i==3);
838
+ $code.="\n";
839
+ }
840
+
841
+ sub declast()
842
+ { my ($i,@s)=@_; # $i: output word index 0..3; @s: the four state registers, $s[0] first
843
+ my $tmp0=$acc0;
844
+ my $tmp1=$acc1;
845
+ my $tmp2=$acc2;
846
+ my $out=($t0,$t1,$t2,$s[0])[$i]; # words 0..2 are parked in $t0..$t2, word 3 is built in place
847
+ # Emits word $i of the last round: four byte lookups (scale 1) in the table at 2048($sbox), merged with shifts of 8, 16 and 24.
848
+ $code.=" mov $s[0],$out\n" if ($i!=3);
849
+ $tmp1=$s[2] if ($i==3);
850
+ $code.=" mov $s[2],$tmp1\n" if ($i!=3);
851
+ $code.=" and \$0xFF,$out\n";
852
+
853
+ $code.=" movzb 2048($sbox,$out,1),$out\n";
854
+ $code.=" shr \$16,$tmp1\n";
855
+ $tmp2=$s[3] if ($i==3);
856
+ $code.=" mov $s[3],$tmp2\n" if ($i!=3);
857
+ # for $i==3 the source registers themselves serve as scratch; they are overwritten below anyway
858
+ $tmp0=$s[1] if ($i==3);
859
+ $code.=" movzb ".&hi($s[1]).",$tmp0\n";
860
+ $code.=" and \$0xFF,$tmp1\n";
861
+ $code.=" shr \$24,$tmp2\n";
862
+
863
+ $code.=" movzb 2048($sbox,$tmp0,1),$tmp0\n";
864
+ $code.=" movzb 2048($sbox,$tmp1,1),$tmp1\n";
865
+ $code.=" movzb 2048($sbox,$tmp2,1),$tmp2\n";
866
+
867
+ $code.=" shl \$8,$tmp0\n";
868
+ $code.=" shl \$16,$tmp1\n";
869
+ $code.=" shl \$24,$tmp2\n";
870
+ # the xors into $out are interleaved with the $i==3 moves that put $t2,$t1,$t0 back into the state registers
871
+ $code.=" xor $tmp0,$out\n";
872
+ $code.=" mov $t2,$s[1]\n" if ($i==3);
873
+ $code.=" xor $tmp1,$out\n";
874
+ $code.=" mov $t1,$s[2]\n" if ($i==3);
875
+ $code.=" xor $tmp2,$out\n";
876
+ $code.=" mov $t0,$s[3]\n" if ($i==3);
877
+ $code.="\n";
878
+ }
879
+
880
+ $code.=<<___; # _x86_64_AES_decrypt: initial key xor, rounds-1 table-driven rounds, then one last round
881
+ .type _x86_64_AES_decrypt,\@abi-omnipotent
882
+ .align 16
883
+ _x86_64_AES_decrypt:
884
+ xor 0($key),$s0 # xor with key
885
+ xor 4($key),$s1
886
+ xor 8($key),$s2
887
+ xor 12($key),$s3
888
+
889
+ mov 240($key),$rnds # load key->rounds
890
+ sub \$1,$rnds # the loop below runs rounds-1 times
891
+ jmp .Ldec_loop # jump over the alignment padding
892
+ .align 16
893
+ .Ldec_loop:
894
+ ___
895
+ if ($verticalspin) { &decvert(); } # this generator also emits the round-key xor and key advance
896
+ else { &decstep(0,$s0,$s3,$s2,$s1);
897
+ &decstep(1,$s1,$s0,$s3,$s2);
898
+ &decstep(2,$s2,$s1,$s0,$s3);
899
+ &decstep(3,$s3,$s2,$s1,$s0);
900
+ $code.=<<___;
901
+ lea 16($key),$key
902
+ xor 0($key),$s0 # xor with key
903
+ xor 4($key),$s1
904
+ xor 8($key),$s2
905
+ xor 12($key),$s3
906
+ ___
907
+ }
908
+ $code.=<<___;
909
+ sub \$1,$rnds
910
+ jnz .Ldec_loop
911
+ ___
912
+ if ($verticalspin) { &declastvert(); } # last round; reads the final round key at 16 past the key pointer
913
+ else { &declast(0,$s0,$s3,$s2,$s1);
914
+ &declast(1,$s1,$s0,$s3,$s2);
915
+ &declast(2,$s2,$s1,$s0,$s3);
916
+ &declast(3,$s3,$s2,$s1,$s0);
917
+ $code.=<<___;
918
+ xor 16+0($key),$s0 # xor with key
919
+ xor 16+4($key),$s1
920
+ xor 16+8($key),$s2
921
+ xor 16+12($key),$s3
922
+ ___
923
+ }
924
+ $code.=<<___;
925
+ .byte 0xf3,0xc3 # rep ret
926
+ .size _x86_64_AES_decrypt,.-_x86_64_AES_decrypt
927
+ ___
928
+
929
+ sub deccompactvert()
930
+ { my ($t3,$t4,$t5)=("%r8d","%r9d","%r13d"); # extra scratch registers written by the emitted code
931
+ # Appends one byte-substitution pass: every state byte is looked up in the byte table at $sbox (scale 1) and the results are reassembled into $s0..$s3 with shifts of 8, 16 and 24.
932
+ $code.=<<___;
933
+ movzb `&lo("$s0")`,$t0
934
+ movzb `&lo("$s1")`,$t1
935
+ movzb `&lo("$s2")`,$t2
936
+ movzb `&lo("$s3")`,$t3
937
+ movzb `&hi("$s3")`,$acc0
938
+ movzb `&hi("$s0")`,$acc1
939
+ shr \$16,$s3
940
+ movzb `&hi("$s1")`,$acc2
941
+ movzb ($sbox,$t0,1),$t0
942
+ movzb ($sbox,$t1,1),$t1
943
+ movzb ($sbox,$t2,1),$t2
944
+ movzb ($sbox,$t3,1),$t3
945
+
946
+ movzb ($sbox,$acc0,1),$t4 #$t0
947
+ movzb `&hi("$s2")`,$acc0
948
+ movzb ($sbox,$acc1,1),$t5 #$t1
949
+ movzb ($sbox,$acc2,1),$acc2 #$t2
950
+ movzb ($sbox,$acc0,1),$acc0 #$t3
951
+
952
+ shr \$16,$s2
953
+ shl \$8,$t5
954
+ shl \$8,$t4
955
+ movzb `&lo("$s2")`,$acc1
956
+ shr \$16,$s0
957
+ xor $t4,$t0
958
+ shr \$16,$s1
959
+ movzb `&lo("$s3")`,$t4
960
+
961
+ shl \$8,$acc2
962
+ xor $t5,$t1
963
+ shl \$8,$acc0
964
+ movzb `&lo("$s0")`,$t5
965
+ movzb ($sbox,$acc1,1),$acc1 #$t0
966
+ xor $acc2,$t2
967
+ movzb `&lo("$s1")`,$acc2
968
+
969
+ shl \$16,$acc1
970
+ xor $acc0,$t3
971
+ movzb ($sbox,$t4,1),$t4 #$t1
972
+ movzb `&hi("$s1")`,$acc0
973
+ movzb ($sbox,$acc2,1),$acc2 #$t3
974
+ xor $acc1,$t0
975
+ movzb ($sbox,$t5,1),$t5 #$t2
976
+ movzb `&hi("$s2")`,$acc1
977
+
978
+ shl \$16,$acc2
979
+ shl \$16,$t4
980
+ shl \$16,$t5
981
+ xor $acc2,$t3
982
+ movzb `&hi("$s3")`,$acc2
983
+ xor $t4,$t1
984
+ shr \$8,$s0
985
+ xor $t5,$t2
986
+
987
+ movzb ($sbox,$acc0,1),$acc0 #$t0
988
+ movzb ($sbox,$acc1,1),$s1 #$t1
989
+ movzb ($sbox,$acc2,1),$s2 #$t2
990
+ movzb ($sbox,$s0,1),$s3 #$t3
991
+ # the state registers were reused as lookup destinations above; shift to the top byte and merge
992
+ mov $t0,$s0
993
+ shl \$24,$acc0
994
+ shl \$24,$s1
995
+ shl \$24,$s2
996
+ xor $acc0,$s0
997
+ shl \$24,$s3
998
+ xor $t1,$s1
999
+ xor $t2,$s2
1000
+ xor $t3,$s3
1001
+ ___
1002
+ }
1003
+
1004
+ # parallelized version! input is pair of 64-bit values: %rax=s1.s0
1005
+ # and %rcx=s3.s2, output is four 32-bit values in %eax=s0, %ebx=s1,
1006
+ # %ecx=s2 and %edx=s3.
1007
+ sub dectransform()
1008
+ { my ($tp10,$tp20,$tp40,$tp80,$acc0)=("%rax","%r8", "%r9", "%r10","%rbx"); # registers for the %rax half; this $acc0 hides the outer $acc0 inside the sub
1009
+ my ($tp18,$tp28,$tp48,$tp88,$acc8)=("%rcx","%r11","%r12","%r13","%rdx"); # registers for the %rcx half
1010
+ my $prefetch = shift; # when true, table loads are interleaved with the final rotates
1011
+ # Three stages each double every byte in GF(2^8): (x+x & maskfe) ^ (((x & mask80) - ((x & mask80)>>7)) & mask1b), giving tp2, tp4 and tp8; these are then combined with 32-bit rotates by 8, 24 and 16.
1012
+ $code.=<<___;
1013
+ mov $mask80,$tp40
1014
+ mov $mask80,$tp48
1015
+ and $tp10,$tp40
1016
+ and $tp18,$tp48
1017
+ mov $tp40,$acc0
1018
+ mov $tp48,$acc8
1019
+ shr \$7,$tp40
1020
+ lea ($tp10,$tp10),$tp20
1021
+ shr \$7,$tp48
1022
+ lea ($tp18,$tp18),$tp28
1023
+ sub $tp40,$acc0
1024
+ sub $tp48,$acc8
1025
+ and $maskfe,$tp20
1026
+ and $maskfe,$tp28
1027
+ and $mask1b,$acc0
1028
+ and $mask1b,$acc8
1029
+ xor $acc0,$tp20
1030
+ xor $acc8,$tp28
1031
+ mov $mask80,$tp80
1032
+ mov $mask80,$tp88
1033
+ # second doubling: tp4 is derived from tp2
1034
+ and $tp20,$tp80
1035
+ and $tp28,$tp88
1036
+ mov $tp80,$acc0
1037
+ mov $tp88,$acc8
1038
+ shr \$7,$tp80
1039
+ lea ($tp20,$tp20),$tp40
1040
+ shr \$7,$tp88
1041
+ lea ($tp28,$tp28),$tp48
1042
+ sub $tp80,$acc0
1043
+ sub $tp88,$acc8
1044
+ and $maskfe,$tp40
1045
+ and $maskfe,$tp48
1046
+ and $mask1b,$acc0
1047
+ and $mask1b,$acc8
1048
+ xor $acc0,$tp40
1049
+ xor $acc8,$tp48
1050
+ mov $mask80,$tp80
1051
+ mov $mask80,$tp88
1052
+ # third doubling: tp8 is derived from tp4
1053
+ and $tp40,$tp80
1054
+ and $tp48,$tp88
1055
+ mov $tp80,$acc0
1056
+ mov $tp88,$acc8
1057
+ shr \$7,$tp80
1058
+ xor $tp10,$tp20 # tp2^=tp1
1059
+ shr \$7,$tp88
1060
+ xor $tp18,$tp28 # tp2^=tp1
1061
+ sub $tp80,$acc0
1062
+ sub $tp88,$acc8
1063
+ lea ($tp40,$tp40),$tp80
1064
+ lea ($tp48,$tp48),$tp88
1065
+ xor $tp10,$tp40 # tp4^=tp1
1066
+ xor $tp18,$tp48 # tp4^=tp1
1067
+ and $maskfe,$tp80
1068
+ and $maskfe,$tp88
1069
+ and $mask1b,$acc0
1070
+ and $mask1b,$acc8
1071
+ xor $acc0,$tp80
1072
+ xor $acc8,$tp88
1073
+
1074
+ xor $tp80,$tp10 # tp1^=tp8
1075
+ xor $tp88,$tp18 # tp1^=tp8
1076
+ xor $tp80,$tp20 # tp2^tp1^=tp8
1077
+ xor $tp88,$tp28 # tp2^tp1^=tp8
1078
+ mov $tp10,$acc0
1079
+ mov $tp18,$acc8
1080
+ xor $tp80,$tp40 # tp4^tp1^=tp8
1081
+ shr \$32,$acc0
1082
+ xor $tp88,$tp48 # tp4^tp1^=tp8
1083
+ shr \$32,$acc8
1084
+ xor $tp20,$tp80 # tp8^=tp8^tp2^tp1=tp2^tp1
1085
+ rol \$8,`&LO("$tp10")` # ROTATE(tp1^tp8,8)
1086
+ xor $tp28,$tp88 # tp8^=tp8^tp2^tp1=tp2^tp1
1087
+ rol \$8,`&LO("$tp18")` # ROTATE(tp1^tp8,8)
1088
+ xor $tp40,$tp80 # tp2^tp1^=tp8^tp4^tp1=tp8^tp4^tp2
1089
+ rol \$8,`&LO("$acc0")` # ROTATE(tp1^tp8,8)
1090
+ xor $tp48,$tp88 # tp2^tp1^=tp8^tp4^tp1=tp8^tp4^tp2
1091
+
1092
+ rol \$8,`&LO("$acc8")` # ROTATE(tp1^tp8,8)
1093
+ xor `&LO("$tp80")`,`&LO("$tp10")`
1094
+ shr \$32,$tp80
1095
+ xor `&LO("$tp88")`,`&LO("$tp18")`
1096
+ shr \$32,$tp88
1097
+ xor `&LO("$tp80")`,`&LO("$acc0")`
1098
+ xor `&LO("$tp88")`,`&LO("$acc8")`
1099
+
1100
+ mov $tp20,$tp80
1101
+ rol \$24,`&LO("$tp20")` # ROTATE(tp2^tp1^tp8,24)
1102
+ mov $tp28,$tp88
1103
+ rol \$24,`&LO("$tp28")` # ROTATE(tp2^tp1^tp8,24)
1104
+ shr \$32,$tp80
1105
+ xor `&LO("$tp20")`,`&LO("$tp10")`
1106
+ shr \$32,$tp88
1107
+ xor `&LO("$tp28")`,`&LO("$tp18")`
1108
+ rol \$24,`&LO("$tp80")` # ROTATE(tp2^tp1^tp8,24)
1109
+ mov $tp40,$tp20
1110
+ rol \$24,`&LO("$tp88")` # ROTATE(tp2^tp1^tp8,24)
1111
+ mov $tp48,$tp28
1112
+ shr \$32,$tp20
1113
+ xor `&LO("$tp80")`,`&LO("$acc0")`
1114
+ shr \$32,$tp28
1115
+ xor `&LO("$tp88")`,`&LO("$acc8")`
1116
+ # with prefetch set, the table loads below overwrite the three mask registers, so the caller has to reload them
1117
+ `"mov 0($sbox),$mask80" if ($prefetch)`
1118
+ rol \$16,`&LO("$tp40")` # ROTATE(tp4^tp1^tp8,16)
1119
+ `"mov 64($sbox),$maskfe" if ($prefetch)`
1120
+ rol \$16,`&LO("$tp48")` # ROTATE(tp4^tp1^tp8,16)
1121
+ `"mov 128($sbox),$mask1b" if ($prefetch)`
1122
+ rol \$16,`&LO("$tp20")` # ROTATE(tp4^tp1^tp8,16)
1123
+ `"mov 192($sbox),$tp80" if ($prefetch)`
1124
+ xor `&LO("$tp40")`,`&LO("$tp10")`
1125
+ rol \$16,`&LO("$tp28")` # ROTATE(tp4^tp1^tp8,16)
1126
+ xor `&LO("$tp48")`,`&LO("$tp18")`
1127
+ `"mov 256($sbox),$tp88" if ($prefetch)`
1128
+ xor `&LO("$tp20")`,`&LO("$acc0")`
1129
+ xor `&LO("$tp28")`,`&LO("$acc8")`
1130
+ ___
1131
+ }
1132
+
1133
+ $code.=<<___; # _x86_64_AES_decrypt_compact: byte-table rounds, repeated until the key pointer equals the end-of-schedule value stored on the stack
1134
+ .type _x86_64_AES_decrypt_compact,\@abi-omnipotent
1135
+ .align 16
1136
+ _x86_64_AES_decrypt_compact:
1137
+ lea 128($sbox),$inp # size optimization
1138
+ mov 0-128($inp),$acc1 # prefetch Td4
1139
+ mov 32-128($inp),$acc2
1140
+ mov 64-128($inp),$t0
1141
+ mov 96-128($inp),$t1
1142
+ mov 128-128($inp),$acc1
1143
+ mov 160-128($inp),$acc2
1144
+ mov 192-128($inp),$t0
1145
+ mov 224-128($inp),$t1
1146
+ jmp .Ldec_loop_compact
1147
+ # the eight loads above touch the table every 32 bytes; the loaded values themselves are not used
1148
+ .align 16
1149
+ .Ldec_loop_compact:
1150
+ xor 0($key),$s0 # xor with key
1151
+ xor 4($key),$s1
1152
+ xor 8($key),$s2
1153
+ xor 12($key),$s3
1154
+ lea 16($key),$key
1155
+ ___
1156
+ &deccompactvert();
1157
+ $code.=<<___;
1158
+ cmp 16(%rsp),$key # end-of-schedule slot: 8(%rsp) in the calling wrapper, plus 8 for the return address
1159
+ je .Ldec_compact_done
1160
+ # load the three masks stored right after the 256-byte table and pack the state as s1.s0 and s3.s2
1161
+ mov 256+0($sbox),$mask80
1162
+ shl \$32,%rbx
1163
+ shl \$32,%rdx
1164
+ mov 256+8($sbox),$maskfe
1165
+ or %rbx,%rax
1166
+ or %rdx,%rcx
1167
+ mov 256+16($sbox),$mask1b
1168
+ ___
1169
+ &dectransform(1);
1170
+ $code.=<<___;
1171
+ jmp .Ldec_loop_compact
1172
+ .align 16
1173
+ .Ldec_compact_done:
1174
+ xor 0($key),$s0
1175
+ xor 4($key),$s1
1176
+ xor 8($key),$s2
1177
+ xor 12($key),$s3
1178
+ .byte 0xf3,0xc3 # rep ret
1179
+ .size _x86_64_AES_decrypt_compact,.-_x86_64_AES_decrypt_compact
1180
+ ___
1181
+
1182
+ # void asm_AES_decrypt (const void *inp,void *out,const AES_KEY *key); -- runs one 16-byte block from inp through _x86_64_AES_decrypt_compact and stores it to out
1183
+ $code.=<<___;
1184
+ .align 16
1185
+ .globl asm_AES_decrypt
1186
+ .type asm_AES_decrypt,\@function,3
1187
+ .hidden asm_AES_decrypt
1188
+ asm_AES_decrypt:
1189
+ push %rbx
1190
+ push %rbp
1191
+ push %r12
1192
+ push %r13
1193
+ push %r14
1194
+ push %r15
1195
+ # the six pushes occupy 48 bytes; the epilogue reloads them through the saved stack pointer
1196
+ # allocate frame "above" key schedule
1197
+ mov %rsp,%r10 # stack pointer as it is after the pushes
1198
+ lea -63(%rdx),%rcx # %rdx is key argument
1199
+ and \$-64,%rsp # round down to a 64-byte boundary
1200
+ sub %rsp,%rcx
1201
+ neg %rcx # %rcx = aligned %rsp - (key-63)
1202
+ and \$0x3c0,%rcx # keep bits 6..9: a multiple of 64, at most 960
1203
+ sub %rcx,%rsp
1204
+ sub \$32,%rsp # frame slots: 0=key, 8=end of key schedule, 16=out, 24=saved stack pointer
1205
+
1206
+ mov %rsi,16(%rsp) # save out
1207
+ mov %r10,24(%rsp) # save real stack pointer
1208
+ .Ldec_prologue:
1209
+
1210
+ mov %rdx,$key
1211
+ mov 240($key),$rnds # load rounds
1212
+
1213
+ mov 0(%rdi),$s0 # load input vector
1214
+ mov 4(%rdi),$s1
1215
+ mov 8(%rdi),$s2
1216
+ mov 12(%rdi),$s3
1217
+
1218
+ shl \$4,$rnds # rounds*16
1219
+ lea ($key,$rnds),%rbp
1220
+ mov $key,(%rsp) # key schedule
1221
+ mov %rbp,8(%rsp) # end of key schedule
1222
+
1223
+ # pick Td4 copy which can't "overlap" with stack frame or key schedule
1224
+ lea .LAES_Td+2048(%rip),$sbox
1225
+ lea 768(%rsp),%rbp
1226
+ sub $sbox,%rbp
1227
+ and \$0x300,%rbp # 0, 256, 512 or 768, i.e. k*256 for copy k
1228
+ lea ($sbox,%rbp),$sbox
1229
+ shr \$3,%rbp # recall "magic" constants!
1230
+ add %rbp,$sbox # plus k*32, so copy k is reached at k*256 + k*32
1231
+
1232
+ call _x86_64_AES_decrypt_compact
1233
+
1234
+ mov 16(%rsp),$out # restore out
1235
+ mov 24(%rsp),%rsi # restore saved stack pointer
1236
+ mov $s0,0($out) # write output vector
1237
+ mov $s1,4($out)
1238
+ mov $s2,8($out)
1239
+ mov $s3,12($out)
1240
+ # reload the callee-saved registers in reverse push order, then step over the 48 bytes
1241
+ mov (%rsi),%r15
1242
+ mov 8(%rsi),%r14
1243
+ mov 16(%rsi),%r13
1244
+ mov 24(%rsi),%r12
1245
+ mov 32(%rsi),%rbp
1246
+ mov 40(%rsi),%rbx
1247
+ lea 48(%rsi),%rsp
1248
+ .Ldec_epilogue:
1249
+ ret
1250
+ .size asm_AES_decrypt,.-asm_AES_decrypt
1251
+ ___
1252
+ #------------------------------------------------------------------#
1253
+
1254
+ sub enckey()
1255
+ { # Appends one key-expansion step: %eax ^= byte-table lookups of the four bytes of %edx (table at -128(%rbp)) placed in rotated positions, then ^= the 4-byte entry %rcx of the rcon array. Scratch: %ebx, %esi; %edx is shifted right by 16.
1256
+ $code.=<<___;
1257
+ movz %dl,%esi # rk[i]>>0
1258
+ movzb -128(%rbp,%rsi),%ebx
1259
+ movz %dh,%esi # rk[i]>>8
1260
+ shl \$24,%ebx
1261
+ xor %ebx,%eax
1262
+ # byte 0 went to bits 24..31; byte 1 is used unshifted, byte 2 goes to bits 8..15 and byte 3 to bits 16..23
1263
+ movzb -128(%rbp,%rsi),%ebx
1264
+ shr \$16,%edx
1265
+ movz %dl,%esi # rk[i]>>16
1266
+ xor %ebx,%eax
1267
+
1268
+ movzb -128(%rbp,%rsi),%ebx
1269
+ movz %dh,%esi # rk[i]>>24
1270
+ shl \$8,%ebx
1271
+ xor %ebx,%eax
1272
+
1273
+ movzb -128(%rbp,%rsi),%ebx
1274
+ shl \$16,%ebx
1275
+ xor %ebx,%eax
1276
+ # round constants are 4-byte entries located 1024 bytes after the start of the byte table
1277
+ xor 1024-128(%rbp,%rcx,4),%eax # rcon
1278
+ ___
1279
+ }
1280
+
1281
+ # int asm_AES_set_encrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key) -- returns 0 on success, -1 for a NULL pointer, -2 for bits other than 128/192/256
1282
+ $code.=<<___;
1283
+ .align 16
1284
+ .globl asm_AES_set_encrypt_key
1285
+ .type asm_AES_set_encrypt_key,\@function,3
1286
+ asm_AES_set_encrypt_key:
1287
+ push %rbx
1288
+ push %rbp
1289
+ push %r12 # redundant, but allows to share
1290
+ push %r13 # exception handler...
1291
+ push %r14
1292
+ push %r15
1293
+ sub \$8,%rsp # six pushes plus 8 bytes: a 56-byte frame
1294
+ .Lenc_key_prologue:
1295
+
1296
+ call _x86_64_AES_set_encrypt_key
1297
+ # only %rbp and %rbx are reloaded; the helper below never writes %r12-%r15
1298
+ mov 40(%rsp),%rbp
1299
+ mov 48(%rsp),%rbx
1300
+ add \$56,%rsp
1301
+ .Lenc_key_epilogue:
1302
+ ret
1303
+ .size asm_AES_set_encrypt_key,.-asm_AES_set_encrypt_key
1304
+
1305
+ .type _x86_64_AES_set_encrypt_key,\@abi-omnipotent
1306
+ .align 16
1307
+ _x86_64_AES_set_encrypt_key:
1308
+ mov %esi,%ecx # %ecx=bits
1309
+ mov %rdi,%rsi # %rsi=userKey
1310
+ mov %rdx,%rdi # %rdi=key
1311
+
1312
+ test \$-1,%rsi # NULL userKey?
1313
+ jz .Lbadpointer
1314
+ test \$-1,%rdi # NULL key?
1315
+ jz .Lbadpointer
1316
+
1317
+ lea .LAES_Te(%rip),%rbp
1318
+ lea 2048+128(%rbp),%rbp # byte table 2048 bytes into .LAES_Te, biased by 128
1319
+
1320
+ # prefetch Te4
1321
+ mov 0-128(%rbp),%eax
1322
+ mov 32-128(%rbp),%ebx
1323
+ mov 64-128(%rbp),%r8d
1324
+ mov 96-128(%rbp),%edx
1325
+ mov 128-128(%rbp),%eax
1326
+ mov 160-128(%rbp),%ebx
1327
+ mov 192-128(%rbp),%r8d
1328
+ mov 224-128(%rbp),%edx
1329
+
1330
+ cmp \$128,%ecx
1331
+ je .L10rounds
1332
+ cmp \$192,%ecx
1333
+ je .L12rounds
1334
+ cmp \$256,%ecx
1335
+ je .L14rounds
1336
+ mov \$-2,%rax # invalid number of bits
1337
+ jmp .Lexit
1338
+
1339
+ .L10rounds:
1340
+ mov 0(%rsi),%rax # copy first 4 dwords
1341
+ mov 8(%rsi),%rdx
1342
+ mov %rax,0(%rdi)
1343
+ mov %rdx,8(%rdi)
1344
+
1345
+ shr \$32,%rdx # %edx = rk[3]
1346
+ xor %ecx,%ecx # round-constant index starts at 0
1347
+ jmp .L10shortcut
1348
+ .align 4
1349
+ .L10loop:
1350
+ mov 0(%rdi),%eax # rk[0]
1351
+ mov 12(%rdi),%edx # rk[3]
1352
+ .L10shortcut:
1353
+ ___
1354
+ &enckey ();
1355
+ $code.=<<___;
1356
+ mov %eax,16(%rdi) # rk[4]
1357
+ xor 4(%rdi),%eax
1358
+ mov %eax,20(%rdi) # rk[5]
1359
+ xor 8(%rdi),%eax
1360
+ mov %eax,24(%rdi) # rk[6]
1361
+ xor 12(%rdi),%eax
1362
+ mov %eax,28(%rdi) # rk[7]
1363
+ add \$1,%ecx
1364
+ lea 16(%rdi),%rdi
1365
+ cmp \$10,%ecx
1366
+ jl .L10loop
1367
+
1368
+ movl \$10,80(%rdi) # setup number of rounds (10*16+80 = offset 240 of the schedule)
1369
+ xor %rax,%rax
1370
+ jmp .Lexit
1371
+
1372
+ .L12rounds:
1373
+ mov 0(%rsi),%rax # copy first 6 dwords
1374
+ mov 8(%rsi),%rbx
1375
+ mov 16(%rsi),%rdx
1376
+ mov %rax,0(%rdi)
1377
+ mov %rbx,8(%rdi)
1378
+ mov %rdx,16(%rdi)
1379
+
1380
+ shr \$32,%rdx # %edx = rk[5]
1381
+ xor %ecx,%ecx # round-constant index starts at 0
1382
+ jmp .L12shortcut
1383
+ .align 4
1384
+ .L12loop:
1385
+ mov 0(%rdi),%eax # rk[0]
1386
+ mov 20(%rdi),%edx # rk[5]
1387
+ .L12shortcut:
1388
+ ___
1389
+ &enckey ();
1390
+ $code.=<<___;
1391
+ mov %eax,24(%rdi) # rk[6]
1392
+ xor 4(%rdi),%eax
1393
+ mov %eax,28(%rdi) # rk[7]
1394
+ xor 8(%rdi),%eax
1395
+ mov %eax,32(%rdi) # rk[8]
1396
+ xor 12(%rdi),%eax
1397
+ mov %eax,36(%rdi) # rk[9]
1398
+
1399
+ cmp \$7,%ecx
1400
+ je .L12break
1401
+ add \$1,%ecx
1402
+
1403
+ xor 16(%rdi),%eax
1404
+ mov %eax,40(%rdi) # rk[10]
1405
+ xor 20(%rdi),%eax
1406
+ mov %eax,44(%rdi) # rk[11]
1407
+
1408
+ lea 24(%rdi),%rdi
1409
+ jmp .L12loop
1410
+ .L12break:
1411
+ movl \$12,72(%rdi) # setup number of rounds (7*24+72 = offset 240 of the schedule)
1412
+ xor %rax,%rax
1413
+ jmp .Lexit
1414
+
1415
+ .L14rounds:
1416
+ mov 0(%rsi),%rax # copy first 8 dwords
1417
+ mov 8(%rsi),%rbx
1418
+ mov 16(%rsi),%rcx
1419
+ mov 24(%rsi),%rdx
1420
+ mov %rax,0(%rdi)
1421
+ mov %rbx,8(%rdi)
1422
+ mov %rcx,16(%rdi)
1423
+ mov %rdx,24(%rdi)
1424
+
1425
+ shr \$32,%rdx # %edx = rk[7]
1426
+ xor %ecx,%ecx # %rcx held key data above; from here it is the round-constant index
1427
+ jmp .L14shortcut
1428
+ .align 4
1429
+ .L14loop:
1430
+ mov 0(%rdi),%eax # rk[0]
1431
+ mov 28(%rdi),%edx # rk[7]
1432
+ .L14shortcut:
1433
+ ___
1434
+ &enckey ();
1435
+ $code.=<<___;
1436
+ mov %eax,32(%rdi) # rk[8]
1437
+ xor 4(%rdi),%eax
1438
+ mov %eax,36(%rdi) # rk[9]
1439
+ xor 8(%rdi),%eax
1440
+ mov %eax,40(%rdi) # rk[10]
1441
+ xor 12(%rdi),%eax
1442
+ mov %eax,44(%rdi) # rk[11]
1443
+
1444
+ cmp \$6,%ecx
1445
+ je .L14break
1446
+ add \$1,%ecx
1447
+ # extra step for 256-bit keys: rk[12] = rk[4] ^ byte-substituted rk[11], with no rotation and no round constant
1448
+ mov %eax,%edx
1449
+ mov 16(%rdi),%eax # rk[4]
1450
+ movz %dl,%esi # rk[11]>>0
1451
+ movzb -128(%rbp,%rsi),%ebx
1452
+ movz %dh,%esi # rk[11]>>8
1453
+ xor %ebx,%eax
1454
+
1455
+ movzb -128(%rbp,%rsi),%ebx
1456
+ shr \$16,%edx
1457
+ shl \$8,%ebx
1458
+ movz %dl,%esi # rk[11]>>16
1459
+ xor %ebx,%eax
1460
+
1461
+ movzb -128(%rbp,%rsi),%ebx
1462
+ movz %dh,%esi # rk[11]>>24
1463
+ shl \$16,%ebx
1464
+ xor %ebx,%eax
1465
+
1466
+ movzb -128(%rbp,%rsi),%ebx
1467
+ shl \$24,%ebx
1468
+ xor %ebx,%eax
1469
+
1470
+ mov %eax,48(%rdi) # rk[12]
1471
+ xor 20(%rdi),%eax
1472
+ mov %eax,52(%rdi) # rk[13]
1473
+ xor 24(%rdi),%eax
1474
+ mov %eax,56(%rdi) # rk[14]
1475
+ xor 28(%rdi),%eax
1476
+ mov %eax,60(%rdi) # rk[15]
1477
+
1478
+ lea 32(%rdi),%rdi
1479
+ jmp .L14loop
1480
+ .L14break:
1481
+ movl \$14,48(%rdi) # setup number of rounds (6*32+48 = offset 240 of the schedule)
1482
+ xor %rax,%rax
1483
+ jmp .Lexit
1484
+
1485
+ .Lbadpointer:
1486
+ mov \$-1,%rax
1487
+ .Lexit:
1488
+ .byte 0xf3,0xc3 # rep ret
1489
+ .size _x86_64_AES_set_encrypt_key,.-_x86_64_AES_set_encrypt_key
1490
+ ___
1491
+
1492
+ sub deckey_ref() # NOTE(review): no call to this sub is visible in this part of the file -- confirm whether it is still used
1493
+ { my ($i,$ptr,$te,$td) = @_; # $i: displacement, $ptr: base register; $te and $td are accepted but not used in the body
1494
+ my ($tp1,$tp2,$tp4,$tp8,$acc)=("%eax","%ebx","%edi","%edx","%r8d");
1495
+ $code.=<<___; # 32-bit, single-word form of the tp1/tp2/tp4/tp8 transform, applied in place to the word at $i($ptr)
1496
+ mov $i($ptr),$tp1
1497
+ mov $tp1,$acc
1498
+ and \$0x80808080,$acc
1499
+ mov $acc,$tp4
1500
+ shr \$7,$tp4
1501
+ lea 0($tp1,$tp1),$tp2
1502
+ sub $tp4,$acc
1503
+ and \$0xfefefefe,$tp2
1504
+ and \$0x1b1b1b1b,$acc
1505
+ xor $tp2,$acc
1506
+ mov $acc,$tp2
1507
+ # second doubling: tp4 is derived from tp2
1508
+ and \$0x80808080,$acc
1509
+ mov $acc,$tp8
1510
+ shr \$7,$tp8
1511
+ lea 0($tp2,$tp2),$tp4
1512
+ sub $tp8,$acc
1513
+ and \$0xfefefefe,$tp4
1514
+ and \$0x1b1b1b1b,$acc
1515
+ xor $tp1,$tp2 # tp2^tp1
1516
+ xor $tp4,$acc
1517
+ mov $acc,$tp4
1518
+ # third doubling: tp8 is derived from tp4
1519
+ and \$0x80808080,$acc
1520
+ mov $acc,$tp8
1521
+ shr \$7,$tp8
1522
+ sub $tp8,$acc
1523
+ lea 0($tp4,$tp4),$tp8
1524
+ xor $tp1,$tp4 # tp4^tp1
1525
+ and \$0xfefefefe,$tp8
1526
+ and \$0x1b1b1b1b,$acc
1527
+ xor $acc,$tp8
1528
+
1529
+ xor $tp8,$tp1 # tp1^tp8
1530
+ rol \$8,$tp1 # ROTATE(tp1^tp8,8)
1531
+ xor $tp8,$tp2 # tp2^tp1^tp8
1532
+ xor $tp8,$tp4 # tp4^tp1^tp8
1533
+ xor $tp2,$tp8
1534
+ xor $tp4,$tp8 # tp8^(tp8^tp4^tp1)^(tp8^tp2^tp1)=tp8^tp4^tp2
1535
+
1536
+ xor $tp8,$tp1
1537
+ rol \$24,$tp2 # ROTATE(tp2^tp1^tp8,24)
1538
+ xor $tp2,$tp1
1539
+ rol \$16,$tp4 # ROTATE(tp4^tp1^tp8,16)
1540
+ xor $tp4,$tp1
1541
+
1542
+ mov $tp1,$i($ptr)
1543
+ ___
1544
+ }
1545
+
1546
+ # int asm_AES_set_decrypt_key(const unsigned char *userKey, const int bits, AES_KEY *key) -- builds the encryption schedule, reverses the order of its round keys, then transforms all but the first and last
1547
+ $code.=<<___;
1548
+ .align 16
1549
+ .globl asm_AES_set_decrypt_key
1550
+ .type asm_AES_set_decrypt_key,\@function,3
1551
+ asm_AES_set_decrypt_key:
1552
+ push %rbx
1553
+ push %rbp
1554
+ push %r12
1555
+ push %r13
1556
+ push %r14
1557
+ push %r15
1558
+ push %rdx # save key schedule
1559
+ .Ldec_key_prologue:
1560
+
1561
+ call _x86_64_AES_set_encrypt_key
1562
+ mov (%rsp),%r8 # restore key schedule
1563
+ cmp \$0,%eax # nonzero means the helper rejected its arguments
1564
+ jne .Labort
1565
+
1566
+ mov 240(%r8),%r14d # pull number of rounds
1567
+ xor %rdi,%rdi
1568
+ lea (%rdi,%r14d,4),%rcx # %rcx = rounds*4
1569
+ mov %r8,%rsi
1570
+ lea (%r8,%rcx,4),%rdi # pointer to last chunk
1571
+ .align 4
1572
+ .Linvert:
1573
+ mov 0(%rsi),%rax
1574
+ mov 8(%rsi),%rbx
1575
+ mov 0(%rdi),%rcx
1576
+ mov 8(%rdi),%rdx
1577
+ mov %rax,0(%rdi)
1578
+ mov %rbx,8(%rdi)
1579
+ mov %rcx,0(%rsi)
1580
+ mov %rdx,8(%rsi)
1581
+ lea 16(%rsi),%rsi
1582
+ lea -16(%rdi),%rdi
1583
+ cmp %rsi,%rdi
1584
+ jne .Linvert
1585
+ # the swap loop ends when both pointers land on the same 16-byte round key
1586
+ lea .LAES_Te+2048+1024(%rip),%rax # rcon
1587
+ # the three 8-byte masks are read from 40, 48 and 56 bytes past that address
1588
+ mov 40(%rax),$mask80
1589
+ mov 48(%rax),$maskfe
1590
+ mov 56(%rax),$mask1b
1591
+
1592
+ mov %r8,$key
1593
+ sub \$1,%r14d # rounds-1 round keys get transformed
1594
+ .align 4
1595
+ .Lpermute:
1596
+ lea 16($key),$key # advance first, so the round key at offset 0 is left untouched
1597
+ mov 0($key),%rax
1598
+ mov 8($key),%rcx
1599
+ ___
1600
+ &dectransform ();
1601
+ $code.=<<___;
1602
+ mov %eax,0($key)
1603
+ mov %ebx,4($key)
1604
+ mov %ecx,8($key)
1605
+ mov %edx,12($key)
1606
+ sub \$1,%r14d
1607
+ jnz .Lpermute
1608
+
1609
+ xor %rax,%rax # return 0
1610
+ .Labort:
1611
+ mov 8(%rsp),%r15 # slot 0 holds the pushed key pointer, so the saved registers start at 8
1612
+ mov 16(%rsp),%r14
1613
+ mov 24(%rsp),%r13
1614
+ mov 32(%rsp),%r12
1615
+ mov 40(%rsp),%rbp
1616
+ mov 48(%rsp),%rbx
1617
+ add \$56,%rsp
1618
+ .Ldec_key_epilogue:
1619
+ ret
1620
+ .size asm_AES_set_decrypt_key,.-asm_AES_set_decrypt_key
1621
+ ___
1622
+
1623
+ $code.=<<___;
1624
+ .align 64
1625
+ .LAES_Te:
1626
+ ___
1627
+ &_data_word(0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6);
1628
+ &_data_word(0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591);
1629
+ &_data_word(0x50303060, 0x03010102, 0xa96767ce, 0x7d2b2b56);
1630
+ &_data_word(0x19fefee7, 0x62d7d7b5, 0xe6abab4d, 0x9a7676ec);
1631
+ &_data_word(0x45caca8f, 0x9d82821f, 0x40c9c989, 0x877d7dfa);
1632
+ &_data_word(0x15fafaef, 0xeb5959b2, 0xc947478e, 0x0bf0f0fb);
1633
+ &_data_word(0xecadad41, 0x67d4d4b3, 0xfda2a25f, 0xeaafaf45);
1634
+ &_data_word(0xbf9c9c23, 0xf7a4a453, 0x967272e4, 0x5bc0c09b);
1635
+ &_data_word(0xc2b7b775, 0x1cfdfde1, 0xae93933d, 0x6a26264c);
1636
+ &_data_word(0x5a36366c, 0x413f3f7e, 0x02f7f7f5, 0x4fcccc83);
1637
+ &_data_word(0x5c343468, 0xf4a5a551, 0x34e5e5d1, 0x08f1f1f9);
1638
+ &_data_word(0x937171e2, 0x73d8d8ab, 0x53313162, 0x3f15152a);
1639
+ &_data_word(0x0c040408, 0x52c7c795, 0x65232346, 0x5ec3c39d);
1640
+ &_data_word(0x28181830, 0xa1969637, 0x0f05050a, 0xb59a9a2f);
1641
+ &_data_word(0x0907070e, 0x36121224, 0x9b80801b, 0x3de2e2df);
1642
+ &_data_word(0x26ebebcd, 0x6927274e, 0xcdb2b27f, 0x9f7575ea);
1643
+ &_data_word(0x1b090912, 0x9e83831d, 0x742c2c58, 0x2e1a1a34);
1644
+ &_data_word(0x2d1b1b36, 0xb26e6edc, 0xee5a5ab4, 0xfba0a05b);
1645
+ &_data_word(0xf65252a4, 0x4d3b3b76, 0x61d6d6b7, 0xceb3b37d);
1646
+ &_data_word(0x7b292952, 0x3ee3e3dd, 0x712f2f5e, 0x97848413);
1647
+ &_data_word(0xf55353a6, 0x68d1d1b9, 0x00000000, 0x2cededc1);
1648
+ &_data_word(0x60202040, 0x1ffcfce3, 0xc8b1b179, 0xed5b5bb6);
1649
+ &_data_word(0xbe6a6ad4, 0x46cbcb8d, 0xd9bebe67, 0x4b393972);
1650
+ &_data_word(0xde4a4a94, 0xd44c4c98, 0xe85858b0, 0x4acfcf85);
1651
+ &_data_word(0x6bd0d0bb, 0x2aefefc5, 0xe5aaaa4f, 0x16fbfbed);
1652
+ &_data_word(0xc5434386, 0xd74d4d9a, 0x55333366, 0x94858511);
1653
+ &_data_word(0xcf45458a, 0x10f9f9e9, 0x06020204, 0x817f7ffe);
1654
+ &_data_word(0xf05050a0, 0x443c3c78, 0xba9f9f25, 0xe3a8a84b);
1655
+ &_data_word(0xf35151a2, 0xfea3a35d, 0xc0404080, 0x8a8f8f05);
1656
+ &_data_word(0xad92923f, 0xbc9d9d21, 0x48383870, 0x04f5f5f1);
1657
+ &_data_word(0xdfbcbc63, 0xc1b6b677, 0x75dadaaf, 0x63212142);
1658
+ &_data_word(0x30101020, 0x1affffe5, 0x0ef3f3fd, 0x6dd2d2bf);
1659
+ &_data_word(0x4ccdcd81, 0x140c0c18, 0x35131326, 0x2fececc3);
1660
+ &_data_word(0xe15f5fbe, 0xa2979735, 0xcc444488, 0x3917172e);
1661
+ &_data_word(0x57c4c493, 0xf2a7a755, 0x827e7efc, 0x473d3d7a);
1662
+ &_data_word(0xac6464c8, 0xe75d5dba, 0x2b191932, 0x957373e6);
1663
+ &_data_word(0xa06060c0, 0x98818119, 0xd14f4f9e, 0x7fdcdca3);
1664
+ &_data_word(0x66222244, 0x7e2a2a54, 0xab90903b, 0x8388880b);
1665
+ &_data_word(0xca46468c, 0x29eeeec7, 0xd3b8b86b, 0x3c141428);
1666
+ &_data_word(0x79dedea7, 0xe25e5ebc, 0x1d0b0b16, 0x76dbdbad);
1667
+ &_data_word(0x3be0e0db, 0x56323264, 0x4e3a3a74, 0x1e0a0a14);
1668
+ &_data_word(0xdb494992, 0x0a06060c, 0x6c242448, 0xe45c5cb8);
1669
+ &_data_word(0x5dc2c29f, 0x6ed3d3bd, 0xefacac43, 0xa66262c4);
1670
+ &_data_word(0xa8919139, 0xa4959531, 0x37e4e4d3, 0x8b7979f2);
1671
+ &_data_word(0x32e7e7d5, 0x43c8c88b, 0x5937376e, 0xb76d6dda);
1672
+ &_data_word(0x8c8d8d01, 0x64d5d5b1, 0xd24e4e9c, 0xe0a9a949);
1673
+ &_data_word(0xb46c6cd8, 0xfa5656ac, 0x07f4f4f3, 0x25eaeacf);
1674
+ &_data_word(0xaf6565ca, 0x8e7a7af4, 0xe9aeae47, 0x18080810);
1675
+ &_data_word(0xd5baba6f, 0x887878f0, 0x6f25254a, 0x722e2e5c);
1676
+ &_data_word(0x241c1c38, 0xf1a6a657, 0xc7b4b473, 0x51c6c697);
1677
+ &_data_word(0x23e8e8cb, 0x7cdddda1, 0x9c7474e8, 0x211f1f3e);
1678
+ &_data_word(0xdd4b4b96, 0xdcbdbd61, 0x868b8b0d, 0x858a8a0f);
1679
+ &_data_word(0x907070e0, 0x423e3e7c, 0xc4b5b571, 0xaa6666cc);
1680
+ &_data_word(0xd8484890, 0x05030306, 0x01f6f6f7, 0x120e0e1c);
1681
+ &_data_word(0xa36161c2, 0x5f35356a, 0xf95757ae, 0xd0b9b969);
1682
+ &_data_word(0x91868617, 0x58c1c199, 0x271d1d3a, 0xb99e9e27);
1683
+ &_data_word(0x38e1e1d9, 0x13f8f8eb, 0xb398982b, 0x33111122);
1684
+ &_data_word(0xbb6969d2, 0x70d9d9a9, 0x898e8e07, 0xa7949433);
1685
+ &_data_word(0xb69b9b2d, 0x221e1e3c, 0x92878715, 0x20e9e9c9);
1686
+ &_data_word(0x49cece87, 0xff5555aa, 0x78282850, 0x7adfdfa5);
1687
+ &_data_word(0x8f8c8c03, 0xf8a1a159, 0x80898909, 0x170d0d1a);
1688
+ &_data_word(0xdabfbf65, 0x31e6e6d7, 0xc6424284, 0xb86868d0);
1689
+ &_data_word(0xc3414182, 0xb0999929, 0x772d2d5a, 0x110f0f1e);
1690
+ &_data_word(0xcbb0b07b, 0xfc5454a8, 0xd6bbbb6d, 0x3a16162c);
1691
+
1692
+ #Te4 # four copies of Te4 to choose from to avoid L1 aliasing
1693
+ &data_byte(0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5);
1694
+ &data_byte(0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76);
1695
+ &data_byte(0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0);
1696
+ &data_byte(0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0);
1697
+ &data_byte(0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc);
1698
+ &data_byte(0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15);
1699
+ &data_byte(0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a);
1700
+ &data_byte(0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75);
1701
+ &data_byte(0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0);
1702
+ &data_byte(0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84);
1703
+ &data_byte(0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b);
1704
+ &data_byte(0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf);
1705
+ &data_byte(0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85);
1706
+ &data_byte(0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8);
1707
+ &data_byte(0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5);
1708
+ &data_byte(0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2);
1709
+ &data_byte(0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17);
1710
+ &data_byte(0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73);
1711
+ &data_byte(0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88);
1712
+ &data_byte(0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb);
1713
+ &data_byte(0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c);
1714
+ &data_byte(0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79);
1715
+ &data_byte(0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9);
1716
+ &data_byte(0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08);
1717
+ &data_byte(0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6);
1718
+ &data_byte(0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a);
1719
+ &data_byte(0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e);
1720
+ &data_byte(0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e);
1721
+ &data_byte(0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94);
1722
+ &data_byte(0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf);
1723
+ &data_byte(0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68);
1724
+ &data_byte(0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16);
1725
+
1726
+ &data_byte(0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5);
1727
+ &data_byte(0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76);
1728
+ &data_byte(0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0);
1729
+ &data_byte(0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0);
1730
+ &data_byte(0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc);
1731
+ &data_byte(0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15);
1732
+ &data_byte(0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a);
1733
+ &data_byte(0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75);
1734
+ &data_byte(0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0);
1735
+ &data_byte(0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84);
1736
+ &data_byte(0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b);
1737
+ &data_byte(0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf);
1738
+ &data_byte(0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85);
1739
+ &data_byte(0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8);
1740
+ &data_byte(0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5);
1741
+ &data_byte(0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2);
1742
+ &data_byte(0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17);
1743
+ &data_byte(0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73);
1744
+ &data_byte(0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88);
1745
+ &data_byte(0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb);
1746
+ &data_byte(0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c);
1747
+ &data_byte(0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79);
1748
+ &data_byte(0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9);
1749
+ &data_byte(0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08);
1750
+ &data_byte(0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6);
1751
+ &data_byte(0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a);
1752
+ &data_byte(0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e);
1753
+ &data_byte(0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e);
1754
+ &data_byte(0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94);
1755
+ &data_byte(0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf);
1756
+ &data_byte(0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68);
1757
+ &data_byte(0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16);
1758
+
1759
+ &data_byte(0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5);
1760
+ &data_byte(0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76);
1761
+ &data_byte(0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0);
1762
+ &data_byte(0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0);
1763
+ &data_byte(0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc);
1764
+ &data_byte(0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15);
1765
+ &data_byte(0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a);
1766
+ &data_byte(0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75);
1767
+ &data_byte(0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0);
1768
+ &data_byte(0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84);
1769
+ &data_byte(0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b);
1770
+ &data_byte(0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf);
1771
+ &data_byte(0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85);
1772
+ &data_byte(0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8);
1773
+ &data_byte(0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5);
1774
+ &data_byte(0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2);
1775
+ &data_byte(0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17);
1776
+ &data_byte(0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73);
1777
+ &data_byte(0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88);
1778
+ &data_byte(0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb);
1779
+ &data_byte(0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c);
1780
+ &data_byte(0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79);
1781
+ &data_byte(0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9);
1782
+ &data_byte(0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08);
1783
+ &data_byte(0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6);
1784
+ &data_byte(0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a);
1785
+ &data_byte(0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e);
1786
+ &data_byte(0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e);
1787
+ &data_byte(0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94);
1788
+ &data_byte(0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf);
1789
+ &data_byte(0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68);
1790
+ &data_byte(0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16);
1791
+
1792
+ &data_byte(0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5);
1793
+ &data_byte(0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76);
1794
+ &data_byte(0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0);
1795
+ &data_byte(0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0);
1796
+ &data_byte(0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc);
1797
+ &data_byte(0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15);
1798
+ &data_byte(0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a);
1799
+ &data_byte(0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75);
1800
+ &data_byte(0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0);
1801
+ &data_byte(0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84);
1802
+ &data_byte(0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b);
1803
+ &data_byte(0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf);
1804
+ &data_byte(0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85);
1805
+ &data_byte(0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8);
1806
+ &data_byte(0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5);
1807
+ &data_byte(0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2);
1808
+ &data_byte(0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17);
1809
+ &data_byte(0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73);
1810
+ &data_byte(0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88);
1811
+ &data_byte(0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb);
1812
+ &data_byte(0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c);
1813
+ &data_byte(0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79);
1814
+ &data_byte(0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9);
1815
+ &data_byte(0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08);
1816
+ &data_byte(0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6);
1817
+ &data_byte(0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a);
1818
+ &data_byte(0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e);
1819
+ &data_byte(0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e);
1820
+ &data_byte(0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94);
1821
+ &data_byte(0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf);
1822
+ &data_byte(0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68);
1823
+ &data_byte(0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16);
1824
+ #rcon: the ten round constants (0x01 .. 0x36), one 32-bit word each,
+ # emitted directly after the last table copy above. They are followed by
+ # the byte-replicated masks 0x80808080, 0xfefefefe and 0x1b1b1b1b, two
+ # words apiece; the same three masks are repeated after every Td4 copy
+ # further down. The label itself is commented out, so this data is
+ # presumably addressed by offset from the preceding table label --
+ # TODO confirm against the key-setup code.
1825
+ $code.=<<___;
1826
+ .long 0x00000001, 0x00000002, 0x00000004, 0x00000008
1827
+ .long 0x00000010, 0x00000020, 0x00000040, 0x00000080
1828
+ .long 0x0000001b, 0x00000036, 0x80808080, 0x80808080
1829
+ .long 0xfefefefe, 0xfefefefe, 0x1b1b1b1b, 0x1b1b1b1b
1830
+ ___
1831
+ # Start of the .LAES_Td table (the decryption-side table, going by the
+ # Td/Td4 naming), aligned to 64 bytes. The 64 _data_word calls that follow
+ # pass 256 32-bit entries to that helper (defined earlier in the file; how
+ # it lays each word out is its business), and the four Td4 byte-table
+ # copies come after them.
+ $code.=<<___;
1832
+ .align 64
1833
+ .LAES_Td:
1834
+ ___
1835
+ &_data_word(0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a);
1836
+ &_data_word(0xcb6bab3b, 0xf1459d1f, 0xab58faac, 0x9303e34b);
1837
+ &_data_word(0x55fa3020, 0xf66d76ad, 0x9176cc88, 0x254c02f5);
1838
+ &_data_word(0xfcd7e54f, 0xd7cb2ac5, 0x80443526, 0x8fa362b5);
1839
+ &_data_word(0x495ab1de, 0x671bba25, 0x980eea45, 0xe1c0fe5d);
1840
+ &_data_word(0x02752fc3, 0x12f04c81, 0xa397468d, 0xc6f9d36b);
1841
+ &_data_word(0xe75f8f03, 0x959c9215, 0xeb7a6dbf, 0xda595295);
1842
+ &_data_word(0x2d83bed4, 0xd3217458, 0x2969e049, 0x44c8c98e);
1843
+ &_data_word(0x6a89c275, 0x78798ef4, 0x6b3e5899, 0xdd71b927);
1844
+ &_data_word(0xb64fe1be, 0x17ad88f0, 0x66ac20c9, 0xb43ace7d);
1845
+ &_data_word(0x184adf63, 0x82311ae5, 0x60335197, 0x457f5362);
1846
+ &_data_word(0xe07764b1, 0x84ae6bbb, 0x1ca081fe, 0x942b08f9);
1847
+ &_data_word(0x58684870, 0x19fd458f, 0x876cde94, 0xb7f87b52);
1848
+ &_data_word(0x23d373ab, 0xe2024b72, 0x578f1fe3, 0x2aab5566);
1849
+ &_data_word(0x0728ebb2, 0x03c2b52f, 0x9a7bc586, 0xa50837d3);
1850
+ &_data_word(0xf2872830, 0xb2a5bf23, 0xba6a0302, 0x5c8216ed);
1851
+ &_data_word(0x2b1ccf8a, 0x92b479a7, 0xf0f207f3, 0xa1e2694e);
1852
+ &_data_word(0xcdf4da65, 0xd5be0506, 0x1f6234d1, 0x8afea6c4);
1853
+ &_data_word(0x9d532e34, 0xa055f3a2, 0x32e18a05, 0x75ebf6a4);
1854
+ &_data_word(0x39ec830b, 0xaaef6040, 0x069f715e, 0x51106ebd);
1855
+ &_data_word(0xf98a213e, 0x3d06dd96, 0xae053edd, 0x46bde64d);
1856
+ &_data_word(0xb58d5491, 0x055dc471, 0x6fd40604, 0xff155060);
1857
+ &_data_word(0x24fb9819, 0x97e9bdd6, 0xcc434089, 0x779ed967);
1858
+ &_data_word(0xbd42e8b0, 0x888b8907, 0x385b19e7, 0xdbeec879);
1859
+ &_data_word(0x470a7ca1, 0xe90f427c, 0xc91e84f8, 0x00000000);
1860
+ &_data_word(0x83868009, 0x48ed2b32, 0xac70111e, 0x4e725a6c);
1861
+ &_data_word(0xfbff0efd, 0x5638850f, 0x1ed5ae3d, 0x27392d36);
1862
+ &_data_word(0x64d90f0a, 0x21a65c68, 0xd1545b9b, 0x3a2e3624);
1863
+ &_data_word(0xb1670a0c, 0x0fe75793, 0xd296eeb4, 0x9e919b1b);
1864
+ &_data_word(0x4fc5c080, 0xa220dc61, 0x694b775a, 0x161a121c);
1865
+ &_data_word(0x0aba93e2, 0xe52aa0c0, 0x43e0223c, 0x1d171b12);
1866
+ &_data_word(0x0b0d090e, 0xadc78bf2, 0xb9a8b62d, 0xc8a91e14);
1867
+ &_data_word(0x8519f157, 0x4c0775af, 0xbbdd99ee, 0xfd607fa3);
1868
+ &_data_word(0x9f2601f7, 0xbcf5725c, 0xc53b6644, 0x347efb5b);
1869
+ &_data_word(0x7629438b, 0xdcc623cb, 0x68fcedb6, 0x63f1e4b8);
1870
+ &_data_word(0xcadc31d7, 0x10856342, 0x40229713, 0x2011c684);
1871
+ &_data_word(0x7d244a85, 0xf83dbbd2, 0x1132f9ae, 0x6da129c7);
1872
+ &_data_word(0x4b2f9e1d, 0xf330b2dc, 0xec52860d, 0xd0e3c177);
1873
+ &_data_word(0x6c16b32b, 0x99b970a9, 0xfa489411, 0x2264e947);
1874
+ &_data_word(0xc48cfca8, 0x1a3ff0a0, 0xd82c7d56, 0xef903322);
1875
+ &_data_word(0xc74e4987, 0xc1d138d9, 0xfea2ca8c, 0x360bd498);
1876
+ &_data_word(0xcf81f5a6, 0x28de7aa5, 0x268eb7da, 0xa4bfad3f);
1877
+ &_data_word(0xe49d3a2c, 0x0d927850, 0x9bcc5f6a, 0x62467e54);
1878
+ &_data_word(0xc2138df6, 0xe8b8d890, 0x5ef7392e, 0xf5afc382);
1879
+ &_data_word(0xbe805d9f, 0x7c93d069, 0xa92dd56f, 0xb31225cf);
1880
+ &_data_word(0x3b99acc8, 0xa77d1810, 0x6e639ce8, 0x7bbb3bdb);
1881
+ &_data_word(0x097826cd, 0xf418596e, 0x01b79aec, 0xa89a4f83);
1882
+ &_data_word(0x656e95e6, 0x7ee6ffaa, 0x08cfbc21, 0xe6e815ef);
1883
+ &_data_word(0xd99be7ba, 0xce366f4a, 0xd4099fea, 0xd67cb029);
1884
+ &_data_word(0xafb2a431, 0x31233f2a, 0x3094a5c6, 0xc066a235);
1885
+ &_data_word(0x37bc4e74, 0xa6ca82fc, 0xb0d090e0, 0x15d8a733);
1886
+ &_data_word(0x4a9804f1, 0xf7daec41, 0x0e50cd7f, 0x2ff69117);
1887
+ &_data_word(0x8dd64d76, 0x4db0ef43, 0x544daacc, 0xdf0496e4);
1888
+ &_data_word(0xe3b5d19e, 0x1b886a4c, 0xb81f2cc1, 0x7f516546);
1889
+ &_data_word(0x04ea5e9d, 0x5d358c01, 0x737487fa, 0x2e410bfb);
1890
+ &_data_word(0x5a1d67b3, 0x52d2db92, 0x335610e9, 0x1347d66d);
1891
+ &_data_word(0x8c61d79a, 0x7a0ca137, 0x8e14f859, 0x893c13eb);
1892
+ &_data_word(0xee27a9ce, 0x35c961b7, 0xede51ce1, 0x3cb1477a);
1893
+ &_data_word(0x59dfd29c, 0x3f73f255, 0x79ce1418, 0xbf37c773);
1894
+ &_data_word(0xeacdf753, 0x5baafd5f, 0x146f3ddf, 0x86db4478);
1895
+ &_data_word(0x81f3afca, 0x3ec468b9, 0x2c342438, 0x5f40a3c2);
1896
+ &_data_word(0x72c31d16, 0x0c25e2bc, 0x8b493c28, 0x41950dff);
1897
+ &_data_word(0x7101a839, 0xdeb30c08, 0x9ce4b4d8, 0x90c15664);
1898
+ &_data_word(0x6184cb7b, 0x70b632d5, 0x745c6c48, 0x4257b8d0);
1899
+
1900
+ #Td4: # four copies of Td4 to choose from to avoid L1 aliasing
1901
+ &data_byte(0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38);
1902
+ &data_byte(0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb);
1903
+ &data_byte(0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87);
1904
+ &data_byte(0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb);
1905
+ &data_byte(0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d);
1906
+ &data_byte(0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e);
1907
+ &data_byte(0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2);
1908
+ &data_byte(0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25);
1909
+ &data_byte(0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16);
1910
+ &data_byte(0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92);
1911
+ &data_byte(0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda);
1912
+ &data_byte(0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84);
1913
+ &data_byte(0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a);
1914
+ &data_byte(0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06);
1915
+ &data_byte(0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02);
1916
+ &data_byte(0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b);
1917
+ &data_byte(0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea);
1918
+ &data_byte(0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73);
1919
+ &data_byte(0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85);
1920
+ &data_byte(0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e);
1921
+ &data_byte(0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89);
1922
+ &data_byte(0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b);
1923
+ &data_byte(0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20);
1924
+ &data_byte(0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4);
1925
+ &data_byte(0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31);
1926
+ &data_byte(0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f);
1927
+ &data_byte(0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d);
1928
+ &data_byte(0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef);
1929
+ &data_byte(0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0);
1930
+ &data_byte(0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61);
1931
+ &data_byte(0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26);
1932
+ &data_byte(0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d);
1933
+ $code.=<<___;
1934
+ .long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
1935
+ .long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0
1936
+ ___
1937
+ &data_byte(0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38);
1938
+ &data_byte(0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb);
1939
+ &data_byte(0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87);
1940
+ &data_byte(0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb);
1941
+ &data_byte(0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d);
1942
+ &data_byte(0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e);
1943
+ &data_byte(0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2);
1944
+ &data_byte(0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25);
1945
+ &data_byte(0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16);
1946
+ &data_byte(0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92);
1947
+ &data_byte(0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda);
1948
+ &data_byte(0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84);
1949
+ &data_byte(0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a);
1950
+ &data_byte(0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06);
1951
+ &data_byte(0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02);
1952
+ &data_byte(0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b);
1953
+ &data_byte(0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea);
1954
+ &data_byte(0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73);
1955
+ &data_byte(0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85);
1956
+ &data_byte(0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e);
1957
+ &data_byte(0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89);
1958
+ &data_byte(0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b);
1959
+ &data_byte(0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20);
1960
+ &data_byte(0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4);
1961
+ &data_byte(0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31);
1962
+ &data_byte(0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f);
1963
+ &data_byte(0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d);
1964
+ &data_byte(0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef);
1965
+ &data_byte(0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0);
1966
+ &data_byte(0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61);
1967
+ &data_byte(0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26);
1968
+ &data_byte(0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d);
1969
+ $code.=<<___;
1970
+ .long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
1971
+ .long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0
1972
+ ___
1973
+ &data_byte(0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38);
1974
+ &data_byte(0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb);
1975
+ &data_byte(0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87);
1976
+ &data_byte(0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb);
1977
+ &data_byte(0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d);
1978
+ &data_byte(0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e);
1979
+ &data_byte(0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2);
1980
+ &data_byte(0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25);
1981
+ &data_byte(0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16);
1982
+ &data_byte(0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92);
1983
+ &data_byte(0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda);
1984
+ &data_byte(0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84);
1985
+ &data_byte(0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a);
1986
+ &data_byte(0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06);
1987
+ &data_byte(0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02);
1988
+ &data_byte(0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b);
1989
+ &data_byte(0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea);
1990
+ &data_byte(0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73);
1991
+ &data_byte(0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85);
1992
+ &data_byte(0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e);
1993
+ &data_byte(0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89);
1994
+ &data_byte(0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b);
1995
+ &data_byte(0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20);
1996
+ &data_byte(0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4);
1997
+ &data_byte(0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31);
1998
+ &data_byte(0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f);
1999
+ &data_byte(0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d);
2000
+ &data_byte(0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef);
2001
+ &data_byte(0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0);
2002
+ &data_byte(0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61);
2003
+ &data_byte(0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26);
2004
+ &data_byte(0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d);
2005
+ $code.=<<___;
2006
+ .long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
2007
+ .long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0
2008
+ ___
2009
+ &data_byte(0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38);
2010
+ &data_byte(0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb);
2011
+ &data_byte(0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87);
2012
+ &data_byte(0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb);
2013
+ &data_byte(0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d);
2014
+ &data_byte(0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e);
2015
+ &data_byte(0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2);
2016
+ &data_byte(0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25);
2017
+ &data_byte(0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16);
2018
+ &data_byte(0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92);
2019
+ &data_byte(0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda);
2020
+ &data_byte(0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84);
2021
+ &data_byte(0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a);
2022
+ &data_byte(0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06);
2023
+ &data_byte(0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02);
2024
+ &data_byte(0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b);
2025
+ &data_byte(0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea);
2026
+ &data_byte(0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73);
2027
+ &data_byte(0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85);
2028
+ &data_byte(0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e);
2029
+ &data_byte(0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89);
2030
+ &data_byte(0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b);
2031
+ &data_byte(0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20);
2032
+ &data_byte(0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4);
2033
+ &data_byte(0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31);
2034
+ &data_byte(0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f);
2035
+ &data_byte(0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d);
2036
+ &data_byte(0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef);
2037
+ &data_byte(0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0);
2038
+ &data_byte(0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61);
2039
+ &data_byte(0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26);
2040
+ &data_byte(0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d);
2041
+ $code.=<<___;
2042
+ .long 0x80808080, 0x80808080, 0xfefefefe, 0xfefefefe
2043
+ .long 0x1b1b1b1b, 0x1b1b1b1b, 0, 0
2044
+ .asciz "AES for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
2045
+ .align 64
2046
+ ___
2047
+
2048
+ # EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
2049
+ # CONTEXT *context,DISPATCHER_CONTEXT *disp)
2050
+ if ($win64) {
2051
+ $rec="%rcx";
2052
+ $frame="%rdx";
2053
+ $context="%r8";
2054
+ $disp="%r9";
2055
+
2056
+ $code.=<<___;
2057
+ .extern __imp_RtlVirtualUnwind
2058
+ # block_se_handler: exception handler referenced by the unwind info of
+ # asm_AES_encrypt and asm_AES_decrypt. It rewrites Rsp, Rsi and Rdi in the
+ # CONTEXT record and, when Rip lies between the prologue and epilogue labels
+ # given in HandlerData, also Rbx/Rbp/R12-R15, using values read from the
+ # interrupted function's stack. It then leaves through .Lcommon_seh_exit.
+ # NOTE(review): .Lcommon_seh_exit is not defined inside this routine; it has
+ # to be provided elsewhere in this Win64 section -- confirm that it is.
+ .type block_se_handler,\@abi-omnipotent
2059
+ .align 16
2060
+ block_se_handler:
2061
+ push %rsi # preserve every register used as scratch below
2062
+ push %rdi
2063
+ push %rbx
2064
+ push %rbp
2065
+ push %r12
2066
+ push %r13
2067
+ push %r14
2068
+ push %r15
2069
+ pushfq
2070
+ sub \$64,%rsp # same 64-byte frame as key_se_handler
2071
+
2072
+ mov 120($context),%rax # pull context->Rax
2073
+ mov 248($context),%rbx # pull context->Rip
2074
+
2075
+ mov 8($disp),%rsi # disp->ImageBase
2076
+ mov 56($disp),%r11 # disp->HandlerData
2077
+
2078
+ mov 0(%r11),%r10d # HandlerData[0]
2079
+ lea (%rsi,%r10),%r10 # prologue label
2080
+ cmp %r10,%rbx # context->Rip<prologue label
2081
+ jb .Lin_block_prologue # yes: rax still holds context->Rax
2082
+
2083
+ mov 152($context),%rax # pull context->Rsp
2084
+
2085
+ mov 4(%r11),%r10d # HandlerData[1]
2086
+ lea (%rsi,%r10),%r10 # epilogue label
2087
+ cmp %r10,%rbx # context->Rip>=epilogue label
2088
+ jae .Lin_block_prologue # yes: rax holds context->Rsp
2089
+
2090
+ mov 24(%rax),%rax # pull saved real stack pointer
2091
+ lea 48(%rax),%rax # adjust...
2092
+
2093
+ mov -8(%rax),%rbx # six saved registers sit just below rax
2094
+ mov -16(%rax),%rbp
2095
+ mov -24(%rax),%r12
2096
+ mov -32(%rax),%r13
2097
+ mov -40(%rax),%r14
2098
+ mov -48(%rax),%r15
2099
+ mov %rbx,144($context) # restore context->Rbx
2100
+ mov %rbp,160($context) # restore context->Rbp
2101
+ mov %r12,216($context) # restore context->R12
2102
+ mov %r13,224($context) # restore context->R13
2103
+ mov %r14,232($context) # restore context->R14
2104
+ mov %r15,240($context) # restore context->R15
2105
+
2106
+ .Lin_block_prologue:
+ # rax is the stack pointer value to report. 8(rax) and 16(rax) are read
+ # as the saved rdi/rsi -- presumably the slots the function prologue
+ # stored them in; confirm against the function entry code.
2107
+ mov 8(%rax),%rdi
2108
+ mov 16(%rax),%rsi
2109
+ mov %rax,152($context) # restore context->Rsp
2110
+ mov %rsi,168($context) # restore context->Rsi
2111
+ mov %rdi,176($context) # restore context->Rdi
2112
+
2113
+ jmp .Lcommon_seh_exit # shared tail; pushed frame is left for it to unwind
2114
+ .size block_se_handler,.-block_se_handler
2115
+
2116
+ # key_se_handler: exception handler referenced by the unwind info of
+ # asm_AES_set_encrypt_key and asm_AES_set_decrypt_key. It rewrites Rsp, Rsi
+ # and Rdi in the CONTEXT record and, when Rip lies between the prologue and
+ # epilogue labels given in HandlerData, also Rbx/Rbp/R12-R15. It then falls
+ # into the shared exit tail at .Lcommon_seh_exit, which copies the patched
+ # CONTEXT into disp->ContextRecord, calls RtlVirtualUnwind, restores the
+ # registers pushed on entry and returns ExceptionContinueSearch in eax.
+ # block_se_handler builds an identical frame and jumps to that same tail,
+ # so the label must stay defined here.
+ .type key_se_handler,\@abi-omnipotent
2117
+ .align 16
2118
+ key_se_handler:
2119
+ push %rsi # preserve every register used as scratch below
2120
+ push %rdi
2121
+ push %rbx
2122
+ push %rbp
2123
+ push %r12
2124
+ push %r13
2125
+ push %r14
2126
+ push %r15
2127
+ pushfq
2128
+ sub \$64,%rsp # argument area for the RtlVirtualUnwind call
2129
+
2130
+ mov 120($context),%rax # pull context->Rax
2131
+ mov 248($context),%rbx # pull context->Rip
2132
+
2133
+ mov 8($disp),%rsi # disp->ImageBase
2134
+ mov 56($disp),%r11 # disp->HandlerData
2135
+
2136
+ mov 0(%r11),%r10d # HandlerData[0]
2137
+ lea (%rsi,%r10),%r10 # prologue label
2138
+ cmp %r10,%rbx # context->Rip<prologue label
2139
+ jb .Lin_key_prologue # yes: rax still holds context->Rax
2140
+
2141
+ mov 152($context),%rax # pull context->Rsp
2142
+
2143
+ mov 4(%r11),%r10d # HandlerData[1]
2144
+ lea (%rsi,%r10),%r10 # epilogue label
2145
+ cmp %r10,%rbx # context->Rip>=epilogue label
2146
+ jae .Lin_key_prologue # yes: rax holds context->Rsp
2147
+
2148
+ lea 56(%rax),%rax # step over the 56-byte frame
2149
+
2150
+ mov -8(%rax),%rbx # six saved registers sit just below rax
2151
+ mov -16(%rax),%rbp
2152
+ mov -24(%rax),%r12
2153
+ mov -32(%rax),%r13
2154
+ mov -40(%rax),%r14
2155
+ mov -48(%rax),%r15
2156
+ mov %rbx,144($context) # restore context->Rbx
2157
+ mov %rbp,160($context) # restore context->Rbp
2158
+ mov %r12,216($context) # restore context->R12
2159
+ mov %r13,224($context) # restore context->R13
2160
+ mov %r14,232($context) # restore context->R14
2161
+ mov %r15,240($context) # restore context->R15
2162
+
2163
+ .Lin_key_prologue:
2164
+ mov 8(%rax),%rdi
2165
+ mov 16(%rax),%rsi
2166
+ mov %rax,152($context) # restore context->Rsp
2167
+ mov %rsi,168($context) # restore context->Rsi
2168
+ mov %rdi,176($context) # restore context->Rdi
2169
+ .Lcommon_seh_exit: # shared tail, also the jump target of block_se_handler
2170
+ mov 40($disp),%rdi # disp->ContextRecord
2171
+ mov $context,%rsi # context
2172
+ mov \$`1232/8`,%ecx # sizeof(CONTEXT)
2173
+ .long 0xa548f3fc # cld; rep movsq
2174
+
2175
+ mov $disp,%rsi
2176
+ xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER
2177
+ mov 8(%rsi),%rdx # arg2, disp->ImageBase
2178
+ mov 0(%rsi),%r8 # arg3, disp->ControlPc
2179
+ mov 16(%rsi),%r9 # arg4, disp->FunctionEntry
2180
+ mov 40(%rsi),%r10 # disp->ContextRecord
2181
+ lea 56(%rsi),%r11 # &disp->HandlerData
2182
+ lea 24(%rsi),%r12 # &disp->EstablisherFrame
2183
+ mov %r10,32(%rsp) # arg5
2184
+ mov %r11,40(%rsp) # arg6
2185
+ mov %r12,48(%rsp) # arg7
2186
+ mov %rcx,56(%rsp) # arg8, (NULL)
2187
+ call *__imp_RtlVirtualUnwind(%rip)
2188
+
2189
+ mov \$1,%eax # ExceptionContinueSearch
2190
+ add \$64,%rsp
2191
+ popfq
2192
+ pop %r15
2193
+ pop %r14
2194
+ pop %r13
2195
+ pop %r12
2196
+ pop %rbp
2197
+ pop %rbx
2198
+ pop %rdi
2199
+ pop %rsi
2200
+ ret
2201
+ .size key_se_handler,.-key_se_handler
2202
+
2203
+ # .pdata: one record per function, made of three image-relative addresses:
+ # function begin, function end, and that function's unwind-info record.
+ .section .pdata
2204
+ .align 4
2205
+ .rva .LSEH_begin_asm_AES_encrypt
2206
+ .rva .LSEH_end_asm_AES_encrypt
2207
+ .rva .LSEH_info_asm_AES_encrypt
2208
+
2209
+ .rva .LSEH_begin_asm_AES_decrypt
2210
+ .rva .LSEH_end_asm_AES_decrypt
2211
+ .rva .LSEH_info_asm_AES_decrypt
2212
+
2213
+ .rva .LSEH_begin_asm_AES_set_encrypt_key
2214
+ .rva .LSEH_end_asm_AES_set_encrypt_key
2215
+ .rva .LSEH_info_asm_AES_set_encrypt_key
2216
+
2217
+ .rva .LSEH_begin_asm_AES_set_decrypt_key
2218
+ .rva .LSEH_end_asm_AES_set_decrypt_key
2219
+ .rva .LSEH_info_asm_AES_set_decrypt_key
2220
+
+ # .xdata: the unwind-info records named above. Each is four header bytes,
+ # the handler's image-relative address, and two more image-relative
+ # addresses that the handlers read as HandlerData[0] (prologue label) and
+ # HandlerData[1] (epilogue label). The block functions share
+ # block_se_handler; the key-setup functions share key_se_handler.
+ # NOTE(review): header byte 9 presumably encodes UNWIND_INFO version 1 with
+ # the exception-handler flag set, and the zeros mean no prolog size or
+ # unwind codes -- confirm against the x64 exception-handling reference.
2221
+ .section .xdata
2222
+ .align 8
2223
+ .LSEH_info_asm_AES_encrypt:
2224
+ .byte 9,0,0,0
2225
+ .rva block_se_handler
2226
+ .rva .Lenc_prologue,.Lenc_epilogue # HandlerData[]
2227
+ .LSEH_info_asm_AES_decrypt:
2228
+ .byte 9,0,0,0
2229
+ .rva block_se_handler
2230
+ .rva .Ldec_prologue,.Ldec_epilogue # HandlerData[]
2231
+ .LSEH_info_asm_AES_set_encrypt_key:
2232
+ .byte 9,0,0,0
2233
+ .rva key_se_handler
2234
+ .rva .Lenc_key_prologue,.Lenc_key_epilogue # HandlerData[]
2235
+ .LSEH_info_asm_AES_set_decrypt_key:
2236
+ .byte 9,0,0,0
2237
+ .rva key_se_handler
2238
+ .rva .Ldec_key_prologue,.Ldec_key_epilogue # HandlerData[]
2239
+ ___
2240
+ }
2241
+
2242
+ # Replace every back-quoted expression in the generated text (for example
+ # the 1232/8 operand in the SEH tail) with the result of evaluating it.
+ $code =~ s/\`([^\`]*)\`/eval($1)/gem;
2243
+
2244
+ print $code;
2245
+
2246
+ # Output is buffered, so a failed write may only surface when the handle is
+ # closed. Die here rather than exit successfully with a truncated or
+ # missing assembly file.
+ close STDOUT or die "error closing STDOUT: $!";