ring-native 0.0.0 → 0.1.0

Files changed (267)
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -0
  3. data/CHANGES.md +7 -0
  4. data/Makefile +5 -0
  5. data/README.md +12 -5
  6. data/Rakefile +4 -0
  7. data/ext/ring/extconf.rb +4 -5
  8. data/lib/ring/native.rb +3 -1
  9. data/lib/ring/native/version.rb +5 -1
  10. data/ring-native.gemspec +6 -6
  11. data/vendor/ring-ffi/Cargo.lock +26 -0
  12. data/vendor/ring-ffi/Cargo.toml +45 -0
  13. data/vendor/ring-ffi/LICENSE +16 -0
  14. data/vendor/ring-ffi/README.md +59 -0
  15. data/vendor/ring-ffi/src/lib.rs +79 -0
  16. metadata +10 -255
  17. data/vendor/ring/BUILDING.md +0 -40
  18. data/vendor/ring/Cargo.toml +0 -43
  19. data/vendor/ring/LICENSE +0 -185
  20. data/vendor/ring/Makefile +0 -35
  21. data/vendor/ring/PORTING.md +0 -163
  22. data/vendor/ring/README.md +0 -113
  23. data/vendor/ring/STYLE.md +0 -197
  24. data/vendor/ring/appveyor.yml +0 -27
  25. data/vendor/ring/build.rs +0 -108
  26. data/vendor/ring/crypto/aes/aes.c +0 -1142
  27. data/vendor/ring/crypto/aes/aes_test.Windows.vcxproj +0 -25
  28. data/vendor/ring/crypto/aes/aes_test.cc +0 -93
  29. data/vendor/ring/crypto/aes/asm/aes-586.pl +0 -2368
  30. data/vendor/ring/crypto/aes/asm/aes-armv4.pl +0 -1249
  31. data/vendor/ring/crypto/aes/asm/aes-x86_64.pl +0 -2246
  32. data/vendor/ring/crypto/aes/asm/aesni-x86.pl +0 -1318
  33. data/vendor/ring/crypto/aes/asm/aesni-x86_64.pl +0 -2084
  34. data/vendor/ring/crypto/aes/asm/aesv8-armx.pl +0 -675
  35. data/vendor/ring/crypto/aes/asm/bsaes-armv7.pl +0 -1364
  36. data/vendor/ring/crypto/aes/asm/bsaes-x86_64.pl +0 -1565
  37. data/vendor/ring/crypto/aes/asm/vpaes-x86.pl +0 -841
  38. data/vendor/ring/crypto/aes/asm/vpaes-x86_64.pl +0 -1116
  39. data/vendor/ring/crypto/aes/internal.h +0 -87
  40. data/vendor/ring/crypto/aes/mode_wrappers.c +0 -61
  41. data/vendor/ring/crypto/bn/add.c +0 -394
  42. data/vendor/ring/crypto/bn/asm/armv4-mont.pl +0 -694
  43. data/vendor/ring/crypto/bn/asm/armv8-mont.pl +0 -1503
  44. data/vendor/ring/crypto/bn/asm/bn-586.pl +0 -774
  45. data/vendor/ring/crypto/bn/asm/co-586.pl +0 -287
  46. data/vendor/ring/crypto/bn/asm/rsaz-avx2.pl +0 -1882
  47. data/vendor/ring/crypto/bn/asm/x86-mont.pl +0 -592
  48. data/vendor/ring/crypto/bn/asm/x86_64-gcc.c +0 -599
  49. data/vendor/ring/crypto/bn/asm/x86_64-mont.pl +0 -1393
  50. data/vendor/ring/crypto/bn/asm/x86_64-mont5.pl +0 -3507
  51. data/vendor/ring/crypto/bn/bn.c +0 -352
  52. data/vendor/ring/crypto/bn/bn_asn1.c +0 -74
  53. data/vendor/ring/crypto/bn/bn_test.Windows.vcxproj +0 -25
  54. data/vendor/ring/crypto/bn/bn_test.cc +0 -1696
  55. data/vendor/ring/crypto/bn/cmp.c +0 -200
  56. data/vendor/ring/crypto/bn/convert.c +0 -433
  57. data/vendor/ring/crypto/bn/ctx.c +0 -311
  58. data/vendor/ring/crypto/bn/div.c +0 -594
  59. data/vendor/ring/crypto/bn/exponentiation.c +0 -1335
  60. data/vendor/ring/crypto/bn/gcd.c +0 -711
  61. data/vendor/ring/crypto/bn/generic.c +0 -1019
  62. data/vendor/ring/crypto/bn/internal.h +0 -316
  63. data/vendor/ring/crypto/bn/montgomery.c +0 -516
  64. data/vendor/ring/crypto/bn/mul.c +0 -888
  65. data/vendor/ring/crypto/bn/prime.c +0 -829
  66. data/vendor/ring/crypto/bn/random.c +0 -334
  67. data/vendor/ring/crypto/bn/rsaz_exp.c +0 -262
  68. data/vendor/ring/crypto/bn/rsaz_exp.h +0 -53
  69. data/vendor/ring/crypto/bn/shift.c +0 -276
  70. data/vendor/ring/crypto/bytestring/bytestring_test.Windows.vcxproj +0 -25
  71. data/vendor/ring/crypto/bytestring/bytestring_test.cc +0 -421
  72. data/vendor/ring/crypto/bytestring/cbb.c +0 -399
  73. data/vendor/ring/crypto/bytestring/cbs.c +0 -227
  74. data/vendor/ring/crypto/bytestring/internal.h +0 -46
  75. data/vendor/ring/crypto/chacha/chacha_generic.c +0 -140
  76. data/vendor/ring/crypto/chacha/chacha_vec.c +0 -323
  77. data/vendor/ring/crypto/chacha/chacha_vec_arm.S +0 -1447
  78. data/vendor/ring/crypto/chacha/chacha_vec_arm_generate.go +0 -153
  79. data/vendor/ring/crypto/cipher/cipher_test.Windows.vcxproj +0 -25
  80. data/vendor/ring/crypto/cipher/e_aes.c +0 -390
  81. data/vendor/ring/crypto/cipher/e_chacha20poly1305.c +0 -208
  82. data/vendor/ring/crypto/cipher/internal.h +0 -173
  83. data/vendor/ring/crypto/cipher/test/aes_128_gcm_tests.txt +0 -543
  84. data/vendor/ring/crypto/cipher/test/aes_128_key_wrap_tests.txt +0 -9
  85. data/vendor/ring/crypto/cipher/test/aes_256_gcm_tests.txt +0 -475
  86. data/vendor/ring/crypto/cipher/test/aes_256_key_wrap_tests.txt +0 -23
  87. data/vendor/ring/crypto/cipher/test/chacha20_poly1305_old_tests.txt +0 -422
  88. data/vendor/ring/crypto/cipher/test/chacha20_poly1305_tests.txt +0 -484
  89. data/vendor/ring/crypto/cipher/test/cipher_test.txt +0 -100
  90. data/vendor/ring/crypto/constant_time_test.Windows.vcxproj +0 -25
  91. data/vendor/ring/crypto/constant_time_test.c +0 -304
  92. data/vendor/ring/crypto/cpu-arm-asm.S +0 -32
  93. data/vendor/ring/crypto/cpu-arm.c +0 -199
  94. data/vendor/ring/crypto/cpu-intel.c +0 -261
  95. data/vendor/ring/crypto/crypto.c +0 -151
  96. data/vendor/ring/crypto/curve25519/asm/x25519-arm.S +0 -2118
  97. data/vendor/ring/crypto/curve25519/curve25519.c +0 -4888
  98. data/vendor/ring/crypto/curve25519/x25519_test.cc +0 -128
  99. data/vendor/ring/crypto/digest/md32_common.h +0 -181
  100. data/vendor/ring/crypto/ec/asm/p256-x86_64-asm.pl +0 -2725
  101. data/vendor/ring/crypto/ec/ec.c +0 -193
  102. data/vendor/ring/crypto/ec/ec_curves.c +0 -61
  103. data/vendor/ring/crypto/ec/ec_key.c +0 -228
  104. data/vendor/ring/crypto/ec/ec_montgomery.c +0 -114
  105. data/vendor/ring/crypto/ec/example_mul.Windows.vcxproj +0 -25
  106. data/vendor/ring/crypto/ec/internal.h +0 -243
  107. data/vendor/ring/crypto/ec/oct.c +0 -253
  108. data/vendor/ring/crypto/ec/p256-64.c +0 -1794
  109. data/vendor/ring/crypto/ec/p256-x86_64-table.h +0 -9548
  110. data/vendor/ring/crypto/ec/p256-x86_64.c +0 -509
  111. data/vendor/ring/crypto/ec/simple.c +0 -1007
  112. data/vendor/ring/crypto/ec/util-64.c +0 -183
  113. data/vendor/ring/crypto/ec/wnaf.c +0 -508
  114. data/vendor/ring/crypto/ecdh/ecdh.c +0 -155
  115. data/vendor/ring/crypto/ecdsa/ecdsa.c +0 -304
  116. data/vendor/ring/crypto/ecdsa/ecdsa_asn1.c +0 -193
  117. data/vendor/ring/crypto/ecdsa/ecdsa_test.Windows.vcxproj +0 -25
  118. data/vendor/ring/crypto/ecdsa/ecdsa_test.cc +0 -327
  119. data/vendor/ring/crypto/header_removed.h +0 -17
  120. data/vendor/ring/crypto/internal.h +0 -495
  121. data/vendor/ring/crypto/libring.Windows.vcxproj +0 -101
  122. data/vendor/ring/crypto/mem.c +0 -98
  123. data/vendor/ring/crypto/modes/asm/aesni-gcm-x86_64.pl +0 -1045
  124. data/vendor/ring/crypto/modes/asm/ghash-armv4.pl +0 -517
  125. data/vendor/ring/crypto/modes/asm/ghash-x86.pl +0 -1393
  126. data/vendor/ring/crypto/modes/asm/ghash-x86_64.pl +0 -1741
  127. data/vendor/ring/crypto/modes/asm/ghashv8-armx.pl +0 -422
  128. data/vendor/ring/crypto/modes/ctr.c +0 -226
  129. data/vendor/ring/crypto/modes/gcm.c +0 -1206
  130. data/vendor/ring/crypto/modes/gcm_test.Windows.vcxproj +0 -25
  131. data/vendor/ring/crypto/modes/gcm_test.c +0 -348
  132. data/vendor/ring/crypto/modes/internal.h +0 -299
  133. data/vendor/ring/crypto/perlasm/arm-xlate.pl +0 -170
  134. data/vendor/ring/crypto/perlasm/readme +0 -100
  135. data/vendor/ring/crypto/perlasm/x86_64-xlate.pl +0 -1164
  136. data/vendor/ring/crypto/perlasm/x86asm.pl +0 -292
  137. data/vendor/ring/crypto/perlasm/x86gas.pl +0 -263
  138. data/vendor/ring/crypto/perlasm/x86masm.pl +0 -200
  139. data/vendor/ring/crypto/perlasm/x86nasm.pl +0 -187
  140. data/vendor/ring/crypto/poly1305/poly1305.c +0 -331
  141. data/vendor/ring/crypto/poly1305/poly1305_arm.c +0 -301
  142. data/vendor/ring/crypto/poly1305/poly1305_arm_asm.S +0 -2015
  143. data/vendor/ring/crypto/poly1305/poly1305_test.Windows.vcxproj +0 -25
  144. data/vendor/ring/crypto/poly1305/poly1305_test.cc +0 -80
  145. data/vendor/ring/crypto/poly1305/poly1305_test.txt +0 -52
  146. data/vendor/ring/crypto/poly1305/poly1305_vec.c +0 -892
  147. data/vendor/ring/crypto/rand/asm/rdrand-x86_64.pl +0 -75
  148. data/vendor/ring/crypto/rand/internal.h +0 -32
  149. data/vendor/ring/crypto/rand/rand.c +0 -189
  150. data/vendor/ring/crypto/rand/urandom.c +0 -219
  151. data/vendor/ring/crypto/rand/windows.c +0 -56
  152. data/vendor/ring/crypto/refcount_c11.c +0 -66
  153. data/vendor/ring/crypto/refcount_lock.c +0 -53
  154. data/vendor/ring/crypto/refcount_test.Windows.vcxproj +0 -25
  155. data/vendor/ring/crypto/refcount_test.c +0 -58
  156. data/vendor/ring/crypto/rsa/blinding.c +0 -462
  157. data/vendor/ring/crypto/rsa/internal.h +0 -108
  158. data/vendor/ring/crypto/rsa/padding.c +0 -300
  159. data/vendor/ring/crypto/rsa/rsa.c +0 -450
  160. data/vendor/ring/crypto/rsa/rsa_asn1.c +0 -261
  161. data/vendor/ring/crypto/rsa/rsa_impl.c +0 -944
  162. data/vendor/ring/crypto/rsa/rsa_test.Windows.vcxproj +0 -25
  163. data/vendor/ring/crypto/rsa/rsa_test.cc +0 -437
  164. data/vendor/ring/crypto/sha/asm/sha-armv8.pl +0 -436
  165. data/vendor/ring/crypto/sha/asm/sha-x86_64.pl +0 -2390
  166. data/vendor/ring/crypto/sha/asm/sha256-586.pl +0 -1275
  167. data/vendor/ring/crypto/sha/asm/sha256-armv4.pl +0 -735
  168. data/vendor/ring/crypto/sha/asm/sha256-armv8.pl +0 -14
  169. data/vendor/ring/crypto/sha/asm/sha256-x86_64.pl +0 -14
  170. data/vendor/ring/crypto/sha/asm/sha512-586.pl +0 -911
  171. data/vendor/ring/crypto/sha/asm/sha512-armv4.pl +0 -666
  172. data/vendor/ring/crypto/sha/asm/sha512-armv8.pl +0 -14
  173. data/vendor/ring/crypto/sha/asm/sha512-x86_64.pl +0 -14
  174. data/vendor/ring/crypto/sha/sha1.c +0 -271
  175. data/vendor/ring/crypto/sha/sha256.c +0 -204
  176. data/vendor/ring/crypto/sha/sha512.c +0 -355
  177. data/vendor/ring/crypto/test/file_test.cc +0 -326
  178. data/vendor/ring/crypto/test/file_test.h +0 -181
  179. data/vendor/ring/crypto/test/malloc.cc +0 -150
  180. data/vendor/ring/crypto/test/scoped_types.h +0 -95
  181. data/vendor/ring/crypto/test/test.Windows.vcxproj +0 -35
  182. data/vendor/ring/crypto/test/test_util.cc +0 -46
  183. data/vendor/ring/crypto/test/test_util.h +0 -41
  184. data/vendor/ring/crypto/thread_none.c +0 -55
  185. data/vendor/ring/crypto/thread_pthread.c +0 -165
  186. data/vendor/ring/crypto/thread_test.Windows.vcxproj +0 -25
  187. data/vendor/ring/crypto/thread_test.c +0 -200
  188. data/vendor/ring/crypto/thread_win.c +0 -282
  189. data/vendor/ring/examples/checkdigest.rs +0 -103
  190. data/vendor/ring/include/openssl/aes.h +0 -121
  191. data/vendor/ring/include/openssl/arm_arch.h +0 -129
  192. data/vendor/ring/include/openssl/base.h +0 -156
  193. data/vendor/ring/include/openssl/bn.h +0 -794
  194. data/vendor/ring/include/openssl/buffer.h +0 -18
  195. data/vendor/ring/include/openssl/bytestring.h +0 -235
  196. data/vendor/ring/include/openssl/chacha.h +0 -37
  197. data/vendor/ring/include/openssl/cmac.h +0 -76
  198. data/vendor/ring/include/openssl/cpu.h +0 -184
  199. data/vendor/ring/include/openssl/crypto.h +0 -43
  200. data/vendor/ring/include/openssl/curve25519.h +0 -88
  201. data/vendor/ring/include/openssl/ec.h +0 -225
  202. data/vendor/ring/include/openssl/ec_key.h +0 -129
  203. data/vendor/ring/include/openssl/ecdh.h +0 -110
  204. data/vendor/ring/include/openssl/ecdsa.h +0 -156
  205. data/vendor/ring/include/openssl/err.h +0 -201
  206. data/vendor/ring/include/openssl/mem.h +0 -101
  207. data/vendor/ring/include/openssl/obj_mac.h +0 -71
  208. data/vendor/ring/include/openssl/opensslfeatures.h +0 -68
  209. data/vendor/ring/include/openssl/opensslv.h +0 -18
  210. data/vendor/ring/include/openssl/ossl_typ.h +0 -18
  211. data/vendor/ring/include/openssl/poly1305.h +0 -51
  212. data/vendor/ring/include/openssl/rand.h +0 -70
  213. data/vendor/ring/include/openssl/rsa.h +0 -399
  214. data/vendor/ring/include/openssl/thread.h +0 -133
  215. data/vendor/ring/include/openssl/type_check.h +0 -71
  216. data/vendor/ring/mk/Common.props +0 -63
  217. data/vendor/ring/mk/Windows.props +0 -42
  218. data/vendor/ring/mk/WindowsTest.props +0 -18
  219. data/vendor/ring/mk/appveyor.bat +0 -62
  220. data/vendor/ring/mk/bottom_of_makefile.mk +0 -54
  221. data/vendor/ring/mk/ring.mk +0 -266
  222. data/vendor/ring/mk/top_of_makefile.mk +0 -214
  223. data/vendor/ring/mk/travis.sh +0 -40
  224. data/vendor/ring/mk/update-travis-yml.py +0 -229
  225. data/vendor/ring/ring.sln +0 -153
  226. data/vendor/ring/src/aead.rs +0 -682
  227. data/vendor/ring/src/agreement.rs +0 -248
  228. data/vendor/ring/src/c.rs +0 -129
  229. data/vendor/ring/src/constant_time.rs +0 -37
  230. data/vendor/ring/src/der.rs +0 -96
  231. data/vendor/ring/src/digest.rs +0 -690
  232. data/vendor/ring/src/digest_tests.txt +0 -57
  233. data/vendor/ring/src/ecc.rs +0 -28
  234. data/vendor/ring/src/ecc_build.rs +0 -279
  235. data/vendor/ring/src/ecc_curves.rs +0 -117
  236. data/vendor/ring/src/ed25519_tests.txt +0 -2579
  237. data/vendor/ring/src/exe_tests.rs +0 -46
  238. data/vendor/ring/src/ffi.rs +0 -29
  239. data/vendor/ring/src/file_test.rs +0 -187
  240. data/vendor/ring/src/hkdf.rs +0 -153
  241. data/vendor/ring/src/hkdf_tests.txt +0 -59
  242. data/vendor/ring/src/hmac.rs +0 -414
  243. data/vendor/ring/src/hmac_tests.txt +0 -97
  244. data/vendor/ring/src/input.rs +0 -312
  245. data/vendor/ring/src/lib.rs +0 -41
  246. data/vendor/ring/src/pbkdf2.rs +0 -265
  247. data/vendor/ring/src/pbkdf2_tests.txt +0 -113
  248. data/vendor/ring/src/polyfill.rs +0 -57
  249. data/vendor/ring/src/rand.rs +0 -28
  250. data/vendor/ring/src/signature.rs +0 -314
  251. data/vendor/ring/third-party/NIST/README.md +0 -9
  252. data/vendor/ring/third-party/NIST/SHAVS/SHA1LongMsg.rsp +0 -263
  253. data/vendor/ring/third-party/NIST/SHAVS/SHA1Monte.rsp +0 -309
  254. data/vendor/ring/third-party/NIST/SHAVS/SHA1ShortMsg.rsp +0 -267
  255. data/vendor/ring/third-party/NIST/SHAVS/SHA224LongMsg.rsp +0 -263
  256. data/vendor/ring/third-party/NIST/SHAVS/SHA224Monte.rsp +0 -309
  257. data/vendor/ring/third-party/NIST/SHAVS/SHA224ShortMsg.rsp +0 -267
  258. data/vendor/ring/third-party/NIST/SHAVS/SHA256LongMsg.rsp +0 -263
  259. data/vendor/ring/third-party/NIST/SHAVS/SHA256Monte.rsp +0 -309
  260. data/vendor/ring/third-party/NIST/SHAVS/SHA256ShortMsg.rsp +0 -267
  261. data/vendor/ring/third-party/NIST/SHAVS/SHA384LongMsg.rsp +0 -519
  262. data/vendor/ring/third-party/NIST/SHAVS/SHA384Monte.rsp +0 -309
  263. data/vendor/ring/third-party/NIST/SHAVS/SHA384ShortMsg.rsp +0 -523
  264. data/vendor/ring/third-party/NIST/SHAVS/SHA512LongMsg.rsp +0 -519
  265. data/vendor/ring/third-party/NIST/SHAVS/SHA512Monte.rsp +0 -309
  266. data/vendor/ring/third-party/NIST/SHAVS/SHA512ShortMsg.rsp +0 -523
  267. data/vendor/ring/third-party/NIST/sha256sums.txt +0 -1
data/vendor/ring/crypto/aes/asm/bsaes-x86_64.pl
@@ -1,1565 +0,0 @@
- #!/usr/bin/env perl
-
- ###################################################################
- ### AES-128 [originally in CTR mode] ###
- ### bitsliced implementation for Intel Core 2 processors ###
- ### requires support of SSE extensions up to SSSE3 ###
- ### Author: Emilia Käsper and Peter Schwabe ###
- ### Date: 2009-03-19 ###
- ### Public domain ###
- ### ###
- ### See http://homes.esat.kuleuven.be/~ekasper/#software for ###
- ### further information. ###
- ###################################################################
- #
- # September 2011.
- #
- # Started as transliteration to "perlasm" the original code has
- # undergone following changes:
- #
- # - code was made position-independent;
- # - rounds were folded into a loop resulting in >5x size reduction
- # from 12.5KB to 2.2KB;
- # - above was possibile thanks to mixcolumns() modification that
- # allowed to feed its output back to aesenc[last], this was
- # achieved at cost of two additional inter-registers moves;
- # - some instruction reordering and interleaving;
- # - this module doesn't implement key setup subroutine, instead it
- # relies on conversion of "conventional" key schedule as returned
- # by AES_set_encrypt_key (see discussion below);
- # - first and last round keys are treated differently, which allowed
- # to skip one shiftrows(), reduce bit-sliced key schedule and
- # speed-up conversion by 22%;
- # - support for 192- and 256-bit keys was added;
- #
- # Resulting performance in CPU cycles spent to encrypt one byte out
- # of 4096-byte buffer with 128-bit key is:
- #
- # Emilia's this(*) difference
- #
- # Core 2 9.30 8.69 +7%
- # Nehalem(**) 7.63 6.88 +11%
- # Atom 17.1 16.4 +4%
- # Silvermont - 12.9
- #
- # (*) Comparison is not completely fair, because "this" is ECB,
- # i.e. no extra processing such as counter values calculation
- # and xor-ing input as in Emilia's CTR implementation is
- # performed. However, the CTR calculations stand for not more
- # than 1% of total time, so comparison is *rather* fair.
- #
- # (**) Results were collected on Westmere, which is considered to
- # be equivalent to Nehalem for this code.
- #
- # As for key schedule conversion subroutine. Interface to OpenSSL
- # relies on per-invocation on-the-fly conversion. This naturally
- # has impact on performance, especially for short inputs. Conversion
- # time in CPU cycles and its ratio to CPU cycles spent in 8x block
- # function is:
- #
- # conversion conversion/8x block
- # Core 2 240 0.22
- # Nehalem 180 0.20
- # Atom 430 0.20
- #
- # The ratio values mean that 128-byte blocks will be processed
- # 16-18% slower, 256-byte blocks - 9-10%, 384-byte blocks - 6-7%,
- # etc. Then keep in mind that input sizes not divisible by 128 are
- # *effectively* slower, especially shortest ones, e.g. consecutive
- # 144-byte blocks are processed 44% slower than one would expect,
- # 272 - 29%, 400 - 22%, etc. Yet, despite all these "shortcomings"
- # it's still faster than ["hyper-threading-safe" code path in]
- # aes-x86_64.pl on all lengths above 64 bytes...
- #
- # October 2011.
- #
- # Add decryption procedure. Performance in CPU cycles spent to decrypt
- # one byte out of 4096-byte buffer with 128-bit key is:
- #
- # Core 2 9.98
- # Nehalem 7.80
- # Atom 17.9
- # Silvermont 14.0
- #
- # November 2011.
- #
- # Add bsaes_xts_[en|de]crypt. Less-than-80-bytes-block performance is
- # suboptimal, but XTS is meant to be used with larger blocks...
- #
- # <appro@openssl.org>
-
- $flavour = shift;
- $output = shift;
- if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
-
- $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
-
- $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
- ( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
- ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
- die "can't locate x86_64-xlate.pl";
-
- open OUT,"| \"$^X\" $xlate $flavour $output";
- *STDOUT=*OUT;
-
- my ($inp,$out,$len,$key,$ivp)=("%rdi","%rsi","%rdx","%rcx");
- my @XMM=map("%xmm$_",(15,0..14)); # best on Atom, +10% over (0..15)
-
- {
- my ($key,$rounds,$const)=("%rax","%r10d","%r11");
-
- sub Sbox {
- # input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb
- # output in lsb > [b0, b1, b4, b6, b3, b7, b2, b5] < msb
- my @b=@_[0..7];
- my @t=@_[8..11];
- my @s=@_[12..15];
- &InBasisChange (@b);
- &Inv_GF256 (@b[6,5,0,3,7,1,4,2],@t,@s);
- &OutBasisChange (@b[7,1,4,2,6,5,0,3]);
- }
-
- sub InBasisChange {
- # input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb
- # output in lsb > [b6, b5, b0, b3, b7, b1, b4, b2] < msb
- my @b=@_[0..7];
- $code.=<<___;
- pxor @b[6], @b[5]
- pxor @b[1], @b[2]
- pxor @b[0], @b[3]
- pxor @b[2], @b[6]
- pxor @b[0], @b[5]
-
- pxor @b[3], @b[6]
- pxor @b[7], @b[3]
- pxor @b[5], @b[7]
- pxor @b[4], @b[3]
- pxor @b[5], @b[4]
- pxor @b[1], @b[3]
-
- pxor @b[7], @b[2]
- pxor @b[5], @b[1]
- ___
- }
-
- sub OutBasisChange {
- # input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb
- # output in lsb > [b6, b1, b2, b4, b7, b0, b3, b5] < msb
- my @b=@_[0..7];
- $code.=<<___;
- pxor @b[6], @b[0]
- pxor @b[4], @b[1]
- pxor @b[0], @b[2]
- pxor @b[6], @b[4]
- pxor @b[1], @b[6]
-
- pxor @b[5], @b[1]
- pxor @b[3], @b[5]
- pxor @b[7], @b[3]
- pxor @b[5], @b[7]
- pxor @b[5], @b[2]
-
- pxor @b[7], @b[4]
- ___
- }
-
- sub InvSbox {
- # input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb
- # output in lsb > [b0, b1, b6, b4, b2, b7, b3, b5] < msb
- my @b=@_[0..7];
- my @t=@_[8..11];
- my @s=@_[12..15];
- &InvInBasisChange (@b);
- &Inv_GF256 (@b[5,1,2,6,3,7,0,4],@t,@s);
- &InvOutBasisChange (@b[3,7,0,4,5,1,2,6]);
- }
-
- sub InvInBasisChange { # OutBasisChange in reverse
- my @b=@_[5,1,2,6,3,7,0,4];
- $code.=<<___
- pxor @b[7], @b[4]
-
- pxor @b[5], @b[7]
- pxor @b[5], @b[2]
- pxor @b[7], @b[3]
- pxor @b[3], @b[5]
- pxor @b[5], @b[1]
-
- pxor @b[1], @b[6]
- pxor @b[0], @b[2]
- pxor @b[6], @b[4]
- pxor @b[6], @b[0]
- pxor @b[4], @b[1]
- ___
- }
-
- sub InvOutBasisChange { # InBasisChange in reverse
- my @b=@_[2,5,7,3,6,1,0,4];
- $code.=<<___;
- pxor @b[5], @b[1]
- pxor @b[7], @b[2]
-
- pxor @b[1], @b[3]
- pxor @b[5], @b[4]
- pxor @b[5], @b[7]
- pxor @b[4], @b[3]
- pxor @b[0], @b[5]
- pxor @b[7], @b[3]
- pxor @b[2], @b[6]
- pxor @b[1], @b[2]
- pxor @b[3], @b[6]
-
- pxor @b[0], @b[3]
- pxor @b[6], @b[5]
- ___
- }
-
- sub Mul_GF4 {
- #;*************************************************************
- #;* Mul_GF4: Input x0-x1,y0-y1 Output x0-x1 Temp t0 (8) *
- #;*************************************************************
- my ($x0,$x1,$y0,$y1,$t0)=@_;
- $code.=<<___;
- movdqa $y0, $t0
- pxor $y1, $t0
- pand $x0, $t0
- pxor $x1, $x0
- pand $y0, $x1
- pand $y1, $x0
- pxor $x1, $x0
- pxor $t0, $x1
- ___
- }
-
- sub Mul_GF4_N { # not used, see next subroutine
- # multiply and scale by N
- my ($x0,$x1,$y0,$y1,$t0)=@_;
- $code.=<<___;
- movdqa $y0, $t0
- pxor $y1, $t0
- pand $x0, $t0
- pxor $x1, $x0
- pand $y0, $x1
- pand $y1, $x0
- pxor $x0, $x1
- pxor $t0, $x0
- ___
- }
-
- sub Mul_GF4_N_GF4 {
- # interleaved Mul_GF4_N and Mul_GF4
- my ($x0,$x1,$y0,$y1,$t0,
- $x2,$x3,$y2,$y3,$t1)=@_;
- $code.=<<___;
- movdqa $y0, $t0
- movdqa $y2, $t1
- pxor $y1, $t0
- pxor $y3, $t1
- pand $x0, $t0
- pand $x2, $t1
- pxor $x1, $x0
- pxor $x3, $x2
- pand $y0, $x1
- pand $y2, $x3
- pand $y1, $x0
- pand $y3, $x2
- pxor $x0, $x1
- pxor $x3, $x2
- pxor $t0, $x0
- pxor $t1, $x3
- ___
- }
- sub Mul_GF16_2 {
- my @x=@_[0..7];
- my @y=@_[8..11];
- my @t=@_[12..15];
- $code.=<<___;
- movdqa @x[0], @t[0]
- movdqa @x[1], @t[1]
- ___
- &Mul_GF4 (@x[0], @x[1], @y[0], @y[1], @t[2]);
- $code.=<<___;
- pxor @x[2], @t[0]
- pxor @x[3], @t[1]
- pxor @y[2], @y[0]
- pxor @y[3], @y[1]
- ___
- Mul_GF4_N_GF4 (@t[0], @t[1], @y[0], @y[1], @t[3],
- @x[2], @x[3], @y[2], @y[3], @t[2]);
- $code.=<<___;
- pxor @t[0], @x[0]
- pxor @t[0], @x[2]
- pxor @t[1], @x[1]
- pxor @t[1], @x[3]
-
- movdqa @x[4], @t[0]
- movdqa @x[5], @t[1]
- pxor @x[6], @t[0]
- pxor @x[7], @t[1]
- ___
- &Mul_GF4_N_GF4 (@t[0], @t[1], @y[0], @y[1], @t[3],
- @x[6], @x[7], @y[2], @y[3], @t[2]);
- $code.=<<___;
- pxor @y[2], @y[0]
- pxor @y[3], @y[1]
- ___
- &Mul_GF4 (@x[4], @x[5], @y[0], @y[1], @t[3]);
- $code.=<<___;
- pxor @t[0], @x[4]
- pxor @t[0], @x[6]
- pxor @t[1], @x[5]
- pxor @t[1], @x[7]
- ___
- }
- sub Inv_GF256 {
- #;********************************************************************
- #;* Inv_GF256: Input x0-x7 Output x0-x7 Temp t0-t3,s0-s3 (144) *
- #;********************************************************************
- my @x=@_[0..7];
- my @t=@_[8..11];
- my @s=@_[12..15];
- # direct optimizations from hardware
- $code.=<<___;
- movdqa @x[4], @t[3]
- movdqa @x[5], @t[2]
- movdqa @x[1], @t[1]
- movdqa @x[7], @s[1]
- movdqa @x[0], @s[0]
-
- pxor @x[6], @t[3]
- pxor @x[7], @t[2]
- pxor @x[3], @t[1]
- movdqa @t[3], @s[2]
- pxor @x[6], @s[1]
- movdqa @t[2], @t[0]
- pxor @x[2], @s[0]
- movdqa @t[3], @s[3]
-
- por @t[1], @t[2]
- por @s[0], @t[3]
- pxor @t[0], @s[3]
- pand @s[0], @s[2]
- pxor @t[1], @s[0]
- pand @t[1], @t[0]
- pand @s[0], @s[3]
- movdqa @x[3], @s[0]
- pxor @x[2], @s[0]
- pand @s[0], @s[1]
- pxor @s[1], @t[3]
- pxor @s[1], @t[2]
- movdqa @x[4], @s[1]
- movdqa @x[1], @s[0]
- pxor @x[5], @s[1]
- pxor @x[0], @s[0]
- movdqa @s[1], @t[1]
- pand @s[0], @s[1]
- por @s[0], @t[1]
- pxor @s[1], @t[0]
- pxor @s[3], @t[3]
- pxor @s[2], @t[2]
- pxor @s[3], @t[1]
- movdqa @x[7], @s[0]
- pxor @s[2], @t[0]
- movdqa @x[6], @s[1]
- pxor @s[2], @t[1]
- movdqa @x[5], @s[2]
- pand @x[3], @s[0]
- movdqa @x[4], @s[3]
- pand @x[2], @s[1]
- pand @x[1], @s[2]
- por @x[0], @s[3]
- pxor @s[0], @t[3]
- pxor @s[1], @t[2]
- pxor @s[2], @t[1]
- pxor @s[3], @t[0]
-
- #Inv_GF16 \t0, \t1, \t2, \t3, \s0, \s1, \s2, \s3
-
- # new smaller inversion
-
- movdqa @t[3], @s[0]
- pand @t[1], @t[3]
- pxor @t[2], @s[0]
-
- movdqa @t[0], @s[2]
- movdqa @s[0], @s[3]
- pxor @t[3], @s[2]
- pand @s[2], @s[3]
-
- movdqa @t[1], @s[1]
- pxor @t[2], @s[3]
- pxor @t[0], @s[1]
-
- pxor @t[2], @t[3]
-
- pand @t[3], @s[1]
-
- movdqa @s[2], @t[2]
- pxor @t[0], @s[1]
-
- pxor @s[1], @t[2]
- pxor @s[1], @t[1]
-
- pand @t[0], @t[2]
-
- pxor @t[2], @s[2]
- pxor @t[2], @t[1]
-
- pand @s[3], @s[2]
-
- pxor @s[0], @s[2]
- ___
- # output in s3, s2, s1, t1
-
- # Mul_GF16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \t2, \t3, \t0, \t1, \s0, \s1, \s2, \s3
-
- # Mul_GF16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \s3, \s2, \s1, \t1, \s0, \t0, \t2, \t3
- &Mul_GF16_2(@x,@s[3,2,1],@t[1],@s[0],@t[0,2,3]);
-
- ### output msb > [x3,x2,x1,x0,x7,x6,x5,x4] < lsb
- }
-
- # AES linear components
-
- sub ShiftRows {
- my @x=@_[0..7];
- my $mask=pop;
- $code.=<<___;
- pxor 0x00($key),@x[0]
- pxor 0x10($key),@x[1]
- pxor 0x20($key),@x[2]
- pxor 0x30($key),@x[3]
- pshufb $mask,@x[0]
- pshufb $mask,@x[1]
- pxor 0x40($key),@x[4]
- pxor 0x50($key),@x[5]
- pshufb $mask,@x[2]
- pshufb $mask,@x[3]
- pxor 0x60($key),@x[6]
- pxor 0x70($key),@x[7]
- pshufb $mask,@x[4]
- pshufb $mask,@x[5]
- pshufb $mask,@x[6]
- pshufb $mask,@x[7]
- lea 0x80($key),$key
- ___
- }
-
- sub MixColumns {
- # modified to emit output in order suitable for feeding back to aesenc[last]
- my @x=@_[0..7];
- my @t=@_[8..15];
- my $inv=@_[16]; # optional
- $code.=<<___;
- pshufd \$0x93, @x[0], @t[0] # x0 <<< 32
- pshufd \$0x93, @x[1], @t[1]
- pxor @t[0], @x[0] # x0 ^ (x0 <<< 32)
- pshufd \$0x93, @x[2], @t[2]
- pxor @t[1], @x[1]
- pshufd \$0x93, @x[3], @t[3]
- pxor @t[2], @x[2]
- pshufd \$0x93, @x[4], @t[4]
- pxor @t[3], @x[3]
- pshufd \$0x93, @x[5], @t[5]
- pxor @t[4], @x[4]
- pshufd \$0x93, @x[6], @t[6]
- pxor @t[5], @x[5]
- pshufd \$0x93, @x[7], @t[7]
- pxor @t[6], @x[6]
- pxor @t[7], @x[7]
-
- pxor @x[0], @t[1]
- pxor @x[7], @t[0]
- pxor @x[7], @t[1]
- pshufd \$0x4E, @x[0], @x[0] # (x0 ^ (x0 <<< 32)) <<< 64)
- pxor @x[1], @t[2]
- pshufd \$0x4E, @x[1], @x[1]
- pxor @x[4], @t[5]
- pxor @t[0], @x[0]
- pxor @x[5], @t[6]
- pxor @t[1], @x[1]
- pxor @x[3], @t[4]
- pshufd \$0x4E, @x[4], @t[0]
- pxor @x[6], @t[7]
- pshufd \$0x4E, @x[5], @t[1]
- pxor @x[2], @t[3]
- pshufd \$0x4E, @x[3], @x[4]
- pxor @x[7], @t[3]
- pshufd \$0x4E, @x[7], @x[5]
- pxor @x[7], @t[4]
- pshufd \$0x4E, @x[6], @x[3]
- pxor @t[4], @t[0]
- pshufd \$0x4E, @x[2], @x[6]
- pxor @t[5], @t[1]
- ___
- $code.=<<___ if (!$inv);
- pxor @t[3], @x[4]
- pxor @t[7], @x[5]
- pxor @t[6], @x[3]
- movdqa @t[0], @x[2]
- pxor @t[2], @x[6]
- movdqa @t[1], @x[7]
- ___
- $code.=<<___ if ($inv);
- pxor @x[4], @t[3]
- pxor @t[7], @x[5]
- pxor @x[3], @t[6]
- movdqa @t[0], @x[3]
- pxor @t[2], @x[6]
- movdqa @t[6], @x[2]
- movdqa @t[1], @x[7]
- movdqa @x[6], @x[4]
- movdqa @t[3], @x[6]
- ___
- }
-
- sub InvMixColumns_orig {
- my @x=@_[0..7];
- my @t=@_[8..15];
-
- $code.=<<___;
- # multiplication by 0x0e
- pshufd \$0x93, @x[7], @t[7]
- movdqa @x[2], @t[2]
- pxor @x[5], @x[7] # 7 5
- pxor @x[5], @x[2] # 2 5
- pshufd \$0x93, @x[0], @t[0]
- movdqa @x[5], @t[5]
- pxor @x[0], @x[5] # 5 0 [1]
- pxor @x[1], @x[0] # 0 1
- pshufd \$0x93, @x[1], @t[1]
- pxor @x[2], @x[1] # 1 25
- pxor @x[6], @x[0] # 01 6 [2]
- pxor @x[3], @x[1] # 125 3 [4]
- pshufd \$0x93, @x[3], @t[3]
- pxor @x[0], @x[2] # 25 016 [3]
- pxor @x[7], @x[3] # 3 75
- pxor @x[6], @x[7] # 75 6 [0]
- pshufd \$0x93, @x[6], @t[6]
- movdqa @x[4], @t[4]
- pxor @x[4], @x[6] # 6 4
- pxor @x[3], @x[4] # 4 375 [6]
- pxor @x[7], @x[3] # 375 756=36
- pxor @t[5], @x[6] # 64 5 [7]
- pxor @t[2], @x[3] # 36 2
- pxor @t[4], @x[3] # 362 4 [5]
- pshufd \$0x93, @t[5], @t[5]
- ___
- my @y = @x[7,5,0,2,1,3,4,6];
- $code.=<<___;
- # multiplication by 0x0b
- pxor @y[0], @y[1]
- pxor @t[0], @y[0]
- pxor @t[1], @y[1]
- pshufd \$0x93, @t[2], @t[2]
- pxor @t[5], @y[0]
- pxor @t[6], @y[1]
- pxor @t[7], @y[0]
- pshufd \$0x93, @t[4], @t[4]
- pxor @t[6], @t[7] # clobber t[7]
- pxor @y[0], @y[1]
-
- pxor @t[0], @y[3]
- pshufd \$0x93, @t[0], @t[0]
- pxor @t[1], @y[2]
- pxor @t[1], @y[4]
- pxor @t[2], @y[2]
- pshufd \$0x93, @t[1], @t[1]
- pxor @t[2], @y[3]
- pxor @t[2], @y[5]
- pxor @t[7], @y[2]
- pshufd \$0x93, @t[2], @t[2]
- pxor @t[3], @y[3]
- pxor @t[3], @y[6]
- pxor @t[3], @y[4]
- pshufd \$0x93, @t[3], @t[3]
- pxor @t[4], @y[7]
- pxor @t[4], @y[5]
- pxor @t[7], @y[7]
- pxor @t[5], @y[3]
- pxor @t[4], @y[4]
- pxor @t[5], @t[7] # clobber t[7] even more
-
- pxor @t[7], @y[5]
- pshufd \$0x93, @t[4], @t[4]
- pxor @t[7], @y[6]
- pxor @t[7], @y[4]
-
- pxor @t[5], @t[7]
- pshufd \$0x93, @t[5], @t[5]
- pxor @t[6], @t[7] # restore t[7]
-
- # multiplication by 0x0d
- pxor @y[7], @y[4]
- pxor @t[4], @y[7]
- pshufd \$0x93, @t[6], @t[6]
- pxor @t[0], @y[2]
- pxor @t[5], @y[7]
- pxor @t[2], @y[2]
- pshufd \$0x93, @t[7], @t[7]
-
- pxor @y[1], @y[3]
- pxor @t[1], @y[1]
- pxor @t[0], @y[0]
- pxor @t[0], @y[3]
- pxor @t[5], @y[1]
- pxor @t[5], @y[0]
- pxor @t[7], @y[1]
- pshufd \$0x93, @t[0], @t[0]
- pxor @t[6], @y[0]
- pxor @y[1], @y[3]
- pxor @t[1], @y[4]
- pshufd \$0x93, @t[1], @t[1]
-
- pxor @t[7], @y[7]
- pxor @t[2], @y[4]
- pxor @t[2], @y[5]
- pshufd \$0x93, @t[2], @t[2]
- pxor @t[6], @y[2]
- pxor @t[3], @t[6] # clobber t[6]
- pxor @y[7], @y[4]
- pxor @t[6], @y[3]
-
- pxor @t[6], @y[6]
- pxor @t[5], @y[5]
- pxor @t[4], @y[6]
- pshufd \$0x93, @t[4], @t[4]
- pxor @t[6], @y[5]
- pxor @t[7], @y[6]
- pxor @t[3], @t[6] # restore t[6]
-
- pshufd \$0x93, @t[5], @t[5]
- pshufd \$0x93, @t[6], @t[6]
- pshufd \$0x93, @t[7], @t[7]
- pshufd \$0x93, @t[3], @t[3]
-
- # multiplication by 0x09
- pxor @y[1], @y[4]
- pxor @y[1], @t[1] # t[1]=y[1]
- pxor @t[5], @t[0] # clobber t[0]
- pxor @t[5], @t[1]
- pxor @t[0], @y[3]
- pxor @y[0], @t[0] # t[0]=y[0]
- pxor @t[6], @t[1]
- pxor @t[7], @t[6] # clobber t[6]
- pxor @t[1], @y[4]
- pxor @t[4], @y[7]
- pxor @y[4], @t[4] # t[4]=y[4]
- pxor @t[3], @y[6]
- pxor @y[3], @t[3] # t[3]=y[3]
- pxor @t[2], @y[5]
- pxor @y[2], @t[2] # t[2]=y[2]
- pxor @t[7], @t[3]
- pxor @y[5], @t[5] # t[5]=y[5]
- pxor @t[6], @t[2]
- pxor @t[6], @t[5]
- pxor @y[6], @t[6] # t[6]=y[6]
- pxor @y[7], @t[7] # t[7]=y[7]
-
- movdqa @t[0],@XMM[0]
- movdqa @t[1],@XMM[1]
- movdqa @t[2],@XMM[2]
- movdqa @t[3],@XMM[3]
- movdqa @t[4],@XMM[4]
- movdqa @t[5],@XMM[5]
- movdqa @t[6],@XMM[6]
- movdqa @t[7],@XMM[7]
- ___
- }
-
- sub InvMixColumns {
- my @x=@_[0..7];
- my @t=@_[8..15];
-
- # Thanks to Jussi Kivilinna for providing pointer to
- #
- # | 0e 0b 0d 09 | | 02 03 01 01 | | 05 00 04 00 |
- # | 09 0e 0b 0d | = | 01 02 03 01 | x | 00 05 00 04 |
- # | 0d 09 0e 0b | | 01 01 02 03 | | 04 00 05 00 |
- # | 0b 0d 09 0e | | 03 01 01 02 | | 00 04 00 05 |
-
- $code.=<<___;
- # multiplication by 0x05-0x00-0x04-0x00
- pshufd \$0x4E, @x[0], @t[0]
- pshufd \$0x4E, @x[6], @t[6]
- pxor @x[0], @t[0]
- pshufd \$0x4E, @x[7], @t[7]
- pxor @x[6], @t[6]
- pshufd \$0x4E, @x[1], @t[1]
- pxor @x[7], @t[7]
- pshufd \$0x4E, @x[2], @t[2]
- pxor @x[1], @t[1]
- pshufd \$0x4E, @x[3], @t[3]
- pxor @x[2], @t[2]
- pxor @t[6], @x[0]
- pxor @t[6], @x[1]
- pshufd \$0x4E, @x[4], @t[4]
- pxor @x[3], @t[3]
- pxor @t[0], @x[2]
- pxor @t[1], @x[3]
- pshufd \$0x4E, @x[5], @t[5]
- pxor @x[4], @t[4]
- pxor @t[7], @x[1]
- pxor @t[2], @x[4]
- pxor @x[5], @t[5]
-
- pxor @t[7], @x[2]
- pxor @t[6], @x[3]
- pxor @t[6], @x[4]
- pxor @t[3], @x[5]
- pxor @t[4], @x[6]
- pxor @t[7], @x[4]
- pxor @t[7], @x[5]
- pxor @t[5], @x[7]
- ___
- &MixColumns (@x,@t,1); # flipped 2<->3 and 4<->6
- }
-
- sub aesenc { # not used
- my @b=@_[0..7];
- my @t=@_[8..15];
- $code.=<<___;
- movdqa 0x30($const),@t[0] # .LSR
- ___
- &ShiftRows (@b,@t[0]);
- &Sbox (@b,@t);
- &MixColumns (@b[0,1,4,6,3,7,2,5],@t);
- }
-
- sub aesenclast { # not used
- my @b=@_[0..7];
- my @t=@_[8..15];
- $code.=<<___;
- movdqa 0x40($const),@t[0] # .LSRM0
- ___
- &ShiftRows (@b,@t[0]);
- &Sbox (@b,@t);
- $code.=<<___
- pxor 0x00($key),@b[0]
- pxor 0x10($key),@b[1]
- pxor 0x20($key),@b[4]
- pxor 0x30($key),@b[6]
- pxor 0x40($key),@b[3]
- pxor 0x50($key),@b[7]
- pxor 0x60($key),@b[2]
- pxor 0x70($key),@b[5]
- ___
- }
-
- sub swapmove {
- my ($a,$b,$n,$mask,$t)=@_;
- $code.=<<___;
- movdqa $b,$t
- psrlq \$$n,$b
- pxor $a,$b
- pand $mask,$b
- pxor $b,$a
- psllq \$$n,$b
- pxor $t,$b
- ___
- }
- sub swapmove2x {
- my ($a0,$b0,$a1,$b1,$n,$mask,$t0,$t1)=@_;
- $code.=<<___;
- movdqa $b0,$t0
- psrlq \$$n,$b0
- movdqa $b1,$t1
- psrlq \$$n,$b1
- pxor $a0,$b0
- pxor $a1,$b1
- pand $mask,$b0
- pand $mask,$b1
- pxor $b0,$a0
- psllq \$$n,$b0
- pxor $b1,$a1
- psllq \$$n,$b1
- pxor $t0,$b0
- pxor $t1,$b1
- ___
- }
-
- sub bitslice {
- my @x=reverse(@_[0..7]);
- my ($t0,$t1,$t2,$t3)=@_[8..11];
- $code.=<<___;
- movdqa 0x00($const),$t0 # .LBS0
- movdqa 0x10($const),$t1 # .LBS1
- ___
- &swapmove2x(@x[0,1,2,3],1,$t0,$t2,$t3);
- &swapmove2x(@x[4,5,6,7],1,$t0,$t2,$t3);
- $code.=<<___;
- movdqa 0x20($const),$t0 # .LBS2
- ___
- &swapmove2x(@x[0,2,1,3],2,$t1,$t2,$t3);
- &swapmove2x(@x[4,6,5,7],2,$t1,$t2,$t3);
-
- &swapmove2x(@x[0,4,1,5],4,$t0,$t2,$t3);
- &swapmove2x(@x[2,6,3,7],4,$t0,$t2,$t3);
- }
-
- $code.=<<___;
- .text
-
- .extern asm_AES_encrypt
- .extern asm_AES_decrypt
-
- .type _bsaes_encrypt8,\@abi-omnipotent
- .align 64
- _bsaes_encrypt8:
- lea .LBS0(%rip), $const # constants table
-
- movdqa ($key), @XMM[9] # round 0 key
- lea 0x10($key), $key
- movdqa 0x50($const), @XMM[8] # .LM0SR
- pxor @XMM[9], @XMM[0] # xor with round0 key
- pxor @XMM[9], @XMM[1]
- pxor @XMM[9], @XMM[2]
- pxor @XMM[9], @XMM[3]
- pshufb @XMM[8], @XMM[0]
- pshufb @XMM[8], @XMM[1]
- pxor @XMM[9], @XMM[4]
- pxor @XMM[9], @XMM[5]
- pshufb @XMM[8], @XMM[2]
- pshufb @XMM[8], @XMM[3]
- pxor @XMM[9], @XMM[6]
- pxor @XMM[9], @XMM[7]
- pshufb @XMM[8], @XMM[4]
- pshufb @XMM[8], @XMM[5]
- pshufb @XMM[8], @XMM[6]
- pshufb @XMM[8], @XMM[7]
- _bsaes_encrypt8_bitslice:
- ___
- &bitslice (@XMM[0..7, 8..11]);
- $code.=<<___;
- dec $rounds
- jmp .Lenc_sbox
- .align 16
- .Lenc_loop:
- ___
- &ShiftRows (@XMM[0..7, 8]);
- $code.=".Lenc_sbox:\n";
- &Sbox (@XMM[0..7, 8..15]);
- $code.=<<___;
- dec $rounds
- jl .Lenc_done
- ___
- &MixColumns (@XMM[0,1,4,6,3,7,2,5, 8..15]);
- $code.=<<___;
- movdqa 0x30($const), @XMM[8] # .LSR
- jnz .Lenc_loop
- movdqa 0x40($const), @XMM[8] # .LSRM0
- jmp .Lenc_loop
- .align 16
- .Lenc_done:
- ___
- # output in lsb > [t0, t1, t4, t6, t3, t7, t2, t5] < msb
- &bitslice (@XMM[0,1,4,6,3,7,2,5, 8..11]);
- $code.=<<___;
- movdqa ($key), @XMM[8] # last round key
- pxor @XMM[8], @XMM[4]
- pxor @XMM[8], @XMM[6]
- pxor @XMM[8], @XMM[3]
- pxor @XMM[8], @XMM[7]
- pxor @XMM[8], @XMM[2]
- pxor @XMM[8], @XMM[5]
- pxor @XMM[8], @XMM[0]
- pxor @XMM[8], @XMM[1]
- ret
- .size _bsaes_encrypt8,.-_bsaes_encrypt8
-
- .type _bsaes_decrypt8,\@abi-omnipotent
- .align 64
- _bsaes_decrypt8:
- lea .LBS0(%rip), $const # constants table
-
- movdqa ($key), @XMM[9] # round 0 key
- lea 0x10($key), $key
- movdqa -0x30($const), @XMM[8] # .LM0ISR
- pxor @XMM[9], @XMM[0] # xor with round0 key
- pxor @XMM[9], @XMM[1]
- pxor @XMM[9], @XMM[2]
- pxor @XMM[9], @XMM[3]
- pshufb @XMM[8], @XMM[0]
- pshufb @XMM[8], @XMM[1]
- pxor @XMM[9], @XMM[4]
- pxor @XMM[9], @XMM[5]
- pshufb @XMM[8], @XMM[2]
- pshufb @XMM[8], @XMM[3]
- pxor @XMM[9], @XMM[6]
- pxor @XMM[9], @XMM[7]
- pshufb @XMM[8], @XMM[4]
- pshufb @XMM[8], @XMM[5]
- pshufb @XMM[8], @XMM[6]
- pshufb @XMM[8], @XMM[7]
- ___
- &bitslice (@XMM[0..7, 8..11]);
- $code.=<<___;
- dec $rounds
- jmp .Ldec_sbox
- .align 16
- .Ldec_loop:
- ___
- &ShiftRows (@XMM[0..7, 8]);
- $code.=".Ldec_sbox:\n";
- &InvSbox (@XMM[0..7, 8..15]);
- $code.=<<___;
- dec $rounds
- jl .Ldec_done
- ___
- &InvMixColumns (@XMM[0,1,6,4,2,7,3,5, 8..15]);
- $code.=<<___;
- movdqa -0x10($const), @XMM[8] # .LISR
- jnz .Ldec_loop
- movdqa -0x20($const), @XMM[8] # .LISRM0
- jmp .Ldec_loop
- .align 16
- .Ldec_done:
- ___
- &bitslice (@XMM[0,1,6,4,2,7,3,5, 8..11]);
- $code.=<<___;
- movdqa ($key), @XMM[8] # last round key
- pxor @XMM[8], @XMM[6]
- pxor @XMM[8], @XMM[4]
- pxor @XMM[8], @XMM[2]
- pxor @XMM[8], @XMM[7]
- pxor @XMM[8], @XMM[3]
- pxor @XMM[8], @XMM[5]
- pxor @XMM[8], @XMM[0]
- pxor @XMM[8], @XMM[1]
- ret
- .size _bsaes_decrypt8,.-_bsaes_decrypt8
- ___
- }
- {
- my ($out,$inp,$rounds,$const)=("%rax","%rcx","%r10d","%r11");
-
- sub bitslice_key {
- my @x=reverse(@_[0..7]);
- my ($bs0,$bs1,$bs2,$t2,$t3)=@_[8..12];
-
- &swapmove (@x[0,1],1,$bs0,$t2,$t3);
- $code.=<<___;
- #&swapmove(@x[2,3],1,$t0,$t2,$t3);
- movdqa @x[0], @x[2]
- movdqa @x[1], @x[3]
- ___
- #&swapmove2x(@x[4,5,6,7],1,$t0,$t2,$t3);
-
- &swapmove2x (@x[0,2,1,3],2,$bs1,$t2,$t3);
- $code.=<<___;
- #&swapmove2x(@x[4,6,5,7],2,$t1,$t2,$t3);
- movdqa @x[0], @x[4]
- movdqa @x[2], @x[6]
- movdqa @x[1], @x[5]
- movdqa @x[3], @x[7]
- ___
- &swapmove2x (@x[0,4,1,5],4,$bs2,$t2,$t3);
- &swapmove2x (@x[2,6,3,7],4,$bs2,$t2,$t3);
- }
-
- $code.=<<___;
- .type _bsaes_key_convert,\@abi-omnipotent
- .align 16
- _bsaes_key_convert:
- lea .Lmasks(%rip), $const
- movdqu ($inp), %xmm7 # load round 0 key
- lea 0x10($inp), $inp
- movdqa 0x00($const), %xmm0 # 0x01...
- movdqa 0x10($const), %xmm1 # 0x02...
- movdqa 0x20($const), %xmm2 # 0x04...
- movdqa 0x30($const), %xmm3 # 0x08...
- movdqa 0x40($const), %xmm4 # .LM0
- pcmpeqd %xmm5, %xmm5 # .LNOT
-
- movdqu ($inp), %xmm6 # load round 1 key
- movdqa %xmm7, ($out) # save round 0 key
- lea 0x10($out), $out
- dec $rounds
- jmp .Lkey_loop
- .align 16
- .Lkey_loop:
- pshufb %xmm4, %xmm6 # .LM0
-
- movdqa %xmm0, %xmm8
- movdqa %xmm1, %xmm9
-
- pand %xmm6, %xmm8
- pand %xmm6, %xmm9
- movdqa %xmm2, %xmm10
- pcmpeqb %xmm0, %xmm8
- psllq \$4, %xmm0 # 0x10...
- movdqa %xmm3, %xmm11
- pcmpeqb %xmm1, %xmm9
- psllq \$4, %xmm1 # 0x20...
-
- pand %xmm6, %xmm10
- pand %xmm6, %xmm11
- movdqa %xmm0, %xmm12
- pcmpeqb %xmm2, %xmm10
- psllq \$4, %xmm2 # 0x40...
- movdqa %xmm1, %xmm13
- pcmpeqb %xmm3, %xmm11
- psllq \$4, %xmm3 # 0x80...
-
- movdqa %xmm2, %xmm14
- movdqa %xmm3, %xmm15
- pxor %xmm5, %xmm8 # "pnot"
- pxor %xmm5, %xmm9
-
- pand %xmm6, %xmm12
- pand %xmm6, %xmm13
- movdqa %xmm8, 0x00($out) # write bit-sliced round key
- pcmpeqb %xmm0, %xmm12
- psrlq \$4, %xmm0 # 0x01...
- movdqa %xmm9, 0x10($out)
- pcmpeqb %xmm1, %xmm13
- psrlq \$4, %xmm1 # 0x02...
- lea 0x10($inp), $inp
-
- pand %xmm6, %xmm14
- pand %xmm6, %xmm15
- movdqa %xmm10, 0x20($out)
- pcmpeqb %xmm2, %xmm14
- psrlq \$4, %xmm2 # 0x04...
- movdqa %xmm11, 0x30($out)
- pcmpeqb %xmm3, %xmm15
- psrlq \$4, %xmm3 # 0x08...
- movdqu ($inp), %xmm6 # load next round key
-
- pxor %xmm5, %xmm13 # "pnot"
- pxor %xmm5, %xmm14
- movdqa %xmm12, 0x40($out)
- movdqa %xmm13, 0x50($out)
- movdqa %xmm14, 0x60($out)
- movdqa %xmm15, 0x70($out)
- lea 0x80($out),$out
- dec $rounds
- jnz .Lkey_loop
-
- movdqa 0x50($const), %xmm7 # .L63
- #movdqa %xmm6, ($out) # don't save last round key
- ret
- .size _bsaes_key_convert,.-_bsaes_key_convert
- ___
- }
-
- if (0 && !$win64) { # following four functions are unsupported interface
- # used for benchmarking...
- $code.=<<___;
- .globl bsaes_enc_key_convert
- .type bsaes_enc_key_convert,\@function,2
- .align 16
- bsaes_enc_key_convert:
- mov 240($inp),%r10d # pass rounds
- mov $inp,%rcx # pass key
- mov $out,%rax # pass key schedule
- call _bsaes_key_convert
- pxor %xmm6,%xmm7 # fix up last round key
- movdqa %xmm7,(%rax) # save last round key
- ret
- .size bsaes_enc_key_convert,.-bsaes_enc_key_convert
-
- .globl bsaes_encrypt_128
- .type bsaes_encrypt_128,\@function,4
- .align 16
- bsaes_encrypt_128:
- .Lenc128_loop:
- movdqu 0x00($inp), @XMM[0] # load input
- movdqu 0x10($inp), @XMM[1]
- movdqu 0x20($inp), @XMM[2]
- movdqu 0x30($inp), @XMM[3]
- movdqu 0x40($inp), @XMM[4]
- movdqu 0x50($inp), @XMM[5]
- movdqu 0x60($inp), @XMM[6]
- movdqu 0x70($inp), @XMM[7]
- mov $key, %rax # pass the $key
- lea 0x80($inp), $inp
- mov \$10,%r10d
-
- call _bsaes_encrypt8
-
- movdqu @XMM[0], 0x00($out) # write output
- movdqu @XMM[1], 0x10($out)
- movdqu @XMM[4], 0x20($out)
- movdqu @XMM[6], 0x30($out)
- movdqu @XMM[3], 0x40($out)
- movdqu @XMM[7], 0x50($out)
- movdqu @XMM[2], 0x60($out)
- movdqu @XMM[5], 0x70($out)
- lea 0x80($out), $out
- sub \$0x80,$len
- ja .Lenc128_loop
- ret
- .size bsaes_encrypt_128,.-bsaes_encrypt_128
-
- .globl bsaes_dec_key_convert
- .type bsaes_dec_key_convert,\@function,2
- .align 16
- bsaes_dec_key_convert:
- mov 240($inp),%r10d # pass rounds
- mov $inp,%rcx # pass key
- mov $out,%rax # pass key schedule
- call _bsaes_key_convert
- pxor ($out),%xmm7 # fix up round 0 key
- movdqa %xmm6,(%rax) # save last round key
- movdqa %xmm7,($out)
- ret
- .size bsaes_dec_key_convert,.-bsaes_dec_key_convert
-
- .globl bsaes_decrypt_128
- .type bsaes_decrypt_128,\@function,4
- .align 16
- bsaes_decrypt_128:
- .Ldec128_loop:
- movdqu 0x00($inp), @XMM[0] # load input
- movdqu 0x10($inp), @XMM[1]
- movdqu 0x20($inp), @XMM[2]
- movdqu 0x30($inp), @XMM[3]
- movdqu 0x40($inp), @XMM[4]
- movdqu 0x50($inp), @XMM[5]
- movdqu 0x60($inp), @XMM[6]
- movdqu 0x70($inp), @XMM[7]
- mov $key, %rax # pass the $key
- lea 0x80($inp), $inp
- mov \$10,%r10d
-
- call _bsaes_decrypt8
-
- movdqu @XMM[0], 0x00($out) # write output
- movdqu @XMM[1], 0x10($out)
- movdqu @XMM[6], 0x20($out)
- movdqu @XMM[4], 0x30($out)
- movdqu @XMM[2], 0x40($out)
- movdqu @XMM[7], 0x50($out)
- movdqu @XMM[3], 0x60($out)
- movdqu @XMM[5], 0x70($out)
- lea 0x80($out), $out
- sub \$0x80,$len
- ja .Ldec128_loop
- ret
- .size bsaes_decrypt_128,.-bsaes_decrypt_128
- ___
- }
- {
- ######################################################################
- #
- # OpenSSL interface
- #
- my ($arg1,$arg2,$arg3,$arg4,$arg5,$arg6)=$win64 ? ("%rcx","%rdx","%r8","%r9","%r10","%r11d")
- : ("%rdi","%rsi","%rdx","%rcx","%r8","%r9d");
- my ($inp,$out,$len,$key)=("%r12","%r13","%r14","%r15");
-
- $code.=<<___;
- .globl bsaes_ctr32_encrypt_blocks
- .type bsaes_ctr32_encrypt_blocks,\@abi-omnipotent
- .align 16
- bsaes_ctr32_encrypt_blocks:
- mov %rsp, %rax
- .Lctr_enc_prologue:
- push %rbp
- push %rbx
- push %r12
- push %r13
- push %r14
- push %r15
- lea -0x48(%rsp), %rsp
- ___
- $code.=<<___ if ($win64);
- mov 0xa0(%rsp),$arg5 # pull ivp
- lea -0xa0(%rsp), %rsp
- movaps %xmm6, 0x40(%rsp)
- movaps %xmm7, 0x50(%rsp)
- movaps %xmm8, 0x60(%rsp)
- movaps %xmm9, 0x70(%rsp)
- movaps %xmm10, 0x80(%rsp)
- movaps %xmm11, 0x90(%rsp)
- movaps %xmm12, 0xa0(%rsp)
- movaps %xmm13, 0xb0(%rsp)
- movaps %xmm14, 0xc0(%rsp)
- movaps %xmm15, 0xd0(%rsp)
- .Lctr_enc_body:
- ___
- $code.=<<___;
- mov %rsp, %rbp # backup %rsp
- movdqu ($arg5), %xmm0 # load counter
- mov 240($arg4), %eax # rounds
- mov $arg1, $inp # backup arguments
- mov $arg2, $out
- mov $arg3, $len
- mov $arg4, $key
- movdqa %xmm0, 0x20(%rbp) # copy counter
- cmp \$8, $arg3
- jb .Lctr_enc_short
-
- mov %eax, %ebx # rounds
- shl \$7, %rax # 128 bytes per inner round key
- sub \$`128-32`, %rax # size of bit-sliced key schedule
- sub %rax, %rsp
-
- mov %rsp, %rax # pass key schedule
- mov $key, %rcx # pass key
- mov %ebx, %r10d # pass rounds
- call _bsaes_key_convert
- pxor %xmm6,%xmm7 # fix up last round key
- movdqa %xmm7,(%rax) # save last round key
-
- movdqa (%rsp), @XMM[9] # load round0 key
- lea .LADD1(%rip), %r11
- movdqa 0x20(%rbp), @XMM[0] # counter copy
- movdqa -0x20(%r11), @XMM[8] # .LSWPUP
- pshufb @XMM[8], @XMM[9] # byte swap upper part
- pshufb @XMM[8], @XMM[0]
- movdqa @XMM[9], (%rsp) # save adjusted round0 key
- jmp .Lctr_enc_loop
- .align 16
- .Lctr_enc_loop:
- movdqa @XMM[0], 0x20(%rbp) # save counter
- movdqa @XMM[0], @XMM[1] # prepare 8 counter values
- movdqa @XMM[0], @XMM[2]
- paddd 0x00(%r11), @XMM[1] # .LADD1
- movdqa @XMM[0], @XMM[3]
- paddd 0x10(%r11), @XMM[2] # .LADD2
- movdqa @XMM[0], @XMM[4]
- paddd 0x20(%r11), @XMM[3] # .LADD3
- movdqa @XMM[0], @XMM[5]
- paddd 0x30(%r11), @XMM[4] # .LADD4
- movdqa @XMM[0], @XMM[6]
- paddd 0x40(%r11), @XMM[5] # .LADD5
- movdqa @XMM[0], @XMM[7]
- paddd 0x50(%r11), @XMM[6] # .LADD6
- paddd 0x60(%r11), @XMM[7] # .LADD7
-
- # Borrow prologue from _bsaes_encrypt8 to use the opportunity
- # to flip byte order in 32-bit counter
- movdqa (%rsp), @XMM[9] # round 0 key
- lea 0x10(%rsp), %rax # pass key schedule
- movdqa -0x10(%r11), @XMM[8] # .LSWPUPM0SR
- pxor @XMM[9], @XMM[0] # xor with round0 key
- pxor @XMM[9], @XMM[1]
- pxor @XMM[9], @XMM[2]
- pxor @XMM[9], @XMM[3]
- pshufb @XMM[8], @XMM[0]
- pshufb @XMM[8], @XMM[1]
- pxor @XMM[9], @XMM[4]
- pxor @XMM[9], @XMM[5]
- pshufb @XMM[8], @XMM[2]
- pshufb @XMM[8], @XMM[3]
- pxor @XMM[9], @XMM[6]
- pxor @XMM[9], @XMM[7]
- pshufb @XMM[8], @XMM[4]
- pshufb @XMM[8], @XMM[5]
- pshufb @XMM[8], @XMM[6]
- pshufb @XMM[8], @XMM[7]
- lea .LBS0(%rip), %r11 # constants table
- mov %ebx,%r10d # pass rounds
-
- call _bsaes_encrypt8_bitslice
-
- sub \$8,$len
- jc .Lctr_enc_loop_done
-
- movdqu 0x00($inp), @XMM[8] # load input
- movdqu 0x10($inp), @XMM[9]
- movdqu 0x20($inp), @XMM[10]
- movdqu 0x30($inp), @XMM[11]
- movdqu 0x40($inp), @XMM[12]
- movdqu 0x50($inp), @XMM[13]
- movdqu 0x60($inp), @XMM[14]
- movdqu 0x70($inp), @XMM[15]
- lea 0x80($inp),$inp
- pxor @XMM[0], @XMM[8]
- movdqa 0x20(%rbp), @XMM[0] # load counter
- pxor @XMM[9], @XMM[1]
- movdqu @XMM[8], 0x00($out) # write output
- pxor @XMM[10], @XMM[4]
- movdqu @XMM[1], 0x10($out)
- pxor @XMM[11], @XMM[6]
- movdqu @XMM[4], 0x20($out)
- pxor @XMM[12], @XMM[3]
- movdqu @XMM[6], 0x30($out)
- pxor @XMM[13], @XMM[7]
- movdqu @XMM[3], 0x40($out)
- pxor @XMM[14], @XMM[2]
- movdqu @XMM[7], 0x50($out)
- pxor @XMM[15], @XMM[5]
- movdqu @XMM[2], 0x60($out)
- lea .LADD1(%rip), %r11
- movdqu @XMM[5], 0x70($out)
- lea 0x80($out), $out
- paddd 0x70(%r11), @XMM[0] # .LADD8
- jnz .Lctr_enc_loop
-
- jmp .Lctr_enc_done
- .align 16
- .Lctr_enc_loop_done:
- add \$8, $len
- movdqu 0x00($inp), @XMM[8] # load input
- pxor @XMM[8], @XMM[0]
- movdqu @XMM[0], 0x00($out) # write output
- cmp \$2,$len
- jb .Lctr_enc_done
- movdqu 0x10($inp), @XMM[9]
- pxor @XMM[9], @XMM[1]
- movdqu @XMM[1], 0x10($out)
- je .Lctr_enc_done
- movdqu 0x20($inp), @XMM[10]
- pxor @XMM[10], @XMM[4]
- movdqu @XMM[4], 0x20($out)
- cmp \$4,$len
- jb .Lctr_enc_done
- movdqu 0x30($inp), @XMM[11]
- pxor @XMM[11], @XMM[6]
- movdqu @XMM[6], 0x30($out)
- je .Lctr_enc_done
- movdqu 0x40($inp), @XMM[12]
- pxor @XMM[12], @XMM[3]
- movdqu @XMM[3], 0x40($out)
- cmp \$6,$len
- jb .Lctr_enc_done
- movdqu 0x50($inp), @XMM[13]
- pxor @XMM[13], @XMM[7]
- movdqu @XMM[7], 0x50($out)
- je .Lctr_enc_done
- movdqu 0x60($inp), @XMM[14]
- pxor @XMM[14], @XMM[2]
- movdqu @XMM[2], 0x60($out)
- jmp .Lctr_enc_done
-
- .align 16
- .Lctr_enc_short:
- lea 0x20(%rbp), $arg1
- lea 0x30(%rbp), $arg2
- lea ($key), $arg3
- call asm_AES_encrypt
- movdqu ($inp), @XMM[1]
- lea 16($inp), $inp
- mov 0x2c(%rbp), %eax # load 32-bit counter
- bswap %eax
- pxor 0x30(%rbp), @XMM[1]
- inc %eax # increment
- movdqu @XMM[1], ($out)
- bswap %eax
- lea 16($out), $out
- mov %eax, 0x2c(%rsp) # save 32-bit counter
- dec $len
- jnz .Lctr_enc_short
-
- .Lctr_enc_done:
- lea (%rsp), %rax
- pxor %xmm0, %xmm0
- .Lctr_enc_bzero: # wipe key schedule [if any]
- movdqa %xmm0, 0x00(%rax)
- movdqa %xmm0, 0x10(%rax)
- lea 0x20(%rax), %rax
- cmp %rax, %rbp
- ja .Lctr_enc_bzero
-
- lea (%rbp),%rsp # restore %rsp
- ___
- $code.=<<___ if ($win64);
- movaps 0x40(%rbp), %xmm6
- movaps 0x50(%rbp), %xmm7
- movaps 0x60(%rbp), %xmm8
- movaps 0x70(%rbp), %xmm9
- movaps 0x80(%rbp), %xmm10
- movaps 0x90(%rbp), %xmm11
- movaps 0xa0(%rbp), %xmm12
- movaps 0xb0(%rbp), %xmm13
- movaps 0xc0(%rbp), %xmm14
- movaps 0xd0(%rbp), %xmm15
- lea 0xa0(%rbp), %rsp
- ___
- $code.=<<___;
- mov 0x48(%rsp), %r15
- mov 0x50(%rsp), %r14
- mov 0x58(%rsp), %r13
- mov 0x60(%rsp), %r12
- mov 0x68(%rsp), %rbx
- mov 0x70(%rsp), %rax
- lea 0x78(%rsp), %rsp
- mov %rax, %rbp
- .Lctr_enc_epilogue:
- ret
- .size bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks
- ___
- }
- $code.=<<___;
- .type _bsaes_const,\@object
- .align 64
- _bsaes_const:
- .LM0ISR: # InvShiftRows constants
- .quad 0x0a0e0206070b0f03, 0x0004080c0d010509
- .LISRM0:
- .quad 0x01040b0e0205080f, 0x0306090c00070a0d
- .LISR:
- .quad 0x0504070602010003, 0x0f0e0d0c080b0a09
- .LBS0: # bit-slice constants
- .quad 0x5555555555555555, 0x5555555555555555
- .LBS1:
- .quad 0x3333333333333333, 0x3333333333333333
- .LBS2:
- .quad 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
- .LSR: # shiftrows constants
- .quad 0x0504070600030201, 0x0f0e0d0c0a09080b
- .LSRM0:
- .quad 0x0304090e00050a0f, 0x01060b0c0207080d
- .LM0SR:
- .quad 0x0a0e02060f03070b, 0x0004080c05090d01
- .LSWPUP: # byte-swap upper dword
- .quad 0x0706050403020100, 0x0c0d0e0f0b0a0908
- .LSWPUPM0SR:
- .quad 0x0a0d02060c03070b, 0x0004080f05090e01
- .LADD1: # counter increment constants
- .quad 0x0000000000000000, 0x0000000100000000
- .LADD2:
- .quad 0x0000000000000000, 0x0000000200000000
- .LADD3:
- .quad 0x0000000000000000, 0x0000000300000000
- .LADD4:
- .quad 0x0000000000000000, 0x0000000400000000
- .LADD5:
- .quad 0x0000000000000000, 0x0000000500000000
- .LADD6:
- .quad 0x0000000000000000, 0x0000000600000000
- .LADD7:
- .quad 0x0000000000000000, 0x0000000700000000
- .LADD8:
- .quad 0x0000000000000000, 0x0000000800000000
- .Lmasks:
- .quad 0x0101010101010101, 0x0101010101010101
- .quad 0x0202020202020202, 0x0202020202020202
- .quad 0x0404040404040404, 0x0404040404040404
- .quad 0x0808080808080808, 0x0808080808080808
- .LM0:
- .quad 0x02060a0e03070b0f, 0x0004080c0105090d
- .L63:
- .quad 0x6363636363636363, 0x6363636363636363
- .asciz "Bit-sliced AES for x86_64/SSSE3, Emilia Käsper, Peter Schwabe, Andy Polyakov"
- .align 64
- .size _bsaes_const,.-_bsaes_const
- ___
-
- # EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
- # CONTEXT *context,DISPATCHER_CONTEXT *disp)
- if ($win64) {
- $rec="%rcx";
- $frame="%rdx";
- $context="%r8";
- $disp="%r9";
-
- $code.=<<___;
- .extern __imp_RtlVirtualUnwind
- .type se_handler,\@abi-omnipotent
- .align 16
- se_handler:
- push %rsi
- push %rdi
- push %rbx
- push %rbp
- push %r12
- push %r13
- push %r14
- push %r15
- pushfq
- sub \$64,%rsp
-
- mov 120($context),%rax # pull context->Rax
- mov 248($context),%rbx # pull context->Rip
-
- mov 8($disp),%rsi # disp->ImageBase
- mov 56($disp),%r11 # disp->HandlerData
-
- mov 0(%r11),%r10d # HandlerData[0]
- lea (%rsi,%r10),%r10 # prologue label
- cmp %r10,%rbx # context->Rip<prologue label
- jb .Lin_prologue
-
- mov 152($context),%rax # pull context->Rsp
-
- mov 4(%r11),%r10d # HandlerData[1]
- lea (%rsi,%r10),%r10 # epilogue label
- cmp %r10,%rbx # context->Rip>=epilogue label
- jae .Lin_prologue
-
- mov 160($context),%rax # pull context->Rbp
-
- lea 0x40(%rax),%rsi # %xmm save area
- lea 512($context),%rdi # &context.Xmm6
- mov \$20,%ecx # 10*sizeof(%xmm0)/sizeof(%rax)
- .long 0xa548f3fc # cld; rep movsq
- lea 0xa0(%rax),%rax # adjust stack pointer
-
- mov 0x70(%rax),%rbp
- mov 0x68(%rax),%rbx
- mov 0x60(%rax),%r12
- mov 0x58(%rax),%r13
- mov 0x50(%rax),%r14
- mov 0x48(%rax),%r15
- lea 0x78(%rax),%rax # adjust stack pointer
- mov %rbx,144($context) # restore context->Rbx
- mov %rbp,160($context) # restore context->Rbp
- mov %r12,216($context) # restore context->R12
- mov %r13,224($context) # restore context->R13
- mov %r14,232($context) # restore context->R14
- mov %r15,240($context) # restore context->R15
-
- .Lin_prologue:
- mov %rax,152($context) # restore context->Rsp
-
- mov 40($disp),%rdi # disp->ContextRecord
- mov $context,%rsi # context
- mov \$`1232/8`,%ecx # sizeof(CONTEXT)
- .long 0xa548f3fc # cld; rep movsq
-
- mov $disp,%rsi
- xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER
- mov 8(%rsi),%rdx # arg2, disp->ImageBase
- mov 0(%rsi),%r8 # arg3, disp->ControlPc
- mov 16(%rsi),%r9 # arg4, disp->FunctionEntry
- mov 40(%rsi),%r10 # disp->ContextRecord
- lea 56(%rsi),%r11 # &disp->HandlerData
- lea 24(%rsi),%r12 # &disp->EstablisherFrame
- mov %r10,32(%rsp) # arg5
- mov %r11,40(%rsp) # arg6
- mov %r12,48(%rsp) # arg7
- mov %rcx,56(%rsp) # arg8, (NULL)
- call *__imp_RtlVirtualUnwind(%rip)
-
- mov \$1,%eax # ExceptionContinueSearch
- add \$64,%rsp
- popfq
- pop %r15
- pop %r14
- pop %r13
- pop %r12
- pop %rbp
- pop %rbx
- pop %rdi
- pop %rsi
- ret
- .size se_handler,.-se_handler
-
- .section .pdata
- .align 4
- ___
- $code.=<<___;
- .rva .Lctr_enc_prologue
- .rva .Lctr_enc_epilogue
- .rva .Lctr_enc_info
-
- .section .xdata
- .align 8
- ___
- $code.=<<___;
- .Lctr_enc_info:
- .byte 9,0,0,0
- .rva se_handler
- .rva .Lctr_enc_body,.Lctr_enc_epilogue # HandlerData[]
- ___
- }
-
- $code =~ s/\`([^\`]*)\`/eval($1)/gem;
-
- print $code;
-
- close STDOUT;
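
Aside on the deleted file's InvMixColumns subroutine: it relies on the matrix factorization credited to Jussi Kivilinna in the comments above (inverse MixColumns = MixColumns x a cheap 05/04 decimation matrix over GF(2^8)). The following standalone Python sketch, not part of this gem and using helper names of our own, checks that identity; all matrices are 4x4 circulants over GF(2^8) modulo the AES polynomial x^8 + x^4 + x^3 + x + 1.

    def gf_mul(a, b):
        """Multiply two bytes in AES's GF(2^8)."""
        r = 0
        for _ in range(8):
            if b & 1:
                r ^= a
            hi = a & 0x80
            a = (a << 1) & 0xFF
            if hi:
                a ^= 0x1B  # reduce by the AES polynomial
            b >>= 1
        return r

    def circulant(row):
        """Build the 4x4 circulant matrix whose first row is `row`."""
        return [[row[(j - i) % 4] for j in range(4)] for i in range(4)]

    def matmul(a, b):
        """4x4 matrix product over GF(2^8): addition is XOR."""
        return [[gf_mul(a[i][0], b[0][j]) ^ gf_mul(a[i][1], b[1][j])
                 ^ gf_mul(a[i][2], b[2][j]) ^ gf_mul(a[i][3], b[3][j])
                 for j in range(4)] for i in range(4)]

    mix = circulant([0x02, 0x03, 0x01, 0x01])      # MixColumns
    scale = circulant([0x05, 0x00, 0x04, 0x00])    # cheap pre-multiplication step
    inv_mix = circulant([0x0E, 0x0B, 0x0D, 0x09])  # InvMixColumns
    assert matmul(mix, scale) == inv_mix
    print("factorization holds")

This is why the deleted InvMixColumns only emits the "multiplication by 0x05-0x00-0x04-0x00" step and then reuses MixColumns: decryption gets the full inverse transform from a few pshufd/pxor operations plus the already-present forward routine.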