ring-native 0.0.0 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (267) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -0
  3. data/CHANGES.md +7 -0
  4. data/Makefile +5 -0
  5. data/README.md +12 -5
  6. data/Rakefile +4 -0
  7. data/ext/ring/extconf.rb +4 -5
  8. data/lib/ring/native.rb +3 -1
  9. data/lib/ring/native/version.rb +5 -1
  10. data/ring-native.gemspec +6 -6
  11. data/vendor/ring-ffi/Cargo.lock +26 -0
  12. data/vendor/ring-ffi/Cargo.toml +45 -0
  13. data/vendor/ring-ffi/LICENSE +16 -0
  14. data/vendor/ring-ffi/README.md +59 -0
  15. data/vendor/ring-ffi/src/lib.rs +79 -0
  16. metadata +10 -255
  17. data/vendor/ring/BUILDING.md +0 -40
  18. data/vendor/ring/Cargo.toml +0 -43
  19. data/vendor/ring/LICENSE +0 -185
  20. data/vendor/ring/Makefile +0 -35
  21. data/vendor/ring/PORTING.md +0 -163
  22. data/vendor/ring/README.md +0 -113
  23. data/vendor/ring/STYLE.md +0 -197
  24. data/vendor/ring/appveyor.yml +0 -27
  25. data/vendor/ring/build.rs +0 -108
  26. data/vendor/ring/crypto/aes/aes.c +0 -1142
  27. data/vendor/ring/crypto/aes/aes_test.Windows.vcxproj +0 -25
  28. data/vendor/ring/crypto/aes/aes_test.cc +0 -93
  29. data/vendor/ring/crypto/aes/asm/aes-586.pl +0 -2368
  30. data/vendor/ring/crypto/aes/asm/aes-armv4.pl +0 -1249
  31. data/vendor/ring/crypto/aes/asm/aes-x86_64.pl +0 -2246
  32. data/vendor/ring/crypto/aes/asm/aesni-x86.pl +0 -1318
  33. data/vendor/ring/crypto/aes/asm/aesni-x86_64.pl +0 -2084
  34. data/vendor/ring/crypto/aes/asm/aesv8-armx.pl +0 -675
  35. data/vendor/ring/crypto/aes/asm/bsaes-armv7.pl +0 -1364
  36. data/vendor/ring/crypto/aes/asm/bsaes-x86_64.pl +0 -1565
  37. data/vendor/ring/crypto/aes/asm/vpaes-x86.pl +0 -841
  38. data/vendor/ring/crypto/aes/asm/vpaes-x86_64.pl +0 -1116
  39. data/vendor/ring/crypto/aes/internal.h +0 -87
  40. data/vendor/ring/crypto/aes/mode_wrappers.c +0 -61
  41. data/vendor/ring/crypto/bn/add.c +0 -394
  42. data/vendor/ring/crypto/bn/asm/armv4-mont.pl +0 -694
  43. data/vendor/ring/crypto/bn/asm/armv8-mont.pl +0 -1503
  44. data/vendor/ring/crypto/bn/asm/bn-586.pl +0 -774
  45. data/vendor/ring/crypto/bn/asm/co-586.pl +0 -287
  46. data/vendor/ring/crypto/bn/asm/rsaz-avx2.pl +0 -1882
  47. data/vendor/ring/crypto/bn/asm/x86-mont.pl +0 -592
  48. data/vendor/ring/crypto/bn/asm/x86_64-gcc.c +0 -599
  49. data/vendor/ring/crypto/bn/asm/x86_64-mont.pl +0 -1393
  50. data/vendor/ring/crypto/bn/asm/x86_64-mont5.pl +0 -3507
  51. data/vendor/ring/crypto/bn/bn.c +0 -352
  52. data/vendor/ring/crypto/bn/bn_asn1.c +0 -74
  53. data/vendor/ring/crypto/bn/bn_test.Windows.vcxproj +0 -25
  54. data/vendor/ring/crypto/bn/bn_test.cc +0 -1696
  55. data/vendor/ring/crypto/bn/cmp.c +0 -200
  56. data/vendor/ring/crypto/bn/convert.c +0 -433
  57. data/vendor/ring/crypto/bn/ctx.c +0 -311
  58. data/vendor/ring/crypto/bn/div.c +0 -594
  59. data/vendor/ring/crypto/bn/exponentiation.c +0 -1335
  60. data/vendor/ring/crypto/bn/gcd.c +0 -711
  61. data/vendor/ring/crypto/bn/generic.c +0 -1019
  62. data/vendor/ring/crypto/bn/internal.h +0 -316
  63. data/vendor/ring/crypto/bn/montgomery.c +0 -516
  64. data/vendor/ring/crypto/bn/mul.c +0 -888
  65. data/vendor/ring/crypto/bn/prime.c +0 -829
  66. data/vendor/ring/crypto/bn/random.c +0 -334
  67. data/vendor/ring/crypto/bn/rsaz_exp.c +0 -262
  68. data/vendor/ring/crypto/bn/rsaz_exp.h +0 -53
  69. data/vendor/ring/crypto/bn/shift.c +0 -276
  70. data/vendor/ring/crypto/bytestring/bytestring_test.Windows.vcxproj +0 -25
  71. data/vendor/ring/crypto/bytestring/bytestring_test.cc +0 -421
  72. data/vendor/ring/crypto/bytestring/cbb.c +0 -399
  73. data/vendor/ring/crypto/bytestring/cbs.c +0 -227
  74. data/vendor/ring/crypto/bytestring/internal.h +0 -46
  75. data/vendor/ring/crypto/chacha/chacha_generic.c +0 -140
  76. data/vendor/ring/crypto/chacha/chacha_vec.c +0 -323
  77. data/vendor/ring/crypto/chacha/chacha_vec_arm.S +0 -1447
  78. data/vendor/ring/crypto/chacha/chacha_vec_arm_generate.go +0 -153
  79. data/vendor/ring/crypto/cipher/cipher_test.Windows.vcxproj +0 -25
  80. data/vendor/ring/crypto/cipher/e_aes.c +0 -390
  81. data/vendor/ring/crypto/cipher/e_chacha20poly1305.c +0 -208
  82. data/vendor/ring/crypto/cipher/internal.h +0 -173
  83. data/vendor/ring/crypto/cipher/test/aes_128_gcm_tests.txt +0 -543
  84. data/vendor/ring/crypto/cipher/test/aes_128_key_wrap_tests.txt +0 -9
  85. data/vendor/ring/crypto/cipher/test/aes_256_gcm_tests.txt +0 -475
  86. data/vendor/ring/crypto/cipher/test/aes_256_key_wrap_tests.txt +0 -23
  87. data/vendor/ring/crypto/cipher/test/chacha20_poly1305_old_tests.txt +0 -422
  88. data/vendor/ring/crypto/cipher/test/chacha20_poly1305_tests.txt +0 -484
  89. data/vendor/ring/crypto/cipher/test/cipher_test.txt +0 -100
  90. data/vendor/ring/crypto/constant_time_test.Windows.vcxproj +0 -25
  91. data/vendor/ring/crypto/constant_time_test.c +0 -304
  92. data/vendor/ring/crypto/cpu-arm-asm.S +0 -32
  93. data/vendor/ring/crypto/cpu-arm.c +0 -199
  94. data/vendor/ring/crypto/cpu-intel.c +0 -261
  95. data/vendor/ring/crypto/crypto.c +0 -151
  96. data/vendor/ring/crypto/curve25519/asm/x25519-arm.S +0 -2118
  97. data/vendor/ring/crypto/curve25519/curve25519.c +0 -4888
  98. data/vendor/ring/crypto/curve25519/x25519_test.cc +0 -128
  99. data/vendor/ring/crypto/digest/md32_common.h +0 -181
  100. data/vendor/ring/crypto/ec/asm/p256-x86_64-asm.pl +0 -2725
  101. data/vendor/ring/crypto/ec/ec.c +0 -193
  102. data/vendor/ring/crypto/ec/ec_curves.c +0 -61
  103. data/vendor/ring/crypto/ec/ec_key.c +0 -228
  104. data/vendor/ring/crypto/ec/ec_montgomery.c +0 -114
  105. data/vendor/ring/crypto/ec/example_mul.Windows.vcxproj +0 -25
  106. data/vendor/ring/crypto/ec/internal.h +0 -243
  107. data/vendor/ring/crypto/ec/oct.c +0 -253
  108. data/vendor/ring/crypto/ec/p256-64.c +0 -1794
  109. data/vendor/ring/crypto/ec/p256-x86_64-table.h +0 -9548
  110. data/vendor/ring/crypto/ec/p256-x86_64.c +0 -509
  111. data/vendor/ring/crypto/ec/simple.c +0 -1007
  112. data/vendor/ring/crypto/ec/util-64.c +0 -183
  113. data/vendor/ring/crypto/ec/wnaf.c +0 -508
  114. data/vendor/ring/crypto/ecdh/ecdh.c +0 -155
  115. data/vendor/ring/crypto/ecdsa/ecdsa.c +0 -304
  116. data/vendor/ring/crypto/ecdsa/ecdsa_asn1.c +0 -193
  117. data/vendor/ring/crypto/ecdsa/ecdsa_test.Windows.vcxproj +0 -25
  118. data/vendor/ring/crypto/ecdsa/ecdsa_test.cc +0 -327
  119. data/vendor/ring/crypto/header_removed.h +0 -17
  120. data/vendor/ring/crypto/internal.h +0 -495
  121. data/vendor/ring/crypto/libring.Windows.vcxproj +0 -101
  122. data/vendor/ring/crypto/mem.c +0 -98
  123. data/vendor/ring/crypto/modes/asm/aesni-gcm-x86_64.pl +0 -1045
  124. data/vendor/ring/crypto/modes/asm/ghash-armv4.pl +0 -517
  125. data/vendor/ring/crypto/modes/asm/ghash-x86.pl +0 -1393
  126. data/vendor/ring/crypto/modes/asm/ghash-x86_64.pl +0 -1741
  127. data/vendor/ring/crypto/modes/asm/ghashv8-armx.pl +0 -422
  128. data/vendor/ring/crypto/modes/ctr.c +0 -226
  129. data/vendor/ring/crypto/modes/gcm.c +0 -1206
  130. data/vendor/ring/crypto/modes/gcm_test.Windows.vcxproj +0 -25
  131. data/vendor/ring/crypto/modes/gcm_test.c +0 -348
  132. data/vendor/ring/crypto/modes/internal.h +0 -299
  133. data/vendor/ring/crypto/perlasm/arm-xlate.pl +0 -170
  134. data/vendor/ring/crypto/perlasm/readme +0 -100
  135. data/vendor/ring/crypto/perlasm/x86_64-xlate.pl +0 -1164
  136. data/vendor/ring/crypto/perlasm/x86asm.pl +0 -292
  137. data/vendor/ring/crypto/perlasm/x86gas.pl +0 -263
  138. data/vendor/ring/crypto/perlasm/x86masm.pl +0 -200
  139. data/vendor/ring/crypto/perlasm/x86nasm.pl +0 -187
  140. data/vendor/ring/crypto/poly1305/poly1305.c +0 -331
  141. data/vendor/ring/crypto/poly1305/poly1305_arm.c +0 -301
  142. data/vendor/ring/crypto/poly1305/poly1305_arm_asm.S +0 -2015
  143. data/vendor/ring/crypto/poly1305/poly1305_test.Windows.vcxproj +0 -25
  144. data/vendor/ring/crypto/poly1305/poly1305_test.cc +0 -80
  145. data/vendor/ring/crypto/poly1305/poly1305_test.txt +0 -52
  146. data/vendor/ring/crypto/poly1305/poly1305_vec.c +0 -892
  147. data/vendor/ring/crypto/rand/asm/rdrand-x86_64.pl +0 -75
  148. data/vendor/ring/crypto/rand/internal.h +0 -32
  149. data/vendor/ring/crypto/rand/rand.c +0 -189
  150. data/vendor/ring/crypto/rand/urandom.c +0 -219
  151. data/vendor/ring/crypto/rand/windows.c +0 -56
  152. data/vendor/ring/crypto/refcount_c11.c +0 -66
  153. data/vendor/ring/crypto/refcount_lock.c +0 -53
  154. data/vendor/ring/crypto/refcount_test.Windows.vcxproj +0 -25
  155. data/vendor/ring/crypto/refcount_test.c +0 -58
  156. data/vendor/ring/crypto/rsa/blinding.c +0 -462
  157. data/vendor/ring/crypto/rsa/internal.h +0 -108
  158. data/vendor/ring/crypto/rsa/padding.c +0 -300
  159. data/vendor/ring/crypto/rsa/rsa.c +0 -450
  160. data/vendor/ring/crypto/rsa/rsa_asn1.c +0 -261
  161. data/vendor/ring/crypto/rsa/rsa_impl.c +0 -944
  162. data/vendor/ring/crypto/rsa/rsa_test.Windows.vcxproj +0 -25
  163. data/vendor/ring/crypto/rsa/rsa_test.cc +0 -437
  164. data/vendor/ring/crypto/sha/asm/sha-armv8.pl +0 -436
  165. data/vendor/ring/crypto/sha/asm/sha-x86_64.pl +0 -2390
  166. data/vendor/ring/crypto/sha/asm/sha256-586.pl +0 -1275
  167. data/vendor/ring/crypto/sha/asm/sha256-armv4.pl +0 -735
  168. data/vendor/ring/crypto/sha/asm/sha256-armv8.pl +0 -14
  169. data/vendor/ring/crypto/sha/asm/sha256-x86_64.pl +0 -14
  170. data/vendor/ring/crypto/sha/asm/sha512-586.pl +0 -911
  171. data/vendor/ring/crypto/sha/asm/sha512-armv4.pl +0 -666
  172. data/vendor/ring/crypto/sha/asm/sha512-armv8.pl +0 -14
  173. data/vendor/ring/crypto/sha/asm/sha512-x86_64.pl +0 -14
  174. data/vendor/ring/crypto/sha/sha1.c +0 -271
  175. data/vendor/ring/crypto/sha/sha256.c +0 -204
  176. data/vendor/ring/crypto/sha/sha512.c +0 -355
  177. data/vendor/ring/crypto/test/file_test.cc +0 -326
  178. data/vendor/ring/crypto/test/file_test.h +0 -181
  179. data/vendor/ring/crypto/test/malloc.cc +0 -150
  180. data/vendor/ring/crypto/test/scoped_types.h +0 -95
  181. data/vendor/ring/crypto/test/test.Windows.vcxproj +0 -35
  182. data/vendor/ring/crypto/test/test_util.cc +0 -46
  183. data/vendor/ring/crypto/test/test_util.h +0 -41
  184. data/vendor/ring/crypto/thread_none.c +0 -55
  185. data/vendor/ring/crypto/thread_pthread.c +0 -165
  186. data/vendor/ring/crypto/thread_test.Windows.vcxproj +0 -25
  187. data/vendor/ring/crypto/thread_test.c +0 -200
  188. data/vendor/ring/crypto/thread_win.c +0 -282
  189. data/vendor/ring/examples/checkdigest.rs +0 -103
  190. data/vendor/ring/include/openssl/aes.h +0 -121
  191. data/vendor/ring/include/openssl/arm_arch.h +0 -129
  192. data/vendor/ring/include/openssl/base.h +0 -156
  193. data/vendor/ring/include/openssl/bn.h +0 -794
  194. data/vendor/ring/include/openssl/buffer.h +0 -18
  195. data/vendor/ring/include/openssl/bytestring.h +0 -235
  196. data/vendor/ring/include/openssl/chacha.h +0 -37
  197. data/vendor/ring/include/openssl/cmac.h +0 -76
  198. data/vendor/ring/include/openssl/cpu.h +0 -184
  199. data/vendor/ring/include/openssl/crypto.h +0 -43
  200. data/vendor/ring/include/openssl/curve25519.h +0 -88
  201. data/vendor/ring/include/openssl/ec.h +0 -225
  202. data/vendor/ring/include/openssl/ec_key.h +0 -129
  203. data/vendor/ring/include/openssl/ecdh.h +0 -110
  204. data/vendor/ring/include/openssl/ecdsa.h +0 -156
  205. data/vendor/ring/include/openssl/err.h +0 -201
  206. data/vendor/ring/include/openssl/mem.h +0 -101
  207. data/vendor/ring/include/openssl/obj_mac.h +0 -71
  208. data/vendor/ring/include/openssl/opensslfeatures.h +0 -68
  209. data/vendor/ring/include/openssl/opensslv.h +0 -18
  210. data/vendor/ring/include/openssl/ossl_typ.h +0 -18
  211. data/vendor/ring/include/openssl/poly1305.h +0 -51
  212. data/vendor/ring/include/openssl/rand.h +0 -70
  213. data/vendor/ring/include/openssl/rsa.h +0 -399
  214. data/vendor/ring/include/openssl/thread.h +0 -133
  215. data/vendor/ring/include/openssl/type_check.h +0 -71
  216. data/vendor/ring/mk/Common.props +0 -63
  217. data/vendor/ring/mk/Windows.props +0 -42
  218. data/vendor/ring/mk/WindowsTest.props +0 -18
  219. data/vendor/ring/mk/appveyor.bat +0 -62
  220. data/vendor/ring/mk/bottom_of_makefile.mk +0 -54
  221. data/vendor/ring/mk/ring.mk +0 -266
  222. data/vendor/ring/mk/top_of_makefile.mk +0 -214
  223. data/vendor/ring/mk/travis.sh +0 -40
  224. data/vendor/ring/mk/update-travis-yml.py +0 -229
  225. data/vendor/ring/ring.sln +0 -153
  226. data/vendor/ring/src/aead.rs +0 -682
  227. data/vendor/ring/src/agreement.rs +0 -248
  228. data/vendor/ring/src/c.rs +0 -129
  229. data/vendor/ring/src/constant_time.rs +0 -37
  230. data/vendor/ring/src/der.rs +0 -96
  231. data/vendor/ring/src/digest.rs +0 -690
  232. data/vendor/ring/src/digest_tests.txt +0 -57
  233. data/vendor/ring/src/ecc.rs +0 -28
  234. data/vendor/ring/src/ecc_build.rs +0 -279
  235. data/vendor/ring/src/ecc_curves.rs +0 -117
  236. data/vendor/ring/src/ed25519_tests.txt +0 -2579
  237. data/vendor/ring/src/exe_tests.rs +0 -46
  238. data/vendor/ring/src/ffi.rs +0 -29
  239. data/vendor/ring/src/file_test.rs +0 -187
  240. data/vendor/ring/src/hkdf.rs +0 -153
  241. data/vendor/ring/src/hkdf_tests.txt +0 -59
  242. data/vendor/ring/src/hmac.rs +0 -414
  243. data/vendor/ring/src/hmac_tests.txt +0 -97
  244. data/vendor/ring/src/input.rs +0 -312
  245. data/vendor/ring/src/lib.rs +0 -41
  246. data/vendor/ring/src/pbkdf2.rs +0 -265
  247. data/vendor/ring/src/pbkdf2_tests.txt +0 -113
  248. data/vendor/ring/src/polyfill.rs +0 -57
  249. data/vendor/ring/src/rand.rs +0 -28
  250. data/vendor/ring/src/signature.rs +0 -314
  251. data/vendor/ring/third-party/NIST/README.md +0 -9
  252. data/vendor/ring/third-party/NIST/SHAVS/SHA1LongMsg.rsp +0 -263
  253. data/vendor/ring/third-party/NIST/SHAVS/SHA1Monte.rsp +0 -309
  254. data/vendor/ring/third-party/NIST/SHAVS/SHA1ShortMsg.rsp +0 -267
  255. data/vendor/ring/third-party/NIST/SHAVS/SHA224LongMsg.rsp +0 -263
  256. data/vendor/ring/third-party/NIST/SHAVS/SHA224Monte.rsp +0 -309
  257. data/vendor/ring/third-party/NIST/SHAVS/SHA224ShortMsg.rsp +0 -267
  258. data/vendor/ring/third-party/NIST/SHAVS/SHA256LongMsg.rsp +0 -263
  259. data/vendor/ring/third-party/NIST/SHAVS/SHA256Monte.rsp +0 -309
  260. data/vendor/ring/third-party/NIST/SHAVS/SHA256ShortMsg.rsp +0 -267
  261. data/vendor/ring/third-party/NIST/SHAVS/SHA384LongMsg.rsp +0 -519
  262. data/vendor/ring/third-party/NIST/SHAVS/SHA384Monte.rsp +0 -309
  263. data/vendor/ring/third-party/NIST/SHAVS/SHA384ShortMsg.rsp +0 -523
  264. data/vendor/ring/third-party/NIST/SHAVS/SHA512LongMsg.rsp +0 -519
  265. data/vendor/ring/third-party/NIST/SHAVS/SHA512Monte.rsp +0 -309
  266. data/vendor/ring/third-party/NIST/SHAVS/SHA512ShortMsg.rsp +0 -523
  267. data/vendor/ring/third-party/NIST/sha256sums.txt +0 -1
@@ -1,1318 +0,0 @@
1
- #!/usr/bin/env perl
2
-
3
- # ====================================================================
4
- # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5
- # project. The module is, however, dual licensed under OpenSSL and
6
- # CRYPTOGAMS licenses depending on where you obtain it. For further
7
- # details see http://www.openssl.org/~appro/cryptogams/.
8
- # ====================================================================
9
- #
10
- # This module implements support for Intel AES-NI extension. In
11
- # OpenSSL context it's used with Intel engine, but can also be used as
12
- # drop-in replacement for crypto/aes/asm/aes-586.pl [see below for
13
- # details].
14
- #
15
- # Performance.
16
- #
17
- # To start with, see the corresponding paragraph in aesni-x86_64.pl...
18
- # Instead of filling in a table similar to the one found there, I've chosen to
19
- # summarize *comparison* results for raw ECB, CTR and CBC benchmarks.
20
- # The simplified table below represents 32-bit performance relative
21
- # to 64-bit one in every given point. Ratios vary for different
22
- # encryption modes, therefore interval values.
23
- #
24
- # 16-byte  64-byte  256-byte  1-KB    8-KB
25
- # 53-67%   67-84%   91-94%    95-98%  97-99.5%
26
- #
27
- # Lower ratios for smaller block sizes are perfectly understandable,
28
- # because function call overhead is higher in 32-bit mode. Largest
29
- # 8-KB block performance is virtually the same: 32-bit code is less than
30
- # 1% slower for ECB, CBC and CCM, and ~3% slower otherwise.
31
-
32
- # January 2011
33
- #
34
- # See aesni-x86_64.pl for details. Unlike the x86_64 version, this module
35
- # interleaves at most 6 aes[enc|dec] instructions, because there are
36
- # not enough registers for 8x interleave [which should be optimal for
37
- # Sandy Bridge]. Actually, performance results for 6x interleave
38
- # factor presented in aesni-x86_64.pl (except for CTR) are for this
39
- # module.
40
-
41
- # April 2011
42
- #
43
- # Add aesni_xts_[en|de]crypt. Westmere spends 1.50 cycles processing
44
- # one byte out of 8KB with 128-bit key, Sandy Bridge - 1.09.
45
-
46
- ######################################################################
47
- # Current large-block performance in cycles per byte processed with
48
- # 128-bit key (less is better).
49
- #
50
- #             CBC en-/decrypt  CTR   XTS   ECB
51
- # Westmere    3.77/1.37        1.37  1.52  1.27
52
- # * Bridge    5.07/0.98        0.99  1.09  0.91
53
- # Haswell     4.44/0.80        0.97  1.03  0.72
54
- # Silvermont  5.77/3.56        3.67  4.03  3.46
55
- # Bulldozer   5.80/0.98        1.05  1.24  0.93
56
-
57
- $PREFIX="aesni"; # if $PREFIX is set to "AES", the script
58
- # generates drop-in replacement for
59
- # crypto/aes/asm/aes-586.pl:-)
60
- $inline=1; # inline _aesni_[en|de]crypt
61
-
62
- $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
63
- push(@INC,"${dir}","${dir}../../perlasm");
64
- require "x86asm.pl";
65
-
66
- &asm_init($ARGV[0],$0);
67
-
68
- &external_label("OPENSSL_ia32cap_P");
69
- &static_label("key_const");
70
-
71
- if ($PREFIX eq "aesni") { $movekey=\&movups; }
72
- else { $movekey=\&movups; }
73
-
74
- $len="eax";
75
- $rounds="ecx";
76
- $key="edx";
77
- $inp="esi";
78
- $out="edi";
79
- $rounds_="ebx"; # backup copy for $rounds
80
- $key_="ebp"; # backup copy for $key
81
-
82
- $rndkey0="xmm0";
83
- $rndkey1="xmm1";
84
- $inout0="xmm2";
85
- $inout1="xmm3";
86
- $inout2="xmm4";
87
- $inout3="xmm5"; $in1="xmm5";
88
- $inout4="xmm6"; $in0="xmm6";
89
- $inout5="xmm7"; $ivec="xmm7";
90
-
91
- # AESNI extension
92
- sub aeskeygenassist
93
- { my($dst,$src,$imm)=@_;
94
- if ("$dst:$src" =~ /xmm([0-7]):xmm([0-7])/)
95
- { &data_byte(0x66,0x0f,0x3a,0xdf,0xc0|($1<<3)|$2,$imm); }
96
- }
97
- sub aescommon
98
- { my($opcodelet,$dst,$src)=@_;
99
- if ("$dst:$src" =~ /xmm([0-7]):xmm([0-7])/)
100
- { &data_byte(0x66,0x0f,0x38,$opcodelet,0xc0|($1<<3)|$2);}
101
- }
102
- sub aesimc { aescommon(0xdb,@_); }
103
- sub aesenc { aescommon(0xdc,@_); }
104
- sub aesenclast { aescommon(0xdd,@_); }
105
- sub aesdec { aescommon(0xde,@_); }
106
- sub aesdeclast { aescommon(0xdf,@_); }
107
-
108
- # Inline version of internal aesni_[en|de]crypt1
109
- { my $sn;
110
- sub aesni_inline_generate1
111
- { my ($p,$inout,$ivec)=@_; $inout=$inout0 if (!defined($inout));
112
- $sn++;
113
-
114
- &$movekey ($rndkey0,&QWP(0,$key));
115
- &$movekey ($rndkey1,&QWP(16,$key));
116
- &xorps ($ivec,$rndkey0) if (defined($ivec));
117
- &lea ($key,&DWP(32,$key));
118
- &xorps ($inout,$ivec) if (defined($ivec));
119
- &xorps ($inout,$rndkey0) if (!defined($ivec));
120
- &set_label("${p}1_loop_$sn");
121
- eval"&aes${p} ($inout,$rndkey1)";
122
- &dec ($rounds);
123
- &$movekey ($rndkey1,&QWP(0,$key));
124
- &lea ($key,&DWP(16,$key));
125
- &jnz (&label("${p}1_loop_$sn"));
126
- eval"&aes${p}last ($inout,$rndkey1)";
127
- }}
128
-
129
- sub aesni_generate1 # fully unrolled loop
130
- { my ($p,$inout)=@_; $inout=$inout0 if (!defined($inout));
131
-
132
- &function_begin_B("_aesni_${p}rypt1");
133
- &movups ($rndkey0,&QWP(0,$key));
134
- &$movekey ($rndkey1,&QWP(0x10,$key));
135
- &xorps ($inout,$rndkey0);
136
- &$movekey ($rndkey0,&QWP(0x20,$key));
137
- &lea ($key,&DWP(0x30,$key));
138
- &cmp ($rounds,11);
139
- &jb (&label("${p}128"));
140
- &lea ($key,&DWP(0x20,$key));
141
- &je (&label("${p}192"));
142
- &lea ($key,&DWP(0x20,$key));
143
- eval"&aes${p} ($inout,$rndkey1)";
144
- &$movekey ($rndkey1,&QWP(-0x40,$key));
145
- eval"&aes${p} ($inout,$rndkey0)";
146
- &$movekey ($rndkey0,&QWP(-0x30,$key));
147
- &set_label("${p}192");
148
- eval"&aes${p} ($inout,$rndkey1)";
149
- &$movekey ($rndkey1,&QWP(-0x20,$key));
150
- eval"&aes${p} ($inout,$rndkey0)";
151
- &$movekey ($rndkey0,&QWP(-0x10,$key));
152
- &set_label("${p}128");
153
- eval"&aes${p} ($inout,$rndkey1)";
154
- &$movekey ($rndkey1,&QWP(0,$key));
155
- eval"&aes${p} ($inout,$rndkey0)";
156
- &$movekey ($rndkey0,&QWP(0x10,$key));
157
- eval"&aes${p} ($inout,$rndkey1)";
158
- &$movekey ($rndkey1,&QWP(0x20,$key));
159
- eval"&aes${p} ($inout,$rndkey0)";
160
- &$movekey ($rndkey0,&QWP(0x30,$key));
161
- eval"&aes${p} ($inout,$rndkey1)";
162
- &$movekey ($rndkey1,&QWP(0x40,$key));
163
- eval"&aes${p} ($inout,$rndkey0)";
164
- &$movekey ($rndkey0,&QWP(0x50,$key));
165
- eval"&aes${p} ($inout,$rndkey1)";
166
- &$movekey ($rndkey1,&QWP(0x60,$key));
167
- eval"&aes${p} ($inout,$rndkey0)";
168
- &$movekey ($rndkey0,&QWP(0x70,$key));
169
- eval"&aes${p} ($inout,$rndkey1)";
170
- eval"&aes${p}last ($inout,$rndkey0)";
171
- &ret();
172
- &function_end_B("_aesni_${p}rypt1");
173
- }
174
-
175
- # void $PREFIX_encrypt (const void *inp,void *out,const AES_KEY *key);
176
- &aesni_generate1("enc") if (!$inline);
177
- &function_begin_B("${PREFIX}_encrypt");
178
- &mov ("eax",&wparam(0));
179
- &mov ($key,&wparam(2));
180
- &movups ($inout0,&QWP(0,"eax"));
181
- &mov ($rounds,&DWP(240,$key));
182
- &mov ("eax",&wparam(1));
183
- if ($inline)
184
- { &aesni_inline_generate1("enc"); }
185
- else
186
- { &call ("_aesni_encrypt1"); }
187
- &pxor ($rndkey0,$rndkey0); # clear register bank
188
- &pxor ($rndkey1,$rndkey1);
189
- &movups (&QWP(0,"eax"),$inout0);
190
- &pxor ($inout0,$inout0);
191
- &ret ();
192
- &function_end_B("${PREFIX}_encrypt");
193
-
194
- # void $PREFIX_decrypt (const void *inp,void *out,const AES_KEY *key);
195
- &aesni_generate1("dec") if(!$inline);
196
- &function_begin_B("${PREFIX}_decrypt");
197
- &mov ("eax",&wparam(0));
198
- &mov ($key,&wparam(2));
199
- &movups ($inout0,&QWP(0,"eax"));
200
- &mov ($rounds,&DWP(240,$key));
201
- &mov ("eax",&wparam(1));
202
- if ($inline)
203
- { &aesni_inline_generate1("dec"); }
204
- else
205
- { &call ("_aesni_decrypt1"); }
206
- &pxor ($rndkey0,$rndkey0); # clear register bank
207
- &pxor ($rndkey1,$rndkey1);
208
- &movups (&QWP(0,"eax"),$inout0);
209
- &pxor ($inout0,$inout0);
210
- &ret ();
211
- &function_end_B("${PREFIX}_decrypt");
212
-
213
- # _aesni_[en|de]cryptN are private interfaces, N denotes interleave
214
- # factor. Why were 3x subroutines originally used in loops? Even though
215
- # aes[enc|dec] latency was originally 6, it could be scheduled only
216
- # every *2nd* cycle. Thus 3x interleave was the one providing optimal
217
- # utilization, i.e. when subroutine's throughput is virtually same as
218
- # of non-interleaved subroutine [for number of input blocks up to 3].
219
- # This is why it originally made no sense to implement 2x subroutine.
220
- # But times change and it became appropriate to spend extra 192 bytes
221
- # on a 2x subroutine on account of Atom Silvermont. For processors that
222
- # can schedule aes[enc|dec] every cycle optimal interleave factor
223
- # equals the corresponding instruction's latency. 8x is optimal for
224
- # * Bridge, but it's unfeasible to accommodate such implementation
225
- # in XMM registers addressable in 32-bit mode and therefore maximum
226
- # of 6x is used instead...
227
-
228
- sub aesni_generate2
229
- { my $p=shift;
230
-
231
- &function_begin_B("_aesni_${p}rypt2");
232
- &$movekey ($rndkey0,&QWP(0,$key));
233
- &shl ($rounds,4);
234
- &$movekey ($rndkey1,&QWP(16,$key));
235
- &xorps ($inout0,$rndkey0);
236
- &pxor ($inout1,$rndkey0);
237
- &$movekey ($rndkey0,&QWP(32,$key));
238
- &lea ($key,&DWP(32,$key,$rounds));
239
- &neg ($rounds);
240
- &add ($rounds,16);
241
-
242
- &set_label("${p}2_loop");
243
- eval"&aes${p} ($inout0,$rndkey1)";
244
- eval"&aes${p} ($inout1,$rndkey1)";
245
- &$movekey ($rndkey1,&QWP(0,$key,$rounds));
246
- &add ($rounds,32);
247
- eval"&aes${p} ($inout0,$rndkey0)";
248
- eval"&aes${p} ($inout1,$rndkey0)";
249
- &$movekey ($rndkey0,&QWP(-16,$key,$rounds));
250
- &jnz (&label("${p}2_loop"));
251
- eval"&aes${p} ($inout0,$rndkey1)";
252
- eval"&aes${p} ($inout1,$rndkey1)";
253
- eval"&aes${p}last ($inout0,$rndkey0)";
254
- eval"&aes${p}last ($inout1,$rndkey0)";
255
- &ret();
256
- &function_end_B("_aesni_${p}rypt2");
257
- }
258
-
259
- sub aesni_generate3
260
- { my $p=shift;
261
-
262
- &function_begin_B("_aesni_${p}rypt3");
263
- &$movekey ($rndkey0,&QWP(0,$key));
264
- &shl ($rounds,4);
265
- &$movekey ($rndkey1,&QWP(16,$key));
266
- &xorps ($inout0,$rndkey0);
267
- &pxor ($inout1,$rndkey0);
268
- &pxor ($inout2,$rndkey0);
269
- &$movekey ($rndkey0,&QWP(32,$key));
270
- &lea ($key,&DWP(32,$key,$rounds));
271
- &neg ($rounds);
272
- &add ($rounds,16);
273
-
274
- &set_label("${p}3_loop");
275
- eval"&aes${p} ($inout0,$rndkey1)";
276
- eval"&aes${p} ($inout1,$rndkey1)";
277
- eval"&aes${p} ($inout2,$rndkey1)";
278
- &$movekey ($rndkey1,&QWP(0,$key,$rounds));
279
- &add ($rounds,32);
280
- eval"&aes${p} ($inout0,$rndkey0)";
281
- eval"&aes${p} ($inout1,$rndkey0)";
282
- eval"&aes${p} ($inout2,$rndkey0)";
283
- &$movekey ($rndkey0,&QWP(-16,$key,$rounds));
284
- &jnz (&label("${p}3_loop"));
285
- eval"&aes${p} ($inout0,$rndkey1)";
286
- eval"&aes${p} ($inout1,$rndkey1)";
287
- eval"&aes${p} ($inout2,$rndkey1)";
288
- eval"&aes${p}last ($inout0,$rndkey0)";
289
- eval"&aes${p}last ($inout1,$rndkey0)";
290
- eval"&aes${p}last ($inout2,$rndkey0)";
291
- &ret();
292
- &function_end_B("_aesni_${p}rypt3");
293
- }
294
-
295
- # 4x interleave is implemented to improve small block performance,
296
- # most notably [and naturally] for 4 blocks, by ~30%. One can argue that one
297
- # should have implemented 5x as well, but improvement would be <20%,
298
- # so it's not worth it...
299
- sub aesni_generate4
300
- { my $p=shift;
301
-
302
- &function_begin_B("_aesni_${p}rypt4");
303
- &$movekey ($rndkey0,&QWP(0,$key));
304
- &$movekey ($rndkey1,&QWP(16,$key));
305
- &shl ($rounds,4);
306
- &xorps ($inout0,$rndkey0);
307
- &pxor ($inout1,$rndkey0);
308
- &pxor ($inout2,$rndkey0);
309
- &pxor ($inout3,$rndkey0);
310
- &$movekey ($rndkey0,&QWP(32,$key));
311
- &lea ($key,&DWP(32,$key,$rounds));
312
- &neg ($rounds);
313
- &data_byte (0x0f,0x1f,0x40,0x00);
314
- &add ($rounds,16);
315
-
316
- &set_label("${p}4_loop");
317
- eval"&aes${p} ($inout0,$rndkey1)";
318
- eval"&aes${p} ($inout1,$rndkey1)";
319
- eval"&aes${p} ($inout2,$rndkey1)";
320
- eval"&aes${p} ($inout3,$rndkey1)";
321
- &$movekey ($rndkey1,&QWP(0,$key,$rounds));
322
- &add ($rounds,32);
323
- eval"&aes${p} ($inout0,$rndkey0)";
324
- eval"&aes${p} ($inout1,$rndkey0)";
325
- eval"&aes${p} ($inout2,$rndkey0)";
326
- eval"&aes${p} ($inout3,$rndkey0)";
327
- &$movekey ($rndkey0,&QWP(-16,$key,$rounds));
328
- &jnz (&label("${p}4_loop"));
329
-
330
- eval"&aes${p} ($inout0,$rndkey1)";
331
- eval"&aes${p} ($inout1,$rndkey1)";
332
- eval"&aes${p} ($inout2,$rndkey1)";
333
- eval"&aes${p} ($inout3,$rndkey1)";
334
- eval"&aes${p}last ($inout0,$rndkey0)";
335
- eval"&aes${p}last ($inout1,$rndkey0)";
336
- eval"&aes${p}last ($inout2,$rndkey0)";
337
- eval"&aes${p}last ($inout3,$rndkey0)";
338
- &ret();
339
- &function_end_B("_aesni_${p}rypt4");
340
- }
341
-
342
- sub aesni_generate6
343
- { my $p=shift;
344
-
345
- &function_begin_B("_aesni_${p}rypt6");
346
- &static_label("_aesni_${p}rypt6_enter");
347
- &$movekey ($rndkey0,&QWP(0,$key));
348
- &shl ($rounds,4);
349
- &$movekey ($rndkey1,&QWP(16,$key));
350
- &xorps ($inout0,$rndkey0);
351
- &pxor ($inout1,$rndkey0); # pxor does better here
352
- &pxor ($inout2,$rndkey0);
353
- eval"&aes${p} ($inout0,$rndkey1)";
354
- &pxor ($inout3,$rndkey0);
355
- &pxor ($inout4,$rndkey0);
356
- eval"&aes${p} ($inout1,$rndkey1)";
357
- &lea ($key,&DWP(32,$key,$rounds));
358
- &neg ($rounds);
359
- eval"&aes${p} ($inout2,$rndkey1)";
360
- &pxor ($inout5,$rndkey0);
361
- &$movekey ($rndkey0,&QWP(0,$key,$rounds));
362
- &add ($rounds,16);
363
- &jmp (&label("_aesni_${p}rypt6_inner"));
364
-
365
- &set_label("${p}6_loop",16);
366
- eval"&aes${p} ($inout0,$rndkey1)";
367
- eval"&aes${p} ($inout1,$rndkey1)";
368
- eval"&aes${p} ($inout2,$rndkey1)";
369
- &set_label("_aesni_${p}rypt6_inner");
370
- eval"&aes${p} ($inout3,$rndkey1)";
371
- eval"&aes${p} ($inout4,$rndkey1)";
372
- eval"&aes${p} ($inout5,$rndkey1)";
373
- &set_label("_aesni_${p}rypt6_enter");
374
- &$movekey ($rndkey1,&QWP(0,$key,$rounds));
375
- &add ($rounds,32);
376
- eval"&aes${p} ($inout0,$rndkey0)";
377
- eval"&aes${p} ($inout1,$rndkey0)";
378
- eval"&aes${p} ($inout2,$rndkey0)";
379
- eval"&aes${p} ($inout3,$rndkey0)";
380
- eval"&aes${p} ($inout4,$rndkey0)";
381
- eval"&aes${p} ($inout5,$rndkey0)";
382
- &$movekey ($rndkey0,&QWP(-16,$key,$rounds));
383
- &jnz (&label("${p}6_loop"));
384
-
385
- eval"&aes${p} ($inout0,$rndkey1)";
386
- eval"&aes${p} ($inout1,$rndkey1)";
387
- eval"&aes${p} ($inout2,$rndkey1)";
388
- eval"&aes${p} ($inout3,$rndkey1)";
389
- eval"&aes${p} ($inout4,$rndkey1)";
390
- eval"&aes${p} ($inout5,$rndkey1)";
391
- eval"&aes${p}last ($inout0,$rndkey0)";
392
- eval"&aes${p}last ($inout1,$rndkey0)";
393
- eval"&aes${p}last ($inout2,$rndkey0)";
394
- eval"&aes${p}last ($inout3,$rndkey0)";
395
- eval"&aes${p}last ($inout4,$rndkey0)";
396
- eval"&aes${p}last ($inout5,$rndkey0)";
397
- &ret();
398
- &function_end_B("_aesni_${p}rypt6");
399
- }
400
- &aesni_generate2("enc") if ($PREFIX eq "aesni");
401
- &aesni_generate2("dec");
402
- &aesni_generate3("enc") if ($PREFIX eq "aesni");
403
- &aesni_generate3("dec");
404
- &aesni_generate4("enc") if ($PREFIX eq "aesni");
405
- &aesni_generate4("dec");
406
- &aesni_generate6("enc") if ($PREFIX eq "aesni");
407
- &aesni_generate6("dec");
408
-
409
- if ($PREFIX eq "aesni") {
410
- ######################################################################
411
- # void aesni_ccm64_[en|de]crypt_blocks (const void *in, void *out,
412
- # size_t blocks, const AES_KEY *key,
413
- # const char *ivec,char *cmac);
414
- #
415
- # Handles only complete blocks, operates on a 64-bit counter and
416
- # does not update *ivec! Nor does it finalize the CMAC value
417
- # (see engine/eng_aesni.c for details)
418
- #
419
- { my $cmac=$inout1;
420
- &function_begin("aesni_ccm64_encrypt_blocks"); # CCM64 encrypt: per 16-byte block, XORs input with E(counter) and folds the input into the running CMAC
421
- &mov ($inp,&wparam(0)); # arg 0: in
422
- &mov ($out,&wparam(1)); # arg 1: out
423
- &mov ($len,&wparam(2)); # arg 2: block count; NOTE(review): no zero check, the dec/jnz loop below assumes blocks >= 1
424
- &mov ($key,&wparam(3)); # arg 3: key schedule
425
- &mov ($rounds_,&wparam(4)); # arg 4: ivec pointer, parked in $rounds_ until loaded
426
- &mov ($rounds,&wparam(5)); # arg 5: cmac pointer, parked in $rounds until loaded
427
- &mov ($key_,"esp"); # remember caller's esp
428
- &sub ("esp",60);
429
- &and ("esp",-16); # align stack
430
- &mov (&DWP(48,"esp"),$key_); # caller's esp is kept at 48(esp)
431
-
432
- &movdqu ($ivec,&QWP(0,$rounds_)); # load ivec
433
- &movdqu ($cmac,&QWP(0,$rounds)); # load cmac
434
- &mov ($rounds,&DWP(240,$key)); # $rounds = dword at offset 240 of the key schedule
435
-
436
- # compose byte-swap control mask for pshufb on stack
437
- &mov (&DWP(0,"esp"),0x0c0d0e0f);
438
- &mov (&DWP(4,"esp"),0x08090a0b);
439
- &mov (&DWP(8,"esp"),0x04050607);
440
- &mov (&DWP(12,"esp"),0x00010203);
441
-
442
- # compose counter increment vector on stack
443
- &mov ($rounds_,1);
444
- &xor ($key_,$key_);
445
- &mov (&DWP(16,"esp"),$rounds_); # dwords at 16..28(esp) become 1,0,0,0
446
- &mov (&DWP(20,"esp"),$key_);
447
- &mov (&DWP(24,"esp"),$key_);
448
- &mov (&DWP(28,"esp"),$key_);
449
-
450
- &shl ($rounds,4); # $rounds *= 16
451
- &mov ($rounds_,16);
452
- &lea ($key_,&DWP(0,$key)); # $key_ = start of key schedule
453
- &movdqa ($inout3,&QWP(0,"esp")); # byte-swap mask
454
- &movdqa ($inout0,$ivec); # first counter block, exactly as loaded
455
- &lea ($key,&DWP(32,$key,$rounds)); # $key = schedule + 32 + 16*rounds
456
- &sub ($rounds_,$rounds); # $rounds_ = 16 - 16*rounds: negative index that the inner loop counts up to 0
457
- &pshufb ($ivec,$inout3); # keep a byte-swapped copy of ivec for paddq
458
-
459
- &set_label("ccm64_enc_outer");
460
- &$movekey ($rndkey0,&QWP(0,$key_));
461
- &mov ($rounds,$rounds_); # reset inner-loop index
462
- &movups ($in0,&QWP(0,$inp)); # load input block
463
-
464
- &xorps ($inout0,$rndkey0);
465
- &$movekey ($rndkey1,&QWP(16,$key_));
466
- &xorps ($rndkey0,$in0);
467
- &xorps ($cmac,$rndkey0); # cmac^=inp
468
- &$movekey ($rndkey0,&QWP(32,$key_));
469
-
470
- &set_label("ccm64_enc2_loop"); # two aesenc rounds per pass on both the counter block and the CMAC
471
- &aesenc ($inout0,$rndkey1);
472
- &aesenc ($cmac,$rndkey1);
473
- &$movekey ($rndkey1,&QWP(0,$key,$rounds));
474
- &add ($rounds,32); # index reaches 0 when the schedule is exhausted
475
- &aesenc ($inout0,$rndkey0);
476
- &aesenc ($cmac,$rndkey0);
477
- &$movekey ($rndkey0,&QWP(-16,$key,$rounds));
478
- &jnz (&label("ccm64_enc2_loop")); # presumably tests the add above ($movekey is defined outside this chunk)
479
- &aesenc ($inout0,$rndkey1);
480
- &aesenc ($cmac,$rndkey1);
481
- &paddq ($ivec,&QWP(16,"esp")); # add the increment vector built at 16(esp)
482
- &dec ($len); # result is tested by the jnz that closes the outer loop
483
- &aesenclast ($inout0,$rndkey0);
484
- &aesenclast ($cmac,$rndkey0);
485
-
486
- &lea ($inp,&DWP(16,$inp));
487
- &xorps ($in0,$inout0); # inp^=E(ivec)
488
- &movdqa ($inout0,$ivec); # next counter block, still byte-swapped
489
- &movups (&QWP(0,$out),$in0); # save output
490
- &pshufb ($inout0,$inout3); # swap the next counter block back
491
- &lea ($out,&DWP(16,$out));
492
- &jnz (&label("ccm64_enc_outer"));
493
-
494
- &mov ("esp",&DWP(48,"esp")); # restore caller's esp
495
- &mov ($out,&wparam(5)); # reload cmac pointer
496
- &movups (&QWP(0,$out),$cmac); # write the updated CMAC back
497
-
498
- &pxor ("xmm0","xmm0"); # clear register bank
499
- &pxor ("xmm1","xmm1");
500
- &pxor ("xmm2","xmm2");
501
- &pxor ("xmm3","xmm3");
502
- &pxor ("xmm4","xmm4");
503
- &pxor ("xmm5","xmm5");
504
- &pxor ("xmm6","xmm6");
505
- &pxor ("xmm7","xmm7");
506
- &function_end("aesni_ccm64_encrypt_blocks");
507
-
508
- &function_begin("aesni_ccm64_decrypt_blocks"); # CCM64 decrypt: XORs each input block with E(counter) and folds the resulting output into the running CMAC
509
- &mov ($inp,&wparam(0)); # arg 0: in
510
- &mov ($out,&wparam(1)); # arg 1: out
511
- &mov ($len,&wparam(2)); # arg 2: block count; NOTE(review): no zero check, one block is written before $len is tested
512
- &mov ($key,&wparam(3)); # arg 3: key schedule
513
- &mov ($rounds_,&wparam(4)); # arg 4: ivec pointer, parked in $rounds_ until loaded
514
- &mov ($rounds,&wparam(5)); # arg 5: cmac pointer, parked in $rounds until loaded
515
- &mov ($key_,"esp"); # remember caller's esp
516
- &sub ("esp",60);
517
- &and ("esp",-16); # align stack
518
- &mov (&DWP(48,"esp"),$key_); # caller's esp is kept at 48(esp)
519
-
520
- &movdqu ($ivec,&QWP(0,$rounds_)); # load ivec
521
- &movdqu ($cmac,&QWP(0,$rounds)); # load cmac
522
- &mov ($rounds,&DWP(240,$key)); # $rounds = dword at offset 240 of the key schedule
523
-
524
- # compose byte-swap control mask for pshufb on stack
525
- &mov (&DWP(0,"esp"),0x0c0d0e0f);
526
- &mov (&DWP(4,"esp"),0x08090a0b);
527
- &mov (&DWP(8,"esp"),0x04050607);
528
- &mov (&DWP(12,"esp"),0x00010203);
529
-
530
- # compose counter increment vector on stack
531
- &mov ($rounds_,1);
532
- &xor ($key_,$key_);
533
- &mov (&DWP(16,"esp"),$rounds_); # dwords at 16..28(esp) become 1,0,0,0
534
- &mov (&DWP(20,"esp"),$key_);
535
- &mov (&DWP(24,"esp"),$key_);
536
- &mov (&DWP(28,"esp"),$key_);
537
-
538
- &movdqa ($inout3,&QWP(0,"esp")); # bswap mask
539
- &movdqa ($inout0,$ivec); # first counter block, exactly as loaded
540
-
541
- &mov ($key_,$key); # keep the schedule start
542
- &mov ($rounds_,$rounds); # keep the unscaled rounds value
543
-
544
- &pshufb ($ivec,$inout3); # keep a byte-swapped copy of ivec for paddq
545
- if ($inline) # both helpers are defined outside this chunk; presumably they encrypt $inout0 with $key/$rounds
546
- { &aesni_inline_generate1("enc"); }
547
- else
548
- { &call ("_aesni_encrypt1"); }
549
- &shl ($rounds_,4); # $rounds_ = 16*rounds
550
- &mov ($rounds,16);
551
- &movups ($in0,&QWP(0,$inp)); # load inp
552
- &paddq ($ivec,&QWP(16,"esp")); # add the increment vector built at 16(esp)
553
- &lea ($inp,&QWP(16,$inp));
554
- &sub ($rounds,$rounds_); # 16 - 16*rounds: negative index for the inner loop
555
- &lea ($key,&DWP(32,$key_,$rounds_)); # $key = schedule + 32 + 16*rounds
556
- &mov ($rounds_,$rounds); # $rounds_ now holds the negative index
557
- &jmp (&label("ccm64_dec_outer"));
558
-
559
- &set_label("ccm64_dec_outer",16);
560
- &xorps ($in0,$inout0); # inp ^= E(ivec)
561
- &movdqa ($inout0,$ivec); # next counter block, still byte-swapped
562
- &movups (&QWP(0,$out),$in0); # save output
563
- &lea ($out,&DWP(16,$out));
564
- &pshufb ($inout0,$inout3); # swap the next counter block back
565
-
566
- &sub ($len,1);
567
- &jz (&label("ccm64_dec_break")); # last block written: finish the CMAC outside the loop
568
-
569
- &$movekey ($rndkey0,&QWP(0,$key_));
570
- &mov ($rounds,$rounds_); # reset inner-loop index
571
- &$movekey ($rndkey1,&QWP(16,$key_));
572
- &xorps ($in0,$rndkey0);
573
- &xorps ($inout0,$rndkey0);
574
- &xorps ($cmac,$in0); # cmac^=out
575
- &$movekey ($rndkey0,&QWP(32,$key_));
576
-
577
- &set_label("ccm64_dec2_loop"); # two aesenc rounds per pass on both the counter block and the CMAC
578
- &aesenc ($inout0,$rndkey1);
579
- &aesenc ($cmac,$rndkey1);
580
- &$movekey ($rndkey1,&QWP(0,$key,$rounds));
581
- &add ($rounds,32); # index reaches 0 when the schedule is exhausted
582
- &aesenc ($inout0,$rndkey0);
583
- &aesenc ($cmac,$rndkey0);
584
- &$movekey ($rndkey0,&QWP(-16,$key,$rounds));
585
- &jnz (&label("ccm64_dec2_loop"));
586
- &movups ($in0,&QWP(0,$inp)); # load inp
587
- &paddq ($ivec,&QWP(16,"esp")); # advance the counter for the block after this one
588
- &aesenc ($inout0,$rndkey1);
589
- &aesenc ($cmac,$rndkey1);
590
- &aesenclast ($inout0,$rndkey0);
591
- &aesenclast ($cmac,$rndkey0);
592
- &lea ($inp,&QWP(16,$inp));
593
- &jmp (&label("ccm64_dec_outer"));
594
-
595
- &set_label("ccm64_dec_break",16);
596
- &mov ($rounds,&DWP(240,$key_)); # reload the unscaled rounds value
597
- &mov ($key,$key_); # back to the schedule start
598
- if ($inline) # NOTE(review): the two branches pass different arguments; the helpers are not visible here, confirm both end up encrypting $cmac with the last output folded in
599
- { &aesni_inline_generate1("enc",$cmac,$in0); }
600
- else
601
- { &call ("_aesni_encrypt1",$cmac); }
602
-
603
- &mov ("esp",&DWP(48,"esp")); # restore caller's esp
604
- &mov ($out,&wparam(5)); # reload cmac pointer
605
- &movups (&QWP(0,$out),$cmac); # write the updated CMAC back
606
-
607
- &pxor ("xmm0","xmm0"); # clear register bank
608
- &pxor ("xmm1","xmm1");
609
- &pxor ("xmm2","xmm2");
610
- &pxor ("xmm3","xmm3");
611
- &pxor ("xmm4","xmm4");
612
- &pxor ("xmm5","xmm5");
613
- &pxor ("xmm6","xmm6");
614
- &pxor ("xmm7","xmm7");
615
- &function_end("aesni_ccm64_decrypt_blocks");
616
- }
617
-
618
- ######################################################################
619
- # void aesni_ctr32_encrypt_blocks (const void *in, void *out,
620
- # size_t blocks, const AES_KEY *key,
621
- # const char *ivec);
622
- #
623
- # Handles only complete blocks, operates on 32-bit counter and
624
- # does not update *ivec! (see crypto/modes/ctr128.c for details)
625
- #
626
- # stack layout:
627
- # 0 pshufb mask
628
- # 16 vector addend: 0,6,6,6
629
- # 32 counter-less ivec
630
- # 48 1st triplet of counter vector
631
- # 64 2nd triplet of counter vector
632
- # 80 saved %esp
633
-
634
- &function_begin("aesni_ctr32_encrypt_blocks"); # CTR mode with a 32-bit counter: six blocks per main-loop pass, then a 1..5 block tail
635
- &mov ($inp,&wparam(0)); # arg 0: in
636
- &mov ($out,&wparam(1)); # arg 1: out
637
- &mov ($len,&wparam(2)); # arg 2: block count
638
- &mov ($key,&wparam(3)); # arg 3: key schedule
639
- &mov ($rounds_,&wparam(4)); # arg 4: ivec pointer, parked in $rounds_ until loaded
640
- &mov ($key_,"esp"); # remember caller's esp
641
- &sub ("esp",88);
642
- &and ("esp",-16); # align stack
643
- &mov (&DWP(80,"esp"),$key_); # caller's esp is kept at 80(esp)
644
-
645
- &cmp ($len,1);
646
- &je (&label("ctr32_one_shortcut")); # exactly one block: skip the counter-vector setup
647
-
648
- &movdqu ($inout5,&QWP(0,$rounds_)); # load ivec
649
-
650
- # compose byte-swap control mask for pshufb on stack
651
- &mov (&DWP(0,"esp"),0x0c0d0e0f);
652
- &mov (&DWP(4,"esp"),0x08090a0b);
653
- &mov (&DWP(8,"esp"),0x04050607);
654
- &mov (&DWP(12,"esp"),0x00010203);
655
-
656
- # compose counter increment vector on stack
657
- &mov ($rounds,6);
658
- &xor ($key_,$key_);
659
- &mov (&DWP(16,"esp"),$rounds); # dwords at 16..28(esp) become 6,6,6,0
660
- &mov (&DWP(20,"esp"),$rounds);
661
- &mov (&DWP(24,"esp"),$rounds);
662
- &mov (&DWP(28,"esp"),$key_);
663
-
664
- &pextrd ($rounds_,$inout5,3); # pull 32-bit counter
665
- &pinsrd ($inout5,$key_,3); # wipe 32-bit counter
666
-
667
- &mov ($rounds,&DWP(240,$key)); # key->rounds
668
-
669
- # compose 2 vectors of 3x32-bit counters
670
- &bswap ($rounds_); # byte-reverse the counter so inc/lea can step it
671
- &pxor ($rndkey0,$rndkey0);
672
- &pxor ($rndkey1,$rndkey1);
673
- &movdqa ($inout0,&QWP(0,"esp")); # load byte-swap mask
674
- &pinsrd ($rndkey0,$rounds_,0); # 1st triplet: counter+0, +1, +2
675
- &lea ($key_,&DWP(3,$rounds_)); # $key_ = counter+3
676
- &pinsrd ($rndkey1,$key_,0); # 2nd triplet: counter+3, +4, +5
677
- &inc ($rounds_);
678
- &pinsrd ($rndkey0,$rounds_,1);
679
- &inc ($key_);
680
- &pinsrd ($rndkey1,$key_,1);
681
- &inc ($rounds_);
682
- &pinsrd ($rndkey0,$rounds_,2);
683
- &inc ($key_);
684
- &pinsrd ($rndkey1,$key_,2);
685
- &movdqa (&QWP(48,"esp"),$rndkey0); # save 1st triplet
686
- &pshufb ($rndkey0,$inout0); # byte swap
687
- &movdqu ($inout4,&QWP(0,$key)); # key[0]
688
- &movdqa (&QWP(64,"esp"),$rndkey1); # save 2nd triplet
689
- &pshufb ($rndkey1,$inout0); # byte swap
690
-
691
- &pshufd ($inout0,$rndkey0,3<<6); # place counter to upper dword
692
- &pshufd ($inout1,$rndkey0,2<<6);
693
- &cmp ($len,6);
694
- &jb (&label("ctr32_tail")); # fewer than six blocks: tail only
695
- &pxor ($inout5,$inout4); # counter-less ivec^key[0]
696
- &shl ($rounds,4); # $rounds *= 16
697
- &mov ($rounds_,16);
698
- &movdqa (&QWP(32,"esp"),$inout5); # save counter-less ivec^key[0]
699
- &mov ($key_,$key); # backup $key
700
- &sub ($rounds_,$rounds); # backup twisted $rounds
701
- &lea ($key,&DWP(32,$key,$rounds)); # $key = schedule + 32 + 16*rounds
702
- &sub ($len,6);
703
- &jmp (&label("ctr32_loop6"));
704
-
705
- &set_label("ctr32_loop6",16);
706
- # inlining _aesni_encrypt6's prologue gives ~6% improvement...
707
- &pshufd ($inout2,$rndkey0,1<<6);
708
- &movdqa ($rndkey0,&QWP(32,"esp")); # pull counter-less ivec
709
- &pshufd ($inout3,$rndkey1,3<<6);
710
- &pxor ($inout0,$rndkey0); # merge counter-less ivec
711
- &pshufd ($inout4,$rndkey1,2<<6);
712
- &pxor ($inout1,$rndkey0);
713
- &pshufd ($inout5,$rndkey1,1<<6);
714
- &$movekey ($rndkey1,&QWP(16,$key_));
715
- &pxor ($inout2,$rndkey0);
716
- &pxor ($inout3,$rndkey0);
717
- &aesenc ($inout0,$rndkey1);
718
- &pxor ($inout4,$rndkey0);
719
- &pxor ($inout5,$rndkey0);
720
- &aesenc ($inout1,$rndkey1);
721
- &$movekey ($rndkey0,&QWP(32,$key_));
722
- &mov ($rounds,$rounds_); # reset the twisted round index
723
- &aesenc ($inout2,$rndkey1);
724
- &aesenc ($inout3,$rndkey1);
725
- &aesenc ($inout4,$rndkey1);
726
- &aesenc ($inout5,$rndkey1);
727
-
728
- &call (&label("_aesni_encrypt6_enter")); # label defined outside this chunk
729
-
730
- &movups ($rndkey1,&QWP(0,$inp));
731
- &movups ($rndkey0,&QWP(0x10,$inp));
732
- &xorps ($inout0,$rndkey1);
733
- &movups ($rndkey1,&QWP(0x20,$inp));
734
- &xorps ($inout1,$rndkey0);
735
- &movups (&QWP(0,$out),$inout0);
736
- &movdqa ($rndkey0,&QWP(16,"esp")); # load increment
737
- &xorps ($inout2,$rndkey1);
738
- &movdqa ($rndkey1,&QWP(64,"esp")); # load 2nd triplet
739
- &movups (&QWP(0x10,$out),$inout1);
740
- &movups (&QWP(0x20,$out),$inout2);
741
-
742
- &paddd ($rndkey1,$rndkey0); # 2nd triplet increment
743
- &paddd ($rndkey0,&QWP(48,"esp")); # 1st triplet increment
744
- &movdqa ($inout0,&QWP(0,"esp")); # load byte swap mask
745
-
746
- &movups ($inout1,&QWP(0x30,$inp));
747
- &movups ($inout2,&QWP(0x40,$inp));
748
- &xorps ($inout3,$inout1);
749
- &movups ($inout1,&QWP(0x50,$inp));
750
- &lea ($inp,&DWP(0x60,$inp)); # advance input by six blocks
751
- &movdqa (&QWP(48,"esp"),$rndkey0); # save 1st triplet
752
- &pshufb ($rndkey0,$inout0); # byte swap
753
- &xorps ($inout4,$inout2);
754
- &movups (&QWP(0x30,$out),$inout3);
755
- &xorps ($inout5,$inout1);
756
- &movdqa (&QWP(64,"esp"),$rndkey1); # save 2nd triplet
757
- &pshufb ($rndkey1,$inout0); # byte swap
758
- &movups (&QWP(0x40,$out),$inout4);
759
- &pshufd ($inout0,$rndkey0,3<<6);
760
- &movups (&QWP(0x50,$out),$inout5);
761
- &lea ($out,&DWP(0x60,$out)); # advance output by six blocks
762
-
763
- &pshufd ($inout1,$rndkey0,2<<6);
764
- &sub ($len,6);
765
- &jnc (&label("ctr32_loop6")); # keep looping while the subtraction does not borrow
766
-
767
- &add ($len,6); # undo the last subtraction: 0..5 blocks remain
768
- &jz (&label("ctr32_ret"));
769
- &movdqu ($inout5,&QWP(0,$key_));
770
- &mov ($key,$key_); # back to the schedule start
771
- &pxor ($inout5,&QWP(32,"esp")); # restore count-less ivec
772
- &mov ($rounds,&DWP(240,$key_)); # restore $rounds
773
-
774
- &set_label("ctr32_tail");
775
- &por ($inout0,$inout5); # merge counter-less ivec into the counter lane
776
- &cmp ($len,2);
777
- &jb (&label("ctr32_one")); # NOTE(review): $len == 0 on entry also lands here and processes one block
778
-
779
- &pshufd ($inout2,$rndkey0,1<<6);
780
- &por ($inout1,$inout5);
781
- &je (&label("ctr32_two")); # still the result of cmp $len,2
782
-
783
- &pshufd ($inout3,$rndkey1,3<<6);
784
- &por ($inout2,$inout5);
785
- &cmp ($len,4);
786
- &jb (&label("ctr32_three"));
787
-
788
- &pshufd ($inout4,$rndkey1,2<<6);
789
- &por ($inout3,$inout5);
790
- &je (&label("ctr32_four")); # still the result of cmp $len,4
791
-
792
- &por ($inout4,$inout5);
793
- &call ("_aesni_encrypt6"); # five blocks left: only $inout0..$inout4 are stored below
794
- &movups ($rndkey1,&QWP(0,$inp));
795
- &movups ($rndkey0,&QWP(0x10,$inp));
796
- &xorps ($inout0,$rndkey1);
797
- &movups ($rndkey1,&QWP(0x20,$inp));
798
- &xorps ($inout1,$rndkey0);
799
- &movups ($rndkey0,&QWP(0x30,$inp));
800
- &xorps ($inout2,$rndkey1);
801
- &movups ($rndkey1,&QWP(0x40,$inp));
802
- &xorps ($inout3,$rndkey0);
803
- &movups (&QWP(0,$out),$inout0);
804
- &xorps ($inout4,$rndkey1);
805
- &movups (&QWP(0x10,$out),$inout1);
806
- &movups (&QWP(0x20,$out),$inout2);
807
- &movups (&QWP(0x30,$out),$inout3);
808
- &movups (&QWP(0x40,$out),$inout4);
809
- &jmp (&label("ctr32_ret"));
810
-
811
- &set_label("ctr32_one_shortcut",16);
812
- &movups ($inout0,&QWP(0,$rounds_)); # load ivec
813
- &mov ($rounds,&DWP(240,$key)); # key->rounds
814
-
815
- &set_label("ctr32_one");
816
- if ($inline) # helpers defined outside this chunk; presumably they encrypt $inout0
817
- { &aesni_inline_generate1("enc"); }
818
- else
819
- { &call ("_aesni_encrypt1"); }
820
- &movups ($in0,&QWP(0,$inp));
821
- &xorps ($in0,$inout0);
822
- &movups (&QWP(0,$out),$in0);
823
- &jmp (&label("ctr32_ret"));
824
-
825
- &set_label("ctr32_two",16);
826
- &call ("_aesni_encrypt2");
827
- &movups ($inout3,&QWP(0,$inp));
828
- &movups ($inout4,&QWP(0x10,$inp));
829
- &xorps ($inout0,$inout3);
830
- &xorps ($inout1,$inout4);
831
- &movups (&QWP(0,$out),$inout0);
832
- &movups (&QWP(0x10,$out),$inout1);
833
- &jmp (&label("ctr32_ret"));
834
-
835
- &set_label("ctr32_three",16);
836
- &call ("_aesni_encrypt3");
837
- &movups ($inout3,&QWP(0,$inp));
838
- &movups ($inout4,&QWP(0x10,$inp));
839
- &xorps ($inout0,$inout3);
840
- &movups ($inout5,&QWP(0x20,$inp));
841
- &xorps ($inout1,$inout4);
842
- &movups (&QWP(0,$out),$inout0);
843
- &xorps ($inout2,$inout5);
844
- &movups (&QWP(0x10,$out),$inout1);
845
- &movups (&QWP(0x20,$out),$inout2);
846
- &jmp (&label("ctr32_ret"));
847
-
848
- &set_label("ctr32_four",16);
849
- &call ("_aesni_encrypt4");
850
- &movups ($inout4,&QWP(0,$inp));
851
- &movups ($inout5,&QWP(0x10,$inp));
852
- &movups ($rndkey1,&QWP(0x20,$inp));
853
- &xorps ($inout0,$inout4);
854
- &movups ($rndkey0,&QWP(0x30,$inp));
855
- &xorps ($inout1,$inout5);
856
- &movups (&QWP(0,$out),$inout0);
857
- &xorps ($inout2,$rndkey1);
858
- &movups (&QWP(0x10,$out),$inout1);
859
- &xorps ($inout3,$rndkey0);
860
- &movups (&QWP(0x20,$out),$inout2);
861
- &movups (&QWP(0x30,$out),$inout3); # falls through into ctr32_ret
862
-
863
- &set_label("ctr32_ret");
864
- &pxor ("xmm0","xmm0"); # clear register bank
865
- &pxor ("xmm1","xmm1");
866
- &pxor ("xmm2","xmm2");
867
- &pxor ("xmm3","xmm3");
868
- &pxor ("xmm4","xmm4");
869
- &movdqa (&QWP(32,"esp"),"xmm0"); # clear stack
870
- &pxor ("xmm5","xmm5");
871
- &movdqa (&QWP(48,"esp"),"xmm0"); # wipe saved 1st triplet
872
- &pxor ("xmm6","xmm6");
873
- &movdqa (&QWP(64,"esp"),"xmm0"); # wipe saved 2nd triplet
874
- &pxor ("xmm7","xmm7");
875
- &mov ("esp",&DWP(80,"esp")); # restore caller's esp
876
- &function_end("aesni_ctr32_encrypt_blocks");
877
-
878
- ######################################################################
879
- # Mechanical port from aesni-x86_64.pl.
880
- #
881
- # _aesni_set_encrypt_key is private interface,
882
- # input:
883
- # "eax" const unsigned char *userKey
884
- # $rounds int bits
885
- # $key AES_KEY *key
886
- # output:
887
- # "eax" return code (0 on success, -1 for a null pointer, -2 for unsupported bits)
888
- # $rounds rounds-1 on success (9, 11 or 13); the same value is stored at offset 240 of *key
889
-
890
- &function_begin_B("_aesni_set_encrypt_key");
891
- &push ("ebp"); # ebp and ebx are saved here and popped again on every exit path
892
- &push ("ebx");
893
- &test ("eax","eax");
894
- &jz (&label("bad_pointer")); # userKey == NULL
895
- &test ($key,$key);
896
- &jz (&label("bad_pointer")); # key == NULL
897
-
898
- &call (&label("pic")); # call/pop pair obtains the address of the "pic" label
899
- &set_label("pic");
900
- &blindpop("ebx");
901
- &lea ("ebx",&DWP(&label("key_const")."-".&label("pic"),"ebx")); # ebx = address of key_const
902
-
903
- &picmeup("ebp","OPENSSL_ia32cap_P","ebx",&label("key_const")); # ebp = address of OPENSSL_ia32cap_P (presumably; helper defined outside this chunk)
904
- &movups ("xmm0",&QWP(0,"eax")); # pull first 128 bits of *userKey
905
- &xorps ("xmm4","xmm4"); # low dword of xmm4 is assumed 0
906
- &mov ("ebp",&DWP(4,"ebp")); # second dword of the capability vector
907
- &lea ($key,&DWP(16,$key)); # $key now points one round key past the schedule start
908
- &and ("ebp",1<<28|1<<11); # AVX and XOP bits
909
- &cmp ($rounds,256); # $rounds still holds the requested key size in bits
910
- &je (&label("14rounds"));
911
- &cmp ($rounds,192);
912
- &je (&label("12rounds"));
913
- &cmp ($rounds,128);
914
- &jne (&label("bad_keybits")); # 128 falls through into the "10rounds" code that follows
915
-
916
- &set_label("10rounds",16); # 128-bit key expansion via aeskeygenassist
916
- &cmp ("ebp",1<<28);
917
- &je (&label("10rounds_alt")); # bit 28 set and bit 11 clear: use the alternative path
918
-
919
- &mov ($rounds,9); # value later stored in the schedule and returned in $rounds
920
- &$movekey (&QWP(-16,$key),"xmm0"); # round 0
921
- &aeskeygenassist("xmm1","xmm0",0x01); # round 1
922
- &call (&label("key_128_cold")); # first step: nothing to store yet, so enter past the store
923
- &aeskeygenassist("xmm1","xmm0",0x2); # round 2
924
- &call (&label("key_128"));
925
- &aeskeygenassist("xmm1","xmm0",0x04); # round 3
926
- &call (&label("key_128"));
927
- &aeskeygenassist("xmm1","xmm0",0x08); # round 4
928
- &call (&label("key_128"));
929
- &aeskeygenassist("xmm1","xmm0",0x10); # round 5
930
- &call (&label("key_128"));
931
- &aeskeygenassist("xmm1","xmm0",0x20); # round 6
932
- &call (&label("key_128"));
933
- &aeskeygenassist("xmm1","xmm0",0x40); # round 7
934
- &call (&label("key_128"));
935
- &aeskeygenassist("xmm1","xmm0",0x80); # round 8
936
- &call (&label("key_128"));
937
- &aeskeygenassist("xmm1","xmm0",0x1b); # round 9
938
- &call (&label("key_128"));
939
- &aeskeygenassist("xmm1","xmm0",0x36); # round 10
940
- &call (&label("key_128"));
941
- &$movekey (&QWP(0,$key),"xmm0"); # store the last round key
942
- &mov (&DWP(80,$key),$rounds); # $key has advanced to schedule+160, so this writes schedule+240
943
-
944
- &jmp (&label("good_key"));
945
-
946
- &set_label("key_128",16); # helper: store the round key in xmm0, advance $key, derive the next one
947
- &$movekey (&QWP(0,$key),"xmm0");
948
- &lea ($key,&DWP(16,$key));
949
- &set_label("key_128_cold"); # entry that skips the store/advance
950
- &shufps ("xmm4","xmm0",0b00010000);
951
- &xorps ("xmm0","xmm4");
952
- &shufps ("xmm4","xmm0",0b10001100);
953
- &xorps ("xmm0","xmm4");
954
- &shufps ("xmm1","xmm1",0b11111111); # critical path
955
- &xorps ("xmm0","xmm1"); # fold in the broadcast aeskeygenassist dword
956
- &ret();
958
-
959
- &set_label("10rounds_alt",16); # 128-bit key expansion using pshufb+aesenclast instead of aeskeygenassist
959
- &movdqa ("xmm5",&QWP(0x00,"ebx")); # pshufb mask: first row of key_const
960
- &mov ($rounds,8); # loop counter: eight round keys are produced in the loop
961
- &movdqa ("xmm4",&QWP(0x20,"ebx")); # key_const+0x20: initial aesenclast operand, doubled each pass by pslld
962
- &movdqa ("xmm2","xmm0"); # xmm2 tracks the previous round key
963
- &movdqu (&QWP(-16,$key),"xmm0"); # round 0 = user key
964
-
965
- &set_label("loop_key128");
966
- &pshufb ("xmm0","xmm5");
967
- &aesenclast ("xmm0","xmm4");
968
- &pslld ("xmm4",1); # shift the constant left by one bit for the next pass
969
- &lea ($key,&DWP(16,$key));
970
-
971
- &movdqa ("xmm3","xmm2"); # xmm2 ^= xmm2<<32 ^ xmm2<<64 ^ xmm2<<96 (byte shifts of 4, 8, 12)
972
- &pslldq ("xmm2",4);
973
- &pxor ("xmm3","xmm2");
974
- &pslldq ("xmm2",4);
975
- &pxor ("xmm3","xmm2");
976
- &pslldq ("xmm2",4);
977
- &pxor ("xmm2","xmm3");
978
-
979
- &pxor ("xmm0","xmm2"); # new round key
980
- &movdqu (&QWP(-16,$key),"xmm0"); # store it ($key was already advanced)
981
- &movdqa ("xmm2","xmm0");
982
-
983
- &dec ($rounds);
984
- &jnz (&label("loop_key128"));
985
-
986
- &movdqa ("xmm4",&QWP(0x30,"ebx")); # key_const+0x30: the 0x1b row, used for the next round key
987
-
988
- &pshufb ("xmm0","xmm5");
989
- &aesenclast ("xmm0","xmm4");
990
- &pslld ("xmm4",1); # 0x1b -> 0x36 for the final round key
991
-
992
- &movdqa ("xmm3","xmm2");
993
- &pslldq ("xmm2",4);
994
- &pxor ("xmm3","xmm2");
995
- &pslldq ("xmm2",4);
996
- &pxor ("xmm3","xmm2");
997
- &pslldq ("xmm2",4);
998
- &pxor ("xmm2","xmm3");
999
-
1000
- &pxor ("xmm0","xmm2");
1001
- &movdqu (&QWP(0,$key),"xmm0"); # ninth round key ($key is not advanced any more)
1002
-
1003
- &movdqa ("xmm2","xmm0");
1004
- &pshufb ("xmm0","xmm5");
1005
- &aesenclast ("xmm0","xmm4");
1006
-
1007
- &movdqa ("xmm3","xmm2");
1008
- &pslldq ("xmm2",4);
1009
- &pxor ("xmm3","xmm2");
1010
- &pslldq ("xmm2",4);
1011
- &pxor ("xmm3","xmm2");
1012
- &pslldq ("xmm2",4);
1013
- &pxor ("xmm2","xmm3");
1014
-
1015
- &pxor ("xmm0","xmm2");
1016
- &movdqu (&QWP(16,$key),"xmm0"); # tenth (last) round key
1017
-
1018
- &mov ($rounds,9); # value stored in the schedule and returned in $rounds
1019
- &mov (&DWP(96,$key),$rounds); # $key is at schedule+144 here, so this writes schedule+240
1020
-
1021
- &jmp (&label("good_key"));
1023
-
1024
- &set_label("12rounds",16); # 192-bit key expansion via aeskeygenassist
1024
- &movq ("xmm2",&QWP(16,"eax")); # remaining 1/3 of *userKey
1025
- &cmp ("ebp",1<<28);
1026
- &je (&label("12rounds_alt")); # bit 28 set and bit 11 clear: use the alternative path
1027
-
1028
- &mov ($rounds,11); # value later stored in the schedule and returned in $rounds
1029
- &$movekey (&QWP(-16,$key),"xmm0"); # round 0
1030
- &aeskeygenassist("xmm1","xmm2",0x01); # round 1,2
1031
- &call (&label("key_192a_cold")); # first step: nothing to store yet
1032
- &aeskeygenassist("xmm1","xmm2",0x02); # round 2,3
1033
- &call (&label("key_192b"));
1034
- &aeskeygenassist("xmm1","xmm2",0x04); # round 4,5
1035
- &call (&label("key_192a"));
1036
- &aeskeygenassist("xmm1","xmm2",0x08); # round 5,6
1037
- &call (&label("key_192b"));
1038
- &aeskeygenassist("xmm1","xmm2",0x10); # round 7,8
1039
- &call (&label("key_192a"));
1040
- &aeskeygenassist("xmm1","xmm2",0x20); # round 8,9
1041
- &call (&label("key_192b"));
1042
- &aeskeygenassist("xmm1","xmm2",0x40); # round 10,11
1043
- &call (&label("key_192a"));
1044
- &aeskeygenassist("xmm1","xmm2",0x80); # round 11,12
1045
- &call (&label("key_192b"));
1046
- &$movekey (&QWP(0,$key),"xmm0"); # store the last round key
1047
- &mov (&DWP(48,$key),$rounds); # $key has advanced to schedule+192, so this writes schedule+240
1048
-
1049
- &jmp (&label("good_key"));
1050
-
1051
- &set_label("key_192a",16); # helper: store one round key from xmm0, advance $key by 16, then expand
1052
- &$movekey (&QWP(0,$key),"xmm0");
1053
- &lea ($key,&DWP(16,$key));
1054
- &set_label("key_192a_cold",16); # entry that skips the store/advance
1055
- &movaps ("xmm5","xmm2"); # keep a copy of xmm2 for key_192b's first shufps
1056
- &set_label("key_192b_warm"); # shared expansion step; key_192b jumps here after its stores
1057
- &shufps ("xmm4","xmm0",0b00010000);
1058
- &movdqa ("xmm3","xmm2");
1059
- &xorps ("xmm0","xmm4");
1060
- &shufps ("xmm4","xmm0",0b10001100);
1061
- &pslldq ("xmm3",4);
1062
- &xorps ("xmm0","xmm4");
1063
- &pshufd ("xmm1","xmm1",0b01010101); # critical path
1064
- &pxor ("xmm2","xmm3");
1065
- &pxor ("xmm0","xmm1");
1066
- &pshufd ("xmm3","xmm0",0b11111111);
1067
- &pxor ("xmm2","xmm3");
1068
- &ret();
1069
-
1070
- &set_label("key_192b",16); # helper: store two round keys assembled from xmm5/xmm0/xmm2, advance $key by 32
1071
- &movaps ("xmm3","xmm0");
1072
- &shufps ("xmm5","xmm0",0b01000100);
1073
- &$movekey (&QWP(0,$key),"xmm5");
1074
- &shufps ("xmm3","xmm2",0b01001110);
1075
- &$movekey (&QWP(16,$key),"xmm3");
1076
- &lea ($key,&DWP(32,$key));
1077
- &jmp (&label("key_192b_warm")); # returns through key_192b_warm's ret
1079
-
1080
- &set_label("12rounds_alt",16); # 192-bit key expansion using pshufb+aesenclast; emits 24 bytes of schedule per pass
1080
- &movdqa ("xmm5",&QWP(0x10,"ebx")); # pshufb mask: second row of key_const
1081
- &movdqa ("xmm4",&QWP(0x20,"ebx")); # key_const+0x20: initial aesenclast operand, doubled each pass by pslld
1082
- &mov ($rounds,8); # loop counter
1083
- &movdqu (&QWP(-16,$key),"xmm0"); # first 16 bytes of the schedule = first 128 bits of the user key
1084
-
1085
- &set_label("loop_key192");
1086
- &movq (&QWP(0,$key),"xmm2"); # store the low 8 bytes of xmm2
1087
- &movdqa ("xmm1","xmm2");
1088
- &pshufb ("xmm2","xmm5");
1089
- &aesenclast ("xmm2","xmm4");
1090
- &pslld ("xmm4",1); # shift the constant left by one bit for the next pass
1091
- &lea ($key,&DWP(24,$key)); # advance by 24 bytes
1092
-
1093
- &movdqa ("xmm3","xmm0"); # xmm0 ^= xmm0<<32 ^ xmm0<<64 ^ xmm0<<96 (byte shifts of 4, 8, 12)
1094
- &pslldq ("xmm0",4);
1095
- &pxor ("xmm3","xmm0");
1096
- &pslldq ("xmm0",4);
1097
- &pxor ("xmm3","xmm0");
1098
- &pslldq ("xmm0",4);
1099
- &pxor ("xmm0","xmm3");
1100
-
1101
- &pshufd ("xmm3","xmm0",0xff); # broadcast the top dword of xmm0
1102
- &pxor ("xmm3","xmm1");
1103
- &pslldq ("xmm1",4);
1104
- &pxor ("xmm3","xmm1");
1105
-
1106
- &pxor ("xmm0","xmm2");
1107
- &pxor ("xmm2","xmm3");
1108
- &movdqu (&QWP(-16,$key),"xmm0"); # lands directly after the 8 bytes written by the movq above
1109
-
1110
- &dec ($rounds);
1111
- &jnz (&label("loop_key192"));
1112
-
1113
- &mov ($rounds,11); # value stored in the schedule and returned in $rounds
1114
- &mov (&DWP(32,$key),$rounds); # $key is at schedule+208 here, so this writes schedule+240
1115
-
1116
- &jmp (&label("good_key"));
1118
-
1119
- &set_label("14rounds",16); # 256-bit key expansion via aeskeygenassist
1119
- &movups ("xmm2",&QWP(16,"eax")); # remaining half of *userKey
1120
- &lea ($key,&DWP(16,$key)); # $key now points two round keys past the schedule start
1121
- &cmp ("ebp",1<<28);
1122
- &je (&label("14rounds_alt")); # bit 28 set and bit 11 clear: use the alternative path
1123
-
1124
- &mov ($rounds,13); # value later stored in the schedule and returned in $rounds
1125
- &$movekey (&QWP(-32,$key),"xmm0"); # round 0
1126
- &$movekey (&QWP(-16,$key),"xmm2"); # round 1
1127
- &aeskeygenassist("xmm1","xmm2",0x01); # round 2
1128
- &call (&label("key_256a_cold")); # first step: nothing to store yet
1129
- &aeskeygenassist("xmm1","xmm0",0x01); # round 3
1130
- &call (&label("key_256b"));
1131
- &aeskeygenassist("xmm1","xmm2",0x02); # round 4
1132
- &call (&label("key_256a"));
1133
- &aeskeygenassist("xmm1","xmm0",0x02); # round 5
1134
- &call (&label("key_256b"));
1135
- &aeskeygenassist("xmm1","xmm2",0x04); # round 6
1136
- &call (&label("key_256a"));
1137
- &aeskeygenassist("xmm1","xmm0",0x04); # round 7
1138
- &call (&label("key_256b"));
1139
- &aeskeygenassist("xmm1","xmm2",0x08); # round 8
1140
- &call (&label("key_256a"));
1141
- &aeskeygenassist("xmm1","xmm0",0x08); # round 9
1142
- &call (&label("key_256b"));
1143
- &aeskeygenassist("xmm1","xmm2",0x10); # round 10
1144
- &call (&label("key_256a"));
1145
- &aeskeygenassist("xmm1","xmm0",0x10); # round 11
1146
- &call (&label("key_256b"));
1147
- &aeskeygenassist("xmm1","xmm2",0x20); # round 12
1148
- &call (&label("key_256a"));
1149
- &aeskeygenassist("xmm1","xmm0",0x20); # round 13
1150
- &call (&label("key_256b"));
1151
- &aeskeygenassist("xmm1","xmm2",0x40); # round 14
1152
- &call (&label("key_256a"));
1153
- &$movekey (&QWP(0,$key),"xmm0"); # store the last round key
1154
- &mov (&DWP(16,$key),$rounds); # $key has advanced to schedule+224, so this writes schedule+240
1155
- &xor ("eax","eax"); # eax is zeroed again at good_key
1156
-
1157
- &jmp (&label("good_key"));
1158
-
1159
- &set_label("key_256a",16); # helper: store xmm2, advance $key, derive the next xmm0
1160
- &$movekey (&QWP(0,$key),"xmm2");
1161
- &lea ($key,&DWP(16,$key));
1162
- &set_label("key_256a_cold"); # entry that skips the store/advance
1163
- &shufps ("xmm4","xmm0",0b00010000);
1164
- &xorps ("xmm0","xmm4");
1165
- &shufps ("xmm4","xmm0",0b10001100);
1166
- &xorps ("xmm0","xmm4");
1167
- &shufps ("xmm1","xmm1",0b11111111); # critical path
1168
- &xorps ("xmm0","xmm1");
1169
- &ret();
1170
-
1171
- &set_label("key_256b",16); # helper: store xmm0, advance $key, derive the next xmm2
1172
- &$movekey (&QWP(0,$key),"xmm0");
1173
- &lea ($key,&DWP(16,$key));
1174
-
1175
- &shufps ("xmm4","xmm2",0b00010000);
1176
- &xorps ("xmm2","xmm4");
1177
- &shufps ("xmm4","xmm2",0b10001100);
1178
- &xorps ("xmm2","xmm4");
1179
- &shufps ("xmm1","xmm1",0b10101010); # critical path
1180
- &xorps ("xmm2","xmm1");
1181
- &ret();
1183
-
1184
- &set_label("14rounds_alt",16); # 256-bit key expansion using pshufb+aesenclast instead of aeskeygenassist
1184
- &movdqa ("xmm5",&QWP(0x00,"ebx")); # pshufb mask: first row of key_const
1185
- &movdqa ("xmm4",&QWP(0x20,"ebx")); # key_const+0x20: initial aesenclast operand, doubled each pass by pslld
1186
- &mov ($rounds,7); # loop counter: seven passes, the last one leaves through done_key256
1187
- &movdqu (&QWP(-32,$key),"xmm0"); # round 0 = first half of the user key
1188
- &movdqa ("xmm1","xmm2"); # xmm1 tracks the previous odd round key
1189
- &movdqu (&QWP(-16,$key),"xmm2"); # round 1 = second half of the user key
1190
-
1191
- &set_label("loop_key256");
1192
- &pshufb ("xmm2","xmm5");
1193
- &aesenclast ("xmm2","xmm4");
1194
-
1195
- &movdqa ("xmm3","xmm0"); # xmm0 ^= xmm0<<32 ^ xmm0<<64 ^ xmm0<<96 (byte shifts of 4, 8, 12)
1196
- &pslldq ("xmm0",4);
1197
- &pxor ("xmm3","xmm0");
1198
- &pslldq ("xmm0",4);
1199
- &pxor ("xmm3","xmm0");
1200
- &pslldq ("xmm0",4);
1201
- &pxor ("xmm0","xmm3");
1202
- &pslld ("xmm4",1); # shift the constant left by one bit for the next pass
1203
-
1204
- &pxor ("xmm0","xmm2"); # next even round key
1205
- &movdqu (&QWP(0,$key),"xmm0");
1206
-
1207
- &dec ($rounds);
1208
- &jz (&label("done_key256")); # the final pass produces only the even round key
1209
-
1210
- &pshufd ("xmm2","xmm0",0xff); # broadcast the top dword of the new key
1211
- &pxor ("xmm3","xmm3");
1212
- &aesenclast ("xmm2","xmm3"); # aesenclast with an all-zero operand
1213
-
1214
- &movdqa ("xmm3","xmm1"); # terminator restored: without it this call and the next were parsed as one bitwise-AND expression
1215
- &pslldq ("xmm1",4);
1216
- &pxor ("xmm3","xmm1");
1217
- &pslldq ("xmm1",4);
1218
- &pxor ("xmm3","xmm1");
1219
- &pslldq ("xmm1",4);
1220
- &pxor ("xmm1","xmm3");
1221
-
1222
- &pxor ("xmm2","xmm1"); # next odd round key
1223
- &movdqu (&QWP(16,$key),"xmm2");
1224
- &lea ($key,&DWP(32,$key)); # two round keys written per full pass
1225
- &movdqa ("xmm1","xmm2");
1226
- &jmp (&label("loop_key256"));
1227
-
1228
- &set_label("done_key256");
1229
- &mov ($rounds,13); # value stored in the schedule and returned in $rounds
1230
- &mov (&DWP(16,$key),$rounds); # $key is at schedule+224 here, so this writes schedule+240
1232
-
1233
- &set_label("good_key"); # success exit shared by every key-size path: wipe key material, return 0
1233
- &pxor ("xmm0","xmm0");
1234
- &pxor ("xmm1","xmm1");
1235
- &pxor ("xmm2","xmm2");
1236
- &pxor ("xmm3","xmm3");
1237
- &pxor ("xmm4","xmm4");
1238
- &pxor ("xmm5","xmm5");
1239
- &xor ("eax","eax"); # return code 0
1240
- &pop ("ebx");
1241
- &pop ("ebp");
1242
- &ret ();
1243
-
1244
- &set_label("bad_pointer",4); # reached before any key bytes are loaded, so no registers need wiping
1245
- &mov ("eax",-1); # return code -1
1246
- &pop ("ebx");
1247
- &pop ("ebp");
1248
- &ret ();
1249
- &set_label("bad_keybits",4); # bits was not 128, 192 or 256
1250
- &pxor ("xmm0","xmm0"); # xmm0 already holds the first 128 bits of the user key: wipe it
1251
- &mov ("eax",-2); # return code -2
1252
- &pop ("ebx");
1253
- &pop ("ebp");
1254
- &ret ();
1255
- &function_end_B("_aesni_set_encrypt_key");
1257
-
1258
- # int $PREFIX_set_encrypt_key (const unsigned char *userKey, int bits,
1259
- # AES_KEY *key)
1260
- &function_begin_B("${PREFIX}_set_encrypt_key"); # public wrapper: loads the stack arguments into the registers _aesni_set_encrypt_key expects
1261
- &mov ("eax",&wparam(0)); # userKey
1262
- &mov ($rounds,&wparam(1)); # bits
1263
- &mov ($key,&wparam(2)); # key
1264
- &call ("_aesni_set_encrypt_key");
1265
- &ret (); # eax is returned exactly as the callee left it
1266
- &function_end_B("${PREFIX}_set_encrypt_key");
1267
-
1268
- # int $PREFIX_set_decrypt_key (const unsigned char *userKey, int bits,
1269
- # AES_KEY *key)
1270
- &function_begin_B("${PREFIX}_set_decrypt_key"); # builds the encryption schedule, then reverses it in place, applying aesimc to every round key except the first and last
1271
- &mov ("eax",&wparam(0)); # userKey
1272
- &mov ($rounds,&wparam(1)); # bits
1273
- &mov ($key,&wparam(2)); # key
1274
- &call ("_aesni_set_encrypt_key");
1275
- &mov ($key,&wparam(2)); # reload: the callee advanced $key
1276
- &shl ($rounds,4); # rounds-1 after _aesni_set_encrypt_key
1277
- &test ("eax","eax");
1278
- &jnz (&label("dec_key_ret")); # pass the callee's error code through unchanged
1279
- &lea ("eax",&DWP(16,$key,$rounds)); # end of key schedule
1280
-
1281
- &$movekey ("xmm0",&QWP(0,$key)); # just swap
1282
- &$movekey ("xmm1",&QWP(0,"eax"));
1283
- &$movekey (&QWP(0,"eax"),"xmm0");
1284
- &$movekey (&QWP(0,$key),"xmm1");
1285
- &lea ($key,&DWP(16,$key)); # move both cursors one round key toward the middle
1286
- &lea ("eax",&DWP(-16,"eax"));
1287
-
1288
- &set_label("dec_key_inverse");
1289
- &$movekey ("xmm0",&QWP(0,$key)); # swap and inverse
1290
- &$movekey ("xmm1",&QWP(0,"eax"));
1291
- &aesimc ("xmm0","xmm0");
1292
- &aesimc ("xmm1","xmm1");
1293
- &lea ($key,&DWP(16,$key));
1294
- &lea ("eax",&DWP(-16,"eax"));
1295
- &$movekey (&QWP(16,"eax"),"xmm0"); # offsets compensate for the cursor moves just above
1296
- &$movekey (&QWP(-16,$key),"xmm1");
1297
- &cmp ("eax",$key);
1298
- &ja (&label("dec_key_inverse")); # loop until the two cursors meet
1299
-
1300
- &$movekey ("xmm0",&QWP(0,$key)); # inverse middle
1301
- &aesimc ("xmm0","xmm0");
1302
- &$movekey (&QWP(0,$key),"xmm0");
1303
-
1304
- &pxor ("xmm0","xmm0"); # wipe round-key material from the registers
1305
- &pxor ("xmm1","xmm1");
1306
- &xor ("eax","eax"); # return success
1307
- &set_label("dec_key_ret");
1308
- &ret ();
1309
- &function_end_B("${PREFIX}_set_decrypt_key");
1310
-
1311
- &set_label("key_const",64); # constant table addressed through ebx by the *_alt key-expansion paths; rows presumably 16 bytes each, matching the 0x00/0x10/0x20/0x30 offsets used there
1312
- &data_word(0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d); # +0x00: pshufb mask loaded by 10rounds_alt and 14rounds_alt
1313
- &data_word(0x04070605,0x04070605,0x04070605,0x04070605); # +0x10: pshufb mask loaded by 12rounds_alt
1314
- &data_word(1,1,1,1); # +0x20: initial aesenclast operand, shifted left one bit per pass
1315
- &data_word(0x1b,0x1b,0x1b,0x1b); # +0x30: operand reloaded by 10rounds_alt after its loop
1316
- &asciz("AES for Intel AES-NI, CRYPTOGAMS by <appro\@openssl.org>");
1317
-
1318
- &asm_finish();