ring-native 0.0.0 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (267) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -0
  3. data/CHANGES.md +7 -0
  4. data/Makefile +5 -0
  5. data/README.md +12 -5
  6. data/Rakefile +4 -0
  7. data/ext/ring/extconf.rb +4 -5
  8. data/lib/ring/native.rb +3 -1
  9. data/lib/ring/native/version.rb +5 -1
  10. data/ring-native.gemspec +6 -6
  11. data/vendor/ring-ffi/Cargo.lock +26 -0
  12. data/vendor/ring-ffi/Cargo.toml +45 -0
  13. data/vendor/ring-ffi/LICENSE +16 -0
  14. data/vendor/ring-ffi/README.md +59 -0
  15. data/vendor/ring-ffi/src/lib.rs +79 -0
  16. metadata +10 -255
  17. data/vendor/ring/BUILDING.md +0 -40
  18. data/vendor/ring/Cargo.toml +0 -43
  19. data/vendor/ring/LICENSE +0 -185
  20. data/vendor/ring/Makefile +0 -35
  21. data/vendor/ring/PORTING.md +0 -163
  22. data/vendor/ring/README.md +0 -113
  23. data/vendor/ring/STYLE.md +0 -197
  24. data/vendor/ring/appveyor.yml +0 -27
  25. data/vendor/ring/build.rs +0 -108
  26. data/vendor/ring/crypto/aes/aes.c +0 -1142
  27. data/vendor/ring/crypto/aes/aes_test.Windows.vcxproj +0 -25
  28. data/vendor/ring/crypto/aes/aes_test.cc +0 -93
  29. data/vendor/ring/crypto/aes/asm/aes-586.pl +0 -2368
  30. data/vendor/ring/crypto/aes/asm/aes-armv4.pl +0 -1249
  31. data/vendor/ring/crypto/aes/asm/aes-x86_64.pl +0 -2246
  32. data/vendor/ring/crypto/aes/asm/aesni-x86.pl +0 -1318
  33. data/vendor/ring/crypto/aes/asm/aesni-x86_64.pl +0 -2084
  34. data/vendor/ring/crypto/aes/asm/aesv8-armx.pl +0 -675
  35. data/vendor/ring/crypto/aes/asm/bsaes-armv7.pl +0 -1364
  36. data/vendor/ring/crypto/aes/asm/bsaes-x86_64.pl +0 -1565
  37. data/vendor/ring/crypto/aes/asm/vpaes-x86.pl +0 -841
  38. data/vendor/ring/crypto/aes/asm/vpaes-x86_64.pl +0 -1116
  39. data/vendor/ring/crypto/aes/internal.h +0 -87
  40. data/vendor/ring/crypto/aes/mode_wrappers.c +0 -61
  41. data/vendor/ring/crypto/bn/add.c +0 -394
  42. data/vendor/ring/crypto/bn/asm/armv4-mont.pl +0 -694
  43. data/vendor/ring/crypto/bn/asm/armv8-mont.pl +0 -1503
  44. data/vendor/ring/crypto/bn/asm/bn-586.pl +0 -774
  45. data/vendor/ring/crypto/bn/asm/co-586.pl +0 -287
  46. data/vendor/ring/crypto/bn/asm/rsaz-avx2.pl +0 -1882
  47. data/vendor/ring/crypto/bn/asm/x86-mont.pl +0 -592
  48. data/vendor/ring/crypto/bn/asm/x86_64-gcc.c +0 -599
  49. data/vendor/ring/crypto/bn/asm/x86_64-mont.pl +0 -1393
  50. data/vendor/ring/crypto/bn/asm/x86_64-mont5.pl +0 -3507
  51. data/vendor/ring/crypto/bn/bn.c +0 -352
  52. data/vendor/ring/crypto/bn/bn_asn1.c +0 -74
  53. data/vendor/ring/crypto/bn/bn_test.Windows.vcxproj +0 -25
  54. data/vendor/ring/crypto/bn/bn_test.cc +0 -1696
  55. data/vendor/ring/crypto/bn/cmp.c +0 -200
  56. data/vendor/ring/crypto/bn/convert.c +0 -433
  57. data/vendor/ring/crypto/bn/ctx.c +0 -311
  58. data/vendor/ring/crypto/bn/div.c +0 -594
  59. data/vendor/ring/crypto/bn/exponentiation.c +0 -1335
  60. data/vendor/ring/crypto/bn/gcd.c +0 -711
  61. data/vendor/ring/crypto/bn/generic.c +0 -1019
  62. data/vendor/ring/crypto/bn/internal.h +0 -316
  63. data/vendor/ring/crypto/bn/montgomery.c +0 -516
  64. data/vendor/ring/crypto/bn/mul.c +0 -888
  65. data/vendor/ring/crypto/bn/prime.c +0 -829
  66. data/vendor/ring/crypto/bn/random.c +0 -334
  67. data/vendor/ring/crypto/bn/rsaz_exp.c +0 -262
  68. data/vendor/ring/crypto/bn/rsaz_exp.h +0 -53
  69. data/vendor/ring/crypto/bn/shift.c +0 -276
  70. data/vendor/ring/crypto/bytestring/bytestring_test.Windows.vcxproj +0 -25
  71. data/vendor/ring/crypto/bytestring/bytestring_test.cc +0 -421
  72. data/vendor/ring/crypto/bytestring/cbb.c +0 -399
  73. data/vendor/ring/crypto/bytestring/cbs.c +0 -227
  74. data/vendor/ring/crypto/bytestring/internal.h +0 -46
  75. data/vendor/ring/crypto/chacha/chacha_generic.c +0 -140
  76. data/vendor/ring/crypto/chacha/chacha_vec.c +0 -323
  77. data/vendor/ring/crypto/chacha/chacha_vec_arm.S +0 -1447
  78. data/vendor/ring/crypto/chacha/chacha_vec_arm_generate.go +0 -153
  79. data/vendor/ring/crypto/cipher/cipher_test.Windows.vcxproj +0 -25
  80. data/vendor/ring/crypto/cipher/e_aes.c +0 -390
  81. data/vendor/ring/crypto/cipher/e_chacha20poly1305.c +0 -208
  82. data/vendor/ring/crypto/cipher/internal.h +0 -173
  83. data/vendor/ring/crypto/cipher/test/aes_128_gcm_tests.txt +0 -543
  84. data/vendor/ring/crypto/cipher/test/aes_128_key_wrap_tests.txt +0 -9
  85. data/vendor/ring/crypto/cipher/test/aes_256_gcm_tests.txt +0 -475
  86. data/vendor/ring/crypto/cipher/test/aes_256_key_wrap_tests.txt +0 -23
  87. data/vendor/ring/crypto/cipher/test/chacha20_poly1305_old_tests.txt +0 -422
  88. data/vendor/ring/crypto/cipher/test/chacha20_poly1305_tests.txt +0 -484
  89. data/vendor/ring/crypto/cipher/test/cipher_test.txt +0 -100
  90. data/vendor/ring/crypto/constant_time_test.Windows.vcxproj +0 -25
  91. data/vendor/ring/crypto/constant_time_test.c +0 -304
  92. data/vendor/ring/crypto/cpu-arm-asm.S +0 -32
  93. data/vendor/ring/crypto/cpu-arm.c +0 -199
  94. data/vendor/ring/crypto/cpu-intel.c +0 -261
  95. data/vendor/ring/crypto/crypto.c +0 -151
  96. data/vendor/ring/crypto/curve25519/asm/x25519-arm.S +0 -2118
  97. data/vendor/ring/crypto/curve25519/curve25519.c +0 -4888
  98. data/vendor/ring/crypto/curve25519/x25519_test.cc +0 -128
  99. data/vendor/ring/crypto/digest/md32_common.h +0 -181
  100. data/vendor/ring/crypto/ec/asm/p256-x86_64-asm.pl +0 -2725
  101. data/vendor/ring/crypto/ec/ec.c +0 -193
  102. data/vendor/ring/crypto/ec/ec_curves.c +0 -61
  103. data/vendor/ring/crypto/ec/ec_key.c +0 -228
  104. data/vendor/ring/crypto/ec/ec_montgomery.c +0 -114
  105. data/vendor/ring/crypto/ec/example_mul.Windows.vcxproj +0 -25
  106. data/vendor/ring/crypto/ec/internal.h +0 -243
  107. data/vendor/ring/crypto/ec/oct.c +0 -253
  108. data/vendor/ring/crypto/ec/p256-64.c +0 -1794
  109. data/vendor/ring/crypto/ec/p256-x86_64-table.h +0 -9548
  110. data/vendor/ring/crypto/ec/p256-x86_64.c +0 -509
  111. data/vendor/ring/crypto/ec/simple.c +0 -1007
  112. data/vendor/ring/crypto/ec/util-64.c +0 -183
  113. data/vendor/ring/crypto/ec/wnaf.c +0 -508
  114. data/vendor/ring/crypto/ecdh/ecdh.c +0 -155
  115. data/vendor/ring/crypto/ecdsa/ecdsa.c +0 -304
  116. data/vendor/ring/crypto/ecdsa/ecdsa_asn1.c +0 -193
  117. data/vendor/ring/crypto/ecdsa/ecdsa_test.Windows.vcxproj +0 -25
  118. data/vendor/ring/crypto/ecdsa/ecdsa_test.cc +0 -327
  119. data/vendor/ring/crypto/header_removed.h +0 -17
  120. data/vendor/ring/crypto/internal.h +0 -495
  121. data/vendor/ring/crypto/libring.Windows.vcxproj +0 -101
  122. data/vendor/ring/crypto/mem.c +0 -98
  123. data/vendor/ring/crypto/modes/asm/aesni-gcm-x86_64.pl +0 -1045
  124. data/vendor/ring/crypto/modes/asm/ghash-armv4.pl +0 -517
  125. data/vendor/ring/crypto/modes/asm/ghash-x86.pl +0 -1393
  126. data/vendor/ring/crypto/modes/asm/ghash-x86_64.pl +0 -1741
  127. data/vendor/ring/crypto/modes/asm/ghashv8-armx.pl +0 -422
  128. data/vendor/ring/crypto/modes/ctr.c +0 -226
  129. data/vendor/ring/crypto/modes/gcm.c +0 -1206
  130. data/vendor/ring/crypto/modes/gcm_test.Windows.vcxproj +0 -25
  131. data/vendor/ring/crypto/modes/gcm_test.c +0 -348
  132. data/vendor/ring/crypto/modes/internal.h +0 -299
  133. data/vendor/ring/crypto/perlasm/arm-xlate.pl +0 -170
  134. data/vendor/ring/crypto/perlasm/readme +0 -100
  135. data/vendor/ring/crypto/perlasm/x86_64-xlate.pl +0 -1164
  136. data/vendor/ring/crypto/perlasm/x86asm.pl +0 -292
  137. data/vendor/ring/crypto/perlasm/x86gas.pl +0 -263
  138. data/vendor/ring/crypto/perlasm/x86masm.pl +0 -200
  139. data/vendor/ring/crypto/perlasm/x86nasm.pl +0 -187
  140. data/vendor/ring/crypto/poly1305/poly1305.c +0 -331
  141. data/vendor/ring/crypto/poly1305/poly1305_arm.c +0 -301
  142. data/vendor/ring/crypto/poly1305/poly1305_arm_asm.S +0 -2015
  143. data/vendor/ring/crypto/poly1305/poly1305_test.Windows.vcxproj +0 -25
  144. data/vendor/ring/crypto/poly1305/poly1305_test.cc +0 -80
  145. data/vendor/ring/crypto/poly1305/poly1305_test.txt +0 -52
  146. data/vendor/ring/crypto/poly1305/poly1305_vec.c +0 -892
  147. data/vendor/ring/crypto/rand/asm/rdrand-x86_64.pl +0 -75
  148. data/vendor/ring/crypto/rand/internal.h +0 -32
  149. data/vendor/ring/crypto/rand/rand.c +0 -189
  150. data/vendor/ring/crypto/rand/urandom.c +0 -219
  151. data/vendor/ring/crypto/rand/windows.c +0 -56
  152. data/vendor/ring/crypto/refcount_c11.c +0 -66
  153. data/vendor/ring/crypto/refcount_lock.c +0 -53
  154. data/vendor/ring/crypto/refcount_test.Windows.vcxproj +0 -25
  155. data/vendor/ring/crypto/refcount_test.c +0 -58
  156. data/vendor/ring/crypto/rsa/blinding.c +0 -462
  157. data/vendor/ring/crypto/rsa/internal.h +0 -108
  158. data/vendor/ring/crypto/rsa/padding.c +0 -300
  159. data/vendor/ring/crypto/rsa/rsa.c +0 -450
  160. data/vendor/ring/crypto/rsa/rsa_asn1.c +0 -261
  161. data/vendor/ring/crypto/rsa/rsa_impl.c +0 -944
  162. data/vendor/ring/crypto/rsa/rsa_test.Windows.vcxproj +0 -25
  163. data/vendor/ring/crypto/rsa/rsa_test.cc +0 -437
  164. data/vendor/ring/crypto/sha/asm/sha-armv8.pl +0 -436
  165. data/vendor/ring/crypto/sha/asm/sha-x86_64.pl +0 -2390
  166. data/vendor/ring/crypto/sha/asm/sha256-586.pl +0 -1275
  167. data/vendor/ring/crypto/sha/asm/sha256-armv4.pl +0 -735
  168. data/vendor/ring/crypto/sha/asm/sha256-armv8.pl +0 -14
  169. data/vendor/ring/crypto/sha/asm/sha256-x86_64.pl +0 -14
  170. data/vendor/ring/crypto/sha/asm/sha512-586.pl +0 -911
  171. data/vendor/ring/crypto/sha/asm/sha512-armv4.pl +0 -666
  172. data/vendor/ring/crypto/sha/asm/sha512-armv8.pl +0 -14
  173. data/vendor/ring/crypto/sha/asm/sha512-x86_64.pl +0 -14
  174. data/vendor/ring/crypto/sha/sha1.c +0 -271
  175. data/vendor/ring/crypto/sha/sha256.c +0 -204
  176. data/vendor/ring/crypto/sha/sha512.c +0 -355
  177. data/vendor/ring/crypto/test/file_test.cc +0 -326
  178. data/vendor/ring/crypto/test/file_test.h +0 -181
  179. data/vendor/ring/crypto/test/malloc.cc +0 -150
  180. data/vendor/ring/crypto/test/scoped_types.h +0 -95
  181. data/vendor/ring/crypto/test/test.Windows.vcxproj +0 -35
  182. data/vendor/ring/crypto/test/test_util.cc +0 -46
  183. data/vendor/ring/crypto/test/test_util.h +0 -41
  184. data/vendor/ring/crypto/thread_none.c +0 -55
  185. data/vendor/ring/crypto/thread_pthread.c +0 -165
  186. data/vendor/ring/crypto/thread_test.Windows.vcxproj +0 -25
  187. data/vendor/ring/crypto/thread_test.c +0 -200
  188. data/vendor/ring/crypto/thread_win.c +0 -282
  189. data/vendor/ring/examples/checkdigest.rs +0 -103
  190. data/vendor/ring/include/openssl/aes.h +0 -121
  191. data/vendor/ring/include/openssl/arm_arch.h +0 -129
  192. data/vendor/ring/include/openssl/base.h +0 -156
  193. data/vendor/ring/include/openssl/bn.h +0 -794
  194. data/vendor/ring/include/openssl/buffer.h +0 -18
  195. data/vendor/ring/include/openssl/bytestring.h +0 -235
  196. data/vendor/ring/include/openssl/chacha.h +0 -37
  197. data/vendor/ring/include/openssl/cmac.h +0 -76
  198. data/vendor/ring/include/openssl/cpu.h +0 -184
  199. data/vendor/ring/include/openssl/crypto.h +0 -43
  200. data/vendor/ring/include/openssl/curve25519.h +0 -88
  201. data/vendor/ring/include/openssl/ec.h +0 -225
  202. data/vendor/ring/include/openssl/ec_key.h +0 -129
  203. data/vendor/ring/include/openssl/ecdh.h +0 -110
  204. data/vendor/ring/include/openssl/ecdsa.h +0 -156
  205. data/vendor/ring/include/openssl/err.h +0 -201
  206. data/vendor/ring/include/openssl/mem.h +0 -101
  207. data/vendor/ring/include/openssl/obj_mac.h +0 -71
  208. data/vendor/ring/include/openssl/opensslfeatures.h +0 -68
  209. data/vendor/ring/include/openssl/opensslv.h +0 -18
  210. data/vendor/ring/include/openssl/ossl_typ.h +0 -18
  211. data/vendor/ring/include/openssl/poly1305.h +0 -51
  212. data/vendor/ring/include/openssl/rand.h +0 -70
  213. data/vendor/ring/include/openssl/rsa.h +0 -399
  214. data/vendor/ring/include/openssl/thread.h +0 -133
  215. data/vendor/ring/include/openssl/type_check.h +0 -71
  216. data/vendor/ring/mk/Common.props +0 -63
  217. data/vendor/ring/mk/Windows.props +0 -42
  218. data/vendor/ring/mk/WindowsTest.props +0 -18
  219. data/vendor/ring/mk/appveyor.bat +0 -62
  220. data/vendor/ring/mk/bottom_of_makefile.mk +0 -54
  221. data/vendor/ring/mk/ring.mk +0 -266
  222. data/vendor/ring/mk/top_of_makefile.mk +0 -214
  223. data/vendor/ring/mk/travis.sh +0 -40
  224. data/vendor/ring/mk/update-travis-yml.py +0 -229
  225. data/vendor/ring/ring.sln +0 -153
  226. data/vendor/ring/src/aead.rs +0 -682
  227. data/vendor/ring/src/agreement.rs +0 -248
  228. data/vendor/ring/src/c.rs +0 -129
  229. data/vendor/ring/src/constant_time.rs +0 -37
  230. data/vendor/ring/src/der.rs +0 -96
  231. data/vendor/ring/src/digest.rs +0 -690
  232. data/vendor/ring/src/digest_tests.txt +0 -57
  233. data/vendor/ring/src/ecc.rs +0 -28
  234. data/vendor/ring/src/ecc_build.rs +0 -279
  235. data/vendor/ring/src/ecc_curves.rs +0 -117
  236. data/vendor/ring/src/ed25519_tests.txt +0 -2579
  237. data/vendor/ring/src/exe_tests.rs +0 -46
  238. data/vendor/ring/src/ffi.rs +0 -29
  239. data/vendor/ring/src/file_test.rs +0 -187
  240. data/vendor/ring/src/hkdf.rs +0 -153
  241. data/vendor/ring/src/hkdf_tests.txt +0 -59
  242. data/vendor/ring/src/hmac.rs +0 -414
  243. data/vendor/ring/src/hmac_tests.txt +0 -97
  244. data/vendor/ring/src/input.rs +0 -312
  245. data/vendor/ring/src/lib.rs +0 -41
  246. data/vendor/ring/src/pbkdf2.rs +0 -265
  247. data/vendor/ring/src/pbkdf2_tests.txt +0 -113
  248. data/vendor/ring/src/polyfill.rs +0 -57
  249. data/vendor/ring/src/rand.rs +0 -28
  250. data/vendor/ring/src/signature.rs +0 -314
  251. data/vendor/ring/third-party/NIST/README.md +0 -9
  252. data/vendor/ring/third-party/NIST/SHAVS/SHA1LongMsg.rsp +0 -263
  253. data/vendor/ring/third-party/NIST/SHAVS/SHA1Monte.rsp +0 -309
  254. data/vendor/ring/third-party/NIST/SHAVS/SHA1ShortMsg.rsp +0 -267
  255. data/vendor/ring/third-party/NIST/SHAVS/SHA224LongMsg.rsp +0 -263
  256. data/vendor/ring/third-party/NIST/SHAVS/SHA224Monte.rsp +0 -309
  257. data/vendor/ring/third-party/NIST/SHAVS/SHA224ShortMsg.rsp +0 -267
  258. data/vendor/ring/third-party/NIST/SHAVS/SHA256LongMsg.rsp +0 -263
  259. data/vendor/ring/third-party/NIST/SHAVS/SHA256Monte.rsp +0 -309
  260. data/vendor/ring/third-party/NIST/SHAVS/SHA256ShortMsg.rsp +0 -267
  261. data/vendor/ring/third-party/NIST/SHAVS/SHA384LongMsg.rsp +0 -519
  262. data/vendor/ring/third-party/NIST/SHAVS/SHA384Monte.rsp +0 -309
  263. data/vendor/ring/third-party/NIST/SHAVS/SHA384ShortMsg.rsp +0 -523
  264. data/vendor/ring/third-party/NIST/SHAVS/SHA512LongMsg.rsp +0 -519
  265. data/vendor/ring/third-party/NIST/SHAVS/SHA512Monte.rsp +0 -309
  266. data/vendor/ring/third-party/NIST/SHAVS/SHA512ShortMsg.rsp +0 -523
  267. data/vendor/ring/third-party/NIST/sha256sums.txt +0 -1
@@ -1,1318 +0,0 @@
1
- #!/usr/bin/env perl
2
-
3
- # ====================================================================
4
- # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5
- # project. The module is, however, dual licensed under OpenSSL and
6
- # CRYPTOGAMS licenses depending on where you obtain it. For further
7
- # details see http://www.openssl.org/~appro/cryptogams/.
8
- # ====================================================================
9
- #
10
- # This module implements support for Intel AES-NI extension. In
11
- # OpenSSL context it's used with Intel engine, but can also be used as
12
- # drop-in replacement for crypto/aes/asm/aes-586.pl [see below for
13
- # details].
14
- #
15
- # Performance.
16
- #
17
- # To start with see corresponding paragraph in aesni-x86_64.pl...
18
- # Instead of filling table similar to one found there I've chosen to
19
- # summarize *comparison* results for raw ECB, CTR and CBC benchmarks.
20
- # The simplified table below represents 32-bit performance relative
21
- # to 64-bit one in every given point. Ratios vary for different
22
- # encryption modes, therefore interval values.
23
- #
24
- # 16-byte 64-byte 256-byte 1-KB 8-KB
25
- # 53-67% 67-84% 91-94% 95-98% 97-99.5%
26
- #
27
- # Lower ratios for smaller block sizes are perfectly understandable,
28
- # because function call overhead is higher in 32-bit mode. Largest
29
- # 8-KB block performance is virtually same: 32-bit code is less than
30
- # 1% slower for ECB, CBC and CCM, and ~3% slower otherwise.
31
-
32
- # January 2011
33
- #
34
- # See aesni-x86_64.pl for details. Unlike x86_64 version this module
35
- # interleaves at most 6 aes[enc|dec] instructions, because there are
36
- # not enough registers for 8x interleave [which should be optimal for
37
- # Sandy Bridge]. Actually, performance results for 6x interleave
38
- # factor presented in aesni-x86_64.pl (except for CTR) are for this
39
- # module.
40
-
41
- # April 2011
42
- #
43
- # Add aesni_xts_[en|de]crypt. Westmere spends 1.50 cycles processing
44
- # one byte out of 8KB with 128-bit key, Sandy Bridge - 1.09.
45
-
46
- ######################################################################
47
- # Current large-block performance in cycles per byte processed with
48
- # 128-bit key (less is better).
49
- #
50
- # CBC en-/decrypt CTR XTS ECB
51
- # Westmere 3.77/1.37 1.37 1.52 1.27
52
- # * Bridge 5.07/0.98 0.99 1.09 0.91
53
- # Haswell 4.44/0.80 0.97 1.03 0.72
54
- # Silvermont 5.77/3.56 3.67 4.03 3.46
55
- # Bulldozer 5.80/0.98 1.05 1.24 0.93
56
-
57
- $PREFIX="aesni"; # if $PREFIX is set to "AES", the script
58
- # generates drop-in replacement for
59
- # crypto/aes/asm/aes-586.pl:-)
60
- $inline=1; # inline _aesni_[en|de]crypt
61
-
62
- $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
63
- push(@INC,"${dir}","${dir}../../perlasm");
64
- require "x86asm.pl";
65
-
66
- &asm_init($ARGV[0],$0);
67
-
68
- &external_label("OPENSSL_ia32cap_P");
69
- &static_label("key_const");
70
-
71
- if ($PREFIX eq "aesni") { $movekey=\&movups; }
72
- else { $movekey=\&movups; }
73
-
74
- $len="eax";
75
- $rounds="ecx";
76
- $key="edx";
77
- $inp="esi";
78
- $out="edi";
79
- $rounds_="ebx"; # backup copy for $rounds
80
- $key_="ebp"; # backup copy for $key
81
-
82
- $rndkey0="xmm0";
83
- $rndkey1="xmm1";
84
- $inout0="xmm2";
85
- $inout1="xmm3";
86
- $inout2="xmm4";
87
- $inout3="xmm5"; $in1="xmm5";
88
- $inout4="xmm6"; $in0="xmm6";
89
- $inout5="xmm7"; $ivec="xmm7";
90
-
91
- # AESNI extension
92
- sub aeskeygenassist
93
- { my($dst,$src,$imm)=@_;
94
- if ("$dst:$src" =~ /xmm([0-7]):xmm([0-7])/)
95
- { &data_byte(0x66,0x0f,0x3a,0xdf,0xc0|($1<<3)|$2,$imm); }
96
- }
97
- sub aescommon
98
- { my($opcodelet,$dst,$src)=@_;
99
- if ("$dst:$src" =~ /xmm([0-7]):xmm([0-7])/)
100
- { &data_byte(0x66,0x0f,0x38,$opcodelet,0xc0|($1<<3)|$2);}
101
- }
102
- sub aesimc { aescommon(0xdb,@_); }
103
- sub aesenc { aescommon(0xdc,@_); }
104
- sub aesenclast { aescommon(0xdd,@_); }
105
- sub aesdec { aescommon(0xde,@_); }
106
- sub aesdeclast { aescommon(0xdf,@_); }
107
-
108
- # Inline version of internal aesni_[en|de]crypt1
109
- { my $sn;
110
- sub aesni_inline_generate1
111
- { my ($p,$inout,$ivec)=@_; $inout=$inout0 if (!defined($inout));
112
- $sn++;
113
-
114
- &$movekey ($rndkey0,&QWP(0,$key));
115
- &$movekey ($rndkey1,&QWP(16,$key));
116
- &xorps ($ivec,$rndkey0) if (defined($ivec));
117
- &lea ($key,&DWP(32,$key));
118
- &xorps ($inout,$ivec) if (defined($ivec));
119
- &xorps ($inout,$rndkey0) if (!defined($ivec));
120
- &set_label("${p}1_loop_$sn");
121
- eval"&aes${p} ($inout,$rndkey1)";
122
- &dec ($rounds);
123
- &$movekey ($rndkey1,&QWP(0,$key));
124
- &lea ($key,&DWP(16,$key));
125
- &jnz (&label("${p}1_loop_$sn"));
126
- eval"&aes${p}last ($inout,$rndkey1)";
127
- }}
128
-
129
- sub aesni_generate1 # fully unrolled loop
130
- { my ($p,$inout)=@_; $inout=$inout0 if (!defined($inout));
131
-
132
- &function_begin_B("_aesni_${p}rypt1");
133
- &movups ($rndkey0,&QWP(0,$key));
134
- &$movekey ($rndkey1,&QWP(0x10,$key));
135
- &xorps ($inout,$rndkey0);
136
- &$movekey ($rndkey0,&QWP(0x20,$key));
137
- &lea ($key,&DWP(0x30,$key));
138
- &cmp ($rounds,11);
139
- &jb (&label("${p}128"));
140
- &lea ($key,&DWP(0x20,$key));
141
- &je (&label("${p}192"));
142
- &lea ($key,&DWP(0x20,$key));
143
- eval"&aes${p} ($inout,$rndkey1)";
144
- &$movekey ($rndkey1,&QWP(-0x40,$key));
145
- eval"&aes${p} ($inout,$rndkey0)";
146
- &$movekey ($rndkey0,&QWP(-0x30,$key));
147
- &set_label("${p}192");
148
- eval"&aes${p} ($inout,$rndkey1)";
149
- &$movekey ($rndkey1,&QWP(-0x20,$key));
150
- eval"&aes${p} ($inout,$rndkey0)";
151
- &$movekey ($rndkey0,&QWP(-0x10,$key));
152
- &set_label("${p}128");
153
- eval"&aes${p} ($inout,$rndkey1)";
154
- &$movekey ($rndkey1,&QWP(0,$key));
155
- eval"&aes${p} ($inout,$rndkey0)";
156
- &$movekey ($rndkey0,&QWP(0x10,$key));
157
- eval"&aes${p} ($inout,$rndkey1)";
158
- &$movekey ($rndkey1,&QWP(0x20,$key));
159
- eval"&aes${p} ($inout,$rndkey0)";
160
- &$movekey ($rndkey0,&QWP(0x30,$key));
161
- eval"&aes${p} ($inout,$rndkey1)";
162
- &$movekey ($rndkey1,&QWP(0x40,$key));
163
- eval"&aes${p} ($inout,$rndkey0)";
164
- &$movekey ($rndkey0,&QWP(0x50,$key));
165
- eval"&aes${p} ($inout,$rndkey1)";
166
- &$movekey ($rndkey1,&QWP(0x60,$key));
167
- eval"&aes${p} ($inout,$rndkey0)";
168
- &$movekey ($rndkey0,&QWP(0x70,$key));
169
- eval"&aes${p} ($inout,$rndkey1)";
170
- eval"&aes${p}last ($inout,$rndkey0)";
171
- &ret();
172
- &function_end_B("_aesni_${p}rypt1");
173
- }
174
-
175
- # void $PREFIX_encrypt (const void *inp,void *out,const AES_KEY *key);
176
- &aesni_generate1("enc") if (!$inline);
177
- &function_begin_B("${PREFIX}_encrypt");
178
- &mov ("eax",&wparam(0));
179
- &mov ($key,&wparam(2));
180
- &movups ($inout0,&QWP(0,"eax"));
181
- &mov ($rounds,&DWP(240,$key));
182
- &mov ("eax",&wparam(1));
183
- if ($inline)
184
- { &aesni_inline_generate1("enc"); }
185
- else
186
- { &call ("_aesni_encrypt1"); }
187
- &pxor ($rndkey0,$rndkey0); # clear register bank
188
- &pxor ($rndkey1,$rndkey1);
189
- &movups (&QWP(0,"eax"),$inout0);
190
- &pxor ($inout0,$inout0);
191
- &ret ();
192
- &function_end_B("${PREFIX}_encrypt");
193
-
194
- # void $PREFIX_decrypt (const void *inp,void *out,const AES_KEY *key);
195
- &aesni_generate1("dec") if(!$inline);
196
- &function_begin_B("${PREFIX}_decrypt");
197
- &mov ("eax",&wparam(0));
198
- &mov ($key,&wparam(2));
199
- &movups ($inout0,&QWP(0,"eax"));
200
- &mov ($rounds,&DWP(240,$key));
201
- &mov ("eax",&wparam(1));
202
- if ($inline)
203
- { &aesni_inline_generate1("dec"); }
204
- else
205
- { &call ("_aesni_decrypt1"); }
206
- &pxor ($rndkey0,$rndkey0); # clear register bank
207
- &pxor ($rndkey1,$rndkey1);
208
- &movups (&QWP(0,"eax"),$inout0);
209
- &pxor ($inout0,$inout0);
210
- &ret ();
211
- &function_end_B("${PREFIX}_decrypt");
212
-
213
- # _aesni_[en|de]cryptN are private interfaces, N denotes interleave
214
- # factor. Why were 3x subroutines originally used in loops? Even though
215
- # aes[enc|dec] latency was originally 6, it could be scheduled only
216
- # every *2nd* cycle. Thus 3x interleave was the one providing optimal
217
- # utilization, i.e. when subroutine's throughput is virtually same as
218
- # of non-interleaved subroutine [for number of input blocks up to 3].
219
- # This is why it originally made no sense to implement 2x subroutine.
220
- # But times change and it became appropriate to spend extra 192 bytes
221
- # on a 2x subroutine on account of Atom Silvermont. For processors that
222
- # can schedule aes[enc|dec] every cycle optimal interleave factor
223
- # equals the corresponding instruction's latency. 8x is optimal for
224
- # * Bridge, but it's unfeasible to accommodate such implementation
225
- # in XMM registers addressable in 32-bit mode and therefore maximum
226
- # of 6x is used instead...
227
-
228
- sub aesni_generate2
229
- { my $p=shift;
230
-
231
- &function_begin_B("_aesni_${p}rypt2");
232
- &$movekey ($rndkey0,&QWP(0,$key));
233
- &shl ($rounds,4);
234
- &$movekey ($rndkey1,&QWP(16,$key));
235
- &xorps ($inout0,$rndkey0);
236
- &pxor ($inout1,$rndkey0);
237
- &$movekey ($rndkey0,&QWP(32,$key));
238
- &lea ($key,&DWP(32,$key,$rounds));
239
- &neg ($rounds);
240
- &add ($rounds,16);
241
-
242
- &set_label("${p}2_loop");
243
- eval"&aes${p} ($inout0,$rndkey1)";
244
- eval"&aes${p} ($inout1,$rndkey1)";
245
- &$movekey ($rndkey1,&QWP(0,$key,$rounds));
246
- &add ($rounds,32);
247
- eval"&aes${p} ($inout0,$rndkey0)";
248
- eval"&aes${p} ($inout1,$rndkey0)";
249
- &$movekey ($rndkey0,&QWP(-16,$key,$rounds));
250
- &jnz (&label("${p}2_loop"));
251
- eval"&aes${p} ($inout0,$rndkey1)";
252
- eval"&aes${p} ($inout1,$rndkey1)";
253
- eval"&aes${p}last ($inout0,$rndkey0)";
254
- eval"&aes${p}last ($inout1,$rndkey0)";
255
- &ret();
256
- &function_end_B("_aesni_${p}rypt2");
257
- }
258
-
259
- sub aesni_generate3
260
- { my $p=shift;
261
-
262
- &function_begin_B("_aesni_${p}rypt3");
263
- &$movekey ($rndkey0,&QWP(0,$key));
264
- &shl ($rounds,4);
265
- &$movekey ($rndkey1,&QWP(16,$key));
266
- &xorps ($inout0,$rndkey0);
267
- &pxor ($inout1,$rndkey0);
268
- &pxor ($inout2,$rndkey0);
269
- &$movekey ($rndkey0,&QWP(32,$key));
270
- &lea ($key,&DWP(32,$key,$rounds));
271
- &neg ($rounds);
272
- &add ($rounds,16);
273
-
274
- &set_label("${p}3_loop");
275
- eval"&aes${p} ($inout0,$rndkey1)";
276
- eval"&aes${p} ($inout1,$rndkey1)";
277
- eval"&aes${p} ($inout2,$rndkey1)";
278
- &$movekey ($rndkey1,&QWP(0,$key,$rounds));
279
- &add ($rounds,32);
280
- eval"&aes${p} ($inout0,$rndkey0)";
281
- eval"&aes${p} ($inout1,$rndkey0)";
282
- eval"&aes${p} ($inout2,$rndkey0)";
283
- &$movekey ($rndkey0,&QWP(-16,$key,$rounds));
284
- &jnz (&label("${p}3_loop"));
285
- eval"&aes${p} ($inout0,$rndkey1)";
286
- eval"&aes${p} ($inout1,$rndkey1)";
287
- eval"&aes${p} ($inout2,$rndkey1)";
288
- eval"&aes${p}last ($inout0,$rndkey0)";
289
- eval"&aes${p}last ($inout1,$rndkey0)";
290
- eval"&aes${p}last ($inout2,$rndkey0)";
291
- &ret();
292
- &function_end_B("_aesni_${p}rypt3");
293
- }
294
-
295
- # 4x interleave is implemented to improve small block performance,
296
- # most notably [and naturally] 4 block by ~30%. One can argue that one
297
- # should have implemented 5x as well, but improvement would be <20%,
298
- # so it's not worth it...
299
- sub aesni_generate4
300
- { my $p=shift;
301
-
302
- &function_begin_B("_aesni_${p}rypt4");
303
- &$movekey ($rndkey0,&QWP(0,$key));
304
- &$movekey ($rndkey1,&QWP(16,$key));
305
- &shl ($rounds,4);
306
- &xorps ($inout0,$rndkey0);
307
- &pxor ($inout1,$rndkey0);
308
- &pxor ($inout2,$rndkey0);
309
- &pxor ($inout3,$rndkey0);
310
- &$movekey ($rndkey0,&QWP(32,$key));
311
- &lea ($key,&DWP(32,$key,$rounds));
312
- &neg ($rounds);
313
- &data_byte (0x0f,0x1f,0x40,0x00);
314
- &add ($rounds,16);
315
-
316
- &set_label("${p}4_loop");
317
- eval"&aes${p} ($inout0,$rndkey1)";
318
- eval"&aes${p} ($inout1,$rndkey1)";
319
- eval"&aes${p} ($inout2,$rndkey1)";
320
- eval"&aes${p} ($inout3,$rndkey1)";
321
- &$movekey ($rndkey1,&QWP(0,$key,$rounds));
322
- &add ($rounds,32);
323
- eval"&aes${p} ($inout0,$rndkey0)";
324
- eval"&aes${p} ($inout1,$rndkey0)";
325
- eval"&aes${p} ($inout2,$rndkey0)";
326
- eval"&aes${p} ($inout3,$rndkey0)";
327
- &$movekey ($rndkey0,&QWP(-16,$key,$rounds));
328
- &jnz (&label("${p}4_loop"));
329
-
330
- eval"&aes${p} ($inout0,$rndkey1)";
331
- eval"&aes${p} ($inout1,$rndkey1)";
332
- eval"&aes${p} ($inout2,$rndkey1)";
333
- eval"&aes${p} ($inout3,$rndkey1)";
334
- eval"&aes${p}last ($inout0,$rndkey0)";
335
- eval"&aes${p}last ($inout1,$rndkey0)";
336
- eval"&aes${p}last ($inout2,$rndkey0)";
337
- eval"&aes${p}last ($inout3,$rndkey0)";
338
- &ret();
339
- &function_end_B("_aesni_${p}rypt4");
340
- }
341
-
342
- sub aesni_generate6
343
- { my $p=shift;
344
-
345
- &function_begin_B("_aesni_${p}rypt6");
346
- &static_label("_aesni_${p}rypt6_enter");
347
- &$movekey ($rndkey0,&QWP(0,$key));
348
- &shl ($rounds,4);
349
- &$movekey ($rndkey1,&QWP(16,$key));
350
- &xorps ($inout0,$rndkey0);
351
- &pxor ($inout1,$rndkey0); # pxor does better here
352
- &pxor ($inout2,$rndkey0);
353
- eval"&aes${p} ($inout0,$rndkey1)";
354
- &pxor ($inout3,$rndkey0);
355
- &pxor ($inout4,$rndkey0);
356
- eval"&aes${p} ($inout1,$rndkey1)";
357
- &lea ($key,&DWP(32,$key,$rounds));
358
- &neg ($rounds);
359
- eval"&aes${p} ($inout2,$rndkey1)";
360
- &pxor ($inout5,$rndkey0);
361
- &$movekey ($rndkey0,&QWP(0,$key,$rounds));
362
- &add ($rounds,16);
363
- &jmp (&label("_aesni_${p}rypt6_inner"));
364
-
365
- &set_label("${p}6_loop",16);
366
- eval"&aes${p} ($inout0,$rndkey1)";
367
- eval"&aes${p} ($inout1,$rndkey1)";
368
- eval"&aes${p} ($inout2,$rndkey1)";
369
- &set_label("_aesni_${p}rypt6_inner");
370
- eval"&aes${p} ($inout3,$rndkey1)";
371
- eval"&aes${p} ($inout4,$rndkey1)";
372
- eval"&aes${p} ($inout5,$rndkey1)";
373
- &set_label("_aesni_${p}rypt6_enter");
374
- &$movekey ($rndkey1,&QWP(0,$key,$rounds));
375
- &add ($rounds,32);
376
- eval"&aes${p} ($inout0,$rndkey0)";
377
- eval"&aes${p} ($inout1,$rndkey0)";
378
- eval"&aes${p} ($inout2,$rndkey0)";
379
- eval"&aes${p} ($inout3,$rndkey0)";
380
- eval"&aes${p} ($inout4,$rndkey0)";
381
- eval"&aes${p} ($inout5,$rndkey0)";
382
- &$movekey ($rndkey0,&QWP(-16,$key,$rounds));
383
- &jnz (&label("${p}6_loop"));
384
-
385
- eval"&aes${p} ($inout0,$rndkey1)";
386
- eval"&aes${p} ($inout1,$rndkey1)";
387
- eval"&aes${p} ($inout2,$rndkey1)";
388
- eval"&aes${p} ($inout3,$rndkey1)";
389
- eval"&aes${p} ($inout4,$rndkey1)";
390
- eval"&aes${p} ($inout5,$rndkey1)";
391
- eval"&aes${p}last ($inout0,$rndkey0)";
392
- eval"&aes${p}last ($inout1,$rndkey0)";
393
- eval"&aes${p}last ($inout2,$rndkey0)";
394
- eval"&aes${p}last ($inout3,$rndkey0)";
395
- eval"&aes${p}last ($inout4,$rndkey0)";
396
- eval"&aes${p}last ($inout5,$rndkey0)";
397
- &ret();
398
- &function_end_B("_aesni_${p}rypt6");
399
- }
400
- &aesni_generate2("enc") if ($PREFIX eq "aesni");
401
- &aesni_generate2("dec");
402
- &aesni_generate3("enc") if ($PREFIX eq "aesni");
403
- &aesni_generate3("dec");
404
- &aesni_generate4("enc") if ($PREFIX eq "aesni");
405
- &aesni_generate4("dec");
406
- &aesni_generate6("enc") if ($PREFIX eq "aesni");
407
- &aesni_generate6("dec");
408
-
409
- if ($PREFIX eq "aesni") {
410
- ######################################################################
411
- # void aesni_ccm64_[en|de]crypt_blocks (const void *in, void *out,
412
- # size_t blocks, const AES_KEY *key,
413
- # const char *ivec,char *cmac);
414
- #
415
- # Handles only complete blocks, operates on 64-bit counter and
416
- # does not update *ivec! Nor does it finalize CMAC value
417
- # (see engine/eng_aesni.c for details)
418
- #
419
- { my $cmac=$inout1;
420
- &function_begin("aesni_ccm64_encrypt_blocks"); # arguments follow the prototype comment: in, out, blocks, key, ivec, cmac
421
- &mov ($inp,&wparam(0)); # in
422
- &mov ($out,&wparam(1)); # out
423
- &mov ($len,&wparam(2)); # blocks
424
- &mov ($key,&wparam(3)); # key
425
- &mov ($rounds_,&wparam(4)); # ivec pointer, parked in $rounds_ until it is dereferenced below
426
- &mov ($rounds,&wparam(5)); # cmac pointer, parked in $rounds until it is dereferenced below
427
- &mov ($key_,"esp"); # keep the incoming %esp so it can be restored on exit
428
- &sub ("esp",60);
429
- &and ("esp",-16); # align stack
430
- &mov (&DWP(48,"esp"),$key_); # saved %esp is kept at 48(%esp)
431
-
432
- &movdqu ($ivec,&QWP(0,$rounds_)); # load ivec
433
- &movdqu ($cmac,&QWP(0,$rounds)); # load cmac
434
- &mov ($rounds,&DWP(240,$key)); # 32-bit value stored at byte offset 240 of the key structure
435
-
436
- # compose byte-swap control mask for pshufb on stack
437
- &mov (&DWP(0,"esp"),0x0c0d0e0f);
438
- &mov (&DWP(4,"esp"),0x08090a0b);
439
- &mov (&DWP(8,"esp"),0x04050607);
440
- &mov (&DWP(12,"esp"),0x00010203);
441
-
442
- # compose counter increment vector on stack
443
- &mov ($rounds_,1);
444
- &xor ($key_,$key_);
445
- &mov (&DWP(16,"esp"),$rounds_); # dwords at 16..28(%esp) become 1,0,0,0
446
- &mov (&DWP(20,"esp"),$key_);
447
- &mov (&DWP(24,"esp"),$key_);
448
- &mov (&DWP(28,"esp"),$key_);
449
-
450
- &shl ($rounds,4); # $rounds *= 16
451
- &mov ($rounds_,16);
452
- &lea ($key_,&DWP(0,$key)); # $key_ keeps the unmodified key pointer
453
- &movdqa ($inout3,&QWP(0,"esp")); # $inout3 = byte-swap mask composed above
454
- &movdqa ($inout0,$ivec);
455
- &lea ($key,&DWP(32,$key,$rounds)); # $key = key + 32 + 16*rounds
456
- &sub ($rounds_,$rounds); # $rounds_ = 16 - 16*rounds; reloaded into $rounds for each block
457
- &pshufb ($ivec,$inout3);
458
-
459
- &set_label("ccm64_enc_outer"); # one iteration per input block
460
- &$movekey ($rndkey0,&QWP(0,$key_));
461
- &mov ($rounds,$rounds_);
462
- &movups ($in0,&QWP(0,$inp));
463
-
464
- &xorps ($inout0,$rndkey0);
465
- &$movekey ($rndkey1,&QWP(16,$key_));
466
- &xorps ($rndkey0,$in0);
467
- &xorps ($cmac,$rndkey0); # cmac^=inp
468
- &$movekey ($rndkey0,&QWP(32,$key_));
469
-
470
- &set_label("ccm64_enc2_loop"); # runs the counter block and the cmac block through the same round keys
471
- &aesenc ($inout0,$rndkey1);
472
- &aesenc ($cmac,$rndkey1);
473
- &$movekey ($rndkey1,&QWP(0,$key,$rounds));
474
- &add ($rounds,32);
475
- &aesenc ($inout0,$rndkey0);
476
- &aesenc ($cmac,$rndkey0);
477
- &$movekey ($rndkey0,&QWP(-16,$key,$rounds));
478
- &jnz (&label("ccm64_enc2_loop")); # presumably consumes the flags of the add above ($movekey is defined outside this chunk)
479
- &aesenc ($inout0,$rndkey1);
480
- &aesenc ($cmac,$rndkey1);
481
- &paddq ($ivec,&QWP(16,"esp")); # add the increment vector stored at 16(%esp)
482
- &dec ($len);
483
- &aesenclast ($inout0,$rndkey0);
484
- &aesenclast ($cmac,$rndkey0);
485
-
486
- &lea ($inp,&DWP(16,$inp));
487
- &xorps ($in0,$inout0); # inp^=E(ivec)
488
- &movdqa ($inout0,$ivec);
489
- &movups (&QWP(0,$out),$in0); # save output
490
- &pshufb ($inout0,$inout3);
491
- &lea ($out,&DWP(16,$out));
492
- &jnz (&label("ccm64_enc_outer")); # loop while the $len decremented above is non-zero
493
-
494
- &mov ("esp",&DWP(48,"esp")); # restore the %esp saved in the prologue
495
- &mov ($out,&wparam(5)); # reload the cmac pointer
496
- &movups (&QWP(0,$out),$cmac); # write the updated cmac back
497
-
498
- &pxor ("xmm0","xmm0"); # clear register bank
499
- &pxor ("xmm1","xmm1");
500
- &pxor ("xmm2","xmm2");
501
- &pxor ("xmm3","xmm3");
502
- &pxor ("xmm4","xmm4");
503
- &pxor ("xmm5","xmm5");
504
- &pxor ("xmm6","xmm6");
505
- &pxor ("xmm7","xmm7");
506
- &function_end("aesni_ccm64_encrypt_blocks");
507
-
508
- &function_begin("aesni_ccm64_decrypt_blocks"); # arguments follow the prototype comment: in, out, blocks, key, ivec, cmac
509
- &mov ($inp,&wparam(0)); # in
510
- &mov ($out,&wparam(1)); # out
511
- &mov ($len,&wparam(2)); # blocks
512
- &mov ($key,&wparam(3)); # key
513
- &mov ($rounds_,&wparam(4)); # ivec pointer, parked in $rounds_ until it is dereferenced below
514
- &mov ($rounds,&wparam(5)); # cmac pointer, parked in $rounds until it is dereferenced below
515
- &mov ($key_,"esp"); # keep the incoming %esp so it can be restored on exit
516
- &sub ("esp",60);
517
- &and ("esp",-16); # align stack
518
- &mov (&DWP(48,"esp"),$key_); # saved %esp is kept at 48(%esp)
519
-
520
- &movdqu ($ivec,&QWP(0,$rounds_)); # load ivec
521
- &movdqu ($cmac,&QWP(0,$rounds)); # load cmac
522
- &mov ($rounds,&DWP(240,$key)); # 32-bit value stored at byte offset 240 of the key structure
523
-
524
- # compose byte-swap control mask for pshufb on stack
525
- &mov (&DWP(0,"esp"),0x0c0d0e0f);
526
- &mov (&DWP(4,"esp"),0x08090a0b);
527
- &mov (&DWP(8,"esp"),0x04050607);
528
- &mov (&DWP(12,"esp"),0x00010203);
529
-
530
- # compose counter increment vector on stack
531
- &mov ($rounds_,1);
532
- &xor ($key_,$key_);
533
- &mov (&DWP(16,"esp"),$rounds_); # dwords at 16..28(%esp) become 1,0,0,0
534
- &mov (&DWP(20,"esp"),$key_);
535
- &mov (&DWP(24,"esp"),$key_);
536
- &mov (&DWP(28,"esp"),$key_);
537
-
538
- &movdqa ($inout3,&QWP(0,"esp")); # bswap mask
539
- &movdqa ($inout0,$ivec);
540
-
541
- &mov ($key_,$key); # $key_ keeps the unmodified key pointer
542
- &mov ($rounds_,$rounds);
543
-
544
- &pshufb ($ivec,$inout3);
545
- if ($inline)
546
- { &aesni_inline_generate1("enc"); }
547
- else
548
- { &call ("_aesni_encrypt1"); }
549
- &shl ($rounds_,4); # $rounds_ *= 16
550
- &mov ($rounds,16);
551
- &movups ($in0,&QWP(0,$inp)); # load inp
552
- &paddq ($ivec,&QWP(16,"esp")); # add the increment vector stored at 16(%esp)
553
- &lea ($inp,&QWP(16,$inp));
554
- &sub ($rounds,$rounds_); # $rounds = 16 - 16*rounds
555
- &lea ($key,&DWP(32,$key_,$rounds_)); # $key = key + 32 + 16*rounds
556
- &mov ($rounds_,$rounds); # $rounds_ keeps that value for reloading on every block
557
- &jmp (&label("ccm64_dec_outer"));
558
-
559
- &set_label("ccm64_dec_outer",16); # one iteration per block
560
- &xorps ($in0,$inout0); # inp ^= E(ivec)
561
- &movdqa ($inout0,$ivec);
562
- &movups (&QWP(0,$out),$in0); # save output
563
- &lea ($out,&DWP(16,$out));
564
- &pshufb ($inout0,$inout3);
565
-
566
- &sub ($len,1);
567
- &jz (&label("ccm64_dec_break")); # last block: leave the loop and finish the cmac separately
568
-
569
- &$movekey ($rndkey0,&QWP(0,$key_));
570
- &mov ($rounds,$rounds_);
571
- &$movekey ($rndkey1,&QWP(16,$key_));
572
- &xorps ($in0,$rndkey0);
573
- &xorps ($inout0,$rndkey0);
574
- &xorps ($cmac,$in0); # cmac^=out
575
- &$movekey ($rndkey0,&QWP(32,$key_));
576
-
577
- &set_label("ccm64_dec2_loop"); # runs the counter block and the cmac block through the same round keys
578
- &aesenc ($inout0,$rndkey1);
579
- &aesenc ($cmac,$rndkey1);
580
- &$movekey ($rndkey1,&QWP(0,$key,$rounds));
581
- &add ($rounds,32);
582
- &aesenc ($inout0,$rndkey0);
583
- &aesenc ($cmac,$rndkey0);
584
- &$movekey ($rndkey0,&QWP(-16,$key,$rounds));
585
- &jnz (&label("ccm64_dec2_loop")); # presumably consumes the flags of the add above ($movekey is defined outside this chunk)
586
- &movups ($in0,&QWP(0,$inp)); # load inp
587
- &paddq ($ivec,&QWP(16,"esp"));
588
- &aesenc ($inout0,$rndkey1);
589
- &aesenc ($cmac,$rndkey1);
590
- &aesenclast ($inout0,$rndkey0);
591
- &aesenclast ($cmac,$rndkey0);
592
- &lea ($inp,&QWP(16,$inp));
593
- &jmp (&label("ccm64_dec_outer"));
594
-
595
- &set_label("ccm64_dec_break",16);
596
- &mov ($rounds,&DWP(240,$key_)); # reload the value at offset 240 of the key structure
597
- &mov ($key,$key_); # and the unmodified key pointer
598
- if ($inline)
599
- { &aesni_inline_generate1("enc",$cmac,$in0); }
600
- else
601
- { &call ("_aesni_encrypt1",$cmac); }
602
-
603
- &mov ("esp",&DWP(48,"esp")); # restore the %esp saved in the prologue
604
- &mov ($out,&wparam(5)); # reload the cmac pointer
605
- &movups (&QWP(0,$out),$cmac); # write the updated cmac back
606
-
607
- &pxor ("xmm0","xmm0"); # clear register bank
608
- &pxor ("xmm1","xmm1");
609
- &pxor ("xmm2","xmm2");
610
- &pxor ("xmm3","xmm3");
611
- &pxor ("xmm4","xmm4");
612
- &pxor ("xmm5","xmm5");
613
- &pxor ("xmm6","xmm6");
614
- &pxor ("xmm7","xmm7");
615
- &function_end("aesni_ccm64_decrypt_blocks");
616
- }
617
-
618
- ######################################################################
619
- # void aesni_ctr32_encrypt_blocks (const void *in, void *out,
620
- # size_t blocks, const AES_KEY *key,
621
- # const char *ivec);
622
- #
623
- # Handles only complete blocks, operates on 32-bit counter and
624
- # does not update *ivec! (see crypto/modes/ctr128.c for details)
625
- #
626
- # stack layout:
627
- # 0 pshufb mask
628
- # 16 vector addend: 0,6,6,6
629
- # 32 counter-less ivec
630
- # 48 1st triplet of counter vector
631
- # 64 2nd triplet of counter vector
632
- # 80 saved %esp
633
-
634
- &function_begin("aesni_ctr32_encrypt_blocks"); # arguments follow the prototype comment: in, out, blocks, key, ivec
635
- &mov ($inp,&wparam(0)); # in
636
- &mov ($out,&wparam(1)); # out
637
- &mov ($len,&wparam(2)); # blocks
638
- &mov ($key,&wparam(3)); # key
639
- &mov ($rounds_,&wparam(4)); # ivec pointer, parked in $rounds_ until it is dereferenced
640
- &mov ($key_,"esp"); # keep the incoming %esp so it can be restored on exit
641
- &sub ("esp",88);
642
- &and ("esp",-16); # align stack
643
- &mov (&DWP(80,"esp"),$key_); # "saved %esp" slot of the stack layout
644
-
645
- &cmp ($len,1);
646
- &je (&label("ctr32_one_shortcut")); # exactly one block: skip all counter-vector setup
647
-
648
- &movdqu ($inout5,&QWP(0,$rounds_)); # load ivec
649
-
650
- # compose byte-swap control mask for pshufb on stack
651
- &mov (&DWP(0,"esp"),0x0c0d0e0f);
652
- &mov (&DWP(4,"esp"),0x08090a0b);
653
- &mov (&DWP(8,"esp"),0x04050607);
654
- &mov (&DWP(12,"esp"),0x00010203);
655
-
656
- # compose counter increment vector on stack
657
- &mov ($rounds,6);
658
- &xor ($key_,$key_);
659
- &mov (&DWP(16,"esp"),$rounds); # dwords at 16..28(%esp) become 6,6,6,0
660
- &mov (&DWP(20,"esp"),$rounds);
661
- &mov (&DWP(24,"esp"),$rounds);
662
- &mov (&DWP(28,"esp"),$key_);
663
-
664
- &pextrd ($rounds_,$inout5,3); # pull 32-bit counter
665
- &pinsrd ($inout5,$key_,3); # wipe 32-bit counter
666
-
667
- &mov ($rounds,&DWP(240,$key)); # key->rounds
668
-
669
- # compose 2 vectors of 3x32-bit counters
670
- &bswap ($rounds_);
671
- &pxor ($rndkey0,$rndkey0);
672
- &pxor ($rndkey1,$rndkey1);
673
- &movdqa ($inout0,&QWP(0,"esp")); # load byte-swap mask
674
- &pinsrd ($rndkey0,$rounds_,0); # 1st triplet lanes 0..2 = counter+0, +1, +2
675
- &lea ($key_,&DWP(3,$rounds_)); # $key_ = counter+3
676
- &pinsrd ($rndkey1,$key_,0); # 2nd triplet lanes 0..2 = counter+3, +4, +5
677
- &inc ($rounds_);
678
- &pinsrd ($rndkey0,$rounds_,1);
679
- &inc ($key_);
680
- &pinsrd ($rndkey1,$key_,1);
681
- &inc ($rounds_);
682
- &pinsrd ($rndkey0,$rounds_,2);
683
- &inc ($key_);
684
- &pinsrd ($rndkey1,$key_,2);
685
- &movdqa (&QWP(48,"esp"),$rndkey0); # save 1st triplet
686
- &pshufb ($rndkey0,$inout0); # byte swap
687
- &movdqu ($inout4,&QWP(0,$key)); # key[0]
688
- &movdqa (&QWP(64,"esp"),$rndkey1); # save 2nd triplet
689
- &pshufb ($rndkey1,$inout0); # byte swap
690
-
691
- &pshufd ($inout0,$rndkey0,3<<6); # place counter to upper dword
692
- &pshufd ($inout1,$rndkey0,2<<6);
693
- &cmp ($len,6);
694
- &jb (&label("ctr32_tail")); # fewer than 6 blocks: go straight to the tail handling
695
- &pxor ($inout5,$inout4); # counter-less ivec^key[0]
696
- &shl ($rounds,4);
697
- &mov ($rounds_,16);
698
- &movdqa (&QWP(32,"esp"),$inout5); # save counter-less ivec^key[0]
699
- &mov ($key_,$key); # backup $key
700
- &sub ($rounds_,$rounds); # backup twisted $rounds
701
- &lea ($key,&DWP(32,$key,$rounds));
702
- &sub ($len,6);
703
- &jmp (&label("ctr32_loop6"));
704
-
705
- &set_label("ctr32_loop6",16); # processes 6 blocks (0x60 bytes) per iteration
706
- # inlining _aesni_encrypt6's prologue gives ~6% improvement...
707
- &pshufd ($inout2,$rndkey0,1<<6);
708
- &movdqa ($rndkey0,&QWP(32,"esp")); # pull counter-less ivec
709
- &pshufd ($inout3,$rndkey1,3<<6);
710
- &pxor ($inout0,$rndkey0); # merge counter-less ivec
711
- &pshufd ($inout4,$rndkey1,2<<6);
712
- &pxor ($inout1,$rndkey0);
713
- &pshufd ($inout5,$rndkey1,1<<6);
714
- &$movekey ($rndkey1,&QWP(16,$key_));
715
- &pxor ($inout2,$rndkey0);
716
- &pxor ($inout3,$rndkey0);
717
- &aesenc ($inout0,$rndkey1);
718
- &pxor ($inout4,$rndkey0);
719
- &pxor ($inout5,$rndkey0);
720
- &aesenc ($inout1,$rndkey1);
721
- &$movekey ($rndkey0,&QWP(32,$key_));
722
- &mov ($rounds,$rounds_);
723
- &aesenc ($inout2,$rndkey1);
724
- &aesenc ($inout3,$rndkey1);
725
- &aesenc ($inout4,$rndkey1);
726
- &aesenc ($inout5,$rndkey1);
727
-
728
- &call (&label("_aesni_encrypt6_enter")); # label defined outside this chunk
729
-
730
- &movups ($rndkey1,&QWP(0,$inp));
731
- &movups ($rndkey0,&QWP(0x10,$inp));
732
- &xorps ($inout0,$rndkey1);
733
- &movups ($rndkey1,&QWP(0x20,$inp));
734
- &xorps ($inout1,$rndkey0);
735
- &movups (&QWP(0,$out),$inout0);
736
- &movdqa ($rndkey0,&QWP(16,"esp")); # load increment
737
- &xorps ($inout2,$rndkey1);
738
- &movdqa ($rndkey1,&QWP(64,"esp")); # load 2nd triplet
739
- &movups (&QWP(0x10,$out),$inout1);
740
- &movups (&QWP(0x20,$out),$inout2);
741
-
742
- &paddd ($rndkey1,$rndkey0); # 2nd triplet increment
743
- &paddd ($rndkey0,&QWP(48,"esp")); # 1st triplet increment
744
- &movdqa ($inout0,&QWP(0,"esp")); # load byte swap mask
745
-
746
- &movups ($inout1,&QWP(0x30,$inp));
747
- &movups ($inout2,&QWP(0x40,$inp));
748
- &xorps ($inout3,$inout1);
749
- &movups ($inout1,&QWP(0x50,$inp));
750
- &lea ($inp,&DWP(0x60,$inp));
751
- &movdqa (&QWP(48,"esp"),$rndkey0); # save 1st triplet
752
- &pshufb ($rndkey0,$inout0); # byte swap
753
- &xorps ($inout4,$inout2);
754
- &movups (&QWP(0x30,$out),$inout3);
755
- &xorps ($inout5,$inout1);
756
- &movdqa (&QWP(64,"esp"),$rndkey1); # save 2nd triplet
757
- &pshufb ($rndkey1,$inout0); # byte swap
758
- &movups (&QWP(0x40,$out),$inout4);
759
- &pshufd ($inout0,$rndkey0,3<<6);
760
- &movups (&QWP(0x50,$out),$inout5);
761
- &lea ($out,&DWP(0x60,$out));
762
-
763
- &pshufd ($inout1,$rndkey0,2<<6);
764
- &sub ($len,6);
765
- &jnc (&label("ctr32_loop6")); # keep looping while the subtraction did not borrow
766
-
767
- &add ($len,6); # undo the last subtraction: $len = blocks still to do
768
- &jz (&label("ctr32_ret"));
769
- &movdqu ($inout5,&QWP(0,$key_));
770
- &mov ($key,$key_);
771
- &pxor ($inout5,&QWP(32,"esp")); # restore count-less ivec
772
- &mov ($rounds,&DWP(240,$key_)); # restore $rounds
773
-
774
- &set_label("ctr32_tail"); # dispatches on the remaining block count (compared against 2 and 4 below)
775
- &por ($inout0,$inout5);
776
- &cmp ($len,2);
777
- &jb (&label("ctr32_one"));
778
-
779
- &pshufd ($inout2,$rndkey0,1<<6);
780
- &por ($inout1,$inout5);
781
- &je (&label("ctr32_two")); # still tests the flags of "cmp $len,2" above
782
-
783
- &pshufd ($inout3,$rndkey1,3<<6);
784
- &por ($inout2,$inout5);
785
- &cmp ($len,4);
786
- &jb (&label("ctr32_three"));
787
-
788
- &pshufd ($inout4,$rndkey1,2<<6);
789
- &por ($inout3,$inout5);
790
- &je (&label("ctr32_four")); # still tests the flags of "cmp $len,4" above
791
-
792
- &por ($inout4,$inout5);
793
- &call ("_aesni_encrypt6"); # remaining case; only the first five results are stored below
794
- &movups ($rndkey1,&QWP(0,$inp));
795
- &movups ($rndkey0,&QWP(0x10,$inp));
796
- &xorps ($inout0,$rndkey1);
797
- &movups ($rndkey1,&QWP(0x20,$inp));
798
- &xorps ($inout1,$rndkey0);
799
- &movups ($rndkey0,&QWP(0x30,$inp));
800
- &xorps ($inout2,$rndkey1);
801
- &movups ($rndkey1,&QWP(0x40,$inp));
802
- &xorps ($inout3,$rndkey0);
803
- &movups (&QWP(0,$out),$inout0);
804
- &xorps ($inout4,$rndkey1);
805
- &movups (&QWP(0x10,$out),$inout1);
806
- &movups (&QWP(0x20,$out),$inout2);
807
- &movups (&QWP(0x30,$out),$inout3);
808
- &movups (&QWP(0x40,$out),$inout4);
809
- &jmp (&label("ctr32_ret"));
810
-
811
- &set_label("ctr32_one_shortcut",16); # reached from the prologue when blocks == 1
812
- &movups ($inout0,&QWP(0,$rounds_)); # load ivec
813
- &mov ($rounds,&DWP(240,$key));
814
-
815
- &set_label("ctr32_one");
816
- if ($inline)
817
- { &aesni_inline_generate1("enc"); }
818
- else
819
- { &call ("_aesni_encrypt1"); }
820
- &movups ($in0,&QWP(0,$inp));
821
- &xorps ($in0,$inout0);
822
- &movups (&QWP(0,$out),$in0);
823
- &jmp (&label("ctr32_ret"));
824
-
825
- &set_label("ctr32_two",16);
826
- &call ("_aesni_encrypt2");
827
- &movups ($inout3,&QWP(0,$inp));
828
- &movups ($inout4,&QWP(0x10,$inp));
829
- &xorps ($inout0,$inout3);
830
- &xorps ($inout1,$inout4);
831
- &movups (&QWP(0,$out),$inout0);
832
- &movups (&QWP(0x10,$out),$inout1);
833
- &jmp (&label("ctr32_ret"));
834
-
835
- &set_label("ctr32_three",16);
836
- &call ("_aesni_encrypt3");
837
- &movups ($inout3,&QWP(0,$inp));
838
- &movups ($inout4,&QWP(0x10,$inp));
839
- &xorps ($inout0,$inout3);
840
- &movups ($inout5,&QWP(0x20,$inp));
841
- &xorps ($inout1,$inout4);
842
- &movups (&QWP(0,$out),$inout0);
843
- &xorps ($inout2,$inout5);
844
- &movups (&QWP(0x10,$out),$inout1);
845
- &movups (&QWP(0x20,$out),$inout2);
846
- &jmp (&label("ctr32_ret"));
847
-
848
- &set_label("ctr32_four",16);
849
- &call ("_aesni_encrypt4");
850
- &movups ($inout4,&QWP(0,$inp));
851
- &movups ($inout5,&QWP(0x10,$inp));
852
- &movups ($rndkey1,&QWP(0x20,$inp));
853
- &xorps ($inout0,$inout4);
854
- &movups ($rndkey0,&QWP(0x30,$inp));
855
- &xorps ($inout1,$inout5);
856
- &movups (&QWP(0,$out),$inout0);
857
- &xorps ($inout2,$rndkey1);
858
- &movups (&QWP(0x10,$out),$inout1);
859
- &xorps ($inout3,$rndkey0);
860
- &movups (&QWP(0x20,$out),$inout2);
861
- &movups (&QWP(0x30,$out),$inout3);
862
-
863
- &set_label("ctr32_ret"); # common exit: scrub registers and stack slots, then restore %esp
864
- &pxor ("xmm0","xmm0"); # clear register bank
865
- &pxor ("xmm1","xmm1");
866
- &pxor ("xmm2","xmm2");
867
- &pxor ("xmm3","xmm3");
868
- &pxor ("xmm4","xmm4");
869
- &movdqa (&QWP(32,"esp"),"xmm0"); # clear stack
870
- &pxor ("xmm5","xmm5");
871
- &movdqa (&QWP(48,"esp"),"xmm0");
872
- &pxor ("xmm6","xmm6");
873
- &movdqa (&QWP(64,"esp"),"xmm0");
874
- &pxor ("xmm7","xmm7");
875
- &mov ("esp",&DWP(80,"esp")); # restore the %esp saved in the prologue
876
- &function_end("aesni_ctr32_encrypt_blocks"); # NOTE(review): no "}" matching the earlier `if ($PREFIX eq "aesni") {` is visible after this point - confirm brace balance
877
-
878
- ######################################################################
879
- # Mechanical port from aesni-x86_64.pl.
880
- #
881
- # _aesni_set_encrypt_key is private interface,
882
- # input:
883
- # "eax" const unsigned char *userKey
884
- # $rounds int bits
885
- # $key AES_KEY *key
886
- # output:
887
- # "eax" return code (0 on success, -1 for a NULL pointer, -2 for unsupported bits)
888
- # $rounds 9, 11 or 13 for 128-, 192- or 256-bit keys (also stored into *key)
889
-
890
- &function_begin_B("_aesni_set_encrypt_key");
891
- &push ("ebp");
892
- &push ("ebx");
893
- &test ("eax","eax");
894
- &jz (&label("bad_pointer")); # userKey == NULL
895
- &test ($key,$key);
896
- &jz (&label("bad_pointer")); # key == NULL
897
-
898
- &call (&label("pic"));
899
- &set_label("pic");
900
- &blindpop("ebx");
901
- &lea ("ebx",&DWP(&label("key_const")."-".&label("pic"),"ebx")); # ebx = address of the key_const table
902
-
903
- &picmeup("ebp","OPENSSL_ia32cap_P","ebx",&label("key_const"));
904
- &movups ("xmm0",&QWP(0,"eax")); # pull first 128 bits of *userKey
905
- &xorps ("xmm4","xmm4"); # low dword of xmm4 is assumed 0
906
- &mov ("ebp",&DWP(4,"ebp"));
907
- &lea ($key,&DWP(16,$key));
908
- &and ("ebp",1<<28|1<<11); # AVX and XOP bits
909
- &cmp ($rounds,256);
910
- &je (&label("14rounds"));
911
- &cmp ($rounds,192);
912
- &je (&label("12rounds"));
913
- &cmp ($rounds,128);
914
- &jne (&label("bad_keybits"));
915
-
916
- &set_label("10rounds",16);
917
- &cmp ("ebp",1<<28); # only bit 28 of the two masked bits set: take the *_alt path
918
- &je (&label("10rounds_alt"));
919
-
920
- &mov ($rounds,9);
921
- &$movekey (&QWP(-16,$key),"xmm0"); # round 0
922
- &aeskeygenassist("xmm1","xmm0",0x01); # round 1
923
- &call (&label("key_128_cold"));
924
- &aeskeygenassist("xmm1","xmm0",0x2); # round 2
925
- &call (&label("key_128"));
926
- &aeskeygenassist("xmm1","xmm0",0x04); # round 3
927
- &call (&label("key_128"));
928
- &aeskeygenassist("xmm1","xmm0",0x08); # round 4
929
- &call (&label("key_128"));
930
- &aeskeygenassist("xmm1","xmm0",0x10); # round 5
931
- &call (&label("key_128"));
932
- &aeskeygenassist("xmm1","xmm0",0x20); # round 6
933
- &call (&label("key_128"));
934
- &aeskeygenassist("xmm1","xmm0",0x40); # round 7
935
- &call (&label("key_128"));
936
- &aeskeygenassist("xmm1","xmm0",0x80); # round 8
937
- &call (&label("key_128"));
938
- &aeskeygenassist("xmm1","xmm0",0x1b); # round 9
939
- &call (&label("key_128"));
940
- &aeskeygenassist("xmm1","xmm0",0x36); # round 10
941
- &call (&label("key_128"));
942
- &$movekey (&QWP(0,$key),"xmm0");
943
- &mov (&DWP(80,$key),$rounds); # store $rounds (9) after the key schedule
944
-
945
- &jmp (&label("good_key"));
946
-
947
- &set_label("key_128",16); # helper: store the previous round key, advance $key, derive the next key
948
- &$movekey (&QWP(0,$key),"xmm0");
949
- &lea ($key,&DWP(16,$key));
950
- &set_label("key_128_cold"); # entry used for the first round, where nothing is stored yet
951
- &shufps ("xmm4","xmm0",0b00010000);
952
- &xorps ("xmm0","xmm4");
953
- &shufps ("xmm4","xmm0",0b10001100);
954
- &xorps ("xmm0","xmm4");
955
- &shufps ("xmm1","xmm1",0b11111111); # critical path
956
- &xorps ("xmm0","xmm1");
957
- &ret();
958
-
959
- &set_label("10rounds_alt",16);
960
- &movdqa ("xmm5",&QWP(0x00,"ebx")); # pshufb mask from key_const+0x00
961
- &mov ($rounds,8); # loop count for loop_key128
962
- &movdqa ("xmm4",&QWP(0x20,"ebx")); # constant from key_const+0x20
963
- &movdqa ("xmm2","xmm0");
964
- &movdqu (&QWP(-16,$key),"xmm0");
965
-
966
- &set_label("loop_key128");
967
- &pshufb ("xmm0","xmm5");
968
- &aesenclast ("xmm0","xmm4");
969
- &pslld ("xmm4",1);
970
- &lea ($key,&DWP(16,$key));
971
-
972
- &movdqa ("xmm3","xmm2");
973
- &pslldq ("xmm2",4);
974
- &pxor ("xmm3","xmm2");
975
- &pslldq ("xmm2",4);
976
- &pxor ("xmm3","xmm2");
977
- &pslldq ("xmm2",4);
978
- &pxor ("xmm2","xmm3");
979
-
980
- &pxor ("xmm0","xmm2");
981
- &movdqu (&QWP(-16,$key),"xmm0");
982
- &movdqa ("xmm2","xmm0");
983
-
984
- &dec ($rounds);
985
- &jnz (&label("loop_key128"));
986
-
987
- &movdqa ("xmm4",&QWP(0x30,"ebx")); # constant from key_const+0x30 for the last two round keys
988
-
989
- &pshufb ("xmm0","xmm5");
990
- &aesenclast ("xmm0","xmm4");
991
- &pslld ("xmm4",1);
992
-
993
- &movdqa ("xmm3","xmm2");
994
- &pslldq ("xmm2",4);
995
- &pxor ("xmm3","xmm2");
996
- &pslldq ("xmm2",4);
997
- &pxor ("xmm3","xmm2");
998
- &pslldq ("xmm2",4);
999
- &pxor ("xmm2","xmm3");
1000
-
1001
- &pxor ("xmm0","xmm2");
1002
- &movdqu (&QWP(0,$key),"xmm0");
1003
-
1004
- &movdqa ("xmm2","xmm0");
1005
- &pshufb ("xmm0","xmm5");
1006
- &aesenclast ("xmm0","xmm4");
1007
-
1008
- &movdqa ("xmm3","xmm2");
1009
- &pslldq ("xmm2",4);
1010
- &pxor ("xmm3","xmm2");
1011
- &pslldq ("xmm2",4);
1012
- &pxor ("xmm3","xmm2");
1013
- &pslldq ("xmm2",4);
1014
- &pxor ("xmm2","xmm3");
1015
-
1016
- &pxor ("xmm0","xmm2");
1017
- &movdqu (&QWP(16,$key),"xmm0");
1018
-
1019
- &mov ($rounds,9);
1020
- &mov (&DWP(96,$key),$rounds); # store $rounds (9) after the key schedule
1021
-
1022
- &jmp (&label("good_key"));
1023
-
1024
- &set_label("12rounds",16);
1025
- &movq ("xmm2",&QWP(16,"eax")); # remaining 1/3 of *userKey
1026
- &cmp ("ebp",1<<28);
1027
- &je (&label("12rounds_alt"));
1028
-
1029
- &mov ($rounds,11);
1030
- &$movekey (&QWP(-16,$key),"xmm0"); # round 0
1031
- &aeskeygenassist("xmm1","xmm2",0x01); # round 1,2
1032
- &call (&label("key_192a_cold"));
1033
- &aeskeygenassist("xmm1","xmm2",0x02); # round 2,3
1034
- &call (&label("key_192b"));
1035
- &aeskeygenassist("xmm1","xmm2",0x04); # round 4,5
1036
- &call (&label("key_192a"));
1037
- &aeskeygenassist("xmm1","xmm2",0x08); # round 5,6
1038
- &call (&label("key_192b"));
1039
- &aeskeygenassist("xmm1","xmm2",0x10); # round 7,8
1040
- &call (&label("key_192a"));
1041
- &aeskeygenassist("xmm1","xmm2",0x20); # round 8,9
1042
- &call (&label("key_192b"));
1043
- &aeskeygenassist("xmm1","xmm2",0x40); # round 10,11
1044
- &call (&label("key_192a"));
1045
- &aeskeygenassist("xmm1","xmm2",0x80); # round 11,12
1046
- &call (&label("key_192b"));
1047
- &$movekey (&QWP(0,$key),"xmm0");
1048
- &mov (&DWP(48,$key),$rounds); # store $rounds (11) after the key schedule
1049
-
1050
- &jmp (&label("good_key"));
1051
-
1052
- &set_label("key_192a",16);
1053
- &$movekey (&QWP(0,$key),"xmm0");
1054
- &lea ($key,&DWP(16,$key));
1055
- &set_label("key_192a_cold",16);
1056
- &movaps ("xmm5","xmm2");
1057
- &set_label("key_192b_warm"); # shared tail, also reached by the jmp at the end of key_192b
1058
- &shufps ("xmm4","xmm0",0b00010000);
1059
- &movdqa ("xmm3","xmm2");
1060
- &xorps ("xmm0","xmm4");
1061
- &shufps ("xmm4","xmm0",0b10001100);
1062
- &pslldq ("xmm3",4);
1063
- &xorps ("xmm0","xmm4");
1064
- &pshufd ("xmm1","xmm1",0b01010101); # critical path
1065
- &pxor ("xmm2","xmm3");
1066
- &pxor ("xmm0","xmm1");
1067
- &pshufd ("xmm3","xmm0",0b11111111);
1068
- &pxor ("xmm2","xmm3");
1069
- &ret();
1070
-
1071
- &set_label("key_192b",16); # stores two 16-byte round keys, then continues at key_192b_warm
1072
- &movaps ("xmm3","xmm0");
1073
- &shufps ("xmm5","xmm0",0b01000100);
1074
- &$movekey (&QWP(0,$key),"xmm5");
1075
- &shufps ("xmm3","xmm2",0b01001110);
1076
- &$movekey (&QWP(16,$key),"xmm3");
1077
- &lea ($key,&DWP(32,$key));
1078
- &jmp (&label("key_192b_warm"));
1079
-
1080
- &set_label("12rounds_alt",16);
1081
- &movdqa ("xmm5",&QWP(0x10,"ebx")); # pshufb mask from key_const+0x10
1082
- &movdqa ("xmm4",&QWP(0x20,"ebx")); # constant from key_const+0x20
1083
- &mov ($rounds,8); # loop count for loop_key192
1084
- &movdqu (&QWP(-16,$key),"xmm0");
1085
-
1086
- &set_label("loop_key192"); # each iteration advances $key by 24 bytes
1087
- &movq (&QWP(0,$key),"xmm2");
1088
- &movdqa ("xmm1","xmm2");
1089
- &pshufb ("xmm2","xmm5");
1090
- &aesenclast ("xmm2","xmm4");
1091
- &pslld ("xmm4",1);
1092
- &lea ($key,&DWP(24,$key));
1093
-
1094
- &movdqa ("xmm3","xmm0");
1095
- &pslldq ("xmm0",4);
1096
- &pxor ("xmm3","xmm0");
1097
- &pslldq ("xmm0",4);
1098
- &pxor ("xmm3","xmm0");
1099
- &pslldq ("xmm0",4);
1100
- &pxor ("xmm0","xmm3");
1101
-
1102
- &pshufd ("xmm3","xmm0",0xff);
1103
- &pxor ("xmm3","xmm1");
1104
- &pslldq ("xmm1",4);
1105
- &pxor ("xmm3","xmm1");
1106
-
1107
- &pxor ("xmm0","xmm2");
1108
- &pxor ("xmm2","xmm3");
1109
- &movdqu (&QWP(-16,$key),"xmm0");
1110
-
1111
- &dec ($rounds);
1112
- &jnz (&label("loop_key192"));
1113
-
1114
- &mov ($rounds,11);
1115
- &mov (&DWP(32,$key),$rounds); # store $rounds (11) after the key schedule
1116
-
1117
- &jmp (&label("good_key"));
1118
-
1119
- &set_label("14rounds",16);
1120
- &movups ("xmm2",&QWP(16,"eax")); # remaining half of *userKey
1121
- &lea ($key,&DWP(16,$key));
1122
- &cmp ("ebp",1<<28);
1123
- &je (&label("14rounds_alt"));
1124
-
1125
- &mov ($rounds,13);
1126
- &$movekey (&QWP(-32,$key),"xmm0"); # round 0
1127
- &$movekey (&QWP(-16,$key),"xmm2"); # round 1
1128
- &aeskeygenassist("xmm1","xmm2",0x01); # round 2
1129
- &call (&label("key_256a_cold"));
1130
- &aeskeygenassist("xmm1","xmm0",0x01); # round 3
1131
- &call (&label("key_256b"));
1132
- &aeskeygenassist("xmm1","xmm2",0x02); # round 4
1133
- &call (&label("key_256a"));
1134
- &aeskeygenassist("xmm1","xmm0",0x02); # round 5
1135
- &call (&label("key_256b"));
1136
- &aeskeygenassist("xmm1","xmm2",0x04); # round 6
1137
- &call (&label("key_256a"));
1138
- &aeskeygenassist("xmm1","xmm0",0x04); # round 7
1139
- &call (&label("key_256b"));
1140
- &aeskeygenassist("xmm1","xmm2",0x08); # round 8
1141
- &call (&label("key_256a"));
1142
- &aeskeygenassist("xmm1","xmm0",0x08); # round 9
1143
- &call (&label("key_256b"));
1144
- &aeskeygenassist("xmm1","xmm2",0x10); # round 10
1145
- &call (&label("key_256a"));
1146
- &aeskeygenassist("xmm1","xmm0",0x10); # round 11
1147
- &call (&label("key_256b"));
1148
- &aeskeygenassist("xmm1","xmm2",0x20); # round 12
1149
- &call (&label("key_256a"));
1150
- &aeskeygenassist("xmm1","xmm0",0x20); # round 13
1151
- &call (&label("key_256b"));
1152
- &aeskeygenassist("xmm1","xmm2",0x40); # round 14
1153
- &call (&label("key_256a"));
1154
- &$movekey (&QWP(0,$key),"xmm0");
1155
- &mov (&DWP(16,$key),$rounds); # store $rounds (13) after the key schedule
1156
- &xor ("eax","eax"); # eax is cleared again at good_key
1157
-
1158
- &jmp (&label("good_key"));
1159
-
1160
- &set_label("key_256a",16);
1161
- &$movekey (&QWP(0,$key),"xmm2");
1162
- &lea ($key,&DWP(16,$key));
1163
- &set_label("key_256a_cold");
1164
- &shufps ("xmm4","xmm0",0b00010000);
1165
- &xorps ("xmm0","xmm4");
1166
- &shufps ("xmm4","xmm0",0b10001100);
1167
- &xorps ("xmm0","xmm4");
1168
- &shufps ("xmm1","xmm1",0b11111111); # critical path
1169
- &xorps ("xmm0","xmm1");
1170
- &ret();
1171
-
1172
- &set_label("key_256b",16);
1173
- &$movekey (&QWP(0,$key),"xmm0");
1174
- &lea ($key,&DWP(16,$key));
1175
-
1176
- &shufps ("xmm4","xmm2",0b00010000);
1177
- &xorps ("xmm2","xmm4");
1178
- &shufps ("xmm4","xmm2",0b10001100);
1179
- &xorps ("xmm2","xmm4");
1180
- &shufps ("xmm1","xmm1",0b10101010); # critical path
1181
- &xorps ("xmm2","xmm1");
1182
- &ret();
1183
-
1184
- &set_label("14rounds_alt",16);
1185
- &movdqa ("xmm5",&QWP(0x00,"ebx")); # pshufb mask from key_const+0x00
1186
- &movdqa ("xmm4",&QWP(0x20,"ebx")); # constant from key_const+0x20
1187
- &mov ($rounds,7); # loop count for loop_key256
1188
- &movdqu (&QWP(-32,$key),"xmm0");
1189
- &movdqa ("xmm1","xmm2");
1190
- &movdqu (&QWP(-16,$key),"xmm2");
1191
-
1192
- &set_label("loop_key256");
1193
- &pshufb ("xmm2","xmm5");
1194
- &aesenclast ("xmm2","xmm4");
1195
-
1196
- &movdqa ("xmm3","xmm0");
1197
- &pslldq ("xmm0",4);
1198
- &pxor ("xmm3","xmm0");
1199
- &pslldq ("xmm0",4);
1200
- &pxor ("xmm3","xmm0");
1201
- &pslldq ("xmm0",4);
1202
- &pxor ("xmm0","xmm3");
1203
- &pslld ("xmm4",1);
1204
-
1205
- &pxor ("xmm0","xmm2");
1206
- &movdqu (&QWP(0,$key),"xmm0");
1207
-
1208
- &dec ($rounds);
1209
- &jz (&label("done_key256")); # exit after the 7th even-numbered key has been stored
1210
-
1211
- &pshufd ("xmm2","xmm0",0xff);
1212
- &pxor ("xmm3","xmm3");
1213
- &aesenclast ("xmm2","xmm3");
1214
-
1215
- &movdqa ("xmm3","xmm1"); # statement terminator added: previously this call and the next were parsed as one expression joined by binary "&"
1216
- &pslldq ("xmm1",4);
1217
- &pxor ("xmm3","xmm1");
1218
- &pslldq ("xmm1",4);
1219
- &pxor ("xmm3","xmm1");
1220
- &pslldq ("xmm1",4);
1221
- &pxor ("xmm1","xmm3");
1222
-
1223
- &pxor ("xmm2","xmm1");
1224
- &movdqu (&QWP(16,$key),"xmm2");
1225
- &lea ($key,&DWP(32,$key));
1226
- &movdqa ("xmm1","xmm2");
1227
- &jmp (&label("loop_key256"));
1228
-
1229
- &set_label("done_key256");
1230
- &mov ($rounds,13);
1231
- &mov (&DWP(16,$key),$rounds); # store $rounds (13) after the key schedule
1232
-
1233
- &set_label("good_key"); # success exit: wipe the xmm registers that held key material, return 0
1234
- &pxor ("xmm0","xmm0");
1235
- &pxor ("xmm1","xmm1");
1236
- &pxor ("xmm2","xmm2");
1237
- &pxor ("xmm3","xmm3");
1238
- &pxor ("xmm4","xmm4");
1239
- &pxor ("xmm5","xmm5");
1240
- &xor ("eax","eax");
1241
- &pop ("ebx");
1242
- &pop ("ebp");
1243
- &ret ();
1244
-
1245
- &set_label("bad_pointer",4); # userKey or key was NULL
1246
- &mov ("eax",-1);
1247
- &pop ("ebx");
1248
- &pop ("ebp");
1249
- &ret ();
1250
- &set_label("bad_keybits",4); # bits was not 128, 192 or 256
1251
- &pxor ("xmm0","xmm0"); # xmm0 already holds the first 16 bytes of userKey here, so wipe it
1252
- &mov ("eax",-2);
1253
- &pop ("ebx");
1254
- &pop ("ebp");
1255
- &ret ();
1256
- &function_end_B("_aesni_set_encrypt_key");
1257
-
1258
- # int $PREFIX_set_encrypt_key (const unsigned char *userKey, int bits,
1259
- # AES_KEY *key)
1260
- &function_begin_B("${PREFIX}_set_encrypt_key"); # public wrapper: moves the stack arguments into the registers the private routine expects
1261
- &mov ("eax",&wparam(0)); # userKey
1262
- &mov ($rounds,&wparam(1)); # bits
1263
- &mov ($key,&wparam(2)); # key
1264
- &call ("_aesni_set_encrypt_key");
1265
- &ret (); # eax still holds the return code set by _aesni_set_encrypt_key
1266
- &function_end_B("${PREFIX}_set_encrypt_key");
1267
-
1268
- # int $PREFIX_set_decrypt_key (const unsigned char *userKey, int bits,
1269
- # AES_KEY *key)
1270
- &function_begin_B("${PREFIX}_set_decrypt_key"); # builds the encryption schedule, then reverses it in place applying aesimc to the inner keys
1271
- &mov ("eax",&wparam(0)); # userKey
1272
- &mov ($rounds,&wparam(1)); # bits
1273
- &mov ($key,&wparam(2)); # key
1274
- &call ("_aesni_set_encrypt_key");
1275
- &mov ($key,&wparam(2)); # reload key: the callee advanced $key while writing the schedule
1276
- &shl ($rounds,4); # rounds-1 after _aesni_set_encrypt_key
1277
- &test ("eax","eax");
1278
- &jnz (&label("dec_key_ret")); # propagate a non-zero return code unchanged
1279
- &lea ("eax",&DWP(16,$key,$rounds)); # end of key schedule
1280
-
1281
- &$movekey ("xmm0",&QWP(0,$key)); # just swap
1282
- &$movekey ("xmm1",&QWP(0,"eax"));
1283
- &$movekey (&QWP(0,"eax"),"xmm0");
1284
- &$movekey (&QWP(0,$key),"xmm1");
1285
- &lea ($key,&DWP(16,$key));
1286
- &lea ("eax",&DWP(-16,"eax"));
1287
-
1288
- &set_label("dec_key_inverse"); # walks $key upward and eax downward until they meet
1289
- &$movekey ("xmm0",&QWP(0,$key)); # swap and inverse
1290
- &$movekey ("xmm1",&QWP(0,"eax"));
1291
- &aesimc ("xmm0","xmm0");
1292
- &aesimc ("xmm1","xmm1");
1293
- &lea ($key,&DWP(16,$key));
1294
- &lea ("eax",&DWP(-16,"eax"));
1295
- &$movekey (&QWP(16,"eax"),"xmm0"); # both pointers were already stepped, hence the +16 / -16 displacements
1296
- &$movekey (&QWP(-16,$key),"xmm1");
1297
- &cmp ("eax",$key);
1298
- &ja (&label("dec_key_inverse"));
1299
-
1300
- &$movekey ("xmm0",&QWP(0,$key)); # inverse middle
1301
- &aesimc ("xmm0","xmm0");
1302
- &$movekey (&QWP(0,$key),"xmm0");
1303
-
1304
- &pxor ("xmm0","xmm0"); # wipe the registers that held round keys
1305
- &pxor ("xmm1","xmm1");
1306
- &xor ("eax","eax"); # return success
1307
- &set_label("dec_key_ret");
1308
- &ret ();
1309
- &function_end_B("${PREFIX}_set_decrypt_key");
1310
-
1311
- &set_label("key_const",64); # constant table addressed through ebx by the *_alt key-schedule paths
1312
- &data_word(0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d); # loaded as the pshufb mask from offset 0x00 (10rounds_alt, 14rounds_alt)
1313
- &data_word(0x04070605,0x04070605,0x04070605,0x04070605); # loaded as the pshufb mask from offset 0x10 (12rounds_alt)
1314
- &data_word(1,1,1,1); # loaded into xmm4 from offset 0x20 and shifted left by one each iteration
1315
- &data_word(0x1b,0x1b,0x1b,0x1b); # loaded into xmm4 from offset 0x30 near the end of 10rounds_alt
1316
- &asciz("AES for Intel AES-NI, CRYPTOGAMS by <appro\@openssl.org>"); # identification string embedded in the output
1317
-
1318
- &asm_finish();