ring-native 0.0.0 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (267) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -0
  3. data/CHANGES.md +7 -0
  4. data/Makefile +5 -0
  5. data/README.md +12 -5
  6. data/Rakefile +4 -0
  7. data/ext/ring/extconf.rb +4 -5
  8. data/lib/ring/native.rb +3 -1
  9. data/lib/ring/native/version.rb +5 -1
  10. data/ring-native.gemspec +6 -6
  11. data/vendor/ring-ffi/Cargo.lock +26 -0
  12. data/vendor/ring-ffi/Cargo.toml +45 -0
  13. data/vendor/ring-ffi/LICENSE +16 -0
  14. data/vendor/ring-ffi/README.md +59 -0
  15. data/vendor/ring-ffi/src/lib.rs +79 -0
  16. metadata +10 -255
  17. data/vendor/ring/BUILDING.md +0 -40
  18. data/vendor/ring/Cargo.toml +0 -43
  19. data/vendor/ring/LICENSE +0 -185
  20. data/vendor/ring/Makefile +0 -35
  21. data/vendor/ring/PORTING.md +0 -163
  22. data/vendor/ring/README.md +0 -113
  23. data/vendor/ring/STYLE.md +0 -197
  24. data/vendor/ring/appveyor.yml +0 -27
  25. data/vendor/ring/build.rs +0 -108
  26. data/vendor/ring/crypto/aes/aes.c +0 -1142
  27. data/vendor/ring/crypto/aes/aes_test.Windows.vcxproj +0 -25
  28. data/vendor/ring/crypto/aes/aes_test.cc +0 -93
  29. data/vendor/ring/crypto/aes/asm/aes-586.pl +0 -2368
  30. data/vendor/ring/crypto/aes/asm/aes-armv4.pl +0 -1249
  31. data/vendor/ring/crypto/aes/asm/aes-x86_64.pl +0 -2246
  32. data/vendor/ring/crypto/aes/asm/aesni-x86.pl +0 -1318
  33. data/vendor/ring/crypto/aes/asm/aesni-x86_64.pl +0 -2084
  34. data/vendor/ring/crypto/aes/asm/aesv8-armx.pl +0 -675
  35. data/vendor/ring/crypto/aes/asm/bsaes-armv7.pl +0 -1364
  36. data/vendor/ring/crypto/aes/asm/bsaes-x86_64.pl +0 -1565
  37. data/vendor/ring/crypto/aes/asm/vpaes-x86.pl +0 -841
  38. data/vendor/ring/crypto/aes/asm/vpaes-x86_64.pl +0 -1116
  39. data/vendor/ring/crypto/aes/internal.h +0 -87
  40. data/vendor/ring/crypto/aes/mode_wrappers.c +0 -61
  41. data/vendor/ring/crypto/bn/add.c +0 -394
  42. data/vendor/ring/crypto/bn/asm/armv4-mont.pl +0 -694
  43. data/vendor/ring/crypto/bn/asm/armv8-mont.pl +0 -1503
  44. data/vendor/ring/crypto/bn/asm/bn-586.pl +0 -774
  45. data/vendor/ring/crypto/bn/asm/co-586.pl +0 -287
  46. data/vendor/ring/crypto/bn/asm/rsaz-avx2.pl +0 -1882
  47. data/vendor/ring/crypto/bn/asm/x86-mont.pl +0 -592
  48. data/vendor/ring/crypto/bn/asm/x86_64-gcc.c +0 -599
  49. data/vendor/ring/crypto/bn/asm/x86_64-mont.pl +0 -1393
  50. data/vendor/ring/crypto/bn/asm/x86_64-mont5.pl +0 -3507
  51. data/vendor/ring/crypto/bn/bn.c +0 -352
  52. data/vendor/ring/crypto/bn/bn_asn1.c +0 -74
  53. data/vendor/ring/crypto/bn/bn_test.Windows.vcxproj +0 -25
  54. data/vendor/ring/crypto/bn/bn_test.cc +0 -1696
  55. data/vendor/ring/crypto/bn/cmp.c +0 -200
  56. data/vendor/ring/crypto/bn/convert.c +0 -433
  57. data/vendor/ring/crypto/bn/ctx.c +0 -311
  58. data/vendor/ring/crypto/bn/div.c +0 -594
  59. data/vendor/ring/crypto/bn/exponentiation.c +0 -1335
  60. data/vendor/ring/crypto/bn/gcd.c +0 -711
  61. data/vendor/ring/crypto/bn/generic.c +0 -1019
  62. data/vendor/ring/crypto/bn/internal.h +0 -316
  63. data/vendor/ring/crypto/bn/montgomery.c +0 -516
  64. data/vendor/ring/crypto/bn/mul.c +0 -888
  65. data/vendor/ring/crypto/bn/prime.c +0 -829
  66. data/vendor/ring/crypto/bn/random.c +0 -334
  67. data/vendor/ring/crypto/bn/rsaz_exp.c +0 -262
  68. data/vendor/ring/crypto/bn/rsaz_exp.h +0 -53
  69. data/vendor/ring/crypto/bn/shift.c +0 -276
  70. data/vendor/ring/crypto/bytestring/bytestring_test.Windows.vcxproj +0 -25
  71. data/vendor/ring/crypto/bytestring/bytestring_test.cc +0 -421
  72. data/vendor/ring/crypto/bytestring/cbb.c +0 -399
  73. data/vendor/ring/crypto/bytestring/cbs.c +0 -227
  74. data/vendor/ring/crypto/bytestring/internal.h +0 -46
  75. data/vendor/ring/crypto/chacha/chacha_generic.c +0 -140
  76. data/vendor/ring/crypto/chacha/chacha_vec.c +0 -323
  77. data/vendor/ring/crypto/chacha/chacha_vec_arm.S +0 -1447
  78. data/vendor/ring/crypto/chacha/chacha_vec_arm_generate.go +0 -153
  79. data/vendor/ring/crypto/cipher/cipher_test.Windows.vcxproj +0 -25
  80. data/vendor/ring/crypto/cipher/e_aes.c +0 -390
  81. data/vendor/ring/crypto/cipher/e_chacha20poly1305.c +0 -208
  82. data/vendor/ring/crypto/cipher/internal.h +0 -173
  83. data/vendor/ring/crypto/cipher/test/aes_128_gcm_tests.txt +0 -543
  84. data/vendor/ring/crypto/cipher/test/aes_128_key_wrap_tests.txt +0 -9
  85. data/vendor/ring/crypto/cipher/test/aes_256_gcm_tests.txt +0 -475
  86. data/vendor/ring/crypto/cipher/test/aes_256_key_wrap_tests.txt +0 -23
  87. data/vendor/ring/crypto/cipher/test/chacha20_poly1305_old_tests.txt +0 -422
  88. data/vendor/ring/crypto/cipher/test/chacha20_poly1305_tests.txt +0 -484
  89. data/vendor/ring/crypto/cipher/test/cipher_test.txt +0 -100
  90. data/vendor/ring/crypto/constant_time_test.Windows.vcxproj +0 -25
  91. data/vendor/ring/crypto/constant_time_test.c +0 -304
  92. data/vendor/ring/crypto/cpu-arm-asm.S +0 -32
  93. data/vendor/ring/crypto/cpu-arm.c +0 -199
  94. data/vendor/ring/crypto/cpu-intel.c +0 -261
  95. data/vendor/ring/crypto/crypto.c +0 -151
  96. data/vendor/ring/crypto/curve25519/asm/x25519-arm.S +0 -2118
  97. data/vendor/ring/crypto/curve25519/curve25519.c +0 -4888
  98. data/vendor/ring/crypto/curve25519/x25519_test.cc +0 -128
  99. data/vendor/ring/crypto/digest/md32_common.h +0 -181
  100. data/vendor/ring/crypto/ec/asm/p256-x86_64-asm.pl +0 -2725
  101. data/vendor/ring/crypto/ec/ec.c +0 -193
  102. data/vendor/ring/crypto/ec/ec_curves.c +0 -61
  103. data/vendor/ring/crypto/ec/ec_key.c +0 -228
  104. data/vendor/ring/crypto/ec/ec_montgomery.c +0 -114
  105. data/vendor/ring/crypto/ec/example_mul.Windows.vcxproj +0 -25
  106. data/vendor/ring/crypto/ec/internal.h +0 -243
  107. data/vendor/ring/crypto/ec/oct.c +0 -253
  108. data/vendor/ring/crypto/ec/p256-64.c +0 -1794
  109. data/vendor/ring/crypto/ec/p256-x86_64-table.h +0 -9548
  110. data/vendor/ring/crypto/ec/p256-x86_64.c +0 -509
  111. data/vendor/ring/crypto/ec/simple.c +0 -1007
  112. data/vendor/ring/crypto/ec/util-64.c +0 -183
  113. data/vendor/ring/crypto/ec/wnaf.c +0 -508
  114. data/vendor/ring/crypto/ecdh/ecdh.c +0 -155
  115. data/vendor/ring/crypto/ecdsa/ecdsa.c +0 -304
  116. data/vendor/ring/crypto/ecdsa/ecdsa_asn1.c +0 -193
  117. data/vendor/ring/crypto/ecdsa/ecdsa_test.Windows.vcxproj +0 -25
  118. data/vendor/ring/crypto/ecdsa/ecdsa_test.cc +0 -327
  119. data/vendor/ring/crypto/header_removed.h +0 -17
  120. data/vendor/ring/crypto/internal.h +0 -495
  121. data/vendor/ring/crypto/libring.Windows.vcxproj +0 -101
  122. data/vendor/ring/crypto/mem.c +0 -98
  123. data/vendor/ring/crypto/modes/asm/aesni-gcm-x86_64.pl +0 -1045
  124. data/vendor/ring/crypto/modes/asm/ghash-armv4.pl +0 -517
  125. data/vendor/ring/crypto/modes/asm/ghash-x86.pl +0 -1393
  126. data/vendor/ring/crypto/modes/asm/ghash-x86_64.pl +0 -1741
  127. data/vendor/ring/crypto/modes/asm/ghashv8-armx.pl +0 -422
  128. data/vendor/ring/crypto/modes/ctr.c +0 -226
  129. data/vendor/ring/crypto/modes/gcm.c +0 -1206
  130. data/vendor/ring/crypto/modes/gcm_test.Windows.vcxproj +0 -25
  131. data/vendor/ring/crypto/modes/gcm_test.c +0 -348
  132. data/vendor/ring/crypto/modes/internal.h +0 -299
  133. data/vendor/ring/crypto/perlasm/arm-xlate.pl +0 -170
  134. data/vendor/ring/crypto/perlasm/readme +0 -100
  135. data/vendor/ring/crypto/perlasm/x86_64-xlate.pl +0 -1164
  136. data/vendor/ring/crypto/perlasm/x86asm.pl +0 -292
  137. data/vendor/ring/crypto/perlasm/x86gas.pl +0 -263
  138. data/vendor/ring/crypto/perlasm/x86masm.pl +0 -200
  139. data/vendor/ring/crypto/perlasm/x86nasm.pl +0 -187
  140. data/vendor/ring/crypto/poly1305/poly1305.c +0 -331
  141. data/vendor/ring/crypto/poly1305/poly1305_arm.c +0 -301
  142. data/vendor/ring/crypto/poly1305/poly1305_arm_asm.S +0 -2015
  143. data/vendor/ring/crypto/poly1305/poly1305_test.Windows.vcxproj +0 -25
  144. data/vendor/ring/crypto/poly1305/poly1305_test.cc +0 -80
  145. data/vendor/ring/crypto/poly1305/poly1305_test.txt +0 -52
  146. data/vendor/ring/crypto/poly1305/poly1305_vec.c +0 -892
  147. data/vendor/ring/crypto/rand/asm/rdrand-x86_64.pl +0 -75
  148. data/vendor/ring/crypto/rand/internal.h +0 -32
  149. data/vendor/ring/crypto/rand/rand.c +0 -189
  150. data/vendor/ring/crypto/rand/urandom.c +0 -219
  151. data/vendor/ring/crypto/rand/windows.c +0 -56
  152. data/vendor/ring/crypto/refcount_c11.c +0 -66
  153. data/vendor/ring/crypto/refcount_lock.c +0 -53
  154. data/vendor/ring/crypto/refcount_test.Windows.vcxproj +0 -25
  155. data/vendor/ring/crypto/refcount_test.c +0 -58
  156. data/vendor/ring/crypto/rsa/blinding.c +0 -462
  157. data/vendor/ring/crypto/rsa/internal.h +0 -108
  158. data/vendor/ring/crypto/rsa/padding.c +0 -300
  159. data/vendor/ring/crypto/rsa/rsa.c +0 -450
  160. data/vendor/ring/crypto/rsa/rsa_asn1.c +0 -261
  161. data/vendor/ring/crypto/rsa/rsa_impl.c +0 -944
  162. data/vendor/ring/crypto/rsa/rsa_test.Windows.vcxproj +0 -25
  163. data/vendor/ring/crypto/rsa/rsa_test.cc +0 -437
  164. data/vendor/ring/crypto/sha/asm/sha-armv8.pl +0 -436
  165. data/vendor/ring/crypto/sha/asm/sha-x86_64.pl +0 -2390
  166. data/vendor/ring/crypto/sha/asm/sha256-586.pl +0 -1275
  167. data/vendor/ring/crypto/sha/asm/sha256-armv4.pl +0 -735
  168. data/vendor/ring/crypto/sha/asm/sha256-armv8.pl +0 -14
  169. data/vendor/ring/crypto/sha/asm/sha256-x86_64.pl +0 -14
  170. data/vendor/ring/crypto/sha/asm/sha512-586.pl +0 -911
  171. data/vendor/ring/crypto/sha/asm/sha512-armv4.pl +0 -666
  172. data/vendor/ring/crypto/sha/asm/sha512-armv8.pl +0 -14
  173. data/vendor/ring/crypto/sha/asm/sha512-x86_64.pl +0 -14
  174. data/vendor/ring/crypto/sha/sha1.c +0 -271
  175. data/vendor/ring/crypto/sha/sha256.c +0 -204
  176. data/vendor/ring/crypto/sha/sha512.c +0 -355
  177. data/vendor/ring/crypto/test/file_test.cc +0 -326
  178. data/vendor/ring/crypto/test/file_test.h +0 -181
  179. data/vendor/ring/crypto/test/malloc.cc +0 -150
  180. data/vendor/ring/crypto/test/scoped_types.h +0 -95
  181. data/vendor/ring/crypto/test/test.Windows.vcxproj +0 -35
  182. data/vendor/ring/crypto/test/test_util.cc +0 -46
  183. data/vendor/ring/crypto/test/test_util.h +0 -41
  184. data/vendor/ring/crypto/thread_none.c +0 -55
  185. data/vendor/ring/crypto/thread_pthread.c +0 -165
  186. data/vendor/ring/crypto/thread_test.Windows.vcxproj +0 -25
  187. data/vendor/ring/crypto/thread_test.c +0 -200
  188. data/vendor/ring/crypto/thread_win.c +0 -282
  189. data/vendor/ring/examples/checkdigest.rs +0 -103
  190. data/vendor/ring/include/openssl/aes.h +0 -121
  191. data/vendor/ring/include/openssl/arm_arch.h +0 -129
  192. data/vendor/ring/include/openssl/base.h +0 -156
  193. data/vendor/ring/include/openssl/bn.h +0 -794
  194. data/vendor/ring/include/openssl/buffer.h +0 -18
  195. data/vendor/ring/include/openssl/bytestring.h +0 -235
  196. data/vendor/ring/include/openssl/chacha.h +0 -37
  197. data/vendor/ring/include/openssl/cmac.h +0 -76
  198. data/vendor/ring/include/openssl/cpu.h +0 -184
  199. data/vendor/ring/include/openssl/crypto.h +0 -43
  200. data/vendor/ring/include/openssl/curve25519.h +0 -88
  201. data/vendor/ring/include/openssl/ec.h +0 -225
  202. data/vendor/ring/include/openssl/ec_key.h +0 -129
  203. data/vendor/ring/include/openssl/ecdh.h +0 -110
  204. data/vendor/ring/include/openssl/ecdsa.h +0 -156
  205. data/vendor/ring/include/openssl/err.h +0 -201
  206. data/vendor/ring/include/openssl/mem.h +0 -101
  207. data/vendor/ring/include/openssl/obj_mac.h +0 -71
  208. data/vendor/ring/include/openssl/opensslfeatures.h +0 -68
  209. data/vendor/ring/include/openssl/opensslv.h +0 -18
  210. data/vendor/ring/include/openssl/ossl_typ.h +0 -18
  211. data/vendor/ring/include/openssl/poly1305.h +0 -51
  212. data/vendor/ring/include/openssl/rand.h +0 -70
  213. data/vendor/ring/include/openssl/rsa.h +0 -399
  214. data/vendor/ring/include/openssl/thread.h +0 -133
  215. data/vendor/ring/include/openssl/type_check.h +0 -71
  216. data/vendor/ring/mk/Common.props +0 -63
  217. data/vendor/ring/mk/Windows.props +0 -42
  218. data/vendor/ring/mk/WindowsTest.props +0 -18
  219. data/vendor/ring/mk/appveyor.bat +0 -62
  220. data/vendor/ring/mk/bottom_of_makefile.mk +0 -54
  221. data/vendor/ring/mk/ring.mk +0 -266
  222. data/vendor/ring/mk/top_of_makefile.mk +0 -214
  223. data/vendor/ring/mk/travis.sh +0 -40
  224. data/vendor/ring/mk/update-travis-yml.py +0 -229
  225. data/vendor/ring/ring.sln +0 -153
  226. data/vendor/ring/src/aead.rs +0 -682
  227. data/vendor/ring/src/agreement.rs +0 -248
  228. data/vendor/ring/src/c.rs +0 -129
  229. data/vendor/ring/src/constant_time.rs +0 -37
  230. data/vendor/ring/src/der.rs +0 -96
  231. data/vendor/ring/src/digest.rs +0 -690
  232. data/vendor/ring/src/digest_tests.txt +0 -57
  233. data/vendor/ring/src/ecc.rs +0 -28
  234. data/vendor/ring/src/ecc_build.rs +0 -279
  235. data/vendor/ring/src/ecc_curves.rs +0 -117
  236. data/vendor/ring/src/ed25519_tests.txt +0 -2579
  237. data/vendor/ring/src/exe_tests.rs +0 -46
  238. data/vendor/ring/src/ffi.rs +0 -29
  239. data/vendor/ring/src/file_test.rs +0 -187
  240. data/vendor/ring/src/hkdf.rs +0 -153
  241. data/vendor/ring/src/hkdf_tests.txt +0 -59
  242. data/vendor/ring/src/hmac.rs +0 -414
  243. data/vendor/ring/src/hmac_tests.txt +0 -97
  244. data/vendor/ring/src/input.rs +0 -312
  245. data/vendor/ring/src/lib.rs +0 -41
  246. data/vendor/ring/src/pbkdf2.rs +0 -265
  247. data/vendor/ring/src/pbkdf2_tests.txt +0 -113
  248. data/vendor/ring/src/polyfill.rs +0 -57
  249. data/vendor/ring/src/rand.rs +0 -28
  250. data/vendor/ring/src/signature.rs +0 -314
  251. data/vendor/ring/third-party/NIST/README.md +0 -9
  252. data/vendor/ring/third-party/NIST/SHAVS/SHA1LongMsg.rsp +0 -263
  253. data/vendor/ring/third-party/NIST/SHAVS/SHA1Monte.rsp +0 -309
  254. data/vendor/ring/third-party/NIST/SHAVS/SHA1ShortMsg.rsp +0 -267
  255. data/vendor/ring/third-party/NIST/SHAVS/SHA224LongMsg.rsp +0 -263
  256. data/vendor/ring/third-party/NIST/SHAVS/SHA224Monte.rsp +0 -309
  257. data/vendor/ring/third-party/NIST/SHAVS/SHA224ShortMsg.rsp +0 -267
  258. data/vendor/ring/third-party/NIST/SHAVS/SHA256LongMsg.rsp +0 -263
  259. data/vendor/ring/third-party/NIST/SHAVS/SHA256Monte.rsp +0 -309
  260. data/vendor/ring/third-party/NIST/SHAVS/SHA256ShortMsg.rsp +0 -267
  261. data/vendor/ring/third-party/NIST/SHAVS/SHA384LongMsg.rsp +0 -519
  262. data/vendor/ring/third-party/NIST/SHAVS/SHA384Monte.rsp +0 -309
  263. data/vendor/ring/third-party/NIST/SHAVS/SHA384ShortMsg.rsp +0 -523
  264. data/vendor/ring/third-party/NIST/SHAVS/SHA512LongMsg.rsp +0 -519
  265. data/vendor/ring/third-party/NIST/SHAVS/SHA512Monte.rsp +0 -309
  266. data/vendor/ring/third-party/NIST/SHAVS/SHA512ShortMsg.rsp +0 -523
  267. data/vendor/ring/third-party/NIST/sha256sums.txt +0 -1
@@ -1,841 +0,0 @@
1
- #!/usr/bin/env perl
2
-
3
- ######################################################################
4
- ## Constant-time SSSE3 AES core implementation.
5
- ## version 0.1
6
- ##
7
- ## By Mike Hamburg (Stanford University), 2009
8
- ## Public domain.
9
- ##
10
- ## For details see http://shiftleft.org/papers/vector_aes/ and
11
- ## http://crypto.stanford.edu/vpaes/.
12
-
13
- ######################################################################
14
- # September 2011.
15
- #
16
- # Port vpaes-x86_64.pl as 32-bit "almost" drop-in replacement for
17
- # aes-586.pl. "Almost" refers to the fact that AES_cbc_encrypt
18
- # doesn't handle partial vectors (doesn't have to if called from
19
- # EVP only). "Drop-in" implies that this module doesn't share key
20
- # schedule structure with the original nor does it make assumptions
21
- # about its alignment...
22
- #
23
- # Performance summary. aes-586.pl column lists large-block CBC
24
- # encrypt/decrypt/with-hyper-threading-off(*) results in cycles per
25
- # byte processed with 128-bit key, and vpaes-x86.pl column - [also
26
- # large-block CBC] encrypt/decrypt.
27
- #
28
- # aes-586.pl vpaes-x86.pl
29
- #
30
- # Core 2(**) 28.1/41.4/18.3 21.9/25.2(***)
31
- # Nehalem 27.9/40.4/18.1 10.2/11.9
32
- # Atom 70.7/92.1/60.1 61.1/75.4(***)
33
- # Silvermont 45.4/62.9/24.1 49.2/61.1(***)
34
- #
35
- # (*) "Hyper-threading" in the context refers rather to cache shared
36
- # among multiple cores, than to specifically Intel HTT. As vast
37
- # majority of contemporary cores share cache, slower code path
38
- # is commonplace. In other words "with-hyper-threading-off"
39
- # results are presented mostly for reference purposes.
40
- #
41
- # (**) "Core 2" refers to initial 65nm design, a.k.a. Conroe.
42
- #
43
- # (***) Less impressive improvement on Core 2 and Atom is due to slow
44
- # pshufb, yet it's respectable +28%/64% improvement on Core 2
45
- # and +15% on Atom (as implied, over "hyper-threading-safe"
46
- # code path).
47
- #
48
- # <appro@openssl.org>
49
-
50
- $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; # $dir = directory part of this script's path, trailing separator included
51
- push(@INC,"${dir}","${dir}../../perlasm"); # search the script's own directory and ../../perlasm
52
- require "x86asm.pl"; # presumably supplies the &-prefixed assembler helpers used below
53
-
54
- &asm_init($ARGV[0],"vpaes-x86.pl",$x86only = $ARGV[$#ARGV] eq "386"); # $x86only is true when the last argument is "386"
55
-
56
- $PREFIX="vpaes"; # NOTE(review): presumably the prefix of the exported symbol names - confirm further down the file
57
-
58
- my ($round, $base, $magic, $key, $const, $inp, $out)= # symbolic names for the general-purpose registers
59
- ("eax", "ebx", "ecx", "edx","ebp", "esi","edi");
60
-
61
- &static_label("_vpaes_consts"); # the constant table that follows
62
- &static_label("_vpaes_schedule_low_round"); # label used by the 256-bit key schedule loop
63
-
64
- &set_label("_vpaes_consts",64); # start of the constant table; the $k_* values below are byte offsets relative to the k_ipt row
65
- $k_inv=-0x30; # inv, inva (the table starts 0x30 bytes before the k_ipt origin)
66
- &data_word(0x0D080180,0x0E05060F,0x0A0B0C02,0x04070309);
67
- &data_word(0x0F0B0780,0x01040A06,0x02050809,0x030D0E0C);
68
-
69
- $k_s0F=-0x10; # s0F: 0x0F in every byte (nibble mask)
70
- &data_word(0x0F0F0F0F,0x0F0F0F0F,0x0F0F0F0F,0x0F0F0F0F);
71
-
72
- $k_ipt=0x00; # input transform (lo, hi); origin of all $k_* offsets
73
- &data_word(0x5A2A7000,0xC2B2E898,0x52227808,0xCABAE090);
74
- &data_word(0x317C4D00,0x4C01307D,0xB0FDCC81,0xCD80B1FC);
75
-
76
- $k_sb1=0x20; # sb1u, sb1t
77
- &data_word(0xCB503E00,0xB19BE18F,0x142AF544,0xA5DF7A6E);
78
- &data_word(0xFAE22300,0x3618D415,0x0D2ED9EF,0x3BF7CCC1);
79
- $k_sb2=0x40; # sb2u, sb2t
80
- &data_word(0x0B712400,0xE27A93C6,0xBC982FCD,0x5EB7E955);
81
- &data_word(0x0AE12900,0x69EB8840,0xAB82234A,0xC2A163C8);
82
- $k_sbo=0x60; # sbou, sbot
83
- &data_word(0x6FBDC700,0xD0D26D17,0xC502A878,0x15AABF7A);
84
- &data_word(0x5FBB6A00,0xCFE474A5,0x412B35FA,0x8E1E90D1);
85
-
86
- $k_mc_forward=0x80; # mc_forward; must stay 0x40 below mc_backward (encrypt core reads it at -0x40 off that base)
87
- &data_word(0x00030201,0x04070605,0x080B0A09,0x0C0F0E0D);
88
- &data_word(0x04070605,0x080B0A09,0x0C0F0E0D,0x00030201);
89
- &data_word(0x080B0A09,0x0C0F0E0D,0x00030201,0x04070605);
90
- &data_word(0x0C0F0E0D,0x00030201,0x04070605,0x080B0A09);
91
-
92
- $k_mc_backward=0xc0; # mc_backward; base address used by the encrypt core
93
- &data_word(0x02010003,0x06050407,0x0A09080B,0x0E0D0C0F);
94
- &data_word(0x0E0D0C0F,0x02010003,0x06050407,0x0A09080B);
95
- &data_word(0x0A09080B,0x0E0D0C0F,0x02010003,0x06050407);
96
- &data_word(0x06050407,0x0A09080B,0x0E0D0C0F,0x02010003);
97
-
98
- $k_sr=0x100; # sr; must stay 0x40 above mc_backward (encrypt core reads it at +0x40 off that base)
99
- &data_word(0x03020100,0x07060504,0x0B0A0908,0x0F0E0D0C);
100
- &data_word(0x0F0A0500,0x030E0904,0x07020D08,0x0B06010C);
101
- &data_word(0x0B020900,0x0F060D04,0x030A0108,0x070E050C);
102
- &data_word(0x070A0D00,0x0B0E0104,0x0F020508,0x0306090C);
103
-
104
- $k_rcon=0x140; # rcon
105
- &data_word(0xAF9DEEB6,0x1F8391B9,0x4D7C7D81,0x702A9808);
106
-
107
- $k_s63=0x150; # s63: all equal to 0x63 transformed
108
- &data_word(0x5B5B5B5B,0x5B5B5B5B,0x5B5B5B5B,0x5B5B5B5B);
109
-
110
- $k_opt=0x160; # output transform
111
- &data_word(0xD6B66000,0xFF9F4929,0xDEBE6808,0xF7974121);
112
- &data_word(0x50BCEC00,0x01EDBD51,0xB05C0CE0,0xE10D5DB1);
113
-
114
- $k_deskew=0x180; # deskew tables: inverts the sbox's "skew"
115
- &data_word(0x47A4E300,0x07E4A340,0x5DBEF91A,0x1DFEB95A);
116
- &data_word(0x83EA6900,0x5F36B5DC,0xF49D1E77,0x2841C2AB);
117
- ##
118
- ## Decryption stuff
119
- ## Key schedule constants
120
- ##
121
- $k_dksd=0x1a0; # decryption key schedule: invskew x*D
122
- &data_word(0xA3E44700,0xFEB91A5D,0x5A1DBEF9,0x0740E3A4);
123
- &data_word(0xB5368300,0x41C277F4,0xAB289D1E,0x5FDC69EA);
124
- $k_dksb=0x1c0; # decryption key schedule: invskew x*B
125
- &data_word(0x8550D500,0x9A4FCA1F,0x1CC94C99,0x03D65386);
126
- &data_word(0xB6FC4A00,0x115BEDA7,0x7E3482C8,0xD993256F);
127
- $k_dkse=0x1e0; # decryption key schedule: invskew x*E + 0x63
128
- &data_word(0x1FC9D600,0xD5031CCA,0x994F5086,0x53859A4C);
129
- &data_word(0x4FDC7BE8,0xA2319605,0x20B31487,0xCD5EF96A);
130
- $k_dks9=0x200; # decryption key schedule: invskew x*9
131
- &data_word(0x7ED9A700,0xB6116FC8,0x82255BFC,0x4AED9334);
132
- &data_word(0x27143300,0x45765162,0xE9DAFDCE,0x8BB89FAC);
133
-
134
- ##
135
- ## Decryption stuff
136
- ## Round function constants
137
- ##
138
- $k_dipt=0x220; # decryption input transform
139
- &data_word(0x0B545F00,0x0F505B04,0x114E451A,0x154A411E);
140
- &data_word(0x60056500,0x86E383E6,0xF491F194,0x12771772);
141
-
142
- $k_dsb9=0x240; # decryption sbox output *9*u, *9*t
143
- &data_word(0x9A86D600,0x851C0353,0x4F994CC9,0xCAD51F50);
144
- &data_word(0xECD74900,0xC03B1789,0xB2FBA565,0x725E2C9E);
145
- $k_dsbd=0x260; # decryption sbox output *D*u, *D*t; base address used by the decrypt core
146
- &data_word(0xE6B1A200,0x7D57CCDF,0x882A4439,0xF56E9B13);
147
- &data_word(0x24C6CB00,0x3CE2FAF7,0x15DEEFD3,0x2931180D);
148
- $k_dsbb=0x280; # decryption sbox output *B*u, *B*t
149
- &data_word(0x96B44200,0xD0226492,0xB0F2D404,0x602646F6);
150
- &data_word(0xCD596700,0xC19498A6,0x3255AA6B,0xF3FF0C3E);
151
- $k_dsbe=0x2a0; # decryption sbox output *E*u, *E*t
152
- &data_word(0x26D4D000,0x46F29296,0x64B4F6B0,0x22426004);
153
- &data_word(0xFFAAC100,0x0C55A6CD,0x98593E32,0x9467F36B);
154
- $k_dsbo=0x2c0; # decryption sbox final output
155
- &data_word(0x7EF94000,0x1387EA53,0xD4943E2D,0xC7AA6DB9);
156
- &data_word(0x93441D00,0x12D7560F,0xD8C58E9C,0xCA4B8159);
157
- &asciz ("Vector Permutation AES for x86/SSSE3, Mike Hamburg (Stanford University)");
158
- &align (64);
159
-
160
- &function_begin_B("_vpaes_preheat"); # loads the two constants every round needs into xmm6/xmm7
161
- &add ($const,&DWP(0,"esp")); # $const += dword at [esp]; NOTE(review): presumably the return address, turning a relative offset into a table pointer - confirm against callers
162
- &movdqa ("xmm7",&QWP($k_inv,$const)); # xmm7 = first 16 bytes of the k_inv table
163
- &movdqa ("xmm6",&QWP($k_s0F,$const)); # xmm6 = k_s0F (0x0F in every byte)
164
- &ret ();
165
- &function_end_B("_vpaes_preheat");
166
-
167
- ##
168
- ## _vpaes_encrypt_core
169
- ##
170
- ## AES-encrypt %xmm0.
171
- ##
172
- ## Inputs:
173
- ## %xmm0 = input
174
- ## %xmm6-%xmm7, %ebp as in _vpaes_preheat
175
- ## (%edx) = scheduled keys, round count at 240(%edx)
176
- ##
177
- ## Output in %xmm0
178
- ## Clobbers %xmm1-%xmm5, %eax, %ebx, %ecx, %edx
179
- ##
180
- ##
181
- &function_begin_B("_vpaes_encrypt_core");
182
- &mov ($magic,16); # byte offset into the mc_forward/mc_backward/sr rows
183
- &mov ($round,&DWP(240,$key)); # round counter, decremented in enc_loop
184
- &movdqa ("xmm1","xmm6"); # 1 : s0F nibble mask
185
- &movdqa ("xmm2",&QWP($k_ipt,$const)); # 2 : iptlo
186
- &pandn ("xmm1","xmm0"); # 1 = high nibbles, still <<4
187
- &pand ("xmm0","xmm6"); # 0 = low nibbles
188
- &movdqu ("xmm5",&QWP(0,$key)); # 5 = round key 0 (unaligned load)
189
- &pshufb ("xmm2","xmm0"); # 2 = iptlo[low nibbles]
190
- &movdqa ("xmm0",&QWP($k_ipt+16,$const)); # 0 : ipthi
191
- &pxor ("xmm2","xmm5"); # 2 ^= round key 0
192
- &psrld ("xmm1",4); # 1 = high nibbles
193
- &add ($key,16); # next key
194
- &pshufb ("xmm0","xmm1"); # 0 = ipthi[high nibbles]
195
- &lea ($base,&DWP($k_mc_backward,$const)); # base = address of k_mc_backward
196
- &pxor ("xmm0","xmm2"); # 0 = transformed input + k
197
- &jmp (&label("enc_entry"));
198
-
199
-
200
- &set_label("enc_loop",16);
201
- # middle of middle round
202
- &movdqa ("xmm4",&QWP($k_sb1,$const)); # 4 : sb1u
203
- &movdqa ("xmm0",&QWP($k_sb1+16,$const));# 0 : sb1t
204
- &pshufb ("xmm4","xmm2"); # 4 = sb1u
205
- &pshufb ("xmm0","xmm3"); # 0 = sb1t
206
- &pxor ("xmm4","xmm5"); # 4 = sb1u + k
207
- &movdqa ("xmm5",&QWP($k_sb2,$const)); # 5 : sb2u
208
- &pxor ("xmm0","xmm4"); # 0 = A
209
- &movdqa ("xmm1",&QWP(-0x40,$base,$magic));# .Lk_mc_forward[]
210
- &pshufb ("xmm5","xmm2"); # 5 = sb2u
211
- &movdqa ("xmm2",&QWP($k_sb2+16,$const));# 2 : sb2t
212
- &movdqa ("xmm4",&QWP(0,$base,$magic)); # .Lk_mc_backward[]
213
- &pshufb ("xmm2","xmm3"); # 2 = sb2t
214
- &movdqa ("xmm3","xmm0"); # 3 = A
215
- &pxor ("xmm2","xmm5"); # 2 = 2A
216
- &pshufb ("xmm0","xmm1"); # 0 = B
217
- &add ($key,16); # next key
218
- &pxor ("xmm0","xmm2"); # 0 = 2A+B
219
- &pshufb ("xmm3","xmm4"); # 3 = D
220
- &add ($magic,16); # next mc
221
- &pxor ("xmm3","xmm0"); # 3 = 2A+B+D
222
- &pshufb ("xmm0","xmm1"); # 0 = 2B+C
223
- &and ($magic,0x30); # ... mod 4
224
- &sub ($round,1); # nr--
225
- &pxor ("xmm0","xmm3"); # 0 = 2A+3B+C+D
226
-
227
- &set_label("enc_entry");
228
- # top of round
229
- &movdqa ("xmm1","xmm6"); # 1 : i
230
- &movdqa ("xmm5",&QWP($k_inv+16,$const));# 5 : a/k
231
- &pandn ("xmm1","xmm0"); # 1 = i<<4
232
- &psrld ("xmm1",4); # 1 = i
233
- &pand ("xmm0","xmm6"); # 0 = k
234
- &pshufb ("xmm5","xmm0"); # 5 = a/k
235
- &movdqa ("xmm3","xmm7"); # 3 : 1/i
236
- &pxor ("xmm0","xmm1"); # 0 = j
237
- &pshufb ("xmm3","xmm1"); # 3 = 1/i
238
- &movdqa ("xmm4","xmm7"); # 4 : 1/j
239
- &pxor ("xmm3","xmm5"); # 3 = iak = 1/i + a/k
240
- &pshufb ("xmm4","xmm0"); # 4 = 1/j
241
- &movdqa ("xmm2","xmm7"); # 2 : 1/iak
242
- &pxor ("xmm4","xmm5"); # 4 = jak = 1/j + a/k
243
- &pshufb ("xmm2","xmm3"); # 2 = 1/iak
244
- &movdqa ("xmm3","xmm7"); # 3 : 1/jak
245
- &pxor ("xmm2","xmm0"); # 2 = io
246
- &pshufb ("xmm3","xmm4"); # 3 = 1/jak
247
- &movdqu ("xmm5",&QWP(0,$key)); # 5 = current round key
248
- &pxor ("xmm3","xmm1"); # 3 = jo
249
- &jnz (&label("enc_loop")); # flags come from `sub $round,1` (from `add $key,16` on first entry)
250
-
251
- # middle of last round
252
- &movdqa ("xmm4",&QWP($k_sbo,$const)); # 4 : sbou .Lk_sbo
253
- &movdqa ("xmm0",&QWP($k_sbo+16,$const));# 0 : sbot .Lk_sbo+16
254
- &pshufb ("xmm4","xmm2"); # 4 = sbou
255
- &pxor ("xmm4","xmm5"); # 4 = sbou + k
256
- &pshufb ("xmm0","xmm3"); # 0 = sbot
257
- &movdqa ("xmm1",&QWP(0x40,$base,$magic));# .Lk_sr[]
258
- &pxor ("xmm0","xmm4"); # 0 = A
259
- &pshufb ("xmm0","xmm1"); # 0 = A permuted by the k_sr row
260
- &ret ();
261
- &function_end_B("_vpaes_encrypt_core");
262
-
263
- ##
264
- ## Decryption core
265
- ##
266
- ## Same API as encryption core.
267
- ## Round count is read from 240(%edx); %ecx ends up pointing into k_sr.
268
- &function_begin_B("_vpaes_decrypt_core");
269
- &lea ($base,&DWP($k_dsbd,$const)); # base = address of k_dsbd; other tables are addressed relative to it
270
- &mov ($round,&DWP(240,$key)); # round counter, decremented in dec_loop
271
- &movdqa ("xmm1","xmm6"); # 1 : s0F nibble mask
272
- &movdqa ("xmm2",&QWP($k_dipt-$k_dsbd,$base)); # 2 : diptlo
273
- &pandn ("xmm1","xmm0"); # 1 = high nibbles, still <<4
274
- &mov ($magic,$round);
275
- &psrld ("xmm1",4); # 1 = high nibbles
276
- &movdqu ("xmm5",&QWP(0,$key)); # 5 = round key 0 (unaligned load)
277
- &shl ($magic,4); # magic = rounds*16
278
- &pand ("xmm0","xmm6"); # 0 = low nibbles
279
- &pshufb ("xmm2","xmm0"); # 2 = diptlo[low nibbles]
280
- &movdqa ("xmm0",&QWP($k_dipt-$k_dsbd+16,$base)); # 0 : dipthi
281
- &xor ($magic,0x30);
282
- &pshufb ("xmm0","xmm1"); # 0 = dipthi[high nibbles]
283
- &and ($magic,0x30); # magic = byte offset of one of the four k_sr rows
284
- &pxor ("xmm2","xmm5"); # 2 ^= round key 0
285
- &movdqa ("xmm5",&QWP($k_mc_forward+48,$const)); # 5 : last mc_forward row, used as the MC shuffle
286
- &pxor ("xmm0","xmm2"); # 0 = transformed input + k
287
- &add ($key,16); # next round key
288
- &lea ($magic,&DWP($k_sr-$k_dsbd,$base,$magic)); # magic = address of the selected k_sr row
289
- &jmp (&label("dec_entry"));
290
-
291
- &set_label("dec_loop",16);
292
- ##
293
- ## Inverse mix columns
294
- ##
295
- &movdqa ("xmm4",&QWP(-0x20,$base)); # 4 : sb9u
296
- &movdqa ("xmm1",&QWP(-0x10,$base)); # 1 : sb9t
297
- &pshufb ("xmm4","xmm2"); # 4 = sb9u
298
- &pshufb ("xmm1","xmm3"); # 1 = sb9t
299
- &pxor ("xmm0","xmm4"); # 0 = k + sb9u (xmm0 holds the round key loaded in dec_entry)
300
- &movdqa ("xmm4",&QWP(0,$base)); # 4 : sbdu
301
- &pxor ("xmm0","xmm1"); # 0 = ch
302
- &movdqa ("xmm1",&QWP(0x10,$base)); # 1 : sbdt
303
-
304
- &pshufb ("xmm4","xmm2"); # 4 = sbdu
305
- &pshufb ("xmm0","xmm5"); # MC ch
306
- &pshufb ("xmm1","xmm3"); # 1 = sbdt
307
- &pxor ("xmm0","xmm4"); # 0 = ch
308
- &movdqa ("xmm4",&QWP(0x20,$base)); # 4 : sbbu
309
- &pxor ("xmm0","xmm1"); # 0 = ch
310
- &movdqa ("xmm1",&QWP(0x30,$base)); # 1 : sbbt
311
-
312
- &pshufb ("xmm4","xmm2"); # 4 = sbbu
313
- &pshufb ("xmm0","xmm5"); # MC ch
314
- &pshufb ("xmm1","xmm3"); # 1 = sbbt
315
- &pxor ("xmm0","xmm4"); # 0 = ch
316
- &movdqa ("xmm4",&QWP(0x40,$base)); # 4 : sbeu
317
- &pxor ("xmm0","xmm1"); # 0 = ch
318
- &movdqa ("xmm1",&QWP(0x50,$base)); # 1 : sbet
319
-
320
- &pshufb ("xmm4","xmm2"); # 4 = sbeu
321
- &pshufb ("xmm0","xmm5"); # MC ch
322
- &pshufb ("xmm1","xmm3"); # 1 = sbet
323
- &pxor ("xmm0","xmm4"); # 0 = ch
324
- &add ($key,16); # next round key
325
- &palignr("xmm5","xmm5",12); # rotate the MC shuffle mask in xmm5 by 12 bytes
326
- &pxor ("xmm0","xmm1"); # 0 = ch
327
- &sub ($round,1); # nr--
328
-
329
- &set_label("dec_entry");
330
- # top of round
331
- &movdqa ("xmm1","xmm6"); # 1 : i
332
- &movdqa ("xmm2",&QWP($k_inv+16,$const));# 2 : a/k
333
- &pandn ("xmm1","xmm0"); # 1 = i<<4
334
- &pand ("xmm0","xmm6"); # 0 = k
335
- &psrld ("xmm1",4); # 1 = i
336
- &pshufb ("xmm2","xmm0"); # 2 = a/k
337
- &movdqa ("xmm3","xmm7"); # 3 : 1/i
338
- &pxor ("xmm0","xmm1"); # 0 = j
339
- &pshufb ("xmm3","xmm1"); # 3 = 1/i
340
- &movdqa ("xmm4","xmm7"); # 4 : 1/j
341
- &pxor ("xmm3","xmm2"); # 3 = iak = 1/i + a/k
342
- &pshufb ("xmm4","xmm0"); # 4 = 1/j
343
- &pxor ("xmm4","xmm2"); # 4 = jak = 1/j + a/k
344
- &movdqa ("xmm2","xmm7"); # 2 : 1/iak
345
- &pshufb ("xmm2","xmm3"); # 2 = 1/iak
346
- &movdqa ("xmm3","xmm7"); # 3 : 1/jak
347
- &pxor ("xmm2","xmm0"); # 2 = io
348
- &pshufb ("xmm3","xmm4"); # 3 = 1/jak
349
- &movdqu ("xmm0",&QWP(0,$key)); # 0 = current round key
350
- &pxor ("xmm3","xmm1"); # 3 = jo
351
- &jnz (&label("dec_loop")); # flags come from `sub $round,1` (from `add $key,16` on first entry)
352
-
353
- # middle of last round
354
- &movdqa ("xmm4",&QWP(0x60,$base)); # 4 : sbou
355
- &pshufb ("xmm4","xmm2"); # 4 = sbou
356
- &pxor ("xmm4","xmm0"); # 4 = sbou + k
357
- &movdqa ("xmm0",&QWP(0x70,$base)); # 0 : sbot
358
- &movdqa ("xmm2",&QWP(0,$magic)); # 2 : k_sr row selected in the prologue
359
- &pshufb ("xmm0","xmm3"); # 0 = sbot
360
- &pxor ("xmm0","xmm4"); # 0 = A
361
- &pshufb ("xmm0","xmm2"); # 0 = A permuted by the k_sr row
362
- &ret ();
363
- &function_end_B("_vpaes_decrypt_core");
364
-
365
- ########################################################
366
- ## ##
367
- ## AES key schedule ##
368
- ## ##
369
- ########################################################
370
- &function_begin_B("_vpaes_schedule_core");
371
- &add ($const,&DWP(0,"esp"));
372
- &movdqu ("xmm0",&QWP(0,$inp)); # load key (unaligned)
373
- &movdqa ("xmm2",&QWP($k_rcon,$const)); # load rcon
374
-
375
- # input transform
376
- &movdqa ("xmm3","xmm0");
377
- &lea ($base,&DWP($k_ipt,$const));
378
- &movdqa (&QWP(4,"esp"),"xmm2"); # xmm8
379
- &call ("_vpaes_schedule_transform");
380
- &movdqa ("xmm7","xmm0");
381
-
382
- &test ($out,$out);
383
- &jnz (&label("schedule_am_decrypting"));
384
-
385
- # encrypting, output zeroth round key after transform
386
- &movdqu (&QWP(0,$key),"xmm0");
387
- &jmp (&label("schedule_go"));
388
-
389
- &set_label("schedule_am_decrypting");
390
- # decrypting, output zeroth round key after shiftrows
391
- &movdqa ("xmm1",&QWP($k_sr,$const,$magic));
392
- &pshufb ("xmm3","xmm1");
393
- &movdqu (&QWP(0,$key),"xmm3");
394
- &xor ($magic,0x30);
395
-
396
- &set_label("schedule_go");
397
- &cmp ($round,192);
398
- &ja (&label("schedule_256"));
399
- &je (&label("schedule_192"));
400
- # 128: fall through
401
-
402
- ##
403
- ## .schedule_128
404
- ##
405
- ## 128-bit specific part of key schedule.
406
- ##
407
- ## This schedule is really simple, because all its parts
408
- ## are accomplished by the subroutines.
409
- ##
410
- &set_label("schedule_128");
411
- &mov ($round,10);
412
-
413
- &set_label("loop_schedule_128");
414
- &call ("_vpaes_schedule_round");
415
- &dec ($round);
416
- &jz (&label("schedule_mangle_last"));
417
- &call ("_vpaes_schedule_mangle"); # write output
418
- &jmp (&label("loop_schedule_128"));
419
-
420
- ##
421
- ## .aes_schedule_192
422
- ##
423
- ## 192-bit specific part of key schedule.
424
- ##
425
- ## The main body of this schedule is the same as the 128-bit
426
- ## schedule, but with more smearing. The long, high side is
427
- ## stored in %xmm7 as before, and the short, low side is in
428
- ## the high bits of %xmm6.
429
- ##
430
- ## This schedule is somewhat nastier, however, because each
431
- ## round produces 192 bits of key material, or 1.5 round keys.
432
- ## Therefore, on each cycle we do 2 rounds and produce 3 round
433
- ## keys.
434
- ##
435
- &set_label("schedule_192",16);
436
- &movdqu ("xmm0",&QWP(8,$inp)); # load key part 2 (very unaligned)
437
- &call ("_vpaes_schedule_transform"); # input transform
438
- &movdqa ("xmm6","xmm0"); # save short part
439
- &pxor ("xmm4","xmm4"); # clear 4
440
- &movhlps("xmm6","xmm4"); # clobber low side with zeros
441
- &mov ($round,4);
442
-
443
- &set_label("loop_schedule_192");
444
- &call ("_vpaes_schedule_round");
445
- &palignr("xmm0","xmm6",8);
446
- &call ("_vpaes_schedule_mangle"); # save key n
447
- &call ("_vpaes_schedule_192_smear");
448
- &call ("_vpaes_schedule_mangle"); # save key n+1
449
- &call ("_vpaes_schedule_round");
450
- &dec ($round);
451
- &jz (&label("schedule_mangle_last"));
452
- &call ("_vpaes_schedule_mangle"); # save key n+2
453
- &call ("_vpaes_schedule_192_smear");
454
- &jmp (&label("loop_schedule_192"));
455
-
456
- ##
457
- ## .aes_schedule_256
458
- ##
459
- ## 256-bit specific part of key schedule.
460
- ##
461
- ## The structure here is very similar to the 128-bit
462
- ## schedule, but with an additional "low side" in
463
- ## %xmm6. The low side's rounds are the same as the
464
- ## high side's, except no rcon and no rotation.
465
- ##
466
- &set_label("schedule_256",16);
467
- &movdqu ("xmm0",&QWP(16,$inp)); # load key part 2 (unaligned)
468
- &call ("_vpaes_schedule_transform"); # input transform
469
- &mov ($round,7);
470
-
471
- &set_label("loop_schedule_256");
472
- &call ("_vpaes_schedule_mangle"); # output low result
473
- &movdqa ("xmm6","xmm0"); # save cur_lo in xmm6
474
-
475
- # high round
476
- &call ("_vpaes_schedule_round");
477
- &dec ($round);
478
- &jz (&label("schedule_mangle_last"));
479
- &call ("_vpaes_schedule_mangle");
480
-
481
- # low round. swap xmm7 and xmm6
482
- &pshufd ("xmm0","xmm0",0xFF);
483
- &movdqa (&QWP(20,"esp"),"xmm7");
484
- &movdqa ("xmm7","xmm6");
485
- &call ("_vpaes_schedule_low_round");
486
- &movdqa ("xmm7",&QWP(20,"esp"));
487
-
488
- &jmp (&label("loop_schedule_256"));
489
-
490
- ##
491
- ## .aes_schedule_mangle_last
492
- ##
493
- ## Mangler for last round of key schedule
494
- ## Mangles %xmm0
495
- ## when encrypting, outputs out(%xmm0) ^ 63
496
- ## when decrypting, outputs unskew(%xmm0)
497
- ##
498
- ## Always called right before return... jumps to cleanup and exits
499
- ##
500
- &set_label("schedule_mangle_last",16);
501
- # schedule last round key from xmm0
502
- &lea ($base,&DWP($k_deskew,$const));
503
- &test ($out,$out);
504
- &jnz (&label("schedule_mangle_last_dec"));
505
-
506
- # encrypting
507
- &movdqa ("xmm1",&QWP($k_sr,$const,$magic));
508
- &pshufb ("xmm0","xmm1"); # output permute
509
- &lea ($base,&DWP($k_opt,$const)); # prepare to output transform
510
- &add ($key,32);
511
-
512
- &set_label("schedule_mangle_last_dec");
513
- &add ($key,-16);
514
- &pxor ("xmm0",&QWP($k_s63,$const));
515
- &call ("_vpaes_schedule_transform"); # output transform
516
- &movdqu (&QWP(0,$key),"xmm0"); # save last key
517
-
518
- # cleanup
519
- &pxor ("xmm0","xmm0");
520
- &pxor ("xmm1","xmm1");
521
- &pxor ("xmm2","xmm2");
522
- &pxor ("xmm3","xmm3");
523
- &pxor ("xmm4","xmm4");
524
- &pxor ("xmm5","xmm5");
525
- &pxor ("xmm6","xmm6");
526
- &pxor ("xmm7","xmm7");
527
- &ret ();
528
- &function_end_B("_vpaes_schedule_core");
529
-
530
- ##
531
- ## .aes_schedule_192_smear
532
- ##
533
- ## Smear the short, low side in the 192-bit key schedule.
534
- ##
535
- ## Inputs:
536
- ## %xmm7: high side, b a x y
537
- ## %xmm6: low side, d c 0 0
538
- ## %xmm13: 0 (not read by this 32-bit routine; it zeroes %xmm1 itself and uses that instead)
539
- ##
540
- ## Outputs:
541
- ## %xmm6: b+c+d b+c 0 0
542
- ## %xmm0: b+c+d b+c b a
543
- ##
544
- &function_begin_B("_vpaes_schedule_192_smear"); # 192-bit schedule helper: folds high side (xmm7) into low side (xmm6)
545
- &pshufd ("xmm1","xmm6",0x80); # d c 0 0 -> c 0 0 0
546
- &pshufd ("xmm0","xmm7",0xFE); # b a _ _ -> b b b a
547
- &pxor ("xmm6","xmm1"); # -> c+d c 0 0
548
- &pxor ("xmm1","xmm1"); # xmm1 = 0, zero source for the movhlps below
549
- &pxor ("xmm6","xmm0"); # -> b+c+d b+c b a
550
- &movdqa ("xmm0","xmm6"); # copy full result to xmm0 before xmm6's low half is cleared
551
- &movhlps("xmm6","xmm1"); # clobber low side with zeros
552
- &ret ();
553
- &function_end_B("_vpaes_schedule_192_smear");
554
-
555
- ##
556
- ## .aes_schedule_round
557
- ##
558
- ## Runs one main round of the key schedule on %xmm0, %xmm7
559
- ##
560
- ## Specifically, runs subbytes on the high dword of %xmm0
561
- ## then rotates it by one byte and xors into the low dword of
562
- ## %xmm7.
563
- ##
564
- ## Adds rcon from low byte of %xmm8, then rotates %xmm8 for
565
- ## next rcon.
566
- ##
567
- ## Smears the dwords of %xmm7 by xoring the low into the
568
- ## second low, result into third, result into highest.
569
- ##
570
- ## Returns results in %xmm7 = %xmm0.
571
- ## Clobbers %xmm1-%xmm5.
572
- ##
573
- &function_begin_B("_vpaes_schedule_round"); # one key-schedule round on xmm0/xmm7; second entry point _vpaes_schedule_low_round below
574
- # extract rcon from xmm8 (here the "xmm8" value is kept in the stack slot at 8(%esp))
575
- &movdqa ("xmm2",&QWP(8,"esp")); # xmm8
576
- &pxor ("xmm1","xmm1"); # xmm1 = 0 so palignr shifts zeros in
577
- &palignr("xmm1","xmm2",15); # xmm1 = a single byte of the rcon vector, all other bytes zero
578
- &palignr("xmm2","xmm2",15); # rotate the rcon vector for the next round
579
- &pxor ("xmm7","xmm1"); # add rcon into xmm7
580
-
581
- # rotate
582
- &pshufd ("xmm0","xmm0",0xFF); # replicate the high dword of xmm0 into all four dwords
583
- &palignr("xmm0","xmm0",1); # rotate by one byte
584
-
585
- # fall through...
586
- &movdqa (&QWP(8,"esp"),"xmm2"); # xmm8: store the rotated rcon vector back to its stack slot
587
-
588
- # low round: same as high round, but no rotation and no rcon.
589
- &set_label("_vpaes_schedule_low_round");
590
- # smear xmm7
591
- &movdqa ("xmm1","xmm7");
592
- &pslldq ("xmm7",4); # shift left by one dword ...
593
- &pxor ("xmm7","xmm1"); # ... and xor with the unshifted copy
594
- &movdqa ("xmm1","xmm7");
595
- &pslldq ("xmm7",8); # same again with a two-dword shift
596
- &pxor ("xmm7","xmm1");
597
- &pxor ("xmm7",&QWP($k_s63,$const)); # xor in the k_s63 constant
598
-
599
- # subbyte
600
- &movdqa ("xmm4",&QWP($k_s0F,$const));
601
- &movdqa ("xmm5",&QWP($k_inv,$const)); # 4 : 1/j
602
- &movdqa ("xmm1","xmm4");
603
- &pandn ("xmm1","xmm0");
604
- &psrld ("xmm1",4); # 1 = i
605
- &pand ("xmm0","xmm4"); # 0 = k
606
- &movdqa ("xmm2",&QWP($k_inv+16,$const));# 2 : a/k
607
- &pshufb ("xmm2","xmm0"); # 2 = a/k
608
- &pxor ("xmm0","xmm1"); # 0 = j
609
- &movdqa ("xmm3","xmm5"); # 3 : 1/i
610
- &pshufb ("xmm3","xmm1"); # 3 = 1/i
611
- &pxor ("xmm3","xmm2"); # 3 = iak = 1/i + a/k
612
- &movdqa ("xmm4","xmm5"); # 4 : 1/j
613
- &pshufb ("xmm4","xmm0"); # 4 = 1/j
614
- &pxor ("xmm4","xmm2"); # 4 = jak = 1/j + a/k
615
- &movdqa ("xmm2","xmm5"); # 2 : 1/iak
616
- &pshufb ("xmm2","xmm3"); # 2 = 1/iak
617
- &pxor ("xmm2","xmm0"); # 2 = io
618
- &movdqa ("xmm3","xmm5"); # 3 : 1/jak
619
- &pshufb ("xmm3","xmm4"); # 3 = 1/jak
620
- &pxor ("xmm3","xmm1"); # 3 = jo
621
- &movdqa ("xmm4",&QWP($k_sb1,$const)); # 4 : sbou
622
- &pshufb ("xmm4","xmm2"); # 4 = sbou
623
- &movdqa ("xmm0",&QWP($k_sb1+16,$const));# 0 : sbot
624
- &pshufb ("xmm0","xmm3"); # 0 = sb1t
625
- &pxor ("xmm0","xmm4"); # 0 = sbox output
626
-
627
- # add in smeared stuff
628
- &pxor ("xmm0","xmm7");
629
- &movdqa ("xmm7","xmm0"); # result is returned in both xmm0 and xmm7
630
- &ret ();
631
- &function_end_B("_vpaes_schedule_round");
632
-
633
- ##
634
- ## .aes_schedule_transform
635
- ##
636
- ## Linear-transform %xmm0 according to tables at (%ebx)
637
- ##
638
- ## Output in %xmm0
639
- ## Clobbers %xmm1, %xmm2
640
- ##
641
- &function_begin_B("_vpaes_schedule_transform"); # nibble-wise table transform of xmm0 using the two 16-byte tables at $base
642
- &movdqa ("xmm2",&QWP($k_s0F,$const)); # nibble mask (presumably 0x0F in every byte -- table not visible here)
643
- &movdqa ("xmm1","xmm2");
644
- &pandn ("xmm1","xmm0"); # xmm1 = bits of xmm0 outside the mask
645
- &psrld ("xmm1",4); # 1 = hi nibbles, shifted down to use as indices
646
- &pand ("xmm0","xmm2"); # 0 = lo nibbles
647
- &movdqa ("xmm2",&QWP(0,$base)); # first table, indexed by lo
648
- &pshufb ("xmm2","xmm0");
649
- &movdqa ("xmm0",&QWP(16,$base)); # second table, indexed by hi
650
- &pshufb ("xmm0","xmm1");
651
- &pxor ("xmm0","xmm2"); # combine both lookups; output in xmm0
652
- &ret ();
653
- &function_end_B("_vpaes_schedule_transform");
654
-
655
- ##
656
- ## .aes_schedule_mangle
657
- ##
658
- ## Mangle xmm0 from (basis-transformed) standard version
659
- ## to our version.
660
- ##
661
- ## On encrypt,
662
- ## xor with 0x63
663
- ## multiply by circulant 0,1,1,1
664
- ## apply shiftrows transform
665
- ##
666
- ## On decrypt,
667
- ## xor with 0x63
668
- ## multiply by "inverse mixcolumns" circulant E,B,D,9
669
- ## deskew
670
- ## apply shiftrows transform
671
- ##
672
- ##
673
- ## Writes out to (%edx), and increments or decrements it
674
- ## Keeps track of round number mod 4 in %ecx
675
- ## Preserves xmm0
676
- ## Clobbers xmm1-xmm5
677
- ##
678
- &function_begin_B("_vpaes_schedule_mangle"); # mangles a copy of xmm0 and stores it as one round key at ($key)
679
- &movdqa ("xmm4","xmm0"); # save xmm0 for later (all work is done on the copy in xmm4)
680
- &movdqa ("xmm5",&QWP($k_mc_forward,$const)); # shuffle mask used by both paths
681
- &test ($out,$out); # $out is 0 when called via set_encrypt_key, 1 via set_decrypt_key
682
- &jnz (&label("schedule_mangle_dec"));
683
-
684
- # encrypting
685
- &add ($key,16); # encrypt path walks the output pointer upwards
686
- &pxor ("xmm4",&QWP($k_s63,$const)); # xor with the k_s63 constant
687
- &pshufb ("xmm4","xmm5"); # three successive shuffles by k_mc_forward ...
688
- &movdqa ("xmm3","xmm4");
689
- &pshufb ("xmm4","xmm5");
690
- &pxor ("xmm3","xmm4"); # ... xor-accumulated into xmm3
691
- &pshufb ("xmm4","xmm5");
692
- &pxor ("xmm3","xmm4");
693
-
694
- &jmp (&label("schedule_mangle_both"));
695
-
696
- &set_label("schedule_mangle_dec",16);
697
- # inverse mix columns
698
- &movdqa ("xmm2",&QWP($k_s0F,$const));
699
- &lea ($inp,&DWP($k_dksd,$const)); # $inp is reused as the base of the decrypt tables (eight 16-byte entries are read below)
700
- &movdqa ("xmm1","xmm2");
701
- &pandn ("xmm1","xmm4");
702
- &psrld ("xmm1",4); # 1 = hi
703
- &pand ("xmm4","xmm2"); # 4 = lo
704
-
705
- &movdqa ("xmm2",&QWP(0,$inp)); # table pair 1: lo lookup, hi lookup, xor, then shuffle by xmm5
706
- &pshufb ("xmm2","xmm4");
707
- &movdqa ("xmm3",&QWP(0x10,$inp));
708
- &pshufb ("xmm3","xmm1");
709
- &pxor ("xmm3","xmm2");
710
- &pshufb ("xmm3","xmm5");
711
-
712
- &movdqa ("xmm2",&QWP(0x20,$inp)); # table pair 2, accumulated onto the previous result
713
- &pshufb ("xmm2","xmm4");
714
- &pxor ("xmm2","xmm3");
715
- &movdqa ("xmm3",&QWP(0x30,$inp));
716
- &pshufb ("xmm3","xmm1");
717
- &pxor ("xmm3","xmm2");
718
- &pshufb ("xmm3","xmm5");
719
-
720
- &movdqa ("xmm2",&QWP(0x40,$inp)); # table pair 3
721
- &pshufb ("xmm2","xmm4");
722
- &pxor ("xmm2","xmm3");
723
- &movdqa ("xmm3",&QWP(0x50,$inp));
724
- &pshufb ("xmm3","xmm1");
725
- &pxor ("xmm3","xmm2");
726
- &pshufb ("xmm3","xmm5");
727
-
728
- &movdqa ("xmm2",&QWP(0x60,$inp)); # table pair 4 (no trailing shuffle)
729
- &pshufb ("xmm2","xmm4");
730
- &pxor ("xmm2","xmm3");
731
- &movdqa ("xmm3",&QWP(0x70,$inp));
732
- &pshufb ("xmm3","xmm1");
733
- &pxor ("xmm3","xmm2");
734
-
735
- &add ($key,-16); # decrypt path walks the output pointer downwards
736
-
737
- &set_label("schedule_mangle_both");
738
- &movdqa ("xmm1",&QWP($k_sr,$const,$magic)); # pick the k_sr entry selected by $magic
739
- &pshufb ("xmm3","xmm1"); # apply it as a byte permutation to the result
740
- &add ($magic,-16);
741
- &and ($magic,0x30); # $magic = ($magic - 16) & 0x30, i.e. cycles through 0x30,0x20,0x10,0x00
742
- &movdqu (&QWP(0,$key),"xmm3"); # unaligned store of the round key
743
- &ret ();
744
- &function_end_B("_vpaes_schedule_mangle");
745
-
746
- #
747
- # Interface to OpenSSL
748
- #
749
- &function_begin("${PREFIX}_set_encrypt_key"); # args: (inp, bits, key); returns 0 in eax
750
- &mov ($inp,&wparam(0)); # inp
751
- &lea ($base,&DWP(-56,"esp")); # candidate new stack pointer, 56 bytes below the current one
752
- &mov ($round,&wparam(1)); # bits
753
- &and ($base,-16); # round down to a 16-byte boundary
754
- &mov ($key,&wparam(2)); # key
755
- &xchg ($base,"esp"); # alloca ($base now holds the previous esp)
756
- &mov (&DWP(48,"esp"),$base); # save previous esp in the new frame for the restore below
757
-
758
- &mov ($base,$round);
759
- &shr ($base,5);
760
- &add ($base,5);
761
- &mov (&DWP(240,$key),$base); # AES_KEY->rounds = nbits/32+5;
762
- &mov ($magic,0x30); # initial $magic value for the encrypt schedule
763
- &mov ($out,0); # $out == 0 selects the encrypting paths in the schedule routines
764
-
765
- &lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point"))); # offset of the constants relative to pic_point; presumably the callee adds its return address to make it absolute -- confirm in _vpaes_schedule_core
766
- &call ("_vpaes_schedule_core");
767
- &set_label("pic_point");
768
-
769
- &mov ("esp",&DWP(48,"esp")); # restore the caller's stack pointer
770
- &xor ("eax","eax"); # return 0
771
- &function_end("${PREFIX}_set_encrypt_key");
772
-
773
- &function_begin("${PREFIX}_set_decrypt_key"); # args: (inp, bits, key); returns 0 in eax
774
- &mov ($inp,&wparam(0)); # inp
775
- &lea ($base,&DWP(-56,"esp")); # candidate new stack pointer, 56 bytes below the current one
776
- &mov ($round,&wparam(1)); # bits
777
- &and ($base,-16); # round down to a 16-byte boundary
778
- &mov ($key,&wparam(2)); # key
779
- &xchg ($base,"esp"); # alloca ($base now holds the previous esp)
780
- &mov (&DWP(48,"esp"),$base); # save previous esp in the new frame for the restore below
781
-
782
- &mov ($base,$round);
783
- &shr ($base,5);
784
- &add ($base,5);
785
- &mov (&DWP(240,$key),$base); # AES_KEY->rounds = nbits/32+5;
786
- &shl ($base,4); # rounds * 16 bytes
787
- &lea ($key,&DWP(16,$key,$base)); # $key = key + 16 + 16*rounds: the decrypt schedule writes downwards from here
788
-
789
- &mov ($out,1); # $out != 0 selects the decrypting paths in the schedule routines
790
- &mov ($magic,$round);
791
- &shr ($magic,1);
792
- &and ($magic,32);
793
- &xor ($magic,32); # nbits==192?0:32;
794
-
795
- &lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point"))); # offset of the constants relative to pic_point; presumably the callee adds its return address to make it absolute -- confirm in _vpaes_schedule_core
796
- &call ("_vpaes_schedule_core");
797
- &set_label("pic_point");
798
-
799
- &mov ("esp",&DWP(48,"esp")); # restore the caller's stack pointer
800
- &xor ("eax","eax"); # return 0
801
- &function_end("${PREFIX}_set_decrypt_key");
802
-
803
- &function_begin("${PREFIX}_encrypt"); # args: (inp, out, key); processes one 16-byte block
804
- &lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point"))); # offset of the constants relative to pic_point; presumably made absolute inside _vpaes_preheat -- confirm there
805
- &call ("_vpaes_preheat");
806
- &set_label("pic_point");
807
- &mov ($inp,&wparam(0)); # inp
808
- &lea ($base,&DWP(-56,"esp")); # candidate new stack pointer, 56 bytes below the current one
809
- &mov ($out,&wparam(1)); # out
810
- &and ($base,-16); # round down to a 16-byte boundary
811
- &mov ($key,&wparam(2)); # key
812
- &xchg ($base,"esp"); # alloca ($base now holds the previous esp)
813
- &mov (&DWP(48,"esp"),$base); # save previous esp in the new frame for the restore below
814
-
815
- &movdqu ("xmm0",&QWP(0,$inp)); # unaligned load of the input block
816
- &call ("_vpaes_encrypt_core");
817
- &movdqu (&QWP(0,$out),"xmm0"); # unaligned store of the result block
818
-
819
- &mov ("esp",&DWP(48,"esp")); # restore the caller's stack pointer
820
- &function_end("${PREFIX}_encrypt");
821
-
822
- &function_begin("${PREFIX}_decrypt"); # args: (inp, out, key); processes one 16-byte block
823
- &lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point"))); # offset of the constants relative to pic_point; presumably made absolute inside _vpaes_preheat -- confirm there
824
- &call ("_vpaes_preheat");
825
- &set_label("pic_point");
826
- &mov ($inp,&wparam(0)); # inp
827
- &lea ($base,&DWP(-56,"esp")); # candidate new stack pointer, 56 bytes below the current one
828
- &mov ($out,&wparam(1)); # out
829
- &and ($base,-16); # round down to a 16-byte boundary
830
- &mov ($key,&wparam(2)); # key
831
- &xchg ($base,"esp"); # alloca ($base now holds the previous esp)
832
- &mov (&DWP(48,"esp"),$base); # save previous esp in the new frame for the restore below
833
-
834
- &movdqu ("xmm0",&QWP(0,$inp)); # unaligned load of the input block
835
- &call ("_vpaes_decrypt_core");
836
- &movdqu (&QWP(0,$out),"xmm0"); # unaligned store of the result block
837
-
838
- &mov ("esp",&DWP(48,"esp")); # restore the caller's stack pointer
839
- &function_end("${PREFIX}_decrypt");
840
-
841
- &asm_finish();