ring-native 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/Gemfile +3 -0
- data/README.md +22 -0
- data/Rakefile +1 -0
- data/ext/ring/extconf.rb +29 -0
- data/lib/ring/native.rb +8 -0
- data/lib/ring/native/version.rb +5 -0
- data/ring-native.gemspec +25 -0
- data/vendor/ring/BUILDING.md +40 -0
- data/vendor/ring/Cargo.toml +43 -0
- data/vendor/ring/LICENSE +185 -0
- data/vendor/ring/Makefile +35 -0
- data/vendor/ring/PORTING.md +163 -0
- data/vendor/ring/README.md +113 -0
- data/vendor/ring/STYLE.md +197 -0
- data/vendor/ring/appveyor.yml +27 -0
- data/vendor/ring/build.rs +108 -0
- data/vendor/ring/crypto/aes/aes.c +1142 -0
- data/vendor/ring/crypto/aes/aes_test.Windows.vcxproj +25 -0
- data/vendor/ring/crypto/aes/aes_test.cc +93 -0
- data/vendor/ring/crypto/aes/asm/aes-586.pl +2368 -0
- data/vendor/ring/crypto/aes/asm/aes-armv4.pl +1249 -0
- data/vendor/ring/crypto/aes/asm/aes-x86_64.pl +2246 -0
- data/vendor/ring/crypto/aes/asm/aesni-x86.pl +1318 -0
- data/vendor/ring/crypto/aes/asm/aesni-x86_64.pl +2084 -0
- data/vendor/ring/crypto/aes/asm/aesv8-armx.pl +675 -0
- data/vendor/ring/crypto/aes/asm/bsaes-armv7.pl +1364 -0
- data/vendor/ring/crypto/aes/asm/bsaes-x86_64.pl +1565 -0
- data/vendor/ring/crypto/aes/asm/vpaes-x86.pl +841 -0
- data/vendor/ring/crypto/aes/asm/vpaes-x86_64.pl +1116 -0
- data/vendor/ring/crypto/aes/internal.h +87 -0
- data/vendor/ring/crypto/aes/mode_wrappers.c +61 -0
- data/vendor/ring/crypto/bn/add.c +394 -0
- data/vendor/ring/crypto/bn/asm/armv4-mont.pl +694 -0
- data/vendor/ring/crypto/bn/asm/armv8-mont.pl +1503 -0
- data/vendor/ring/crypto/bn/asm/bn-586.pl +774 -0
- data/vendor/ring/crypto/bn/asm/co-586.pl +287 -0
- data/vendor/ring/crypto/bn/asm/rsaz-avx2.pl +1882 -0
- data/vendor/ring/crypto/bn/asm/x86-mont.pl +592 -0
- data/vendor/ring/crypto/bn/asm/x86_64-gcc.c +599 -0
- data/vendor/ring/crypto/bn/asm/x86_64-mont.pl +1393 -0
- data/vendor/ring/crypto/bn/asm/x86_64-mont5.pl +3507 -0
- data/vendor/ring/crypto/bn/bn.c +352 -0
- data/vendor/ring/crypto/bn/bn_asn1.c +74 -0
- data/vendor/ring/crypto/bn/bn_test.Windows.vcxproj +25 -0
- data/vendor/ring/crypto/bn/bn_test.cc +1696 -0
- data/vendor/ring/crypto/bn/cmp.c +200 -0
- data/vendor/ring/crypto/bn/convert.c +433 -0
- data/vendor/ring/crypto/bn/ctx.c +311 -0
- data/vendor/ring/crypto/bn/div.c +594 -0
- data/vendor/ring/crypto/bn/exponentiation.c +1335 -0
- data/vendor/ring/crypto/bn/gcd.c +711 -0
- data/vendor/ring/crypto/bn/generic.c +1019 -0
- data/vendor/ring/crypto/bn/internal.h +316 -0
- data/vendor/ring/crypto/bn/montgomery.c +516 -0
- data/vendor/ring/crypto/bn/mul.c +888 -0
- data/vendor/ring/crypto/bn/prime.c +829 -0
- data/vendor/ring/crypto/bn/random.c +334 -0
- data/vendor/ring/crypto/bn/rsaz_exp.c +262 -0
- data/vendor/ring/crypto/bn/rsaz_exp.h +53 -0
- data/vendor/ring/crypto/bn/shift.c +276 -0
- data/vendor/ring/crypto/bytestring/bytestring_test.Windows.vcxproj +25 -0
- data/vendor/ring/crypto/bytestring/bytestring_test.cc +421 -0
- data/vendor/ring/crypto/bytestring/cbb.c +399 -0
- data/vendor/ring/crypto/bytestring/cbs.c +227 -0
- data/vendor/ring/crypto/bytestring/internal.h +46 -0
- data/vendor/ring/crypto/chacha/chacha_generic.c +140 -0
- data/vendor/ring/crypto/chacha/chacha_vec.c +323 -0
- data/vendor/ring/crypto/chacha/chacha_vec_arm.S +1447 -0
- data/vendor/ring/crypto/chacha/chacha_vec_arm_generate.go +153 -0
- data/vendor/ring/crypto/cipher/cipher_test.Windows.vcxproj +25 -0
- data/vendor/ring/crypto/cipher/e_aes.c +390 -0
- data/vendor/ring/crypto/cipher/e_chacha20poly1305.c +208 -0
- data/vendor/ring/crypto/cipher/internal.h +173 -0
- data/vendor/ring/crypto/cipher/test/aes_128_gcm_tests.txt +543 -0
- data/vendor/ring/crypto/cipher/test/aes_128_key_wrap_tests.txt +9 -0
- data/vendor/ring/crypto/cipher/test/aes_256_gcm_tests.txt +475 -0
- data/vendor/ring/crypto/cipher/test/aes_256_key_wrap_tests.txt +23 -0
- data/vendor/ring/crypto/cipher/test/chacha20_poly1305_old_tests.txt +422 -0
- data/vendor/ring/crypto/cipher/test/chacha20_poly1305_tests.txt +484 -0
- data/vendor/ring/crypto/cipher/test/cipher_test.txt +100 -0
- data/vendor/ring/crypto/constant_time_test.Windows.vcxproj +25 -0
- data/vendor/ring/crypto/constant_time_test.c +304 -0
- data/vendor/ring/crypto/cpu-arm-asm.S +32 -0
- data/vendor/ring/crypto/cpu-arm.c +199 -0
- data/vendor/ring/crypto/cpu-intel.c +261 -0
- data/vendor/ring/crypto/crypto.c +151 -0
- data/vendor/ring/crypto/curve25519/asm/x25519-arm.S +2118 -0
- data/vendor/ring/crypto/curve25519/curve25519.c +4888 -0
- data/vendor/ring/crypto/curve25519/x25519_test.cc +128 -0
- data/vendor/ring/crypto/digest/md32_common.h +181 -0
- data/vendor/ring/crypto/ec/asm/p256-x86_64-asm.pl +2725 -0
- data/vendor/ring/crypto/ec/ec.c +193 -0
- data/vendor/ring/crypto/ec/ec_curves.c +61 -0
- data/vendor/ring/crypto/ec/ec_key.c +228 -0
- data/vendor/ring/crypto/ec/ec_montgomery.c +114 -0
- data/vendor/ring/crypto/ec/example_mul.Windows.vcxproj +25 -0
- data/vendor/ring/crypto/ec/internal.h +243 -0
- data/vendor/ring/crypto/ec/oct.c +253 -0
- data/vendor/ring/crypto/ec/p256-64.c +1794 -0
- data/vendor/ring/crypto/ec/p256-x86_64-table.h +9548 -0
- data/vendor/ring/crypto/ec/p256-x86_64.c +509 -0
- data/vendor/ring/crypto/ec/simple.c +1007 -0
- data/vendor/ring/crypto/ec/util-64.c +183 -0
- data/vendor/ring/crypto/ec/wnaf.c +508 -0
- data/vendor/ring/crypto/ecdh/ecdh.c +155 -0
- data/vendor/ring/crypto/ecdsa/ecdsa.c +304 -0
- data/vendor/ring/crypto/ecdsa/ecdsa_asn1.c +193 -0
- data/vendor/ring/crypto/ecdsa/ecdsa_test.Windows.vcxproj +25 -0
- data/vendor/ring/crypto/ecdsa/ecdsa_test.cc +327 -0
- data/vendor/ring/crypto/header_removed.h +17 -0
- data/vendor/ring/crypto/internal.h +495 -0
- data/vendor/ring/crypto/libring.Windows.vcxproj +101 -0
- data/vendor/ring/crypto/mem.c +98 -0
- data/vendor/ring/crypto/modes/asm/aesni-gcm-x86_64.pl +1045 -0
- data/vendor/ring/crypto/modes/asm/ghash-armv4.pl +517 -0
- data/vendor/ring/crypto/modes/asm/ghash-x86.pl +1393 -0
- data/vendor/ring/crypto/modes/asm/ghash-x86_64.pl +1741 -0
- data/vendor/ring/crypto/modes/asm/ghashv8-armx.pl +422 -0
- data/vendor/ring/crypto/modes/ctr.c +226 -0
- data/vendor/ring/crypto/modes/gcm.c +1206 -0
- data/vendor/ring/crypto/modes/gcm_test.Windows.vcxproj +25 -0
- data/vendor/ring/crypto/modes/gcm_test.c +348 -0
- data/vendor/ring/crypto/modes/internal.h +299 -0
- data/vendor/ring/crypto/perlasm/arm-xlate.pl +170 -0
- data/vendor/ring/crypto/perlasm/readme +100 -0
- data/vendor/ring/crypto/perlasm/x86_64-xlate.pl +1164 -0
- data/vendor/ring/crypto/perlasm/x86asm.pl +292 -0
- data/vendor/ring/crypto/perlasm/x86gas.pl +263 -0
- data/vendor/ring/crypto/perlasm/x86masm.pl +200 -0
- data/vendor/ring/crypto/perlasm/x86nasm.pl +187 -0
- data/vendor/ring/crypto/poly1305/poly1305.c +331 -0
- data/vendor/ring/crypto/poly1305/poly1305_arm.c +301 -0
- data/vendor/ring/crypto/poly1305/poly1305_arm_asm.S +2015 -0
- data/vendor/ring/crypto/poly1305/poly1305_test.Windows.vcxproj +25 -0
- data/vendor/ring/crypto/poly1305/poly1305_test.cc +80 -0
- data/vendor/ring/crypto/poly1305/poly1305_test.txt +52 -0
- data/vendor/ring/crypto/poly1305/poly1305_vec.c +892 -0
- data/vendor/ring/crypto/rand/asm/rdrand-x86_64.pl +75 -0
- data/vendor/ring/crypto/rand/internal.h +32 -0
- data/vendor/ring/crypto/rand/rand.c +189 -0
- data/vendor/ring/crypto/rand/urandom.c +219 -0
- data/vendor/ring/crypto/rand/windows.c +56 -0
- data/vendor/ring/crypto/refcount_c11.c +66 -0
- data/vendor/ring/crypto/refcount_lock.c +53 -0
- data/vendor/ring/crypto/refcount_test.Windows.vcxproj +25 -0
- data/vendor/ring/crypto/refcount_test.c +58 -0
- data/vendor/ring/crypto/rsa/blinding.c +462 -0
- data/vendor/ring/crypto/rsa/internal.h +108 -0
- data/vendor/ring/crypto/rsa/padding.c +300 -0
- data/vendor/ring/crypto/rsa/rsa.c +450 -0
- data/vendor/ring/crypto/rsa/rsa_asn1.c +261 -0
- data/vendor/ring/crypto/rsa/rsa_impl.c +944 -0
- data/vendor/ring/crypto/rsa/rsa_test.Windows.vcxproj +25 -0
- data/vendor/ring/crypto/rsa/rsa_test.cc +437 -0
- data/vendor/ring/crypto/sha/asm/sha-armv8.pl +436 -0
- data/vendor/ring/crypto/sha/asm/sha-x86_64.pl +2390 -0
- data/vendor/ring/crypto/sha/asm/sha256-586.pl +1275 -0
- data/vendor/ring/crypto/sha/asm/sha256-armv4.pl +735 -0
- data/vendor/ring/crypto/sha/asm/sha256-armv8.pl +14 -0
- data/vendor/ring/crypto/sha/asm/sha256-x86_64.pl +14 -0
- data/vendor/ring/crypto/sha/asm/sha512-586.pl +911 -0
- data/vendor/ring/crypto/sha/asm/sha512-armv4.pl +666 -0
- data/vendor/ring/crypto/sha/asm/sha512-armv8.pl +14 -0
- data/vendor/ring/crypto/sha/asm/sha512-x86_64.pl +14 -0
- data/vendor/ring/crypto/sha/sha1.c +271 -0
- data/vendor/ring/crypto/sha/sha256.c +204 -0
- data/vendor/ring/crypto/sha/sha512.c +355 -0
- data/vendor/ring/crypto/test/file_test.cc +326 -0
- data/vendor/ring/crypto/test/file_test.h +181 -0
- data/vendor/ring/crypto/test/malloc.cc +150 -0
- data/vendor/ring/crypto/test/scoped_types.h +95 -0
- data/vendor/ring/crypto/test/test.Windows.vcxproj +35 -0
- data/vendor/ring/crypto/test/test_util.cc +46 -0
- data/vendor/ring/crypto/test/test_util.h +41 -0
- data/vendor/ring/crypto/thread_none.c +55 -0
- data/vendor/ring/crypto/thread_pthread.c +165 -0
- data/vendor/ring/crypto/thread_test.Windows.vcxproj +25 -0
- data/vendor/ring/crypto/thread_test.c +200 -0
- data/vendor/ring/crypto/thread_win.c +282 -0
- data/vendor/ring/examples/checkdigest.rs +103 -0
- data/vendor/ring/include/openssl/aes.h +121 -0
- data/vendor/ring/include/openssl/arm_arch.h +129 -0
- data/vendor/ring/include/openssl/base.h +156 -0
- data/vendor/ring/include/openssl/bn.h +794 -0
- data/vendor/ring/include/openssl/buffer.h +18 -0
- data/vendor/ring/include/openssl/bytestring.h +235 -0
- data/vendor/ring/include/openssl/chacha.h +37 -0
- data/vendor/ring/include/openssl/cmac.h +76 -0
- data/vendor/ring/include/openssl/cpu.h +184 -0
- data/vendor/ring/include/openssl/crypto.h +43 -0
- data/vendor/ring/include/openssl/curve25519.h +88 -0
- data/vendor/ring/include/openssl/ec.h +225 -0
- data/vendor/ring/include/openssl/ec_key.h +129 -0
- data/vendor/ring/include/openssl/ecdh.h +110 -0
- data/vendor/ring/include/openssl/ecdsa.h +156 -0
- data/vendor/ring/include/openssl/err.h +201 -0
- data/vendor/ring/include/openssl/mem.h +101 -0
- data/vendor/ring/include/openssl/obj_mac.h +71 -0
- data/vendor/ring/include/openssl/opensslfeatures.h +68 -0
- data/vendor/ring/include/openssl/opensslv.h +18 -0
- data/vendor/ring/include/openssl/ossl_typ.h +18 -0
- data/vendor/ring/include/openssl/poly1305.h +51 -0
- data/vendor/ring/include/openssl/rand.h +70 -0
- data/vendor/ring/include/openssl/rsa.h +399 -0
- data/vendor/ring/include/openssl/thread.h +133 -0
- data/vendor/ring/include/openssl/type_check.h +71 -0
- data/vendor/ring/mk/Common.props +63 -0
- data/vendor/ring/mk/Windows.props +42 -0
- data/vendor/ring/mk/WindowsTest.props +18 -0
- data/vendor/ring/mk/appveyor.bat +62 -0
- data/vendor/ring/mk/bottom_of_makefile.mk +54 -0
- data/vendor/ring/mk/ring.mk +266 -0
- data/vendor/ring/mk/top_of_makefile.mk +214 -0
- data/vendor/ring/mk/travis.sh +40 -0
- data/vendor/ring/mk/update-travis-yml.py +229 -0
- data/vendor/ring/ring.sln +153 -0
- data/vendor/ring/src/aead.rs +682 -0
- data/vendor/ring/src/agreement.rs +248 -0
- data/vendor/ring/src/c.rs +129 -0
- data/vendor/ring/src/constant_time.rs +37 -0
- data/vendor/ring/src/der.rs +96 -0
- data/vendor/ring/src/digest.rs +690 -0
- data/vendor/ring/src/digest_tests.txt +57 -0
- data/vendor/ring/src/ecc.rs +28 -0
- data/vendor/ring/src/ecc_build.rs +279 -0
- data/vendor/ring/src/ecc_curves.rs +117 -0
- data/vendor/ring/src/ed25519_tests.txt +2579 -0
- data/vendor/ring/src/exe_tests.rs +46 -0
- data/vendor/ring/src/ffi.rs +29 -0
- data/vendor/ring/src/file_test.rs +187 -0
- data/vendor/ring/src/hkdf.rs +153 -0
- data/vendor/ring/src/hkdf_tests.txt +59 -0
- data/vendor/ring/src/hmac.rs +414 -0
- data/vendor/ring/src/hmac_tests.txt +97 -0
- data/vendor/ring/src/input.rs +312 -0
- data/vendor/ring/src/lib.rs +41 -0
- data/vendor/ring/src/pbkdf2.rs +265 -0
- data/vendor/ring/src/pbkdf2_tests.txt +113 -0
- data/vendor/ring/src/polyfill.rs +57 -0
- data/vendor/ring/src/rand.rs +28 -0
- data/vendor/ring/src/signature.rs +314 -0
- data/vendor/ring/third-party/NIST/README.md +9 -0
- data/vendor/ring/third-party/NIST/SHAVS/SHA1LongMsg.rsp +263 -0
- data/vendor/ring/third-party/NIST/SHAVS/SHA1Monte.rsp +309 -0
- data/vendor/ring/third-party/NIST/SHAVS/SHA1ShortMsg.rsp +267 -0
- data/vendor/ring/third-party/NIST/SHAVS/SHA224LongMsg.rsp +263 -0
- data/vendor/ring/third-party/NIST/SHAVS/SHA224Monte.rsp +309 -0
- data/vendor/ring/third-party/NIST/SHAVS/SHA224ShortMsg.rsp +267 -0
- data/vendor/ring/third-party/NIST/SHAVS/SHA256LongMsg.rsp +263 -0
- data/vendor/ring/third-party/NIST/SHAVS/SHA256Monte.rsp +309 -0
- data/vendor/ring/third-party/NIST/SHAVS/SHA256ShortMsg.rsp +267 -0
- data/vendor/ring/third-party/NIST/SHAVS/SHA384LongMsg.rsp +519 -0
- data/vendor/ring/third-party/NIST/SHAVS/SHA384Monte.rsp +309 -0
- data/vendor/ring/third-party/NIST/SHAVS/SHA384ShortMsg.rsp +523 -0
- data/vendor/ring/third-party/NIST/SHAVS/SHA512LongMsg.rsp +519 -0
- data/vendor/ring/third-party/NIST/SHAVS/SHA512Monte.rsp +309 -0
- data/vendor/ring/third-party/NIST/SHAVS/SHA512ShortMsg.rsp +523 -0
- data/vendor/ring/third-party/NIST/sha256sums.txt +1 -0
- metadata +333 -0
|
@@ -0,0 +1,841 @@
|
|
|
1
|
+
#!/usr/bin/env perl
|
|
2
|
+
|
|
3
|
+
######################################################################
|
|
4
|
+
## Constant-time SSSE3 AES core implementation.
|
|
5
|
+
## version 0.1
|
|
6
|
+
##
|
|
7
|
+
## By Mike Hamburg (Stanford University), 2009
|
|
8
|
+
## Public domain.
|
|
9
|
+
##
|
|
10
|
+
## For details see http://shiftleft.org/papers/vector_aes/ and
|
|
11
|
+
## http://crypto.stanford.edu/vpaes/.
|
|
12
|
+
|
|
13
|
+
######################################################################
|
|
14
|
+
# September 2011.
|
|
15
|
+
#
|
|
16
|
+
# Port vpaes-x86_64.pl as 32-bit "almost" drop-in replacement for
|
|
17
|
+
# aes-586.pl. "Almost" refers to the fact that AES_cbc_encrypt
|
|
18
|
+
# doesn't handle partial vectors (doesn't have to if called from
|
|
19
|
+
# EVP only). "Drop-in" implies that this module doesn't share key
|
|
20
|
+
# schedule structure with the original nor does it make assumption
|
|
21
|
+
# about its alignment...
|
|
22
|
+
#
|
|
23
|
+
# Performance summary. aes-586.pl column lists large-block CBC
|
|
24
|
+
# encrypt/decrypt/with-hyper-threading-off(*) results in cycles per
|
|
25
|
+
# byte processed with 128-bit key, and vpaes-x86.pl column - [also
|
|
26
|
+
# large-block CBC] encrypt/decrypt.
|
|
27
|
+
#
|
|
28
|
+
# aes-586.pl vpaes-x86.pl
|
|
29
|
+
#
|
|
30
|
+
# Core 2(**) 28.1/41.4/18.3 21.9/25.2(***)
|
|
31
|
+
# Nehalem 27.9/40.4/18.1 10.2/11.9
|
|
32
|
+
# Atom 70.7/92.1/60.1 61.1/75.4(***)
|
|
33
|
+
# Silvermont 45.4/62.9/24.1 49.2/61.1(***)
|
|
34
|
+
#
|
|
35
|
+
# (*) "Hyper-threading" in the context refers rather to cache shared
|
|
36
|
+
# among multiple cores, than to specifically Intel HTT. As vast
|
|
37
|
+
# majority of contemporary cores share cache, slower code path
|
|
38
|
+
# is commonplace. In other words "with-hyper-threading-off"
|
|
39
|
+
# results are presented mostly for reference purposes.
|
|
40
|
+
#
|
|
41
|
+
# (**) "Core 2" refers to initial 65nm design, a.k.a. Conroe.
|
|
42
|
+
#
|
|
43
|
+
# (***) Less impressive improvement on Core 2 and Atom is due to slow
|
|
44
|
+
# pshufb, yet it's respectable +28%/64% improvement on Core 2
|
|
45
|
+
# and +15% on Atom (as implied, over "hyper-threading-safe"
|
|
46
|
+
# code path).
|
|
47
|
+
#
|
|
48
|
+
# <appro@openssl.org>
|
|
49
|
+
|
|
50
|
+
# Script preamble: locate the perlasm support modules, initialise the
# assembler front end, and give symbolic names to the general-purpose
# registers used by every routine in this file.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; # $dir = this script's path up to and including the last / or \
|
|
51
|
+
push(@INC,"${dir}","${dir}../../perlasm"); # let require search $dir and $dir../../perlasm
|
|
52
|
+
require "x86asm.pl";
|
|
53
|
+
|
|
54
|
+
&asm_init($ARGV[0],"vpaes-x86.pl",$x86only = $ARGV[$#ARGV] eq "386"); # $x86only is true only when the last argument is "386"
|
|
55
|
+
|
|
56
|
+
$PREFIX="vpaes";
|
|
57
|
+
|
|
58
|
+
my ($round, $base, $magic, $key, $const, $inp, $out)= # symbolic names for the seven registers listed on the next line
|
|
59
|
+
("eax", "ebx", "ecx", "edx","ebp", "esi","edi");
|
|
60
|
+
|
|
61
|
+
&static_label("_vpaes_consts");
|
|
62
|
+
&static_label("_vpaes_schedule_low_round");
|
|
63
|
+
|
|
64
|
+
# Constant pool shared by all routines in this file.
#
# Every $k_* value is a byte offset measured from the start of the
# $k_ipt table, which sits 0x30 bytes past the _vpaes_consts label;
# that is why $k_inv and $k_s0F, emitted first, have negative offsets.
# Each &data_word() row lists four values and the offsets advance by
# 0x10 per row, i.e. one row is one 16-byte vector.  The rows must
# therefore stay in exactly this order: the offsets are hard-coded,
# not computed from the data.
&set_label("_vpaes_consts",64);
|
|
65
|
+
$k_inv=-0x30; # inv, inva
|
|
66
|
+
&data_word(0x0D080180,0x0E05060F,0x0A0B0C02,0x04070309);
|
|
67
|
+
&data_word(0x0F0B0780,0x01040A06,0x02050809,0x030D0E0C);
|
|
68
|
+
|
|
69
|
+
$k_s0F=-0x10; # s0F
|
|
70
|
+
&data_word(0x0F0F0F0F,0x0F0F0F0F,0x0F0F0F0F,0x0F0F0F0F);
|
|
71
|
+
|
|
72
|
+
$k_ipt=0x00; # input transform (lo, hi)
|
|
73
|
+
&data_word(0x5A2A7000,0xC2B2E898,0x52227808,0xCABAE090);
|
|
74
|
+
&data_word(0x317C4D00,0x4C01307D,0xB0FDCC81,0xCD80B1FC);
|
|
75
|
+
|
|
76
|
+
$k_sb1=0x20; # sb1u, sb1t
|
|
77
|
+
&data_word(0xCB503E00,0xB19BE18F,0x142AF544,0xA5DF7A6E);
|
|
78
|
+
&data_word(0xFAE22300,0x3618D415,0x0D2ED9EF,0x3BF7CCC1);
|
|
79
|
+
$k_sb2=0x40; # sb2u, sb2t
|
|
80
|
+
&data_word(0x0B712400,0xE27A93C6,0xBC982FCD,0x5EB7E955);
|
|
81
|
+
&data_word(0x0AE12900,0x69EB8840,0xAB82234A,0xC2A163C8);
|
|
82
|
+
$k_sbo=0x60; # sbou, sbot
|
|
83
|
+
&data_word(0x6FBDC700,0xD0D26D17,0xC502A878,0x15AABF7A);
|
|
84
|
+
&data_word(0x5FBB6A00,0xCFE474A5,0x412B35FA,0x8E1E90D1);
|
|
85
|
+
|
|
86
|
+
$k_mc_forward=0x80; # mc_forward
|
|
87
|
+
&data_word(0x00030201,0x04070605,0x080B0A09,0x0C0F0E0D);
|
|
88
|
+
&data_word(0x04070605,0x080B0A09,0x0C0F0E0D,0x00030201);
|
|
89
|
+
&data_word(0x080B0A09,0x0C0F0E0D,0x00030201,0x04070605);
|
|
90
|
+
&data_word(0x0C0F0E0D,0x00030201,0x04070605,0x080B0A09);
|
|
91
|
+
|
|
92
|
+
$k_mc_backward=0xc0; # mc_backward
|
|
93
|
+
&data_word(0x02010003,0x06050407,0x0A09080B,0x0E0D0C0F);
|
|
94
|
+
&data_word(0x0E0D0C0F,0x02010003,0x06050407,0x0A09080B);
|
|
95
|
+
&data_word(0x0A09080B,0x0E0D0C0F,0x02010003,0x06050407);
|
|
96
|
+
&data_word(0x06050407,0x0A09080B,0x0E0D0C0F,0x02010003);
|
|
97
|
+
|
|
98
|
+
$k_sr=0x100; # sr
|
|
99
|
+
&data_word(0x03020100,0x07060504,0x0B0A0908,0x0F0E0D0C);
|
|
100
|
+
&data_word(0x0F0A0500,0x030E0904,0x07020D08,0x0B06010C);
|
|
101
|
+
&data_word(0x0B020900,0x0F060D04,0x030A0108,0x070E050C);
|
|
102
|
+
&data_word(0x070A0D00,0x0B0E0104,0x0F020508,0x0306090C);
|
|
103
|
+
|
|
104
|
+
$k_rcon=0x140; # rcon
|
|
105
|
+
&data_word(0xAF9DEEB6,0x1F8391B9,0x4D7C7D81,0x702A9808);
|
|
106
|
+
|
|
107
|
+
$k_s63=0x150; # s63: all equal to 0x63 transformed
|
|
108
|
+
&data_word(0x5B5B5B5B,0x5B5B5B5B,0x5B5B5B5B,0x5B5B5B5B);
|
|
109
|
+
|
|
110
|
+
$k_opt=0x160; # output transform
|
|
111
|
+
&data_word(0xD6B66000,0xFF9F4929,0xDEBE6808,0xF7974121);
|
|
112
|
+
&data_word(0x50BCEC00,0x01EDBD51,0xB05C0CE0,0xE10D5DB1);
|
|
113
|
+
|
|
114
|
+
$k_deskew=0x180; # deskew tables: inverts the sbox's "skew"
|
|
115
|
+
&data_word(0x47A4E300,0x07E4A340,0x5DBEF91A,0x1DFEB95A);
|
|
116
|
+
&data_word(0x83EA6900,0x5F36B5DC,0xF49D1E77,0x2841C2AB);
|
|
117
|
+
##
|
|
118
|
+
## Decryption stuff
|
|
119
|
+
## Key schedule constants
|
|
120
|
+
##
|
|
121
|
+
$k_dksd=0x1a0; # decryption key schedule: invskew x*D
|
|
122
|
+
&data_word(0xA3E44700,0xFEB91A5D,0x5A1DBEF9,0x0740E3A4);
|
|
123
|
+
&data_word(0xB5368300,0x41C277F4,0xAB289D1E,0x5FDC69EA);
|
|
124
|
+
$k_dksb=0x1c0; # decryption key schedule: invskew x*B
|
|
125
|
+
&data_word(0x8550D500,0x9A4FCA1F,0x1CC94C99,0x03D65386);
|
|
126
|
+
&data_word(0xB6FC4A00,0x115BEDA7,0x7E3482C8,0xD993256F);
|
|
127
|
+
$k_dkse=0x1e0; # decryption key schedule: invskew x*E + 0x63
|
|
128
|
+
&data_word(0x1FC9D600,0xD5031CCA,0x994F5086,0x53859A4C);
|
|
129
|
+
&data_word(0x4FDC7BE8,0xA2319605,0x20B31487,0xCD5EF96A);
|
|
130
|
+
$k_dks9=0x200; # decryption key schedule: invskew x*9
|
|
131
|
+
&data_word(0x7ED9A700,0xB6116FC8,0x82255BFC,0x4AED9334);
|
|
132
|
+
&data_word(0x27143300,0x45765162,0xE9DAFDCE,0x8BB89FAC);
|
|
133
|
+
|
|
134
|
+
##
|
|
135
|
+
## Decryption stuff
|
|
136
|
+
## Round function constants
|
|
137
|
+
##
|
|
138
|
+
$k_dipt=0x220; # decryption input transform
|
|
139
|
+
&data_word(0x0B545F00,0x0F505B04,0x114E451A,0x154A411E);
|
|
140
|
+
&data_word(0x60056500,0x86E383E6,0xF491F194,0x12771772);
|
|
141
|
+
|
|
142
|
+
$k_dsb9=0x240; # decryption sbox output *9*u, *9*t
|
|
143
|
+
&data_word(0x9A86D600,0x851C0353,0x4F994CC9,0xCAD51F50);
|
|
144
|
+
&data_word(0xECD74900,0xC03B1789,0xB2FBA565,0x725E2C9E);
|
|
145
|
+
$k_dsbd=0x260; # decryption sbox output *D*u, *D*t
|
|
146
|
+
&data_word(0xE6B1A200,0x7D57CCDF,0x882A4439,0xF56E9B13);
|
|
147
|
+
&data_word(0x24C6CB00,0x3CE2FAF7,0x15DEEFD3,0x2931180D);
|
|
148
|
+
$k_dsbb=0x280; # decryption sbox output *B*u, *B*t
|
|
149
|
+
&data_word(0x96B44200,0xD0226492,0xB0F2D404,0x602646F6);
|
|
150
|
+
&data_word(0xCD596700,0xC19498A6,0x3255AA6B,0xF3FF0C3E);
|
|
151
|
+
$k_dsbe=0x2a0; # decryption sbox output *E*u, *E*t
|
|
152
|
+
&data_word(0x26D4D000,0x46F29296,0x64B4F6B0,0x22426004);
|
|
153
|
+
&data_word(0xFFAAC100,0x0C55A6CD,0x98593E32,0x9467F36B);
|
|
154
|
+
$k_dsbo=0x2c0; # decryption sbox final output
|
|
155
|
+
&data_word(0x7EF94000,0x1387EA53,0xD4943E2D,0xC7AA6DB9);
|
|
156
|
+
&data_word(0x93441D00,0x12D7560F,0xD8C58E9C,0xCA4B8159);
|
|
157
|
+
&asciz ("Vector Permutation AES for x86/SSSE3, Mike Hamburg (Stanford University)");
|
|
158
|
+
&align (64); # NOTE(review): presumably pads to a 64-byte boundary before the code that follows - confirm in x86asm.pl
|
|
159
|
+
|
|
160
|
+
##
## _vpaes_preheat
##
## Turns $const into a usable table base and loads the two constants
## that the encrypt/decrypt cores expect to find in %xmm6 and %xmm7.
##
## Inputs:
## $const (%ebp) = value to which the dword at the top of the stack is added
## Outputs:
## $const = $const + (%esp)
## %xmm7 = 16 bytes at $k_inv, %xmm6 = 16 bytes at $k_s0F
##
&function_begin_B("_vpaes_preheat");
|
|
161
|
+
&add ($const,&DWP(0,"esp")); # NOTE(review): (%esp) is presumably the return address pushed by the caller's call, used as a position-independent base - confirm against the callers
|
|
162
|
+
&movdqa ("xmm7",&QWP($k_inv,$const)); # xmm7 = first row of the inv/inva pair
|
|
163
|
+
&movdqa ("xmm6",&QWP($k_s0F,$const)); # xmm6 = 0x0F in every byte (low-nibble mask)
|
|
164
|
+
&ret ();
|
|
165
|
+
&function_end_B("_vpaes_preheat");
|
|
166
|
+
|
|
167
|
+
##
##  _vpaes_encrypt_core
##
##  AES-encrypt %xmm0.
##
##  Inputs:
##     %xmm0 = input
##     %xmm6-%xmm7 as in _vpaes_preheat
##     (%edx) = scheduled keys; the dword at 240(%edx) is the round
##              counter that the loop below decrements
##     %ebp   = $const, the base that all $k_* table offsets are
##              applied to
##
##  Output in %xmm0
##  Clobbers  %xmm1-%xmm5, %eax, %ebx, %ecx, %edx
##
##  Flags note: the jnz at the bottom of enc_entry consumes ZF produced
##  by "sub $round,1" (loop path) or by "add $key,16" (first entry).
##  Everything in between is lea, jmp or an SSE/SSSE3 instruction, none
##  of which writes EFLAGS, so do not insert integer arithmetic there.
##
&function_begin_B("_vpaes_encrypt_core");
    &mov    ($magic,16);
    &mov    ($round,&DWP(240,$key));
    # The statement below used to lack its terminating ';'.  Perl then
    # parsed it together with the following line as a single
    # "call & call" bitwise-and expression; both instructions were still
    # emitted in order, but only by accident of the parse.
    &movdqa ("xmm1","xmm6");
    &movdqa ("xmm2",&QWP($k_ipt,$const));
    &pandn  ("xmm1","xmm0");
    &pand   ("xmm0","xmm6");
    &movdqu ("xmm5",&QWP(0,$key));
    &pshufb ("xmm2","xmm0");
    &movdqa ("xmm0",&QWP($k_ipt+16,$const));
    &pxor   ("xmm2","xmm5");
    &psrld  ("xmm1",4);
    &add    ($key,16);
    &pshufb ("xmm0","xmm1");
    &lea    ($base,&DWP($k_mc_backward,$const));    # $base = &k_mc_backward
    &pxor   ("xmm0","xmm2");
    &jmp    (&label("enc_entry"));


&set_label("enc_loop",16);
    # middle of middle round
    &movdqa ("xmm4",&QWP($k_sb1,$const));           # 4 : sb1u
    &movdqa ("xmm0",&QWP($k_sb1+16,$const));        # 0 : sb1t
    &pshufb ("xmm4","xmm2");                        # 4 = sb1u
    &pshufb ("xmm0","xmm3");                        # 0 = sb1t
    &pxor   ("xmm4","xmm5");                        # 4 = sb1u + k
    &movdqa ("xmm5",&QWP($k_sb2,$const));           # 5 : sb2u
    &pxor   ("xmm0","xmm4");                        # 0 = A
    &movdqa ("xmm1",&QWP(-0x40,$base,$magic));      # k_mc_forward[]  ($base-0x40 == $const+$k_mc_forward)
    &pshufb ("xmm5","xmm2");                        # 5 = sb2u
    &movdqa ("xmm2",&QWP($k_sb2+16,$const));        # 2 : sb2t
    &movdqa ("xmm4",&QWP(0,$base,$magic));          # k_mc_backward[]
    &pshufb ("xmm2","xmm3");                        # 2 = sb2t
    &movdqa ("xmm3","xmm0");                        # 3 = A
    &pxor   ("xmm2","xmm5");                        # 2 = 2A
    &pshufb ("xmm0","xmm1");                        # 0 = B
    &add    ($key,16);                              # next key
    &pxor   ("xmm0","xmm2");                        # 0 = 2A+B
    &pshufb ("xmm3","xmm4");                        # 3 = D
    &add    ($magic,16);                            # next mc
    &pxor   ("xmm3","xmm0");                        # 3 = 2A+B+D
    &pshufb ("xmm0","xmm1");                        # 0 = 2B+C
    &and    ($magic,0x30);                          # ... mod 4
    &sub    ($round,1);                             # nr--; sets ZF for the jnz below
    &pxor   ("xmm0","xmm3");                        # 0 = 2A+3B+C+D

&set_label("enc_entry");
    # top of round
    &movdqa ("xmm1","xmm6");                        # 1 : i
    &movdqa ("xmm5",&QWP($k_inv+16,$const));        # 5 : a/k
    &pandn  ("xmm1","xmm0");                        # 1 = i<<4
    &psrld  ("xmm1",4);                             # 1 = i
    &pand   ("xmm0","xmm6");                        # 0 = k
    &pshufb ("xmm5","xmm0");                        # 5 = a/k
    &movdqa ("xmm3","xmm7");                        # 3 : 1/i
    &pxor   ("xmm0","xmm1");                        # 0 = j
    &pshufb ("xmm3","xmm1");                        # 3 = 1/i
    &movdqa ("xmm4","xmm7");                        # 4 : 1/j
    &pxor   ("xmm3","xmm5");                        # 3 = iak = 1/i + a/k
    &pshufb ("xmm4","xmm0");                        # 4 = 1/j
    &movdqa ("xmm2","xmm7");                        # 2 : 1/iak
    &pxor   ("xmm4","xmm5");                        # 4 = jak = 1/j + a/k
    &pshufb ("xmm2","xmm3");                        # 2 = 1/iak
    &movdqa ("xmm3","xmm7");                        # 3 : 1/jak
    &pxor   ("xmm2","xmm0");                        # 2 = io
    &pshufb ("xmm3","xmm4");                        # 3 = 1/jak
    &movdqu ("xmm5",&QWP(0,$key));                  # 5 = next round key (unaligned load)
    &pxor   ("xmm3","xmm1");                        # 3 = jo
    &jnz    (&label("enc_loop"));

    # middle of last round
    &movdqa ("xmm4",&QWP($k_sbo,$const));           # 4 : sbou
    &movdqa ("xmm0",&QWP($k_sbo+16,$const));        # 0 : sbot
    &pshufb ("xmm4","xmm2");                        # 4 = sbou
    &pxor   ("xmm4","xmm5");                        # 4 = sbou + k
    &pshufb ("xmm0","xmm3");                        # 0 = sbot
    &movdqa ("xmm1",&QWP(0x40,$base,$magic));       # k_sr[]  ($base+0x40 == $const+$k_sr)
    &pxor   ("xmm0","xmm4");                        # 0 = A
    &pshufb ("xmm0","xmm1");
    &ret    ();
&function_end_B("_vpaes_encrypt_core");
|
|
262
|
+
|
|
263
|
+
##
##  Decryption core
##
##  Same API as encryption core:
##     %xmm0 = input block, result returned in %xmm0
##     %xmm6-%xmm7 as in _vpaes_preheat
##     (%edx) = scheduled keys; the dword at 240(%edx) is the round
##              counter that the loop below decrements
##     %ebp   = $const, the base that all $k_* table offsets are
##              applied to
##  Writes %xmm1-%xmm5, %eax, %ebx, %ecx, %edx.
##
##  $base is pointed at the $k_dsbd table, so the fixed displacements
##  used in the loop map to: -0x20/-0x10 = dsb9, 0x00/0x10 = dsbd,
##  0x20/0x30 = dsbb, 0x40/0x50 = dsbe, 0x60/0x70 = dsbo.
##
##  Flags note: the jnz at the bottom of dec_entry consumes ZF produced
##  by "sub $round,1" (loop path) or by "add $key,16" (first entry).
##  Everything in between is lea, jmp or an SSE/SSSE3 instruction, none
##  of which writes EFLAGS, so do not insert integer arithmetic there.
##
&function_begin_B("_vpaes_decrypt_core");
    &lea    ($base,&DWP($k_dsbd,$const));           # $base = &k_dsbd
    &mov    ($round,&DWP(240,$key));
    &movdqa ("xmm1","xmm6");
    &movdqa ("xmm2",&QWP($k_dipt-$k_dsbd,$base));   # k_dipt, addressed relative to $base
    &pandn  ("xmm1","xmm0");
    &mov    ($magic,$round);
    # The statement below used to lack its terminating ';'.  Perl then
    # parsed it together with the following line as a single
    # "call & call" bitwise-and expression; both instructions were still
    # emitted in order, but only by accident of the parse.
    &psrld  ("xmm1",4);
    &movdqu ("xmm5",&QWP(0,$key));
    &shl    ($magic,4);
    &pand   ("xmm0","xmm6");
    &pshufb ("xmm2","xmm0");
    &movdqa ("xmm0",&QWP($k_dipt-$k_dsbd+16,$base));
    &xor    ($magic,0x30);
    &pshufb ("xmm0","xmm1");
    &and    ($magic,0x30);                          # $magic = ((nr<<4)^0x30)&0x30, a row offset into k_sr
    &pxor   ("xmm2","xmm5");
    &movdqa ("xmm5",&QWP($k_mc_forward+48,$const)); # 5 = last row of k_mc_forward
    &pxor   ("xmm0","xmm2");
    &add    ($key,16);
    &lea    ($magic,&DWP($k_sr-$k_dsbd,$base,$magic)); # $magic = address of the selected k_sr row
    &jmp    (&label("dec_entry"));

&set_label("dec_loop",16);
##
##  Inverse mix columns
##
    &movdqa ("xmm4",&QWP(-0x20,$base));             # 4 : sb9u
    &movdqa ("xmm1",&QWP(-0x10,$base));             # 1 : sb9t
    &pshufb ("xmm4","xmm2");                        # 4 = sb9u
    &pshufb ("xmm1","xmm3");                        # 1 = sb9t
    &pxor   ("xmm0","xmm4");
    &movdqa ("xmm4",&QWP(0,$base));                 # 4 : sbdu
    &pxor   ("xmm0","xmm1");                        # 0 = ch
    &movdqa ("xmm1",&QWP(0x10,$base));              # 1 : sbdt

    &pshufb ("xmm4","xmm2");                        # 4 = sbdu
    &pshufb ("xmm0","xmm5");                        # MC ch
    &pshufb ("xmm1","xmm3");                        # 1 = sbdt
    &pxor   ("xmm0","xmm4");                        # 0 = ch
    &movdqa ("xmm4",&QWP(0x20,$base));              # 4 : sbbu
    &pxor   ("xmm0","xmm1");                        # 0 = ch
    &movdqa ("xmm1",&QWP(0x30,$base));              # 1 : sbbt

    &pshufb ("xmm4","xmm2");                        # 4 = sbbu
    &pshufb ("xmm0","xmm5");                        # MC ch
    &pshufb ("xmm1","xmm3");                        # 1 = sbbt
    &pxor   ("xmm0","xmm4");                        # 0 = ch
    &movdqa ("xmm4",&QWP(0x40,$base));              # 4 : sbeu
    &pxor   ("xmm0","xmm1");                        # 0 = ch
    &movdqa ("xmm1",&QWP(0x50,$base));              # 1 : sbet

    &pshufb ("xmm4","xmm2");                        # 4 = sbeu
    &pshufb ("xmm0","xmm5");                        # MC ch
    &pshufb ("xmm1","xmm3");                        # 1 = sbet
    &pxor   ("xmm0","xmm4");                        # 0 = ch
    &add    ($key,16);                              # next round key
    &palignr("xmm5","xmm5",12);                     # rotate the MC shuffle mask for the next round
    &pxor   ("xmm0","xmm1");                        # 0 = ch
    &sub    ($round,1);                             # nr--; sets ZF for the jnz below

&set_label("dec_entry");
    # top of round
    &movdqa ("xmm1","xmm6");                        # 1 : i
    &movdqa ("xmm2",&QWP($k_inv+16,$const));        # 2 : a/k
    &pandn  ("xmm1","xmm0");                        # 1 = i<<4
    &pand   ("xmm0","xmm6");                        # 0 = k
    &psrld  ("xmm1",4);                             # 1 = i
    &pshufb ("xmm2","xmm0");                        # 2 = a/k
    &movdqa ("xmm3","xmm7");                        # 3 : 1/i
    &pxor   ("xmm0","xmm1");                        # 0 = j
    &pshufb ("xmm3","xmm1");                        # 3 = 1/i
    &movdqa ("xmm4","xmm7");                        # 4 : 1/j
    &pxor   ("xmm3","xmm2");                        # 3 = iak = 1/i + a/k
    &pshufb ("xmm4","xmm0");                        # 4 = 1/j
    &pxor   ("xmm4","xmm2");                        # 4 = jak = 1/j + a/k
    &movdqa ("xmm2","xmm7");                        # 2 : 1/iak
    &pshufb ("xmm2","xmm3");                        # 2 = 1/iak
    &movdqa ("xmm3","xmm7");                        # 3 : 1/jak
    &pxor   ("xmm2","xmm0");                        # 2 = io
    &pshufb ("xmm3","xmm4");                        # 3 = 1/jak
    &movdqu ("xmm0",&QWP(0,$key));                  # 0 = next round key (unaligned load)
    &pxor   ("xmm3","xmm1");                        # 3 = jo
    &jnz    (&label("dec_loop"));

    # middle of last round
    &movdqa ("xmm4",&QWP(0x60,$base));              # 4 : sbou
    &pshufb ("xmm4","xmm2");                        # 4 = sbou
    &pxor   ("xmm4","xmm0");                        # 4 = sbou + k
    &movdqa ("xmm0",&QWP(0x70,$base));              # 0 : sbot
    &movdqa ("xmm2",&QWP(0,$magic));                # 2 = k_sr row selected before the loop
    &pshufb ("xmm0","xmm3");                        # 0 = sbot
    &pxor   ("xmm0","xmm4");                        # 0 = A
    &pshufb ("xmm0","xmm2");
    &ret    ();
&function_end_B("_vpaes_decrypt_core");
|
|
364
|
+
|
|
365
|
+
########################################################
|
|
366
|
+
## ##
|
|
367
|
+
## AES key schedule ##
|
|
368
|
+
## ##
|
|
369
|
+
########################################################
|
|
370
|
+
&function_begin_B("_vpaes_schedule_core");
|
|
371
|
+
&add ($const,&DWP(0,"esp"));
|
|
372
|
+
&movdqu ("xmm0",&QWP(0,$inp)); # load key (unaligned)
|
|
373
|
+
&movdqa ("xmm2",&QWP($k_rcon,$const)); # load rcon
|
|
374
|
+
|
|
375
|
+
# input transform
|
|
376
|
+
&movdqa ("xmm3","xmm0");
|
|
377
|
+
&lea ($base,&DWP($k_ipt,$const));
|
|
378
|
+
&movdqa (&QWP(4,"esp"),"xmm2"); # xmm8
|
|
379
|
+
&call ("_vpaes_schedule_transform");
|
|
380
|
+
&movdqa ("xmm7","xmm0");
|
|
381
|
+
|
|
382
|
+
&test ($out,$out);
|
|
383
|
+
&jnz (&label("schedule_am_decrypting"));
|
|
384
|
+
|
|
385
|
+
# encrypting, output zeroth round key after transform
|
|
386
|
+
&movdqu (&QWP(0,$key),"xmm0");
|
|
387
|
+
&jmp (&label("schedule_go"));
|
|
388
|
+
|
|
389
|
+
&set_label("schedule_am_decrypting");
|
|
390
|
+
# decrypting, output zeroth round key after shiftrows
|
|
391
|
+
&movdqa ("xmm1",&QWP($k_sr,$const,$magic));
|
|
392
|
+
&pshufb ("xmm3","xmm1");
|
|
393
|
+
&movdqu (&QWP(0,$key),"xmm3");
|
|
394
|
+
&xor ($magic,0x30);
|
|
395
|
+
|
|
396
|
+
&set_label("schedule_go");
|
|
397
|
+
&cmp ($round,192);
|
|
398
|
+
&ja (&label("schedule_256"));
|
|
399
|
+
&je (&label("schedule_192"));
|
|
400
|
+
# 128: fall through
|
|
401
|
+
|
|
402
|
+
##
|
|
403
|
+
## .schedule_128
|
|
404
|
+
##
|
|
405
|
+
## 128-bit specific part of key schedule.
|
|
406
|
+
##
|
|
407
|
+
## This schedule is really simple, because all its parts
|
|
408
|
+
## are accomplished by the subroutines.
|
|
409
|
+
##
|
|
410
|
+
&set_label("schedule_128");
|
|
411
|
+
&mov ($round,10);
|
|
412
|
+
|
|
413
|
+
&set_label("loop_schedule_128");
|
|
414
|
+
&call ("_vpaes_schedule_round");
|
|
415
|
+
&dec ($round);
|
|
416
|
+
&jz (&label("schedule_mangle_last"));
|
|
417
|
+
&call ("_vpaes_schedule_mangle"); # write output
|
|
418
|
+
&jmp (&label("loop_schedule_128"));
|
|
419
|
+
|
|
420
|
+
##
|
|
421
|
+
## .aes_schedule_192
|
|
422
|
+
##
|
|
423
|
+
## 192-bit specific part of key schedule.
|
|
424
|
+
##
|
|
425
|
+
## The main body of this schedule is the same as the 128-bit
|
|
426
|
+
## schedule, but with more smearing. The long, high side is
|
|
427
|
+
## stored in %xmm7 as before, and the short, low side is in
|
|
428
|
+
## the high bits of %xmm6.
|
|
429
|
+
##
|
|
430
|
+
## This schedule is somewhat nastier, however, because each
|
|
431
|
+
## round produces 192 bits of key material, or 1.5 round keys.
|
|
432
|
+
## Therefore, on each cycle we do 2 rounds and produce 3 round
|
|
433
|
+
## keys.
|
|
434
|
+
##
|
|
435
|
+
&set_label("schedule_192",16);
|
|
436
|
+
&movdqu ("xmm0",&QWP(8,$inp)); # load key part 2 (very unaligned)
|
|
437
|
+
&call ("_vpaes_schedule_transform"); # input transform
|
|
438
|
+
&movdqa ("xmm6","xmm0"); # save short part
|
|
439
|
+
&pxor ("xmm4","xmm4"); # clear 4
|
|
440
|
+
&movhlps("xmm6","xmm4"); # clobber low side with zeros
|
|
441
|
+
&mov ($round,4);
|
|
442
|
+
|
|
443
|
+
&set_label("loop_schedule_192");
|
|
444
|
+
&call ("_vpaes_schedule_round");
|
|
445
|
+
&palignr("xmm0","xmm6",8);
|
|
446
|
+
&call ("_vpaes_schedule_mangle"); # save key n
|
|
447
|
+
&call ("_vpaes_schedule_192_smear");
|
|
448
|
+
&call ("_vpaes_schedule_mangle"); # save key n+1
|
|
449
|
+
&call ("_vpaes_schedule_round");
|
|
450
|
+
&dec ($round);
|
|
451
|
+
&jz (&label("schedule_mangle_last"));
|
|
452
|
+
&call ("_vpaes_schedule_mangle"); # save key n+2
|
|
453
|
+
&call ("_vpaes_schedule_192_smear");
|
|
454
|
+
&jmp (&label("loop_schedule_192"));
|
|
455
|
+
|
|
456
|
+
##
|
|
457
|
+
## .aes_schedule_256
|
|
458
|
+
##
|
|
459
|
+
## 256-bit specific part of key schedule.
|
|
460
|
+
##
|
|
461
|
+
## The structure here is very similar to the 128-bit
|
|
462
|
+
## schedule, but with an additional "low side" in
|
|
463
|
+
## %xmm6. The low side's rounds are the same as the
|
|
464
|
+
## high side's, except no rcon and no rotation.
|
|
465
|
+
##
|
|
466
|
+
&set_label("schedule_256",16);
|
|
467
|
+
&movdqu ("xmm0",&QWP(16,$inp)); # load key part 2 (unaligned)
|
|
468
|
+
&call ("_vpaes_schedule_transform"); # input transform
|
|
469
|
+
&mov ($round,7);
|
|
470
|
+
|
|
471
|
+
&set_label("loop_schedule_256");
|
|
472
|
+
&call ("_vpaes_schedule_mangle"); # output low result
|
|
473
|
+
&movdqa ("xmm6","xmm0"); # save cur_lo in xmm6
|
|
474
|
+
|
|
475
|
+
# high round
|
|
476
|
+
&call ("_vpaes_schedule_round");
|
|
477
|
+
&dec ($round);
|
|
478
|
+
&jz (&label("schedule_mangle_last"));
|
|
479
|
+
&call ("_vpaes_schedule_mangle");
|
|
480
|
+
|
|
481
|
+
# low round. swap xmm7 and xmm6
|
|
482
|
+
&pshufd ("xmm0","xmm0",0xFF);
|
|
483
|
+
&movdqa (&QWP(20,"esp"),"xmm7");
|
|
484
|
+
&movdqa ("xmm7","xmm6");
|
|
485
|
+
&call ("_vpaes_schedule_low_round");
|
|
486
|
+
&movdqa ("xmm7",&QWP(20,"esp"));
|
|
487
|
+
|
|
488
|
+
&jmp (&label("loop_schedule_256"));
|
|
489
|
+
|
|
490
|
+
##
|
|
491
|
+
## .aes_schedule_mangle_last
|
|
492
|
+
##
|
|
493
|
+
## Mangler for last round of key schedule
|
|
494
|
+
## Mangles %xmm0
|
|
495
|
+
## when encrypting, outputs out(%xmm0) ^ 63
|
|
496
|
+
## when decrypting, outputs unskew(%xmm0)
|
|
497
|
+
##
|
|
498
|
+
## Always called right before return... jumps to cleanup and exits
|
|
499
|
+
##
|
|
500
|
+
&set_label("schedule_mangle_last",16);
|
|
501
|
+
# schedule last round key from xmm0
|
|
502
|
+
&lea ($base,&DWP($k_deskew,$const));
|
|
503
|
+
&test ($out,$out);
|
|
504
|
+
&jnz (&label("schedule_mangle_last_dec"));
|
|
505
|
+
|
|
506
|
+
# encrypting
|
|
507
|
+
&movdqa ("xmm1",&QWP($k_sr,$const,$magic));
|
|
508
|
+
&pshufb ("xmm0","xmm1"); # output permute
|
|
509
|
+
&lea ($base,&DWP($k_opt,$const)); # prepare to output transform
|
|
510
|
+
&add ($key,32);
|
|
511
|
+
|
|
512
|
+
&set_label("schedule_mangle_last_dec");
|
|
513
|
+
&add ($key,-16);
|
|
514
|
+
&pxor ("xmm0",&QWP($k_s63,$const));
|
|
515
|
+
&call ("_vpaes_schedule_transform"); # output transform
|
|
516
|
+
&movdqu (&QWP(0,$key),"xmm0"); # save last key
|
|
517
|
+
|
|
518
|
+
# cleanup
|
|
519
|
+
&pxor ("xmm0","xmm0");
|
|
520
|
+
&pxor ("xmm1","xmm1");
|
|
521
|
+
&pxor ("xmm2","xmm2");
|
|
522
|
+
&pxor ("xmm3","xmm3");
|
|
523
|
+
&pxor ("xmm4","xmm4");
|
|
524
|
+
&pxor ("xmm5","xmm5");
|
|
525
|
+
&pxor ("xmm6","xmm6");
|
|
526
|
+
&pxor ("xmm7","xmm7");
|
|
527
|
+
&ret ();
|
|
528
|
+
&function_end_B("_vpaes_schedule_core");
|
|
529
|
+
|
|
530
|
+
##
|
|
531
|
+
## .aes_schedule_192_smear
|
|
532
|
+
##
|
|
533
|
+
## Smear the short, low side in the 192-bit key schedule.
|
|
534
|
+
##
|
|
535
|
+
## Inputs:
|
|
536
|
+
## %xmm7: high side, b a x y
|
|
537
|
+
## %xmm6: low side, d c 0 0
|
|
538
|
+
## (no zero register is needed on input: %xmm1 is cleared inside this routine)
|
|
539
|
+
##
|
|
540
|
+
## Outputs:
|
|
541
|
+
## %xmm6: b+c+d b+c 0 0
|
|
542
|
+
## %xmm0: b+c+d b+c b a
|
|
543
|
+
##
|
|
544
|
+
&function_begin_B("_vpaes_schedule_192_smear");
|
|
545
|
+
&pshufd ("xmm1","xmm6",0x80); # d c 0 0 -> c 0 0 0
|
|
546
|
+
&pshufd ("xmm0","xmm7",0xFE); # b a _ _ -> b b b a
|
|
547
|
+
&pxor ("xmm6","xmm1"); # -> c+d c 0 0
|
|
548
|
+
&pxor ("xmm1","xmm1");
|
|
549
|
+
&pxor ("xmm6","xmm0"); # -> b+c+d b+c b a
|
|
550
|
+
&movdqa ("xmm0","xmm6");
|
|
551
|
+
&movhlps("xmm6","xmm1"); # clobber low side with zeros
|
|
552
|
+
&ret ();
|
|
553
|
+
&function_end_B("_vpaes_schedule_192_smear");
|
|
554
|
+
|
|
555
|
+
##
|
|
556
|
+
## .aes_schedule_round
|
|
557
|
+
##
|
|
558
|
+
## Runs one main round of the key schedule on %xmm0, %xmm7
|
|
559
|
+
##
|
|
560
|
+
## Specifically, runs subbytes on the high dword of %xmm0
|
|
561
|
+
## then rotates it by one byte and xors into the low dword of
|
|
562
|
+
## %xmm7.
|
|
563
|
+
##
|
|
564
|
+
## Adds rcon from low byte of %xmm8, then rotates %xmm8 for
|
|
565
|
+
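## next rcon. (This 32-bit code has no %xmm8: the rcon vector is kept in a 16-byte stack slot, marked "# xmm8" where it is loaded and stored.)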
|
|
566
|
+
##
|
|
567
|
+
## Smears the dwords of %xmm7 by xoring the low into the
|
|
568
|
+
## second low, result into third, result into highest.
|
|
569
|
+
##
|
|
570
|
+
## Returns results in %xmm7 = %xmm0.
|
|
571
|
+
## Clobbers %xmm1-%xmm5.
|
|
572
|
+
##
|
|
573
|
+
&function_begin_B("_vpaes_schedule_round");
|
|
574
|
+
# extract rcon from the stack slot that stands in for xmm8
|
|
575
|
+
&movdqa ("xmm2",&QWP(8,"esp")); # xmm8
|
|
576
|
+
&pxor ("xmm1","xmm1");
|
|
577
|
+
&palignr("xmm1","xmm2",15);
|
|
578
|
+
&palignr("xmm2","xmm2",15);
|
|
579
|
+
&pxor ("xmm7","xmm1");
|
|
580
|
+
|
|
581
|
+
# rotate
|
|
582
|
+
&pshufd ("xmm0","xmm0",0xFF);
|
|
583
|
+
&palignr("xmm0","xmm0",1);
|
|
584
|
+
|
|
585
|
+
# fall through...
|
|
586
|
+
&movdqa (&QWP(8,"esp"),"xmm2"); # xmm8
|
|
587
|
+
|
|
588
|
+
# low round: same as high round, but no rotation and no rcon.
|
|
589
|
+
&set_label("_vpaes_schedule_low_round");
|
|
590
|
+
# smear xmm7
|
|
591
|
+
&movdqa ("xmm1","xmm7");
|
|
592
|
+
&pslldq ("xmm7",4);
|
|
593
|
+
&pxor ("xmm7","xmm1");
|
|
594
|
+
&movdqa ("xmm1","xmm7");
|
|
595
|
+
&pslldq ("xmm7",8);
|
|
596
|
+
&pxor ("xmm7","xmm1");
|
|
597
|
+
&pxor ("xmm7",&QWP($k_s63,$const));
|
|
598
|
+
|
|
599
|
+
# subbyte
|
|
600
|
+
&movdqa ("xmm4",&QWP($k_s0F,$const));
|
|
601
|
+
&movdqa ("xmm5",&QWP($k_inv,$const)); # 4 : 1/j
|
|
602
|
+
&movdqa ("xmm1","xmm4");
|
|
603
|
+
&pandn ("xmm1","xmm0");
|
|
604
|
+
&psrld ("xmm1",4); # 1 = i
|
|
605
|
+
&pand ("xmm0","xmm4"); # 0 = k
|
|
606
|
+
&movdqa ("xmm2",&QWP($k_inv+16,$const));# 2 : a/k
|
|
607
|
+
&pshufb ("xmm2","xmm0"); # 2 = a/k
|
|
608
|
+
&pxor ("xmm0","xmm1"); # 0 = j
|
|
609
|
+
&movdqa ("xmm3","xmm5"); # 3 : 1/i
|
|
610
|
+
&pshufb ("xmm3","xmm1"); # 3 = 1/i
|
|
611
|
+
&pxor ("xmm3","xmm2"); # 3 = iak = 1/i + a/k
|
|
612
|
+
&movdqa ("xmm4","xmm5"); # 4 : 1/j
|
|
613
|
+
&pshufb ("xmm4","xmm0"); # 4 = 1/j
|
|
614
|
+
&pxor ("xmm4","xmm2"); # 4 = jak = 1/j + a/k
|
|
615
|
+
&movdqa ("xmm2","xmm5"); # 2 : 1/iak
|
|
616
|
+
&pshufb ("xmm2","xmm3"); # 2 = 1/iak
|
|
617
|
+
&pxor ("xmm2","xmm0"); # 2 = io
|
|
618
|
+
&movdqa ("xmm3","xmm5"); # 3 : 1/jak
|
|
619
|
+
&pshufb ("xmm3","xmm4"); # 3 = 1/jak
|
|
620
|
+
&pxor ("xmm3","xmm1"); # 3 = jo
|
|
621
|
+
&movdqa ("xmm4",&QWP($k_sb1,$const)); # 4 : sbou
|
|
622
|
+
&pshufb ("xmm4","xmm2"); # 4 = sbou
|
|
623
|
+
&movdqa ("xmm0",&QWP($k_sb1+16,$const));# 0 : sbot
|
|
624
|
+
&pshufb ("xmm0","xmm3"); # 0 = sb1t
|
|
625
|
+
&pxor ("xmm0","xmm4"); # 0 = sbox output
|
|
626
|
+
|
|
627
|
+
# add in smeared stuff
|
|
628
|
+
&pxor ("xmm0","xmm7");
|
|
629
|
+
&movdqa ("xmm7","xmm0");
|
|
630
|
+
&ret ();
|
|
631
|
+
&function_end_B("_vpaes_schedule_round");
|
|
632
|
+
|
|
633
|
+
##
|
|
634
|
+
## .aes_schedule_transform
|
|
635
|
+
##
|
|
636
|
+
## Linear-transform %xmm0 according to tables at (%ebx)
|
|
637
|
+
##
|
|
638
|
+
## Output in %xmm0
|
|
639
|
+
## Clobbers %xmm1, %xmm2
|
|
640
|
+
##
|
|
641
|
+
&function_begin_B("_vpaes_schedule_transform");
|
|
642
|
+
&movdqa ("xmm2",&QWP($k_s0F,$const));
|
|
643
|
+
&movdqa ("xmm1","xmm2");
|
|
644
|
+
&pandn ("xmm1","xmm0");
|
|
645
|
+
&psrld ("xmm1",4);
|
|
646
|
+
&pand ("xmm0","xmm2");
|
|
647
|
+
&movdqa ("xmm2",&QWP(0,$base));
|
|
648
|
+
&pshufb ("xmm2","xmm0");
|
|
649
|
+
&movdqa ("xmm0",&QWP(16,$base));
|
|
650
|
+
&pshufb ("xmm0","xmm1");
|
|
651
|
+
&pxor ("xmm0","xmm2");
|
|
652
|
+
&ret ();
|
|
653
|
+
&function_end_B("_vpaes_schedule_transform");
|
|
654
|
+
|
|
655
|
+
##
|
|
656
|
+
## .aes_schedule_mangle
|
|
657
|
+
##
|
|
658
|
+
## Mangle xmm0 from (basis-transformed) standard version
|
|
659
|
+
## to our version.
|
|
660
|
+
##
|
|
661
|
+
## On encrypt,
|
|
662
|
+
## xor with 0x63
|
|
663
|
+
## multiply by circulant 0,1,1,1
|
|
664
|
+
## apply shiftrows transform
|
|
665
|
+
##
|
|
666
|
+
## On decrypt,
|
|
667
|
+
## xor with 0x63
|
|
668
|
+
## multiply by "inverse mixcolumns" circulant E,B,D,9
|
|
669
|
+
## deskew
|
|
670
|
+
## apply shiftrows transform
|
|
671
|
+
##
|
|
672
|
+
##
|
|
673
|
+
## Writes out to (%edx), and increments or decrements it
|
|
674
|
+
## Keeps track of round number mod 4 in %ecx
|
|
675
|
+
## Preserves xmm0
|
|
676
|
+
## Clobbers xmm1-xmm5, and $inp on the decrypt path (used as a table pointer)
|
|
677
|
+
##
|
|
678
|
+
&function_begin_B("_vpaes_schedule_mangle");
|
|
679
|
+
&movdqa ("xmm4","xmm0"); # save xmm0 for later
|
|
680
|
+
&movdqa ("xmm5",&QWP($k_mc_forward,$const));
|
|
681
|
+
&test ($out,$out);
|
|
682
|
+
&jnz (&label("schedule_mangle_dec"));
|
|
683
|
+
|
|
684
|
+
# encrypting
|
|
685
|
+
&add ($key,16);
|
|
686
|
+
&pxor ("xmm4",&QWP($k_s63,$const));
|
|
687
|
+
&pshufb ("xmm4","xmm5");
|
|
688
|
+
&movdqa ("xmm3","xmm4");
|
|
689
|
+
&pshufb ("xmm4","xmm5");
|
|
690
|
+
&pxor ("xmm3","xmm4");
|
|
691
|
+
&pshufb ("xmm4","xmm5");
|
|
692
|
+
&pxor ("xmm3","xmm4");
|
|
693
|
+
|
|
694
|
+
&jmp (&label("schedule_mangle_both"));
|
|
695
|
+
|
|
696
|
+
&set_label("schedule_mangle_dec",16);
|
|
697
|
+
# inverse mix columns
|
|
698
|
+
&movdqa ("xmm2",&QWP($k_s0F,$const));
|
|
699
|
+
&lea ($inp,&DWP($k_dksd,$const));
|
|
700
|
+
&movdqa ("xmm1","xmm2");
|
|
701
|
+
&pandn ("xmm1","xmm4");
|
|
702
|
+
&psrld ("xmm1",4); # 1 = hi
|
|
703
|
+
&pand ("xmm4","xmm2"); # 4 = lo
|
|
704
|
+
|
|
705
|
+
&movdqa ("xmm2",&QWP(0,$inp));
|
|
706
|
+
&pshufb ("xmm2","xmm4");
|
|
707
|
+
&movdqa ("xmm3",&QWP(0x10,$inp));
|
|
708
|
+
&pshufb ("xmm3","xmm1");
|
|
709
|
+
&pxor ("xmm3","xmm2");
|
|
710
|
+
&pshufb ("xmm3","xmm5");
|
|
711
|
+
|
|
712
|
+
&movdqa ("xmm2",&QWP(0x20,$inp));
|
|
713
|
+
&pshufb ("xmm2","xmm4");
|
|
714
|
+
&pxor ("xmm2","xmm3");
|
|
715
|
+
&movdqa ("xmm3",&QWP(0x30,$inp));
|
|
716
|
+
&pshufb ("xmm3","xmm1");
|
|
717
|
+
&pxor ("xmm3","xmm2");
|
|
718
|
+
&pshufb ("xmm3","xmm5");
|
|
719
|
+
|
|
720
|
+
&movdqa ("xmm2",&QWP(0x40,$inp));
|
|
721
|
+
&pshufb ("xmm2","xmm4");
|
|
722
|
+
&pxor ("xmm2","xmm3");
|
|
723
|
+
&movdqa ("xmm3",&QWP(0x50,$inp));
|
|
724
|
+
&pshufb ("xmm3","xmm1");
|
|
725
|
+
&pxor ("xmm3","xmm2");
|
|
726
|
+
&pshufb ("xmm3","xmm5");
|
|
727
|
+
|
|
728
|
+
&movdqa ("xmm2",&QWP(0x60,$inp));
|
|
729
|
+
&pshufb ("xmm2","xmm4");
|
|
730
|
+
&pxor ("xmm2","xmm3");
|
|
731
|
+
&movdqa ("xmm3",&QWP(0x70,$inp));
|
|
732
|
+
&pshufb ("xmm3","xmm1");
|
|
733
|
+
&pxor ("xmm3","xmm2");
|
|
734
|
+
|
|
735
|
+
&add ($key,-16);
|
|
736
|
+
|
|
737
|
+
&set_label("schedule_mangle_both");
|
|
738
|
+
&movdqa ("xmm1",&QWP($k_sr,$const,$magic));
|
|
739
|
+
&pshufb ("xmm3","xmm1");
|
|
740
|
+
&add ($magic,-16);
|
|
741
|
+
&and ($magic,0x30);
|
|
742
|
+
&movdqu (&QWP(0,$key),"xmm3");
|
|
743
|
+
&ret ();
|
|
744
|
+
&function_end_B("_vpaes_schedule_mangle");
|
|
745
|
+
|
|
746
|
+
#
|
|
747
|
+
# Interface to OpenSSL
|
|
748
|
+
#
|
|
749
|
+
&function_begin("${PREFIX}_set_encrypt_key");
|
|
750
|
+
&mov ($inp,&wparam(0)); # inp
|
|
751
|
+
&lea ($base,&DWP(-56,"esp"));
|
|
752
|
+
&mov ($round,&wparam(1)); # bits
|
|
753
|
+
&and ($base,-16);
|
|
754
|
+
&mov ($key,&wparam(2)); # key
|
|
755
|
+
&xchg ($base,"esp"); # alloca
|
|
756
|
+
&mov (&DWP(48,"esp"),$base);
|
|
757
|
+
|
|
758
|
+
&mov ($base,$round);
|
|
759
|
+
&shr ($base,5);
|
|
760
|
+
&add ($base,5);
|
|
761
|
+
&mov (&DWP(240,$key),$base); # AES_KEY->rounds = nbits/32+5;
|
|
762
|
+
&mov ($magic,0x30);
|
|
763
|
+
&mov ($out,0);
|
|
764
|
+
|
|
765
|
+
&lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point")));
|
|
766
|
+
&call ("_vpaes_schedule_core");
|
|
767
|
+
&set_label("pic_point");
|
|
768
|
+
|
|
769
|
+
&mov ("esp",&DWP(48,"esp"));
|
|
770
|
+
&xor ("eax","eax");
|
|
771
|
+
&function_end("${PREFIX}_set_encrypt_key");
|
|
772
|
+
|
|
773
|
+
&function_begin("${PREFIX}_set_decrypt_key");
|
|
774
|
+
&mov ($inp,&wparam(0)); # inp
|
|
775
|
+
&lea ($base,&DWP(-56,"esp"));
|
|
776
|
+
&mov ($round,&wparam(1)); # bits
|
|
777
|
+
&and ($base,-16);
|
|
778
|
+
&mov ($key,&wparam(2)); # key
|
|
779
|
+
&xchg ($base,"esp"); # alloca
|
|
780
|
+
&mov (&DWP(48,"esp"),$base);
|
|
781
|
+
|
|
782
|
+
&mov ($base,$round);
|
|
783
|
+
&shr ($base,5);
|
|
784
|
+
&add ($base,5);
|
|
785
|
+
&mov (&DWP(240,$key),$base); # AES_KEY->rounds = nbits/32+5;
|
|
786
|
+
&shl ($base,4);
|
|
787
|
+
&lea ($key,&DWP(16,$key,$base));
|
|
788
|
+
|
|
789
|
+
&mov ($out,1);
|
|
790
|
+
&mov ($magic,$round);
|
|
791
|
+
&shr ($magic,1);
|
|
792
|
+
&and ($magic,32);
|
|
793
|
+
&xor ($magic,32); # nbits==192?0:32;
|
|
794
|
+
|
|
795
|
+
&lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point")));
|
|
796
|
+
&call ("_vpaes_schedule_core");
|
|
797
|
+
&set_label("pic_point");
|
|
798
|
+
|
|
799
|
+
&mov ("esp",&DWP(48,"esp"));
|
|
800
|
+
&xor ("eax","eax");
|
|
801
|
+
&function_end("${PREFIX}_set_decrypt_key");
|
|
802
|
+
|
|
803
|
+
&function_begin("${PREFIX}_encrypt");
|
|
804
|
+
&lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point")));
|
|
805
|
+
&call ("_vpaes_preheat");
|
|
806
|
+
&set_label("pic_point");
|
|
807
|
+
&mov ($inp,&wparam(0)); # inp
|
|
808
|
+
&lea ($base,&DWP(-56,"esp"));
|
|
809
|
+
&mov ($out,&wparam(1)); # out
|
|
810
|
+
&and ($base,-16);
|
|
811
|
+
&mov ($key,&wparam(2)); # key
|
|
812
|
+
&xchg ($base,"esp"); # alloca
|
|
813
|
+
&mov (&DWP(48,"esp"),$base);
|
|
814
|
+
|
|
815
|
+
&movdqu ("xmm0",&QWP(0,$inp));
|
|
816
|
+
&call ("_vpaes_encrypt_core");
|
|
817
|
+
&movdqu (&QWP(0,$out),"xmm0");
|
|
818
|
+
|
|
819
|
+
&mov ("esp",&DWP(48,"esp"));
|
|
820
|
+
&function_end("${PREFIX}_encrypt");
|
|
821
|
+
|
|
822
|
+
&function_begin("${PREFIX}_decrypt");
|
|
823
|
+
&lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point")));
|
|
824
|
+
&call ("_vpaes_preheat");
|
|
825
|
+
&set_label("pic_point");
|
|
826
|
+
&mov ($inp,&wparam(0)); # inp
|
|
827
|
+
&lea ($base,&DWP(-56,"esp"));
|
|
828
|
+
&mov ($out,&wparam(1)); # out
|
|
829
|
+
&and ($base,-16);
|
|
830
|
+
&mov ($key,&wparam(2)); # key
|
|
831
|
+
&xchg ($base,"esp"); # alloca
|
|
832
|
+
&mov (&DWP(48,"esp"),$base);
|
|
833
|
+
|
|
834
|
+
&movdqu ("xmm0",&QWP(0,$inp));
|
|
835
|
+
&call ("_vpaes_decrypt_core");
|
|
836
|
+
&movdqu (&QWP(0,$out),"xmm0");
|
|
837
|
+
|
|
838
|
+
&mov ("esp",&DWP(48,"esp"));
|
|
839
|
+
&function_end("${PREFIX}_decrypt");
|
|
840
|
+
|
|
841
|
+
&asm_finish();
|