ring-native 0.0.0 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/CHANGES.md +7 -0
- data/Makefile +5 -0
- data/README.md +12 -5
- data/Rakefile +4 -0
- data/ext/ring/extconf.rb +4 -5
- data/lib/ring/native.rb +3 -1
- data/lib/ring/native/version.rb +5 -1
- data/ring-native.gemspec +6 -6
- data/vendor/ring-ffi/Cargo.lock +26 -0
- data/vendor/ring-ffi/Cargo.toml +45 -0
- data/vendor/ring-ffi/LICENSE +16 -0
- data/vendor/ring-ffi/README.md +59 -0
- data/vendor/ring-ffi/src/lib.rs +79 -0
- metadata +10 -255
- data/vendor/ring/BUILDING.md +0 -40
- data/vendor/ring/Cargo.toml +0 -43
- data/vendor/ring/LICENSE +0 -185
- data/vendor/ring/Makefile +0 -35
- data/vendor/ring/PORTING.md +0 -163
- data/vendor/ring/README.md +0 -113
- data/vendor/ring/STYLE.md +0 -197
- data/vendor/ring/appveyor.yml +0 -27
- data/vendor/ring/build.rs +0 -108
- data/vendor/ring/crypto/aes/aes.c +0 -1142
- data/vendor/ring/crypto/aes/aes_test.Windows.vcxproj +0 -25
- data/vendor/ring/crypto/aes/aes_test.cc +0 -93
- data/vendor/ring/crypto/aes/asm/aes-586.pl +0 -2368
- data/vendor/ring/crypto/aes/asm/aes-armv4.pl +0 -1249
- data/vendor/ring/crypto/aes/asm/aes-x86_64.pl +0 -2246
- data/vendor/ring/crypto/aes/asm/aesni-x86.pl +0 -1318
- data/vendor/ring/crypto/aes/asm/aesni-x86_64.pl +0 -2084
- data/vendor/ring/crypto/aes/asm/aesv8-armx.pl +0 -675
- data/vendor/ring/crypto/aes/asm/bsaes-armv7.pl +0 -1364
- data/vendor/ring/crypto/aes/asm/bsaes-x86_64.pl +0 -1565
- data/vendor/ring/crypto/aes/asm/vpaes-x86.pl +0 -841
- data/vendor/ring/crypto/aes/asm/vpaes-x86_64.pl +0 -1116
- data/vendor/ring/crypto/aes/internal.h +0 -87
- data/vendor/ring/crypto/aes/mode_wrappers.c +0 -61
- data/vendor/ring/crypto/bn/add.c +0 -394
- data/vendor/ring/crypto/bn/asm/armv4-mont.pl +0 -694
- data/vendor/ring/crypto/bn/asm/armv8-mont.pl +0 -1503
- data/vendor/ring/crypto/bn/asm/bn-586.pl +0 -774
- data/vendor/ring/crypto/bn/asm/co-586.pl +0 -287
- data/vendor/ring/crypto/bn/asm/rsaz-avx2.pl +0 -1882
- data/vendor/ring/crypto/bn/asm/x86-mont.pl +0 -592
- data/vendor/ring/crypto/bn/asm/x86_64-gcc.c +0 -599
- data/vendor/ring/crypto/bn/asm/x86_64-mont.pl +0 -1393
- data/vendor/ring/crypto/bn/asm/x86_64-mont5.pl +0 -3507
- data/vendor/ring/crypto/bn/bn.c +0 -352
- data/vendor/ring/crypto/bn/bn_asn1.c +0 -74
- data/vendor/ring/crypto/bn/bn_test.Windows.vcxproj +0 -25
- data/vendor/ring/crypto/bn/bn_test.cc +0 -1696
- data/vendor/ring/crypto/bn/cmp.c +0 -200
- data/vendor/ring/crypto/bn/convert.c +0 -433
- data/vendor/ring/crypto/bn/ctx.c +0 -311
- data/vendor/ring/crypto/bn/div.c +0 -594
- data/vendor/ring/crypto/bn/exponentiation.c +0 -1335
- data/vendor/ring/crypto/bn/gcd.c +0 -711
- data/vendor/ring/crypto/bn/generic.c +0 -1019
- data/vendor/ring/crypto/bn/internal.h +0 -316
- data/vendor/ring/crypto/bn/montgomery.c +0 -516
- data/vendor/ring/crypto/bn/mul.c +0 -888
- data/vendor/ring/crypto/bn/prime.c +0 -829
- data/vendor/ring/crypto/bn/random.c +0 -334
- data/vendor/ring/crypto/bn/rsaz_exp.c +0 -262
- data/vendor/ring/crypto/bn/rsaz_exp.h +0 -53
- data/vendor/ring/crypto/bn/shift.c +0 -276
- data/vendor/ring/crypto/bytestring/bytestring_test.Windows.vcxproj +0 -25
- data/vendor/ring/crypto/bytestring/bytestring_test.cc +0 -421
- data/vendor/ring/crypto/bytestring/cbb.c +0 -399
- data/vendor/ring/crypto/bytestring/cbs.c +0 -227
- data/vendor/ring/crypto/bytestring/internal.h +0 -46
- data/vendor/ring/crypto/chacha/chacha_generic.c +0 -140
- data/vendor/ring/crypto/chacha/chacha_vec.c +0 -323
- data/vendor/ring/crypto/chacha/chacha_vec_arm.S +0 -1447
- data/vendor/ring/crypto/chacha/chacha_vec_arm_generate.go +0 -153
- data/vendor/ring/crypto/cipher/cipher_test.Windows.vcxproj +0 -25
- data/vendor/ring/crypto/cipher/e_aes.c +0 -390
- data/vendor/ring/crypto/cipher/e_chacha20poly1305.c +0 -208
- data/vendor/ring/crypto/cipher/internal.h +0 -173
- data/vendor/ring/crypto/cipher/test/aes_128_gcm_tests.txt +0 -543
- data/vendor/ring/crypto/cipher/test/aes_128_key_wrap_tests.txt +0 -9
- data/vendor/ring/crypto/cipher/test/aes_256_gcm_tests.txt +0 -475
- data/vendor/ring/crypto/cipher/test/aes_256_key_wrap_tests.txt +0 -23
- data/vendor/ring/crypto/cipher/test/chacha20_poly1305_old_tests.txt +0 -422
- data/vendor/ring/crypto/cipher/test/chacha20_poly1305_tests.txt +0 -484
- data/vendor/ring/crypto/cipher/test/cipher_test.txt +0 -100
- data/vendor/ring/crypto/constant_time_test.Windows.vcxproj +0 -25
- data/vendor/ring/crypto/constant_time_test.c +0 -304
- data/vendor/ring/crypto/cpu-arm-asm.S +0 -32
- data/vendor/ring/crypto/cpu-arm.c +0 -199
- data/vendor/ring/crypto/cpu-intel.c +0 -261
- data/vendor/ring/crypto/crypto.c +0 -151
- data/vendor/ring/crypto/curve25519/asm/x25519-arm.S +0 -2118
- data/vendor/ring/crypto/curve25519/curve25519.c +0 -4888
- data/vendor/ring/crypto/curve25519/x25519_test.cc +0 -128
- data/vendor/ring/crypto/digest/md32_common.h +0 -181
- data/vendor/ring/crypto/ec/asm/p256-x86_64-asm.pl +0 -2725
- data/vendor/ring/crypto/ec/ec.c +0 -193
- data/vendor/ring/crypto/ec/ec_curves.c +0 -61
- data/vendor/ring/crypto/ec/ec_key.c +0 -228
- data/vendor/ring/crypto/ec/ec_montgomery.c +0 -114
- data/vendor/ring/crypto/ec/example_mul.Windows.vcxproj +0 -25
- data/vendor/ring/crypto/ec/internal.h +0 -243
- data/vendor/ring/crypto/ec/oct.c +0 -253
- data/vendor/ring/crypto/ec/p256-64.c +0 -1794
- data/vendor/ring/crypto/ec/p256-x86_64-table.h +0 -9548
- data/vendor/ring/crypto/ec/p256-x86_64.c +0 -509
- data/vendor/ring/crypto/ec/simple.c +0 -1007
- data/vendor/ring/crypto/ec/util-64.c +0 -183
- data/vendor/ring/crypto/ec/wnaf.c +0 -508
- data/vendor/ring/crypto/ecdh/ecdh.c +0 -155
- data/vendor/ring/crypto/ecdsa/ecdsa.c +0 -304
- data/vendor/ring/crypto/ecdsa/ecdsa_asn1.c +0 -193
- data/vendor/ring/crypto/ecdsa/ecdsa_test.Windows.vcxproj +0 -25
- data/vendor/ring/crypto/ecdsa/ecdsa_test.cc +0 -327
- data/vendor/ring/crypto/header_removed.h +0 -17
- data/vendor/ring/crypto/internal.h +0 -495
- data/vendor/ring/crypto/libring.Windows.vcxproj +0 -101
- data/vendor/ring/crypto/mem.c +0 -98
- data/vendor/ring/crypto/modes/asm/aesni-gcm-x86_64.pl +0 -1045
- data/vendor/ring/crypto/modes/asm/ghash-armv4.pl +0 -517
- data/vendor/ring/crypto/modes/asm/ghash-x86.pl +0 -1393
- data/vendor/ring/crypto/modes/asm/ghash-x86_64.pl +0 -1741
- data/vendor/ring/crypto/modes/asm/ghashv8-armx.pl +0 -422
- data/vendor/ring/crypto/modes/ctr.c +0 -226
- data/vendor/ring/crypto/modes/gcm.c +0 -1206
- data/vendor/ring/crypto/modes/gcm_test.Windows.vcxproj +0 -25
- data/vendor/ring/crypto/modes/gcm_test.c +0 -348
- data/vendor/ring/crypto/modes/internal.h +0 -299
- data/vendor/ring/crypto/perlasm/arm-xlate.pl +0 -170
- data/vendor/ring/crypto/perlasm/readme +0 -100
- data/vendor/ring/crypto/perlasm/x86_64-xlate.pl +0 -1164
- data/vendor/ring/crypto/perlasm/x86asm.pl +0 -292
- data/vendor/ring/crypto/perlasm/x86gas.pl +0 -263
- data/vendor/ring/crypto/perlasm/x86masm.pl +0 -200
- data/vendor/ring/crypto/perlasm/x86nasm.pl +0 -187
- data/vendor/ring/crypto/poly1305/poly1305.c +0 -331
- data/vendor/ring/crypto/poly1305/poly1305_arm.c +0 -301
- data/vendor/ring/crypto/poly1305/poly1305_arm_asm.S +0 -2015
- data/vendor/ring/crypto/poly1305/poly1305_test.Windows.vcxproj +0 -25
- data/vendor/ring/crypto/poly1305/poly1305_test.cc +0 -80
- data/vendor/ring/crypto/poly1305/poly1305_test.txt +0 -52
- data/vendor/ring/crypto/poly1305/poly1305_vec.c +0 -892
- data/vendor/ring/crypto/rand/asm/rdrand-x86_64.pl +0 -75
- data/vendor/ring/crypto/rand/internal.h +0 -32
- data/vendor/ring/crypto/rand/rand.c +0 -189
- data/vendor/ring/crypto/rand/urandom.c +0 -219
- data/vendor/ring/crypto/rand/windows.c +0 -56
- data/vendor/ring/crypto/refcount_c11.c +0 -66
- data/vendor/ring/crypto/refcount_lock.c +0 -53
- data/vendor/ring/crypto/refcount_test.Windows.vcxproj +0 -25
- data/vendor/ring/crypto/refcount_test.c +0 -58
- data/vendor/ring/crypto/rsa/blinding.c +0 -462
- data/vendor/ring/crypto/rsa/internal.h +0 -108
- data/vendor/ring/crypto/rsa/padding.c +0 -300
- data/vendor/ring/crypto/rsa/rsa.c +0 -450
- data/vendor/ring/crypto/rsa/rsa_asn1.c +0 -261
- data/vendor/ring/crypto/rsa/rsa_impl.c +0 -944
- data/vendor/ring/crypto/rsa/rsa_test.Windows.vcxproj +0 -25
- data/vendor/ring/crypto/rsa/rsa_test.cc +0 -437
- data/vendor/ring/crypto/sha/asm/sha-armv8.pl +0 -436
- data/vendor/ring/crypto/sha/asm/sha-x86_64.pl +0 -2390
- data/vendor/ring/crypto/sha/asm/sha256-586.pl +0 -1275
- data/vendor/ring/crypto/sha/asm/sha256-armv4.pl +0 -735
- data/vendor/ring/crypto/sha/asm/sha256-armv8.pl +0 -14
- data/vendor/ring/crypto/sha/asm/sha256-x86_64.pl +0 -14
- data/vendor/ring/crypto/sha/asm/sha512-586.pl +0 -911
- data/vendor/ring/crypto/sha/asm/sha512-armv4.pl +0 -666
- data/vendor/ring/crypto/sha/asm/sha512-armv8.pl +0 -14
- data/vendor/ring/crypto/sha/asm/sha512-x86_64.pl +0 -14
- data/vendor/ring/crypto/sha/sha1.c +0 -271
- data/vendor/ring/crypto/sha/sha256.c +0 -204
- data/vendor/ring/crypto/sha/sha512.c +0 -355
- data/vendor/ring/crypto/test/file_test.cc +0 -326
- data/vendor/ring/crypto/test/file_test.h +0 -181
- data/vendor/ring/crypto/test/malloc.cc +0 -150
- data/vendor/ring/crypto/test/scoped_types.h +0 -95
- data/vendor/ring/crypto/test/test.Windows.vcxproj +0 -35
- data/vendor/ring/crypto/test/test_util.cc +0 -46
- data/vendor/ring/crypto/test/test_util.h +0 -41
- data/vendor/ring/crypto/thread_none.c +0 -55
- data/vendor/ring/crypto/thread_pthread.c +0 -165
- data/vendor/ring/crypto/thread_test.Windows.vcxproj +0 -25
- data/vendor/ring/crypto/thread_test.c +0 -200
- data/vendor/ring/crypto/thread_win.c +0 -282
- data/vendor/ring/examples/checkdigest.rs +0 -103
- data/vendor/ring/include/openssl/aes.h +0 -121
- data/vendor/ring/include/openssl/arm_arch.h +0 -129
- data/vendor/ring/include/openssl/base.h +0 -156
- data/vendor/ring/include/openssl/bn.h +0 -794
- data/vendor/ring/include/openssl/buffer.h +0 -18
- data/vendor/ring/include/openssl/bytestring.h +0 -235
- data/vendor/ring/include/openssl/chacha.h +0 -37
- data/vendor/ring/include/openssl/cmac.h +0 -76
- data/vendor/ring/include/openssl/cpu.h +0 -184
- data/vendor/ring/include/openssl/crypto.h +0 -43
- data/vendor/ring/include/openssl/curve25519.h +0 -88
- data/vendor/ring/include/openssl/ec.h +0 -225
- data/vendor/ring/include/openssl/ec_key.h +0 -129
- data/vendor/ring/include/openssl/ecdh.h +0 -110
- data/vendor/ring/include/openssl/ecdsa.h +0 -156
- data/vendor/ring/include/openssl/err.h +0 -201
- data/vendor/ring/include/openssl/mem.h +0 -101
- data/vendor/ring/include/openssl/obj_mac.h +0 -71
- data/vendor/ring/include/openssl/opensslfeatures.h +0 -68
- data/vendor/ring/include/openssl/opensslv.h +0 -18
- data/vendor/ring/include/openssl/ossl_typ.h +0 -18
- data/vendor/ring/include/openssl/poly1305.h +0 -51
- data/vendor/ring/include/openssl/rand.h +0 -70
- data/vendor/ring/include/openssl/rsa.h +0 -399
- data/vendor/ring/include/openssl/thread.h +0 -133
- data/vendor/ring/include/openssl/type_check.h +0 -71
- data/vendor/ring/mk/Common.props +0 -63
- data/vendor/ring/mk/Windows.props +0 -42
- data/vendor/ring/mk/WindowsTest.props +0 -18
- data/vendor/ring/mk/appveyor.bat +0 -62
- data/vendor/ring/mk/bottom_of_makefile.mk +0 -54
- data/vendor/ring/mk/ring.mk +0 -266
- data/vendor/ring/mk/top_of_makefile.mk +0 -214
- data/vendor/ring/mk/travis.sh +0 -40
- data/vendor/ring/mk/update-travis-yml.py +0 -229
- data/vendor/ring/ring.sln +0 -153
- data/vendor/ring/src/aead.rs +0 -682
- data/vendor/ring/src/agreement.rs +0 -248
- data/vendor/ring/src/c.rs +0 -129
- data/vendor/ring/src/constant_time.rs +0 -37
- data/vendor/ring/src/der.rs +0 -96
- data/vendor/ring/src/digest.rs +0 -690
- data/vendor/ring/src/digest_tests.txt +0 -57
- data/vendor/ring/src/ecc.rs +0 -28
- data/vendor/ring/src/ecc_build.rs +0 -279
- data/vendor/ring/src/ecc_curves.rs +0 -117
- data/vendor/ring/src/ed25519_tests.txt +0 -2579
- data/vendor/ring/src/exe_tests.rs +0 -46
- data/vendor/ring/src/ffi.rs +0 -29
- data/vendor/ring/src/file_test.rs +0 -187
- data/vendor/ring/src/hkdf.rs +0 -153
- data/vendor/ring/src/hkdf_tests.txt +0 -59
- data/vendor/ring/src/hmac.rs +0 -414
- data/vendor/ring/src/hmac_tests.txt +0 -97
- data/vendor/ring/src/input.rs +0 -312
- data/vendor/ring/src/lib.rs +0 -41
- data/vendor/ring/src/pbkdf2.rs +0 -265
- data/vendor/ring/src/pbkdf2_tests.txt +0 -113
- data/vendor/ring/src/polyfill.rs +0 -57
- data/vendor/ring/src/rand.rs +0 -28
- data/vendor/ring/src/signature.rs +0 -314
- data/vendor/ring/third-party/NIST/README.md +0 -9
- data/vendor/ring/third-party/NIST/SHAVS/SHA1LongMsg.rsp +0 -263
- data/vendor/ring/third-party/NIST/SHAVS/SHA1Monte.rsp +0 -309
- data/vendor/ring/third-party/NIST/SHAVS/SHA1ShortMsg.rsp +0 -267
- data/vendor/ring/third-party/NIST/SHAVS/SHA224LongMsg.rsp +0 -263
- data/vendor/ring/third-party/NIST/SHAVS/SHA224Monte.rsp +0 -309
- data/vendor/ring/third-party/NIST/SHAVS/SHA224ShortMsg.rsp +0 -267
- data/vendor/ring/third-party/NIST/SHAVS/SHA256LongMsg.rsp +0 -263
- data/vendor/ring/third-party/NIST/SHAVS/SHA256Monte.rsp +0 -309
- data/vendor/ring/third-party/NIST/SHAVS/SHA256ShortMsg.rsp +0 -267
- data/vendor/ring/third-party/NIST/SHAVS/SHA384LongMsg.rsp +0 -519
- data/vendor/ring/third-party/NIST/SHAVS/SHA384Monte.rsp +0 -309
- data/vendor/ring/third-party/NIST/SHAVS/SHA384ShortMsg.rsp +0 -523
- data/vendor/ring/third-party/NIST/SHAVS/SHA512LongMsg.rsp +0 -519
- data/vendor/ring/third-party/NIST/SHAVS/SHA512Monte.rsp +0 -309
- data/vendor/ring/third-party/NIST/SHAVS/SHA512ShortMsg.rsp +0 -523
- data/vendor/ring/third-party/NIST/sha256sums.txt +0 -1
@@ -1,841 +0,0 @@
|
|
1
|
-
#!/usr/bin/env perl
|
2
|
-
|
3
|
-
######################################################################
|
4
|
-
## Constant-time SSSE3 AES core implementation.
|
5
|
-
## version 0.1
|
6
|
-
##
|
7
|
-
## By Mike Hamburg (Stanford University), 2009
|
8
|
-
## Public domain.
|
9
|
-
##
|
10
|
-
## For details see http://shiftleft.org/papers/vector_aes/ and
|
11
|
-
## http://crypto.stanford.edu/vpaes/.
|
12
|
-
|
13
|
-
######################################################################
|
14
|
-
# September 2011.
|
15
|
-
#
|
16
|
-
# Port vpaes-x86_64.pl as 32-bit "almost" drop-in replacement for
|
17
|
-
# aes-586.pl. "Almost" refers to the fact that AES_cbc_encrypt
|
18
|
-
# doesn't handle partial vectors (doesn't have to if called from
|
19
|
-
# EVP only). "Drop-in" implies that this module doesn't share key
|
20
|
-
# schedule structure with the original nor does it make assumption
|
21
|
-
# about its alignment...
|
22
|
-
#
|
23
|
-
# Performance summary. aes-586.pl column lists large-block CBC
|
24
|
-
# encrypt/decrypt/with-hyper-threading-off(*) results in cycles per
|
25
|
-
# byte processed with 128-bit key, and vpaes-x86.pl column - [also
|
26
|
-
# large-block CBC] encrypt/decrypt.
|
27
|
-
#
|
28
|
-
# aes-586.pl vpaes-x86.pl
|
29
|
-
#
|
30
|
-
# Core 2(**) 28.1/41.4/18.3 21.9/25.2(***)
|
31
|
-
# Nehalem 27.9/40.4/18.1 10.2/11.9
|
32
|
-
# Atom 70.7/92.1/60.1 61.1/75.4(***)
|
33
|
-
# Silvermont 45.4/62.9/24.1 49.2/61.1(***)
|
34
|
-
#
|
35
|
-
# (*) "Hyper-threading" in the context refers rather to cache shared
|
36
|
-
# among multiple cores, than to specifically Intel HTT. As vast
|
37
|
-
# majority of contemporary cores share cache, slower code path
|
38
|
-
# is common place. In other words "with-hyper-threading-off"
|
39
|
-
# results are presented mostly for reference purposes.
|
40
|
-
#
|
41
|
-
# (**) "Core 2" refers to initial 65nm design, a.k.a. Conroe.
|
42
|
-
#
|
43
|
-
# (***) Less impressive improvement on Core 2 and Atom is due to slow
|
44
|
-
# pshufb, yet it's respectable +28%/64% improvement on Core 2
|
45
|
-
# and +15% on Atom (as implied, over "hyper-threading-safe"
|
46
|
-
# code path).
|
47
|
-
#
|
48
|
-
# <appro@openssl.org>
|
49
|
-
|
50
|
-
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
|
51
|
-
push(@INC,"${dir}","${dir}../../perlasm");
|
52
|
-
require "x86asm.pl";
|
53
|
-
|
54
|
-
&asm_init($ARGV[0],"vpaes-x86.pl",$x86only = $ARGV[$#ARGV] eq "386");
|
55
|
-
|
56
|
-
$PREFIX="vpaes";
|
57
|
-
|
58
|
-
my ($round, $base, $magic, $key, $const, $inp, $out)=
|
59
|
-
("eax", "ebx", "ecx", "edx","ebp", "esi","edi");
|
60
|
-
|
61
|
-
&static_label("_vpaes_consts");
|
62
|
-
&static_label("_vpaes_schedule_low_round");
|
63
|
-
|
64
|
-
&set_label("_vpaes_consts",64);
|
65
|
-
$k_inv=-0x30; # inv, inva
|
66
|
-
&data_word(0x0D080180,0x0E05060F,0x0A0B0C02,0x04070309);
|
67
|
-
&data_word(0x0F0B0780,0x01040A06,0x02050809,0x030D0E0C);
|
68
|
-
|
69
|
-
$k_s0F=-0x10; # s0F
|
70
|
-
&data_word(0x0F0F0F0F,0x0F0F0F0F,0x0F0F0F0F,0x0F0F0F0F);
|
71
|
-
|
72
|
-
$k_ipt=0x00; # input transform (lo, hi)
|
73
|
-
&data_word(0x5A2A7000,0xC2B2E898,0x52227808,0xCABAE090);
|
74
|
-
&data_word(0x317C4D00,0x4C01307D,0xB0FDCC81,0xCD80B1FC);
|
75
|
-
|
76
|
-
$k_sb1=0x20; # sb1u, sb1t
|
77
|
-
&data_word(0xCB503E00,0xB19BE18F,0x142AF544,0xA5DF7A6E);
|
78
|
-
&data_word(0xFAE22300,0x3618D415,0x0D2ED9EF,0x3BF7CCC1);
|
79
|
-
$k_sb2=0x40; # sb2u, sb2t
|
80
|
-
&data_word(0x0B712400,0xE27A93C6,0xBC982FCD,0x5EB7E955);
|
81
|
-
&data_word(0x0AE12900,0x69EB8840,0xAB82234A,0xC2A163C8);
|
82
|
-
$k_sbo=0x60; # sbou, sbot
|
83
|
-
&data_word(0x6FBDC700,0xD0D26D17,0xC502A878,0x15AABF7A);
|
84
|
-
&data_word(0x5FBB6A00,0xCFE474A5,0x412B35FA,0x8E1E90D1);
|
85
|
-
|
86
|
-
$k_mc_forward=0x80; # mc_forward
|
87
|
-
&data_word(0x00030201,0x04070605,0x080B0A09,0x0C0F0E0D);
|
88
|
-
&data_word(0x04070605,0x080B0A09,0x0C0F0E0D,0x00030201);
|
89
|
-
&data_word(0x080B0A09,0x0C0F0E0D,0x00030201,0x04070605);
|
90
|
-
&data_word(0x0C0F0E0D,0x00030201,0x04070605,0x080B0A09);
|
91
|
-
|
92
|
-
$k_mc_backward=0xc0; # mc_backward
|
93
|
-
&data_word(0x02010003,0x06050407,0x0A09080B,0x0E0D0C0F);
|
94
|
-
&data_word(0x0E0D0C0F,0x02010003,0x06050407,0x0A09080B);
|
95
|
-
&data_word(0x0A09080B,0x0E0D0C0F,0x02010003,0x06050407);
|
96
|
-
&data_word(0x06050407,0x0A09080B,0x0E0D0C0F,0x02010003);
|
97
|
-
|
98
|
-
$k_sr=0x100; # sr
|
99
|
-
&data_word(0x03020100,0x07060504,0x0B0A0908,0x0F0E0D0C);
|
100
|
-
&data_word(0x0F0A0500,0x030E0904,0x07020D08,0x0B06010C);
|
101
|
-
&data_word(0x0B020900,0x0F060D04,0x030A0108,0x070E050C);
|
102
|
-
&data_word(0x070A0D00,0x0B0E0104,0x0F020508,0x0306090C);
|
103
|
-
|
104
|
-
$k_rcon=0x140; # rcon
|
105
|
-
&data_word(0xAF9DEEB6,0x1F8391B9,0x4D7C7D81,0x702A9808);
|
106
|
-
|
107
|
-
$k_s63=0x150; # s63: all equal to 0x63 transformed
|
108
|
-
&data_word(0x5B5B5B5B,0x5B5B5B5B,0x5B5B5B5B,0x5B5B5B5B);
|
109
|
-
|
110
|
-
$k_opt=0x160; # output transform
|
111
|
-
&data_word(0xD6B66000,0xFF9F4929,0xDEBE6808,0xF7974121);
|
112
|
-
&data_word(0x50BCEC00,0x01EDBD51,0xB05C0CE0,0xE10D5DB1);
|
113
|
-
|
114
|
-
$k_deskew=0x180; # deskew tables: inverts the sbox's "skew"
|
115
|
-
&data_word(0x47A4E300,0x07E4A340,0x5DBEF91A,0x1DFEB95A);
|
116
|
-
&data_word(0x83EA6900,0x5F36B5DC,0xF49D1E77,0x2841C2AB);
|
117
|
-
##
|
118
|
-
## Decryption stuff
|
119
|
-
## Key schedule constants
|
120
|
-
##
|
121
|
-
$k_dksd=0x1a0; # decryption key schedule: invskew x*D
|
122
|
-
&data_word(0xA3E44700,0xFEB91A5D,0x5A1DBEF9,0x0740E3A4);
|
123
|
-
&data_word(0xB5368300,0x41C277F4,0xAB289D1E,0x5FDC69EA);
|
124
|
-
$k_dksb=0x1c0; # decryption key schedule: invskew x*B
|
125
|
-
&data_word(0x8550D500,0x9A4FCA1F,0x1CC94C99,0x03D65386);
|
126
|
-
&data_word(0xB6FC4A00,0x115BEDA7,0x7E3482C8,0xD993256F);
|
127
|
-
$k_dkse=0x1e0; # decryption key schedule: invskew x*E + 0x63
|
128
|
-
&data_word(0x1FC9D600,0xD5031CCA,0x994F5086,0x53859A4C);
|
129
|
-
&data_word(0x4FDC7BE8,0xA2319605,0x20B31487,0xCD5EF96A);
|
130
|
-
$k_dks9=0x200; # decryption key schedule: invskew x*9
|
131
|
-
&data_word(0x7ED9A700,0xB6116FC8,0x82255BFC,0x4AED9334);
|
132
|
-
&data_word(0x27143300,0x45765162,0xE9DAFDCE,0x8BB89FAC);
|
133
|
-
|
134
|
-
##
|
135
|
-
## Decryption stuff
|
136
|
-
## Round function constants
|
137
|
-
##
|
138
|
-
$k_dipt=0x220; # decryption input transform
|
139
|
-
&data_word(0x0B545F00,0x0F505B04,0x114E451A,0x154A411E);
|
140
|
-
&data_word(0x60056500,0x86E383E6,0xF491F194,0x12771772);
|
141
|
-
|
142
|
-
$k_dsb9=0x240; # decryption sbox output *9*u, *9*t
|
143
|
-
&data_word(0x9A86D600,0x851C0353,0x4F994CC9,0xCAD51F50);
|
144
|
-
&data_word(0xECD74900,0xC03B1789,0xB2FBA565,0x725E2C9E);
|
145
|
-
$k_dsbd=0x260; # decryption sbox output *D*u, *D*t
|
146
|
-
&data_word(0xE6B1A200,0x7D57CCDF,0x882A4439,0xF56E9B13);
|
147
|
-
&data_word(0x24C6CB00,0x3CE2FAF7,0x15DEEFD3,0x2931180D);
|
148
|
-
$k_dsbb=0x280; # decryption sbox output *B*u, *B*t
|
149
|
-
&data_word(0x96B44200,0xD0226492,0xB0F2D404,0x602646F6);
|
150
|
-
&data_word(0xCD596700,0xC19498A6,0x3255AA6B,0xF3FF0C3E);
|
151
|
-
$k_dsbe=0x2a0; # decryption sbox output *E*u, *E*t
|
152
|
-
&data_word(0x26D4D000,0x46F29296,0x64B4F6B0,0x22426004);
|
153
|
-
&data_word(0xFFAAC100,0x0C55A6CD,0x98593E32,0x9467F36B);
|
154
|
-
$k_dsbo=0x2c0; # decryption sbox final output
|
155
|
-
&data_word(0x7EF94000,0x1387EA53,0xD4943E2D,0xC7AA6DB9);
|
156
|
-
&data_word(0x93441D00,0x12D7560F,0xD8C58E9C,0xCA4B8159);
|
157
|
-
&asciz ("Vector Permutation AES for x86/SSSE3, Mike Hamburg (Stanford University)");
|
158
|
-
&align (64);
|
159
|
-
|
160
|
-
&function_begin_B("_vpaes_preheat");
|
161
|
-
&add ($const,&DWP(0,"esp"));
|
162
|
-
&movdqa ("xmm7",&QWP($k_inv,$const));
|
163
|
-
&movdqa ("xmm6",&QWP($k_s0F,$const));
|
164
|
-
&ret ();
|
165
|
-
&function_end_B("_vpaes_preheat");
|
166
|
-
|
167
|
-
##
|
168
|
-
## _aes_encrypt_core
|
169
|
-
##
|
170
|
-
## AES-encrypt %xmm0.
|
171
|
-
##
|
172
|
-
## Inputs:
|
173
|
-
## %xmm0 = input
|
174
|
-
## %xmm6-%xmm7 as in _vpaes_preheat
|
175
|
-
## (%edx) = scheduled keys
|
176
|
-
##
|
177
|
-
## Output in %xmm0
|
178
|
-
## Clobbers %xmm1-%xmm5, %eax, %ebx, %ecx, %edx
|
179
|
-
##
|
180
|
-
##
|
181
|
-
&function_begin_B("_vpaes_encrypt_core");
|
182
|
-
&mov ($magic,16);
|
183
|
-
&mov ($round,&DWP(240,$key));
|
184
|
-
&movdqa ("xmm1","xmm6"); # 1 : copy of the s0F mask held in xmm6; statement terminator added so this call is no longer bitwise-ANDed with the next one
|
185
|
-
&movdqa ("xmm2",&QWP($k_ipt,$const));
|
186
|
-
&pandn ("xmm1","xmm0");
|
187
|
-
&pand ("xmm0","xmm6");
|
188
|
-
&movdqu ("xmm5",&QWP(0,$key));
|
189
|
-
&pshufb ("xmm2","xmm0");
|
190
|
-
&movdqa ("xmm0",&QWP($k_ipt+16,$const));
|
191
|
-
&pxor ("xmm2","xmm5");
|
192
|
-
&psrld ("xmm1",4);
|
193
|
-
&add ($key,16);
|
194
|
-
&pshufb ("xmm0","xmm1");
|
195
|
-
&lea ($base,&DWP($k_mc_backward,$const));
|
196
|
-
&pxor ("xmm0","xmm2");
|
197
|
-
&jmp (&label("enc_entry"));
|
198
|
-
|
199
|
-
|
200
|
-
&set_label("enc_loop",16);
|
201
|
-
# middle of middle round
|
202
|
-
&movdqa ("xmm4",&QWP($k_sb1,$const)); # 4 : sb1u
|
203
|
-
&movdqa ("xmm0",&QWP($k_sb1+16,$const));# 0 : sb1t
|
204
|
-
&pshufb ("xmm4","xmm2"); # 4 = sb1u
|
205
|
-
&pshufb ("xmm0","xmm3"); # 0 = sb1t
|
206
|
-
&pxor ("xmm4","xmm5"); # 4 = sb1u + k
|
207
|
-
&movdqa ("xmm5",&QWP($k_sb2,$const)); # 4 : sb2u
|
208
|
-
&pxor ("xmm0","xmm4"); # 0 = A
|
209
|
-
&movdqa ("xmm1",&QWP(-0x40,$base,$magic));# .Lk_mc_forward[]
|
210
|
-
&pshufb ("xmm5","xmm2"); # 4 = sb2u
|
211
|
-
&movdqa ("xmm2",&QWP($k_sb2+16,$const));# 2 : sb2t
|
212
|
-
&movdqa ("xmm4",&QWP(0,$base,$magic)); # .Lk_mc_backward[]
|
213
|
-
&pshufb ("xmm2","xmm3"); # 2 = sb2t
|
214
|
-
&movdqa ("xmm3","xmm0"); # 3 = A
|
215
|
-
&pxor ("xmm2","xmm5"); # 2 = 2A
|
216
|
-
&pshufb ("xmm0","xmm1"); # 0 = B
|
217
|
-
&add ($key,16); # next key
|
218
|
-
&pxor ("xmm0","xmm2"); # 0 = 2A+B
|
219
|
-
&pshufb ("xmm3","xmm4"); # 3 = D
|
220
|
-
&add ($magic,16); # next mc
|
221
|
-
&pxor ("xmm3","xmm0"); # 3 = 2A+B+D
|
222
|
-
&pshufb ("xmm0","xmm1"); # 0 = 2B+C
|
223
|
-
&and ($magic,0x30); # ... mod 4
|
224
|
-
&sub ($round,1); # nr--
|
225
|
-
&pxor ("xmm0","xmm3"); # 0 = 2A+3B+C+D
|
226
|
-
|
227
|
-
&set_label("enc_entry");
|
228
|
-
# top of round
|
229
|
-
&movdqa ("xmm1","xmm6"); # 1 : i
|
230
|
-
&movdqa ("xmm5",&QWP($k_inv+16,$const));# 2 : a/k
|
231
|
-
&pandn ("xmm1","xmm0"); # 1 = i<<4
|
232
|
-
&psrld ("xmm1",4); # 1 = i
|
233
|
-
&pand ("xmm0","xmm6"); # 0 = k
|
234
|
-
&pshufb ("xmm5","xmm0"); # 2 = a/k
|
235
|
-
&movdqa ("xmm3","xmm7"); # 3 : 1/i
|
236
|
-
&pxor ("xmm0","xmm1"); # 0 = j
|
237
|
-
&pshufb ("xmm3","xmm1"); # 3 = 1/i
|
238
|
-
&movdqa ("xmm4","xmm7"); # 4 : 1/j
|
239
|
-
&pxor ("xmm3","xmm5"); # 3 = iak = 1/i + a/k
|
240
|
-
&pshufb ("xmm4","xmm0"); # 4 = 1/j
|
241
|
-
&movdqa ("xmm2","xmm7"); # 2 : 1/iak
|
242
|
-
&pxor ("xmm4","xmm5"); # 4 = jak = 1/j + a/k
|
243
|
-
&pshufb ("xmm2","xmm3"); # 2 = 1/iak
|
244
|
-
&movdqa ("xmm3","xmm7"); # 3 : 1/jak
|
245
|
-
&pxor ("xmm2","xmm0"); # 2 = io
|
246
|
-
&pshufb ("xmm3","xmm4"); # 3 = 1/jak
|
247
|
-
&movdqu ("xmm5",&QWP(0,$key));
|
248
|
-
&pxor ("xmm3","xmm1"); # 3 = jo
|
249
|
-
&jnz (&label("enc_loop"));
|
250
|
-
|
251
|
-
# middle of last round
|
252
|
-
&movdqa ("xmm4",&QWP($k_sbo,$const)); # 3 : sbou .Lk_sbo
|
253
|
-
&movdqa ("xmm0",&QWP($k_sbo+16,$const));# 3 : sbot .Lk_sbo+16
|
254
|
-
&pshufb ("xmm4","xmm2"); # 4 = sbou
|
255
|
-
&pxor ("xmm4","xmm5"); # 4 = sb1u + k
|
256
|
-
&pshufb ("xmm0","xmm3"); # 0 = sb1t
|
257
|
-
&movdqa ("xmm1",&QWP(0x40,$base,$magic));# .Lk_sr[]
|
258
|
-
&pxor ("xmm0","xmm4"); # 0 = A
|
259
|
-
&pshufb ("xmm0","xmm1");
|
260
|
-
&ret ();
|
261
|
-
&function_end_B("_vpaes_encrypt_core");
|
262
|
-
|
263
|
-
##
|
264
|
-
## Decryption core
|
265
|
-
##
|
266
|
-
## Same API as encryption core.
|
267
|
-
##
|
268
|
-
&function_begin_B("_vpaes_decrypt_core");
|
269
|
-
&lea ($base,&DWP($k_dsbd,$const));
|
270
|
-
&mov ($round,&DWP(240,$key));
|
271
|
-
&movdqa ("xmm1","xmm6");
|
272
|
-
&movdqa ("xmm2",&QWP($k_dipt-$k_dsbd,$base));
|
273
|
-
&pandn ("xmm1","xmm0");
|
274
|
-
&mov ($magic,$round);
|
275
|
-
&psrld ("xmm1",4); # 1 = i (high nibbles shifted down); statement terminator added so this call is no longer bitwise-ANDed with the next one
|
276
|
-
&movdqu ("xmm5",&QWP(0,$key));
|
277
|
-
&shl ($magic,4);
|
278
|
-
&pand ("xmm0","xmm6");
|
279
|
-
&pshufb ("xmm2","xmm0");
|
280
|
-
&movdqa ("xmm0",&QWP($k_dipt-$k_dsbd+16,$base));
|
281
|
-
&xor ($magic,0x30);
|
282
|
-
&pshufb ("xmm0","xmm1");
|
283
|
-
&and ($magic,0x30);
|
284
|
-
&pxor ("xmm2","xmm5");
|
285
|
-
&movdqa ("xmm5",&QWP($k_mc_forward+48,$const));
|
286
|
-
&pxor ("xmm0","xmm2");
|
287
|
-
&add ($key,16);
|
288
|
-
&lea ($magic,&DWP($k_sr-$k_dsbd,$base,$magic));
|
289
|
-
&jmp (&label("dec_entry"));
|
290
|
-
|
291
|
-
&set_label("dec_loop",16);
|
292
|
-
##
|
293
|
-
## Inverse mix columns
|
294
|
-
##
|
295
|
-
&movdqa ("xmm4",&QWP(-0x20,$base)); # 4 : sb9u
|
296
|
-
&movdqa ("xmm1",&QWP(-0x10,$base)); # 0 : sb9t
|
297
|
-
&pshufb ("xmm4","xmm2"); # 4 = sb9u
|
298
|
-
&pshufb ("xmm1","xmm3"); # 0 = sb9t
|
299
|
-
&pxor ("xmm0","xmm4");
|
300
|
-
&movdqa ("xmm4",&QWP(0,$base)); # 4 : sbdu
|
301
|
-
&pxor ("xmm0","xmm1"); # 0 = ch
|
302
|
-
&movdqa ("xmm1",&QWP(0x10,$base)); # 0 : sbdt
|
303
|
-
|
304
|
-
&pshufb ("xmm4","xmm2"); # 4 = sbdu
|
305
|
-
&pshufb ("xmm0","xmm5"); # MC ch
|
306
|
-
&pshufb ("xmm1","xmm3"); # 0 = sbdt
|
307
|
-
&pxor ("xmm0","xmm4"); # 4 = ch
|
308
|
-
&movdqa ("xmm4",&QWP(0x20,$base)); # 4 : sbbu
|
309
|
-
&pxor ("xmm0","xmm1"); # 0 = ch
|
310
|
-
&movdqa ("xmm1",&QWP(0x30,$base)); # 0 : sbbt
|
311
|
-
|
312
|
-
&pshufb ("xmm4","xmm2"); # 4 = sbbu
|
313
|
-
&pshufb ("xmm0","xmm5"); # MC ch
|
314
|
-
&pshufb ("xmm1","xmm3"); # 0 = sbbt
|
315
|
-
&pxor ("xmm0","xmm4"); # 4 = ch
|
316
|
-
&movdqa ("xmm4",&QWP(0x40,$base)); # 4 : sbeu
|
317
|
-
&pxor ("xmm0","xmm1"); # 0 = ch
|
318
|
-
&movdqa ("xmm1",&QWP(0x50,$base)); # 0 : sbet
|
319
|
-
|
320
|
-
&pshufb ("xmm4","xmm2"); # 4 = sbeu
|
321
|
-
&pshufb ("xmm0","xmm5"); # MC ch
|
322
|
-
&pshufb ("xmm1","xmm3"); # 0 = sbet
|
323
|
-
&pxor ("xmm0","xmm4"); # 4 = ch
|
324
|
-
&add ($key,16); # next round key
|
325
|
-
&palignr("xmm5","xmm5",12);
|
326
|
-
&pxor ("xmm0","xmm1"); # 0 = ch
|
327
|
-
&sub ($round,1); # nr--
|
328
|
-
|
329
|
-
&set_label("dec_entry");
|
330
|
-
# top of round
|
331
|
-
&movdqa ("xmm1","xmm6"); # 1 : i
|
332
|
-
&movdqa ("xmm2",&QWP($k_inv+16,$const));# 2 : a/k
|
333
|
-
&pandn ("xmm1","xmm0"); # 1 = i<<4
|
334
|
-
&pand ("xmm0","xmm6"); # 0 = k
|
335
|
-
&psrld ("xmm1",4); # 1 = i
|
336
|
-
&pshufb ("xmm2","xmm0"); # 2 = a/k
|
337
|
-
&movdqa ("xmm3","xmm7"); # 3 : 1/i
|
338
|
-
&pxor ("xmm0","xmm1"); # 0 = j
|
339
|
-
&pshufb ("xmm3","xmm1"); # 3 = 1/i
|
340
|
-
&movdqa ("xmm4","xmm7"); # 4 : 1/j
|
341
|
-
&pxor ("xmm3","xmm2"); # 3 = iak = 1/i + a/k
|
342
|
-
&pshufb ("xmm4","xmm0"); # 4 = 1/j
|
343
|
-
&pxor ("xmm4","xmm2"); # 4 = jak = 1/j + a/k
|
344
|
-
&movdqa ("xmm2","xmm7"); # 2 : 1/iak
|
345
|
-
&pshufb ("xmm2","xmm3"); # 2 = 1/iak
|
346
|
-
&movdqa ("xmm3","xmm7"); # 3 : 1/jak
|
347
|
-
&pxor ("xmm2","xmm0"); # 2 = io
|
348
|
-
&pshufb ("xmm3","xmm4"); # 3 = 1/jak
|
349
|
-
&movdqu ("xmm0",&QWP(0,$key));
|
350
|
-
&pxor ("xmm3","xmm1"); # 3 = jo
|
351
|
-
&jnz (&label("dec_loop"));
|
352
|
-
|
353
|
-
# middle of last round
|
354
|
-
&movdqa ("xmm4",&QWP(0x60,$base)); # 3 : sbou
|
355
|
-
&pshufb ("xmm4","xmm2"); # 4 = sbou
|
356
|
-
&pxor ("xmm4","xmm0"); # 4 = sb1u + k
|
357
|
-
&movdqa ("xmm0",&QWP(0x70,$base)); # 0 : sbot
|
358
|
-
&movdqa ("xmm2",&QWP(0,$magic));
|
359
|
-
&pshufb ("xmm0","xmm3"); # 0 = sb1t
|
360
|
-
&pxor ("xmm0","xmm4"); # 0 = A
|
361
|
-
&pshufb ("xmm0","xmm2");
|
362
|
-
&ret ();
|
363
|
-
&function_end_B("_vpaes_decrypt_core");
|
364
|
-
|
365
|
-
########################################################
|
366
|
-
## ##
|
367
|
-
## AES key schedule ##
|
368
|
-
## ##
|
369
|
-
########################################################
|
370
|
-
&function_begin_B("_vpaes_schedule_core");
|
371
|
-
&add ($const,&DWP(0,"esp"));
|
372
|
-
&movdqu ("xmm0",&QWP(0,$inp)); # load key (unaligned)
|
373
|
-
&movdqa ("xmm2",&QWP($k_rcon,$const)); # load rcon
|
374
|
-
|
375
|
-
# input transform
|
376
|
-
&movdqa ("xmm3","xmm0");
|
377
|
-
&lea ($base,&DWP($k_ipt,$const));
|
378
|
-
&movdqa (&QWP(4,"esp"),"xmm2"); # xmm8
|
379
|
-
&call ("_vpaes_schedule_transform");
|
380
|
-
&movdqa ("xmm7","xmm0");
|
381
|
-
|
382
|
-
&test ($out,$out);
|
383
|
-
&jnz (&label("schedule_am_decrypting"));
|
384
|
-
|
385
|
-
# encrypting, output zeroth round key after transform
|
386
|
-
&movdqu (&QWP(0,$key),"xmm0");
|
387
|
-
&jmp (&label("schedule_go"));
|
388
|
-
|
389
|
-
&set_label("schedule_am_decrypting");
|
390
|
-
# decrypting, output zeroth round key after shiftrows
|
391
|
-
&movdqa ("xmm1",&QWP($k_sr,$const,$magic));
|
392
|
-
&pshufb ("xmm3","xmm1");
|
393
|
-
&movdqu (&QWP(0,$key),"xmm3");
|
394
|
-
&xor ($magic,0x30);
|
395
|
-
|
396
|
-
&set_label("schedule_go");
|
397
|
-
&cmp ($round,192);
|
398
|
-
&ja (&label("schedule_256"));
|
399
|
-
&je (&label("schedule_192"));
|
400
|
-
# 128: fall through
|
401
|
-
|
402
|
-
##
|
403
|
-
## .schedule_128
|
404
|
-
##
|
405
|
-
## 128-bit specific part of key schedule.
|
406
|
-
##
|
407
|
-
## This schedule is really simple, because all its parts
|
408
|
-
## are accomplished by the subroutines.
|
409
|
-
##
|
410
|
-
&set_label("schedule_128");
|
411
|
-
&mov ($round,10);
|
412
|
-
|
413
|
-
&set_label("loop_schedule_128");
|
414
|
-
&call ("_vpaes_schedule_round");
|
415
|
-
&dec ($round);
|
416
|
-
&jz (&label("schedule_mangle_last"));
|
417
|
-
&call ("_vpaes_schedule_mangle"); # write output
|
418
|
-
&jmp (&label("loop_schedule_128"));
|
419
|
-
|
420
|
-
##
|
421
|
-
## .aes_schedule_192
|
422
|
-
##
|
423
|
-
## 192-bit specific part of key schedule.
|
424
|
-
##
|
425
|
-
## The main body of this schedule is the same as the 128-bit
|
426
|
-
## schedule, but with more smearing. The long, high side is
|
427
|
-
## stored in %xmm7 as before, and the short, low side is in
|
428
|
-
## the high bits of %xmm6.
|
429
|
-
##
|
430
|
-
## This schedule is somewhat nastier, however, because each
|
431
|
-
## round produces 192 bits of key material, or 1.5 round keys.
|
432
|
-
## Therefore, on each cycle we do 2 rounds and produce 3 round
|
433
|
-
## keys.
|
434
|
-
##
|
435
|
-
&set_label("schedule_192",16);
|
436
|
-
&movdqu ("xmm0",&QWP(8,$inp)); # load key part 2 (very unaligned)
|
437
|
-
&call ("_vpaes_schedule_transform"); # input transform
|
438
|
-
&movdqa ("xmm6","xmm0"); # save short part
|
439
|
-
&pxor ("xmm4","xmm4"); # clear 4
|
440
|
-
&movhlps("xmm6","xmm4"); # clobber low side with zeros
|
441
|
-
&mov ($round,4);
|
442
|
-
|
443
|
-
&set_label("loop_schedule_192");
|
444
|
-
&call ("_vpaes_schedule_round");
|
445
|
-
&palignr("xmm0","xmm6",8);
|
446
|
-
&call ("_vpaes_schedule_mangle"); # save key n
|
447
|
-
&call ("_vpaes_schedule_192_smear");
|
448
|
-
&call ("_vpaes_schedule_mangle"); # save key n+1
|
449
|
-
&call ("_vpaes_schedule_round");
|
450
|
-
&dec ($round);
|
451
|
-
&jz (&label("schedule_mangle_last"));
|
452
|
-
&call ("_vpaes_schedule_mangle"); # save key n+2
|
453
|
-
&call ("_vpaes_schedule_192_smear");
|
454
|
-
&jmp (&label("loop_schedule_192"));
|
455
|
-
|
456
|
-
##
|
457
|
-
## .aes_schedule_256
|
458
|
-
##
|
459
|
-
## 256-bit specific part of key schedule.
|
460
|
-
##
|
461
|
-
## The structure here is very similar to the 128-bit
|
462
|
-
## schedule, but with an additional "low side" in
|
463
|
-
## %xmm6. The low side's rounds are the same as the
|
464
|
-
## high side's, except no rcon and no rotation.
|
465
|
-
##
|
466
|
-
&set_label("schedule_256",16);
|
467
|
-
&movdqu ("xmm0",&QWP(16,$inp)); # load key part 2 (unaligned)
|
468
|
-
&call ("_vpaes_schedule_transform"); # input transform
|
469
|
-
&mov ($round,7);
|
470
|
-
|
471
|
-
&set_label("loop_schedule_256");
|
472
|
-
&call ("_vpaes_schedule_mangle"); # output low result
|
473
|
-
&movdqa ("xmm6","xmm0"); # save cur_lo in xmm6
|
474
|
-
|
475
|
-
# high round
|
476
|
-
&call ("_vpaes_schedule_round");
|
477
|
-
&dec ($round);
|
478
|
-
&jz (&label("schedule_mangle_last"));
|
479
|
-
&call ("_vpaes_schedule_mangle");
|
480
|
-
|
481
|
-
# low round. swap xmm7 and xmm6
|
482
|
-
&pshufd ("xmm0","xmm0",0xFF);
|
483
|
-
&movdqa (&QWP(20,"esp"),"xmm7");
|
484
|
-
&movdqa ("xmm7","xmm6");
|
485
|
-
&call ("_vpaes_schedule_low_round");
|
486
|
-
&movdqa ("xmm7",&QWP(20,"esp"));
|
487
|
-
|
488
|
-
&jmp (&label("loop_schedule_256"));
|
489
|
-
|
490
|
-
##
|
491
|
-
## .aes_schedule_mangle_last
|
492
|
-
##
|
493
|
-
## Mangler for last round of key schedule
|
494
|
-
## Mangles %xmm0
|
495
|
-
## when encrypting, outputs out(%xmm0) ^ 63
|
496
|
-
## when decrypting, outputs unskew(%xmm0)
|
497
|
-
##
|
498
|
-
## Always called right before return... jumps to cleanup and exits
|
499
|
-
##
|
500
|
-
&set_label("schedule_mangle_last",16);
|
501
|
-
# schedule last round key from xmm0
|
502
|
-
&lea ($base,&DWP($k_deskew,$const));
|
503
|
-
&test ($out,$out);
|
504
|
-
&jnz (&label("schedule_mangle_last_dec"));
|
505
|
-
|
506
|
-
# encrypting
|
507
|
-
&movdqa ("xmm1",&QWP($k_sr,$const,$magic));
|
508
|
-
&pshufb ("xmm0","xmm1"); # output permute
|
509
|
-
&lea ($base,&DWP($k_opt,$const)); # prepare to output transform
|
510
|
-
&add ($key,32);
|
511
|
-
|
512
|
-
&set_label("schedule_mangle_last_dec");
|
513
|
-
&add ($key,-16);
|
514
|
-
&pxor ("xmm0",&QWP($k_s63,$const));
|
515
|
-
&call ("_vpaes_schedule_transform"); # output transform
|
516
|
-
&movdqu (&QWP(0,$key),"xmm0"); # save last key
|
517
|
-
|
518
|
-
# cleanup
|
519
|
-
&pxor ("xmm0","xmm0");
|
520
|
-
&pxor ("xmm1","xmm1");
|
521
|
-
&pxor ("xmm2","xmm2");
|
522
|
-
&pxor ("xmm3","xmm3");
|
523
|
-
&pxor ("xmm4","xmm4");
|
524
|
-
&pxor ("xmm5","xmm5");
|
525
|
-
&pxor ("xmm6","xmm6");
|
526
|
-
&pxor ("xmm7","xmm7");
|
527
|
-
&ret ();
|
528
|
-
&function_end_B("_vpaes_schedule_core");
|
529
|
-
|
530
|
-
##
|
531
|
-
## .aes_schedule_192_smear
|
532
|
-
##
|
533
|
-
## Smear the short, low side in the 192-bit key schedule.
|
534
|
-
##
|
535
|
-
## Inputs:
|
536
|
-
## %xmm7: high side, b a x y
|
537
|
-
## %xmm6: low side, d c 0 0
|
538
|
-
## (the "%xmm13: 0" input named in the original comment is not read
## by this code; it zeroes %xmm1 itself and uses that as the zero source)
|
539
|
-
##
|
540
|
-
## Outputs:
|
541
|
-
## %xmm6: b+c+d b+c 0 0
|
542
|
-
## %xmm0: b+c+d b+c b a
|
543
|
-
##
## Clobbers %xmm1 (it is zero on return).
##
|
544
|
-
&function_begin_B("_vpaes_schedule_192_smear");
|
545
|
-
&pshufd ("xmm1","xmm6",0x80); # d c 0 0 -> c 0 0 0
|
546
|
-
&pshufd ("xmm0","xmm7",0xFE); # b a _ _ -> b b b a
|
547
|
-
&pxor ("xmm6","xmm1"); # -> c+d c 0 0
|
548
|
-
&pxor ("xmm1","xmm1"); # xmm1 = 0, zero source for movhlps below
|
549
|
-
&pxor ("xmm6","xmm0"); # -> b+c+d b+c b a
|
550
|
-
&movdqa ("xmm0","xmm6"); # full result is returned in xmm0
|
551
|
-
&movhlps("xmm6","xmm1"); # clobber low side with zeros
|
552
|
-
&ret ();
|
553
|
-
&function_end_B("_vpaes_schedule_192_smear");
|
554
|
-
|
555
|
-
##
|
556
|
-
## .aes_schedule_round
|
557
|
-
##
|
558
|
-
## Runs one main round of the key schedule on %xmm0, %xmm7
|
559
|
-
##
|
560
|
-
## Specifically, runs subbytes on the high dword of %xmm0
|
561
|
-
## then rotates it by one byte and xors into the low dword of
|
562
|
-
## %xmm7.
|
563
|
-
##
|
564
|
-
## Adds rcon from low byte of %xmm8, then rotates %xmm8 for
|
565
|
-
## next rcon.
|
566
|
-
##
## NOTE: in this code "%xmm8" is not a register but the 16-byte
## stack slot at 8(%esp): it is loaded into %xmm2, rotated there,
## and written back before execution falls into the low round.
##
|
567
|
-
## Smears the dwords of %xmm7 by xoring the low into the
|
568
|
-
## second low, result into third, result into highest.
|
569
|
-
##
## Secondary entry point _vpaes_schedule_low_round skips the rcon
## and rotation steps and starts directly at the smear.
##
## Constants are read relative to $const ($k_s63, $k_s0F, $k_inv,
## $k_sb1).
##
|
570
|
-
## Returns results in %xmm7 = %xmm0.
|
571
|
-
## Clobbers %xmm1-%xmm5.
|
572
|
-
##
|
573
|
-
&function_begin_B("_vpaes_schedule_round");
|
574
|
-
# extract rcon from xmm8
|
575
|
-
&movdqa ("xmm2",&QWP(8,"esp")); # xmm8
|
576
|
-
&pxor ("xmm1","xmm1"); # xmm1 = 0
|
577
|
-
&palignr("xmm1","xmm2",15); # xmm1 = one byte taken from the rcon vector, rest zero
|
578
|
-
&palignr("xmm2","xmm2",15); # rotate the rcon vector by one byte for the next round
|
579
|
-
&pxor ("xmm7","xmm1"); # add rcon into xmm7
|
580
|
-
|
581
|
-
# rotate
|
582
|
-
&pshufd ("xmm0","xmm0",0xFF); # broadcast the high dword of xmm0
|
583
|
-
&palignr("xmm0","xmm0",1); # byte-rotate (all four dwords are equal here)
|
584
|
-
|
585
|
-
# fall through...
|
586
|
-
&movdqa (&QWP(8,"esp"),"xmm2"); # xmm8 (write back the rotated rcon vector)
|
587
|
-
|
588
|
-
# low round: same as high round, but no rotation and no rcon.
|
589
|
-
&set_label("_vpaes_schedule_low_round");
|
590
|
-
# smear xmm7
|
591
|
-
&movdqa ("xmm1","xmm7");
|
592
|
-
&pslldq ("xmm7",4); # shift left by one dword
|
593
|
-
&pxor ("xmm7","xmm1"); # each dword ^= the dword below it
|
594
|
-
&movdqa ("xmm1","xmm7");
|
595
|
-
&pslldq ("xmm7",8); # shift left by two dwords
|
596
|
-
&pxor ("xmm7","xmm1"); # dword n = xor of the original dwords 0..n
|
597
|
-
&pxor ("xmm7",&QWP($k_s63,$const)); # xor in the constant at $k_s63
|
598
|
-
|
599
|
-
# subbyte
|
600
|
-
&movdqa ("xmm4",&QWP($k_s0F,$const)); # mask used to split bytes into two halves
|
601
|
-
&movdqa ("xmm5",&QWP($k_inv,$const)); # 4 : 1/j
|
602
|
-
&movdqa ("xmm1","xmm4");
|
603
|
-
&pandn ("xmm1","xmm0");
|
604
|
-
&psrld ("xmm1",4); # 1 = i
|
605
|
-
&pand ("xmm0","xmm4"); # 0 = k
|
606
|
-
&movdqa ("xmm2",&QWP($k_inv+16,$const));# 2 : a/k
|
607
|
-
&pshufb ("xmm2","xmm0"); # 2 = a/k
|
608
|
-
&pxor ("xmm0","xmm1"); # 0 = j
|
609
|
-
&movdqa ("xmm3","xmm5"); # 3 : 1/i
|
610
|
-
&pshufb ("xmm3","xmm1"); # 3 = 1/i
|
611
|
-
&pxor ("xmm3","xmm2"); # 3 = iak = 1/i + a/k
|
612
|
-
&movdqa ("xmm4","xmm5"); # 4 : 1/j
|
613
|
-
&pshufb ("xmm4","xmm0"); # 4 = 1/j
|
614
|
-
&pxor ("xmm4","xmm2"); # 4 = jak = 1/j + a/k
|
615
|
-
&movdqa ("xmm2","xmm5"); # 2 : 1/iak
|
616
|
-
&pshufb ("xmm2","xmm3"); # 2 = 1/iak
|
617
|
-
&pxor ("xmm2","xmm0"); # 2 = io
|
618
|
-
&movdqa ("xmm3","xmm5"); # 3 : 1/jak
|
619
|
-
&pshufb ("xmm3","xmm4"); # 3 = 1/jak
|
620
|
-
&pxor ("xmm3","xmm1"); # 3 = jo
|
621
|
-
&movdqa ("xmm4",&QWP($k_sb1,$const)); # 4 : sbou
|
622
|
-
&pshufb ("xmm4","xmm2"); # 4 = sbou
|
623
|
-
&movdqa ("xmm0",&QWP($k_sb1+16,$const));# 0 : sbot
|
624
|
-
&pshufb ("xmm0","xmm3"); # 0 = sb1t
|
625
|
-
&pxor ("xmm0","xmm4"); # 0 = sbox output
|
626
|
-
|
627
|
-
# add in smeared stuff
|
628
|
-
&pxor ("xmm0","xmm7");
|
629
|
-
&movdqa ("xmm7","xmm0"); # result is returned in both xmm0 and xmm7
|
630
|
-
&ret ();
|
631
|
-
&function_end_B("_vpaes_schedule_round");
|
632
|
-
|
633
|
-
##
|
634
|
-
## .aes_schedule_transform
|
635
|
-
##
|
636
|
-
## Linear-transform %xmm0 according to tables at (%ebx)
|
637
|
-
##
## The code addresses the tables through $base: the 16 bytes at
## 0($base) are looked up with one half of every input byte, the
## 16 bytes at 16($base) with the other half (shifted down by 4),
## and the two lookups are xored together. The splitting mask is
## read from $k_s0F relative to $const.
##
|
638
|
-
## Output in %xmm0
|
639
|
-
## Clobbers %xmm1, %xmm2
|
640
|
-
##
|
641
|
-
&function_begin_B("_vpaes_schedule_transform");
|
642
|
-
&movdqa ("xmm2",&QWP($k_s0F,$const)); # splitting mask
|
643
|
-
&movdqa ("xmm1","xmm2");
|
644
|
-
&pandn ("xmm1","xmm0"); # xmm1 = input bits not covered by the mask
|
645
|
-
&psrld ("xmm1",4); # ... moved down by 4 bits
|
646
|
-
&pand ("xmm0","xmm2"); # xmm0 = input bits covered by the mask
|
647
|
-
&movdqa ("xmm2",&QWP(0,$base)); # first table
|
648
|
-
&pshufb ("xmm2","xmm0"); # lookup indexed by xmm0
|
649
|
-
&movdqa ("xmm0",&QWP(16,$base)); # second table
|
650
|
-
&pshufb ("xmm0","xmm1"); # lookup indexed by xmm1
|
651
|
-
&pxor ("xmm0","xmm2"); # combine both lookups
|
652
|
-
&ret ();
|
653
|
-
&function_end_B("_vpaes_schedule_transform");
|
654
|
-
|
655
|
-
##
|
656
|
-
## .aes_schedule_mangle
|
657
|
-
##
|
658
|
-
## Mangle xmm0 from (basis-transformed) standard version
|
659
|
-
## to our version.
|
660
|
-
##
|
661
|
-
## On encrypt,
|
662
|
-
## xor with 0x63
|
663
|
-
## multiply by circulant 0,1,1,1
|
664
|
-
## apply shiftrows transform
|
665
|
-
##
|
666
|
-
## On decrypt,
|
667
|
-
## xor with 0x63
|
668
|
-
## multiply by "inverse mixcolumns" circulant E,B,D,9
|
669
|
-
## deskew
|
670
|
-
## apply shiftrows transform
|
671
|
-
##
|
672
|
-
## The path is selected by $out: zero takes the encrypt path,
## non-zero the decrypt path.
##
|
673
|
-
## Writes out to (%edx), and increments or decrements it
|
674
|
-
## Keeps track of round number mod 4 in %ecx
|
675
|
-
## Preserves xmm0
|
676
|
-
## Clobbers xmm1-xmm5
## The decrypt path also overwrites $inp (it is pointed at the
## $k_dksd tables).
## In the code the output pointer is $key (adjusted by +16 / -16
## before the store) and the rotating index is $magic (stepped by
## -16 and masked with 0x30, used as an offset into $k_sr).
|
677
|
-
##
|
678
|
-
&function_begin_B("_vpaes_schedule_mangle");
|
679
|
-
&movdqa ("xmm4","xmm0"); # save xmm0 for later
|
680
|
-
&movdqa ("xmm5",&QWP($k_mc_forward,$const)); # shuffle mask used by both paths
|
681
|
-
&test ($out,$out);
|
682
|
-
&jnz (&label("schedule_mangle_dec"));
|
683
|
-
|
684
|
-
# encrypting
|
685
|
-
&add ($key,16); # advance output pointer before the store
|
686
|
-
&pxor ("xmm4",&QWP($k_s63,$const));
|
687
|
-
&pshufb ("xmm4","xmm5"); # shuffled once
|
688
|
-
&movdqa ("xmm3","xmm4");
|
689
|
-
&pshufb ("xmm4","xmm5"); # shuffled twice
|
690
|
-
&pxor ("xmm3","xmm4");
|
691
|
-
&pshufb ("xmm4","xmm5"); # shuffled three times
|
692
|
-
&pxor ("xmm3","xmm4"); # xmm3 = xor of the three shuffled copies
|
693
|
-
|
694
|
-
&jmp (&label("schedule_mangle_both"));
|
695
|
-
|
696
|
-
&set_label("schedule_mangle_dec",16);
|
697
|
-
# inverse mix columns
|
698
|
-
&movdqa ("xmm2",&QWP($k_s0F,$const));
|
699
|
-
&lea ($inp,&DWP($k_dksd,$const)); # $inp is reused as table pointer from here on
|
700
|
-
&movdqa ("xmm1","xmm2");
|
701
|
-
&pandn ("xmm1","xmm4");
|
702
|
-
&psrld ("xmm1",4); # 1 = hi
|
703
|
-
&pand ("xmm4","xmm2"); # 4 = lo
|
704
|
-
|
705
|
-
&movdqa ("xmm2",&QWP(0,$inp)); # table pair at 0x00/0x10
|
706
|
-
&pshufb ("xmm2","xmm4");
|
707
|
-
&movdqa ("xmm3",&QWP(0x10,$inp));
|
708
|
-
&pshufb ("xmm3","xmm1");
|
709
|
-
&pxor ("xmm3","xmm2");
|
710
|
-
&pshufb ("xmm3","xmm5"); # shuffle the running result by xmm5
|
711
|
-
|
712
|
-
&movdqa ("xmm2",&QWP(0x20,$inp)); # table pair at 0x20/0x30
|
713
|
-
&pshufb ("xmm2","xmm4");
|
714
|
-
&pxor ("xmm2","xmm3");
|
715
|
-
&movdqa ("xmm3",&QWP(0x30,$inp));
|
716
|
-
&pshufb ("xmm3","xmm1");
|
717
|
-
&pxor ("xmm3","xmm2");
|
718
|
-
&pshufb ("xmm3","xmm5");
|
719
|
-
|
720
|
-
&movdqa ("xmm2",&QWP(0x40,$inp)); # table pair at 0x40/0x50
|
721
|
-
&pshufb ("xmm2","xmm4");
|
722
|
-
&pxor ("xmm2","xmm3");
|
723
|
-
&movdqa ("xmm3",&QWP(0x50,$inp));
|
724
|
-
&pshufb ("xmm3","xmm1");
|
725
|
-
&pxor ("xmm3","xmm2");
|
726
|
-
&pshufb ("xmm3","xmm5");
|
727
|
-
|
728
|
-
&movdqa ("xmm2",&QWP(0x60,$inp)); # table pair at 0x60/0x70 (no shuffle afterwards)
|
729
|
-
&pshufb ("xmm2","xmm4");
|
730
|
-
&pxor ("xmm2","xmm3");
|
731
|
-
&movdqa ("xmm3",&QWP(0x70,$inp));
|
732
|
-
&pshufb ("xmm3","xmm1");
|
733
|
-
&pxor ("xmm3","xmm2");
|
734
|
-
|
735
|
-
&add ($key,-16); # step output pointer back before the store
|
736
|
-
|
737
|
-
&set_label("schedule_mangle_both");
|
738
|
-
&movdqa ("xmm1",&QWP($k_sr,$const,$magic)); # permutation selected by $magic
|
739
|
-
&pshufb ("xmm3","xmm1");
|
740
|
-
&add ($magic,-16);
|
741
|
-
&and ($magic,0x30); # keep $magic within 0x00..0x30
|
742
|
-
&movdqu (&QWP(0,$key),"xmm3"); # unaligned store of the mangled round key
|
743
|
-
&ret ();
|
744
|
-
&function_end_B("_vpaes_schedule_mangle");
|
745
|
-
|
746
|
-
#
|
747
|
-
# Interface to OpenSSL
|
748
|
-
#
|
749
|
-
# ${PREFIX}_set_encrypt_key: arguments are wparam(0) = inp, wparam(1) = bits,
# wparam(2) = key.
#
# Stores bits/32+5 at offset 240 of key, then calls _vpaes_schedule_core
# with $out = 0 (the value the schedule code tests for to take its
# "encrypting" paths) and $magic = 0x30. Sets %eax to 0 before returning.
#
# A 16-byte-aligned scratch frame of at least 56 bytes is carved out below
# the incoming %esp; the previous %esp is kept at 48(%esp) and restored
# after the call.
&function_begin("${PREFIX}_set_encrypt_key");
|
750
|
-
&mov ($inp,&wparam(0)); # inp
|
751
|
-
&lea ($base,&DWP(-56,"esp"));
|
752
|
-
&mov ($round,&wparam(1)); # bits
|
753
|
-
&and ($base,-16); # round the new stack pointer down to 16 bytes
|
754
|
-
&mov ($key,&wparam(2)); # key
|
755
|
-
&xchg ($base,"esp"); # alloca
|
756
|
-
&mov (&DWP(48,"esp"),$base); # remember the caller's %esp
|
757
|
-
|
758
|
-
&mov ($base,$round);
|
759
|
-
&shr ($base,5);
|
760
|
-
&add ($base,5);
|
761
|
-
&mov (&DWP(240,$key),$base); # AES_KEY->rounds = nbits/32+5;
|
762
|
-
&mov ($magic,0x30);
|
763
|
-
&mov ($out,0); # 0 = encrypting
|
764
|
-
|
765
|
-
# $const = (_vpaes_consts + 0x30) - pic_point, i.e. relative to the return
# address of the call below. NOTE(review): presumably the callee adds its
# return address to make this absolute; that code is outside this chunk.
&lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point")));
|
766
|
-
&call ("_vpaes_schedule_core");
|
767
|
-
&set_label("pic_point");
|
768
|
-
|
769
|
-
&mov ("esp",&DWP(48,"esp")); # drop the scratch frame
|
770
|
-
&xor ("eax","eax"); # %eax = 0
|
771
|
-
&function_end("${PREFIX}_set_encrypt_key");
|
772
|
-
|
773
|
-
# ${PREFIX}_set_decrypt_key: arguments are wparam(0) = inp, wparam(1) = bits,
# wparam(2) = key.
#
# Stores rounds = bits/32+5 at offset 240 of key, then advances $key to
# key + 16 + 16*rounds and calls _vpaes_schedule_core with $out = 1 (a
# non-zero $out selects the "decrypting" paths of the schedule code, which
# step $key downwards by 16 per stored round key). Sets %eax to 0 before
# returning.
#
# Uses the same 16-byte-aligned scratch frame as the encrypt variant: at
# least 56 bytes below the incoming %esp, previous %esp kept at 48(%esp).
&function_begin("${PREFIX}_set_decrypt_key");
|
774
|
-
&mov ($inp,&wparam(0)); # inp
|
775
|
-
&lea ($base,&DWP(-56,"esp"));
|
776
|
-
&mov ($round,&wparam(1)); # bits
|
777
|
-
&and ($base,-16); # round the new stack pointer down to 16 bytes
|
778
|
-
&mov ($key,&wparam(2)); # key
|
779
|
-
&xchg ($base,"esp"); # alloca
|
780
|
-
&mov (&DWP(48,"esp"),$base); # remember the caller's %esp
|
781
|
-
|
782
|
-
&mov ($base,$round);
|
783
|
-
&shr ($base,5);
|
784
|
-
&add ($base,5);
|
785
|
-
&mov (&DWP(240,$key),$base); # AES_KEY->rounds = nbits/32+5;
|
786
|
-
&shl ($base,4); # rounds * 16
|
787
|
-
&lea ($key,&DWP(16,$key,$base)); # $key = key + 16 + rounds*16
|
788
|
-
|
789
|
-
&mov ($out,1); # non-zero = decrypting
|
790
|
-
&mov ($magic,$round);
|
791
|
-
&shr ($magic,1);
|
792
|
-
&and ($magic,32);
|
793
|
-
&xor ($magic,32); # nbits==192?0:32;
|
794
|
-
|
795
|
-
# $const = (_vpaes_consts + 0x30) - pic_point, i.e. relative to the return
# address of the call below. NOTE(review): presumably the callee adds its
# return address to make this absolute; that code is outside this chunk.
&lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point")));
|
796
|
-
&call ("_vpaes_schedule_core");
|
797
|
-
&set_label("pic_point");
|
798
|
-
|
799
|
-
&mov ("esp",&DWP(48,"esp")); # drop the scratch frame
|
800
|
-
&xor ("eax","eax"); # %eax = 0
|
801
|
-
&function_end("${PREFIX}_set_decrypt_key");
|
802
|
-
|
803
|
-
# ${PREFIX}_encrypt: arguments are wparam(0) = inp, wparam(1) = out,
# wparam(2) = key.
#
# Loads 16 bytes from inp (unaligned load) into %xmm0, calls
# _vpaes_encrypt_core, and stores %xmm0 to out (unaligned store).
# $key is loaded for the core routine, which is defined outside this chunk.
#
# _vpaes_preheat is called first, with $const holding
# (_vpaes_consts + 0x30) - pic_point, where pic_point is the return address
# of that call. NOTE(review): _vpaes_preheat is not visible here; presumably
# it makes $const absolute and preloads constants - confirm.
#
# A 16-byte-aligned scratch frame of at least 56 bytes is carved out below
# the incoming %esp; the previous %esp is kept at 48(%esp) and restored
# before returning.
&function_begin("${PREFIX}_encrypt");
|
804
|
-
&lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point")));
|
805
|
-
&call ("_vpaes_preheat");
|
806
|
-
&set_label("pic_point");
|
807
|
-
&mov ($inp,&wparam(0)); # inp
|
808
|
-
&lea ($base,&DWP(-56,"esp"));
|
809
|
-
&mov ($out,&wparam(1)); # out
|
810
|
-
&and ($base,-16); # round the new stack pointer down to 16 bytes
|
811
|
-
&mov ($key,&wparam(2)); # key
|
812
|
-
&xchg ($base,"esp"); # alloca
|
813
|
-
&mov (&DWP(48,"esp"),$base); # remember the caller's %esp
|
814
|
-
|
815
|
-
&movdqu ("xmm0",&QWP(0,$inp)); # load the 16-byte input block
|
816
|
-
&call ("_vpaes_encrypt_core");
|
817
|
-
&movdqu (&QWP(0,$out),"xmm0"); # store the 16-byte result
|
818
|
-
|
819
|
-
&mov ("esp",&DWP(48,"esp")); # drop the scratch frame
|
820
|
-
&function_end("${PREFIX}_encrypt");
|
821
|
-
|
822
|
-
# ${PREFIX}_decrypt: arguments are wparam(0) = inp, wparam(1) = out,
# wparam(2) = key.
#
# Loads 16 bytes from inp (unaligned load) into %xmm0, calls
# _vpaes_decrypt_core, and stores %xmm0 to out (unaligned store).
# $key is loaded for the core routine, which is defined outside this chunk.
#
# _vpaes_preheat is called first, with $const holding
# (_vpaes_consts + 0x30) - pic_point, where pic_point is the return address
# of that call. NOTE(review): _vpaes_preheat is not visible here; presumably
# it makes $const absolute and preloads constants - confirm.
#
# A 16-byte-aligned scratch frame of at least 56 bytes is carved out below
# the incoming %esp; the previous %esp is kept at 48(%esp) and restored
# before returning.
&function_begin("${PREFIX}_decrypt");
|
823
|
-
&lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point")));
|
824
|
-
&call ("_vpaes_preheat");
|
825
|
-
&set_label("pic_point");
|
826
|
-
&mov ($inp,&wparam(0)); # inp
|
827
|
-
&lea ($base,&DWP(-56,"esp"));
|
828
|
-
&mov ($out,&wparam(1)); # out
|
829
|
-
&and ($base,-16); # round the new stack pointer down to 16 bytes
|
830
|
-
&mov ($key,&wparam(2)); # key
|
831
|
-
&xchg ($base,"esp"); # alloca
|
832
|
-
&mov (&DWP(48,"esp"),$base); # remember the caller's %esp
|
833
|
-
|
834
|
-
&movdqu ("xmm0",&QWP(0,$inp)); # load the 16-byte input block
|
835
|
-
&call ("_vpaes_decrypt_core");
|
836
|
-
&movdqu (&QWP(0,$out),"xmm0"); # store the 16-byte result
|
837
|
-
|
838
|
-
&mov ("esp",&DWP(48,"esp")); # drop the scratch frame
|
839
|
-
&function_end("${PREFIX}_decrypt");
|
840
|
-
|
841
|
-
# Last statement of the script. asm_finish is defined outside this chunk;
# NOTE(review): presumably it emits the accumulated assembly - confirm.
&asm_finish();
|