ring-native 0.0.0 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/CHANGES.md +7 -0
- data/Makefile +5 -0
- data/README.md +12 -5
- data/Rakefile +4 -0
- data/ext/ring/extconf.rb +4 -5
- data/lib/ring/native.rb +3 -1
- data/lib/ring/native/version.rb +5 -1
- data/ring-native.gemspec +6 -6
- data/vendor/ring-ffi/Cargo.lock +26 -0
- data/vendor/ring-ffi/Cargo.toml +45 -0
- data/vendor/ring-ffi/LICENSE +16 -0
- data/vendor/ring-ffi/README.md +59 -0
- data/vendor/ring-ffi/src/lib.rs +79 -0
- metadata +10 -255
- data/vendor/ring/BUILDING.md +0 -40
- data/vendor/ring/Cargo.toml +0 -43
- data/vendor/ring/LICENSE +0 -185
- data/vendor/ring/Makefile +0 -35
- data/vendor/ring/PORTING.md +0 -163
- data/vendor/ring/README.md +0 -113
- data/vendor/ring/STYLE.md +0 -197
- data/vendor/ring/appveyor.yml +0 -27
- data/vendor/ring/build.rs +0 -108
- data/vendor/ring/crypto/aes/aes.c +0 -1142
- data/vendor/ring/crypto/aes/aes_test.Windows.vcxproj +0 -25
- data/vendor/ring/crypto/aes/aes_test.cc +0 -93
- data/vendor/ring/crypto/aes/asm/aes-586.pl +0 -2368
- data/vendor/ring/crypto/aes/asm/aes-armv4.pl +0 -1249
- data/vendor/ring/crypto/aes/asm/aes-x86_64.pl +0 -2246
- data/vendor/ring/crypto/aes/asm/aesni-x86.pl +0 -1318
- data/vendor/ring/crypto/aes/asm/aesni-x86_64.pl +0 -2084
- data/vendor/ring/crypto/aes/asm/aesv8-armx.pl +0 -675
- data/vendor/ring/crypto/aes/asm/bsaes-armv7.pl +0 -1364
- data/vendor/ring/crypto/aes/asm/bsaes-x86_64.pl +0 -1565
- data/vendor/ring/crypto/aes/asm/vpaes-x86.pl +0 -841
- data/vendor/ring/crypto/aes/asm/vpaes-x86_64.pl +0 -1116
- data/vendor/ring/crypto/aes/internal.h +0 -87
- data/vendor/ring/crypto/aes/mode_wrappers.c +0 -61
- data/vendor/ring/crypto/bn/add.c +0 -394
- data/vendor/ring/crypto/bn/asm/armv4-mont.pl +0 -694
- data/vendor/ring/crypto/bn/asm/armv8-mont.pl +0 -1503
- data/vendor/ring/crypto/bn/asm/bn-586.pl +0 -774
- data/vendor/ring/crypto/bn/asm/co-586.pl +0 -287
- data/vendor/ring/crypto/bn/asm/rsaz-avx2.pl +0 -1882
- data/vendor/ring/crypto/bn/asm/x86-mont.pl +0 -592
- data/vendor/ring/crypto/bn/asm/x86_64-gcc.c +0 -599
- data/vendor/ring/crypto/bn/asm/x86_64-mont.pl +0 -1393
- data/vendor/ring/crypto/bn/asm/x86_64-mont5.pl +0 -3507
- data/vendor/ring/crypto/bn/bn.c +0 -352
- data/vendor/ring/crypto/bn/bn_asn1.c +0 -74
- data/vendor/ring/crypto/bn/bn_test.Windows.vcxproj +0 -25
- data/vendor/ring/crypto/bn/bn_test.cc +0 -1696
- data/vendor/ring/crypto/bn/cmp.c +0 -200
- data/vendor/ring/crypto/bn/convert.c +0 -433
- data/vendor/ring/crypto/bn/ctx.c +0 -311
- data/vendor/ring/crypto/bn/div.c +0 -594
- data/vendor/ring/crypto/bn/exponentiation.c +0 -1335
- data/vendor/ring/crypto/bn/gcd.c +0 -711
- data/vendor/ring/crypto/bn/generic.c +0 -1019
- data/vendor/ring/crypto/bn/internal.h +0 -316
- data/vendor/ring/crypto/bn/montgomery.c +0 -516
- data/vendor/ring/crypto/bn/mul.c +0 -888
- data/vendor/ring/crypto/bn/prime.c +0 -829
- data/vendor/ring/crypto/bn/random.c +0 -334
- data/vendor/ring/crypto/bn/rsaz_exp.c +0 -262
- data/vendor/ring/crypto/bn/rsaz_exp.h +0 -53
- data/vendor/ring/crypto/bn/shift.c +0 -276
- data/vendor/ring/crypto/bytestring/bytestring_test.Windows.vcxproj +0 -25
- data/vendor/ring/crypto/bytestring/bytestring_test.cc +0 -421
- data/vendor/ring/crypto/bytestring/cbb.c +0 -399
- data/vendor/ring/crypto/bytestring/cbs.c +0 -227
- data/vendor/ring/crypto/bytestring/internal.h +0 -46
- data/vendor/ring/crypto/chacha/chacha_generic.c +0 -140
- data/vendor/ring/crypto/chacha/chacha_vec.c +0 -323
- data/vendor/ring/crypto/chacha/chacha_vec_arm.S +0 -1447
- data/vendor/ring/crypto/chacha/chacha_vec_arm_generate.go +0 -153
- data/vendor/ring/crypto/cipher/cipher_test.Windows.vcxproj +0 -25
- data/vendor/ring/crypto/cipher/e_aes.c +0 -390
- data/vendor/ring/crypto/cipher/e_chacha20poly1305.c +0 -208
- data/vendor/ring/crypto/cipher/internal.h +0 -173
- data/vendor/ring/crypto/cipher/test/aes_128_gcm_tests.txt +0 -543
- data/vendor/ring/crypto/cipher/test/aes_128_key_wrap_tests.txt +0 -9
- data/vendor/ring/crypto/cipher/test/aes_256_gcm_tests.txt +0 -475
- data/vendor/ring/crypto/cipher/test/aes_256_key_wrap_tests.txt +0 -23
- data/vendor/ring/crypto/cipher/test/chacha20_poly1305_old_tests.txt +0 -422
- data/vendor/ring/crypto/cipher/test/chacha20_poly1305_tests.txt +0 -484
- data/vendor/ring/crypto/cipher/test/cipher_test.txt +0 -100
- data/vendor/ring/crypto/constant_time_test.Windows.vcxproj +0 -25
- data/vendor/ring/crypto/constant_time_test.c +0 -304
- data/vendor/ring/crypto/cpu-arm-asm.S +0 -32
- data/vendor/ring/crypto/cpu-arm.c +0 -199
- data/vendor/ring/crypto/cpu-intel.c +0 -261
- data/vendor/ring/crypto/crypto.c +0 -151
- data/vendor/ring/crypto/curve25519/asm/x25519-arm.S +0 -2118
- data/vendor/ring/crypto/curve25519/curve25519.c +0 -4888
- data/vendor/ring/crypto/curve25519/x25519_test.cc +0 -128
- data/vendor/ring/crypto/digest/md32_common.h +0 -181
- data/vendor/ring/crypto/ec/asm/p256-x86_64-asm.pl +0 -2725
- data/vendor/ring/crypto/ec/ec.c +0 -193
- data/vendor/ring/crypto/ec/ec_curves.c +0 -61
- data/vendor/ring/crypto/ec/ec_key.c +0 -228
- data/vendor/ring/crypto/ec/ec_montgomery.c +0 -114
- data/vendor/ring/crypto/ec/example_mul.Windows.vcxproj +0 -25
- data/vendor/ring/crypto/ec/internal.h +0 -243
- data/vendor/ring/crypto/ec/oct.c +0 -253
- data/vendor/ring/crypto/ec/p256-64.c +0 -1794
- data/vendor/ring/crypto/ec/p256-x86_64-table.h +0 -9548
- data/vendor/ring/crypto/ec/p256-x86_64.c +0 -509
- data/vendor/ring/crypto/ec/simple.c +0 -1007
- data/vendor/ring/crypto/ec/util-64.c +0 -183
- data/vendor/ring/crypto/ec/wnaf.c +0 -508
- data/vendor/ring/crypto/ecdh/ecdh.c +0 -155
- data/vendor/ring/crypto/ecdsa/ecdsa.c +0 -304
- data/vendor/ring/crypto/ecdsa/ecdsa_asn1.c +0 -193
- data/vendor/ring/crypto/ecdsa/ecdsa_test.Windows.vcxproj +0 -25
- data/vendor/ring/crypto/ecdsa/ecdsa_test.cc +0 -327
- data/vendor/ring/crypto/header_removed.h +0 -17
- data/vendor/ring/crypto/internal.h +0 -495
- data/vendor/ring/crypto/libring.Windows.vcxproj +0 -101
- data/vendor/ring/crypto/mem.c +0 -98
- data/vendor/ring/crypto/modes/asm/aesni-gcm-x86_64.pl +0 -1045
- data/vendor/ring/crypto/modes/asm/ghash-armv4.pl +0 -517
- data/vendor/ring/crypto/modes/asm/ghash-x86.pl +0 -1393
- data/vendor/ring/crypto/modes/asm/ghash-x86_64.pl +0 -1741
- data/vendor/ring/crypto/modes/asm/ghashv8-armx.pl +0 -422
- data/vendor/ring/crypto/modes/ctr.c +0 -226
- data/vendor/ring/crypto/modes/gcm.c +0 -1206
- data/vendor/ring/crypto/modes/gcm_test.Windows.vcxproj +0 -25
- data/vendor/ring/crypto/modes/gcm_test.c +0 -348
- data/vendor/ring/crypto/modes/internal.h +0 -299
- data/vendor/ring/crypto/perlasm/arm-xlate.pl +0 -170
- data/vendor/ring/crypto/perlasm/readme +0 -100
- data/vendor/ring/crypto/perlasm/x86_64-xlate.pl +0 -1164
- data/vendor/ring/crypto/perlasm/x86asm.pl +0 -292
- data/vendor/ring/crypto/perlasm/x86gas.pl +0 -263
- data/vendor/ring/crypto/perlasm/x86masm.pl +0 -200
- data/vendor/ring/crypto/perlasm/x86nasm.pl +0 -187
- data/vendor/ring/crypto/poly1305/poly1305.c +0 -331
- data/vendor/ring/crypto/poly1305/poly1305_arm.c +0 -301
- data/vendor/ring/crypto/poly1305/poly1305_arm_asm.S +0 -2015
- data/vendor/ring/crypto/poly1305/poly1305_test.Windows.vcxproj +0 -25
- data/vendor/ring/crypto/poly1305/poly1305_test.cc +0 -80
- data/vendor/ring/crypto/poly1305/poly1305_test.txt +0 -52
- data/vendor/ring/crypto/poly1305/poly1305_vec.c +0 -892
- data/vendor/ring/crypto/rand/asm/rdrand-x86_64.pl +0 -75
- data/vendor/ring/crypto/rand/internal.h +0 -32
- data/vendor/ring/crypto/rand/rand.c +0 -189
- data/vendor/ring/crypto/rand/urandom.c +0 -219
- data/vendor/ring/crypto/rand/windows.c +0 -56
- data/vendor/ring/crypto/refcount_c11.c +0 -66
- data/vendor/ring/crypto/refcount_lock.c +0 -53
- data/vendor/ring/crypto/refcount_test.Windows.vcxproj +0 -25
- data/vendor/ring/crypto/refcount_test.c +0 -58
- data/vendor/ring/crypto/rsa/blinding.c +0 -462
- data/vendor/ring/crypto/rsa/internal.h +0 -108
- data/vendor/ring/crypto/rsa/padding.c +0 -300
- data/vendor/ring/crypto/rsa/rsa.c +0 -450
- data/vendor/ring/crypto/rsa/rsa_asn1.c +0 -261
- data/vendor/ring/crypto/rsa/rsa_impl.c +0 -944
- data/vendor/ring/crypto/rsa/rsa_test.Windows.vcxproj +0 -25
- data/vendor/ring/crypto/rsa/rsa_test.cc +0 -437
- data/vendor/ring/crypto/sha/asm/sha-armv8.pl +0 -436
- data/vendor/ring/crypto/sha/asm/sha-x86_64.pl +0 -2390
- data/vendor/ring/crypto/sha/asm/sha256-586.pl +0 -1275
- data/vendor/ring/crypto/sha/asm/sha256-armv4.pl +0 -735
- data/vendor/ring/crypto/sha/asm/sha256-armv8.pl +0 -14
- data/vendor/ring/crypto/sha/asm/sha256-x86_64.pl +0 -14
- data/vendor/ring/crypto/sha/asm/sha512-586.pl +0 -911
- data/vendor/ring/crypto/sha/asm/sha512-armv4.pl +0 -666
- data/vendor/ring/crypto/sha/asm/sha512-armv8.pl +0 -14
- data/vendor/ring/crypto/sha/asm/sha512-x86_64.pl +0 -14
- data/vendor/ring/crypto/sha/sha1.c +0 -271
- data/vendor/ring/crypto/sha/sha256.c +0 -204
- data/vendor/ring/crypto/sha/sha512.c +0 -355
- data/vendor/ring/crypto/test/file_test.cc +0 -326
- data/vendor/ring/crypto/test/file_test.h +0 -181
- data/vendor/ring/crypto/test/malloc.cc +0 -150
- data/vendor/ring/crypto/test/scoped_types.h +0 -95
- data/vendor/ring/crypto/test/test.Windows.vcxproj +0 -35
- data/vendor/ring/crypto/test/test_util.cc +0 -46
- data/vendor/ring/crypto/test/test_util.h +0 -41
- data/vendor/ring/crypto/thread_none.c +0 -55
- data/vendor/ring/crypto/thread_pthread.c +0 -165
- data/vendor/ring/crypto/thread_test.Windows.vcxproj +0 -25
- data/vendor/ring/crypto/thread_test.c +0 -200
- data/vendor/ring/crypto/thread_win.c +0 -282
- data/vendor/ring/examples/checkdigest.rs +0 -103
- data/vendor/ring/include/openssl/aes.h +0 -121
- data/vendor/ring/include/openssl/arm_arch.h +0 -129
- data/vendor/ring/include/openssl/base.h +0 -156
- data/vendor/ring/include/openssl/bn.h +0 -794
- data/vendor/ring/include/openssl/buffer.h +0 -18
- data/vendor/ring/include/openssl/bytestring.h +0 -235
- data/vendor/ring/include/openssl/chacha.h +0 -37
- data/vendor/ring/include/openssl/cmac.h +0 -76
- data/vendor/ring/include/openssl/cpu.h +0 -184
- data/vendor/ring/include/openssl/crypto.h +0 -43
- data/vendor/ring/include/openssl/curve25519.h +0 -88
- data/vendor/ring/include/openssl/ec.h +0 -225
- data/vendor/ring/include/openssl/ec_key.h +0 -129
- data/vendor/ring/include/openssl/ecdh.h +0 -110
- data/vendor/ring/include/openssl/ecdsa.h +0 -156
- data/vendor/ring/include/openssl/err.h +0 -201
- data/vendor/ring/include/openssl/mem.h +0 -101
- data/vendor/ring/include/openssl/obj_mac.h +0 -71
- data/vendor/ring/include/openssl/opensslfeatures.h +0 -68
- data/vendor/ring/include/openssl/opensslv.h +0 -18
- data/vendor/ring/include/openssl/ossl_typ.h +0 -18
- data/vendor/ring/include/openssl/poly1305.h +0 -51
- data/vendor/ring/include/openssl/rand.h +0 -70
- data/vendor/ring/include/openssl/rsa.h +0 -399
- data/vendor/ring/include/openssl/thread.h +0 -133
- data/vendor/ring/include/openssl/type_check.h +0 -71
- data/vendor/ring/mk/Common.props +0 -63
- data/vendor/ring/mk/Windows.props +0 -42
- data/vendor/ring/mk/WindowsTest.props +0 -18
- data/vendor/ring/mk/appveyor.bat +0 -62
- data/vendor/ring/mk/bottom_of_makefile.mk +0 -54
- data/vendor/ring/mk/ring.mk +0 -266
- data/vendor/ring/mk/top_of_makefile.mk +0 -214
- data/vendor/ring/mk/travis.sh +0 -40
- data/vendor/ring/mk/update-travis-yml.py +0 -229
- data/vendor/ring/ring.sln +0 -153
- data/vendor/ring/src/aead.rs +0 -682
- data/vendor/ring/src/agreement.rs +0 -248
- data/vendor/ring/src/c.rs +0 -129
- data/vendor/ring/src/constant_time.rs +0 -37
- data/vendor/ring/src/der.rs +0 -96
- data/vendor/ring/src/digest.rs +0 -690
- data/vendor/ring/src/digest_tests.txt +0 -57
- data/vendor/ring/src/ecc.rs +0 -28
- data/vendor/ring/src/ecc_build.rs +0 -279
- data/vendor/ring/src/ecc_curves.rs +0 -117
- data/vendor/ring/src/ed25519_tests.txt +0 -2579
- data/vendor/ring/src/exe_tests.rs +0 -46
- data/vendor/ring/src/ffi.rs +0 -29
- data/vendor/ring/src/file_test.rs +0 -187
- data/vendor/ring/src/hkdf.rs +0 -153
- data/vendor/ring/src/hkdf_tests.txt +0 -59
- data/vendor/ring/src/hmac.rs +0 -414
- data/vendor/ring/src/hmac_tests.txt +0 -97
- data/vendor/ring/src/input.rs +0 -312
- data/vendor/ring/src/lib.rs +0 -41
- data/vendor/ring/src/pbkdf2.rs +0 -265
- data/vendor/ring/src/pbkdf2_tests.txt +0 -113
- data/vendor/ring/src/polyfill.rs +0 -57
- data/vendor/ring/src/rand.rs +0 -28
- data/vendor/ring/src/signature.rs +0 -314
- data/vendor/ring/third-party/NIST/README.md +0 -9
- data/vendor/ring/third-party/NIST/SHAVS/SHA1LongMsg.rsp +0 -263
- data/vendor/ring/third-party/NIST/SHAVS/SHA1Monte.rsp +0 -309
- data/vendor/ring/third-party/NIST/SHAVS/SHA1ShortMsg.rsp +0 -267
- data/vendor/ring/third-party/NIST/SHAVS/SHA224LongMsg.rsp +0 -263
- data/vendor/ring/third-party/NIST/SHAVS/SHA224Monte.rsp +0 -309
- data/vendor/ring/third-party/NIST/SHAVS/SHA224ShortMsg.rsp +0 -267
- data/vendor/ring/third-party/NIST/SHAVS/SHA256LongMsg.rsp +0 -263
- data/vendor/ring/third-party/NIST/SHAVS/SHA256Monte.rsp +0 -309
- data/vendor/ring/third-party/NIST/SHAVS/SHA256ShortMsg.rsp +0 -267
- data/vendor/ring/third-party/NIST/SHAVS/SHA384LongMsg.rsp +0 -519
- data/vendor/ring/third-party/NIST/SHAVS/SHA384Monte.rsp +0 -309
- data/vendor/ring/third-party/NIST/SHAVS/SHA384ShortMsg.rsp +0 -523
- data/vendor/ring/third-party/NIST/SHAVS/SHA512LongMsg.rsp +0 -519
- data/vendor/ring/third-party/NIST/SHAVS/SHA512Monte.rsp +0 -309
- data/vendor/ring/third-party/NIST/SHAVS/SHA512ShortMsg.rsp +0 -523
- data/vendor/ring/third-party/NIST/sha256sums.txt +0 -1
@@ -1,517 +0,0 @@
|
|
1
|
-
#!/usr/bin/env perl
|
2
|
-
#
|
3
|
-
# ====================================================================
|
4
|
-
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
5
|
-
# project. The module is, however, dual licensed under OpenSSL and
|
6
|
-
# CRYPTOGAMS licenses depending on where you obtain it. For further
|
7
|
-
# details see http://www.openssl.org/~appro/cryptogams/.
|
8
|
-
# ====================================================================
|
9
|
-
#
|
10
|
-
# April 2010
|
11
|
-
#
|
12
|
-
# The module implements "4-bit" GCM GHASH function and underlying
|
13
|
-
# single multiplication operation in GF(2^128). "4-bit" means that it
|
14
|
-
# uses 256 bytes per-key table [+32 bytes shared table]. There is no
|
15
|
-
# experimental performance data available yet. The only approximation
|
16
|
-
# that can be made at this point is based on code size. Inner loop is
|
17
|
-
# 32 instructions long and on single-issue core should execute in <40
|
18
|
-
# cycles. Having verified that gcc 3.4 didn't unroll corresponding
|
19
|
-
# loop, this assembler loop body was found to be ~3x smaller than
|
20
|
-
# compiler-generated one...
|
21
|
-
#
|
22
|
-
# July 2010
|
23
|
-
#
|
24
|
-
# Rescheduling for dual-issue pipeline resulted in 8.5% improvement on
|
25
|
-
# Cortex A8 core and ~25 cycles per processed byte (which was observed
|
26
|
-
# to be ~3 times faster than gcc-generated code:-)
|
27
|
-
#
|
28
|
-
# February 2011
|
29
|
-
#
|
30
|
-
# Profiler-assisted and platform-specific optimization resulted in 7%
|
31
|
-
# improvement on Cortex A8 core and ~23.5 cycles per byte.
|
32
|
-
#
|
33
|
-
# March 2011
|
34
|
-
#
|
35
|
-
# Add NEON implementation featuring polynomial multiplication, i.e. no
|
36
|
-
# lookup tables involved. On Cortex A8 it was measured to process one
|
37
|
-
# byte in 15 cycles or 55% faster than integer-only code.
|
38
|
-
#
|
39
|
-
# April 2014
|
40
|
-
#
|
41
|
-
# Switch to multiplication algorithm suggested in the paper referenced
|
42
|
-
# below and combine it with reduction algorithm from x86 module.
|
43
|
-
# Performance improvement over previous version varies from 65% on
|
44
|
-
# Snapdragon S4 to 110% on Cortex A9. In absolute terms Cortex A8
|
45
|
-
# processes one byte in 8.45 cycles, A9 - in 10.2, A15 - in 7.63,
|
46
|
-
# Snapdragon S4 - in 9.33.
|
47
|
-
#
|
48
|
-
# Câmara, D.; Gouvêa, C. P. L.; López, J. & Dahab, R.: Fast Software
|
49
|
-
# Polynomial Multiplication on ARM Processors using the NEON Engine.
|
50
|
-
#
|
51
|
-
# http://conradoplg.cryptoland.net/files/2010/12/mocrysen13.pdf
|
52
|
-
|
53
|
-
# ====================================================================
|
54
|
-
# Note about "528B" variant. In ARM case it makes less sense to
|
55
|
-
# implement it for following reasons:
|
56
|
-
#
|
57
|
-
# - performance improvement won't be anywhere near 50%, because 128-
|
58
|
-
# bit shift operation is neatly fused with 128-bit xor here, and
|
59
|
-
# "528B" variant would eliminate only 4-5 instructions out of 32
|
60
|
-
# in the inner loop (meaning that estimated improvement is ~15%);
|
61
|
-
# - ARM-based systems are often embedded ones and extra memory
|
62
|
-
# consumption might be unappreciated (for so little improvement);
|
63
|
-
#
|
64
|
-
# Byte order [in]dependence. =========================================
|
65
|
-
#
|
66
|
-
# Caller is expected to maintain specific *dword* order in Htable,
|
67
|
-
# namely with *least* significant dword of 128-bit value at *lower*
|
68
|
-
# address. This differs completely from C code and has everything to
|
69
|
-
# do with ldm instruction and order in which dwords are "consumed" by
|
70
|
-
# algorithm. *Byte* order within these dwords in turn is whatever
|
71
|
-
# *native* byte order on current platform. See gcm128.c for working
|
72
|
-
# example...
|
73
|
-
|
74
|
-
$flavour = shift;
|
75
|
-
if ($flavour=~/^\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; }
|
76
|
-
else { while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} }
|
77
|
-
|
78
|
-
if ($flavour && $flavour ne "void") {
|
79
|
-
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
|
80
|
-
( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
|
81
|
-
( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
|
82
|
-
die "can't locate arm-xlate.pl";
|
83
|
-
|
84
|
-
# Route everything printed from here on through arm-xlate.pl, which
# rewrites the generated assembly for the requested flavour. Abort if
# the pipe cannot be started rather than continuing without a translator.
open STDOUT,"| \"$^X\" $xlate $flavour $output"
    or die "can't call $xlate: $!";
|
85
|
-
} else {
|
86
|
-
open STDOUT,">$output";
|
87
|
-
}
|
88
|
-
|
89
|
-
$Xi="r0"; # argument block
|
90
|
-
$Htbl="r1";
|
91
|
-
$inp="r2";
|
92
|
-
$len="r3";
|
93
|
-
|
94
|
-
$Zll="r4"; # variables
|
95
|
-
$Zlh="r5";
|
96
|
-
$Zhl="r6";
|
97
|
-
$Zhh="r7";
|
98
|
-
$Tll="r8";
|
99
|
-
$Tlh="r9";
|
100
|
-
$Thl="r10";
|
101
|
-
$Thh="r11";
|
102
|
-
$nlo="r12";
|
103
|
-
################# r13 is stack pointer
|
104
|
-
$nhi="r14";
|
105
|
-
################# r15 is program counter
|
106
|
-
|
107
|
-
$rem_4bit=$inp; # used in gcm_gmult_4bit
|
108
|
-
$cnt=$len;
|
109
|
-
|
110
|
-
sub Zsmash() {
|
111
|
-
my $i=12;
|
112
|
-
my @args=@_;
|
113
|
-
for ($Zll,$Zlh,$Zhl,$Zhh) {
|
114
|
-
$code.=<<___;
|
115
|
-
#if __ARM_ARCH__>=7 && defined(__ARMEL__)
|
116
|
-
rev $_,$_
|
117
|
-
str $_,[$Xi,#$i]
|
118
|
-
#elif defined(__ARMEB__)
|
119
|
-
str $_,[$Xi,#$i]
|
120
|
-
#else
|
121
|
-
mov $Tlh,$_,lsr#8
|
122
|
-
strb $_,[$Xi,#$i+3]
|
123
|
-
mov $Thl,$_,lsr#16
|
124
|
-
strb $Tlh,[$Xi,#$i+2]
|
125
|
-
mov $Thh,$_,lsr#24
|
126
|
-
strb $Thl,[$Xi,#$i+1]
|
127
|
-
strb $Thh,[$Xi,#$i]
|
128
|
-
#endif
|
129
|
-
___
|
130
|
-
$code.="\t".shift(@args)."\n";
|
131
|
-
$i-=4;
|
132
|
-
}
|
133
|
-
}
|
134
|
-
|
135
|
-
$code=<<___;
|
136
|
-
#include <openssl/arm_arch.h>
|
137
|
-
|
138
|
-
.syntax unified
|
139
|
-
|
140
|
-
.text
|
141
|
-
.code 32
|
142
|
-
|
143
|
-
#ifdef __clang__
|
144
|
-
#define ldrplb ldrbpl
|
145
|
-
#define ldrneb ldrbne
|
146
|
-
#endif
|
147
|
-
|
148
|
-
.type rem_4bit,%object
|
149
|
-
.align 5
|
150
|
-
rem_4bit:
|
151
|
-
.short 0x0000,0x1C20,0x3840,0x2460
|
152
|
-
.short 0x7080,0x6CA0,0x48C0,0x54E0
|
153
|
-
.short 0xE100,0xFD20,0xD940,0xC560
|
154
|
-
.short 0x9180,0x8DA0,0xA9C0,0xB5E0
|
155
|
-
.size rem_4bit,.-rem_4bit
|
156
|
-
|
157
|
-
.type rem_4bit_get,%function
|
158
|
-
rem_4bit_get:
|
159
|
-
sub $rem_4bit,pc,#8
|
160
|
-
sub $rem_4bit,$rem_4bit,#32 @ &rem_4bit
|
161
|
-
b .Lrem_4bit_got
|
162
|
-
nop
|
163
|
-
.size rem_4bit_get,.-rem_4bit_get
|
164
|
-
|
165
|
-
.global gcm_ghash_4bit
|
166
|
-
.hidden gcm_ghash_4bit
|
167
|
-
.type gcm_ghash_4bit,%function
|
168
|
-
gcm_ghash_4bit:
|
169
|
-
sub r12,pc,#8
|
170
|
-
add $len,$inp,$len @ $len to point at the end
|
171
|
-
stmdb sp!,{r3-r11,lr} @ save $len/end too
|
172
|
-
sub r12,r12,#48 @ &rem_4bit
|
173
|
-
|
174
|
-
ldmia r12,{r4-r11} @ copy rem_4bit ...
|
175
|
-
stmdb sp!,{r4-r11} @ ... to stack
|
176
|
-
|
177
|
-
ldrb $nlo,[$inp,#15]
|
178
|
-
ldrb $nhi,[$Xi,#15]
|
179
|
-
.Louter:
|
180
|
-
eor $nlo,$nlo,$nhi
|
181
|
-
and $nhi,$nlo,#0xf0
|
182
|
-
and $nlo,$nlo,#0x0f
|
183
|
-
mov $cnt,#14
|
184
|
-
|
185
|
-
add $Zhh,$Htbl,$nlo,lsl#4
|
186
|
-
ldmia $Zhh,{$Zll-$Zhh} @ load Htbl[nlo]
|
187
|
-
add $Thh,$Htbl,$nhi
|
188
|
-
ldrb $nlo,[$inp,#14]
|
189
|
-
|
190
|
-
and $nhi,$Zll,#0xf @ rem
|
191
|
-
ldmia $Thh,{$Tll-$Thh} @ load Htbl[nhi]
|
192
|
-
add $nhi,$nhi,$nhi
|
193
|
-
eor $Zll,$Tll,$Zll,lsr#4
|
194
|
-
ldrh $Tll,[sp,$nhi] @ rem_4bit[rem]
|
195
|
-
eor $Zll,$Zll,$Zlh,lsl#28
|
196
|
-
ldrb $nhi,[$Xi,#14]
|
197
|
-
eor $Zlh,$Tlh,$Zlh,lsr#4
|
198
|
-
eor $Zlh,$Zlh,$Zhl,lsl#28
|
199
|
-
eor $Zhl,$Thl,$Zhl,lsr#4
|
200
|
-
eor $Zhl,$Zhl,$Zhh,lsl#28
|
201
|
-
eor $Zhh,$Thh,$Zhh,lsr#4
|
202
|
-
eor $nlo,$nlo,$nhi
|
203
|
-
and $nhi,$nlo,#0xf0
|
204
|
-
and $nlo,$nlo,#0x0f
|
205
|
-
eor $Zhh,$Zhh,$Tll,lsl#16
|
206
|
-
|
207
|
-
.Linner:
|
208
|
-
add $Thh,$Htbl,$nlo,lsl#4
|
209
|
-
and $nlo,$Zll,#0xf @ rem
|
210
|
-
subs $cnt,$cnt,#1
|
211
|
-
add $nlo,$nlo,$nlo
|
212
|
-
ldmia $Thh,{$Tll-$Thh} @ load Htbl[nlo]
|
213
|
-
eor $Zll,$Tll,$Zll,lsr#4
|
214
|
-
eor $Zll,$Zll,$Zlh,lsl#28
|
215
|
-
eor $Zlh,$Tlh,$Zlh,lsr#4
|
216
|
-
eor $Zlh,$Zlh,$Zhl,lsl#28
|
217
|
-
ldrh $Tll,[sp,$nlo] @ rem_4bit[rem]
|
218
|
-
eor $Zhl,$Thl,$Zhl,lsr#4
|
219
|
-
ldrbpl $nlo,[$inp,$cnt]
|
220
|
-
eor $Zhl,$Zhl,$Zhh,lsl#28
|
221
|
-
eor $Zhh,$Thh,$Zhh,lsr#4
|
222
|
-
|
223
|
-
add $Thh,$Htbl,$nhi
|
224
|
-
and $nhi,$Zll,#0xf @ rem
|
225
|
-
eor $Zhh,$Zhh,$Tll,lsl#16 @ ^= rem_4bit[rem]
|
226
|
-
add $nhi,$nhi,$nhi
|
227
|
-
ldmia $Thh,{$Tll-$Thh} @ load Htbl[nhi]
|
228
|
-
eor $Zll,$Tll,$Zll,lsr#4
|
229
|
-
ldrbpl $Tll,[$Xi,$cnt]
|
230
|
-
eor $Zll,$Zll,$Zlh,lsl#28
|
231
|
-
eor $Zlh,$Tlh,$Zlh,lsr#4
|
232
|
-
ldrh $Tlh,[sp,$nhi]
|
233
|
-
eor $Zlh,$Zlh,$Zhl,lsl#28
|
234
|
-
eor $Zhl,$Thl,$Zhl,lsr#4
|
235
|
-
eor $Zhl,$Zhl,$Zhh,lsl#28
|
236
|
-
eorpl $nlo,$nlo,$Tll
|
237
|
-
eor $Zhh,$Thh,$Zhh,lsr#4
|
238
|
-
andpl $nhi,$nlo,#0xf0
|
239
|
-
andpl $nlo,$nlo,#0x0f
|
240
|
-
eor $Zhh,$Zhh,$Tlh,lsl#16 @ ^= rem_4bit[rem]
|
241
|
-
bpl .Linner
|
242
|
-
|
243
|
-
ldr $len,[sp,#32] @ re-load $len/end
|
244
|
-
add $inp,$inp,#16
|
245
|
-
mov $nhi,$Zll
|
246
|
-
___
|
247
|
-
&Zsmash("cmp\t$inp,$len","ldrbne\t$nlo,[$inp,#15]");
|
248
|
-
$code.=<<___;
|
249
|
-
bne .Louter
|
250
|
-
|
251
|
-
add sp,sp,#36
|
252
|
-
#if __ARM_ARCH__>=5
|
253
|
-
ldmia sp!,{r4-r11,pc}
|
254
|
-
#else
|
255
|
-
ldmia sp!,{r4-r11,lr}
|
256
|
-
tst lr,#1
|
257
|
-
moveq pc,lr @ be binary compatible with V4, yet
|
258
|
-
bx lr @ interoperable with Thumb ISA:-)
|
259
|
-
#endif
|
260
|
-
.size gcm_ghash_4bit,.-gcm_ghash_4bit
|
261
|
-
|
262
|
-
.global gcm_gmult_4bit
|
263
|
-
.hidden gcm_gmult_4bit
|
264
|
-
.type gcm_gmult_4bit,%function
|
265
|
-
gcm_gmult_4bit:
|
266
|
-
stmdb sp!,{r4-r11,lr}
|
267
|
-
ldrb $nlo,[$Xi,#15]
|
268
|
-
b rem_4bit_get
|
269
|
-
.Lrem_4bit_got:
|
270
|
-
and $nhi,$nlo,#0xf0
|
271
|
-
and $nlo,$nlo,#0x0f
|
272
|
-
mov $cnt,#14
|
273
|
-
|
274
|
-
add $Zhh,$Htbl,$nlo,lsl#4
|
275
|
-
ldmia $Zhh,{$Zll-$Zhh} @ load Htbl[nlo]
|
276
|
-
ldrb $nlo,[$Xi,#14]
|
277
|
-
|
278
|
-
add $Thh,$Htbl,$nhi
|
279
|
-
and $nhi,$Zll,#0xf @ rem
|
280
|
-
ldmia $Thh,{$Tll-$Thh} @ load Htbl[nhi]
|
281
|
-
add $nhi,$nhi,$nhi
|
282
|
-
eor $Zll,$Tll,$Zll,lsr#4
|
283
|
-
ldrh $Tll,[$rem_4bit,$nhi] @ rem_4bit[rem]
|
284
|
-
eor $Zll,$Zll,$Zlh,lsl#28
|
285
|
-
eor $Zlh,$Tlh,$Zlh,lsr#4
|
286
|
-
eor $Zlh,$Zlh,$Zhl,lsl#28
|
287
|
-
eor $Zhl,$Thl,$Zhl,lsr#4
|
288
|
-
eor $Zhl,$Zhl,$Zhh,lsl#28
|
289
|
-
eor $Zhh,$Thh,$Zhh,lsr#4
|
290
|
-
and $nhi,$nlo,#0xf0
|
291
|
-
eor $Zhh,$Zhh,$Tll,lsl#16
|
292
|
-
and $nlo,$nlo,#0x0f
|
293
|
-
|
294
|
-
.Loop:
|
295
|
-
add $Thh,$Htbl,$nlo,lsl#4
|
296
|
-
and $nlo,$Zll,#0xf @ rem
|
297
|
-
subs $cnt,$cnt,#1
|
298
|
-
add $nlo,$nlo,$nlo
|
299
|
-
ldmia $Thh,{$Tll-$Thh} @ load Htbl[nlo]
|
300
|
-
eor $Zll,$Tll,$Zll,lsr#4
|
301
|
-
eor $Zll,$Zll,$Zlh,lsl#28
|
302
|
-
eor $Zlh,$Tlh,$Zlh,lsr#4
|
303
|
-
eor $Zlh,$Zlh,$Zhl,lsl#28
|
304
|
-
ldrh $Tll,[$rem_4bit,$nlo] @ rem_4bit[rem]
|
305
|
-
eor $Zhl,$Thl,$Zhl,lsr#4
|
306
|
-
ldrbpl $nlo,[$Xi,$cnt]
|
307
|
-
eor $Zhl,$Zhl,$Zhh,lsl#28
|
308
|
-
eor $Zhh,$Thh,$Zhh,lsr#4
|
309
|
-
|
310
|
-
add $Thh,$Htbl,$nhi
|
311
|
-
and $nhi,$Zll,#0xf @ rem
|
312
|
-
eor $Zhh,$Zhh,$Tll,lsl#16 @ ^= rem_4bit[rem]
|
313
|
-
add $nhi,$nhi,$nhi
|
314
|
-
ldmia $Thh,{$Tll-$Thh} @ load Htbl[nhi]
|
315
|
-
eor $Zll,$Tll,$Zll,lsr#4
|
316
|
-
eor $Zll,$Zll,$Zlh,lsl#28
|
317
|
-
eor $Zlh,$Tlh,$Zlh,lsr#4
|
318
|
-
ldrh $Tll,[$rem_4bit,$nhi] @ rem_4bit[rem]
|
319
|
-
eor $Zlh,$Zlh,$Zhl,lsl#28
|
320
|
-
eor $Zhl,$Thl,$Zhl,lsr#4
|
321
|
-
eor $Zhl,$Zhl,$Zhh,lsl#28
|
322
|
-
eor $Zhh,$Thh,$Zhh,lsr#4
|
323
|
-
andpl $nhi,$nlo,#0xf0
|
324
|
-
andpl $nlo,$nlo,#0x0f
|
325
|
-
eor $Zhh,$Zhh,$Tll,lsl#16 @ ^= rem_4bit[rem]
|
326
|
-
bpl .Loop
|
327
|
-
___
|
328
|
-
&Zsmash();
|
329
|
-
$code.=<<___;
|
330
|
-
#if __ARM_ARCH__>=5
|
331
|
-
ldmia sp!,{r4-r11,pc}
|
332
|
-
#else
|
333
|
-
ldmia sp!,{r4-r11,lr}
|
334
|
-
tst lr,#1
|
335
|
-
moveq pc,lr @ be binary compatible with V4, yet
|
336
|
-
bx lr @ interoperable with Thumb ISA:-)
|
337
|
-
#endif
|
338
|
-
.size gcm_gmult_4bit,.-gcm_gmult_4bit
|
339
|
-
___
|
340
|
-
{
|
341
|
-
my ($Xl,$Xm,$Xh,$IN)=map("q$_",(0..3));
|
342
|
-
my ($t0,$t1,$t2,$t3)=map("q$_",(8..12));
|
343
|
-
my ($Hlo,$Hhi,$Hhl,$k48,$k32,$k16)=map("d$_",(26..31));
|
344
|
-
|
345
|
-
sub clmul64x64 {
|
346
|
-
my ($r,$a,$b)=@_;
|
347
|
-
$code.=<<___;
|
348
|
-
vext.8 $t0#lo, $a, $a, #1 @ A1
|
349
|
-
vmull.p8 $t0, $t0#lo, $b @ F = A1*B
|
350
|
-
vext.8 $r#lo, $b, $b, #1 @ B1
|
351
|
-
vmull.p8 $r, $a, $r#lo @ E = A*B1
|
352
|
-
vext.8 $t1#lo, $a, $a, #2 @ A2
|
353
|
-
vmull.p8 $t1, $t1#lo, $b @ H = A2*B
|
354
|
-
vext.8 $t3#lo, $b, $b, #2 @ B2
|
355
|
-
vmull.p8 $t3, $a, $t3#lo @ G = A*B2
|
356
|
-
vext.8 $t2#lo, $a, $a, #3 @ A3
|
357
|
-
veor $t0, $t0, $r @ L = E + F
|
358
|
-
vmull.p8 $t2, $t2#lo, $b @ J = A3*B
|
359
|
-
vext.8 $r#lo, $b, $b, #3 @ B3
|
360
|
-
veor $t1, $t1, $t3 @ M = G + H
|
361
|
-
vmull.p8 $r, $a, $r#lo @ I = A*B3
|
362
|
-
veor $t0#lo, $t0#lo, $t0#hi @ t0 = (L) (P0 + P1) << 8
|
363
|
-
vand $t0#hi, $t0#hi, $k48
|
364
|
-
vext.8 $t3#lo, $b, $b, #4 @ B4
|
365
|
-
veor $t1#lo, $t1#lo, $t1#hi @ t1 = (M) (P2 + P3) << 16
|
366
|
-
vand $t1#hi, $t1#hi, $k32
|
367
|
-
vmull.p8 $t3, $a, $t3#lo @ K = A*B4
|
368
|
-
veor $t2, $t2, $r @ N = I + J
|
369
|
-
veor $t0#lo, $t0#lo, $t0#hi
|
370
|
-
veor $t1#lo, $t1#lo, $t1#hi
|
371
|
-
veor $t2#lo, $t2#lo, $t2#hi @ t2 = (N) (P4 + P5) << 24
|
372
|
-
vand $t2#hi, $t2#hi, $k16
|
373
|
-
vext.8 $t0, $t0, $t0, #15
|
374
|
-
veor $t3#lo, $t3#lo, $t3#hi @ t3 = (K) (P6 + P7) << 32
|
375
|
-
vmov.i64 $t3#hi, #0
|
376
|
-
vext.8 $t1, $t1, $t1, #14
|
377
|
-
veor $t2#lo, $t2#lo, $t2#hi
|
378
|
-
vmull.p8 $r, $a, $b @ D = A*B
|
379
|
-
vext.8 $t3, $t3, $t3, #12
|
380
|
-
vext.8 $t2, $t2, $t2, #13
|
381
|
-
veor $t0, $t0, $t1
|
382
|
-
veor $t2, $t2, $t3
|
383
|
-
veor $r, $r, $t0
|
384
|
-
veor $r, $r, $t2
|
385
|
-
___
|
386
|
-
}
|
387
|
-
|
388
|
-
$code.=<<___;
|
389
|
-
#if __ARM_MAX_ARCH__>=7
|
390
|
-
.arch armv7-a
|
391
|
-
.fpu neon
|
392
|
-
|
393
|
-
.global gcm_init_neon
|
394
|
-
.hidden gcm_init_neon
|
395
|
-
.type gcm_init_neon,%function
|
396
|
-
.align 4
|
397
|
-
gcm_init_neon:
|
398
|
-
vld1.64 $IN#hi,[r1]! @ load H
|
399
|
-
vmov.i8 $t0,#0xe1
|
400
|
-
vld1.64 $IN#lo,[r1]
|
401
|
-
vshl.i64 $t0#hi,#57
|
402
|
-
vshr.u64 $t0#lo,#63 @ t0=0xc2....01
|
403
|
-
vdup.8 $t1,$IN#hi[7]
|
404
|
-
vshr.u64 $Hlo,$IN#lo,#63
|
405
|
-
vshr.s8 $t1,#7 @ broadcast carry bit
|
406
|
-
vshl.i64 $IN,$IN,#1
|
407
|
-
vand $t0,$t0,$t1
|
408
|
-
vorr $IN#hi,$Hlo @ H<<<=1
|
409
|
-
veor $IN,$IN,$t0 @ twisted H
|
410
|
-
vstmia r0,{$IN}
|
411
|
-
|
412
|
-
ret @ bx lr
|
413
|
-
.size gcm_init_neon,.-gcm_init_neon
|
414
|
-
|
415
|
-
.global gcm_gmult_neon
|
416
|
-
.hidden gcm_gmult_neon
|
417
|
-
.type gcm_gmult_neon,%function
|
418
|
-
.align 4
|
419
|
-
gcm_gmult_neon:
|
420
|
-
vld1.64 $IN#hi,[$Xi]! @ load Xi
|
421
|
-
vld1.64 $IN#lo,[$Xi]!
|
422
|
-
vmov.i64 $k48,#0x0000ffffffffffff
|
423
|
-
vldmia $Htbl,{$Hlo-$Hhi} @ load twisted H
|
424
|
-
vmov.i64 $k32,#0x00000000ffffffff
|
425
|
-
#ifdef __ARMEL__
|
426
|
-
vrev64.8 $IN,$IN
|
427
|
-
#endif
|
428
|
-
vmov.i64 $k16,#0x000000000000ffff
|
429
|
-
veor $Hhl,$Hlo,$Hhi @ Karatsuba pre-processing
|
430
|
-
mov $len,#16
|
431
|
-
b .Lgmult_neon
|
432
|
-
.size gcm_gmult_neon,.-gcm_gmult_neon
|
433
|
-
|
434
|
-
.global gcm_ghash_neon
|
435
|
-
.hidden gcm_ghash_neon
|
436
|
-
.type gcm_ghash_neon,%function
|
437
|
-
.align 4
|
438
|
-
gcm_ghash_neon:
|
439
|
-
vld1.64 $Xl#hi,[$Xi]! @ load Xi
|
440
|
-
vld1.64 $Xl#lo,[$Xi]!
|
441
|
-
vmov.i64 $k48,#0x0000ffffffffffff
|
442
|
-
vldmia $Htbl,{$Hlo-$Hhi} @ load twisted H
|
443
|
-
vmov.i64 $k32,#0x00000000ffffffff
|
444
|
-
#ifdef __ARMEL__
|
445
|
-
vrev64.8 $Xl,$Xl
|
446
|
-
#endif
|
447
|
-
vmov.i64 $k16,#0x000000000000ffff
|
448
|
-
veor $Hhl,$Hlo,$Hhi @ Karatsuba pre-processing
|
449
|
-
|
450
|
-
.Loop_neon:
|
451
|
-
vld1.64 $IN#hi,[$inp]! @ load inp
|
452
|
-
vld1.64 $IN#lo,[$inp]!
|
453
|
-
#ifdef __ARMEL__
|
454
|
-
vrev64.8 $IN,$IN
|
455
|
-
#endif
|
456
|
-
veor $IN,$Xl @ inp^=Xi
|
457
|
-
.Lgmult_neon:
|
458
|
-
___
|
459
|
-
&clmul64x64 ($Xl,$Hlo,"$IN#lo"); # H.lo·Xi.lo
|
460
|
-
$code.=<<___;
|
461
|
-
veor $IN#lo,$IN#lo,$IN#hi @ Karatsuba pre-processing
|
462
|
-
___
|
463
|
-
&clmul64x64 ($Xm,$Hhl,"$IN#lo"); # (H.lo+H.hi)·(Xi.lo+Xi.hi)
|
464
|
-
&clmul64x64 ($Xh,$Hhi,"$IN#hi"); # H.hi·Xi.hi
|
465
|
-
$code.=<<___;
|
466
|
-
veor $Xm,$Xm,$Xl @ Karatsuba post-processing
|
467
|
-
veor $Xm,$Xm,$Xh
|
468
|
-
veor $Xl#hi,$Xl#hi,$Xm#lo
|
469
|
-
veor $Xh#lo,$Xh#lo,$Xm#hi @ Xh|Xl - 256-bit result
|
470
|
-
|
471
|
-
@ equivalent of reduction_avx from ghash-x86_64.pl
|
472
|
-
vshl.i64 $t1,$Xl,#57 @ 1st phase
|
473
|
-
vshl.i64 $t2,$Xl,#62
|
474
|
-
veor $t2,$t2,$t1 @
|
475
|
-
vshl.i64 $t1,$Xl,#63
|
476
|
-
veor $t2, $t2, $t1 @
|
477
|
-
veor $Xl#hi,$Xl#hi,$t2#lo @
|
478
|
-
veor $Xh#lo,$Xh#lo,$t2#hi
|
479
|
-
|
480
|
-
vshr.u64 $t2,$Xl,#1 @ 2nd phase
|
481
|
-
veor $Xh,$Xh,$Xl
|
482
|
-
veor $Xl,$Xl,$t2 @
|
483
|
-
vshr.u64 $t2,$t2,#6
|
484
|
-
vshr.u64 $Xl,$Xl,#1 @
|
485
|
-
veor $Xl,$Xl,$Xh @
|
486
|
-
veor $Xl,$Xl,$t2 @
|
487
|
-
|
488
|
-
subs $len,#16
|
489
|
-
bne .Loop_neon
|
490
|
-
|
491
|
-
#ifdef __ARMEL__
|
492
|
-
vrev64.8 $Xl,$Xl
|
493
|
-
#endif
|
494
|
-
sub $Xi,#16
|
495
|
-
vst1.64 $Xl#hi,[$Xi]! @ write out Xi
|
496
|
-
vst1.64 $Xl#lo,[$Xi]
|
497
|
-
|
498
|
-
ret @ bx lr
|
499
|
-
.size gcm_ghash_neon,.-gcm_ghash_neon
|
500
|
-
#endif
|
501
|
-
___
|
502
|
-
}
|
503
|
-
$code.=<<___;
|
504
|
-
.asciz "GHASH for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
|
505
|
-
.align 2
|
506
|
-
___
|
507
|
-
|
508
|
-
foreach (split("\n",$code)) {
|
509
|
-
s/\`([^\`]*)\`/eval $1/geo;
|
510
|
-
|
511
|
-
s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo or
|
512
|
-
s/\bret\b/bx lr/go or
|
513
|
-
s/\bbx\s+lr\b/.word\t0xe12fff1e/go; # make it possible to compile with -march=armv4
|
514
|
-
|
515
|
-
print $_,"\n";
|
516
|
-
}
|
517
|
-
# Closing STDOUT flushes the buffered output. When STDOUT is the pipe to
# arm-xlate.pl, close also waits for that process and returns false if it
# exited with an error, so check the result instead of silently leaving
# truncated or missing assembly behind.
close STDOUT or die "error closing STDOUT: $! (exit status $?)";	# enforce flush
|