ring-native 0.0.0 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/CHANGES.md +7 -0
- data/Makefile +5 -0
- data/README.md +12 -5
- data/Rakefile +4 -0
- data/ext/ring/extconf.rb +4 -5
- data/lib/ring/native.rb +3 -1
- data/lib/ring/native/version.rb +5 -1
- data/ring-native.gemspec +6 -6
- data/vendor/ring-ffi/Cargo.lock +26 -0
- data/vendor/ring-ffi/Cargo.toml +45 -0
- data/vendor/ring-ffi/LICENSE +16 -0
- data/vendor/ring-ffi/README.md +59 -0
- data/vendor/ring-ffi/src/lib.rs +79 -0
- metadata +10 -255
- data/vendor/ring/BUILDING.md +0 -40
- data/vendor/ring/Cargo.toml +0 -43
- data/vendor/ring/LICENSE +0 -185
- data/vendor/ring/Makefile +0 -35
- data/vendor/ring/PORTING.md +0 -163
- data/vendor/ring/README.md +0 -113
- data/vendor/ring/STYLE.md +0 -197
- data/vendor/ring/appveyor.yml +0 -27
- data/vendor/ring/build.rs +0 -108
- data/vendor/ring/crypto/aes/aes.c +0 -1142
- data/vendor/ring/crypto/aes/aes_test.Windows.vcxproj +0 -25
- data/vendor/ring/crypto/aes/aes_test.cc +0 -93
- data/vendor/ring/crypto/aes/asm/aes-586.pl +0 -2368
- data/vendor/ring/crypto/aes/asm/aes-armv4.pl +0 -1249
- data/vendor/ring/crypto/aes/asm/aes-x86_64.pl +0 -2246
- data/vendor/ring/crypto/aes/asm/aesni-x86.pl +0 -1318
- data/vendor/ring/crypto/aes/asm/aesni-x86_64.pl +0 -2084
- data/vendor/ring/crypto/aes/asm/aesv8-armx.pl +0 -675
- data/vendor/ring/crypto/aes/asm/bsaes-armv7.pl +0 -1364
- data/vendor/ring/crypto/aes/asm/bsaes-x86_64.pl +0 -1565
- data/vendor/ring/crypto/aes/asm/vpaes-x86.pl +0 -841
- data/vendor/ring/crypto/aes/asm/vpaes-x86_64.pl +0 -1116
- data/vendor/ring/crypto/aes/internal.h +0 -87
- data/vendor/ring/crypto/aes/mode_wrappers.c +0 -61
- data/vendor/ring/crypto/bn/add.c +0 -394
- data/vendor/ring/crypto/bn/asm/armv4-mont.pl +0 -694
- data/vendor/ring/crypto/bn/asm/armv8-mont.pl +0 -1503
- data/vendor/ring/crypto/bn/asm/bn-586.pl +0 -774
- data/vendor/ring/crypto/bn/asm/co-586.pl +0 -287
- data/vendor/ring/crypto/bn/asm/rsaz-avx2.pl +0 -1882
- data/vendor/ring/crypto/bn/asm/x86-mont.pl +0 -592
- data/vendor/ring/crypto/bn/asm/x86_64-gcc.c +0 -599
- data/vendor/ring/crypto/bn/asm/x86_64-mont.pl +0 -1393
- data/vendor/ring/crypto/bn/asm/x86_64-mont5.pl +0 -3507
- data/vendor/ring/crypto/bn/bn.c +0 -352
- data/vendor/ring/crypto/bn/bn_asn1.c +0 -74
- data/vendor/ring/crypto/bn/bn_test.Windows.vcxproj +0 -25
- data/vendor/ring/crypto/bn/bn_test.cc +0 -1696
- data/vendor/ring/crypto/bn/cmp.c +0 -200
- data/vendor/ring/crypto/bn/convert.c +0 -433
- data/vendor/ring/crypto/bn/ctx.c +0 -311
- data/vendor/ring/crypto/bn/div.c +0 -594
- data/vendor/ring/crypto/bn/exponentiation.c +0 -1335
- data/vendor/ring/crypto/bn/gcd.c +0 -711
- data/vendor/ring/crypto/bn/generic.c +0 -1019
- data/vendor/ring/crypto/bn/internal.h +0 -316
- data/vendor/ring/crypto/bn/montgomery.c +0 -516
- data/vendor/ring/crypto/bn/mul.c +0 -888
- data/vendor/ring/crypto/bn/prime.c +0 -829
- data/vendor/ring/crypto/bn/random.c +0 -334
- data/vendor/ring/crypto/bn/rsaz_exp.c +0 -262
- data/vendor/ring/crypto/bn/rsaz_exp.h +0 -53
- data/vendor/ring/crypto/bn/shift.c +0 -276
- data/vendor/ring/crypto/bytestring/bytestring_test.Windows.vcxproj +0 -25
- data/vendor/ring/crypto/bytestring/bytestring_test.cc +0 -421
- data/vendor/ring/crypto/bytestring/cbb.c +0 -399
- data/vendor/ring/crypto/bytestring/cbs.c +0 -227
- data/vendor/ring/crypto/bytestring/internal.h +0 -46
- data/vendor/ring/crypto/chacha/chacha_generic.c +0 -140
- data/vendor/ring/crypto/chacha/chacha_vec.c +0 -323
- data/vendor/ring/crypto/chacha/chacha_vec_arm.S +0 -1447
- data/vendor/ring/crypto/chacha/chacha_vec_arm_generate.go +0 -153
- data/vendor/ring/crypto/cipher/cipher_test.Windows.vcxproj +0 -25
- data/vendor/ring/crypto/cipher/e_aes.c +0 -390
- data/vendor/ring/crypto/cipher/e_chacha20poly1305.c +0 -208
- data/vendor/ring/crypto/cipher/internal.h +0 -173
- data/vendor/ring/crypto/cipher/test/aes_128_gcm_tests.txt +0 -543
- data/vendor/ring/crypto/cipher/test/aes_128_key_wrap_tests.txt +0 -9
- data/vendor/ring/crypto/cipher/test/aes_256_gcm_tests.txt +0 -475
- data/vendor/ring/crypto/cipher/test/aes_256_key_wrap_tests.txt +0 -23
- data/vendor/ring/crypto/cipher/test/chacha20_poly1305_old_tests.txt +0 -422
- data/vendor/ring/crypto/cipher/test/chacha20_poly1305_tests.txt +0 -484
- data/vendor/ring/crypto/cipher/test/cipher_test.txt +0 -100
- data/vendor/ring/crypto/constant_time_test.Windows.vcxproj +0 -25
- data/vendor/ring/crypto/constant_time_test.c +0 -304
- data/vendor/ring/crypto/cpu-arm-asm.S +0 -32
- data/vendor/ring/crypto/cpu-arm.c +0 -199
- data/vendor/ring/crypto/cpu-intel.c +0 -261
- data/vendor/ring/crypto/crypto.c +0 -151
- data/vendor/ring/crypto/curve25519/asm/x25519-arm.S +0 -2118
- data/vendor/ring/crypto/curve25519/curve25519.c +0 -4888
- data/vendor/ring/crypto/curve25519/x25519_test.cc +0 -128
- data/vendor/ring/crypto/digest/md32_common.h +0 -181
- data/vendor/ring/crypto/ec/asm/p256-x86_64-asm.pl +0 -2725
- data/vendor/ring/crypto/ec/ec.c +0 -193
- data/vendor/ring/crypto/ec/ec_curves.c +0 -61
- data/vendor/ring/crypto/ec/ec_key.c +0 -228
- data/vendor/ring/crypto/ec/ec_montgomery.c +0 -114
- data/vendor/ring/crypto/ec/example_mul.Windows.vcxproj +0 -25
- data/vendor/ring/crypto/ec/internal.h +0 -243
- data/vendor/ring/crypto/ec/oct.c +0 -253
- data/vendor/ring/crypto/ec/p256-64.c +0 -1794
- data/vendor/ring/crypto/ec/p256-x86_64-table.h +0 -9548
- data/vendor/ring/crypto/ec/p256-x86_64.c +0 -509
- data/vendor/ring/crypto/ec/simple.c +0 -1007
- data/vendor/ring/crypto/ec/util-64.c +0 -183
- data/vendor/ring/crypto/ec/wnaf.c +0 -508
- data/vendor/ring/crypto/ecdh/ecdh.c +0 -155
- data/vendor/ring/crypto/ecdsa/ecdsa.c +0 -304
- data/vendor/ring/crypto/ecdsa/ecdsa_asn1.c +0 -193
- data/vendor/ring/crypto/ecdsa/ecdsa_test.Windows.vcxproj +0 -25
- data/vendor/ring/crypto/ecdsa/ecdsa_test.cc +0 -327
- data/vendor/ring/crypto/header_removed.h +0 -17
- data/vendor/ring/crypto/internal.h +0 -495
- data/vendor/ring/crypto/libring.Windows.vcxproj +0 -101
- data/vendor/ring/crypto/mem.c +0 -98
- data/vendor/ring/crypto/modes/asm/aesni-gcm-x86_64.pl +0 -1045
- data/vendor/ring/crypto/modes/asm/ghash-armv4.pl +0 -517
- data/vendor/ring/crypto/modes/asm/ghash-x86.pl +0 -1393
- data/vendor/ring/crypto/modes/asm/ghash-x86_64.pl +0 -1741
- data/vendor/ring/crypto/modes/asm/ghashv8-armx.pl +0 -422
- data/vendor/ring/crypto/modes/ctr.c +0 -226
- data/vendor/ring/crypto/modes/gcm.c +0 -1206
- data/vendor/ring/crypto/modes/gcm_test.Windows.vcxproj +0 -25
- data/vendor/ring/crypto/modes/gcm_test.c +0 -348
- data/vendor/ring/crypto/modes/internal.h +0 -299
- data/vendor/ring/crypto/perlasm/arm-xlate.pl +0 -170
- data/vendor/ring/crypto/perlasm/readme +0 -100
- data/vendor/ring/crypto/perlasm/x86_64-xlate.pl +0 -1164
- data/vendor/ring/crypto/perlasm/x86asm.pl +0 -292
- data/vendor/ring/crypto/perlasm/x86gas.pl +0 -263
- data/vendor/ring/crypto/perlasm/x86masm.pl +0 -200
- data/vendor/ring/crypto/perlasm/x86nasm.pl +0 -187
- data/vendor/ring/crypto/poly1305/poly1305.c +0 -331
- data/vendor/ring/crypto/poly1305/poly1305_arm.c +0 -301
- data/vendor/ring/crypto/poly1305/poly1305_arm_asm.S +0 -2015
- data/vendor/ring/crypto/poly1305/poly1305_test.Windows.vcxproj +0 -25
- data/vendor/ring/crypto/poly1305/poly1305_test.cc +0 -80
- data/vendor/ring/crypto/poly1305/poly1305_test.txt +0 -52
- data/vendor/ring/crypto/poly1305/poly1305_vec.c +0 -892
- data/vendor/ring/crypto/rand/asm/rdrand-x86_64.pl +0 -75
- data/vendor/ring/crypto/rand/internal.h +0 -32
- data/vendor/ring/crypto/rand/rand.c +0 -189
- data/vendor/ring/crypto/rand/urandom.c +0 -219
- data/vendor/ring/crypto/rand/windows.c +0 -56
- data/vendor/ring/crypto/refcount_c11.c +0 -66
- data/vendor/ring/crypto/refcount_lock.c +0 -53
- data/vendor/ring/crypto/refcount_test.Windows.vcxproj +0 -25
- data/vendor/ring/crypto/refcount_test.c +0 -58
- data/vendor/ring/crypto/rsa/blinding.c +0 -462
- data/vendor/ring/crypto/rsa/internal.h +0 -108
- data/vendor/ring/crypto/rsa/padding.c +0 -300
- data/vendor/ring/crypto/rsa/rsa.c +0 -450
- data/vendor/ring/crypto/rsa/rsa_asn1.c +0 -261
- data/vendor/ring/crypto/rsa/rsa_impl.c +0 -944
- data/vendor/ring/crypto/rsa/rsa_test.Windows.vcxproj +0 -25
- data/vendor/ring/crypto/rsa/rsa_test.cc +0 -437
- data/vendor/ring/crypto/sha/asm/sha-armv8.pl +0 -436
- data/vendor/ring/crypto/sha/asm/sha-x86_64.pl +0 -2390
- data/vendor/ring/crypto/sha/asm/sha256-586.pl +0 -1275
- data/vendor/ring/crypto/sha/asm/sha256-armv4.pl +0 -735
- data/vendor/ring/crypto/sha/asm/sha256-armv8.pl +0 -14
- data/vendor/ring/crypto/sha/asm/sha256-x86_64.pl +0 -14
- data/vendor/ring/crypto/sha/asm/sha512-586.pl +0 -911
- data/vendor/ring/crypto/sha/asm/sha512-armv4.pl +0 -666
- data/vendor/ring/crypto/sha/asm/sha512-armv8.pl +0 -14
- data/vendor/ring/crypto/sha/asm/sha512-x86_64.pl +0 -14
- data/vendor/ring/crypto/sha/sha1.c +0 -271
- data/vendor/ring/crypto/sha/sha256.c +0 -204
- data/vendor/ring/crypto/sha/sha512.c +0 -355
- data/vendor/ring/crypto/test/file_test.cc +0 -326
- data/vendor/ring/crypto/test/file_test.h +0 -181
- data/vendor/ring/crypto/test/malloc.cc +0 -150
- data/vendor/ring/crypto/test/scoped_types.h +0 -95
- data/vendor/ring/crypto/test/test.Windows.vcxproj +0 -35
- data/vendor/ring/crypto/test/test_util.cc +0 -46
- data/vendor/ring/crypto/test/test_util.h +0 -41
- data/vendor/ring/crypto/thread_none.c +0 -55
- data/vendor/ring/crypto/thread_pthread.c +0 -165
- data/vendor/ring/crypto/thread_test.Windows.vcxproj +0 -25
- data/vendor/ring/crypto/thread_test.c +0 -200
- data/vendor/ring/crypto/thread_win.c +0 -282
- data/vendor/ring/examples/checkdigest.rs +0 -103
- data/vendor/ring/include/openssl/aes.h +0 -121
- data/vendor/ring/include/openssl/arm_arch.h +0 -129
- data/vendor/ring/include/openssl/base.h +0 -156
- data/vendor/ring/include/openssl/bn.h +0 -794
- data/vendor/ring/include/openssl/buffer.h +0 -18
- data/vendor/ring/include/openssl/bytestring.h +0 -235
- data/vendor/ring/include/openssl/chacha.h +0 -37
- data/vendor/ring/include/openssl/cmac.h +0 -76
- data/vendor/ring/include/openssl/cpu.h +0 -184
- data/vendor/ring/include/openssl/crypto.h +0 -43
- data/vendor/ring/include/openssl/curve25519.h +0 -88
- data/vendor/ring/include/openssl/ec.h +0 -225
- data/vendor/ring/include/openssl/ec_key.h +0 -129
- data/vendor/ring/include/openssl/ecdh.h +0 -110
- data/vendor/ring/include/openssl/ecdsa.h +0 -156
- data/vendor/ring/include/openssl/err.h +0 -201
- data/vendor/ring/include/openssl/mem.h +0 -101
- data/vendor/ring/include/openssl/obj_mac.h +0 -71
- data/vendor/ring/include/openssl/opensslfeatures.h +0 -68
- data/vendor/ring/include/openssl/opensslv.h +0 -18
- data/vendor/ring/include/openssl/ossl_typ.h +0 -18
- data/vendor/ring/include/openssl/poly1305.h +0 -51
- data/vendor/ring/include/openssl/rand.h +0 -70
- data/vendor/ring/include/openssl/rsa.h +0 -399
- data/vendor/ring/include/openssl/thread.h +0 -133
- data/vendor/ring/include/openssl/type_check.h +0 -71
- data/vendor/ring/mk/Common.props +0 -63
- data/vendor/ring/mk/Windows.props +0 -42
- data/vendor/ring/mk/WindowsTest.props +0 -18
- data/vendor/ring/mk/appveyor.bat +0 -62
- data/vendor/ring/mk/bottom_of_makefile.mk +0 -54
- data/vendor/ring/mk/ring.mk +0 -266
- data/vendor/ring/mk/top_of_makefile.mk +0 -214
- data/vendor/ring/mk/travis.sh +0 -40
- data/vendor/ring/mk/update-travis-yml.py +0 -229
- data/vendor/ring/ring.sln +0 -153
- data/vendor/ring/src/aead.rs +0 -682
- data/vendor/ring/src/agreement.rs +0 -248
- data/vendor/ring/src/c.rs +0 -129
- data/vendor/ring/src/constant_time.rs +0 -37
- data/vendor/ring/src/der.rs +0 -96
- data/vendor/ring/src/digest.rs +0 -690
- data/vendor/ring/src/digest_tests.txt +0 -57
- data/vendor/ring/src/ecc.rs +0 -28
- data/vendor/ring/src/ecc_build.rs +0 -279
- data/vendor/ring/src/ecc_curves.rs +0 -117
- data/vendor/ring/src/ed25519_tests.txt +0 -2579
- data/vendor/ring/src/exe_tests.rs +0 -46
- data/vendor/ring/src/ffi.rs +0 -29
- data/vendor/ring/src/file_test.rs +0 -187
- data/vendor/ring/src/hkdf.rs +0 -153
- data/vendor/ring/src/hkdf_tests.txt +0 -59
- data/vendor/ring/src/hmac.rs +0 -414
- data/vendor/ring/src/hmac_tests.txt +0 -97
- data/vendor/ring/src/input.rs +0 -312
- data/vendor/ring/src/lib.rs +0 -41
- data/vendor/ring/src/pbkdf2.rs +0 -265
- data/vendor/ring/src/pbkdf2_tests.txt +0 -113
- data/vendor/ring/src/polyfill.rs +0 -57
- data/vendor/ring/src/rand.rs +0 -28
- data/vendor/ring/src/signature.rs +0 -314
- data/vendor/ring/third-party/NIST/README.md +0 -9
- data/vendor/ring/third-party/NIST/SHAVS/SHA1LongMsg.rsp +0 -263
- data/vendor/ring/third-party/NIST/SHAVS/SHA1Monte.rsp +0 -309
- data/vendor/ring/third-party/NIST/SHAVS/SHA1ShortMsg.rsp +0 -267
- data/vendor/ring/third-party/NIST/SHAVS/SHA224LongMsg.rsp +0 -263
- data/vendor/ring/third-party/NIST/SHAVS/SHA224Monte.rsp +0 -309
- data/vendor/ring/third-party/NIST/SHAVS/SHA224ShortMsg.rsp +0 -267
- data/vendor/ring/third-party/NIST/SHAVS/SHA256LongMsg.rsp +0 -263
- data/vendor/ring/third-party/NIST/SHAVS/SHA256Monte.rsp +0 -309
- data/vendor/ring/third-party/NIST/SHAVS/SHA256ShortMsg.rsp +0 -267
- data/vendor/ring/third-party/NIST/SHAVS/SHA384LongMsg.rsp +0 -519
- data/vendor/ring/third-party/NIST/SHAVS/SHA384Monte.rsp +0 -309
- data/vendor/ring/third-party/NIST/SHAVS/SHA384ShortMsg.rsp +0 -523
- data/vendor/ring/third-party/NIST/SHAVS/SHA512LongMsg.rsp +0 -519
- data/vendor/ring/third-party/NIST/SHAVS/SHA512Monte.rsp +0 -309
- data/vendor/ring/third-party/NIST/SHAVS/SHA512ShortMsg.rsp +0 -523
- data/vendor/ring/third-party/NIST/sha256sums.txt +0 -1
@@ -1,25 +0,0 @@
|
|
1
|
-
<?xml version="1.0" encoding="utf-8"?>
|
2
|
-
<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
3
|
-
<PropertyGroup Label="Globals">
|
4
|
-
<ProjectGuid>{CD0F021B-E347-4CCA-B5B7-CD1F757E15D6}</ProjectGuid>
|
5
|
-
<TargetName>poly1305_test</TargetName>
|
6
|
-
</PropertyGroup>
|
7
|
-
<ImportGroup Label="PropertySheets">
|
8
|
-
<Import Project="..\..\mk\WindowsTest.props" />
|
9
|
-
</ImportGroup>
|
10
|
-
<PropertyGroup Label="Configuration">
|
11
|
-
<OutDir>$(OutRootDir)test\ring\crypto\poly1305\</OutDir>
|
12
|
-
</PropertyGroup>
|
13
|
-
<ItemGroup>
|
14
|
-
<ClCompile Include="poly1305_test.cc" />
|
15
|
-
</ItemGroup>
|
16
|
-
<ItemGroup>
|
17
|
-
<ProjectReference Include="..\libring.Windows.vcxproj">
|
18
|
-
<Project>{f4c0a1b6-5e09-41c8-8242-3e1f6762fb18}</Project>
|
19
|
-
</ProjectReference>
|
20
|
-
<ProjectReference Include="..\test\test.Windows.vcxproj">
|
21
|
-
<Project>{1dace503-6498-492d-b1ff-f9ee18624443}</Project>
|
22
|
-
</ProjectReference>
|
23
|
-
</ItemGroup>
|
24
|
-
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
25
|
-
</Project>
|
@@ -1,80 +0,0 @@
|
|
1
|
-
/* Copyright (c) 2015, Google Inc.
|
2
|
-
*
|
3
|
-
* Permission to use, copy, modify, and/or distribute this software for any
|
4
|
-
* purpose with or without fee is hereby granted, provided that the above
|
5
|
-
* copyright notice and this permission notice appear in all copies.
|
6
|
-
*
|
7
|
-
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
8
|
-
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
9
|
-
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
|
10
|
-
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
11
|
-
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
|
12
|
-
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
13
|
-
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
|
14
|
-
|
15
|
-
#include <stdio.h>
|
16
|
-
#include <string.h>
|
17
|
-
|
18
|
-
#include <vector>
|
19
|
-
|
20
|
-
#include <openssl/crypto.h>
|
21
|
-
#include <openssl/poly1305.h>
|
22
|
-
|
23
|
-
#include "../test/file_test.h"
|
24
|
-
|
25
|
-
|
26
|
-
// |CRYPTO_poly1305_finish| requires a 16-byte-aligned output.
|
27
|
-
#if defined(OPENSSL_WINDOWS)
|
28
|
-
// MSVC doesn't support C++11 |alignas|.
|
29
|
-
#define ALIGNED __declspec(align(16))
|
30
|
-
#else
|
31
|
-
#define ALIGNED alignas(16)
|
32
|
-
#endif
|
33
|
-
|
34
|
-
static bool TestPoly1305(FileTest *t, void *arg) {
|
35
|
-
std::vector<uint8_t> key, in, mac;
|
36
|
-
if (!t->GetBytes(&key, "Key") ||
|
37
|
-
!t->GetBytes(&in, "Input") ||
|
38
|
-
!t->GetBytes(&mac, "MAC")) {
|
39
|
-
return false;
|
40
|
-
}
|
41
|
-
if (key.size() != 32 || mac.size() != 16) {
|
42
|
-
t->PrintLine("Invalid test");
|
43
|
-
return false;
|
44
|
-
}
|
45
|
-
|
46
|
-
// Test single-shot operation.
|
47
|
-
poly1305_state state;
|
48
|
-
CRYPTO_poly1305_init(&state, key.data());
|
49
|
-
CRYPTO_poly1305_update(&state, in.data(), in.size());
|
50
|
-
ALIGNED uint8_t out[16];
|
51
|
-
CRYPTO_poly1305_finish(&state, out);
|
52
|
-
if (!t->ExpectBytesEqual(out, 16, mac.data(), mac.size())) {
|
53
|
-
t->PrintLine("Single-shot Poly1305 failed.");
|
54
|
-
return false;
|
55
|
-
}
|
56
|
-
|
57
|
-
// Test streaming byte-by-byte.
|
58
|
-
CRYPTO_poly1305_init(&state, key.data());
|
59
|
-
for (size_t i = 0; i < in.size(); i++) {
|
60
|
-
CRYPTO_poly1305_update(&state, &in[i], 1);
|
61
|
-
}
|
62
|
-
CRYPTO_poly1305_finish(&state, out);
|
63
|
-
if (!t->ExpectBytesEqual(out, 16, mac.data(), mac.size())) {
|
64
|
-
t->PrintLine("Streaming Poly1305 failed.");
|
65
|
-
return false;
|
66
|
-
}
|
67
|
-
|
68
|
-
return true;
|
69
|
-
}
|
70
|
-
|
71
|
-
int main(int argc, char **argv) {
|
72
|
-
CRYPTO_library_init();
|
73
|
-
|
74
|
-
if (argc != 2) {
|
75
|
-
fprintf(stderr, "%s <test file>\n", argv[0]);
|
76
|
-
return 1;
|
77
|
-
}
|
78
|
-
|
79
|
-
return FileTestMain(TestPoly1305, nullptr, argv[1]);
|
80
|
-
}
|
@@ -1,52 +0,0 @@
|
|
1
|
-
# RFC 7539, section 2.5.2.
|
2
|
-
|
3
|
-
Key = 85d6be7857556d337f4452fe42d506a80103808afb0db2fd4abff6af4149f51b
|
4
|
-
Input = "Cryptographic Forum Research Group"
|
5
|
-
MAC = a8061dc1305136c6c22b8baf0c0127a9
|
6
|
-
|
7
|
-
|
8
|
-
# RFC 7539, section A.3.
|
9
|
-
|
10
|
-
Key = 0000000000000000000000000000000000000000000000000000000000000000
|
11
|
-
Input = 00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
|
12
|
-
MAC = 00000000000000000000000000000000
|
13
|
-
|
14
|
-
Key = 0000000000000000000000000000000036e5f6b5c5e06070f0efca96227a863e
|
15
|
-
Input = 416e79207375626d697373696f6e20746f20746865204945544620696e74656e6465642062792074686520436f6e7472696275746f7220666f72207075626c69636174696f6e20617320616c6c206f722070617274206f6620616e204945544620496e7465726e65742d4472616674206f722052464320616e6420616e792073746174656d656e74206d6164652077697468696e2074686520636f6e74657874206f6620616e204945544620616374697669747920697320636f6e7369646572656420616e20224945544620436f6e747269627574696f6e222e20537563682073746174656d656e747320696e636c756465206f72616c2073746174656d656e747320696e20494554462073657373696f6e732c2061732077656c6c206173207772697474656e20616e6420656c656374726f6e696320636f6d6d756e69636174696f6e73206d61646520617420616e792074696d65206f7220706c6163652c207768696368206172652061646472657373656420746f
|
16
|
-
MAC = 36e5f6b5c5e06070f0efca96227a863e
|
17
|
-
|
18
|
-
Key = 36e5f6b5c5e06070f0efca96227a863e00000000000000000000000000000000
|
19
|
-
Input = 416e79207375626d697373696f6e20746f20746865204945544620696e74656e6465642062792074686520436f6e7472696275746f7220666f72207075626c69636174696f6e20617320616c6c206f722070617274206f6620616e204945544620496e7465726e65742d4472616674206f722052464320616e6420616e792073746174656d656e74206d6164652077697468696e2074686520636f6e74657874206f6620616e204945544620616374697669747920697320636f6e7369646572656420616e20224945544620436f6e747269627574696f6e222e20537563682073746174656d656e747320696e636c756465206f72616c2073746174656d656e747320696e20494554462073657373696f6e732c2061732077656c6c206173207772697474656e20616e6420656c656374726f6e696320636f6d6d756e69636174696f6e73206d61646520617420616e792074696d65206f7220706c6163652c207768696368206172652061646472657373656420746f
|
20
|
-
MAC = f3477e7cd95417af89a6b8794c310cf0
|
21
|
-
|
22
|
-
Key = 1c9240a5eb55d38af333888604f6b5f0473917c1402b80099dca5cbc207075c0
|
23
|
-
Input = 2754776173206272696c6c69672c20616e642074686520736c6974687920746f7665730a446964206779726520616e642067696d626c6520696e2074686520776162653a0a416c6c206d696d737920776572652074686520626f726f676f7665732c0a416e6420746865206d6f6d65207261746873206f757467726162652e
|
24
|
-
MAC = 4541669a7eaaee61e708dc7cbcc5eb62
|
25
|
-
|
26
|
-
Key = 0200000000000000000000000000000000000000000000000000000000000000
|
27
|
-
Input = ffffffffffffffffffffffffffffffff
|
28
|
-
MAC = 03000000000000000000000000000000
|
29
|
-
|
30
|
-
Key = 02000000000000000000000000000000ffffffffffffffffffffffffffffffff
|
31
|
-
Input = 02000000000000000000000000000000
|
32
|
-
MAC = 03000000000000000000000000000000
|
33
|
-
|
34
|
-
Key = 0100000000000000000000000000000000000000000000000000000000000000
|
35
|
-
Input = fffffffffffffffffffffffffffffffff0ffffffffffffffffffffffffffffff11000000000000000000000000000000
|
36
|
-
MAC = 05000000000000000000000000000000
|
37
|
-
|
38
|
-
Key = 0100000000000000000000000000000000000000000000000000000000000000
|
39
|
-
Input = fffffffffffffffffffffffffffffffffbfefefefefefefefefefefefefefefe01010101010101010101010101010101
|
40
|
-
MAC = 00000000000000000000000000000000
|
41
|
-
|
42
|
-
Key = 0200000000000000000000000000000000000000000000000000000000000000
|
43
|
-
Input = fdffffffffffffffffffffffffffffff
|
44
|
-
MAC = faffffffffffffffffffffffffffffff
|
45
|
-
|
46
|
-
Key = 0100000000000000040000000000000000000000000000000000000000000000
|
47
|
-
Input = e33594d7505e43b900000000000000003394d7505e4379cd01000000000000000000000000000000000000000000000001000000000000000000000000000000
|
48
|
-
MAC = 14000000000000005500000000000000
|
49
|
-
|
50
|
-
Key = 0100000000000000040000000000000000000000000000000000000000000000
|
51
|
-
Input = e33594d7505e43b900000000000000003394d7505e4379cd010000000000000000000000000000000000000000000000
|
52
|
-
MAC = 13000000000000000000000000000000
|
@@ -1,892 +0,0 @@
|
|
1
|
-
/* Copyright (c) 2014, Google Inc.
|
2
|
-
*
|
3
|
-
* Permission to use, copy, modify, and/or distribute this software for any
|
4
|
-
* purpose with or without fee is hereby granted, provided that the above
|
5
|
-
* copyright notice and this permission notice appear in all copies.
|
6
|
-
*
|
7
|
-
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
8
|
-
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
9
|
-
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
|
10
|
-
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
11
|
-
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
|
12
|
-
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
13
|
-
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
|
14
|
-
|
15
|
-
/* This implementation of poly1305 is by Andrew Moon
|
16
|
-
* (https://github.com/floodyberry/poly1305-donna) and released as public
|
17
|
-
* domain. It implements SIMD vectorization based on the algorithm described in
|
18
|
-
* http://cr.yp.to/papers.html#neoncrypto. Unrolled to 2 powers, i.e. 64 byte
|
19
|
-
* block size */
|
20
|
-
|
21
|
-
#include <openssl/poly1305.h>
|
22
|
-
|
23
|
-
|
24
|
-
#if !defined(OPENSSL_WINDOWS) && defined(OPENSSL_X86_64)
|
25
|
-
|
26
|
-
#include <emmintrin.h>
|
27
|
-
|
28
|
-
#define ALIGN(x) __attribute__((aligned(x)))
|
29
|
-
/* inline is not a keyword in C89. */
|
30
|
-
#define INLINE
|
31
|
-
#define U8TO64_LE(m) (*(uint64_t *)(m))
|
32
|
-
#define U8TO32_LE(m) (*(uint32_t *)(m))
|
33
|
-
#define U64TO8_LE(m, v) (*(uint64_t *)(m)) = v
|
34
|
-
|
35
|
-
typedef __m128i xmmi;
|
36
|
-
typedef unsigned __int128 uint128_t;
|
37
|
-
|
38
|
-
static const uint32_t ALIGN(16) poly1305_x64_sse2_message_mask[4] = {
|
39
|
-
(1 << 26) - 1, 0, (1 << 26) - 1, 0};
|
40
|
-
static const uint32_t ALIGN(16) poly1305_x64_sse2_5[4] = {5, 0, 5, 0};
|
41
|
-
static const uint32_t ALIGN(16) poly1305_x64_sse2_1shl128[4] = {(1 << 24), 0,
|
42
|
-
(1 << 24), 0};
|
43
|
-
|
44
|
-
static uint128_t INLINE add128(uint128_t a, uint128_t b) { return a + b; }
|
45
|
-
|
46
|
-
static uint128_t INLINE add128_64(uint128_t a, uint64_t b) { return a + b; }
|
47
|
-
|
48
|
-
static uint128_t INLINE mul64x64_128(uint64_t a, uint64_t b) {
|
49
|
-
return (uint128_t)a * b;
|
50
|
-
}
|
51
|
-
|
52
|
-
static uint64_t INLINE lo128(uint128_t a) { return (uint64_t)a; }
|
53
|
-
|
54
|
-
static uint64_t INLINE shr128(uint128_t v, const int shift) {
|
55
|
-
return (uint64_t)(v >> shift);
|
56
|
-
}
|
57
|
-
|
58
|
-
static uint64_t INLINE shr128_pair(uint64_t hi, uint64_t lo, const int shift) {
|
59
|
-
return (uint64_t)((((uint128_t)hi << 64) | lo) >> shift);
|
60
|
-
}
|
61
|
-
|
62
|
-
typedef struct poly1305_power_t {
|
63
|
-
union {
|
64
|
-
xmmi v;
|
65
|
-
uint64_t u[2];
|
66
|
-
uint32_t d[4];
|
67
|
-
} R20, R21, R22, R23, R24, S21, S22, S23, S24;
|
68
|
-
} poly1305_power;
|
69
|
-
|
70
|
-
typedef struct poly1305_state_internal_t {
|
71
|
-
poly1305_power P[2]; /* 288 bytes, top 32 bit halves unused = 144
|
72
|
-
bytes of free storage */
|
73
|
-
union {
|
74
|
-
xmmi H[5]; /* 80 bytes */
|
75
|
-
uint64_t HH[10];
|
76
|
-
};
|
77
|
-
/* uint64_t r0,r1,r2; [24 bytes] */
|
78
|
-
/* uint64_t pad0,pad1; [16 bytes] */
|
79
|
-
uint64_t started; /* 8 bytes */
|
80
|
-
uint64_t leftover; /* 8 bytes */
|
81
|
-
uint8_t buffer[64]; /* 64 bytes */
|
82
|
-
} poly1305_state_internal; /* 448 bytes total + 63 bytes for
|
83
|
-
alignment = 511 bytes raw */
|
84
|
-
|
85
|
-
static poly1305_state_internal INLINE *poly1305_aligned_state(
|
86
|
-
poly1305_state *state) {
|
87
|
-
return (poly1305_state_internal *)(((uint64_t)state + 63) & ~63);
|
88
|
-
}
|
89
|
-
|
90
|
-
/* copy 0-63 bytes */
|
91
|
-
static void INLINE
|
92
|
-
poly1305_block_copy(uint8_t *dst, const uint8_t *src, size_t bytes) {
|
93
|
-
size_t offset = src - dst;
|
94
|
-
if (bytes & 32) {
|
95
|
-
_mm_storeu_si128((xmmi *)(dst + 0),
|
96
|
-
_mm_loadu_si128((xmmi *)(dst + offset + 0)));
|
97
|
-
_mm_storeu_si128((xmmi *)(dst + 16),
|
98
|
-
_mm_loadu_si128((xmmi *)(dst + offset + 16)));
|
99
|
-
dst += 32;
|
100
|
-
}
|
101
|
-
if (bytes & 16) {
|
102
|
-
_mm_storeu_si128((xmmi *)dst, _mm_loadu_si128((xmmi *)(dst + offset)));
|
103
|
-
dst += 16;
|
104
|
-
}
|
105
|
-
if (bytes & 8) {
|
106
|
-
*(uint64_t *)dst = *(uint64_t *)(dst + offset);
|
107
|
-
dst += 8;
|
108
|
-
}
|
109
|
-
if (bytes & 4) {
|
110
|
-
*(uint32_t *)dst = *(uint32_t *)(dst + offset);
|
111
|
-
dst += 4;
|
112
|
-
}
|
113
|
-
if (bytes & 2) {
|
114
|
-
*(uint16_t *)dst = *(uint16_t *)(dst + offset);
|
115
|
-
dst += 2;
|
116
|
-
}
|
117
|
-
if (bytes & 1) {
|
118
|
-
*(uint8_t *)dst = *(uint8_t *)(dst + offset);
|
119
|
-
}
|
120
|
-
}
|
121
|
-
|
122
|
-
/* zero 0-15 bytes */
|
123
|
-
static void INLINE poly1305_block_zero(uint8_t *dst, size_t bytes) {
|
124
|
-
if (bytes & 8) {
|
125
|
-
*(uint64_t *)dst = 0;
|
126
|
-
dst += 8;
|
127
|
-
}
|
128
|
-
if (bytes & 4) {
|
129
|
-
*(uint32_t *)dst = 0;
|
130
|
-
dst += 4;
|
131
|
-
}
|
132
|
-
if (bytes & 2) {
|
133
|
-
*(uint16_t *)dst = 0;
|
134
|
-
dst += 2;
|
135
|
-
}
|
136
|
-
if (bytes & 1) {
|
137
|
-
*(uint8_t *)dst = 0;
|
138
|
-
}
|
139
|
-
}
|
140
|
-
|
141
|
-
static size_t INLINE poly1305_min(size_t a, size_t b) {
|
142
|
-
return (a < b) ? a : b;
|
143
|
-
}
|
144
|
-
|
145
|
-
void CRYPTO_poly1305_init(poly1305_state *state, const uint8_t key[32]) {
|
146
|
-
poly1305_state_internal *st = poly1305_aligned_state(state);
|
147
|
-
poly1305_power *p;
|
148
|
-
uint64_t r0, r1, r2;
|
149
|
-
uint64_t t0, t1;
|
150
|
-
|
151
|
-
/* clamp key */
|
152
|
-
t0 = U8TO64_LE(key + 0);
|
153
|
-
t1 = U8TO64_LE(key + 8);
|
154
|
-
r0 = t0 & 0xffc0fffffff;
|
155
|
-
t0 >>= 44;
|
156
|
-
t0 |= t1 << 20;
|
157
|
-
r1 = t0 & 0xfffffc0ffff;
|
158
|
-
t1 >>= 24;
|
159
|
-
r2 = t1 & 0x00ffffffc0f;
|
160
|
-
|
161
|
-
/* store r in un-used space of st->P[1] */
|
162
|
-
p = &st->P[1];
|
163
|
-
p->R20.d[1] = (uint32_t)(r0);
|
164
|
-
p->R20.d[3] = (uint32_t)(r0 >> 32);
|
165
|
-
p->R21.d[1] = (uint32_t)(r1);
|
166
|
-
p->R21.d[3] = (uint32_t)(r1 >> 32);
|
167
|
-
p->R22.d[1] = (uint32_t)(r2);
|
168
|
-
p->R22.d[3] = (uint32_t)(r2 >> 32);
|
169
|
-
|
170
|
-
/* store pad */
|
171
|
-
p->R23.d[1] = U8TO32_LE(key + 16);
|
172
|
-
p->R23.d[3] = U8TO32_LE(key + 20);
|
173
|
-
p->R24.d[1] = U8TO32_LE(key + 24);
|
174
|
-
p->R24.d[3] = U8TO32_LE(key + 28);
|
175
|
-
|
176
|
-
/* H = 0 */
|
177
|
-
st->H[0] = _mm_setzero_si128();
|
178
|
-
st->H[1] = _mm_setzero_si128();
|
179
|
-
st->H[2] = _mm_setzero_si128();
|
180
|
-
st->H[3] = _mm_setzero_si128();
|
181
|
-
st->H[4] = _mm_setzero_si128();
|
182
|
-
|
183
|
-
st->started = 0;
|
184
|
-
st->leftover = 0;
|
185
|
-
}
|
186
|
-
|
187
|
-
static void poly1305_first_block(poly1305_state_internal *st,
|
188
|
-
const uint8_t *m) {
|
189
|
-
const xmmi MMASK = _mm_load_si128((xmmi *)poly1305_x64_sse2_message_mask);
|
190
|
-
const xmmi FIVE = _mm_load_si128((xmmi *)poly1305_x64_sse2_5);
|
191
|
-
const xmmi HIBIT = _mm_load_si128((xmmi *)poly1305_x64_sse2_1shl128);
|
192
|
-
xmmi T5, T6;
|
193
|
-
poly1305_power *p;
|
194
|
-
uint128_t d[3];
|
195
|
-
uint64_t r0, r1, r2;
|
196
|
-
uint64_t r20, r21, r22, s22;
|
197
|
-
uint64_t pad0, pad1;
|
198
|
-
uint64_t c;
|
199
|
-
uint64_t i;
|
200
|
-
|
201
|
-
/* pull out stored info */
|
202
|
-
p = &st->P[1];
|
203
|
-
|
204
|
-
r0 = ((uint64_t)p->R20.d[3] << 32) | (uint64_t)p->R20.d[1];
|
205
|
-
r1 = ((uint64_t)p->R21.d[3] << 32) | (uint64_t)p->R21.d[1];
|
206
|
-
r2 = ((uint64_t)p->R22.d[3] << 32) | (uint64_t)p->R22.d[1];
|
207
|
-
pad0 = ((uint64_t)p->R23.d[3] << 32) | (uint64_t)p->R23.d[1];
|
208
|
-
pad1 = ((uint64_t)p->R24.d[3] << 32) | (uint64_t)p->R24.d[1];
|
209
|
-
|
210
|
-
/* compute powers r^2,r^4 */
|
211
|
-
r20 = r0;
|
212
|
-
r21 = r1;
|
213
|
-
r22 = r2;
|
214
|
-
for (i = 0; i < 2; i++) {
|
215
|
-
s22 = r22 * (5 << 2);
|
216
|
-
|
217
|
-
d[0] = add128(mul64x64_128(r20, r20), mul64x64_128(r21 * 2, s22));
|
218
|
-
d[1] = add128(mul64x64_128(r22, s22), mul64x64_128(r20 * 2, r21));
|
219
|
-
d[2] = add128(mul64x64_128(r21, r21), mul64x64_128(r22 * 2, r20));
|
220
|
-
|
221
|
-
r20 = lo128(d[0]) & 0xfffffffffff;
|
222
|
-
c = shr128(d[0], 44);
|
223
|
-
d[1] = add128_64(d[1], c);
|
224
|
-
r21 = lo128(d[1]) & 0xfffffffffff;
|
225
|
-
c = shr128(d[1], 44);
|
226
|
-
d[2] = add128_64(d[2], c);
|
227
|
-
r22 = lo128(d[2]) & 0x3ffffffffff;
|
228
|
-
c = shr128(d[2], 42);
|
229
|
-
r20 += c * 5;
|
230
|
-
c = (r20 >> 44);
|
231
|
-
r20 = r20 & 0xfffffffffff;
|
232
|
-
r21 += c;
|
233
|
-
|
234
|
-
p->R20.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)(r20)&0x3ffffff),
|
235
|
-
_MM_SHUFFLE(1, 0, 1, 0));
|
236
|
-
p->R21.v = _mm_shuffle_epi32(
|
237
|
-
_mm_cvtsi32_si128((uint32_t)((r20 >> 26) | (r21 << 18)) & 0x3ffffff),
|
238
|
-
_MM_SHUFFLE(1, 0, 1, 0));
|
239
|
-
p->R22.v =
|
240
|
-
_mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r21 >> 8)) & 0x3ffffff),
|
241
|
-
_MM_SHUFFLE(1, 0, 1, 0));
|
242
|
-
p->R23.v = _mm_shuffle_epi32(
|
243
|
-
_mm_cvtsi32_si128((uint32_t)((r21 >> 34) | (r22 << 10)) & 0x3ffffff),
|
244
|
-
_MM_SHUFFLE(1, 0, 1, 0));
|
245
|
-
p->R24.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r22 >> 16))),
|
246
|
-
_MM_SHUFFLE(1, 0, 1, 0));
|
247
|
-
p->S21.v = _mm_mul_epu32(p->R21.v, FIVE);
|
248
|
-
p->S22.v = _mm_mul_epu32(p->R22.v, FIVE);
|
249
|
-
p->S23.v = _mm_mul_epu32(p->R23.v, FIVE);
|
250
|
-
p->S24.v = _mm_mul_epu32(p->R24.v, FIVE);
|
251
|
-
p--;
|
252
|
-
}
|
253
|
-
|
254
|
-
/* put saved info back */
|
255
|
-
p = &st->P[1];
|
256
|
-
p->R20.d[1] = (uint32_t)(r0);
|
257
|
-
p->R20.d[3] = (uint32_t)(r0 >> 32);
|
258
|
-
p->R21.d[1] = (uint32_t)(r1);
|
259
|
-
p->R21.d[3] = (uint32_t)(r1 >> 32);
|
260
|
-
p->R22.d[1] = (uint32_t)(r2);
|
261
|
-
p->R22.d[3] = (uint32_t)(r2 >> 32);
|
262
|
-
p->R23.d[1] = (uint32_t)(pad0);
|
263
|
-
p->R23.d[3] = (uint32_t)(pad0 >> 32);
|
264
|
-
p->R24.d[1] = (uint32_t)(pad1);
|
265
|
-
p->R24.d[3] = (uint32_t)(pad1 >> 32);
|
266
|
-
|
267
|
-
/* H = [Mx,My] */
|
268
|
-
T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 0)),
|
269
|
-
_mm_loadl_epi64((xmmi *)(m + 16)));
|
270
|
-
T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 8)),
|
271
|
-
_mm_loadl_epi64((xmmi *)(m + 24)));
|
272
|
-
st->H[0] = _mm_and_si128(MMASK, T5);
|
273
|
-
st->H[1] = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
|
274
|
-
T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12));
|
275
|
-
st->H[2] = _mm_and_si128(MMASK, T5);
|
276
|
-
st->H[3] = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
|
277
|
-
st->H[4] = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT);
|
278
|
-
}
|
279
|
-
|
280
|
-
static void poly1305_blocks(poly1305_state_internal *st, const uint8_t *m,
|
281
|
-
size_t bytes) {
|
282
|
-
const xmmi MMASK = _mm_load_si128((xmmi *)poly1305_x64_sse2_message_mask);
|
283
|
-
const xmmi FIVE = _mm_load_si128((xmmi *)poly1305_x64_sse2_5);
|
284
|
-
const xmmi HIBIT = _mm_load_si128((xmmi *)poly1305_x64_sse2_1shl128);
|
285
|
-
|
286
|
-
poly1305_power *p;
|
287
|
-
xmmi H0, H1, H2, H3, H4;
|
288
|
-
xmmi T0, T1, T2, T3, T4, T5, T6;
|
289
|
-
xmmi M0, M1, M2, M3, M4;
|
290
|
-
xmmi C1, C2;
|
291
|
-
|
292
|
-
H0 = st->H[0];
|
293
|
-
H1 = st->H[1];
|
294
|
-
H2 = st->H[2];
|
295
|
-
H3 = st->H[3];
|
296
|
-
H4 = st->H[4];
|
297
|
-
|
298
|
-
while (bytes >= 64) {
|
299
|
-
/* H *= [r^4,r^4] */
|
300
|
-
p = &st->P[0];
|
301
|
-
T0 = _mm_mul_epu32(H0, p->R20.v);
|
302
|
-
T1 = _mm_mul_epu32(H0, p->R21.v);
|
303
|
-
T2 = _mm_mul_epu32(H0, p->R22.v);
|
304
|
-
T3 = _mm_mul_epu32(H0, p->R23.v);
|
305
|
-
T4 = _mm_mul_epu32(H0, p->R24.v);
|
306
|
-
T5 = _mm_mul_epu32(H1, p->S24.v);
|
307
|
-
T6 = _mm_mul_epu32(H1, p->R20.v);
|
308
|
-
T0 = _mm_add_epi64(T0, T5);
|
309
|
-
T1 = _mm_add_epi64(T1, T6);
|
310
|
-
T5 = _mm_mul_epu32(H2, p->S23.v);
|
311
|
-
T6 = _mm_mul_epu32(H2, p->S24.v);
|
312
|
-
T0 = _mm_add_epi64(T0, T5);
|
313
|
-
T1 = _mm_add_epi64(T1, T6);
|
314
|
-
T5 = _mm_mul_epu32(H3, p->S22.v);
|
315
|
-
T6 = _mm_mul_epu32(H3, p->S23.v);
|
316
|
-
T0 = _mm_add_epi64(T0, T5);
|
317
|
-
T1 = _mm_add_epi64(T1, T6);
|
318
|
-
T5 = _mm_mul_epu32(H4, p->S21.v);
|
319
|
-
T6 = _mm_mul_epu32(H4, p->S22.v);
|
320
|
-
T0 = _mm_add_epi64(T0, T5);
|
321
|
-
T1 = _mm_add_epi64(T1, T6);
|
322
|
-
T5 = _mm_mul_epu32(H1, p->R21.v);
|
323
|
-
T6 = _mm_mul_epu32(H1, p->R22.v);
|
324
|
-
T2 = _mm_add_epi64(T2, T5);
|
325
|
-
T3 = _mm_add_epi64(T3, T6);
|
326
|
-
T5 = _mm_mul_epu32(H2, p->R20.v);
|
327
|
-
T6 = _mm_mul_epu32(H2, p->R21.v);
|
328
|
-
T2 = _mm_add_epi64(T2, T5);
|
329
|
-
T3 = _mm_add_epi64(T3, T6);
|
330
|
-
T5 = _mm_mul_epu32(H3, p->S24.v);
|
331
|
-
T6 = _mm_mul_epu32(H3, p->R20.v);
|
332
|
-
T2 = _mm_add_epi64(T2, T5);
|
333
|
-
T3 = _mm_add_epi64(T3, T6);
|
334
|
-
T5 = _mm_mul_epu32(H4, p->S23.v);
|
335
|
-
T6 = _mm_mul_epu32(H4, p->S24.v);
|
336
|
-
T2 = _mm_add_epi64(T2, T5);
|
337
|
-
T3 = _mm_add_epi64(T3, T6);
|
338
|
-
T5 = _mm_mul_epu32(H1, p->R23.v);
|
339
|
-
T4 = _mm_add_epi64(T4, T5);
|
340
|
-
T5 = _mm_mul_epu32(H2, p->R22.v);
|
341
|
-
T4 = _mm_add_epi64(T4, T5);
|
342
|
-
T5 = _mm_mul_epu32(H3, p->R21.v);
|
343
|
-
T4 = _mm_add_epi64(T4, T5);
|
344
|
-
T5 = _mm_mul_epu32(H4, p->R20.v);
|
345
|
-
T4 = _mm_add_epi64(T4, T5);
|
346
|
-
|
347
|
-
/* H += [Mx,My]*[r^2,r^2] */
|
348
|
-
T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 0)),
|
349
|
-
_mm_loadl_epi64((xmmi *)(m + 16)));
|
350
|
-
T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 8)),
|
351
|
-
_mm_loadl_epi64((xmmi *)(m + 24)));
|
352
|
-
M0 = _mm_and_si128(MMASK, T5);
|
353
|
-
M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
|
354
|
-
T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12));
|
355
|
-
M2 = _mm_and_si128(MMASK, T5);
|
356
|
-
M3 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
|
357
|
-
M4 = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT);
|
358
|
-
|
359
|
-
p = &st->P[1];
|
360
|
-
T5 = _mm_mul_epu32(M0, p->R20.v);
|
361
|
-
T6 = _mm_mul_epu32(M0, p->R21.v);
|
362
|
-
T0 = _mm_add_epi64(T0, T5);
|
363
|
-
T1 = _mm_add_epi64(T1, T6);
|
364
|
-
T5 = _mm_mul_epu32(M1, p->S24.v);
|
365
|
-
T6 = _mm_mul_epu32(M1, p->R20.v);
|
366
|
-
T0 = _mm_add_epi64(T0, T5);
|
367
|
-
T1 = _mm_add_epi64(T1, T6);
|
368
|
-
T5 = _mm_mul_epu32(M2, p->S23.v);
|
369
|
-
T6 = _mm_mul_epu32(M2, p->S24.v);
|
370
|
-
T0 = _mm_add_epi64(T0, T5);
|
371
|
-
T1 = _mm_add_epi64(T1, T6);
|
372
|
-
T5 = _mm_mul_epu32(M3, p->S22.v);
|
373
|
-
T6 = _mm_mul_epu32(M3, p->S23.v);
|
374
|
-
T0 = _mm_add_epi64(T0, T5);
|
375
|
-
T1 = _mm_add_epi64(T1, T6);
|
376
|
-
T5 = _mm_mul_epu32(M4, p->S21.v);
|
377
|
-
T6 = _mm_mul_epu32(M4, p->S22.v);
|
378
|
-
T0 = _mm_add_epi64(T0, T5);
|
379
|
-
T1 = _mm_add_epi64(T1, T6);
|
380
|
-
T5 = _mm_mul_epu32(M0, p->R22.v);
|
381
|
-
T6 = _mm_mul_epu32(M0, p->R23.v);
|
382
|
-
T2 = _mm_add_epi64(T2, T5);
|
383
|
-
T3 = _mm_add_epi64(T3, T6);
|
384
|
-
T5 = _mm_mul_epu32(M1, p->R21.v);
|
385
|
-
T6 = _mm_mul_epu32(M1, p->R22.v);
|
386
|
-
T2 = _mm_add_epi64(T2, T5);
|
387
|
-
T3 = _mm_add_epi64(T3, T6);
|
388
|
-
T5 = _mm_mul_epu32(M2, p->R20.v);
|
389
|
-
T6 = _mm_mul_epu32(M2, p->R21.v);
|
390
|
-
T2 = _mm_add_epi64(T2, T5);
|
391
|
-
T3 = _mm_add_epi64(T3, T6);
|
392
|
-
T5 = _mm_mul_epu32(M3, p->S24.v);
|
393
|
-
T6 = _mm_mul_epu32(M3, p->R20.v);
|
394
|
-
T2 = _mm_add_epi64(T2, T5);
|
395
|
-
T3 = _mm_add_epi64(T3, T6);
|
396
|
-
T5 = _mm_mul_epu32(M4, p->S23.v);
|
397
|
-
T6 = _mm_mul_epu32(M4, p->S24.v);
|
398
|
-
T2 = _mm_add_epi64(T2, T5);
|
399
|
-
T3 = _mm_add_epi64(T3, T6);
|
400
|
-
T5 = _mm_mul_epu32(M0, p->R24.v);
|
401
|
-
T4 = _mm_add_epi64(T4, T5);
|
402
|
-
T5 = _mm_mul_epu32(M1, p->R23.v);
|
403
|
-
T4 = _mm_add_epi64(T4, T5);
|
404
|
-
T5 = _mm_mul_epu32(M2, p->R22.v);
|
405
|
-
T4 = _mm_add_epi64(T4, T5);
|
406
|
-
T5 = _mm_mul_epu32(M3, p->R21.v);
|
407
|
-
T4 = _mm_add_epi64(T4, T5);
|
408
|
-
T5 = _mm_mul_epu32(M4, p->R20.v);
|
409
|
-
T4 = _mm_add_epi64(T4, T5);
|
410
|
-
|
411
|
-
/* H += [Mx,My] */
|
412
|
-
T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 32)),
|
413
|
-
_mm_loadl_epi64((xmmi *)(m + 48)));
|
414
|
-
T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 40)),
|
415
|
-
_mm_loadl_epi64((xmmi *)(m + 56)));
|
416
|
-
M0 = _mm_and_si128(MMASK, T5);
|
417
|
-
M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
|
418
|
-
T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12));
|
419
|
-
M2 = _mm_and_si128(MMASK, T5);
|
420
|
-
M3 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
|
421
|
-
M4 = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT);
|
422
|
-
|
423
|
-
T0 = _mm_add_epi64(T0, M0);
|
424
|
-
T1 = _mm_add_epi64(T1, M1);
|
425
|
-
T2 = _mm_add_epi64(T2, M2);
|
426
|
-
T3 = _mm_add_epi64(T3, M3);
|
427
|
-
T4 = _mm_add_epi64(T4, M4);
|
428
|
-
|
429
|
-
/* reduce */
|
430
|
-
C1 = _mm_srli_epi64(T0, 26);
|
431
|
-
C2 = _mm_srli_epi64(T3, 26);
|
432
|
-
T0 = _mm_and_si128(T0, MMASK);
|
433
|
-
T3 = _mm_and_si128(T3, MMASK);
|
434
|
-
T1 = _mm_add_epi64(T1, C1);
|
435
|
-
T4 = _mm_add_epi64(T4, C2);
|
436
|
-
C1 = _mm_srli_epi64(T1, 26);
|
437
|
-
C2 = _mm_srli_epi64(T4, 26);
|
438
|
-
T1 = _mm_and_si128(T1, MMASK);
|
439
|
-
T4 = _mm_and_si128(T4, MMASK);
|
440
|
-
T2 = _mm_add_epi64(T2, C1);
|
441
|
-
T0 = _mm_add_epi64(T0, _mm_mul_epu32(C2, FIVE));
|
442
|
-
C1 = _mm_srli_epi64(T2, 26);
|
443
|
-
C2 = _mm_srli_epi64(T0, 26);
|
444
|
-
T2 = _mm_and_si128(T2, MMASK);
|
445
|
-
T0 = _mm_and_si128(T0, MMASK);
|
446
|
-
T3 = _mm_add_epi64(T3, C1);
|
447
|
-
T1 = _mm_add_epi64(T1, C2);
|
448
|
-
C1 = _mm_srli_epi64(T3, 26);
|
449
|
-
T3 = _mm_and_si128(T3, MMASK);
|
450
|
-
T4 = _mm_add_epi64(T4, C1);
|
451
|
-
|
452
|
-
/* H = (H*[r^4,r^4] + [Mx,My]*[r^2,r^2] + [Mx,My]) */
|
453
|
-
H0 = T0;
|
454
|
-
H1 = T1;
|
455
|
-
H2 = T2;
|
456
|
-
H3 = T3;
|
457
|
-
H4 = T4;
|
458
|
-
|
459
|
-
m += 64;
|
460
|
-
bytes -= 64;
|
461
|
-
}
|
462
|
-
|
463
|
-
st->H[0] = H0;
|
464
|
-
st->H[1] = H1;
|
465
|
-
st->H[2] = H2;
|
466
|
-
st->H[3] = H3;
|
467
|
-
st->H[4] = H4;
|
468
|
-
}
|
469
|
-
|
470
|
-
static size_t poly1305_combine(poly1305_state_internal *st, const uint8_t *m,
|
471
|
-
size_t bytes) {
|
472
|
-
const xmmi MMASK = _mm_load_si128((xmmi *)poly1305_x64_sse2_message_mask);
|
473
|
-
const xmmi HIBIT = _mm_load_si128((xmmi *)poly1305_x64_sse2_1shl128);
|
474
|
-
const xmmi FIVE = _mm_load_si128((xmmi *)poly1305_x64_sse2_5);
|
475
|
-
|
476
|
-
poly1305_power *p;
|
477
|
-
xmmi H0, H1, H2, H3, H4;
|
478
|
-
xmmi M0, M1, M2, M3, M4;
|
479
|
-
xmmi T0, T1, T2, T3, T4, T5, T6;
|
480
|
-
xmmi C1, C2;
|
481
|
-
|
482
|
-
uint64_t r0, r1, r2;
|
483
|
-
uint64_t t0, t1, t2, t3, t4;
|
484
|
-
uint64_t c;
|
485
|
-
size_t consumed = 0;
|
486
|
-
|
487
|
-
H0 = st->H[0];
|
488
|
-
H1 = st->H[1];
|
489
|
-
H2 = st->H[2];
|
490
|
-
H3 = st->H[3];
|
491
|
-
H4 = st->H[4];
|
492
|
-
|
493
|
-
/* p = [r^2,r^2] */
|
494
|
-
p = &st->P[1];
|
495
|
-
|
496
|
-
if (bytes >= 32) {
|
497
|
-
/* H *= [r^2,r^2] */
|
498
|
-
T0 = _mm_mul_epu32(H0, p->R20.v);
|
499
|
-
T1 = _mm_mul_epu32(H0, p->R21.v);
|
500
|
-
T2 = _mm_mul_epu32(H0, p->R22.v);
|
501
|
-
T3 = _mm_mul_epu32(H0, p->R23.v);
|
502
|
-
T4 = _mm_mul_epu32(H0, p->R24.v);
|
503
|
-
T5 = _mm_mul_epu32(H1, p->S24.v);
|
504
|
-
T6 = _mm_mul_epu32(H1, p->R20.v);
|
505
|
-
T0 = _mm_add_epi64(T0, T5);
|
506
|
-
T1 = _mm_add_epi64(T1, T6);
|
507
|
-
T5 = _mm_mul_epu32(H2, p->S23.v);
|
508
|
-
T6 = _mm_mul_epu32(H2, p->S24.v);
|
509
|
-
T0 = _mm_add_epi64(T0, T5);
|
510
|
-
T1 = _mm_add_epi64(T1, T6);
|
511
|
-
T5 = _mm_mul_epu32(H3, p->S22.v);
|
512
|
-
T6 = _mm_mul_epu32(H3, p->S23.v);
|
513
|
-
T0 = _mm_add_epi64(T0, T5);
|
514
|
-
T1 = _mm_add_epi64(T1, T6);
|
515
|
-
T5 = _mm_mul_epu32(H4, p->S21.v);
|
516
|
-
T6 = _mm_mul_epu32(H4, p->S22.v);
|
517
|
-
T0 = _mm_add_epi64(T0, T5);
|
518
|
-
T1 = _mm_add_epi64(T1, T6);
|
519
|
-
T5 = _mm_mul_epu32(H1, p->R21.v);
|
520
|
-
T6 = _mm_mul_epu32(H1, p->R22.v);
|
521
|
-
T2 = _mm_add_epi64(T2, T5);
|
522
|
-
T3 = _mm_add_epi64(T3, T6);
|
523
|
-
T5 = _mm_mul_epu32(H2, p->R20.v);
|
524
|
-
T6 = _mm_mul_epu32(H2, p->R21.v);
|
525
|
-
T2 = _mm_add_epi64(T2, T5);
|
526
|
-
T3 = _mm_add_epi64(T3, T6);
|
527
|
-
T5 = _mm_mul_epu32(H3, p->S24.v);
|
528
|
-
T6 = _mm_mul_epu32(H3, p->R20.v);
|
529
|
-
T2 = _mm_add_epi64(T2, T5);
|
530
|
-
T3 = _mm_add_epi64(T3, T6);
|
531
|
-
T5 = _mm_mul_epu32(H4, p->S23.v);
|
532
|
-
T6 = _mm_mul_epu32(H4, p->S24.v);
|
533
|
-
T2 = _mm_add_epi64(T2, T5);
|
534
|
-
T3 = _mm_add_epi64(T3, T6);
|
535
|
-
T5 = _mm_mul_epu32(H1, p->R23.v);
|
536
|
-
T4 = _mm_add_epi64(T4, T5);
|
537
|
-
T5 = _mm_mul_epu32(H2, p->R22.v);
|
538
|
-
T4 = _mm_add_epi64(T4, T5);
|
539
|
-
T5 = _mm_mul_epu32(H3, p->R21.v);
|
540
|
-
T4 = _mm_add_epi64(T4, T5);
|
541
|
-
T5 = _mm_mul_epu32(H4, p->R20.v);
|
542
|
-
T4 = _mm_add_epi64(T4, T5);
|
543
|
-
|
544
|
-
/* H += [Mx,My] */
|
545
|
-
T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 0)),
|
546
|
-
_mm_loadl_epi64((xmmi *)(m + 16)));
|
547
|
-
T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 8)),
|
548
|
-
_mm_loadl_epi64((xmmi *)(m + 24)));
|
549
|
-
M0 = _mm_and_si128(MMASK, T5);
|
550
|
-
M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
|
551
|
-
T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12));
|
552
|
-
M2 = _mm_and_si128(MMASK, T5);
|
553
|
-
M3 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
|
554
|
-
M4 = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT);
|
555
|
-
|
556
|
-
T0 = _mm_add_epi64(T0, M0);
|
557
|
-
T1 = _mm_add_epi64(T1, M1);
|
558
|
-
T2 = _mm_add_epi64(T2, M2);
|
559
|
-
T3 = _mm_add_epi64(T3, M3);
|
560
|
-
T4 = _mm_add_epi64(T4, M4);
|
561
|
-
|
562
|
-
/* reduce */
|
563
|
-
C1 = _mm_srli_epi64(T0, 26);
|
564
|
-
C2 = _mm_srli_epi64(T3, 26);
|
565
|
-
T0 = _mm_and_si128(T0, MMASK);
|
566
|
-
T3 = _mm_and_si128(T3, MMASK);
|
567
|
-
T1 = _mm_add_epi64(T1, C1);
|
568
|
-
T4 = _mm_add_epi64(T4, C2);
|
569
|
-
C1 = _mm_srli_epi64(T1, 26);
|
570
|
-
C2 = _mm_srli_epi64(T4, 26);
|
571
|
-
T1 = _mm_and_si128(T1, MMASK);
|
572
|
-
T4 = _mm_and_si128(T4, MMASK);
|
573
|
-
T2 = _mm_add_epi64(T2, C1);
|
574
|
-
T0 = _mm_add_epi64(T0, _mm_mul_epu32(C2, FIVE));
|
575
|
-
C1 = _mm_srli_epi64(T2, 26);
|
576
|
-
C2 = _mm_srli_epi64(T0, 26);
|
577
|
-
T2 = _mm_and_si128(T2, MMASK);
|
578
|
-
T0 = _mm_and_si128(T0, MMASK);
|
579
|
-
T3 = _mm_add_epi64(T3, C1);
|
580
|
-
T1 = _mm_add_epi64(T1, C2);
|
581
|
-
C1 = _mm_srli_epi64(T3, 26);
|
582
|
-
T3 = _mm_and_si128(T3, MMASK);
|
583
|
-
T4 = _mm_add_epi64(T4, C1);
|
584
|
-
|
585
|
-
/* H = (H*[r^2,r^2] + [Mx,My]) */
|
586
|
-
H0 = T0;
|
587
|
-
H1 = T1;
|
588
|
-
H2 = T2;
|
589
|
-
H3 = T3;
|
590
|
-
H4 = T4;
|
591
|
-
|
592
|
-
consumed = 32;
|
593
|
-
}
|
594
|
-
|
595
|
-
/* finalize, H *= [r^2,r] */
|
596
|
-
r0 = ((uint64_t)p->R20.d[3] << 32) | (uint64_t)p->R20.d[1];
|
597
|
-
r1 = ((uint64_t)p->R21.d[3] << 32) | (uint64_t)p->R21.d[1];
|
598
|
-
r2 = ((uint64_t)p->R22.d[3] << 32) | (uint64_t)p->R22.d[1];
|
599
|
-
|
600
|
-
p->R20.d[2] = (uint32_t)(r0)&0x3ffffff;
|
601
|
-
p->R21.d[2] = (uint32_t)((r0 >> 26) | (r1 << 18)) & 0x3ffffff;
|
602
|
-
p->R22.d[2] = (uint32_t)((r1 >> 8)) & 0x3ffffff;
|
603
|
-
p->R23.d[2] = (uint32_t)((r1 >> 34) | (r2 << 10)) & 0x3ffffff;
|
604
|
-
p->R24.d[2] = (uint32_t)((r2 >> 16));
|
605
|
-
p->S21.d[2] = p->R21.d[2] * 5;
|
606
|
-
p->S22.d[2] = p->R22.d[2] * 5;
|
607
|
-
p->S23.d[2] = p->R23.d[2] * 5;
|
608
|
-
p->S24.d[2] = p->R24.d[2] * 5;
|
609
|
-
|
610
|
-
/* H *= [r^2,r] */
|
611
|
-
T0 = _mm_mul_epu32(H0, p->R20.v);
|
612
|
-
T1 = _mm_mul_epu32(H0, p->R21.v);
|
613
|
-
T2 = _mm_mul_epu32(H0, p->R22.v);
|
614
|
-
T3 = _mm_mul_epu32(H0, p->R23.v);
|
615
|
-
T4 = _mm_mul_epu32(H0, p->R24.v);
|
616
|
-
T5 = _mm_mul_epu32(H1, p->S24.v);
|
617
|
-
T6 = _mm_mul_epu32(H1, p->R20.v);
|
618
|
-
T0 = _mm_add_epi64(T0, T5);
|
619
|
-
T1 = _mm_add_epi64(T1, T6);
|
620
|
-
T5 = _mm_mul_epu32(H2, p->S23.v);
|
621
|
-
T6 = _mm_mul_epu32(H2, p->S24.v);
|
622
|
-
T0 = _mm_add_epi64(T0, T5);
|
623
|
-
T1 = _mm_add_epi64(T1, T6);
|
624
|
-
T5 = _mm_mul_epu32(H3, p->S22.v);
|
625
|
-
T6 = _mm_mul_epu32(H3, p->S23.v);
|
626
|
-
T0 = _mm_add_epi64(T0, T5);
|
627
|
-
T1 = _mm_add_epi64(T1, T6);
|
628
|
-
T5 = _mm_mul_epu32(H4, p->S21.v);
|
629
|
-
T6 = _mm_mul_epu32(H4, p->S22.v);
|
630
|
-
T0 = _mm_add_epi64(T0, T5);
|
631
|
-
T1 = _mm_add_epi64(T1, T6);
|
632
|
-
T5 = _mm_mul_epu32(H1, p->R21.v);
|
633
|
-
T6 = _mm_mul_epu32(H1, p->R22.v);
|
634
|
-
T2 = _mm_add_epi64(T2, T5);
|
635
|
-
T3 = _mm_add_epi64(T3, T6);
|
636
|
-
T5 = _mm_mul_epu32(H2, p->R20.v);
|
637
|
-
T6 = _mm_mul_epu32(H2, p->R21.v);
|
638
|
-
T2 = _mm_add_epi64(T2, T5);
|
639
|
-
T3 = _mm_add_epi64(T3, T6);
|
640
|
-
T5 = _mm_mul_epu32(H3, p->S24.v);
|
641
|
-
T6 = _mm_mul_epu32(H3, p->R20.v);
|
642
|
-
T2 = _mm_add_epi64(T2, T5);
|
643
|
-
T3 = _mm_add_epi64(T3, T6);
|
644
|
-
T5 = _mm_mul_epu32(H4, p->S23.v);
|
645
|
-
T6 = _mm_mul_epu32(H4, p->S24.v);
|
646
|
-
T2 = _mm_add_epi64(T2, T5);
|
647
|
-
T3 = _mm_add_epi64(T3, T6);
|
648
|
-
T5 = _mm_mul_epu32(H1, p->R23.v);
|
649
|
-
T4 = _mm_add_epi64(T4, T5);
|
650
|
-
T5 = _mm_mul_epu32(H2, p->R22.v);
|
651
|
-
T4 = _mm_add_epi64(T4, T5);
|
652
|
-
T5 = _mm_mul_epu32(H3, p->R21.v);
|
653
|
-
T4 = _mm_add_epi64(T4, T5);
|
654
|
-
T5 = _mm_mul_epu32(H4, p->R20.v);
|
655
|
-
T4 = _mm_add_epi64(T4, T5);
|
656
|
-
|
657
|
-
C1 = _mm_srli_epi64(T0, 26);
|
658
|
-
C2 = _mm_srli_epi64(T3, 26);
|
659
|
-
T0 = _mm_and_si128(T0, MMASK);
|
660
|
-
T3 = _mm_and_si128(T3, MMASK);
|
661
|
-
T1 = _mm_add_epi64(T1, C1);
|
662
|
-
T4 = _mm_add_epi64(T4, C2);
|
663
|
-
C1 = _mm_srli_epi64(T1, 26);
|
664
|
-
C2 = _mm_srli_epi64(T4, 26);
|
665
|
-
T1 = _mm_and_si128(T1, MMASK);
|
666
|
-
T4 = _mm_and_si128(T4, MMASK);
|
667
|
-
T2 = _mm_add_epi64(T2, C1);
|
668
|
-
T0 = _mm_add_epi64(T0, _mm_mul_epu32(C2, FIVE));
|
669
|
-
C1 = _mm_srli_epi64(T2, 26);
|
670
|
-
C2 = _mm_srli_epi64(T0, 26);
|
671
|
-
T2 = _mm_and_si128(T2, MMASK);
|
672
|
-
T0 = _mm_and_si128(T0, MMASK);
|
673
|
-
T3 = _mm_add_epi64(T3, C1);
|
674
|
-
T1 = _mm_add_epi64(T1, C2);
|
675
|
-
C1 = _mm_srli_epi64(T3, 26);
|
676
|
-
T3 = _mm_and_si128(T3, MMASK);
|
677
|
-
T4 = _mm_add_epi64(T4, C1);
|
678
|
-
|
679
|
-
/* H = H[0]+H[1] */
|
680
|
-
H0 = _mm_add_epi64(T0, _mm_srli_si128(T0, 8));
|
681
|
-
H1 = _mm_add_epi64(T1, _mm_srli_si128(T1, 8));
|
682
|
-
H2 = _mm_add_epi64(T2, _mm_srli_si128(T2, 8));
|
683
|
-
H3 = _mm_add_epi64(T3, _mm_srli_si128(T3, 8));
|
684
|
-
H4 = _mm_add_epi64(T4, _mm_srli_si128(T4, 8));
|
685
|
-
|
686
|
-
t0 = _mm_cvtsi128_si32(H0);
|
687
|
-
c = (t0 >> 26);
|
688
|
-
t0 &= 0x3ffffff;
|
689
|
-
t1 = _mm_cvtsi128_si32(H1) + c;
|
690
|
-
c = (t1 >> 26);
|
691
|
-
t1 &= 0x3ffffff;
|
692
|
-
t2 = _mm_cvtsi128_si32(H2) + c;
|
693
|
-
c = (t2 >> 26);
|
694
|
-
t2 &= 0x3ffffff;
|
695
|
-
t3 = _mm_cvtsi128_si32(H3) + c;
|
696
|
-
c = (t3 >> 26);
|
697
|
-
t3 &= 0x3ffffff;
|
698
|
-
t4 = _mm_cvtsi128_si32(H4) + c;
|
699
|
-
c = (t4 >> 26);
|
700
|
-
t4 &= 0x3ffffff;
|
701
|
-
t0 = t0 + (c * 5);
|
702
|
-
c = (t0 >> 26);
|
703
|
-
t0 &= 0x3ffffff;
|
704
|
-
t1 = t1 + c;
|
705
|
-
|
706
|
-
st->HH[0] = ((t0) | (t1 << 26)) & 0xfffffffffffull;
|
707
|
-
st->HH[1] = ((t1 >> 18) | (t2 << 8) | (t3 << 34)) & 0xfffffffffffull;
|
708
|
-
st->HH[2] = ((t3 >> 10) | (t4 << 16)) & 0x3ffffffffffull;
|
709
|
-
|
710
|
-
return consumed;
|
711
|
-
}
|
712
|
-
|
713
|
-
void CRYPTO_poly1305_update(poly1305_state *state, const uint8_t *m,
|
714
|
-
size_t bytes) {
|
715
|
-
poly1305_state_internal *st = poly1305_aligned_state(state);
|
716
|
-
size_t want;
|
717
|
-
|
718
|
-
/* need at least 32 initial bytes to start the accelerated branch */
|
719
|
-
if (!st->started) {
|
720
|
-
if ((st->leftover == 0) && (bytes > 32)) {
|
721
|
-
poly1305_first_block(st, m);
|
722
|
-
m += 32;
|
723
|
-
bytes -= 32;
|
724
|
-
} else {
|
725
|
-
want = poly1305_min(32 - st->leftover, bytes);
|
726
|
-
poly1305_block_copy(st->buffer + st->leftover, m, want);
|
727
|
-
bytes -= want;
|
728
|
-
m += want;
|
729
|
-
st->leftover += want;
|
730
|
-
if ((st->leftover < 32) || (bytes == 0)) {
|
731
|
-
return;
|
732
|
-
}
|
733
|
-
poly1305_first_block(st, st->buffer);
|
734
|
-
st->leftover = 0;
|
735
|
-
}
|
736
|
-
st->started = 1;
|
737
|
-
}
|
738
|
-
|
739
|
-
/* handle leftover */
|
740
|
-
if (st->leftover) {
|
741
|
-
want = poly1305_min(64 - st->leftover, bytes);
|
742
|
-
poly1305_block_copy(st->buffer + st->leftover, m, want);
|
743
|
-
bytes -= want;
|
744
|
-
m += want;
|
745
|
-
st->leftover += want;
|
746
|
-
if (st->leftover < 64) {
|
747
|
-
return;
|
748
|
-
}
|
749
|
-
poly1305_blocks(st, st->buffer, 64);
|
750
|
-
st->leftover = 0;
|
751
|
-
}
|
752
|
-
|
753
|
-
/* process 64 byte blocks */
|
754
|
-
if (bytes >= 64) {
|
755
|
-
want = (bytes & ~63);
|
756
|
-
poly1305_blocks(st, m, want);
|
757
|
-
m += want;
|
758
|
-
bytes -= want;
|
759
|
-
}
|
760
|
-
|
761
|
-
if (bytes) {
|
762
|
-
poly1305_block_copy(st->buffer + st->leftover, m, bytes);
|
763
|
-
st->leftover += bytes;
|
764
|
-
}
|
765
|
-
}
|
766
|
-
|
767
|
-
void CRYPTO_poly1305_finish(poly1305_state *state, uint8_t mac[16]) {
|
768
|
-
poly1305_state_internal *st = poly1305_aligned_state(state);
|
769
|
-
size_t leftover = st->leftover;
|
770
|
-
uint8_t *m = st->buffer;
|
771
|
-
uint128_t d[3];
|
772
|
-
uint64_t h0, h1, h2;
|
773
|
-
uint64_t t0, t1;
|
774
|
-
uint64_t g0, g1, g2, c, nc;
|
775
|
-
uint64_t r0, r1, r2, s1, s2;
|
776
|
-
poly1305_power *p;
|
777
|
-
|
778
|
-
if (st->started) {
|
779
|
-
size_t consumed = poly1305_combine(st, m, leftover);
|
780
|
-
leftover -= consumed;
|
781
|
-
m += consumed;
|
782
|
-
}
|
783
|
-
|
784
|
-
/* st->HH will either be 0 or have the combined result */
|
785
|
-
h0 = st->HH[0];
|
786
|
-
h1 = st->HH[1];
|
787
|
-
h2 = st->HH[2];
|
788
|
-
|
789
|
-
p = &st->P[1];
|
790
|
-
r0 = ((uint64_t)p->R20.d[3] << 32) | (uint64_t)p->R20.d[1];
|
791
|
-
r1 = ((uint64_t)p->R21.d[3] << 32) | (uint64_t)p->R21.d[1];
|
792
|
-
r2 = ((uint64_t)p->R22.d[3] << 32) | (uint64_t)p->R22.d[1];
|
793
|
-
s1 = r1 * (5 << 2);
|
794
|
-
s2 = r2 * (5 << 2);
|
795
|
-
|
796
|
-
if (leftover < 16) {
|
797
|
-
goto poly1305_donna_atmost15bytes;
|
798
|
-
}
|
799
|
-
|
800
|
-
poly1305_donna_atleast16bytes:
|
801
|
-
t0 = U8TO64_LE(m + 0);
|
802
|
-
t1 = U8TO64_LE(m + 8);
|
803
|
-
h0 += t0 & 0xfffffffffff;
|
804
|
-
t0 = shr128_pair(t1, t0, 44);
|
805
|
-
h1 += t0 & 0xfffffffffff;
|
806
|
-
h2 += (t1 >> 24) | ((uint64_t)1 << 40);
|
807
|
-
|
808
|
-
poly1305_donna_mul:
|
809
|
-
d[0] = add128(add128(mul64x64_128(h0, r0), mul64x64_128(h1, s2)),
|
810
|
-
mul64x64_128(h2, s1));
|
811
|
-
d[1] = add128(add128(mul64x64_128(h0, r1), mul64x64_128(h1, r0)),
|
812
|
-
mul64x64_128(h2, s2));
|
813
|
-
d[2] = add128(add128(mul64x64_128(h0, r2), mul64x64_128(h1, r1)),
|
814
|
-
mul64x64_128(h2, r0));
|
815
|
-
h0 = lo128(d[0]) & 0xfffffffffff;
|
816
|
-
c = shr128(d[0], 44);
|
817
|
-
d[1] = add128_64(d[1], c);
|
818
|
-
h1 = lo128(d[1]) & 0xfffffffffff;
|
819
|
-
c = shr128(d[1], 44);
|
820
|
-
d[2] = add128_64(d[2], c);
|
821
|
-
h2 = lo128(d[2]) & 0x3ffffffffff;
|
822
|
-
c = shr128(d[2], 42);
|
823
|
-
h0 += c * 5;
|
824
|
-
|
825
|
-
m += 16;
|
826
|
-
leftover -= 16;
|
827
|
-
if (leftover >= 16) {
|
828
|
-
goto poly1305_donna_atleast16bytes;
|
829
|
-
}
|
830
|
-
|
831
|
-
/* final bytes */
|
832
|
-
poly1305_donna_atmost15bytes:
|
833
|
-
if (!leftover) {
|
834
|
-
goto poly1305_donna_finish;
|
835
|
-
}
|
836
|
-
|
837
|
-
m[leftover++] = 1;
|
838
|
-
poly1305_block_zero(m + leftover, 16 - leftover);
|
839
|
-
leftover = 16;
|
840
|
-
|
841
|
-
t0 = U8TO64_LE(m + 0);
|
842
|
-
t1 = U8TO64_LE(m + 8);
|
843
|
-
h0 += t0 & 0xfffffffffff;
|
844
|
-
t0 = shr128_pair(t1, t0, 44);
|
845
|
-
h1 += t0 & 0xfffffffffff;
|
846
|
-
h2 += (t1 >> 24);
|
847
|
-
|
848
|
-
goto poly1305_donna_mul;
|
849
|
-
|
850
|
-
poly1305_donna_finish:
|
851
|
-
c = (h0 >> 44);
|
852
|
-
h0 &= 0xfffffffffff;
|
853
|
-
h1 += c;
|
854
|
-
c = (h1 >> 44);
|
855
|
-
h1 &= 0xfffffffffff;
|
856
|
-
h2 += c;
|
857
|
-
c = (h2 >> 42);
|
858
|
-
h2 &= 0x3ffffffffff;
|
859
|
-
h0 += c * 5;
|
860
|
-
|
861
|
-
g0 = h0 + 5;
|
862
|
-
c = (g0 >> 44);
|
863
|
-
g0 &= 0xfffffffffff;
|
864
|
-
g1 = h1 + c;
|
865
|
-
c = (g1 >> 44);
|
866
|
-
g1 &= 0xfffffffffff;
|
867
|
-
g2 = h2 + c - ((uint64_t)1 << 42);
|
868
|
-
|
869
|
-
c = (g2 >> 63) - 1;
|
870
|
-
nc = ~c;
|
871
|
-
h0 = (h0 & nc) | (g0 & c);
|
872
|
-
h1 = (h1 & nc) | (g1 & c);
|
873
|
-
h2 = (h2 & nc) | (g2 & c);
|
874
|
-
|
875
|
-
/* pad */
|
876
|
-
t0 = ((uint64_t)p->R23.d[3] << 32) | (uint64_t)p->R23.d[1];
|
877
|
-
t1 = ((uint64_t)p->R24.d[3] << 32) | (uint64_t)p->R24.d[1];
|
878
|
-
h0 += (t0 & 0xfffffffffff);
|
879
|
-
c = (h0 >> 44);
|
880
|
-
h0 &= 0xfffffffffff;
|
881
|
-
t0 = shr128_pair(t1, t0, 44);
|
882
|
-
h1 += (t0 & 0xfffffffffff) + c;
|
883
|
-
c = (h1 >> 44);
|
884
|
-
h1 &= 0xfffffffffff;
|
885
|
-
t1 = (t1 >> 24);
|
886
|
-
h2 += (t1)+c;
|
887
|
-
|
888
|
-
U64TO8_LE(mac + 0, ((h0) | (h1 << 44)));
|
889
|
-
U64TO8_LE(mac + 8, ((h1 >> 20) | (h2 << 24)));
|
890
|
-
}
|
891
|
-
|
892
|
-
#endif /* !OPENSSL_WINDOWS && OPENSSL_X86_64 */
|