ring-native 0.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/Gemfile +3 -0
- data/README.md +22 -0
- data/Rakefile +1 -0
- data/ext/ring/extconf.rb +29 -0
- data/lib/ring/native.rb +8 -0
- data/lib/ring/native/version.rb +5 -0
- data/ring-native.gemspec +25 -0
- data/vendor/ring/BUILDING.md +40 -0
- data/vendor/ring/Cargo.toml +43 -0
- data/vendor/ring/LICENSE +185 -0
- data/vendor/ring/Makefile +35 -0
- data/vendor/ring/PORTING.md +163 -0
- data/vendor/ring/README.md +113 -0
- data/vendor/ring/STYLE.md +197 -0
- data/vendor/ring/appveyor.yml +27 -0
- data/vendor/ring/build.rs +108 -0
- data/vendor/ring/crypto/aes/aes.c +1142 -0
- data/vendor/ring/crypto/aes/aes_test.Windows.vcxproj +25 -0
- data/vendor/ring/crypto/aes/aes_test.cc +93 -0
- data/vendor/ring/crypto/aes/asm/aes-586.pl +2368 -0
- data/vendor/ring/crypto/aes/asm/aes-armv4.pl +1249 -0
- data/vendor/ring/crypto/aes/asm/aes-x86_64.pl +2246 -0
- data/vendor/ring/crypto/aes/asm/aesni-x86.pl +1318 -0
- data/vendor/ring/crypto/aes/asm/aesni-x86_64.pl +2084 -0
- data/vendor/ring/crypto/aes/asm/aesv8-armx.pl +675 -0
- data/vendor/ring/crypto/aes/asm/bsaes-armv7.pl +1364 -0
- data/vendor/ring/crypto/aes/asm/bsaes-x86_64.pl +1565 -0
- data/vendor/ring/crypto/aes/asm/vpaes-x86.pl +841 -0
- data/vendor/ring/crypto/aes/asm/vpaes-x86_64.pl +1116 -0
- data/vendor/ring/crypto/aes/internal.h +87 -0
- data/vendor/ring/crypto/aes/mode_wrappers.c +61 -0
- data/vendor/ring/crypto/bn/add.c +394 -0
- data/vendor/ring/crypto/bn/asm/armv4-mont.pl +694 -0
- data/vendor/ring/crypto/bn/asm/armv8-mont.pl +1503 -0
- data/vendor/ring/crypto/bn/asm/bn-586.pl +774 -0
- data/vendor/ring/crypto/bn/asm/co-586.pl +287 -0
- data/vendor/ring/crypto/bn/asm/rsaz-avx2.pl +1882 -0
- data/vendor/ring/crypto/bn/asm/x86-mont.pl +592 -0
- data/vendor/ring/crypto/bn/asm/x86_64-gcc.c +599 -0
- data/vendor/ring/crypto/bn/asm/x86_64-mont.pl +1393 -0
- data/vendor/ring/crypto/bn/asm/x86_64-mont5.pl +3507 -0
- data/vendor/ring/crypto/bn/bn.c +352 -0
- data/vendor/ring/crypto/bn/bn_asn1.c +74 -0
- data/vendor/ring/crypto/bn/bn_test.Windows.vcxproj +25 -0
- data/vendor/ring/crypto/bn/bn_test.cc +1696 -0
- data/vendor/ring/crypto/bn/cmp.c +200 -0
- data/vendor/ring/crypto/bn/convert.c +433 -0
- data/vendor/ring/crypto/bn/ctx.c +311 -0
- data/vendor/ring/crypto/bn/div.c +594 -0
- data/vendor/ring/crypto/bn/exponentiation.c +1335 -0
- data/vendor/ring/crypto/bn/gcd.c +711 -0
- data/vendor/ring/crypto/bn/generic.c +1019 -0
- data/vendor/ring/crypto/bn/internal.h +316 -0
- data/vendor/ring/crypto/bn/montgomery.c +516 -0
- data/vendor/ring/crypto/bn/mul.c +888 -0
- data/vendor/ring/crypto/bn/prime.c +829 -0
- data/vendor/ring/crypto/bn/random.c +334 -0
- data/vendor/ring/crypto/bn/rsaz_exp.c +262 -0
- data/vendor/ring/crypto/bn/rsaz_exp.h +53 -0
- data/vendor/ring/crypto/bn/shift.c +276 -0
- data/vendor/ring/crypto/bytestring/bytestring_test.Windows.vcxproj +25 -0
- data/vendor/ring/crypto/bytestring/bytestring_test.cc +421 -0
- data/vendor/ring/crypto/bytestring/cbb.c +399 -0
- data/vendor/ring/crypto/bytestring/cbs.c +227 -0
- data/vendor/ring/crypto/bytestring/internal.h +46 -0
- data/vendor/ring/crypto/chacha/chacha_generic.c +140 -0
- data/vendor/ring/crypto/chacha/chacha_vec.c +323 -0
- data/vendor/ring/crypto/chacha/chacha_vec_arm.S +1447 -0
- data/vendor/ring/crypto/chacha/chacha_vec_arm_generate.go +153 -0
- data/vendor/ring/crypto/cipher/cipher_test.Windows.vcxproj +25 -0
- data/vendor/ring/crypto/cipher/e_aes.c +390 -0
- data/vendor/ring/crypto/cipher/e_chacha20poly1305.c +208 -0
- data/vendor/ring/crypto/cipher/internal.h +173 -0
- data/vendor/ring/crypto/cipher/test/aes_128_gcm_tests.txt +543 -0
- data/vendor/ring/crypto/cipher/test/aes_128_key_wrap_tests.txt +9 -0
- data/vendor/ring/crypto/cipher/test/aes_256_gcm_tests.txt +475 -0
- data/vendor/ring/crypto/cipher/test/aes_256_key_wrap_tests.txt +23 -0
- data/vendor/ring/crypto/cipher/test/chacha20_poly1305_old_tests.txt +422 -0
- data/vendor/ring/crypto/cipher/test/chacha20_poly1305_tests.txt +484 -0
- data/vendor/ring/crypto/cipher/test/cipher_test.txt +100 -0
- data/vendor/ring/crypto/constant_time_test.Windows.vcxproj +25 -0
- data/vendor/ring/crypto/constant_time_test.c +304 -0
- data/vendor/ring/crypto/cpu-arm-asm.S +32 -0
- data/vendor/ring/crypto/cpu-arm.c +199 -0
- data/vendor/ring/crypto/cpu-intel.c +261 -0
- data/vendor/ring/crypto/crypto.c +151 -0
- data/vendor/ring/crypto/curve25519/asm/x25519-arm.S +2118 -0
- data/vendor/ring/crypto/curve25519/curve25519.c +4888 -0
- data/vendor/ring/crypto/curve25519/x25519_test.cc +128 -0
- data/vendor/ring/crypto/digest/md32_common.h +181 -0
- data/vendor/ring/crypto/ec/asm/p256-x86_64-asm.pl +2725 -0
- data/vendor/ring/crypto/ec/ec.c +193 -0
- data/vendor/ring/crypto/ec/ec_curves.c +61 -0
- data/vendor/ring/crypto/ec/ec_key.c +228 -0
- data/vendor/ring/crypto/ec/ec_montgomery.c +114 -0
- data/vendor/ring/crypto/ec/example_mul.Windows.vcxproj +25 -0
- data/vendor/ring/crypto/ec/internal.h +243 -0
- data/vendor/ring/crypto/ec/oct.c +253 -0
- data/vendor/ring/crypto/ec/p256-64.c +1794 -0
- data/vendor/ring/crypto/ec/p256-x86_64-table.h +9548 -0
- data/vendor/ring/crypto/ec/p256-x86_64.c +509 -0
- data/vendor/ring/crypto/ec/simple.c +1007 -0
- data/vendor/ring/crypto/ec/util-64.c +183 -0
- data/vendor/ring/crypto/ec/wnaf.c +508 -0
- data/vendor/ring/crypto/ecdh/ecdh.c +155 -0
- data/vendor/ring/crypto/ecdsa/ecdsa.c +304 -0
- data/vendor/ring/crypto/ecdsa/ecdsa_asn1.c +193 -0
- data/vendor/ring/crypto/ecdsa/ecdsa_test.Windows.vcxproj +25 -0
- data/vendor/ring/crypto/ecdsa/ecdsa_test.cc +327 -0
- data/vendor/ring/crypto/header_removed.h +17 -0
- data/vendor/ring/crypto/internal.h +495 -0
- data/vendor/ring/crypto/libring.Windows.vcxproj +101 -0
- data/vendor/ring/crypto/mem.c +98 -0
- data/vendor/ring/crypto/modes/asm/aesni-gcm-x86_64.pl +1045 -0
- data/vendor/ring/crypto/modes/asm/ghash-armv4.pl +517 -0
- data/vendor/ring/crypto/modes/asm/ghash-x86.pl +1393 -0
- data/vendor/ring/crypto/modes/asm/ghash-x86_64.pl +1741 -0
- data/vendor/ring/crypto/modes/asm/ghashv8-armx.pl +422 -0
- data/vendor/ring/crypto/modes/ctr.c +226 -0
- data/vendor/ring/crypto/modes/gcm.c +1206 -0
- data/vendor/ring/crypto/modes/gcm_test.Windows.vcxproj +25 -0
- data/vendor/ring/crypto/modes/gcm_test.c +348 -0
- data/vendor/ring/crypto/modes/internal.h +299 -0
- data/vendor/ring/crypto/perlasm/arm-xlate.pl +170 -0
- data/vendor/ring/crypto/perlasm/readme +100 -0
- data/vendor/ring/crypto/perlasm/x86_64-xlate.pl +1164 -0
- data/vendor/ring/crypto/perlasm/x86asm.pl +292 -0
- data/vendor/ring/crypto/perlasm/x86gas.pl +263 -0
- data/vendor/ring/crypto/perlasm/x86masm.pl +200 -0
- data/vendor/ring/crypto/perlasm/x86nasm.pl +187 -0
- data/vendor/ring/crypto/poly1305/poly1305.c +331 -0
- data/vendor/ring/crypto/poly1305/poly1305_arm.c +301 -0
- data/vendor/ring/crypto/poly1305/poly1305_arm_asm.S +2015 -0
- data/vendor/ring/crypto/poly1305/poly1305_test.Windows.vcxproj +25 -0
- data/vendor/ring/crypto/poly1305/poly1305_test.cc +80 -0
- data/vendor/ring/crypto/poly1305/poly1305_test.txt +52 -0
- data/vendor/ring/crypto/poly1305/poly1305_vec.c +892 -0
- data/vendor/ring/crypto/rand/asm/rdrand-x86_64.pl +75 -0
- data/vendor/ring/crypto/rand/internal.h +32 -0
- data/vendor/ring/crypto/rand/rand.c +189 -0
- data/vendor/ring/crypto/rand/urandom.c +219 -0
- data/vendor/ring/crypto/rand/windows.c +56 -0
- data/vendor/ring/crypto/refcount_c11.c +66 -0
- data/vendor/ring/crypto/refcount_lock.c +53 -0
- data/vendor/ring/crypto/refcount_test.Windows.vcxproj +25 -0
- data/vendor/ring/crypto/refcount_test.c +58 -0
- data/vendor/ring/crypto/rsa/blinding.c +462 -0
- data/vendor/ring/crypto/rsa/internal.h +108 -0
- data/vendor/ring/crypto/rsa/padding.c +300 -0
- data/vendor/ring/crypto/rsa/rsa.c +450 -0
- data/vendor/ring/crypto/rsa/rsa_asn1.c +261 -0
- data/vendor/ring/crypto/rsa/rsa_impl.c +944 -0
- data/vendor/ring/crypto/rsa/rsa_test.Windows.vcxproj +25 -0
- data/vendor/ring/crypto/rsa/rsa_test.cc +437 -0
- data/vendor/ring/crypto/sha/asm/sha-armv8.pl +436 -0
- data/vendor/ring/crypto/sha/asm/sha-x86_64.pl +2390 -0
- data/vendor/ring/crypto/sha/asm/sha256-586.pl +1275 -0
- data/vendor/ring/crypto/sha/asm/sha256-armv4.pl +735 -0
- data/vendor/ring/crypto/sha/asm/sha256-armv8.pl +14 -0
- data/vendor/ring/crypto/sha/asm/sha256-x86_64.pl +14 -0
- data/vendor/ring/crypto/sha/asm/sha512-586.pl +911 -0
- data/vendor/ring/crypto/sha/asm/sha512-armv4.pl +666 -0
- data/vendor/ring/crypto/sha/asm/sha512-armv8.pl +14 -0
- data/vendor/ring/crypto/sha/asm/sha512-x86_64.pl +14 -0
- data/vendor/ring/crypto/sha/sha1.c +271 -0
- data/vendor/ring/crypto/sha/sha256.c +204 -0
- data/vendor/ring/crypto/sha/sha512.c +355 -0
- data/vendor/ring/crypto/test/file_test.cc +326 -0
- data/vendor/ring/crypto/test/file_test.h +181 -0
- data/vendor/ring/crypto/test/malloc.cc +150 -0
- data/vendor/ring/crypto/test/scoped_types.h +95 -0
- data/vendor/ring/crypto/test/test.Windows.vcxproj +35 -0
- data/vendor/ring/crypto/test/test_util.cc +46 -0
- data/vendor/ring/crypto/test/test_util.h +41 -0
- data/vendor/ring/crypto/thread_none.c +55 -0
- data/vendor/ring/crypto/thread_pthread.c +165 -0
- data/vendor/ring/crypto/thread_test.Windows.vcxproj +25 -0
- data/vendor/ring/crypto/thread_test.c +200 -0
- data/vendor/ring/crypto/thread_win.c +282 -0
- data/vendor/ring/examples/checkdigest.rs +103 -0
- data/vendor/ring/include/openssl/aes.h +121 -0
- data/vendor/ring/include/openssl/arm_arch.h +129 -0
- data/vendor/ring/include/openssl/base.h +156 -0
- data/vendor/ring/include/openssl/bn.h +794 -0
- data/vendor/ring/include/openssl/buffer.h +18 -0
- data/vendor/ring/include/openssl/bytestring.h +235 -0
- data/vendor/ring/include/openssl/chacha.h +37 -0
- data/vendor/ring/include/openssl/cmac.h +76 -0
- data/vendor/ring/include/openssl/cpu.h +184 -0
- data/vendor/ring/include/openssl/crypto.h +43 -0
- data/vendor/ring/include/openssl/curve25519.h +88 -0
- data/vendor/ring/include/openssl/ec.h +225 -0
- data/vendor/ring/include/openssl/ec_key.h +129 -0
- data/vendor/ring/include/openssl/ecdh.h +110 -0
- data/vendor/ring/include/openssl/ecdsa.h +156 -0
- data/vendor/ring/include/openssl/err.h +201 -0
- data/vendor/ring/include/openssl/mem.h +101 -0
- data/vendor/ring/include/openssl/obj_mac.h +71 -0
- data/vendor/ring/include/openssl/opensslfeatures.h +68 -0
- data/vendor/ring/include/openssl/opensslv.h +18 -0
- data/vendor/ring/include/openssl/ossl_typ.h +18 -0
- data/vendor/ring/include/openssl/poly1305.h +51 -0
- data/vendor/ring/include/openssl/rand.h +70 -0
- data/vendor/ring/include/openssl/rsa.h +399 -0
- data/vendor/ring/include/openssl/thread.h +133 -0
- data/vendor/ring/include/openssl/type_check.h +71 -0
- data/vendor/ring/mk/Common.props +63 -0
- data/vendor/ring/mk/Windows.props +42 -0
- data/vendor/ring/mk/WindowsTest.props +18 -0
- data/vendor/ring/mk/appveyor.bat +62 -0
- data/vendor/ring/mk/bottom_of_makefile.mk +54 -0
- data/vendor/ring/mk/ring.mk +266 -0
- data/vendor/ring/mk/top_of_makefile.mk +214 -0
- data/vendor/ring/mk/travis.sh +40 -0
- data/vendor/ring/mk/update-travis-yml.py +229 -0
- data/vendor/ring/ring.sln +153 -0
- data/vendor/ring/src/aead.rs +682 -0
- data/vendor/ring/src/agreement.rs +248 -0
- data/vendor/ring/src/c.rs +129 -0
- data/vendor/ring/src/constant_time.rs +37 -0
- data/vendor/ring/src/der.rs +96 -0
- data/vendor/ring/src/digest.rs +690 -0
- data/vendor/ring/src/digest_tests.txt +57 -0
- data/vendor/ring/src/ecc.rs +28 -0
- data/vendor/ring/src/ecc_build.rs +279 -0
- data/vendor/ring/src/ecc_curves.rs +117 -0
- data/vendor/ring/src/ed25519_tests.txt +2579 -0
- data/vendor/ring/src/exe_tests.rs +46 -0
- data/vendor/ring/src/ffi.rs +29 -0
- data/vendor/ring/src/file_test.rs +187 -0
- data/vendor/ring/src/hkdf.rs +153 -0
- data/vendor/ring/src/hkdf_tests.txt +59 -0
- data/vendor/ring/src/hmac.rs +414 -0
- data/vendor/ring/src/hmac_tests.txt +97 -0
- data/vendor/ring/src/input.rs +312 -0
- data/vendor/ring/src/lib.rs +41 -0
- data/vendor/ring/src/pbkdf2.rs +265 -0
- data/vendor/ring/src/pbkdf2_tests.txt +113 -0
- data/vendor/ring/src/polyfill.rs +57 -0
- data/vendor/ring/src/rand.rs +28 -0
- data/vendor/ring/src/signature.rs +314 -0
- data/vendor/ring/third-party/NIST/README.md +9 -0
- data/vendor/ring/third-party/NIST/SHAVS/SHA1LongMsg.rsp +263 -0
- data/vendor/ring/third-party/NIST/SHAVS/SHA1Monte.rsp +309 -0
- data/vendor/ring/third-party/NIST/SHAVS/SHA1ShortMsg.rsp +267 -0
- data/vendor/ring/third-party/NIST/SHAVS/SHA224LongMsg.rsp +263 -0
- data/vendor/ring/third-party/NIST/SHAVS/SHA224Monte.rsp +309 -0
- data/vendor/ring/third-party/NIST/SHAVS/SHA224ShortMsg.rsp +267 -0
- data/vendor/ring/third-party/NIST/SHAVS/SHA256LongMsg.rsp +263 -0
- data/vendor/ring/third-party/NIST/SHAVS/SHA256Monte.rsp +309 -0
- data/vendor/ring/third-party/NIST/SHAVS/SHA256ShortMsg.rsp +267 -0
- data/vendor/ring/third-party/NIST/SHAVS/SHA384LongMsg.rsp +519 -0
- data/vendor/ring/third-party/NIST/SHAVS/SHA384Monte.rsp +309 -0
- data/vendor/ring/third-party/NIST/SHAVS/SHA384ShortMsg.rsp +523 -0
- data/vendor/ring/third-party/NIST/SHAVS/SHA512LongMsg.rsp +519 -0
- data/vendor/ring/third-party/NIST/SHAVS/SHA512Monte.rsp +309 -0
- data/vendor/ring/third-party/NIST/SHAVS/SHA512ShortMsg.rsp +523 -0
- data/vendor/ring/third-party/NIST/sha256sums.txt +1 -0
- metadata +333 -0
@@ -0,0 +1,25 @@
|
|
1
|
+
<?xml version="1.0" encoding="utf-8"?>
|
2
|
+
<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
3
|
+
<PropertyGroup Label="Globals">
|
4
|
+
<ProjectGuid>{CD0F021B-E347-4CCA-B5B7-CD1F757E15D6}</ProjectGuid>
|
5
|
+
<TargetName>poly1305_test</TargetName>
|
6
|
+
</PropertyGroup>
|
7
|
+
<ImportGroup Label="PropertySheets">
|
8
|
+
<Import Project="..\..\mk\WindowsTest.props" />
|
9
|
+
</ImportGroup>
|
10
|
+
<PropertyGroup Label="Configuration">
|
11
|
+
<OutDir>$(OutRootDir)test\ring\crypto\poly1305\</OutDir>
|
12
|
+
</PropertyGroup>
|
13
|
+
<ItemGroup>
|
14
|
+
<ClCompile Include="poly1305_test.cc" />
|
15
|
+
</ItemGroup>
|
16
|
+
<ItemGroup>
|
17
|
+
<ProjectReference Include="..\libring.Windows.vcxproj">
|
18
|
+
<Project>{f4c0a1b6-5e09-41c8-8242-3e1f6762fb18}</Project>
|
19
|
+
</ProjectReference>
|
20
|
+
<ProjectReference Include="..\test\test.Windows.vcxproj">
|
21
|
+
<Project>{1dace503-6498-492d-b1ff-f9ee18624443}</Project>
|
22
|
+
</ProjectReference>
|
23
|
+
</ItemGroup>
|
24
|
+
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
25
|
+
</Project>
|
@@ -0,0 +1,80 @@
|
|
1
|
+
/* Copyright (c) 2015, Google Inc.
|
2
|
+
*
|
3
|
+
* Permission to use, copy, modify, and/or distribute this software for any
|
4
|
+
* purpose with or without fee is hereby granted, provided that the above
|
5
|
+
* copyright notice and this permission notice appear in all copies.
|
6
|
+
*
|
7
|
+
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
8
|
+
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
9
|
+
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
|
10
|
+
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
11
|
+
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
|
12
|
+
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
13
|
+
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
|
14
|
+
|
15
|
+
#include <stdio.h>
|
16
|
+
#include <string.h>
|
17
|
+
|
18
|
+
#include <vector>
|
19
|
+
|
20
|
+
#include <openssl/crypto.h>
|
21
|
+
#include <openssl/poly1305.h>
|
22
|
+
|
23
|
+
#include "../test/file_test.h"
|
24
|
+
|
25
|
+
|
26
|
+
// |CRYPTO_poly1305_finish| requires a 16-byte-aligned output.
|
27
|
+
#if defined(OPENSSL_WINDOWS)
|
28
|
+
// MSVC doesn't support C++11 |alignas|.
|
29
|
+
#define ALIGNED __declspec(align(16))
|
30
|
+
#else
|
31
|
+
#define ALIGNED alignas(16)
|
32
|
+
#endif
|
33
|
+
|
34
|
+
// Runs a single test vector. Reads the "Key", "Input" and "MAC" attributes
// from |t|, requires a 32-byte key and a 16-byte MAC, and then checks that
// the expected MAC is produced both when the input is supplied in one call
// and when it is supplied one byte at a time. Returns false if an attribute
// cannot be read, the sizes are wrong, or either computation mismatches.
// |arg| is not used.
static bool TestPoly1305(FileTest *t, void *arg) {
  std::vector<uint8_t> key, in, mac;
  const bool have_all_attributes = t->GetBytes(&key, "Key") &&
                                   t->GetBytes(&in, "Input") &&
                                   t->GetBytes(&mac, "MAC");
  if (!have_all_attributes) {
    return false;
  }

  const bool sizes_ok = key.size() == 32 && mac.size() == 16;
  if (!sizes_ok) {
    t->PrintLine("Invalid test");
    return false;
  }

  poly1305_state state;
  ALIGNED uint8_t out[16];

  // Pass 1: the whole input in a single update call.
  CRYPTO_poly1305_init(&state, key.data());
  CRYPTO_poly1305_update(&state, in.data(), in.size());
  CRYPTO_poly1305_finish(&state, out);
  if (!t->ExpectBytesEqual(out, 16, mac.data(), mac.size())) {
    t->PrintLine("Single-shot Poly1305 failed.");
    return false;
  }

  // Pass 2: the same input, fed one byte per update call.
  CRYPTO_poly1305_init(&state, key.data());
  for (const uint8_t &byte : in) {
    CRYPTO_poly1305_update(&state, &byte, 1);
  }
  CRYPTO_poly1305_finish(&state, out);
  if (!t->ExpectBytesEqual(out, 16, mac.data(), mac.size())) {
    t->PrintLine("Streaming Poly1305 failed.");
    return false;
  }

  return true;
}
|
70
|
+
|
71
|
+
int main(int argc, char **argv) {
|
72
|
+
CRYPTO_library_init();
|
73
|
+
|
74
|
+
if (argc != 2) {
|
75
|
+
fprintf(stderr, "%s <test file>\n", argv[0]);
|
76
|
+
return 1;
|
77
|
+
}
|
78
|
+
|
79
|
+
return FileTestMain(TestPoly1305, nullptr, argv[1]);
|
80
|
+
}
|
@@ -0,0 +1,52 @@
|
|
1
|
+
# RFC 7539, section 2.5.2.
|
2
|
+
|
3
|
+
Key = 85d6be7857556d337f4452fe42d506a80103808afb0db2fd4abff6af4149f51b
|
4
|
+
Input = "Cryptographic Forum Research Group"
|
5
|
+
MAC = a8061dc1305136c6c22b8baf0c0127a9
|
6
|
+
|
7
|
+
|
8
|
+
# RFC 7539, section A.3.
|
9
|
+
|
10
|
+
Key = 0000000000000000000000000000000000000000000000000000000000000000
|
11
|
+
Input = 00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
|
12
|
+
MAC = 00000000000000000000000000000000
|
13
|
+
|
14
|
+
Key = 0000000000000000000000000000000036e5f6b5c5e06070f0efca96227a863e
|
15
|
+
Input = 416e79207375626d697373696f6e20746f20746865204945544620696e74656e6465642062792074686520436f6e7472696275746f7220666f72207075626c69636174696f6e20617320616c6c206f722070617274206f6620616e204945544620496e7465726e65742d4472616674206f722052464320616e6420616e792073746174656d656e74206d6164652077697468696e2074686520636f6e74657874206f6620616e204945544620616374697669747920697320636f6e7369646572656420616e20224945544620436f6e747269627574696f6e222e20537563682073746174656d656e747320696e636c756465206f72616c2073746174656d656e747320696e20494554462073657373696f6e732c2061732077656c6c206173207772697474656e20616e6420656c656374726f6e696320636f6d6d756e69636174696f6e73206d61646520617420616e792074696d65206f7220706c6163652c207768696368206172652061646472657373656420746f
|
16
|
+
MAC = 36e5f6b5c5e06070f0efca96227a863e
|
17
|
+
|
18
|
+
Key = 36e5f6b5c5e06070f0efca96227a863e00000000000000000000000000000000
|
19
|
+
Input = 416e79207375626d697373696f6e20746f20746865204945544620696e74656e6465642062792074686520436f6e7472696275746f7220666f72207075626c69636174696f6e20617320616c6c206f722070617274206f6620616e204945544620496e7465726e65742d4472616674206f722052464320616e6420616e792073746174656d656e74206d6164652077697468696e2074686520636f6e74657874206f6620616e204945544620616374697669747920697320636f6e7369646572656420616e20224945544620436f6e747269627574696f6e222e20537563682073746174656d656e747320696e636c756465206f72616c2073746174656d656e747320696e20494554462073657373696f6e732c2061732077656c6c206173207772697474656e20616e6420656c656374726f6e696320636f6d6d756e69636174696f6e73206d61646520617420616e792074696d65206f7220706c6163652c207768696368206172652061646472657373656420746f
|
20
|
+
MAC = f3477e7cd95417af89a6b8794c310cf0
|
21
|
+
|
22
|
+
Key = 1c9240a5eb55d38af333888604f6b5f0473917c1402b80099dca5cbc207075c0
|
23
|
+
Input = 2754776173206272696c6c69672c20616e642074686520736c6974687920746f7665730a446964206779726520616e642067696d626c6520696e2074686520776162653a0a416c6c206d696d737920776572652074686520626f726f676f7665732c0a416e6420746865206d6f6d65207261746873206f757467726162652e
|
24
|
+
MAC = 4541669a7eaaee61e708dc7cbcc5eb62
|
25
|
+
|
26
|
+
Key = 0200000000000000000000000000000000000000000000000000000000000000
|
27
|
+
Input = ffffffffffffffffffffffffffffffff
|
28
|
+
MAC = 03000000000000000000000000000000
|
29
|
+
|
30
|
+
Key = 02000000000000000000000000000000ffffffffffffffffffffffffffffffff
|
31
|
+
Input = 02000000000000000000000000000000
|
32
|
+
MAC = 03000000000000000000000000000000
|
33
|
+
|
34
|
+
Key = 0100000000000000000000000000000000000000000000000000000000000000
|
35
|
+
Input = fffffffffffffffffffffffffffffffff0ffffffffffffffffffffffffffffff11000000000000000000000000000000
|
36
|
+
MAC = 05000000000000000000000000000000
|
37
|
+
|
38
|
+
Key = 0100000000000000000000000000000000000000000000000000000000000000
|
39
|
+
Input = fffffffffffffffffffffffffffffffffbfefefefefefefefefefefefefefefe01010101010101010101010101010101
|
40
|
+
MAC = 00000000000000000000000000000000
|
41
|
+
|
42
|
+
Key = 0200000000000000000000000000000000000000000000000000000000000000
|
43
|
+
Input = fdffffffffffffffffffffffffffffff
|
44
|
+
MAC = faffffffffffffffffffffffffffffff
|
45
|
+
|
46
|
+
Key = 0100000000000000040000000000000000000000000000000000000000000000
|
47
|
+
Input = e33594d7505e43b900000000000000003394d7505e4379cd01000000000000000000000000000000000000000000000001000000000000000000000000000000
|
48
|
+
MAC = 14000000000000005500000000000000
|
49
|
+
|
50
|
+
Key = 0100000000000000040000000000000000000000000000000000000000000000
|
51
|
+
Input = e33594d7505e43b900000000000000003394d7505e4379cd010000000000000000000000000000000000000000000000
|
52
|
+
MAC = 13000000000000000000000000000000
|
@@ -0,0 +1,892 @@
|
|
1
|
+
/* Copyright (c) 2014, Google Inc.
|
2
|
+
*
|
3
|
+
* Permission to use, copy, modify, and/or distribute this software for any
|
4
|
+
* purpose with or without fee is hereby granted, provided that the above
|
5
|
+
* copyright notice and this permission notice appear in all copies.
|
6
|
+
*
|
7
|
+
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
8
|
+
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
9
|
+
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
|
10
|
+
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
11
|
+
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
|
12
|
+
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
13
|
+
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
|
14
|
+
|
15
|
+
/* This implementation of poly1305 is by Andrew Moon
|
16
|
+
* (https://github.com/floodyberry/poly1305-donna) and released as public
|
17
|
+
* domain. It implements SIMD vectorization based on the algorithm described in
|
18
|
+
* http://cr.yp.to/papers.html#neoncrypto. Unrolled to 2 powers, i.e. 64 byte
|
19
|
+
* block size */
|
20
|
+
|
21
|
+
#include <openssl/poly1305.h>
|
22
|
+
|
23
|
+
|
24
|
+
#if !defined(OPENSSL_WINDOWS) && defined(OPENSSL_X86_64)
|
25
|
+
|
26
|
+
#include <emmintrin.h>
|
27
|
+
|
28
|
+
/* Requests |x|-byte alignment for the declared object (GCC/Clang attribute
 * syntax). */
#define ALIGN(x) __attribute__((aligned(x)))
/* inline is not a keyword in C89. */
#define INLINE
/* Word-sized loads and stores at byte pointer |m| in the host's native byte
 * order. The load macros keep the pointee const so they can be applied to
 * const buffers without discarding the qualifier; the store macro is fully
 * parenthesized so it expands to a single expression. */
#define U8TO64_LE(m) (*(const uint64_t *)(m))
#define U8TO32_LE(m) (*(const uint32_t *)(m))
#define U64TO8_LE(m, v) (*(uint64_t *)(m) = (v))

/* 128-bit SSE2 integer vector. */
typedef __m128i xmmi;
/* 128-bit unsigned integer (compiler extension type). */
typedef unsigned __int128 uint128_t;

/* Low 26 bits set in 32-bit lanes 0 and 2; ANDed with shifted message words
 * to isolate one 26-bit limb per 64-bit half. */
static const uint32_t ALIGN(16) poly1305_x64_sse2_message_mask[4] = {
    (1 << 26) - 1, 0, (1 << 26) - 1, 0};
/* The value 5 in lanes 0 and 2; multiplied into the R limbs to form the S
 * limbs. */
static const uint32_t ALIGN(16) poly1305_x64_sse2_5[4] = {5, 0, 5, 0};
/* Bit 24 in lanes 0 and 2; ORed into the top limb of each message block. */
static const uint32_t ALIGN(16) poly1305_x64_sse2_1shl128[4] = {(1 << 24), 0,
                                                                (1 << 24), 0};
|
43
|
+
|
44
|
+
/* Returns a + b as a 128-bit value (wrapping modulo 2^128). */
static uint128_t INLINE add128(uint128_t a, uint128_t b) {
  uint128_t sum = a;
  sum += b;
  return sum;
}
|
45
|
+
|
46
|
+
/* Returns the 128-bit value |a| plus the 64-bit value |b|, widening |b|
 * before the addition. */
static uint128_t INLINE add128_64(uint128_t a, uint64_t b) {
  return a + (uint128_t)b;
}
|
47
|
+
|
48
|
+
/* Returns the full 128-bit product of the 64-bit values |a| and |b|. */
static uint128_t INLINE mul64x64_128(uint64_t a, uint64_t b) {
  uint128_t product = a;
  product *= b;
  return product;
}
|
51
|
+
|
52
|
+
/* Returns the low 64 bits of |a|; the high 64 bits are discarded. */
static uint64_t INLINE lo128(uint128_t a) {
  const uint64_t low_word = (uint64_t)a;
  return low_word;
}
|
53
|
+
|
54
|
+
/* Shifts |v| right by |shift| bits and returns the low 64 bits of the
 * result. */
static uint64_t INLINE shr128(uint128_t v, const int shift) {
  v >>= shift;
  return (uint64_t)v;
}
|
57
|
+
|
58
|
+
/* Treats |hi|:|lo| as one 128-bit value, shifts it right by |shift| bits and
 * returns the low 64 bits of the result. */
static uint64_t INLINE shr128_pair(uint64_t hi, uint64_t lo, const int shift) {
  uint128_t wide = hi;
  wide <<= 64;
  wide |= lo;
  wide >>= shift;
  return (uint64_t)wide;
}
|
61
|
+
|
62
|
+
/* One precomputed power of r in the form used by the SSE2 multiply: nine
 * 16-byte lanes. R20..R24 hold the five limbs, each broadcast to 32-bit words
 * 0 and 2; S21..S24 hold the corresponding R limbs multiplied by 5. Words 1
 * and 3 of each lane are not used by the vector multiply. */
typedef struct poly1305_power_t {
  /* A 16-byte lane, viewable as one vector, two 64-bit words or four 32-bit
   * words. */
  union poly1305_power_lane_t {
    xmmi v;
    uint64_t u[2];
    uint32_t d[4];
  } R20, R21, R22, R23, R24, /* limbs of the power */
      S21, S22, S23, S24;    /* 5 * R21 .. 5 * R24 */
} poly1305_power;
|
69
|
+
|
70
|
+
typedef struct poly1305_state_internal_t {
|
71
|
+
poly1305_power P[2]; /* 288 bytes, top 32 bit halves unused = 144
|
72
|
+
bytes of free storage */
|
73
|
+
union {
|
74
|
+
xmmi H[5]; /* 80 bytes */
|
75
|
+
uint64_t HH[10];
|
76
|
+
};
|
77
|
+
/* uint64_t r0,r1,r2; [24 bytes] */
|
78
|
+
/* uint64_t pad0,pad1; [16 bytes] */
|
79
|
+
uint64_t started; /* 8 bytes */
|
80
|
+
uint64_t leftover; /* 8 bytes */
|
81
|
+
uint8_t buffer[64]; /* 64 bytes */
|
82
|
+
} poly1305_state_internal; /* 448 bytes total + 63 bytes for
|
83
|
+
alignment = 511 bytes raw */
|
84
|
+
|
85
|
+
/* Returns the first 64-byte-aligned address at or after |state|, typed as the
 * internal state structure. The result can lie up to 63 bytes past |state|,
 * so the caller's buffer must be large enough for the structure plus that
 * slack. The pointer/integer round trip uses uintptr_t, the integer type
 * defined for holding pointer values. */
static poly1305_state_internal INLINE *poly1305_aligned_state(
    poly1305_state *state) {
  const uintptr_t alignment_mask = 63;
  const uintptr_t raw = (uintptr_t)state;
  return (poly1305_state_internal *)((raw + alignment_mask) & ~alignment_mask);
}
|
89
|
+
|
90
|
+
/* Copies (bytes & 63) bytes, i.e. 0-63 bytes, from |src| to |dst|. Only the
 * low six bits of |bytes| are examined. The regions must not overlap.
 *
 * The 32- and 16-byte pieces go through the unaligned SSE2 load/store
 * intrinsics; the remaining 0-15 bytes are copied bytewise, so no access
 * relies on |dst| or |src| being aligned for a wider integer type. */
static void INLINE
poly1305_block_copy(uint8_t *dst, const uint8_t *src, size_t bytes) {
  size_t tail;
  size_t i;

  if (bytes & 32) {
    _mm_storeu_si128((xmmi *)(dst + 0),
                     _mm_loadu_si128((const xmmi *)(src + 0)));
    _mm_storeu_si128((xmmi *)(dst + 16),
                     _mm_loadu_si128((const xmmi *)(src + 16)));
    dst += 32;
    src += 32;
  }
  if (bytes & 16) {
    _mm_storeu_si128((xmmi *)dst, _mm_loadu_si128((const xmmi *)src));
    dst += 16;
    src += 16;
  }

  /* Whatever is left is described by the low four bits. */
  tail = bytes & 15;
  for (i = 0; i < tail; i++) {
    dst[i] = src[i];
  }
}
|
121
|
+
|
122
|
+
/* Zeroes (bytes & 15) bytes, i.e. 0-15 bytes, starting at |dst|. Only the low
 * four bits of |bytes| are examined. The bytes are written one at a time, so
 * |dst| needs no particular alignment. */
static void INLINE poly1305_block_zero(uint8_t *dst, size_t bytes) {
  const size_t count = bytes & 15;
  size_t i;

  for (i = 0; i < count; i++) {
    dst[i] = 0;
  }
}
|
140
|
+
|
141
|
+
/* Returns the smaller of |a| and |b|. */
static size_t INLINE poly1305_min(size_t a, size_t b) {
  if (b < a) {
    return b;
  }
  return a;
}
|
144
|
+
|
145
|
+
/* Initializes |state| from the 32-byte |key|.
 *
 * The first 16 key bytes are masked (clamped) and split into three limbs
 * r0/r1/r2 of 44, 44 and 40 bits; the last 16 key bytes are kept verbatim as
 * the pad. Both are parked in the odd 32-bit words of st->P[1], which the
 * vector code does not use. The accumulator H and the |started| and
 * |leftover| fields are cleared. */
void CRYPTO_poly1305_init(poly1305_state *state, const uint8_t key[32]) {
  poly1305_state_internal *st = poly1305_aligned_state(state);
  poly1305_power *stash = &st->P[1];
  const uint64_t key_lo = U8TO64_LE(key + 0);
  const uint64_t key_hi = U8TO64_LE(key + 8);
  /* clamp key */
  const uint64_t r0 = key_lo & 0xffc0fffffff;
  const uint64_t r1 = ((key_lo >> 44) | (key_hi << 20)) & 0xfffffc0ffff;
  const uint64_t r2 = (key_hi >> 24) & 0x00ffffffc0f;
  size_t i;

  /* store r in un-used space of st->P[1]: low half in word 1, high half in
   * word 3 of each lane */
  stash->R20.d[1] = (uint32_t)(r0);
  stash->R20.d[3] = (uint32_t)(r0 >> 32);
  stash->R21.d[1] = (uint32_t)(r1);
  stash->R21.d[3] = (uint32_t)(r1 >> 32);
  stash->R22.d[1] = (uint32_t)(r2);
  stash->R22.d[3] = (uint32_t)(r2 >> 32);

  /* store pad, four 32-bit words taken from key bytes 16..31 */
  stash->R23.d[1] = U8TO32_LE(key + 16);
  stash->R23.d[3] = U8TO32_LE(key + 20);
  stash->R24.d[1] = U8TO32_LE(key + 24);
  stash->R24.d[3] = U8TO32_LE(key + 28);

  /* H = 0 */
  for (i = 0; i < 5; i++) {
    st->H[i] = _mm_setzero_si128();
  }

  st->started = 0;
  st->leftover = 0;
}
|
186
|
+
|
187
|
+
/* Sets up |st| for vectorized processing and absorbs the first 32 bytes of
 * message from |m|.
 *
 * Steps:
 *   1. Recover r (three limbs) and the pad from the odd words of st->P[1],
 *      where init parked them.
 *   2. Square r twice. The first result (r^2) is written to st->P[1] and the
 *      second (r^4) to st->P[0], each converted to five 26-bit limbs that are
 *      broadcast to 32-bit words 0 and 2 of a lane, together with the
 *      5x multiples S21..S24.
 *   3. Write r and the pad back into the odd words of st->P[1], which step 2
 *      overwrote.
 *   4. Load two 16-byte message blocks side by side into H as 26-bit limbs
 *      and set the 2^128 bit of each.
 *
 * The destination slot is selected by index on each pass, so no pointer
 * outside st->P is ever formed. */
static void poly1305_first_block(poly1305_state_internal *st,
                                 const uint8_t *m) {
  const xmmi MMASK =
      _mm_load_si128((const xmmi *)poly1305_x64_sse2_message_mask);
  const xmmi FIVE = _mm_load_si128((const xmmi *)poly1305_x64_sse2_5);
  const xmmi HIBIT = _mm_load_si128((const xmmi *)poly1305_x64_sse2_1shl128);
  xmmi T5, T6;
  poly1305_power *p;
  uint128_t d[3];
  uint64_t r0, r1, r2;
  uint64_t r20, r21, r22, s22;
  uint64_t pad0, pad1;
  uint64_t c;
  uint32_t limb0, limb1, limb2, limb3, limb4;
  size_t i;

  /* pull out stored info */
  p = &st->P[1];

  r0 = ((uint64_t)p->R20.d[3] << 32) | (uint64_t)p->R20.d[1];
  r1 = ((uint64_t)p->R21.d[3] << 32) | (uint64_t)p->R21.d[1];
  r2 = ((uint64_t)p->R22.d[3] << 32) | (uint64_t)p->R22.d[1];
  pad0 = ((uint64_t)p->R23.d[3] << 32) | (uint64_t)p->R23.d[1];
  pad1 = ((uint64_t)p->R24.d[3] << 32) | (uint64_t)p->R24.d[1];

  /* compute powers r^2,r^4 */
  r20 = r0;
  r21 = r1;
  r22 = r2;
  for (i = 0; i < 2; i++) {
    /* pass 0 fills P[1], pass 1 fills P[0] */
    p = &st->P[1 - i];

    /* square (r20, r21, r22) in 44/44/42-bit limbs */
    s22 = r22 * (5 << 2);

    d[0] = add128(mul64x64_128(r20, r20), mul64x64_128(r21 * 2, s22));
    d[1] = add128(mul64x64_128(r22, s22), mul64x64_128(r20 * 2, r21));
    d[2] = add128(mul64x64_128(r21, r21), mul64x64_128(r22 * 2, r20));

    /* carry propagation; the carry out of the top limb is folded back into
     * the bottom limb multiplied by 5 */
    r20 = lo128(d[0]) & 0xfffffffffff;
    c = shr128(d[0], 44);
    d[1] = add128_64(d[1], c);
    r21 = lo128(d[1]) & 0xfffffffffff;
    c = shr128(d[1], 44);
    d[2] = add128_64(d[2], c);
    r22 = lo128(d[2]) & 0x3ffffffffff;
    c = shr128(d[2], 42);
    r20 += c * 5;
    c = (r20 >> 44);
    r20 = r20 & 0xfffffffffff;
    r21 += c;

    /* re-slice the three limbs into five 26-bit limbs */
    limb0 = (uint32_t)(r20) & 0x3ffffff;
    limb1 = (uint32_t)((r20 >> 26) | (r21 << 18)) & 0x3ffffff;
    limb2 = (uint32_t)((r21 >> 8)) & 0x3ffffff;
    limb3 = (uint32_t)((r21 >> 34) | (r22 << 10)) & 0x3ffffff;
    limb4 = (uint32_t)((r22 >> 16));

    /* broadcast each limb to 32-bit words 0 and 2 of its lane */
    p->R20.v =
        _mm_shuffle_epi32(_mm_cvtsi32_si128(limb0), _MM_SHUFFLE(1, 0, 1, 0));
    p->R21.v =
        _mm_shuffle_epi32(_mm_cvtsi32_si128(limb1), _MM_SHUFFLE(1, 0, 1, 0));
    p->R22.v =
        _mm_shuffle_epi32(_mm_cvtsi32_si128(limb2), _MM_SHUFFLE(1, 0, 1, 0));
    p->R23.v =
        _mm_shuffle_epi32(_mm_cvtsi32_si128(limb3), _MM_SHUFFLE(1, 0, 1, 0));
    p->R24.v =
        _mm_shuffle_epi32(_mm_cvtsi32_si128(limb4), _MM_SHUFFLE(1, 0, 1, 0));
    p->S21.v = _mm_mul_epu32(p->R21.v, FIVE);
    p->S22.v = _mm_mul_epu32(p->R22.v, FIVE);
    p->S23.v = _mm_mul_epu32(p->R23.v, FIVE);
    p->S24.v = _mm_mul_epu32(p->R24.v, FIVE);
  }

  /* put saved info back */
  p = &st->P[1];
  p->R20.d[1] = (uint32_t)(r0);
  p->R20.d[3] = (uint32_t)(r0 >> 32);
  p->R21.d[1] = (uint32_t)(r1);
  p->R21.d[3] = (uint32_t)(r1 >> 32);
  p->R22.d[1] = (uint32_t)(r2);
  p->R22.d[3] = (uint32_t)(r2 >> 32);
  p->R23.d[1] = (uint32_t)(pad0);
  p->R23.d[3] = (uint32_t)(pad0 >> 32);
  p->R24.d[1] = (uint32_t)(pad1);
  p->R24.d[3] = (uint32_t)(pad1 >> 32);

  /* H = [Mx,My]: T5 holds the low 8 bytes of both blocks, T6 the high 8 */
  T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((const xmmi *)(m + 0)),
                          _mm_loadl_epi64((const xmmi *)(m + 16)));
  T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((const xmmi *)(m + 8)),
                          _mm_loadl_epi64((const xmmi *)(m + 24)));
  st->H[0] = _mm_and_si128(MMASK, T5);
  st->H[1] = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
  T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12));
  st->H[2] = _mm_and_si128(MMASK, T5);
  st->H[3] = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
  st->H[4] = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT);
}
|
279
|
+
|
280
|
+
/* Folds whole 64-byte chunks of |m| into the two-lane accumulator st->H.
 *
 * While at least 64 bytes remain, each pass:
 *   1. multiplies H by the power table in st->P[0] ([r^4,r^4]),
 *   2. adds the first 32 message bytes multiplied by st->P[1] ([r^2,r^2]),
 *   3. adds the following 32 message bytes unscaled,
 *   4. propagates carries between the five limbs.
 * A tail shorter than 64 bytes is left unprocessed. The accumulator is read
 * from st->H on entry and written back on exit. */
static void poly1305_blocks(poly1305_state_internal *st, const uint8_t *m,
                            size_t bytes) {
  const xmmi MMASK = _mm_load_si128((xmmi *)poly1305_x64_sse2_message_mask);
  const xmmi FIVE = _mm_load_si128((xmmi *)poly1305_x64_sse2_5);
  const xmmi HIBIT = _mm_load_si128((xmmi *)poly1305_x64_sse2_1shl128);

  const poly1305_power *pow4 = &st->P[0]; /* [r^4,r^4] */
  const poly1305_power *pow2 = &st->P[1]; /* [r^2,r^2] */

  xmmi H0 = st->H[0];
  xmmi H1 = st->H[1];
  xmmi H2 = st->H[2];
  xmmi H3 = st->H[3];
  xmmi H4 = st->H[4];

  for (; bytes >= 64; m += 64, bytes -= 64) {
    xmmi T0, T1, T2, T3, T4;
    xmmi M0, M1, M2, M3, M4;
    xmmi lo, hi;
    xmmi carry_a, carry_b;

    /* T = H * [r^4,r^4]. Each output limb is a sum of five 32x32->64 lane
     * products; terms that wrap past the top limb use the S2x entries, which
     * are the R2x entries pre-multiplied by FIVE. */
    T0 = _mm_mul_epu32(H0, pow4->R20.v);
    T0 = _mm_add_epi64(T0, _mm_mul_epu32(H1, pow4->S24.v));
    T0 = _mm_add_epi64(T0, _mm_mul_epu32(H2, pow4->S23.v));
    T0 = _mm_add_epi64(T0, _mm_mul_epu32(H3, pow4->S22.v));
    T0 = _mm_add_epi64(T0, _mm_mul_epu32(H4, pow4->S21.v));

    T1 = _mm_mul_epu32(H0, pow4->R21.v);
    T1 = _mm_add_epi64(T1, _mm_mul_epu32(H1, pow4->R20.v));
    T1 = _mm_add_epi64(T1, _mm_mul_epu32(H2, pow4->S24.v));
    T1 = _mm_add_epi64(T1, _mm_mul_epu32(H3, pow4->S23.v));
    T1 = _mm_add_epi64(T1, _mm_mul_epu32(H4, pow4->S22.v));

    T2 = _mm_mul_epu32(H0, pow4->R22.v);
    T2 = _mm_add_epi64(T2, _mm_mul_epu32(H1, pow4->R21.v));
    T2 = _mm_add_epi64(T2, _mm_mul_epu32(H2, pow4->R20.v));
    T2 = _mm_add_epi64(T2, _mm_mul_epu32(H3, pow4->S24.v));
    T2 = _mm_add_epi64(T2, _mm_mul_epu32(H4, pow4->S23.v));

    T3 = _mm_mul_epu32(H0, pow4->R23.v);
    T3 = _mm_add_epi64(T3, _mm_mul_epu32(H1, pow4->R22.v));
    T3 = _mm_add_epi64(T3, _mm_mul_epu32(H2, pow4->R21.v));
    T3 = _mm_add_epi64(T3, _mm_mul_epu32(H3, pow4->R20.v));
    T3 = _mm_add_epi64(T3, _mm_mul_epu32(H4, pow4->S24.v));

    T4 = _mm_mul_epu32(H0, pow4->R24.v);
    T4 = _mm_add_epi64(T4, _mm_mul_epu32(H1, pow4->R23.v));
    T4 = _mm_add_epi64(T4, _mm_mul_epu32(H2, pow4->R22.v));
    T4 = _mm_add_epi64(T4, _mm_mul_epu32(H3, pow4->R21.v));
    T4 = _mm_add_epi64(T4, _mm_mul_epu32(H4, pow4->R20.v));

    /* Load message blocks 0 and 1 (bytes 0..31), one block per lane, and
     * split each into five limbs using 26-bit shifts; HIBIT is OR-ed into the
     * top limb. */
    lo = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 0)),
                            _mm_loadl_epi64((xmmi *)(m + 16)));
    hi = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 8)),
                            _mm_loadl_epi64((xmmi *)(m + 24)));
    M0 = _mm_and_si128(MMASK, lo);
    M1 = _mm_and_si128(MMASK, _mm_srli_epi64(lo, 26));
    lo = _mm_or_si128(_mm_srli_epi64(lo, 52), _mm_slli_epi64(hi, 12));
    M2 = _mm_and_si128(MMASK, lo);
    M3 = _mm_and_si128(MMASK, _mm_srli_epi64(lo, 26));
    M4 = _mm_or_si128(_mm_srli_epi64(hi, 40), HIBIT);

    /* T += [Mx,My] * [r^2,r^2] */
    T0 = _mm_add_epi64(T0, _mm_mul_epu32(M0, pow2->R20.v));
    T0 = _mm_add_epi64(T0, _mm_mul_epu32(M1, pow2->S24.v));
    T0 = _mm_add_epi64(T0, _mm_mul_epu32(M2, pow2->S23.v));
    T0 = _mm_add_epi64(T0, _mm_mul_epu32(M3, pow2->S22.v));
    T0 = _mm_add_epi64(T0, _mm_mul_epu32(M4, pow2->S21.v));

    T1 = _mm_add_epi64(T1, _mm_mul_epu32(M0, pow2->R21.v));
    T1 = _mm_add_epi64(T1, _mm_mul_epu32(M1, pow2->R20.v));
    T1 = _mm_add_epi64(T1, _mm_mul_epu32(M2, pow2->S24.v));
    T1 = _mm_add_epi64(T1, _mm_mul_epu32(M3, pow2->S23.v));
    T1 = _mm_add_epi64(T1, _mm_mul_epu32(M4, pow2->S22.v));

    T2 = _mm_add_epi64(T2, _mm_mul_epu32(M0, pow2->R22.v));
    T2 = _mm_add_epi64(T2, _mm_mul_epu32(M1, pow2->R21.v));
    T2 = _mm_add_epi64(T2, _mm_mul_epu32(M2, pow2->R20.v));
    T2 = _mm_add_epi64(T2, _mm_mul_epu32(M3, pow2->S24.v));
    T2 = _mm_add_epi64(T2, _mm_mul_epu32(M4, pow2->S23.v));

    T3 = _mm_add_epi64(T3, _mm_mul_epu32(M0, pow2->R23.v));
    T3 = _mm_add_epi64(T3, _mm_mul_epu32(M1, pow2->R22.v));
    T3 = _mm_add_epi64(T3, _mm_mul_epu32(M2, pow2->R21.v));
    T3 = _mm_add_epi64(T3, _mm_mul_epu32(M3, pow2->R20.v));
    T3 = _mm_add_epi64(T3, _mm_mul_epu32(M4, pow2->S24.v));

    T4 = _mm_add_epi64(T4, _mm_mul_epu32(M0, pow2->R24.v));
    T4 = _mm_add_epi64(T4, _mm_mul_epu32(M1, pow2->R23.v));
    T4 = _mm_add_epi64(T4, _mm_mul_epu32(M2, pow2->R22.v));
    T4 = _mm_add_epi64(T4, _mm_mul_epu32(M3, pow2->R21.v));
    T4 = _mm_add_epi64(T4, _mm_mul_epu32(M4, pow2->R20.v));

    /* Load message blocks 2 and 3 (bytes 32..63) the same way. */
    lo = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 32)),
                            _mm_loadl_epi64((xmmi *)(m + 48)));
    hi = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 40)),
                            _mm_loadl_epi64((xmmi *)(m + 56)));
    M0 = _mm_and_si128(MMASK, lo);
    M1 = _mm_and_si128(MMASK, _mm_srli_epi64(lo, 26));
    lo = _mm_or_si128(_mm_srli_epi64(lo, 52), _mm_slli_epi64(hi, 12));
    M2 = _mm_and_si128(MMASK, lo);
    M3 = _mm_and_si128(MMASK, _mm_srli_epi64(lo, 26));
    M4 = _mm_or_si128(_mm_srli_epi64(hi, 40), HIBIT);

    /* T += [Mx,My] */
    T0 = _mm_add_epi64(T0, M0);
    T1 = _mm_add_epi64(T1, M1);
    T2 = _mm_add_epi64(T2, M2);
    T3 = _mm_add_epi64(T3, M3);
    T4 = _mm_add_epi64(T4, M4);

    /* Carry propagation. Two chains run side by side (0->1->2->3->4 and
     * 3->4->0->1); the carry leaving limb 4 re-enters limb 0 times FIVE. */
    carry_a = _mm_srli_epi64(T0, 26);
    carry_b = _mm_srli_epi64(T3, 26);
    T0 = _mm_and_si128(T0, MMASK);
    T3 = _mm_and_si128(T3, MMASK);
    T1 = _mm_add_epi64(T1, carry_a);
    T4 = _mm_add_epi64(T4, carry_b);

    carry_a = _mm_srli_epi64(T1, 26);
    carry_b = _mm_srli_epi64(T4, 26);
    T1 = _mm_and_si128(T1, MMASK);
    T4 = _mm_and_si128(T4, MMASK);
    T2 = _mm_add_epi64(T2, carry_a);
    T0 = _mm_add_epi64(T0, _mm_mul_epu32(carry_b, FIVE));

    carry_a = _mm_srli_epi64(T2, 26);
    carry_b = _mm_srli_epi64(T0, 26);
    T2 = _mm_and_si128(T2, MMASK);
    T0 = _mm_and_si128(T0, MMASK);
    T3 = _mm_add_epi64(T3, carry_a);
    T1 = _mm_add_epi64(T1, carry_b);

    carry_a = _mm_srli_epi64(T3, 26);
    T3 = _mm_and_si128(T3, MMASK);
    T4 = _mm_add_epi64(T4, carry_a);

    /* H = (H*[r^4,r^4] + [Mx,My]*[r^2,r^2] + [Mx,My]) */
    H0 = T0;
    H1 = T1;
    H2 = T2;
    H3 = T3;
    H4 = T4;
  }

  st->H[0] = H0;
  st->H[1] = H1;
  st->H[2] = H2;
  st->H[3] = H3;
  st->H[4] = H4;
}
|
469
|
+
|
470
|
+
/* Collapses the two-lane vector accumulator st->H into the scalar
 * accumulator st->HH.
 *
 * If at least 32 bytes are available at |m|, one more two-block step
 * H = H*[r^2,r^2] + [Mx,My] is performed first and 32 is returned; otherwise
 * 0 is returned and |m| is not read. Afterwards the upper lane of the
 * st->P[1] table is overwritten with limbs rebuilt from the values stored in
 * its d[1]/d[3] slots, H is multiplied by that table ([r^2,r]), the two lanes
 * are added together, and the carried result is packed into st->HH[0..2]
 * using 44/44/42-bit masks.
 *
 * Returns the number of bytes of |m| consumed (0 or 32). */
static size_t poly1305_combine(poly1305_state_internal *st, const uint8_t *m,
                               size_t bytes) {
  const xmmi MMASK = _mm_load_si128((xmmi *)poly1305_x64_sse2_message_mask);
  const xmmi HIBIT = _mm_load_si128((xmmi *)poly1305_x64_sse2_1shl128);
  const xmmi FIVE = _mm_load_si128((xmmi *)poly1305_x64_sse2_5);

  /* p = [r^2,r^2] */
  poly1305_power *p = &st->P[1];

  xmmi H0 = st->H[0];
  xmmi H1 = st->H[1];
  xmmi H2 = st->H[2];
  xmmi H3 = st->H[3];
  xmmi H4 = st->H[4];
  xmmi T0, T1, T2, T3, T4;
  xmmi carry_a, carry_b;

  uint64_t r0, r1, r2;
  uint64_t t0, t1, t2, t3, t4;
  uint64_t c;
  size_t consumed = 0;

  if (bytes >= 32) {
    xmmi M0, M1, M2, M3, M4;
    xmmi lo, hi;

    /* T = H * [r^2,r^2] */
    T0 = _mm_mul_epu32(H0, p->R20.v);
    T0 = _mm_add_epi64(T0, _mm_mul_epu32(H1, p->S24.v));
    T0 = _mm_add_epi64(T0, _mm_mul_epu32(H2, p->S23.v));
    T0 = _mm_add_epi64(T0, _mm_mul_epu32(H3, p->S22.v));
    T0 = _mm_add_epi64(T0, _mm_mul_epu32(H4, p->S21.v));

    T1 = _mm_mul_epu32(H0, p->R21.v);
    T1 = _mm_add_epi64(T1, _mm_mul_epu32(H1, p->R20.v));
    T1 = _mm_add_epi64(T1, _mm_mul_epu32(H2, p->S24.v));
    T1 = _mm_add_epi64(T1, _mm_mul_epu32(H3, p->S23.v));
    T1 = _mm_add_epi64(T1, _mm_mul_epu32(H4, p->S22.v));

    T2 = _mm_mul_epu32(H0, p->R22.v);
    T2 = _mm_add_epi64(T2, _mm_mul_epu32(H1, p->R21.v));
    T2 = _mm_add_epi64(T2, _mm_mul_epu32(H2, p->R20.v));
    T2 = _mm_add_epi64(T2, _mm_mul_epu32(H3, p->S24.v));
    T2 = _mm_add_epi64(T2, _mm_mul_epu32(H4, p->S23.v));

    T3 = _mm_mul_epu32(H0, p->R23.v);
    T3 = _mm_add_epi64(T3, _mm_mul_epu32(H1, p->R22.v));
    T3 = _mm_add_epi64(T3, _mm_mul_epu32(H2, p->R21.v));
    T3 = _mm_add_epi64(T3, _mm_mul_epu32(H3, p->R20.v));
    T3 = _mm_add_epi64(T3, _mm_mul_epu32(H4, p->S24.v));

    T4 = _mm_mul_epu32(H0, p->R24.v);
    T4 = _mm_add_epi64(T4, _mm_mul_epu32(H1, p->R23.v));
    T4 = _mm_add_epi64(T4, _mm_mul_epu32(H2, p->R22.v));
    T4 = _mm_add_epi64(T4, _mm_mul_epu32(H3, p->R21.v));
    T4 = _mm_add_epi64(T4, _mm_mul_epu32(H4, p->R20.v));

    /* Load two 16-byte blocks, one per lane, and split them into limbs. */
    lo = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 0)),
                            _mm_loadl_epi64((xmmi *)(m + 16)));
    hi = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 8)),
                            _mm_loadl_epi64((xmmi *)(m + 24)));
    M0 = _mm_and_si128(MMASK, lo);
    M1 = _mm_and_si128(MMASK, _mm_srli_epi64(lo, 26));
    lo = _mm_or_si128(_mm_srli_epi64(lo, 52), _mm_slli_epi64(hi, 12));
    M2 = _mm_and_si128(MMASK, lo);
    M3 = _mm_and_si128(MMASK, _mm_srli_epi64(lo, 26));
    M4 = _mm_or_si128(_mm_srli_epi64(hi, 40), HIBIT);

    /* T += [Mx,My] */
    T0 = _mm_add_epi64(T0, M0);
    T1 = _mm_add_epi64(T1, M1);
    T2 = _mm_add_epi64(T2, M2);
    T3 = _mm_add_epi64(T3, M3);
    T4 = _mm_add_epi64(T4, M4);

    /* reduce */
    carry_a = _mm_srli_epi64(T0, 26);
    carry_b = _mm_srli_epi64(T3, 26);
    T0 = _mm_and_si128(T0, MMASK);
    T3 = _mm_and_si128(T3, MMASK);
    T1 = _mm_add_epi64(T1, carry_a);
    T4 = _mm_add_epi64(T4, carry_b);

    carry_a = _mm_srli_epi64(T1, 26);
    carry_b = _mm_srli_epi64(T4, 26);
    T1 = _mm_and_si128(T1, MMASK);
    T4 = _mm_and_si128(T4, MMASK);
    T2 = _mm_add_epi64(T2, carry_a);
    T0 = _mm_add_epi64(T0, _mm_mul_epu32(carry_b, FIVE));

    carry_a = _mm_srli_epi64(T2, 26);
    carry_b = _mm_srli_epi64(T0, 26);
    T2 = _mm_and_si128(T2, MMASK);
    T0 = _mm_and_si128(T0, MMASK);
    T3 = _mm_add_epi64(T3, carry_a);
    T1 = _mm_add_epi64(T1, carry_b);

    carry_a = _mm_srli_epi64(T3, 26);
    T3 = _mm_and_si128(T3, MMASK);
    T4 = _mm_add_epi64(T4, carry_a);

    /* H = (H*[r^2,r^2] + [Mx,My]) */
    H0 = T0;
    H1 = T1;
    H2 = T2;
    H3 = T3;
    H4 = T4;

    consumed = 32;
  }

  /* finalize, H *= [r^2,r]: rebuild the 64-bit words kept in the d[1]/d[3]
   * slots and write their limb form into slot d[2] (the low half of the upper
   * lane), so the upper lane of the table changes while the lower lane is
   * left as it was. */
  r0 = ((uint64_t)p->R20.d[3] << 32) | (uint64_t)p->R20.d[1];
  r1 = ((uint64_t)p->R21.d[3] << 32) | (uint64_t)p->R21.d[1];
  r2 = ((uint64_t)p->R22.d[3] << 32) | (uint64_t)p->R22.d[1];

  p->R20.d[2] = (uint32_t)(r0) & 0x3ffffff;
  p->R21.d[2] = (uint32_t)((r0 >> 26) | (r1 << 18)) & 0x3ffffff;
  p->R22.d[2] = (uint32_t)((r1 >> 8)) & 0x3ffffff;
  p->R23.d[2] = (uint32_t)((r1 >> 34) | (r2 << 10)) & 0x3ffffff;
  p->R24.d[2] = (uint32_t)((r2 >> 16));
  p->S21.d[2] = p->R21.d[2] * 5;
  p->S22.d[2] = p->R22.d[2] * 5;
  p->S23.d[2] = p->R23.d[2] * 5;
  p->S24.d[2] = p->R24.d[2] * 5;

  /* T = H * [r^2,r] */
  T0 = _mm_mul_epu32(H0, p->R20.v);
  T0 = _mm_add_epi64(T0, _mm_mul_epu32(H1, p->S24.v));
  T0 = _mm_add_epi64(T0, _mm_mul_epu32(H2, p->S23.v));
  T0 = _mm_add_epi64(T0, _mm_mul_epu32(H3, p->S22.v));
  T0 = _mm_add_epi64(T0, _mm_mul_epu32(H4, p->S21.v));

  T1 = _mm_mul_epu32(H0, p->R21.v);
  T1 = _mm_add_epi64(T1, _mm_mul_epu32(H1, p->R20.v));
  T1 = _mm_add_epi64(T1, _mm_mul_epu32(H2, p->S24.v));
  T1 = _mm_add_epi64(T1, _mm_mul_epu32(H3, p->S23.v));
  T1 = _mm_add_epi64(T1, _mm_mul_epu32(H4, p->S22.v));

  T2 = _mm_mul_epu32(H0, p->R22.v);
  T2 = _mm_add_epi64(T2, _mm_mul_epu32(H1, p->R21.v));
  T2 = _mm_add_epi64(T2, _mm_mul_epu32(H2, p->R20.v));
  T2 = _mm_add_epi64(T2, _mm_mul_epu32(H3, p->S24.v));
  T2 = _mm_add_epi64(T2, _mm_mul_epu32(H4, p->S23.v));

  T3 = _mm_mul_epu32(H0, p->R23.v);
  T3 = _mm_add_epi64(T3, _mm_mul_epu32(H1, p->R22.v));
  T3 = _mm_add_epi64(T3, _mm_mul_epu32(H2, p->R21.v));
  T3 = _mm_add_epi64(T3, _mm_mul_epu32(H3, p->R20.v));
  T3 = _mm_add_epi64(T3, _mm_mul_epu32(H4, p->S24.v));

  T4 = _mm_mul_epu32(H0, p->R24.v);
  T4 = _mm_add_epi64(T4, _mm_mul_epu32(H1, p->R23.v));
  T4 = _mm_add_epi64(T4, _mm_mul_epu32(H2, p->R22.v));
  T4 = _mm_add_epi64(T4, _mm_mul_epu32(H3, p->R21.v));
  T4 = _mm_add_epi64(T4, _mm_mul_epu32(H4, p->R20.v));

  /* reduce */
  carry_a = _mm_srli_epi64(T0, 26);
  carry_b = _mm_srli_epi64(T3, 26);
  T0 = _mm_and_si128(T0, MMASK);
  T3 = _mm_and_si128(T3, MMASK);
  T1 = _mm_add_epi64(T1, carry_a);
  T4 = _mm_add_epi64(T4, carry_b);

  carry_a = _mm_srli_epi64(T1, 26);
  carry_b = _mm_srli_epi64(T4, 26);
  T1 = _mm_and_si128(T1, MMASK);
  T4 = _mm_and_si128(T4, MMASK);
  T2 = _mm_add_epi64(T2, carry_a);
  T0 = _mm_add_epi64(T0, _mm_mul_epu32(carry_b, FIVE));

  carry_a = _mm_srli_epi64(T2, 26);
  carry_b = _mm_srli_epi64(T0, 26);
  T2 = _mm_and_si128(T2, MMASK);
  T0 = _mm_and_si128(T0, MMASK);
  T3 = _mm_add_epi64(T3, carry_a);
  T1 = _mm_add_epi64(T1, carry_b);

  carry_a = _mm_srli_epi64(T3, 26);
  T3 = _mm_and_si128(T3, MMASK);
  T4 = _mm_add_epi64(T4, carry_a);

  /* H = H[0]+H[1]: add the upper lane onto the lower lane of every limb. */
  H0 = _mm_add_epi64(T0, _mm_srli_si128(T0, 8));
  H1 = _mm_add_epi64(T1, _mm_srli_si128(T1, 8));
  H2 = _mm_add_epi64(T2, _mm_srli_si128(T2, 8));
  H3 = _mm_add_epi64(T3, _mm_srli_si128(T3, 8));
  H4 = _mm_add_epi64(T4, _mm_srli_si128(T4, 8));

  /* Scalar carry chain over the low 32 bits of each summed limb; the carry
   * out of the top limb is multiplied by 5 and fed back into limb 0. */
  t0 = _mm_cvtsi128_si32(H0);
  c = (t0 >> 26);
  t0 &= 0x3ffffff;

  t1 = _mm_cvtsi128_si32(H1) + c;
  c = (t1 >> 26);
  t1 &= 0x3ffffff;

  t2 = _mm_cvtsi128_si32(H2) + c;
  c = (t2 >> 26);
  t2 &= 0x3ffffff;

  t3 = _mm_cvtsi128_si32(H3) + c;
  c = (t3 >> 26);
  t3 &= 0x3ffffff;

  t4 = _mm_cvtsi128_si32(H4) + c;
  c = (t4 >> 26);
  t4 &= 0x3ffffff;

  t0 = t0 + (c * 5);
  c = (t0 >> 26);
  t0 &= 0x3ffffff;
  t1 = t1 + c;

  /* Repack the five 26-bit limbs into three words (44, 44 and 42 bits). */
  st->HH[0] = ((t0) | (t1 << 26)) & 0xfffffffffffull;
  st->HH[1] = ((t1 >> 18) | (t2 << 8) | (t3 << 34)) & 0xfffffffffffull;
  st->HH[2] = ((t3 >> 10) | (t4 << 16)) & 0x3ffffffffffull;

  return consumed;
}
|
712
|
+
|
713
|
+
/* Absorbs |bytes| bytes of message from |m| into the Poly1305 state.
 *
 * Until the vector path has been started, input is accumulated until 32
 * bytes are available for poly1305_first_block (taken straight from |m| when
 * nothing is buffered and more than 32 bytes were supplied). After that,
 * input is processed in 64-byte chunks by poly1305_blocks, and any remainder
 * is kept in st->buffer for a later call. */
void CRYPTO_poly1305_update(poly1305_state *state, const uint8_t *m,
                            size_t bytes) {
  poly1305_state_internal *st = poly1305_aligned_state(state);

  /* need at least 32 initial bytes to start the accelerated branch */
  if (!st->started) {
    if ((st->leftover != 0) || (bytes <= 32)) {
      /* Top the buffer up towards 32 bytes. */
      size_t take = poly1305_min(32 - st->leftover, bytes);
      poly1305_block_copy(st->buffer + st->leftover, m, take);
      bytes -= take;
      m += take;
      st->leftover += take;
      /* Start only once 32 bytes are buffered AND more input follows. */
      if ((st->leftover < 32) || (bytes == 0)) {
        return;
      }
      poly1305_first_block(st, st->buffer);
      st->leftover = 0;
    } else {
      /* Nothing buffered and plenty of input: start directly from |m|. */
      poly1305_first_block(st, m);
      m += 32;
      bytes -= 32;
    }
    st->started = 1;
  }

  /* handle leftover: complete a buffered 64-byte chunk before touching |m| */
  if (st->leftover) {
    size_t take = poly1305_min(64 - st->leftover, bytes);
    poly1305_block_copy(st->buffer + st->leftover, m, take);
    bytes -= take;
    m += take;
    st->leftover += take;
    if (st->leftover < 64) {
      return;
    }
    poly1305_blocks(st, st->buffer, 64);
    st->leftover = 0;
  }

  /* process 64 byte blocks directly from the caller's buffer */
  if (bytes >= 64) {
    size_t whole = bytes & ~(size_t)63;
    poly1305_blocks(st, m, whole);
    m += whole;
    bytes -= whole;
  }

  /* stash whatever is left for the next update/finish call */
  if (bytes) {
    poly1305_block_copy(st->buffer + st->leftover, m, bytes);
    st->leftover += bytes;
  }
}
|
766
|
+
|
767
|
+
/* Finishes the Poly1305 computation and writes the 16-byte tag to |mac|.
 *
 * If the vector path was started, poly1305_combine first folds the vector
 * accumulator (and up to 32 buffered bytes) into st->HH. The remaining
 * buffered bytes are then absorbed 16 at a time with scalar 44/44/42-bit limb
 * arithmetic; a final short block is padded in place inside st->buffer with
 * a 0x01 byte followed by zeros. The key words and the pad are read back from
 * the d[1]/d[3] slots of st->P[1]. */
void CRYPTO_poly1305_finish(poly1305_state *state, uint8_t mac[16]) {
  poly1305_state_internal *st = poly1305_aligned_state(state);
  size_t leftover = st->leftover;
  uint8_t *m = st->buffer;
  uint128_t d[3];
  uint64_t h0, h1, h2;
  uint64_t t0, t1;
  uint64_t g0, g1, g2, c, nc;
  uint64_t r0, r1, r2, s1, s2;
  poly1305_power *p;

  if (st->started) {
    size_t consumed = poly1305_combine(st, m, leftover);
    leftover -= consumed;
    m += consumed;
  }

  /* st->HH will either be 0 or have the combined result */
  h0 = st->HH[0];
  h1 = st->HH[1];
  h2 = st->HH[2];

  p = &st->P[1];
  r0 = ((uint64_t)p->R20.d[3] << 32) | (uint64_t)p->R20.d[1];
  r1 = ((uint64_t)p->R21.d[3] << 32) | (uint64_t)p->R21.d[1];
  r2 = ((uint64_t)p->R22.d[3] << 32) | (uint64_t)p->R22.d[1];
  s1 = r1 * (5 << 2);
  s2 = r2 * (5 << 2);

  /* Absorb the buffered tail one 16-byte block at a time. */
  while (leftover != 0) {
    uint64_t hibit;

    if (leftover >= 16) {
      /* Full block: bit 40 is OR-ed into the top-limb contribution. */
      hibit = (uint64_t)1 << 40;
    } else {
      /* final bytes: append 0x01, zero-fill to 16 bytes, no extra high bit */
      m[leftover] = 1;
      leftover++;
      poly1305_block_zero(m + leftover, 16 - leftover);
      leftover = 16;
      hibit = 0;
    }

    t0 = U8TO64_LE(m + 0);
    t1 = U8TO64_LE(m + 8);
    h0 += t0 & 0xfffffffffff;
    t0 = shr128_pair(t1, t0, 44);
    h1 += t0 & 0xfffffffffff;
    h2 += (t1 >> 24) | hibit;

    /* h *= r, using s1/s2 for the terms that wrap around the top limb */
    d[0] = add128(add128(mul64x64_128(h0, r0), mul64x64_128(h1, s2)),
                  mul64x64_128(h2, s1));
    d[1] = add128(add128(mul64x64_128(h0, r1), mul64x64_128(h1, r0)),
                  mul64x64_128(h2, s2));
    d[2] = add128(add128(mul64x64_128(h0, r2), mul64x64_128(h1, r1)),
                  mul64x64_128(h2, r0));

    /* carry between limbs; the overflow above bit 42 of the top limb is
     * multiplied by 5 and added to limb 0 */
    h0 = lo128(d[0]) & 0xfffffffffff;
    c = shr128(d[0], 44);
    d[1] = add128_64(d[1], c);
    h1 = lo128(d[1]) & 0xfffffffffff;
    c = shr128(d[1], 44);
    d[2] = add128_64(d[2], c);
    h2 = lo128(d[2]) & 0x3ffffffffff;
    c = shr128(d[2], 42);
    h0 += c * 5;

    m += 16;
    leftover -= 16;
  }

  /* carry h through all three limbs once more */
  c = (h0 >> 44);
  h0 &= 0xfffffffffff;
  h1 += c;
  c = (h1 >> 44);
  h1 &= 0xfffffffffff;
  h2 += c;
  c = (h2 >> 42);
  h2 &= 0x3ffffffffff;
  h0 += c * 5;

  /* g = h + 5 - 2^130 */
  g0 = h0 + 5;
  c = (g0 >> 44);
  g0 &= 0xfffffffffff;
  g1 = h1 + c;
  c = (g1 >> 44);
  g1 &= 0xfffffffffff;
  g2 = h2 + c - ((uint64_t)1 << 42);

  /* Branch-free select: c is all-ones when the top bit of g2 is clear (no
   * borrow), in which case g replaces h; otherwise h is kept. */
  c = (g2 >> 63) - 1;
  nc = ~c;
  h0 = (h0 & nc) | (g0 & c);
  h1 = (h1 & nc) | (g1 & c);
  h2 = (h2 & nc) | (g2 & c);

  /* pad: add the 128-bit value stored in the R23/R24 d[1]/d[3] slots */
  t0 = ((uint64_t)p->R23.d[3] << 32) | (uint64_t)p->R23.d[1];
  t1 = ((uint64_t)p->R24.d[3] << 32) | (uint64_t)p->R24.d[1];
  h0 += (t0 & 0xfffffffffff);
  c = (h0 >> 44);
  h0 &= 0xfffffffffff;
  t0 = shr128_pair(t1, t0, 44);
  h1 += (t0 & 0xfffffffffff) + c;
  c = (h1 >> 44);
  h1 &= 0xfffffffffff;
  t1 = (t1 >> 24);
  h2 += (t1) + c;

  /* Serialize the low 128 bits of h as two little-endian 64-bit words. */
  U64TO8_LE(mac + 0, ((h0) | (h1 << 44)));
  U64TO8_LE(mac + 8, ((h1 >> 20) | (h2 << 24)));
}
|
891
|
+
|
892
|
+
#endif /* !OPENSSL_WINDOWS && OPENSSL_X86_64 */
|