rbnacl-libsodium 1.0.5 → 1.0.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGES.md +8 -0
- data/lib/rbnacl/libsodium/version.rb +1 -1
- data/vendor/libsodium/ChangeLog +25 -0
- data/vendor/libsodium/Makefile.in +1 -1
- data/vendor/libsodium/THANKS +1 -0
- data/vendor/libsodium/autom4te.cache/output.1 +16 -16
- data/vendor/libsodium/autom4te.cache/output.5 +16 -16
- data/vendor/libsodium/autom4te.cache/requests +894 -894
- data/vendor/libsodium/autom4te.cache/traces.1 +4 -4
- data/vendor/libsodium/autom4te.cache/traces.5 +2 -2
- data/vendor/libsodium/builds/msvc/version.h +3 -3
- data/vendor/libsodium/builds/msvc/vs2010/libsodium/libsodium.vcxproj +7 -3
- data/vendor/libsodium/builds/msvc/vs2010/libsodium/libsodium.vcxproj.filters +21 -9
- data/vendor/libsodium/builds/msvc/vs2012/libsodium/libsodium.vcxproj +8 -4
- data/vendor/libsodium/builds/msvc/vs2012/libsodium/libsodium.vcxproj.filters +21 -9
- data/vendor/libsodium/builds/msvc/vs2013/libsodium/libsodium.vcxproj +7 -3
- data/vendor/libsodium/builds/msvc/vs2013/libsodium/libsodium.vcxproj.filters +21 -9
- data/vendor/libsodium/builds/msvc/vs2015/libsodium/libsodium.vcxproj +7 -3
- data/vendor/libsodium/builds/msvc/vs2015/libsodium/libsodium.vcxproj.filters +21 -9
- data/vendor/libsodium/configure +16 -16
- data/vendor/libsodium/configure.ac +7 -7
- data/vendor/libsodium/dist-build/Makefile.in +1 -1
- data/vendor/libsodium/libsodium.sln +4 -2
- data/vendor/libsodium/libsodium.vcxproj +17 -13
- data/vendor/libsodium/libsodium.vcxproj.filters +22 -10
- data/vendor/libsodium/msvc-scripts/Makefile.in +1 -1
- data/vendor/libsodium/msvc-scripts/process.bat +3 -3
- data/vendor/libsodium/src/Makefile.in +1 -1
- data/vendor/libsodium/src/libsodium/Makefile.am +19 -7
- data/vendor/libsodium/src/libsodium/Makefile.in +85 -48
- data/vendor/libsodium/src/libsodium/crypto_aead/aes256gcm/aesni/aead_aes256gcm_aesni.c +11 -3
- data/vendor/libsodium/src/libsodium/crypto_aead/chacha20poly1305/sodium/aead_chacha20poly1305.c +1 -0
- data/vendor/libsodium/src/libsodium/crypto_box/curve25519xsalsa20poly1305/ref/keypair_curve25519xsalsa20poly1305.c +2 -0
- data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2-impl.h +4 -4
- data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2.h +20 -18
- data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-compress-ref.c +93 -0
- data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-compress-sse41.c +80 -0
- data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-compress-ssse3.c +89 -0
- data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-load-sse2.h +68 -0
- data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-load-sse41.h +402 -0
- data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-ref.c +58 -102
- data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-round.h +123 -0
- data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/generichash_blake2b.c +6 -0
- data/vendor/libsodium/src/libsodium/crypto_onetimeauth/poly1305/donna/auth_poly1305_donna.c +15 -10
- data/vendor/libsodium/src/libsodium/crypto_onetimeauth/poly1305/donna/poly1305_donna.h +14 -14
- data/vendor/libsodium/src/libsodium/crypto_onetimeauth/poly1305/donna/poly1305_donna32.h +0 -2
- data/vendor/libsodium/src/libsodium/crypto_onetimeauth/poly1305/donna/poly1305_donna64.h +0 -2
- data/vendor/libsodium/src/libsodium/crypto_onetimeauth/poly1305/onetimeauth_poly1305.c +18 -16
- data/vendor/libsodium/src/libsodium/crypto_onetimeauth/poly1305/onetimeauth_poly1305.h +23 -0
- data/vendor/libsodium/src/libsodium/crypto_pwhash/scryptsalsa208sha256/pbkdf2-sha256.c +2 -2
- data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/donna_c64/api.h +0 -3
- data/vendor/libsodium/src/libsodium/crypto_secretbox/crypto_secretbox_easy.c +6 -2
- data/vendor/libsodium/src/libsodium/crypto_stream/chacha20/ref/stream_chacha20_ref.c +5 -0
- data/vendor/libsodium/src/libsodium/include/Makefile.in +1 -1
- data/vendor/libsodium/src/libsodium/include/sodium/core.h +2 -1
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_aead_aes256gcm.h +5 -2
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_aead_chacha20poly1305.h +5 -2
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_auth.h +3 -1
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_auth_hmacsha256.h +2 -1
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_auth_hmacsha512.h +2 -1
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_auth_hmacsha512256.h +2 -1
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_box.h +15 -7
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_box_curve25519xsalsa20poly1305.h +4 -2
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_generichash_blake2b.h +4 -0
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_hash.h +2 -1
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_onetimeauth.h +2 -1
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_onetimeauth_poly1305.h +7 -30
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_pwhash_scryptsalsa208sha256.h +8 -4
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_secretbox.h +7 -3
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_sign.h +5 -2
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_sign_ed25519.h +6 -3
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_sign_edwards25519sha512batch.h +14 -8
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_verify_16.h +2 -1
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_verify_32.h +2 -1
- data/vendor/libsodium/src/libsodium/include/sodium/crypto_verify_64.h +2 -1
- data/vendor/libsodium/src/libsodium/include/sodium/runtime.h +10 -3
- data/vendor/libsodium/src/libsodium/include/sodium/utils.h +8 -4
- data/vendor/libsodium/src/libsodium/randombytes/randombytes.c +7 -0
- data/vendor/libsodium/src/libsodium/randombytes/salsa20/randombytes_salsa20_random.c +4 -2
- data/vendor/libsodium/src/libsodium/randombytes/sysrandom/randombytes_sysrandom.c +12 -10
- data/vendor/libsodium/src/libsodium/sodium/core.c +4 -4
- data/vendor/libsodium/src/libsodium/sodium/runtime.c +27 -1
- data/vendor/libsodium/src/libsodium/sodium/utils.c +4 -2
- data/vendor/libsodium/test/Makefile.in +1 -1
- data/vendor/libsodium/test/default/Makefile.in +1 -1
- data/vendor/libsodium/test/default/aead_aes256gcm.c +14 -0
- data/vendor/libsodium/test/default/box_easy2.c +13 -7
- data/vendor/libsodium/test/default/chacha20.c +36 -12
- data/vendor/libsodium/test/default/ed25519_convert.c +6 -2
- data/vendor/libsodium/test/default/generichash3.exp +1 -1
- data/vendor/libsodium/test/default/secretbox_easy2.c +5 -3
- data/vendor/libsodium/test/default/sodium_core.c +2 -0
- data/vendor/libsodium/test/default/sodium_utils.c +10 -4
- data/vendor/libsodium/test/default/sodium_utils2.c +1 -0
- data/vendor/libsodium/test/default/sodium_utils3.c +1 -0
- metadata +8 -22
- data/vendor/libsodium/src/libsodium/crypto_auth/hmacsha256/checksum +0 -1
- data/vendor/libsodium/src/libsodium/crypto_auth/hmacsha512256/checksum +0 -1
- data/vendor/libsodium/src/libsodium/crypto_box/curve25519xsalsa20poly1305/checksum +0 -1
- data/vendor/libsodium/src/libsodium/crypto_core/hsalsa20/checksum +0 -1
- data/vendor/libsodium/src/libsodium/crypto_core/salsa20/checksum +0 -1
- data/vendor/libsodium/src/libsodium/crypto_core/salsa2012/checksum +0 -1
- data/vendor/libsodium/src/libsodium/crypto_core/salsa208/checksum +0 -1
- data/vendor/libsodium/src/libsodium/crypto_hash/sha256/checksum +0 -1
- data/vendor/libsodium/src/libsodium/crypto_hash/sha512/checksum +0 -1
- data/vendor/libsodium/src/libsodium/crypto_onetimeauth/poly1305/donna/verify_poly1305_donna.c +0 -15
- data/vendor/libsodium/src/libsodium/crypto_onetimeauth/poly1305/onetimeauth_poly1305_api.c +0 -11
- data/vendor/libsodium/src/libsodium/crypto_onetimeauth/poly1305/onetimeauth_poly1305_try.c +0 -13
- data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/checksum +0 -1
- data/vendor/libsodium/src/libsodium/crypto_secretbox/xsalsa20poly1305/checksum +0 -1
- data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/checksum +0 -1
- data/vendor/libsodium/src/libsodium/crypto_stream/salsa20/checksum +0 -1
- data/vendor/libsodium/src/libsodium/crypto_stream/salsa2012/checksum +0 -1
- data/vendor/libsodium/src/libsodium/crypto_stream/salsa208/checksum +0 -1
- data/vendor/libsodium/src/libsodium/crypto_stream/xsalsa20/checksum +0 -1
- data/vendor/libsodium/src/libsodium/crypto_verify/16/checksum +0 -1
- data/vendor/libsodium/src/libsodium/crypto_verify/32/checksum +0 -1
@@ -214,8 +214,12 @@ addmul(unsigned char *c, const unsigned char *a, unsigned int xlen, const unsign
|
|
214
214
|
A = _mm_loadu_si128((const __m128i *) a);
|
215
215
|
} else {
|
216
216
|
CRYPTO_ALIGN(16) unsigned char padded[16];
|
217
|
+
unsigned int i;
|
218
|
+
|
217
219
|
memset(padded, 0, 16);
|
218
|
-
|
220
|
+
for (i = 0; i < xlen; i++) {
|
221
|
+
padded[i] = a[i];
|
222
|
+
}
|
219
223
|
A = _mm_load_si128((const __m128i *) padded);
|
220
224
|
}
|
221
225
|
A = _mm_shuffle_epi8(A, rev);
|
@@ -638,14 +642,17 @@ crypto_aead_aes256gcm_decrypt_afternm(unsigned char *m, unsigned long long *mlen
|
|
638
642
|
CRYPTO_ALIGN(16) unsigned char fb[16];
|
639
643
|
|
640
644
|
(void) nsec;
|
641
|
-
memcpy(H, ctx->H, sizeof H);
|
642
645
|
if (clen > 16ULL * (1ULL << 32) - 16ULL) {
|
643
646
|
abort();
|
644
647
|
}
|
645
|
-
mlen = clen - 16;
|
646
648
|
if (mlen_p != NULL) {
|
647
649
|
*mlen_p = 0U;
|
648
650
|
}
|
651
|
+
if (clen < 16) {
|
652
|
+
return -1;
|
653
|
+
}
|
654
|
+
mlen = clen - 16;
|
655
|
+
|
649
656
|
memcpy(&n2[0], npub, 12);
|
650
657
|
*(uint32_t *) &n2[12] = 0x01000000;
|
651
658
|
aesni_encrypt1(T, _mm_load_si128((const __m128i *) n2), rkeys);
|
@@ -653,6 +660,7 @@ crypto_aead_aes256gcm_decrypt_afternm(unsigned char *m, unsigned long long *mlen
|
|
653
660
|
(*(uint64_t *) &fb[0]) = _bswap64((uint64_t)(8 * adlen));
|
654
661
|
(*(uint64_t *) &fb[8]) = _bswap64((uint64_t)(8 * mlen));
|
655
662
|
|
663
|
+
memcpy(H, ctx->H, sizeof H);
|
656
664
|
Hv = _mm_shuffle_epi8(_mm_load_si128((const __m128i *) H), rev);
|
657
665
|
_mm_store_si128((__m128i *) H, Hv);
|
658
666
|
H2v = mulv(Hv, Hv);
|
@@ -4,6 +4,7 @@
|
|
4
4
|
#include "crypto_scalarmult_curve25519.h"
|
5
5
|
#include "api.h"
|
6
6
|
#include "randombytes.h"
|
7
|
+
#include "utils.h"
|
7
8
|
|
8
9
|
int crypto_box_seed_keypair(
|
9
10
|
unsigned char *pk,
|
@@ -14,6 +15,7 @@ int crypto_box_seed_keypair(
|
|
14
15
|
unsigned char hash[64];
|
15
16
|
crypto_hash_sha512(hash,seed,32);
|
16
17
|
memmove(sk,hash,32);
|
18
|
+
sodium_memzero(hash, sizeof hash);
|
17
19
|
return crypto_scalarmult_curve25519_base(pk,sk);
|
18
20
|
}
|
19
21
|
|
@@ -21,7 +21,7 @@
|
|
21
21
|
|
22
22
|
static inline uint32_t load32( const void *src )
|
23
23
|
{
|
24
|
-
#
|
24
|
+
#ifdef NATIVE_LITTLE_ENDIAN
|
25
25
|
uint32_t w;
|
26
26
|
memcpy(&w, src, sizeof w);
|
27
27
|
return w;
|
@@ -37,7 +37,7 @@ static inline uint32_t load32( const void *src )
|
|
37
37
|
|
38
38
|
static inline uint64_t load64( const void *src )
|
39
39
|
{
|
40
|
-
#
|
40
|
+
#ifdef NATIVE_LITTLE_ENDIAN
|
41
41
|
uint64_t w;
|
42
42
|
memcpy(&w, src, sizeof w);
|
43
43
|
return w;
|
@@ -57,7 +57,7 @@ static inline uint64_t load64( const void *src )
|
|
57
57
|
|
58
58
|
static inline void store32( void *dst, uint32_t w )
|
59
59
|
{
|
60
|
-
#
|
60
|
+
#ifdef NATIVE_LITTLE_ENDIAN
|
61
61
|
memcpy(dst, &w, sizeof w);
|
62
62
|
#else
|
63
63
|
uint8_t *p = ( uint8_t * )dst;
|
@@ -70,7 +70,7 @@ static inline void store32( void *dst, uint32_t w )
|
|
70
70
|
|
71
71
|
static inline void store64( void *dst, uint64_t w )
|
72
72
|
{
|
73
|
-
#
|
73
|
+
#ifdef NATIVE_LITTLE_ENDIAN
|
74
74
|
memcpy(dst, &w, sizeof w);
|
75
75
|
#else
|
76
76
|
uint8_t *p = ( uint8_t * )dst;
|
@@ -18,22 +18,18 @@
|
|
18
18
|
#include <stdint.h>
|
19
19
|
|
20
20
|
#include "crypto_generichash_blake2b.h"
|
21
|
-
|
22
|
-
|
23
|
-
#define
|
24
|
-
#define
|
25
|
-
#define
|
26
|
-
#define
|
27
|
-
#define
|
28
|
-
#define
|
29
|
-
#define
|
30
|
-
#define
|
31
|
-
|
32
|
-
#
|
33
|
-
#define ALIGN(x) __declspec(align(x))
|
34
|
-
#else
|
35
|
-
#define ALIGN(x) __attribute__((aligned(x)))
|
36
|
-
#endif
|
21
|
+
#include "export.h"
|
22
|
+
|
23
|
+
#define blake2b_init_param crypto_generichash_blake2b__init_param
|
24
|
+
#define blake2b_init crypto_generichash_blake2b__init
|
25
|
+
#define blake2b_init_salt_personal crypto_generichash_blake2b__init_salt_personal
|
26
|
+
#define blake2b_init_key crypto_generichash_blake2b__init_key
|
27
|
+
#define blake2b_init_key_salt_personal crypto_generichash_blake2b__init_key_salt_personal
|
28
|
+
#define blake2b_update crypto_generichash_blake2b__update
|
29
|
+
#define blake2b_final crypto_generichash_blake2b__final
|
30
|
+
#define blake2b crypto_generichash_blake2b__blake2b
|
31
|
+
#define blake2b_salt_personal crypto_generichash_blake2b__blake2b_salt_personal
|
32
|
+
#define blake2b_pick_best_implementation crypto_generichash_blake2b__pick_best_implementation
|
37
33
|
|
38
34
|
#if defined(__cplusplus)
|
39
35
|
extern "C" {
|
@@ -78,7 +74,7 @@ extern "C" {
|
|
78
74
|
uint8_t personal[BLAKE2S_PERSONALBYTES]; // 32
|
79
75
|
} blake2s_param;
|
80
76
|
|
81
|
-
|
77
|
+
CRYPTO_ALIGN( 64 ) typedef struct blake2s_state_
|
82
78
|
{
|
83
79
|
uint32_t h[8];
|
84
80
|
uint32_t t[2];
|
@@ -106,7 +102,7 @@ extern "C" {
|
|
106
102
|
#ifndef DEFINE_BLAKE2B_STATE
|
107
103
|
typedef crypto_generichash_blake2b_state blake2b_state;
|
108
104
|
#else
|
109
|
-
|
105
|
+
CRYPTO_ALIGN( 64 ) typedef struct blake2b_state_
|
110
106
|
{
|
111
107
|
uint64_t h[8];
|
112
108
|
uint64_t t[2];
|
@@ -179,6 +175,12 @@ typedef crypto_generichash_blake2b_state blake2b_state;
|
|
179
175
|
return blake2b( out, in, key, outlen, inlen, keylen );
|
180
176
|
}
|
181
177
|
|
178
|
+
typedef int ( *blake2b_compress_fn )( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] );
|
179
|
+
int blake2b_pick_best_implementation(void);
|
180
|
+
int blake2b_compress_ref( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] );
|
181
|
+
int blake2b_compress_ssse3( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] );
|
182
|
+
int blake2b_compress_sse41( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] );
|
183
|
+
|
182
184
|
#if defined(__cplusplus)
|
183
185
|
}
|
184
186
|
#endif
|
@@ -0,0 +1,93 @@
|
|
1
|
+
|
2
|
+
#include <stdint.h>
|
3
|
+
#include <string.h>
|
4
|
+
|
5
|
+
#include "blake2.h"
|
6
|
+
#include "blake2-impl.h"
|
7
|
+
|
8
|
+
static const uint64_t blake2b_IV[8] =
|
9
|
+
{
|
10
|
+
0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL,
|
11
|
+
0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL,
|
12
|
+
0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
|
13
|
+
0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL
|
14
|
+
};
|
15
|
+
|
16
|
+
static const uint8_t blake2b_sigma[12][16] =
|
17
|
+
{
|
18
|
+
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } ,
|
19
|
+
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } ,
|
20
|
+
{ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } ,
|
21
|
+
{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } ,
|
22
|
+
{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } ,
|
23
|
+
{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } ,
|
24
|
+
{ 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } ,
|
25
|
+
{ 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } ,
|
26
|
+
{ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } ,
|
27
|
+
{ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } ,
|
28
|
+
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } ,
|
29
|
+
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }
|
30
|
+
};
|
31
|
+
|
32
|
+
int blake2b_compress_ref( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] )
|
33
|
+
{
|
34
|
+
uint64_t m[16];
|
35
|
+
uint64_t v[16];
|
36
|
+
int i;
|
37
|
+
|
38
|
+
for( i = 0; i < 16; ++i )
|
39
|
+
m[i] = load64( block + i * sizeof( m[i] ) );
|
40
|
+
|
41
|
+
for( i = 0; i < 8; ++i )
|
42
|
+
v[i] = S->h[i];
|
43
|
+
|
44
|
+
v[ 8] = blake2b_IV[0];
|
45
|
+
v[ 9] = blake2b_IV[1];
|
46
|
+
v[10] = blake2b_IV[2];
|
47
|
+
v[11] = blake2b_IV[3];
|
48
|
+
v[12] = S->t[0] ^ blake2b_IV[4];
|
49
|
+
v[13] = S->t[1] ^ blake2b_IV[5];
|
50
|
+
v[14] = S->f[0] ^ blake2b_IV[6];
|
51
|
+
v[15] = S->f[1] ^ blake2b_IV[7];
|
52
|
+
#define G(r,i,a,b,c,d) \
|
53
|
+
do { \
|
54
|
+
a = a + b + m[blake2b_sigma[r][2*i+0]]; \
|
55
|
+
d = rotr64(d ^ a, 32); \
|
56
|
+
c = c + d; \
|
57
|
+
b = rotr64(b ^ c, 24); \
|
58
|
+
a = a + b + m[blake2b_sigma[r][2*i+1]]; \
|
59
|
+
d = rotr64(d ^ a, 16); \
|
60
|
+
c = c + d; \
|
61
|
+
b = rotr64(b ^ c, 63); \
|
62
|
+
} while(0)
|
63
|
+
#define ROUND(r) \
|
64
|
+
do { \
|
65
|
+
G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
|
66
|
+
G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
|
67
|
+
G(r,2,v[ 2],v[ 6],v[10],v[14]); \
|
68
|
+
G(r,3,v[ 3],v[ 7],v[11],v[15]); \
|
69
|
+
G(r,4,v[ 0],v[ 5],v[10],v[15]); \
|
70
|
+
G(r,5,v[ 1],v[ 6],v[11],v[12]); \
|
71
|
+
G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
|
72
|
+
G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \
|
73
|
+
} while(0)
|
74
|
+
ROUND( 0 );
|
75
|
+
ROUND( 1 );
|
76
|
+
ROUND( 2 );
|
77
|
+
ROUND( 3 );
|
78
|
+
ROUND( 4 );
|
79
|
+
ROUND( 5 );
|
80
|
+
ROUND( 6 );
|
81
|
+
ROUND( 7 );
|
82
|
+
ROUND( 8 );
|
83
|
+
ROUND( 9 );
|
84
|
+
ROUND( 10 );
|
85
|
+
ROUND( 11 );
|
86
|
+
|
87
|
+
for( i = 0; i < 8; ++i )
|
88
|
+
S->h[i] = S->h[i] ^ v[i] ^ v[i + 8];
|
89
|
+
|
90
|
+
#undef G
|
91
|
+
#undef ROUND
|
92
|
+
return 0;
|
93
|
+
}
|
@@ -0,0 +1,80 @@
|
|
1
|
+
|
2
|
+
#define BLAKE2_USE_SSSE3
|
3
|
+
#define BLAKE2_USE_SSE41
|
4
|
+
|
5
|
+
#include <stdint.h>
|
6
|
+
#include <string.h>
|
7
|
+
|
8
|
+
#if (defined(HAVE_EMMINTRIN_H) && defined(HAVE_TMMINTRIN_H) && defined(HAVE_SMMINTRIN_H)) || \
|
9
|
+
(defined(_MSC_VER) && (defined(_M_X64) || defined(_M_AMD64) || defined(_M_IX86)))
|
10
|
+
|
11
|
+
#pragma GCC target("sse2")
|
12
|
+
#pragma GCC target("ssse3")
|
13
|
+
#pragma GCC target("sse4.1")
|
14
|
+
|
15
|
+
#include <emmintrin.h>
|
16
|
+
#include <tmmintrin.h>
|
17
|
+
#include <smmintrin.h>
|
18
|
+
|
19
|
+
#include "blake2.h"
|
20
|
+
#include "blake2-impl.h"
|
21
|
+
#include "blake2b-round.h"
|
22
|
+
|
23
|
+
static const uint64_t blake2b_IV[8] =
|
24
|
+
{
|
25
|
+
0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL,
|
26
|
+
0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL,
|
27
|
+
0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
|
28
|
+
0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL
|
29
|
+
};
|
30
|
+
|
31
|
+
int blake2b_compress_sse41( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] )
|
32
|
+
{
|
33
|
+
__m128i row1l, row1h;
|
34
|
+
__m128i row2l, row2h;
|
35
|
+
__m128i row3l, row3h;
|
36
|
+
__m128i row4l, row4h;
|
37
|
+
__m128i b0, b1;
|
38
|
+
__m128i t0, t1;
|
39
|
+
const __m128i r16 = _mm_setr_epi8( 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9 );
|
40
|
+
const __m128i r24 = _mm_setr_epi8( 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10 );
|
41
|
+
const __m128i m0 = LOADU( block + 00 );
|
42
|
+
const __m128i m1 = LOADU( block + 16 );
|
43
|
+
const __m128i m2 = LOADU( block + 32 );
|
44
|
+
const __m128i m3 = LOADU( block + 48 );
|
45
|
+
const __m128i m4 = LOADU( block + 64 );
|
46
|
+
const __m128i m5 = LOADU( block + 80 );
|
47
|
+
const __m128i m6 = LOADU( block + 96 );
|
48
|
+
const __m128i m7 = LOADU( block + 112 );
|
49
|
+
row1l = LOADU( &S->h[0] );
|
50
|
+
row1h = LOADU( &S->h[2] );
|
51
|
+
row2l = LOADU( &S->h[4] );
|
52
|
+
row2h = LOADU( &S->h[6] );
|
53
|
+
row3l = LOADU( &blake2b_IV[0] );
|
54
|
+
row3h = LOADU( &blake2b_IV[2] );
|
55
|
+
row4l = _mm_xor_si128( LOADU( &blake2b_IV[4] ), LOADU( &S->t[0] ) );
|
56
|
+
row4h = _mm_xor_si128( LOADU( &blake2b_IV[6] ), LOADU( &S->f[0] ) );
|
57
|
+
ROUND( 0 );
|
58
|
+
ROUND( 1 );
|
59
|
+
ROUND( 2 );
|
60
|
+
ROUND( 3 );
|
61
|
+
ROUND( 4 );
|
62
|
+
ROUND( 5 );
|
63
|
+
ROUND( 6 );
|
64
|
+
ROUND( 7 );
|
65
|
+
ROUND( 8 );
|
66
|
+
ROUND( 9 );
|
67
|
+
ROUND( 10 );
|
68
|
+
ROUND( 11 );
|
69
|
+
row1l = _mm_xor_si128( row3l, row1l );
|
70
|
+
row1h = _mm_xor_si128( row3h, row1h );
|
71
|
+
STOREU( &S->h[0], _mm_xor_si128( LOADU( &S->h[0] ), row1l ) );
|
72
|
+
STOREU( &S->h[2], _mm_xor_si128( LOADU( &S->h[2] ), row1h ) );
|
73
|
+
row2l = _mm_xor_si128( row4l, row2l );
|
74
|
+
row2h = _mm_xor_si128( row4h, row2h );
|
75
|
+
STOREU( &S->h[4], _mm_xor_si128( LOADU( &S->h[4] ), row2l ) );
|
76
|
+
STOREU( &S->h[6], _mm_xor_si128( LOADU( &S->h[6] ), row2h ) );
|
77
|
+
return 0;
|
78
|
+
}
|
79
|
+
|
80
|
+
#endif
|
@@ -0,0 +1,89 @@
|
|
1
|
+
|
2
|
+
#define BLAKE2_USE_SSSE3
|
3
|
+
|
4
|
+
#include <stdint.h>
|
5
|
+
#include <string.h>
|
6
|
+
|
7
|
+
#if (defined(HAVE_EMMINTRIN_H) && defined(HAVE_TMMINTRIN_H)) || \
|
8
|
+
(defined(_MSC_VER) && (defined(_M_X64) || defined(_M_AMD64)))
|
9
|
+
|
10
|
+
#pragma GCC target("sse2")
|
11
|
+
#pragma GCC target("ssse3")
|
12
|
+
|
13
|
+
#ifdef _MSC_VER
|
14
|
+
# include <intrin.h> /* for _mm_set_epi64x */
|
15
|
+
#endif
|
16
|
+
#include <emmintrin.h>
|
17
|
+
#include <tmmintrin.h>
|
18
|
+
|
19
|
+
#include "blake2.h"
|
20
|
+
#include "blake2-impl.h"
|
21
|
+
#include "blake2b-round.h"
|
22
|
+
|
23
|
+
static const uint64_t blake2b_IV[8] =
|
24
|
+
{
|
25
|
+
0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL,
|
26
|
+
0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL,
|
27
|
+
0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
|
28
|
+
0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL
|
29
|
+
};
|
30
|
+
|
31
|
+
int blake2b_compress_ssse3( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] )
|
32
|
+
{
|
33
|
+
__m128i row1l, row1h;
|
34
|
+
__m128i row2l, row2h;
|
35
|
+
__m128i row3l, row3h;
|
36
|
+
__m128i row4l, row4h;
|
37
|
+
__m128i b0, b1;
|
38
|
+
__m128i t0, t1;
|
39
|
+
const __m128i r16 = _mm_setr_epi8( 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9 );
|
40
|
+
const __m128i r24 = _mm_setr_epi8( 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10 );
|
41
|
+
const uint64_t m0 = ( ( uint64_t * )block )[ 0];
|
42
|
+
const uint64_t m1 = ( ( uint64_t * )block )[ 1];
|
43
|
+
const uint64_t m2 = ( ( uint64_t * )block )[ 2];
|
44
|
+
const uint64_t m3 = ( ( uint64_t * )block )[ 3];
|
45
|
+
const uint64_t m4 = ( ( uint64_t * )block )[ 4];
|
46
|
+
const uint64_t m5 = ( ( uint64_t * )block )[ 5];
|
47
|
+
const uint64_t m6 = ( ( uint64_t * )block )[ 6];
|
48
|
+
const uint64_t m7 = ( ( uint64_t * )block )[ 7];
|
49
|
+
const uint64_t m8 = ( ( uint64_t * )block )[ 8];
|
50
|
+
const uint64_t m9 = ( ( uint64_t * )block )[ 9];
|
51
|
+
const uint64_t m10 = ( ( uint64_t * )block )[10];
|
52
|
+
const uint64_t m11 = ( ( uint64_t * )block )[11];
|
53
|
+
const uint64_t m12 = ( ( uint64_t * )block )[12];
|
54
|
+
const uint64_t m13 = ( ( uint64_t * )block )[13];
|
55
|
+
const uint64_t m14 = ( ( uint64_t * )block )[14];
|
56
|
+
const uint64_t m15 = ( ( uint64_t * )block )[15];
|
57
|
+
|
58
|
+
row1l = LOADU( &S->h[0] );
|
59
|
+
row1h = LOADU( &S->h[2] );
|
60
|
+
row2l = LOADU( &S->h[4] );
|
61
|
+
row2h = LOADU( &S->h[6] );
|
62
|
+
row3l = LOADU( &blake2b_IV[0] );
|
63
|
+
row3h = LOADU( &blake2b_IV[2] );
|
64
|
+
row4l = _mm_xor_si128( LOADU( &blake2b_IV[4] ), LOADU( &S->t[0] ) );
|
65
|
+
row4h = _mm_xor_si128( LOADU( &blake2b_IV[6] ), LOADU( &S->f[0] ) );
|
66
|
+
ROUND( 0 );
|
67
|
+
ROUND( 1 );
|
68
|
+
ROUND( 2 );
|
69
|
+
ROUND( 3 );
|
70
|
+
ROUND( 4 );
|
71
|
+
ROUND( 5 );
|
72
|
+
ROUND( 6 );
|
73
|
+
ROUND( 7 );
|
74
|
+
ROUND( 8 );
|
75
|
+
ROUND( 9 );
|
76
|
+
ROUND( 10 );
|
77
|
+
ROUND( 11 );
|
78
|
+
row1l = _mm_xor_si128( row3l, row1l );
|
79
|
+
row1h = _mm_xor_si128( row3h, row1h );
|
80
|
+
STOREU( &S->h[0], _mm_xor_si128( LOADU( &S->h[0] ), row1l ) );
|
81
|
+
STOREU( &S->h[2], _mm_xor_si128( LOADU( &S->h[2] ), row1h ) );
|
82
|
+
row2l = _mm_xor_si128( row4l, row2l );
|
83
|
+
row2h = _mm_xor_si128( row4h, row2h );
|
84
|
+
STOREU( &S->h[4], _mm_xor_si128( LOADU( &S->h[4] ), row2l ) );
|
85
|
+
STOREU( &S->h[6], _mm_xor_si128( LOADU( &S->h[6] ), row2h ) );
|
86
|
+
return 0;
|
87
|
+
}
|
88
|
+
|
89
|
+
#endif
|
@@ -0,0 +1,68 @@
|
|
1
|
+
/*
|
2
|
+
BLAKE2 reference source code package - optimized C implementations
|
3
|
+
|
4
|
+
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
|
5
|
+
|
6
|
+
To the extent possible under law, the author(s) have dedicated all copyright
|
7
|
+
and related and neighboring rights to this software to the public domain
|
8
|
+
worldwide. This software is distributed without any warranty.
|
9
|
+
|
10
|
+
You should have received a copy of the CC0 Public Domain Dedication along with
|
11
|
+
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
|
12
|
+
*/
|
13
|
+
|
14
|
+
#ifndef blake2b_load_sse2_H
|
15
|
+
#define blake2b_load_sse2_H
|
16
|
+
|
17
|
+
#define LOAD_MSG_0_1(b0, b1) b0 = _mm_set_epi64x(m2, m0); b1 = _mm_set_epi64x(m6, m4)
|
18
|
+
#define LOAD_MSG_0_2(b0, b1) b0 = _mm_set_epi64x(m3, m1); b1 = _mm_set_epi64x(m7, m5)
|
19
|
+
#define LOAD_MSG_0_3(b0, b1) b0 = _mm_set_epi64x(m10, m8); b1 = _mm_set_epi64x(m14, m12)
|
20
|
+
#define LOAD_MSG_0_4(b0, b1) b0 = _mm_set_epi64x(m11, m9); b1 = _mm_set_epi64x(m15, m13)
|
21
|
+
#define LOAD_MSG_1_1(b0, b1) b0 = _mm_set_epi64x(m4, m14); b1 = _mm_set_epi64x(m13, m9)
|
22
|
+
#define LOAD_MSG_1_2(b0, b1) b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m6, m15)
|
23
|
+
#define LOAD_MSG_1_3(b0, b1) b0 = _mm_set_epi64x(m0, m1); b1 = _mm_set_epi64x(m5, m11)
|
24
|
+
#define LOAD_MSG_1_4(b0, b1) b0 = _mm_set_epi64x(m2, m12); b1 = _mm_set_epi64x(m3, m7)
|
25
|
+
#define LOAD_MSG_2_1(b0, b1) b0 = _mm_set_epi64x(m12, m11); b1 = _mm_set_epi64x(m15, m5)
|
26
|
+
#define LOAD_MSG_2_2(b0, b1) b0 = _mm_set_epi64x(m0, m8); b1 = _mm_set_epi64x(m13, m2)
|
27
|
+
#define LOAD_MSG_2_3(b0, b1) b0 = _mm_set_epi64x(m3, m10); b1 = _mm_set_epi64x(m9, m7)
|
28
|
+
#define LOAD_MSG_2_4(b0, b1) b0 = _mm_set_epi64x(m6, m14); b1 = _mm_set_epi64x(m4, m1)
|
29
|
+
#define LOAD_MSG_3_1(b0, b1) b0 = _mm_set_epi64x(m3, m7); b1 = _mm_set_epi64x(m11, m13)
|
30
|
+
#define LOAD_MSG_3_2(b0, b1) b0 = _mm_set_epi64x(m1, m9); b1 = _mm_set_epi64x(m14, m12)
|
31
|
+
#define LOAD_MSG_3_3(b0, b1) b0 = _mm_set_epi64x(m5, m2); b1 = _mm_set_epi64x(m15, m4)
|
32
|
+
#define LOAD_MSG_3_4(b0, b1) b0 = _mm_set_epi64x(m10, m6); b1 = _mm_set_epi64x(m8, m0)
|
33
|
+
#define LOAD_MSG_4_1(b0, b1) b0 = _mm_set_epi64x(m5, m9); b1 = _mm_set_epi64x(m10, m2)
|
34
|
+
#define LOAD_MSG_4_2(b0, b1) b0 = _mm_set_epi64x(m7, m0); b1 = _mm_set_epi64x(m15, m4)
|
35
|
+
#define LOAD_MSG_4_3(b0, b1) b0 = _mm_set_epi64x(m11, m14); b1 = _mm_set_epi64x(m3, m6)
|
36
|
+
#define LOAD_MSG_4_4(b0, b1) b0 = _mm_set_epi64x(m12, m1); b1 = _mm_set_epi64x(m13, m8)
|
37
|
+
#define LOAD_MSG_5_1(b0, b1) b0 = _mm_set_epi64x(m6, m2); b1 = _mm_set_epi64x(m8, m0)
|
38
|
+
#define LOAD_MSG_5_2(b0, b1) b0 = _mm_set_epi64x(m10, m12); b1 = _mm_set_epi64x(m3, m11)
|
39
|
+
#define LOAD_MSG_5_3(b0, b1) b0 = _mm_set_epi64x(m7, m4); b1 = _mm_set_epi64x(m1, m15)
|
40
|
+
#define LOAD_MSG_5_4(b0, b1) b0 = _mm_set_epi64x(m5, m13); b1 = _mm_set_epi64x(m9, m14)
|
41
|
+
#define LOAD_MSG_6_1(b0, b1) b0 = _mm_set_epi64x(m1, m12); b1 = _mm_set_epi64x(m4, m14)
|
42
|
+
#define LOAD_MSG_6_2(b0, b1) b0 = _mm_set_epi64x(m15, m5); b1 = _mm_set_epi64x(m10, m13)
|
43
|
+
#define LOAD_MSG_6_3(b0, b1) b0 = _mm_set_epi64x(m6, m0); b1 = _mm_set_epi64x(m8, m9)
|
44
|
+
#define LOAD_MSG_6_4(b0, b1) b0 = _mm_set_epi64x(m3, m7); b1 = _mm_set_epi64x(m11, m2)
|
45
|
+
#define LOAD_MSG_7_1(b0, b1) b0 = _mm_set_epi64x(m7, m13); b1 = _mm_set_epi64x(m3, m12)
|
46
|
+
#define LOAD_MSG_7_2(b0, b1) b0 = _mm_set_epi64x(m14, m11); b1 = _mm_set_epi64x(m9, m1)
|
47
|
+
#define LOAD_MSG_7_3(b0, b1) b0 = _mm_set_epi64x(m15, m5); b1 = _mm_set_epi64x(m2, m8)
|
48
|
+
#define LOAD_MSG_7_4(b0, b1) b0 = _mm_set_epi64x(m4, m0); b1 = _mm_set_epi64x(m10, m6)
|
49
|
+
#define LOAD_MSG_8_1(b0, b1) b0 = _mm_set_epi64x(m14, m6); b1 = _mm_set_epi64x(m0, m11)
|
50
|
+
#define LOAD_MSG_8_2(b0, b1) b0 = _mm_set_epi64x(m9, m15); b1 = _mm_set_epi64x(m8, m3)
|
51
|
+
#define LOAD_MSG_8_3(b0, b1) b0 = _mm_set_epi64x(m13, m12); b1 = _mm_set_epi64x(m10, m1)
|
52
|
+
#define LOAD_MSG_8_4(b0, b1) b0 = _mm_set_epi64x(m7, m2); b1 = _mm_set_epi64x(m5, m4)
|
53
|
+
#define LOAD_MSG_9_1(b0, b1) b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m1, m7)
|
54
|
+
#define LOAD_MSG_9_2(b0, b1) b0 = _mm_set_epi64x(m4, m2); b1 = _mm_set_epi64x(m5, m6)
|
55
|
+
#define LOAD_MSG_9_3(b0, b1) b0 = _mm_set_epi64x(m9, m15); b1 = _mm_set_epi64x(m13, m3)
|
56
|
+
#define LOAD_MSG_9_4(b0, b1) b0 = _mm_set_epi64x(m14, m11); b1 = _mm_set_epi64x(m0, m12)
|
57
|
+
#define LOAD_MSG_10_1(b0, b1) b0 = _mm_set_epi64x(m2, m0); b1 = _mm_set_epi64x(m6, m4)
|
58
|
+
#define LOAD_MSG_10_2(b0, b1) b0 = _mm_set_epi64x(m3, m1); b1 = _mm_set_epi64x(m7, m5)
|
59
|
+
#define LOAD_MSG_10_3(b0, b1) b0 = _mm_set_epi64x(m10, m8); b1 = _mm_set_epi64x(m14, m12)
|
60
|
+
#define LOAD_MSG_10_4(b0, b1) b0 = _mm_set_epi64x(m11, m9); b1 = _mm_set_epi64x(m15, m13)
|
61
|
+
#define LOAD_MSG_11_1(b0, b1) b0 = _mm_set_epi64x(m4, m14); b1 = _mm_set_epi64x(m13, m9)
|
62
|
+
#define LOAD_MSG_11_2(b0, b1) b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m6, m15)
|
63
|
+
#define LOAD_MSG_11_3(b0, b1) b0 = _mm_set_epi64x(m0, m1); b1 = _mm_set_epi64x(m5, m11)
|
64
|
+
#define LOAD_MSG_11_4(b0, b1) b0 = _mm_set_epi64x(m2, m12); b1 = _mm_set_epi64x(m3, m7)
|
65
|
+
|
66
|
+
|
67
|
+
#endif
|
68
|
+
|