rbnacl-libsodium 1.0.5 → 1.0.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (118) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGES.md +8 -0
  3. data/lib/rbnacl/libsodium/version.rb +1 -1
  4. data/vendor/libsodium/ChangeLog +25 -0
  5. data/vendor/libsodium/Makefile.in +1 -1
  6. data/vendor/libsodium/THANKS +1 -0
  7. data/vendor/libsodium/autom4te.cache/output.1 +16 -16
  8. data/vendor/libsodium/autom4te.cache/output.5 +16 -16
  9. data/vendor/libsodium/autom4te.cache/requests +894 -894
  10. data/vendor/libsodium/autom4te.cache/traces.1 +4 -4
  11. data/vendor/libsodium/autom4te.cache/traces.5 +2 -2
  12. data/vendor/libsodium/builds/msvc/version.h +3 -3
  13. data/vendor/libsodium/builds/msvc/vs2010/libsodium/libsodium.vcxproj +7 -3
  14. data/vendor/libsodium/builds/msvc/vs2010/libsodium/libsodium.vcxproj.filters +21 -9
  15. data/vendor/libsodium/builds/msvc/vs2012/libsodium/libsodium.vcxproj +8 -4
  16. data/vendor/libsodium/builds/msvc/vs2012/libsodium/libsodium.vcxproj.filters +21 -9
  17. data/vendor/libsodium/builds/msvc/vs2013/libsodium/libsodium.vcxproj +7 -3
  18. data/vendor/libsodium/builds/msvc/vs2013/libsodium/libsodium.vcxproj.filters +21 -9
  19. data/vendor/libsodium/builds/msvc/vs2015/libsodium/libsodium.vcxproj +7 -3
  20. data/vendor/libsodium/builds/msvc/vs2015/libsodium/libsodium.vcxproj.filters +21 -9
  21. data/vendor/libsodium/configure +16 -16
  22. data/vendor/libsodium/configure.ac +7 -7
  23. data/vendor/libsodium/dist-build/Makefile.in +1 -1
  24. data/vendor/libsodium/libsodium.sln +4 -2
  25. data/vendor/libsodium/libsodium.vcxproj +17 -13
  26. data/vendor/libsodium/libsodium.vcxproj.filters +22 -10
  27. data/vendor/libsodium/msvc-scripts/Makefile.in +1 -1
  28. data/vendor/libsodium/msvc-scripts/process.bat +3 -3
  29. data/vendor/libsodium/src/Makefile.in +1 -1
  30. data/vendor/libsodium/src/libsodium/Makefile.am +19 -7
  31. data/vendor/libsodium/src/libsodium/Makefile.in +85 -48
  32. data/vendor/libsodium/src/libsodium/crypto_aead/aes256gcm/aesni/aead_aes256gcm_aesni.c +11 -3
  33. data/vendor/libsodium/src/libsodium/crypto_aead/chacha20poly1305/sodium/aead_chacha20poly1305.c +1 -0
  34. data/vendor/libsodium/src/libsodium/crypto_box/curve25519xsalsa20poly1305/ref/keypair_curve25519xsalsa20poly1305.c +2 -0
  35. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2-impl.h +4 -4
  36. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2.h +20 -18
  37. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-compress-ref.c +93 -0
  38. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-compress-sse41.c +80 -0
  39. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-compress-ssse3.c +89 -0
  40. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-load-sse2.h +68 -0
  41. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-load-sse41.h +402 -0
  42. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-ref.c +58 -102
  43. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-round.h +123 -0
  44. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/generichash_blake2b.c +6 -0
  45. data/vendor/libsodium/src/libsodium/crypto_onetimeauth/poly1305/donna/auth_poly1305_donna.c +15 -10
  46. data/vendor/libsodium/src/libsodium/crypto_onetimeauth/poly1305/donna/poly1305_donna.h +14 -14
  47. data/vendor/libsodium/src/libsodium/crypto_onetimeauth/poly1305/donna/poly1305_donna32.h +0 -2
  48. data/vendor/libsodium/src/libsodium/crypto_onetimeauth/poly1305/donna/poly1305_donna64.h +0 -2
  49. data/vendor/libsodium/src/libsodium/crypto_onetimeauth/poly1305/onetimeauth_poly1305.c +18 -16
  50. data/vendor/libsodium/src/libsodium/crypto_onetimeauth/poly1305/onetimeauth_poly1305.h +23 -0
  51. data/vendor/libsodium/src/libsodium/crypto_pwhash/scryptsalsa208sha256/pbkdf2-sha256.c +2 -2
  52. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/donna_c64/api.h +0 -3
  53. data/vendor/libsodium/src/libsodium/crypto_secretbox/crypto_secretbox_easy.c +6 -2
  54. data/vendor/libsodium/src/libsodium/crypto_stream/chacha20/ref/stream_chacha20_ref.c +5 -0
  55. data/vendor/libsodium/src/libsodium/include/Makefile.in +1 -1
  56. data/vendor/libsodium/src/libsodium/include/sodium/core.h +2 -1
  57. data/vendor/libsodium/src/libsodium/include/sodium/crypto_aead_aes256gcm.h +5 -2
  58. data/vendor/libsodium/src/libsodium/include/sodium/crypto_aead_chacha20poly1305.h +5 -2
  59. data/vendor/libsodium/src/libsodium/include/sodium/crypto_auth.h +3 -1
  60. data/vendor/libsodium/src/libsodium/include/sodium/crypto_auth_hmacsha256.h +2 -1
  61. data/vendor/libsodium/src/libsodium/include/sodium/crypto_auth_hmacsha512.h +2 -1
  62. data/vendor/libsodium/src/libsodium/include/sodium/crypto_auth_hmacsha512256.h +2 -1
  63. data/vendor/libsodium/src/libsodium/include/sodium/crypto_box.h +15 -7
  64. data/vendor/libsodium/src/libsodium/include/sodium/crypto_box_curve25519xsalsa20poly1305.h +4 -2
  65. data/vendor/libsodium/src/libsodium/include/sodium/crypto_generichash_blake2b.h +4 -0
  66. data/vendor/libsodium/src/libsodium/include/sodium/crypto_hash.h +2 -1
  67. data/vendor/libsodium/src/libsodium/include/sodium/crypto_onetimeauth.h +2 -1
  68. data/vendor/libsodium/src/libsodium/include/sodium/crypto_onetimeauth_poly1305.h +7 -30
  69. data/vendor/libsodium/src/libsodium/include/sodium/crypto_pwhash_scryptsalsa208sha256.h +8 -4
  70. data/vendor/libsodium/src/libsodium/include/sodium/crypto_secretbox.h +7 -3
  71. data/vendor/libsodium/src/libsodium/include/sodium/crypto_sign.h +5 -2
  72. data/vendor/libsodium/src/libsodium/include/sodium/crypto_sign_ed25519.h +6 -3
  73. data/vendor/libsodium/src/libsodium/include/sodium/crypto_sign_edwards25519sha512batch.h +14 -8
  74. data/vendor/libsodium/src/libsodium/include/sodium/crypto_verify_16.h +2 -1
  75. data/vendor/libsodium/src/libsodium/include/sodium/crypto_verify_32.h +2 -1
  76. data/vendor/libsodium/src/libsodium/include/sodium/crypto_verify_64.h +2 -1
  77. data/vendor/libsodium/src/libsodium/include/sodium/runtime.h +10 -3
  78. data/vendor/libsodium/src/libsodium/include/sodium/utils.h +8 -4
  79. data/vendor/libsodium/src/libsodium/randombytes/randombytes.c +7 -0
  80. data/vendor/libsodium/src/libsodium/randombytes/salsa20/randombytes_salsa20_random.c +4 -2
  81. data/vendor/libsodium/src/libsodium/randombytes/sysrandom/randombytes_sysrandom.c +12 -10
  82. data/vendor/libsodium/src/libsodium/sodium/core.c +4 -4
  83. data/vendor/libsodium/src/libsodium/sodium/runtime.c +27 -1
  84. data/vendor/libsodium/src/libsodium/sodium/utils.c +4 -2
  85. data/vendor/libsodium/test/Makefile.in +1 -1
  86. data/vendor/libsodium/test/default/Makefile.in +1 -1
  87. data/vendor/libsodium/test/default/aead_aes256gcm.c +14 -0
  88. data/vendor/libsodium/test/default/box_easy2.c +13 -7
  89. data/vendor/libsodium/test/default/chacha20.c +36 -12
  90. data/vendor/libsodium/test/default/ed25519_convert.c +6 -2
  91. data/vendor/libsodium/test/default/generichash3.exp +1 -1
  92. data/vendor/libsodium/test/default/secretbox_easy2.c +5 -3
  93. data/vendor/libsodium/test/default/sodium_core.c +2 -0
  94. data/vendor/libsodium/test/default/sodium_utils.c +10 -4
  95. data/vendor/libsodium/test/default/sodium_utils2.c +1 -0
  96. data/vendor/libsodium/test/default/sodium_utils3.c +1 -0
  97. metadata +8 -22
  98. data/vendor/libsodium/src/libsodium/crypto_auth/hmacsha256/checksum +0 -1
  99. data/vendor/libsodium/src/libsodium/crypto_auth/hmacsha512256/checksum +0 -1
  100. data/vendor/libsodium/src/libsodium/crypto_box/curve25519xsalsa20poly1305/checksum +0 -1
  101. data/vendor/libsodium/src/libsodium/crypto_core/hsalsa20/checksum +0 -1
  102. data/vendor/libsodium/src/libsodium/crypto_core/salsa20/checksum +0 -1
  103. data/vendor/libsodium/src/libsodium/crypto_core/salsa2012/checksum +0 -1
  104. data/vendor/libsodium/src/libsodium/crypto_core/salsa208/checksum +0 -1
  105. data/vendor/libsodium/src/libsodium/crypto_hash/sha256/checksum +0 -1
  106. data/vendor/libsodium/src/libsodium/crypto_hash/sha512/checksum +0 -1
  107. data/vendor/libsodium/src/libsodium/crypto_onetimeauth/poly1305/donna/verify_poly1305_donna.c +0 -15
  108. data/vendor/libsodium/src/libsodium/crypto_onetimeauth/poly1305/onetimeauth_poly1305_api.c +0 -11
  109. data/vendor/libsodium/src/libsodium/crypto_onetimeauth/poly1305/onetimeauth_poly1305_try.c +0 -13
  110. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/checksum +0 -1
  111. data/vendor/libsodium/src/libsodium/crypto_secretbox/xsalsa20poly1305/checksum +0 -1
  112. data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/checksum +0 -1
  113. data/vendor/libsodium/src/libsodium/crypto_stream/salsa20/checksum +0 -1
  114. data/vendor/libsodium/src/libsodium/crypto_stream/salsa2012/checksum +0 -1
  115. data/vendor/libsodium/src/libsodium/crypto_stream/salsa208/checksum +0 -1
  116. data/vendor/libsodium/src/libsodium/crypto_stream/xsalsa20/checksum +0 -1
  117. data/vendor/libsodium/src/libsodium/crypto_verify/16/checksum +0 -1
  118. data/vendor/libsodium/src/libsodium/crypto_verify/32/checksum +0 -1
@@ -214,8 +214,12 @@ addmul(unsigned char *c, const unsigned char *a, unsigned int xlen, const unsign
214
214
  A = _mm_loadu_si128((const __m128i *) a);
215
215
  } else {
216
216
  CRYPTO_ALIGN(16) unsigned char padded[16];
217
+ unsigned int i;
218
+
217
219
  memset(padded, 0, 16);
218
- memcpy(padded, a, xlen);
220
+ for (i = 0; i < xlen; i++) {
221
+ padded[i] = a[i];
222
+ }
219
223
  A = _mm_load_si128((const __m128i *) padded);
220
224
  }
221
225
  A = _mm_shuffle_epi8(A, rev);
@@ -638,14 +642,17 @@ crypto_aead_aes256gcm_decrypt_afternm(unsigned char *m, unsigned long long *mlen
638
642
  CRYPTO_ALIGN(16) unsigned char fb[16];
639
643
 
640
644
  (void) nsec;
641
- memcpy(H, ctx->H, sizeof H);
642
645
  if (clen > 16ULL * (1ULL << 32) - 16ULL) {
643
646
  abort();
644
647
  }
645
- mlen = clen - 16;
646
648
  if (mlen_p != NULL) {
647
649
  *mlen_p = 0U;
648
650
  }
651
+ if (clen < 16) {
652
+ return -1;
653
+ }
654
+ mlen = clen - 16;
655
+
649
656
  memcpy(&n2[0], npub, 12);
650
657
  *(uint32_t *) &n2[12] = 0x01000000;
651
658
  aesni_encrypt1(T, _mm_load_si128((const __m128i *) n2), rkeys);
@@ -653,6 +660,7 @@ crypto_aead_aes256gcm_decrypt_afternm(unsigned char *m, unsigned long long *mlen
653
660
  (*(uint64_t *) &fb[0]) = _bswap64((uint64_t)(8 * adlen));
654
661
  (*(uint64_t *) &fb[8]) = _bswap64((uint64_t)(8 * mlen));
655
662
 
663
+ memcpy(H, ctx->H, sizeof H);
656
664
  Hv = _mm_shuffle_epi8(_mm_load_si128((const __m128i *) H), rev);
657
665
  _mm_store_si128((__m128i *) H, Hv);
658
666
  H2v = mulv(Hv, Hv);
@@ -1,4 +1,5 @@
1
1
 
2
+ #include <stdlib.h>
2
3
  #include <limits.h>
3
4
  #include <string.h>
4
5
 
@@ -4,6 +4,7 @@
4
4
  #include "crypto_scalarmult_curve25519.h"
5
5
  #include "api.h"
6
6
  #include "randombytes.h"
7
+ #include "utils.h"
7
8
 
8
9
  int crypto_box_seed_keypair(
9
10
  unsigned char *pk,
@@ -14,6 +15,7 @@ int crypto_box_seed_keypair(
14
15
  unsigned char hash[64];
15
16
  crypto_hash_sha512(hash,seed,32);
16
17
  memmove(sk,hash,32);
18
+ sodium_memzero(hash, sizeof hash);
17
19
  return crypto_scalarmult_curve25519_base(pk,sk);
18
20
  }
19
21
 
@@ -21,7 +21,7 @@
21
21
 
22
22
  static inline uint32_t load32( const void *src )
23
23
  {
24
- #if defined(NATIVE_LITTLE_ENDIAN)
24
+ #ifdef NATIVE_LITTLE_ENDIAN
25
25
  uint32_t w;
26
26
  memcpy(&w, src, sizeof w);
27
27
  return w;
@@ -37,7 +37,7 @@ static inline uint32_t load32( const void *src )
37
37
 
38
38
  static inline uint64_t load64( const void *src )
39
39
  {
40
- #if defined(NATIVE_LITTLE_ENDIAN)
40
+ #ifdef NATIVE_LITTLE_ENDIAN
41
41
  uint64_t w;
42
42
  memcpy(&w, src, sizeof w);
43
43
  return w;
@@ -57,7 +57,7 @@ static inline uint64_t load64( const void *src )
57
57
 
58
58
  static inline void store32( void *dst, uint32_t w )
59
59
  {
60
- #if defined(NATIVE_LITTLE_ENDIAN)
60
+ #ifdef NATIVE_LITTLE_ENDIAN
61
61
  memcpy(dst, &w, sizeof w);
62
62
  #else
63
63
  uint8_t *p = ( uint8_t * )dst;
@@ -70,7 +70,7 @@ static inline void store32( void *dst, uint32_t w )
70
70
 
71
71
  static inline void store64( void *dst, uint64_t w )
72
72
  {
73
- #if defined(NATIVE_LITTLE_ENDIAN)
73
+ #ifdef NATIVE_LITTLE_ENDIAN
74
74
  memcpy(dst, &w, sizeof w);
75
75
  #else
76
76
  uint8_t *p = ( uint8_t * )dst;
@@ -18,22 +18,18 @@
18
18
  #include <stdint.h>
19
19
 
20
20
  #include "crypto_generichash_blake2b.h"
21
-
22
- #define blake2b_init_param crypto_generichash_blake2b__init_param
23
- #define blake2b_init crypto_generichash_blake2b__init
24
- #define blake2b_init_salt_personal crypto_generichash_blake2b__init_salt_personal
25
- #define blake2b_init_key crypto_generichash_blake2b__init_key
26
- #define blake2b_init_key_salt_personal crypto_generichash_blake2b__init_key_salt_personal
27
- #define blake2b_update crypto_generichash_blake2b__update
28
- #define blake2b_final crypto_generichash_blake2b__final
29
- #define blake2b crypto_generichash_blake2b__blake2b
30
- #define blake2b_salt_personal crypto_generichash_blake2b__blake2b_salt_personal
31
-
32
- #if defined(_MSC_VER)
33
- #define ALIGN(x) __declspec(align(x))
34
- #else
35
- #define ALIGN(x) __attribute__((aligned(x)))
36
- #endif
21
+ #include "export.h"
22
+
23
+ #define blake2b_init_param crypto_generichash_blake2b__init_param
24
+ #define blake2b_init crypto_generichash_blake2b__init
25
+ #define blake2b_init_salt_personal crypto_generichash_blake2b__init_salt_personal
26
+ #define blake2b_init_key crypto_generichash_blake2b__init_key
27
+ #define blake2b_init_key_salt_personal crypto_generichash_blake2b__init_key_salt_personal
28
+ #define blake2b_update crypto_generichash_blake2b__update
29
+ #define blake2b_final crypto_generichash_blake2b__final
30
+ #define blake2b crypto_generichash_blake2b__blake2b
31
+ #define blake2b_salt_personal crypto_generichash_blake2b__blake2b_salt_personal
32
+ #define blake2b_pick_best_implementation crypto_generichash_blake2b__pick_best_implementation
37
33
 
38
34
  #if defined(__cplusplus)
39
35
  extern "C" {
@@ -78,7 +74,7 @@ extern "C" {
78
74
  uint8_t personal[BLAKE2S_PERSONALBYTES]; // 32
79
75
  } blake2s_param;
80
76
 
81
- ALIGN( 64 ) typedef struct blake2s_state_
77
+ CRYPTO_ALIGN( 64 ) typedef struct blake2s_state_
82
78
  {
83
79
  uint32_t h[8];
84
80
  uint32_t t[2];
@@ -106,7 +102,7 @@ extern "C" {
106
102
  #ifndef DEFINE_BLAKE2B_STATE
107
103
  typedef crypto_generichash_blake2b_state blake2b_state;
108
104
  #else
109
- ALIGN( 64 ) typedef struct blake2b_state_
105
+ CRYPTO_ALIGN( 64 ) typedef struct blake2b_state_
110
106
  {
111
107
  uint64_t h[8];
112
108
  uint64_t t[2];
@@ -179,6 +175,12 @@ typedef crypto_generichash_blake2b_state blake2b_state;
179
175
  return blake2b( out, in, key, outlen, inlen, keylen );
180
176
  }
181
177
 
178
+ typedef int ( *blake2b_compress_fn )( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] );
179
+ int blake2b_pick_best_implementation(void);
180
+ int blake2b_compress_ref( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] );
181
+ int blake2b_compress_ssse3( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] );
182
+ int blake2b_compress_sse41( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] );
183
+
182
184
  #if defined(__cplusplus)
183
185
  }
184
186
  #endif
@@ -0,0 +1,93 @@
1
+
2
+ #include <stdint.h>
3
+ #include <string.h>
4
+
5
+ #include "blake2.h"
6
+ #include "blake2-impl.h"
7
+
8
+ static const uint64_t blake2b_IV[8] =
9
+ {
10
+ 0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL,
11
+ 0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL,
12
+ 0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
13
+ 0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL
14
+ };
15
+
16
+ static const uint8_t blake2b_sigma[12][16] =
17
+ {
18
+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } ,
19
+ { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } ,
20
+ { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } ,
21
+ { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } ,
22
+ { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } ,
23
+ { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } ,
24
+ { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } ,
25
+ { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } ,
26
+ { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } ,
27
+ { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } ,
28
+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } ,
29
+ { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }
30
+ };
31
+
32
+ int blake2b_compress_ref( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] )
33
+ {
34
+ uint64_t m[16];
35
+ uint64_t v[16];
36
+ int i;
37
+
38
+ for( i = 0; i < 16; ++i )
39
+ m[i] = load64( block + i * sizeof( m[i] ) );
40
+
41
+ for( i = 0; i < 8; ++i )
42
+ v[i] = S->h[i];
43
+
44
+ v[ 8] = blake2b_IV[0];
45
+ v[ 9] = blake2b_IV[1];
46
+ v[10] = blake2b_IV[2];
47
+ v[11] = blake2b_IV[3];
48
+ v[12] = S->t[0] ^ blake2b_IV[4];
49
+ v[13] = S->t[1] ^ blake2b_IV[5];
50
+ v[14] = S->f[0] ^ blake2b_IV[6];
51
+ v[15] = S->f[1] ^ blake2b_IV[7];
52
+ #define G(r,i,a,b,c,d) \
53
+ do { \
54
+ a = a + b + m[blake2b_sigma[r][2*i+0]]; \
55
+ d = rotr64(d ^ a, 32); \
56
+ c = c + d; \
57
+ b = rotr64(b ^ c, 24); \
58
+ a = a + b + m[blake2b_sigma[r][2*i+1]]; \
59
+ d = rotr64(d ^ a, 16); \
60
+ c = c + d; \
61
+ b = rotr64(b ^ c, 63); \
62
+ } while(0)
63
+ #define ROUND(r) \
64
+ do { \
65
+ G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
66
+ G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
67
+ G(r,2,v[ 2],v[ 6],v[10],v[14]); \
68
+ G(r,3,v[ 3],v[ 7],v[11],v[15]); \
69
+ G(r,4,v[ 0],v[ 5],v[10],v[15]); \
70
+ G(r,5,v[ 1],v[ 6],v[11],v[12]); \
71
+ G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
72
+ G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \
73
+ } while(0)
74
+ ROUND( 0 );
75
+ ROUND( 1 );
76
+ ROUND( 2 );
77
+ ROUND( 3 );
78
+ ROUND( 4 );
79
+ ROUND( 5 );
80
+ ROUND( 6 );
81
+ ROUND( 7 );
82
+ ROUND( 8 );
83
+ ROUND( 9 );
84
+ ROUND( 10 );
85
+ ROUND( 11 );
86
+
87
+ for( i = 0; i < 8; ++i )
88
+ S->h[i] = S->h[i] ^ v[i] ^ v[i + 8];
89
+
90
+ #undef G
91
+ #undef ROUND
92
+ return 0;
93
+ }
@@ -0,0 +1,80 @@
1
+
2
+ #define BLAKE2_USE_SSSE3
3
+ #define BLAKE2_USE_SSE41
4
+
5
+ #include <stdint.h>
6
+ #include <string.h>
7
+
8
+ #if (defined(HAVE_EMMINTRIN_H) && defined(HAVE_TMMINTRIN_H) && defined(HAVE_SMMINTRIN_H)) || \
9
+ (defined(_MSC_VER) && (defined(_M_X64) || defined(_M_AMD64) || defined(_M_IX86)))
10
+
11
+ #pragma GCC target("sse2")
12
+ #pragma GCC target("ssse3")
13
+ #pragma GCC target("sse4.1")
14
+
15
+ #include <emmintrin.h>
16
+ #include <tmmintrin.h>
17
+ #include <smmintrin.h>
18
+
19
+ #include "blake2.h"
20
+ #include "blake2-impl.h"
21
+ #include "blake2b-round.h"
22
+
23
+ static const uint64_t blake2b_IV[8] =
24
+ {
25
+ 0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL,
26
+ 0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL,
27
+ 0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
28
+ 0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL
29
+ };
30
+
31
+ int blake2b_compress_sse41( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] )
32
+ {
33
+ __m128i row1l, row1h;
34
+ __m128i row2l, row2h;
35
+ __m128i row3l, row3h;
36
+ __m128i row4l, row4h;
37
+ __m128i b0, b1;
38
+ __m128i t0, t1;
39
+ const __m128i r16 = _mm_setr_epi8( 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9 );
40
+ const __m128i r24 = _mm_setr_epi8( 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10 );
41
+ const __m128i m0 = LOADU( block + 00 );
42
+ const __m128i m1 = LOADU( block + 16 );
43
+ const __m128i m2 = LOADU( block + 32 );
44
+ const __m128i m3 = LOADU( block + 48 );
45
+ const __m128i m4 = LOADU( block + 64 );
46
+ const __m128i m5 = LOADU( block + 80 );
47
+ const __m128i m6 = LOADU( block + 96 );
48
+ const __m128i m7 = LOADU( block + 112 );
49
+ row1l = LOADU( &S->h[0] );
50
+ row1h = LOADU( &S->h[2] );
51
+ row2l = LOADU( &S->h[4] );
52
+ row2h = LOADU( &S->h[6] );
53
+ row3l = LOADU( &blake2b_IV[0] );
54
+ row3h = LOADU( &blake2b_IV[2] );
55
+ row4l = _mm_xor_si128( LOADU( &blake2b_IV[4] ), LOADU( &S->t[0] ) );
56
+ row4h = _mm_xor_si128( LOADU( &blake2b_IV[6] ), LOADU( &S->f[0] ) );
57
+ ROUND( 0 );
58
+ ROUND( 1 );
59
+ ROUND( 2 );
60
+ ROUND( 3 );
61
+ ROUND( 4 );
62
+ ROUND( 5 );
63
+ ROUND( 6 );
64
+ ROUND( 7 );
65
+ ROUND( 8 );
66
+ ROUND( 9 );
67
+ ROUND( 10 );
68
+ ROUND( 11 );
69
+ row1l = _mm_xor_si128( row3l, row1l );
70
+ row1h = _mm_xor_si128( row3h, row1h );
71
+ STOREU( &S->h[0], _mm_xor_si128( LOADU( &S->h[0] ), row1l ) );
72
+ STOREU( &S->h[2], _mm_xor_si128( LOADU( &S->h[2] ), row1h ) );
73
+ row2l = _mm_xor_si128( row4l, row2l );
74
+ row2h = _mm_xor_si128( row4h, row2h );
75
+ STOREU( &S->h[4], _mm_xor_si128( LOADU( &S->h[4] ), row2l ) );
76
+ STOREU( &S->h[6], _mm_xor_si128( LOADU( &S->h[6] ), row2h ) );
77
+ return 0;
78
+ }
79
+
80
+ #endif
@@ -0,0 +1,89 @@
1
+
2
+ #define BLAKE2_USE_SSSE3
3
+
4
+ #include <stdint.h>
5
+ #include <string.h>
6
+
7
+ #if (defined(HAVE_EMMINTRIN_H) && defined(HAVE_TMMINTRIN_H)) || \
8
+ (defined(_MSC_VER) && (defined(_M_X64) || defined(_M_AMD64)))
9
+
10
+ #pragma GCC target("sse2")
11
+ #pragma GCC target("ssse3")
12
+
13
+ #ifdef _MSC_VER
14
+ # include <intrin.h> /* for _mm_set_epi64x */
15
+ #endif
16
+ #include <emmintrin.h>
17
+ #include <tmmintrin.h>
18
+
19
+ #include "blake2.h"
20
+ #include "blake2-impl.h"
21
+ #include "blake2b-round.h"
22
+
23
+ static const uint64_t blake2b_IV[8] =
24
+ {
25
+ 0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL,
26
+ 0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL,
27
+ 0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
28
+ 0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL
29
+ };
30
+
31
+ int blake2b_compress_ssse3( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] )
32
+ {
33
+ __m128i row1l, row1h;
34
+ __m128i row2l, row2h;
35
+ __m128i row3l, row3h;
36
+ __m128i row4l, row4h;
37
+ __m128i b0, b1;
38
+ __m128i t0, t1;
39
+ const __m128i r16 = _mm_setr_epi8( 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9 );
40
+ const __m128i r24 = _mm_setr_epi8( 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10 );
41
+ const uint64_t m0 = ( ( uint64_t * )block )[ 0];
42
+ const uint64_t m1 = ( ( uint64_t * )block )[ 1];
43
+ const uint64_t m2 = ( ( uint64_t * )block )[ 2];
44
+ const uint64_t m3 = ( ( uint64_t * )block )[ 3];
45
+ const uint64_t m4 = ( ( uint64_t * )block )[ 4];
46
+ const uint64_t m5 = ( ( uint64_t * )block )[ 5];
47
+ const uint64_t m6 = ( ( uint64_t * )block )[ 6];
48
+ const uint64_t m7 = ( ( uint64_t * )block )[ 7];
49
+ const uint64_t m8 = ( ( uint64_t * )block )[ 8];
50
+ const uint64_t m9 = ( ( uint64_t * )block )[ 9];
51
+ const uint64_t m10 = ( ( uint64_t * )block )[10];
52
+ const uint64_t m11 = ( ( uint64_t * )block )[11];
53
+ const uint64_t m12 = ( ( uint64_t * )block )[12];
54
+ const uint64_t m13 = ( ( uint64_t * )block )[13];
55
+ const uint64_t m14 = ( ( uint64_t * )block )[14];
56
+ const uint64_t m15 = ( ( uint64_t * )block )[15];
57
+
58
+ row1l = LOADU( &S->h[0] );
59
+ row1h = LOADU( &S->h[2] );
60
+ row2l = LOADU( &S->h[4] );
61
+ row2h = LOADU( &S->h[6] );
62
+ row3l = LOADU( &blake2b_IV[0] );
63
+ row3h = LOADU( &blake2b_IV[2] );
64
+ row4l = _mm_xor_si128( LOADU( &blake2b_IV[4] ), LOADU( &S->t[0] ) );
65
+ row4h = _mm_xor_si128( LOADU( &blake2b_IV[6] ), LOADU( &S->f[0] ) );
66
+ ROUND( 0 );
67
+ ROUND( 1 );
68
+ ROUND( 2 );
69
+ ROUND( 3 );
70
+ ROUND( 4 );
71
+ ROUND( 5 );
72
+ ROUND( 6 );
73
+ ROUND( 7 );
74
+ ROUND( 8 );
75
+ ROUND( 9 );
76
+ ROUND( 10 );
77
+ ROUND( 11 );
78
+ row1l = _mm_xor_si128( row3l, row1l );
79
+ row1h = _mm_xor_si128( row3h, row1h );
80
+ STOREU( &S->h[0], _mm_xor_si128( LOADU( &S->h[0] ), row1l ) );
81
+ STOREU( &S->h[2], _mm_xor_si128( LOADU( &S->h[2] ), row1h ) );
82
+ row2l = _mm_xor_si128( row4l, row2l );
83
+ row2h = _mm_xor_si128( row4h, row2h );
84
+ STOREU( &S->h[4], _mm_xor_si128( LOADU( &S->h[4] ), row2l ) );
85
+ STOREU( &S->h[6], _mm_xor_si128( LOADU( &S->h[6] ), row2h ) );
86
+ return 0;
87
+ }
88
+
89
+ #endif
@@ -0,0 +1,68 @@
1
+ /*
2
+ BLAKE2 reference source code package - optimized C implementations
3
+
4
+ Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
5
+
6
+ To the extent possible under law, the author(s) have dedicated all copyright
7
+ and related and neighboring rights to this software to the public domain
8
+ worldwide. This software is distributed without any warranty.
9
+
10
+ You should have received a copy of the CC0 Public Domain Dedication along with
11
+ this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
12
+ */
13
+
14
+ #ifndef blake2b_load_sse2_H
15
+ #define blake2b_load_sse2_H
16
+
17
+ #define LOAD_MSG_0_1(b0, b1) b0 = _mm_set_epi64x(m2, m0); b1 = _mm_set_epi64x(m6, m4)
18
+ #define LOAD_MSG_0_2(b0, b1) b0 = _mm_set_epi64x(m3, m1); b1 = _mm_set_epi64x(m7, m5)
19
+ #define LOAD_MSG_0_3(b0, b1) b0 = _mm_set_epi64x(m10, m8); b1 = _mm_set_epi64x(m14, m12)
20
+ #define LOAD_MSG_0_4(b0, b1) b0 = _mm_set_epi64x(m11, m9); b1 = _mm_set_epi64x(m15, m13)
21
+ #define LOAD_MSG_1_1(b0, b1) b0 = _mm_set_epi64x(m4, m14); b1 = _mm_set_epi64x(m13, m9)
22
+ #define LOAD_MSG_1_2(b0, b1) b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m6, m15)
23
+ #define LOAD_MSG_1_3(b0, b1) b0 = _mm_set_epi64x(m0, m1); b1 = _mm_set_epi64x(m5, m11)
24
+ #define LOAD_MSG_1_4(b0, b1) b0 = _mm_set_epi64x(m2, m12); b1 = _mm_set_epi64x(m3, m7)
25
+ #define LOAD_MSG_2_1(b0, b1) b0 = _mm_set_epi64x(m12, m11); b1 = _mm_set_epi64x(m15, m5)
26
+ #define LOAD_MSG_2_2(b0, b1) b0 = _mm_set_epi64x(m0, m8); b1 = _mm_set_epi64x(m13, m2)
27
+ #define LOAD_MSG_2_3(b0, b1) b0 = _mm_set_epi64x(m3, m10); b1 = _mm_set_epi64x(m9, m7)
28
+ #define LOAD_MSG_2_4(b0, b1) b0 = _mm_set_epi64x(m6, m14); b1 = _mm_set_epi64x(m4, m1)
29
+ #define LOAD_MSG_3_1(b0, b1) b0 = _mm_set_epi64x(m3, m7); b1 = _mm_set_epi64x(m11, m13)
30
+ #define LOAD_MSG_3_2(b0, b1) b0 = _mm_set_epi64x(m1, m9); b1 = _mm_set_epi64x(m14, m12)
31
+ #define LOAD_MSG_3_3(b0, b1) b0 = _mm_set_epi64x(m5, m2); b1 = _mm_set_epi64x(m15, m4)
32
+ #define LOAD_MSG_3_4(b0, b1) b0 = _mm_set_epi64x(m10, m6); b1 = _mm_set_epi64x(m8, m0)
33
+ #define LOAD_MSG_4_1(b0, b1) b0 = _mm_set_epi64x(m5, m9); b1 = _mm_set_epi64x(m10, m2)
34
+ #define LOAD_MSG_4_2(b0, b1) b0 = _mm_set_epi64x(m7, m0); b1 = _mm_set_epi64x(m15, m4)
35
+ #define LOAD_MSG_4_3(b0, b1) b0 = _mm_set_epi64x(m11, m14); b1 = _mm_set_epi64x(m3, m6)
36
+ #define LOAD_MSG_4_4(b0, b1) b0 = _mm_set_epi64x(m12, m1); b1 = _mm_set_epi64x(m13, m8)
37
+ #define LOAD_MSG_5_1(b0, b1) b0 = _mm_set_epi64x(m6, m2); b1 = _mm_set_epi64x(m8, m0)
38
+ #define LOAD_MSG_5_2(b0, b1) b0 = _mm_set_epi64x(m10, m12); b1 = _mm_set_epi64x(m3, m11)
39
+ #define LOAD_MSG_5_3(b0, b1) b0 = _mm_set_epi64x(m7, m4); b1 = _mm_set_epi64x(m1, m15)
40
+ #define LOAD_MSG_5_4(b0, b1) b0 = _mm_set_epi64x(m5, m13); b1 = _mm_set_epi64x(m9, m14)
41
+ #define LOAD_MSG_6_1(b0, b1) b0 = _mm_set_epi64x(m1, m12); b1 = _mm_set_epi64x(m4, m14)
42
+ #define LOAD_MSG_6_2(b0, b1) b0 = _mm_set_epi64x(m15, m5); b1 = _mm_set_epi64x(m10, m13)
43
+ #define LOAD_MSG_6_3(b0, b1) b0 = _mm_set_epi64x(m6, m0); b1 = _mm_set_epi64x(m8, m9)
44
+ #define LOAD_MSG_6_4(b0, b1) b0 = _mm_set_epi64x(m3, m7); b1 = _mm_set_epi64x(m11, m2)
45
+ #define LOAD_MSG_7_1(b0, b1) b0 = _mm_set_epi64x(m7, m13); b1 = _mm_set_epi64x(m3, m12)
46
+ #define LOAD_MSG_7_2(b0, b1) b0 = _mm_set_epi64x(m14, m11); b1 = _mm_set_epi64x(m9, m1)
47
+ #define LOAD_MSG_7_3(b0, b1) b0 = _mm_set_epi64x(m15, m5); b1 = _mm_set_epi64x(m2, m8)
48
+ #define LOAD_MSG_7_4(b0, b1) b0 = _mm_set_epi64x(m4, m0); b1 = _mm_set_epi64x(m10, m6)
49
+ #define LOAD_MSG_8_1(b0, b1) b0 = _mm_set_epi64x(m14, m6); b1 = _mm_set_epi64x(m0, m11)
50
+ #define LOAD_MSG_8_2(b0, b1) b0 = _mm_set_epi64x(m9, m15); b1 = _mm_set_epi64x(m8, m3)
51
+ #define LOAD_MSG_8_3(b0, b1) b0 = _mm_set_epi64x(m13, m12); b1 = _mm_set_epi64x(m10, m1)
52
+ #define LOAD_MSG_8_4(b0, b1) b0 = _mm_set_epi64x(m7, m2); b1 = _mm_set_epi64x(m5, m4)
53
+ #define LOAD_MSG_9_1(b0, b1) b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m1, m7)
54
+ #define LOAD_MSG_9_2(b0, b1) b0 = _mm_set_epi64x(m4, m2); b1 = _mm_set_epi64x(m5, m6)
55
+ #define LOAD_MSG_9_3(b0, b1) b0 = _mm_set_epi64x(m9, m15); b1 = _mm_set_epi64x(m13, m3)
56
+ #define LOAD_MSG_9_4(b0, b1) b0 = _mm_set_epi64x(m14, m11); b1 = _mm_set_epi64x(m0, m12)
57
+ #define LOAD_MSG_10_1(b0, b1) b0 = _mm_set_epi64x(m2, m0); b1 = _mm_set_epi64x(m6, m4)
58
+ #define LOAD_MSG_10_2(b0, b1) b0 = _mm_set_epi64x(m3, m1); b1 = _mm_set_epi64x(m7, m5)
59
+ #define LOAD_MSG_10_3(b0, b1) b0 = _mm_set_epi64x(m10, m8); b1 = _mm_set_epi64x(m14, m12)
60
+ #define LOAD_MSG_10_4(b0, b1) b0 = _mm_set_epi64x(m11, m9); b1 = _mm_set_epi64x(m15, m13)
61
+ #define LOAD_MSG_11_1(b0, b1) b0 = _mm_set_epi64x(m4, m14); b1 = _mm_set_epi64x(m13, m9)
62
+ #define LOAD_MSG_11_2(b0, b1) b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m6, m15)
63
+ #define LOAD_MSG_11_3(b0, b1) b0 = _mm_set_epi64x(m0, m1); b1 = _mm_set_epi64x(m5, m11)
64
+ #define LOAD_MSG_11_4(b0, b1) b0 = _mm_set_epi64x(m2, m12); b1 = _mm_set_epi64x(m3, m7)
65
+
66
+
67
+ #endif
68
+