rbnacl-libsodium 1.0.8 → 1.0.9

Sign up to get free protection for your applications and to get access to all the features.
Files changed (204) hide show
  1. checksums.yaml +4 -4
  2. data/.travis.yml +23 -0
  3. data/CHANGES.md +5 -0
  4. data/Gemfile +5 -2
  5. data/Rakefile +5 -0
  6. data/ext/rbnacl/extconf.rb +2 -1
  7. data/lib/rbnacl/libsodium.rb +8 -2
  8. data/lib/rbnacl/libsodium/version.rb +1 -1
  9. data/vendor/libsodium/AUTHORS +14 -0
  10. data/vendor/libsodium/ChangeLog +26 -0
  11. data/vendor/libsodium/LICENSE +1 -1
  12. data/vendor/libsodium/Makefile.am +1 -0
  13. data/vendor/libsodium/Makefile.in +9 -0
  14. data/vendor/libsodium/README.markdown +7 -0
  15. data/vendor/libsodium/aclocal.m4 +1 -0
  16. data/vendor/libsodium/appveyor.yml +25 -0
  17. data/vendor/libsodium/autom4te.cache/output.1 +640 -126
  18. data/vendor/libsodium/autom4te.cache/output.6 +19049 -0
  19. data/vendor/libsodium/autom4te.cache/requests +1151 -914
  20. data/vendor/libsodium/autom4te.cache/traces.1 +472 -426
  21. data/vendor/libsodium/autom4te.cache/traces.6 +3193 -0
  22. data/vendor/libsodium/builds/msvc/version.h +2 -2
  23. data/vendor/libsodium/builds/msvc/vs2010/libsodium.sln +50 -79
  24. data/vendor/libsodium/builds/msvc/vs2010/libsodium/libsodium.vcxproj +20 -8
  25. data/vendor/libsodium/builds/msvc/vs2010/libsodium/libsodium.vcxproj.filters +208 -166
  26. data/vendor/libsodium/builds/msvc/vs2012/libsodium.sln +50 -79
  27. data/vendor/libsodium/builds/msvc/vs2012/libsodium/libsodium.vcxproj +20 -8
  28. data/vendor/libsodium/builds/msvc/vs2012/libsodium/libsodium.vcxproj.filters +206 -164
  29. data/vendor/libsodium/builds/msvc/vs2013/libsodium.sln +52 -81
  30. data/vendor/libsodium/builds/msvc/vs2013/libsodium/libsodium.vcxproj +20 -8
  31. data/vendor/libsodium/builds/msvc/vs2013/libsodium/libsodium.vcxproj.filters +206 -164
  32. data/vendor/libsodium/builds/msvc/vs2015/libsodium.sln +52 -81
  33. data/vendor/libsodium/builds/msvc/vs2015/libsodium/libsodium.vcxproj +20 -8
  34. data/vendor/libsodium/builds/msvc/vs2015/libsodium/libsodium.vcxproj.filters +206 -164
  35. data/vendor/libsodium/configure +639 -125
  36. data/vendor/libsodium/configure.ac +94 -16
  37. data/vendor/libsodium/dist-build/Makefile.in +9 -0
  38. data/vendor/libsodium/dist-build/emscripten-symbols.def +370 -0
  39. data/vendor/libsodium/dist-build/emscripten.sh +9 -3
  40. data/vendor/libsodium/dist-build/generate-emscripten-symbols.sh +43 -0
  41. data/vendor/libsodium/libsodium-uninstalled.pc.in +1 -1
  42. data/vendor/libsodium/libsodium.pc.in +1 -1
  43. data/vendor/libsodium/libsodium.vcxproj +70 -66
  44. data/vendor/libsodium/libsodium.vcxproj.filters +204 -192
  45. data/vendor/libsodium/m4/ax_valgrind_check.m4 +190 -0
  46. data/vendor/libsodium/msvc-scripts/Makefile.in +9 -0
  47. data/vendor/libsodium/msvc-scripts/process.bat +2 -2
  48. data/vendor/libsodium/src/Makefile.in +9 -0
  49. data/vendor/libsodium/src/libsodium/Makefile.am +31 -6
  50. data/vendor/libsodium/src/libsodium/Makefile.in +238 -42
  51. data/vendor/libsodium/src/libsodium/crypto_aead/aes256gcm/aesni/aead_aes256gcm_aesni.c +234 -38
  52. data/vendor/libsodium/src/libsodium/crypto_aead/chacha20poly1305/sodium/aead_chacha20poly1305.c +208 -118
  53. data/vendor/libsodium/src/libsodium/crypto_box/crypto_box_seal.c +2 -2
  54. data/vendor/libsodium/src/libsodium/crypto_box/curve25519xsalsa20poly1305/ref/before_curve25519xsalsa20poly1305.c +1 -4
  55. data/vendor/libsodium/src/libsodium/crypto_core/curve25519/ref10/curve25519_ref10.c +1799 -1790
  56. data/vendor/libsodium/src/libsodium/crypto_core/curve25519/ref10/curve25519_ref10.h +39 -39
  57. data/vendor/libsodium/src/libsodium/crypto_core/hchacha20/core_hchacha20.c +86 -0
  58. data/vendor/libsodium/src/libsodium/crypto_core/hchacha20/core_hchacha20.h +28 -0
  59. data/vendor/libsodium/src/libsodium/crypto_core/hsalsa20/ref2/core_hsalsa20.c +38 -46
  60. data/vendor/libsodium/src/libsodium/crypto_core/salsa20/ref/core_salsa20.c +47 -55
  61. data/vendor/libsodium/src/libsodium/crypto_core/salsa2012/ref/core_salsa2012.c +47 -55
  62. data/vendor/libsodium/src/libsodium/crypto_core/salsa208/ref/core_salsa208.c +47 -55
  63. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/generichash_blake2_api.c +7 -0
  64. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2-impl.h +0 -89
  65. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2.h +50 -141
  66. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-compress-avx2.c +45 -0
  67. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-compress-avx2.h +123 -0
  68. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-compress-ref.c +3 -2
  69. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-compress-sse41.c +2 -2
  70. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/{blake2b-round.h → blake2b-compress-sse41.h} +2 -28
  71. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-compress-ssse3.c +2 -4
  72. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-compress-ssse3.h +97 -0
  73. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-load-avx2.h +339 -0
  74. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-load-sse2.h +0 -2
  75. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-load-sse41.h +0 -2
  76. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-ref.c +29 -18
  77. data/vendor/libsodium/src/libsodium/crypto_hash/sha256/cp/hash_sha256.c +4 -43
  78. data/vendor/libsodium/src/libsodium/crypto_hash/sha512/cp/hash_sha512.c +3 -32
  79. data/vendor/libsodium/src/libsodium/crypto_onetimeauth/poly1305/donna/poly1305_donna.h +1 -20
  80. data/vendor/libsodium/src/libsodium/crypto_onetimeauth/poly1305/donna/poly1305_donna32.h +22 -41
  81. data/vendor/libsodium/src/libsodium/crypto_onetimeauth/poly1305/donna/poly1305_donna64.h +12 -39
  82. data/vendor/libsodium/src/libsodium/crypto_onetimeauth/poly1305/sse2/poly1305_sse2.c +2 -4
  83. data/vendor/libsodium/src/libsodium/crypto_onetimeauth/poly1305/sse2/poly1305_sse2.h +1 -20
  84. data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/argon2-core.c +570 -0
  85. data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/argon2-core.h +198 -0
  86. data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/argon2-encoding.c +444 -0
  87. data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/argon2-encoding.h +32 -0
  88. data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/argon2-fill-block-ref.c +229 -0
  89. data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/argon2-fill-block-ssse3.c +222 -0
  90. data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/argon2-impl.h +40 -0
  91. data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/argon2.c +238 -0
  92. data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/argon2.h +251 -0
  93. data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/blake2b-long.c +80 -0
  94. data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/blake2b-long.h +8 -0
  95. data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/blamka-round-ref.h +38 -0
  96. data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/blamka-round-ssse3.h +117 -0
  97. data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/pwhash_argon2i.c +164 -0
  98. data/vendor/libsodium/src/libsodium/crypto_pwhash/crypto_pwhash.c +106 -0
  99. data/vendor/libsodium/src/libsodium/crypto_pwhash/scryptsalsa208sha256/crypto_scrypt-common.c +1 -1
  100. data/vendor/libsodium/src/libsodium/crypto_pwhash/scryptsalsa208sha256/crypto_scrypt.h +4 -4
  101. data/vendor/libsodium/src/libsodium/crypto_pwhash/scryptsalsa208sha256/nosse/pwhash_scryptsalsa208sha256_nosse.c +186 -186
  102. data/vendor/libsodium/src/libsodium/crypto_pwhash/scryptsalsa208sha256/pbkdf2-sha256.c +2 -2
  103. data/vendor/libsodium/src/libsodium/crypto_pwhash/scryptsalsa208sha256/pwhash_scryptsalsa208sha256.c +3 -2
  104. data/vendor/libsodium/src/libsodium/crypto_pwhash/scryptsalsa208sha256/scrypt_platform.c +33 -33
  105. data/vendor/libsodium/src/libsodium/crypto_pwhash/scryptsalsa208sha256/sse/pwhash_scryptsalsa208sha256_sse.c +253 -254
  106. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/donna_c64/curve25519_donna_c64.c +16 -17
  107. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/donna_c64/curve25519_donna_c64.h +1 -0
  108. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/ref10/x25519_ref10.c +11 -11
  109. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/ref10/x25519_ref10.h +1 -0
  110. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/consts_namespace.h +1 -1
  111. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/fe.h +3 -2
  112. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/fe51.h +5 -3
  113. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/fe51_invert.c +41 -41
  114. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/fe51_mul.S +10 -2
  115. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/fe51_namespace.h +1 -1
  116. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/fe51_nsquare.S +4 -0
  117. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/fe51_pack.S +4 -0
  118. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/fe_frombytes_sandy2x.c +31 -32
  119. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/ladder.S +4 -0
  120. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/ladder.h +1 -1
  121. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/ladder_base.S +4 -0
  122. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/ladder_base.h +1 -1
  123. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/ladder_base_namespace.h +1 -1
  124. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/ladder_namespace.h +1 -1
  125. data/vendor/libsodium/src/libsodium/crypto_secretbox/crypto_secretbox_easy.c +2 -6
  126. data/vendor/libsodium/src/libsodium/crypto_shorthash/siphash24/ref/shorthash_siphash24.c +8 -28
  127. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/open.c +75 -0
  128. data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/portable/afternm_aes128ctr.c +6 -6
  129. data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/portable/common.h +1 -18
  130. data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/portable/int128_aes128ctr.c +20 -20
  131. data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/portable/types.h +4 -4
  132. data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/portable/xor_afternm_aes128ctr.c +6 -6
  133. data/vendor/libsodium/src/libsodium/crypto_stream/chacha20/ref/stream_chacha20_ref.c +56 -77
  134. data/vendor/libsodium/src/libsodium/crypto_stream/chacha20/ref/stream_chacha20_ref.h +1 -0
  135. data/vendor/libsodium/src/libsodium/crypto_stream/chacha20/vec/stream_chacha20_vec.h +1 -0
  136. data/vendor/libsodium/src/libsodium/crypto_stream/salsa20/ref/stream_salsa20_ref.c +2 -8
  137. data/vendor/libsodium/src/libsodium/crypto_stream/salsa20/ref/xor_salsa20_ref.c +2 -8
  138. data/vendor/libsodium/src/libsodium/crypto_stream/salsa2012/ref/stream_salsa2012.c +2 -8
  139. data/vendor/libsodium/src/libsodium/crypto_stream/salsa2012/ref/xor_salsa2012.c +2 -8
  140. data/vendor/libsodium/src/libsodium/crypto_stream/salsa208/ref/stream_salsa208.c +2 -8
  141. data/vendor/libsodium/src/libsodium/crypto_stream/salsa208/ref/xor_salsa208.c +2 -8
  142. data/vendor/libsodium/src/libsodium/crypto_stream/xsalsa20/ref/stream_xsalsa20.c +1 -5
  143. data/vendor/libsodium/src/libsodium/crypto_stream/xsalsa20/ref/xor_xsalsa20.c +1 -5
  144. data/vendor/libsodium/src/libsodium/include/Makefile.am +3 -0
  145. data/vendor/libsodium/src/libsodium/include/Makefile.in +19 -8
  146. data/vendor/libsodium/src/libsodium/include/sodium.h +3 -0
  147. data/vendor/libsodium/src/libsodium/include/sodium/crypto_aead_aes256gcm.h +50 -0
  148. data/vendor/libsodium/src/libsodium/include/sodium/crypto_aead_chacha20poly1305.h +94 -22
  149. data/vendor/libsodium/src/libsodium/include/sodium/crypto_box_curve25519xsalsa20poly1305.h +6 -6
  150. data/vendor/libsodium/src/libsodium/include/sodium/crypto_core_hchacha20.h +35 -0
  151. data/vendor/libsodium/src/libsodium/include/sodium/crypto_generichash_blake2b.h +3 -0
  152. data/vendor/libsodium/src/libsodium/include/sodium/crypto_pwhash.h +89 -0
  153. data/vendor/libsodium/src/libsodium/include/sodium/crypto_pwhash_argon2i.h +86 -0
  154. data/vendor/libsodium/src/libsodium/include/sodium/crypto_secretbox_xsalsa20poly1305.h +6 -6
  155. data/vendor/libsodium/src/libsodium/include/sodium/crypto_sign_edwards25519sha512batch.h +0 -11
  156. data/vendor/libsodium/src/libsodium/include/sodium/runtime.h +3 -0
  157. data/vendor/libsodium/src/libsodium/randombytes/randombytes.c +3 -0
  158. data/vendor/libsodium/src/libsodium/randombytes/salsa20/randombytes_salsa20_random.c +5 -1
  159. data/vendor/libsodium/src/libsodium/sodium/common.h +150 -0
  160. data/vendor/libsodium/src/libsodium/sodium/core.c +3 -1
  161. data/vendor/libsodium/src/libsodium/sodium/runtime.c +37 -19
  162. data/vendor/libsodium/src/libsodium/sodium/utils.c +18 -9
  163. data/vendor/libsodium/test/Makefile.in +9 -0
  164. data/vendor/libsodium/test/default/Makefile.am +10 -0
  165. data/vendor/libsodium/test/default/Makefile.in +53 -20
  166. data/vendor/libsodium/test/default/aead_aes256gcm.c +43 -17
  167. data/vendor/libsodium/test/default/aead_chacha20poly1305.c +179 -86
  168. data/vendor/libsodium/test/default/auth7.c +5 -5
  169. data/vendor/libsodium/test/default/box.c +4 -4
  170. data/vendor/libsodium/test/default/box2.c +1 -1
  171. data/vendor/libsodium/test/default/core6.c +1 -1
  172. data/vendor/libsodium/test/default/generichash.c +12 -1
  173. data/vendor/libsodium/test/default/generichash2.c +2 -2
  174. data/vendor/libsodium/test/default/generichash3.c +21 -0
  175. data/vendor/libsodium/test/default/pwhash.c +186 -168
  176. data/vendor/libsodium/test/default/pwhash.exp +11 -30
  177. data/vendor/libsodium/test/default/pwhash_scrypt.c +349 -0
  178. data/vendor/libsodium/test/default/pwhash_scrypt.exp +31 -0
  179. data/vendor/libsodium/test/default/secretbox.c +1 -1
  180. data/vendor/libsodium/test/default/secretbox2.c +1 -1
  181. data/vendor/libsodium/test/default/sign.c +15 -0
  182. data/vendor/libsodium/test/default/sodium_utils2.c +8 -3
  183. data/vendor/libsodium/test/default/sodium_utils3.c +4 -2
  184. data/vendor/libsodium/test/default/verify1.c +0 -4
  185. data/vendor/libsodium/test/quirks/quirks.h +3 -0
  186. metadata +37 -22
  187. data/vendor/libsodium/builds/msvc/vs2010/test/test.props +0 -43
  188. data/vendor/libsodium/builds/msvc/vs2010/test/test.runner.bat +0 -78
  189. data/vendor/libsodium/builds/msvc/vs2010/test/test.vcxproj +0 -244
  190. data/vendor/libsodium/builds/msvc/vs2010/test/test.vcxproj.filters +0 -192
  191. data/vendor/libsodium/builds/msvc/vs2012/test/test.props +0 -43
  192. data/vendor/libsodium/builds/msvc/vs2012/test/test.runner.bat +0 -78
  193. data/vendor/libsodium/builds/msvc/vs2012/test/test.vcxproj +0 -244
  194. data/vendor/libsodium/builds/msvc/vs2012/test/test.vcxproj.filters +0 -192
  195. data/vendor/libsodium/builds/msvc/vs2013/test/test.props +0 -43
  196. data/vendor/libsodium/builds/msvc/vs2013/test/test.runner.bat +0 -78
  197. data/vendor/libsodium/builds/msvc/vs2013/test/test.vcxproj +0 -244
  198. data/vendor/libsodium/builds/msvc/vs2013/test/test.vcxproj.filters +0 -192
  199. data/vendor/libsodium/builds/msvc/vs2015/test/test.props +0 -43
  200. data/vendor/libsodium/builds/msvc/vs2015/test/test.runner.bat +0 -78
  201. data/vendor/libsodium/builds/msvc/vs2015/test/test.vcxproj +0 -244
  202. data/vendor/libsodium/builds/msvc/vs2015/test/test.vcxproj.filters +0 -192
  203. data/vendor/libsodium/src/libsodium/crypto_pwhash/scryptsalsa208sha256/sysendian.h +0 -146
  204. data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/portable/common_aes128ctr.c +0 -64
@@ -0,0 +1,45 @@
1
+
2
+ #define BLAKE2_USE_SSSE3
3
+ #define BLAKE2_USE_SSE41
4
+ #define BLAKE2_USE_AVX2
5
+
6
+ #include <stdint.h>
7
+ #include <string.h>
8
+
9
+ #if (defined(HAVE_AVX2INTRIN_H) && defined(HAVE_EMMINTRIN_H) && defined(HAVE_TMMINTRIN_H) && defined(HAVE_SMMINTRIN_H)) || \
10
+ (defined(_MSC_VER) && (defined(_M_X64) || defined(_M_AMD64)))
11
+
12
+ #pragma GCC target("sse2")
13
+ #pragma GCC target("ssse3")
14
+ #pragma GCC target("sse4.1")
15
+ #pragma GCC target("avx2")
16
+
17
+ #include <emmintrin.h>
18
+ #include <tmmintrin.h>
19
+ #include <smmintrin.h>
20
+ #include <immintrin.h>
21
+
22
+ #include "blake2.h"
23
+ #include "blake2-impl.h"
24
+ #include "blake2b-compress-avx2.h"
25
+
26
+ CRYPTO_ALIGN(64) static const uint64_t blake2b_IV[8] =
27
+ {
28
+ 0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL,
29
+ 0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL,
30
+ 0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
31
+ 0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL
32
+ };
33
+
34
+ int blake2b_compress_avx2( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] )
35
+ {
36
+ __m256i a = LOADU(&S->h[0]);
37
+ __m256i b = LOADU(&S->h[4]);
38
+ BLAKE2B_COMPRESS_V1(a, b, block, S->t[0], S->t[1], S->f[0], S->f[1]);
39
+ STOREU(&S->h[0], a);
40
+ STOREU(&S->h[4], b);
41
+
42
+ return 0;
43
+ }
44
+
45
+ #endif
@@ -0,0 +1,123 @@
1
+
2
+ #ifndef blake2b_compress_avx2_H
3
+ #define blake2b_compress_avx2_H
4
+
5
+ #define LOAD128(p) _mm_load_si128((__m128i *)(p))
6
+ #define STORE128(p, r) _mm_store_si128((__m128i *)(p), r)
7
+
8
+ #define LOADU128(p) _mm_loadu_si128((__m128i *)(p))
9
+ #define STOREU128(p, r) _mm_storeu_si128((__m128i *)(p), r)
10
+
11
+ #define LOAD(p) _mm256_load_si256((__m256i *)(p))
12
+ #define STORE(p, r) _mm256_store_si256((__m256i *)(p), r)
13
+
14
+ #define LOADU(p) _mm256_loadu_si256((__m256i *)(p))
15
+ #define STOREU(p, r) _mm256_storeu_si256((__m256i *)(p), r)
16
+
17
+ static inline uint64_t LOADU64(const void *p) {
18
+ uint64_t v;
19
+ memcpy(&v, p, sizeof v);
20
+ return v;
21
+ }
22
+
23
+ #define ROTATE16 _mm256_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9, \
24
+ 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9)
25
+
26
+ #define ROTATE24 _mm256_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10, \
27
+ 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10)
28
+
29
+ #define ADD(a, b) _mm256_add_epi64(a, b)
30
+ #define SUB(a, b) _mm256_sub_epi64(a, b)
31
+
32
+ #define XOR(a, b) _mm256_xor_si256(a, b)
33
+ #define AND(a, b) _mm256_and_si256(a, b)
34
+ #define OR(a, b) _mm256_or_si256(a, b)
35
+
36
+ #define ROT32(x) _mm256_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1))
37
+ #define ROT24(x) _mm256_shuffle_epi8((x), ROTATE24)
38
+ #define ROT16(x) _mm256_shuffle_epi8((x), ROTATE16)
39
+ #define ROT63(x) _mm256_or_si256(_mm256_srli_epi64((x), 63), ADD((x), (x)))
40
+
41
+ #define BLAKE2B_G1_V1(a, b, c, d, m) do { \
42
+ a = ADD(a, m); \
43
+ a = ADD(a, b); d = XOR(d, a); d = ROT32(d); \
44
+ c = ADD(c, d); b = XOR(b, c); b = ROT24(b); \
45
+ } while(0)
46
+
47
+ #define BLAKE2B_G2_V1(a, b, c, d, m) do { \
48
+ a = ADD(a, m); \
49
+ a = ADD(a, b); d = XOR(d, a); d = ROT16(d); \
50
+ c = ADD(c, d); b = XOR(b, c); b = ROT63(b); \
51
+ } while(0)
52
+
53
+ #define BLAKE2B_DIAG_V1(a, b, c, d) do { \
54
+ d = _mm256_permute4x64_epi64(d, _MM_SHUFFLE(2,1,0,3)); \
55
+ c = _mm256_permute4x64_epi64(c, _MM_SHUFFLE(1,0,3,2)); \
56
+ b = _mm256_permute4x64_epi64(b, _MM_SHUFFLE(0,3,2,1)); \
57
+ } while(0)
58
+
59
+ #define BLAKE2B_UNDIAG_V1(a, b, c, d) do { \
60
+ d = _mm256_permute4x64_epi64(d, _MM_SHUFFLE(0,3,2,1)); \
61
+ c = _mm256_permute4x64_epi64(c, _MM_SHUFFLE(1,0,3,2)); \
62
+ b = _mm256_permute4x64_epi64(b, _MM_SHUFFLE(2,1,0,3)); \
63
+ } while(0)
64
+
65
+ #include "blake2b-load-avx2.h"
66
+
67
+ #define BLAKE2B_ROUND_V1(a, b, c, d, r, m) do { \
68
+ __m256i b0; \
69
+ BLAKE2B_LOAD_MSG_ ##r ##_1(b0); \
70
+ BLAKE2B_G1_V1(a, b, c, d, b0); \
71
+ BLAKE2B_LOAD_MSG_ ##r ##_2(b0); \
72
+ BLAKE2B_G2_V1(a, b, c, d, b0); \
73
+ BLAKE2B_DIAG_V1(a, b, c, d); \
74
+ BLAKE2B_LOAD_MSG_ ##r ##_3(b0); \
75
+ BLAKE2B_G1_V1(a, b, c, d, b0); \
76
+ BLAKE2B_LOAD_MSG_ ##r ##_4(b0); \
77
+ BLAKE2B_G2_V1(a, b, c, d, b0); \
78
+ BLAKE2B_UNDIAG_V1(a, b, c, d); \
79
+ } while(0)
80
+
81
+ #define BLAKE2B_ROUNDS_V1(a, b, c, d, m) do { \
82
+ BLAKE2B_ROUND_V1(a, b, c, d, 0, (m)); \
83
+ BLAKE2B_ROUND_V1(a, b, c, d, 1, (m)); \
84
+ BLAKE2B_ROUND_V1(a, b, c, d, 2, (m)); \
85
+ BLAKE2B_ROUND_V1(a, b, c, d, 3, (m)); \
86
+ BLAKE2B_ROUND_V1(a, b, c, d, 4, (m)); \
87
+ BLAKE2B_ROUND_V1(a, b, c, d, 5, (m)); \
88
+ BLAKE2B_ROUND_V1(a, b, c, d, 6, (m)); \
89
+ BLAKE2B_ROUND_V1(a, b, c, d, 7, (m)); \
90
+ BLAKE2B_ROUND_V1(a, b, c, d, 8, (m)); \
91
+ BLAKE2B_ROUND_V1(a, b, c, d, 9, (m)); \
92
+ BLAKE2B_ROUND_V1(a, b, c, d, 10, (m)); \
93
+ BLAKE2B_ROUND_V1(a, b, c, d, 11, (m)); \
94
+ } while(0)
95
+
96
+ #define DECLARE_MESSAGE_WORDS(m) \
97
+ const __m256i m0 = _mm256_broadcastsi128_si256(LOADU128((m) + 0)); \
98
+ const __m256i m1 = _mm256_broadcastsi128_si256(LOADU128((m) + 16)); \
99
+ const __m256i m2 = _mm256_broadcastsi128_si256(LOADU128((m) + 32)); \
100
+ const __m256i m3 = _mm256_broadcastsi128_si256(LOADU128((m) + 48)); \
101
+ const __m256i m4 = _mm256_broadcastsi128_si256(LOADU128((m) + 64)); \
102
+ const __m256i m5 = _mm256_broadcastsi128_si256(LOADU128((m) + 80)); \
103
+ const __m256i m6 = _mm256_broadcastsi128_si256(LOADU128((m) + 96)); \
104
+ const __m256i m7 = _mm256_broadcastsi128_si256(LOADU128((m) + 112)); \
105
+ __m256i t0, t1;
106
+
107
+ #define BLAKE2B_COMPRESS_V1(a, b, m, t0, t1, f0, f1) do { \
108
+ DECLARE_MESSAGE_WORDS(m) \
109
+ const __m256i iv0 = a; \
110
+ const __m256i iv1 = b; \
111
+ __m256i c = LOAD(&blake2b_IV[0]); \
112
+ __m256i d = XOR( \
113
+ LOAD(&blake2b_IV[4]), \
114
+ _mm256_set_epi64x(f1, f0, t1, t0) \
115
+ ); \
116
+ BLAKE2B_ROUNDS_V1(a, b, c, d, m); \
117
+ a = XOR(a, c); \
118
+ b = XOR(b, d); \
119
+ a = XOR(a, iv0); \
120
+ b = XOR(b, iv1); \
121
+ } while(0)
122
+
123
+ #endif
@@ -4,8 +4,9 @@
4
4
 
5
5
  #include "blake2.h"
6
6
  #include "blake2-impl.h"
7
+ #include "../../sodium/common.h"
7
8
 
8
- static const uint64_t blake2b_IV[8] =
9
+ CRYPTO_ALIGN(64) static const uint64_t blake2b_IV[8] =
9
10
  {
10
11
  0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL,
11
12
  0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL,
@@ -36,7 +37,7 @@ int blake2b_compress_ref( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYT
36
37
  int i;
37
38
 
38
39
  for( i = 0; i < 16; ++i )
39
- m[i] = load64( block + i * sizeof( m[i] ) );
40
+ m[i] = LOAD64_LE( block + i * sizeof( m[i] ) );
40
41
 
41
42
  for( i = 0; i < 8; ++i )
42
43
  v[i] = S->h[i];
@@ -18,9 +18,9 @@
18
18
 
19
19
  #include "blake2.h"
20
20
  #include "blake2-impl.h"
21
- #include "blake2b-round.h"
21
+ #include "blake2b-compress-sse41.h"
22
22
 
23
- static const uint64_t blake2b_IV[8] =
23
+ CRYPTO_ALIGN(64) static const uint64_t blake2b_IV[8] =
24
24
  {
25
25
  0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL,
26
26
  0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL,
@@ -1,31 +1,10 @@
1
- /*
2
- BLAKE2 reference source code package - optimized C implementations
3
1
 
4
- Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
5
-
6
- To the extent possible under law, the author(s) have dedicated all copyright
7
- and related and neighboring rights to this software to the public domain
8
- worldwide. This software is distributed without any warranty.
9
-
10
- You should have received a copy of the CC0 Public Domain Dedication along with
11
- this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
12
- */
13
-
14
- #ifndef blake2b_round_H
15
- #define blake2b_round_H
16
-
17
- #ifndef BLAKE2_USE_SSSE3
18
- # error BLAKE2_USE_SSSE3 must be defined in order to use this file
19
- #endif
2
+ #ifndef blake2b_compress_sse41_H
3
+ #define blake2b_compress_sse41_H
20
4
 
21
5
  #define LOADU(p) _mm_loadu_si128( (const __m128i *)(const void *)(p) )
22
6
  #define STOREU(p,r) _mm_storeu_si128((__m128i *)(void *)(p), r)
23
7
 
24
- #define TOF(reg) _mm_castsi128_ps((reg))
25
- #define TOI(reg) _mm_castps_si128((reg))
26
-
27
-
28
- /* Microarchitecture-specific macros */
29
8
  #define _mm_roti_epi64(x, c) \
30
9
  (-(c) == 32) ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2,3,0,1)) \
31
10
  : (-(c) == 24) ? _mm_shuffle_epi8((x), r24) \
@@ -33,7 +12,6 @@
33
12
  : (-(c) == 63) ? _mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_add_epi64((x), (x))) \
34
13
  : _mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_slli_epi64((x), 64-(-(c))))
35
14
 
36
-
37
15
  #define G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \
38
16
  row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \
39
17
  row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \
@@ -102,11 +80,7 @@
102
80
  row4l = t1; \
103
81
  row4h = t0;
104
82
 
105
- #if defined(BLAKE2_USE_SSE41)
106
83
  #include "blake2b-load-sse41.h"
107
- #else
108
- #include "blake2b-load-sse2.h"
109
- #endif
110
84
 
111
85
  #define ROUND(r) \
112
86
  LOAD_MSG_ ##r ##_1(b0, b1); \
@@ -1,6 +1,4 @@
1
1
 
2
- #define BLAKE2_USE_SSSE3
3
-
4
2
  #include <stdint.h>
5
3
  #include <string.h>
6
4
 
@@ -18,9 +16,9 @@
18
16
 
19
17
  #include "blake2.h"
20
18
  #include "blake2-impl.h"
21
- #include "blake2b-round.h"
19
+ #include "blake2b-compress-ssse3.h"
22
20
 
23
- static const uint64_t blake2b_IV[8] =
21
+ CRYPTO_ALIGN(64) static const uint64_t blake2b_IV[8] =
24
22
  {
25
23
  0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL,
26
24
  0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL,
@@ -0,0 +1,97 @@
1
+
2
+ #ifndef blake2b_compress_ssse3_H
3
+ #define blake2b_compress_ssse3_H
4
+
5
+ #define LOADU(p) _mm_loadu_si128( (const __m128i *)(const void *)(p) )
6
+ #define STOREU(p,r) _mm_storeu_si128((__m128i *)(void *)(p), r)
7
+
8
+ #define _mm_roti_epi64(x, c) \
9
+ (-(c) == 32) ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2,3,0,1)) \
10
+ : (-(c) == 24) ? _mm_shuffle_epi8((x), r24) \
11
+ : (-(c) == 16) ? _mm_shuffle_epi8((x), r16) \
12
+ : (-(c) == 63) ? _mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_add_epi64((x), (x))) \
13
+ : _mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_slli_epi64((x), 64-(-(c))))
14
+
15
+ #define G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \
16
+ row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \
17
+ row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \
18
+ \
19
+ row4l = _mm_xor_si128(row4l, row1l); \
20
+ row4h = _mm_xor_si128(row4h, row1h); \
21
+ \
22
+ row4l = _mm_roti_epi64(row4l, -32); \
23
+ row4h = _mm_roti_epi64(row4h, -32); \
24
+ \
25
+ row3l = _mm_add_epi64(row3l, row4l); \
26
+ row3h = _mm_add_epi64(row3h, row4h); \
27
+ \
28
+ row2l = _mm_xor_si128(row2l, row3l); \
29
+ row2h = _mm_xor_si128(row2h, row3h); \
30
+ \
31
+ row2l = _mm_roti_epi64(row2l, -24); \
32
+ row2h = _mm_roti_epi64(row2h, -24); \
33
+
34
+ #define G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \
35
+ row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \
36
+ row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \
37
+ \
38
+ row4l = _mm_xor_si128(row4l, row1l); \
39
+ row4h = _mm_xor_si128(row4h, row1h); \
40
+ \
41
+ row4l = _mm_roti_epi64(row4l, -16); \
42
+ row4h = _mm_roti_epi64(row4h, -16); \
43
+ \
44
+ row3l = _mm_add_epi64(row3l, row4l); \
45
+ row3h = _mm_add_epi64(row3h, row4h); \
46
+ \
47
+ row2l = _mm_xor_si128(row2l, row3l); \
48
+ row2h = _mm_xor_si128(row2h, row3h); \
49
+ \
50
+ row2l = _mm_roti_epi64(row2l, -63); \
51
+ row2h = _mm_roti_epi64(row2h, -63); \
52
+
53
+ #define DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \
54
+ t0 = _mm_alignr_epi8(row2h, row2l, 8); \
55
+ t1 = _mm_alignr_epi8(row2l, row2h, 8); \
56
+ row2l = t0; \
57
+ row2h = t1; \
58
+ \
59
+ t0 = row3l; \
60
+ row3l = row3h; \
61
+ row3h = t0; \
62
+ \
63
+ t0 = _mm_alignr_epi8(row4h, row4l, 8); \
64
+ t1 = _mm_alignr_epi8(row4l, row4h, 8); \
65
+ row4l = t1; \
66
+ row4h = t0;
67
+
68
+ #define UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \
69
+ t0 = _mm_alignr_epi8(row2l, row2h, 8); \
70
+ t1 = _mm_alignr_epi8(row2h, row2l, 8); \
71
+ row2l = t0; \
72
+ row2h = t1; \
73
+ \
74
+ t0 = row3l; \
75
+ row3l = row3h; \
76
+ row3h = t0; \
77
+ \
78
+ t0 = _mm_alignr_epi8(row4l, row4h, 8); \
79
+ t1 = _mm_alignr_epi8(row4h, row4l, 8); \
80
+ row4l = t1; \
81
+ row4h = t0;
82
+
83
+ #include "blake2b-load-sse2.h"
84
+
85
+ #define ROUND(r) \
86
+ LOAD_MSG_ ##r ##_1(b0, b1); \
87
+ G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
88
+ LOAD_MSG_ ##r ##_2(b0, b1); \
89
+ G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
90
+ DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); \
91
+ LOAD_MSG_ ##r ##_3(b0, b1); \
92
+ G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
93
+ LOAD_MSG_ ##r ##_4(b0, b1); \
94
+ G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
95
+ UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h);
96
+
97
+ #endif
@@ -0,0 +1,339 @@
1
#ifndef blake2b_load_avx2_H
#define blake2b_load_avx2_H

/*
 * Message-load macros for the AVX2 BLAKE2b code.
 *
 * BLAKE2B_LOAD_MSG_<r>_<n>(b0) builds the message vector for step <n> (1..4)
 * of round <r> (0..11) and stores it in b0.
 *
 * None of the macros take their inputs as arguments.  They read m0..m7 and
 * overwrite the scratch vectors t0 and t1, all of which must be visible in
 * the caller's scope.  How m0..m7 are filled from the message block is
 * decided by the including file and is not visible from this header.
 *
 * Every macro expands to a single do { ... } while (0) statement; terminate
 * each use with a semicolon.
 */

/*
 * Shared body of every load macro:
 *   t0 = lo_expr; t1 = hi_expr; b0 = blend(t0, t1, 0xF0);
 * Blend mask 0xF0 keeps 32-bit lanes 0..3 (the low 128 bits) from t0 and
 * takes lanes 4..7 (the high 128 bits) from t1.
 * Internal helper: use the BLAKE2B_LOAD_MSG_<r>_<n> macros instead.
 */
#define BLAKE2B_LOAD_MSG_PAIR(b0, lo_expr, hi_expr) \
    do { \
        t0 = (lo_expr); \
        t1 = (hi_expr); \
        (b0) = _mm256_blend_epi32(t0, t1, 0xF0); \
    } while (0)

/* ---- round 0 ---- */
#define BLAKE2B_LOAD_MSG_0_1(b0) \
    BLAKE2B_LOAD_MSG_PAIR(b0, _mm256_unpacklo_epi64(m0, m1), \
                              _mm256_unpacklo_epi64(m2, m3))

#define BLAKE2B_LOAD_MSG_0_2(b0) \
    BLAKE2B_LOAD_MSG_PAIR(b0, _mm256_unpackhi_epi64(m0, m1), \
                              _mm256_unpackhi_epi64(m2, m3))

#define BLAKE2B_LOAD_MSG_0_3(b0) \
    BLAKE2B_LOAD_MSG_PAIR(b0, _mm256_unpacklo_epi64(m4, m5), \
                              _mm256_unpacklo_epi64(m6, m7))

#define BLAKE2B_LOAD_MSG_0_4(b0) \
    BLAKE2B_LOAD_MSG_PAIR(b0, _mm256_unpackhi_epi64(m4, m5), \
                              _mm256_unpackhi_epi64(m6, m7))

/* ---- round 1 ---- */
#define BLAKE2B_LOAD_MSG_1_1(b0) \
    BLAKE2B_LOAD_MSG_PAIR(b0, _mm256_unpacklo_epi64(m7, m2), \
                              _mm256_unpackhi_epi64(m4, m6))

#define BLAKE2B_LOAD_MSG_1_2(b0) \
    BLAKE2B_LOAD_MSG_PAIR(b0, _mm256_unpacklo_epi64(m5, m4), \
                              _mm256_alignr_epi8(m3, m7, 8))

#define BLAKE2B_LOAD_MSG_1_3(b0) \
    BLAKE2B_LOAD_MSG_PAIR(b0, _mm256_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)), \
                              _mm256_unpackhi_epi64(m5, m2))

#define BLAKE2B_LOAD_MSG_1_4(b0) \
    BLAKE2B_LOAD_MSG_PAIR(b0, _mm256_unpacklo_epi64(m6, m1), \
                              _mm256_unpackhi_epi64(m3, m1))

/* ---- round 2 ---- */
#define BLAKE2B_LOAD_MSG_2_1(b0) \
    BLAKE2B_LOAD_MSG_PAIR(b0, _mm256_alignr_epi8(m6, m5, 8), \
                              _mm256_unpackhi_epi64(m2, m7))

#define BLAKE2B_LOAD_MSG_2_2(b0) \
    BLAKE2B_LOAD_MSG_PAIR(b0, _mm256_unpacklo_epi64(m4, m0), \
                              _mm256_blend_epi32(m6, m1, 0x33))

#define BLAKE2B_LOAD_MSG_2_3(b0) \
    BLAKE2B_LOAD_MSG_PAIR(b0, _mm256_blend_epi32(m1, m5, 0x33), \
                              _mm256_unpackhi_epi64(m3, m4))

#define BLAKE2B_LOAD_MSG_2_4(b0) \
    BLAKE2B_LOAD_MSG_PAIR(b0, _mm256_unpacklo_epi64(m7, m3), \
                              _mm256_alignr_epi8(m2, m0, 8))

/* ---- round 3 ---- */
#define BLAKE2B_LOAD_MSG_3_1(b0) \
    BLAKE2B_LOAD_MSG_PAIR(b0, _mm256_unpackhi_epi64(m3, m1), \
                              _mm256_unpackhi_epi64(m6, m5))

#define BLAKE2B_LOAD_MSG_3_2(b0) \
    BLAKE2B_LOAD_MSG_PAIR(b0, _mm256_unpackhi_epi64(m4, m0), \
                              _mm256_unpacklo_epi64(m6, m7))

#define BLAKE2B_LOAD_MSG_3_3(b0) \
    BLAKE2B_LOAD_MSG_PAIR(b0, _mm256_blend_epi32(m2, m1, 0x33), \
                              _mm256_blend_epi32(m7, m2, 0x33))

#define BLAKE2B_LOAD_MSG_3_4(b0) \
    BLAKE2B_LOAD_MSG_PAIR(b0, _mm256_unpacklo_epi64(m3, m5), \
                              _mm256_unpacklo_epi64(m0, m4))

/* ---- round 4 ---- */
#define BLAKE2B_LOAD_MSG_4_1(b0) \
    BLAKE2B_LOAD_MSG_PAIR(b0, _mm256_unpackhi_epi64(m4, m2), \
                              _mm256_unpacklo_epi64(m1, m5))

#define BLAKE2B_LOAD_MSG_4_2(b0) \
    BLAKE2B_LOAD_MSG_PAIR(b0, _mm256_blend_epi32(m3, m0, 0x33), \
                              _mm256_blend_epi32(m7, m2, 0x33))

#define BLAKE2B_LOAD_MSG_4_3(b0) \
    BLAKE2B_LOAD_MSG_PAIR(b0, _mm256_blend_epi32(m5, m7, 0x33), \
                              _mm256_blend_epi32(m1, m3, 0x33))

#define BLAKE2B_LOAD_MSG_4_4(b0) \
    BLAKE2B_LOAD_MSG_PAIR(b0, _mm256_alignr_epi8(m6, m0, 8), \
                              _mm256_blend_epi32(m6, m4, 0x33))

/* ---- round 5 ---- */
#define BLAKE2B_LOAD_MSG_5_1(b0) \
    BLAKE2B_LOAD_MSG_PAIR(b0, _mm256_unpacklo_epi64(m1, m3), \
                              _mm256_unpacklo_epi64(m0, m4))

#define BLAKE2B_LOAD_MSG_5_2(b0) \
    BLAKE2B_LOAD_MSG_PAIR(b0, _mm256_unpacklo_epi64(m6, m5), \
                              _mm256_unpackhi_epi64(m5, m1))

#define BLAKE2B_LOAD_MSG_5_3(b0) \
    BLAKE2B_LOAD_MSG_PAIR(b0, _mm256_blend_epi32(m3, m2, 0x33), \
                              _mm256_unpackhi_epi64(m7, m0))

#define BLAKE2B_LOAD_MSG_5_4(b0) \
    BLAKE2B_LOAD_MSG_PAIR(b0, _mm256_unpackhi_epi64(m6, m2), \
                              _mm256_blend_epi32(m4, m7, 0x33))

/* ---- round 6 ---- */
#define BLAKE2B_LOAD_MSG_6_1(b0) \
    BLAKE2B_LOAD_MSG_PAIR(b0, _mm256_blend_epi32(m0, m6, 0x33), \
                              _mm256_unpacklo_epi64(m7, m2))

#define BLAKE2B_LOAD_MSG_6_2(b0) \
    BLAKE2B_LOAD_MSG_PAIR(b0, _mm256_unpackhi_epi64(m2, m7), \
                              _mm256_alignr_epi8(m5, m6, 8))

#define BLAKE2B_LOAD_MSG_6_3(b0) \
    BLAKE2B_LOAD_MSG_PAIR(b0, _mm256_unpacklo_epi64(m0, m3), \
                              _mm256_shuffle_epi32(m4, _MM_SHUFFLE(1,0,3,2)))

#define BLAKE2B_LOAD_MSG_6_4(b0) \
    BLAKE2B_LOAD_MSG_PAIR(b0, _mm256_unpackhi_epi64(m3, m1), \
                              _mm256_blend_epi32(m5, m1, 0x33))

/* ---- round 7 ---- */
#define BLAKE2B_LOAD_MSG_7_1(b0) \
    BLAKE2B_LOAD_MSG_PAIR(b0, _mm256_unpackhi_epi64(m6, m3), \
                              _mm256_blend_epi32(m1, m6, 0x33))

#define BLAKE2B_LOAD_MSG_7_2(b0) \
    BLAKE2B_LOAD_MSG_PAIR(b0, _mm256_alignr_epi8(m7, m5, 8), \
                              _mm256_unpackhi_epi64(m0, m4))

#define BLAKE2B_LOAD_MSG_7_3(b0) \
    BLAKE2B_LOAD_MSG_PAIR(b0, _mm256_unpackhi_epi64(m2, m7), \
                              _mm256_unpacklo_epi64(m4, m1))

#define BLAKE2B_LOAD_MSG_7_4(b0) \
    BLAKE2B_LOAD_MSG_PAIR(b0, _mm256_unpacklo_epi64(m0, m2), \
                              _mm256_unpacklo_epi64(m3, m5))

/* ---- round 8 ---- */
#define BLAKE2B_LOAD_MSG_8_1(b0) \
    BLAKE2B_LOAD_MSG_PAIR(b0, _mm256_unpacklo_epi64(m3, m7), \
                              _mm256_alignr_epi8(m0, m5, 8))

#define BLAKE2B_LOAD_MSG_8_2(b0) \
    BLAKE2B_LOAD_MSG_PAIR(b0, _mm256_unpackhi_epi64(m7, m4), \
                              _mm256_alignr_epi8(m4, m1, 8))

#define BLAKE2B_LOAD_MSG_8_3(b0) \
    BLAKE2B_LOAD_MSG_PAIR(b0, m6, \
                              _mm256_alignr_epi8(m5, m0, 8))

#define BLAKE2B_LOAD_MSG_8_4(b0) \
    BLAKE2B_LOAD_MSG_PAIR(b0, _mm256_blend_epi32(m3, m1, 0x33), \
                              m2)

/* ---- round 9 ---- */
#define BLAKE2B_LOAD_MSG_9_1(b0) \
    BLAKE2B_LOAD_MSG_PAIR(b0, _mm256_unpacklo_epi64(m5, m4), \
                              _mm256_unpackhi_epi64(m3, m0))

#define BLAKE2B_LOAD_MSG_9_2(b0) \
    BLAKE2B_LOAD_MSG_PAIR(b0, _mm256_unpacklo_epi64(m1, m2), \
                              _mm256_blend_epi32(m2, m3, 0x33))

#define BLAKE2B_LOAD_MSG_9_3(b0) \
    BLAKE2B_LOAD_MSG_PAIR(b0, _mm256_unpackhi_epi64(m7, m4), \
                              _mm256_unpackhi_epi64(m1, m6))

#define BLAKE2B_LOAD_MSG_9_4(b0) \
    BLAKE2B_LOAD_MSG_PAIR(b0, _mm256_alignr_epi8(m7, m5, 8), \
                              _mm256_unpacklo_epi64(m6, m0))

/*
 * ---- rounds 10 and 11 ----
 * These use exactly the same loads as rounds 0 and 1, so they are defined
 * as aliases rather than repeated, keeping each schedule in one place.
 */
#define BLAKE2B_LOAD_MSG_10_1(b0) BLAKE2B_LOAD_MSG_0_1(b0)
#define BLAKE2B_LOAD_MSG_10_2(b0) BLAKE2B_LOAD_MSG_0_2(b0)
#define BLAKE2B_LOAD_MSG_10_3(b0) BLAKE2B_LOAD_MSG_0_3(b0)
#define BLAKE2B_LOAD_MSG_10_4(b0) BLAKE2B_LOAD_MSG_0_4(b0)

#define BLAKE2B_LOAD_MSG_11_1(b0) BLAKE2B_LOAD_MSG_1_1(b0)
#define BLAKE2B_LOAD_MSG_11_2(b0) BLAKE2B_LOAD_MSG_1_2(b0)
#define BLAKE2B_LOAD_MSG_11_3(b0) BLAKE2B_LOAD_MSG_1_3(b0)
#define BLAKE2B_LOAD_MSG_11_4(b0) BLAKE2B_LOAD_MSG_1_4(b0)

#endif