argon2 1.1.4 → 1.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: a6ee58c4463bd652dcdb5f7c3bc8b0ec03f4d0e4
4
- data.tar.gz: 3bf76cb1750789f798a1a8554ffe1abc65acda55
2
+ SHA256:
3
+ metadata.gz: 9425c5c639ac3e940ffffe3f9baa5967976ad2caa49d0716db0de43aa6d41b66
4
+ data.tar.gz: 607eff42d6c915f2528d5e61f95d264bdace086515a250188e0f937397b684b1
5
5
  SHA512:
6
- metadata.gz: 02f0d59dc2ba610658959e8cc4add746d4851fde6e7b53be112c0b75ddb3d7f9b71c0d14d2a4e84c6fbdf0c2b3be8eaa8fbfa57fa6264345ec1f7933acc6838a
7
- data.tar.gz: '08da18d3d481efe20d48fcc83ab92ee99fb8cab196ee20b780e547fd46ad18c4fb45ac9708b3f532d455eda84ce4338597e5ee279d8aa1e09d35a87742ac5f79'
6
+ metadata.gz: 60b5ff68f0e29fa1c40dc2e9a10e21f585dacd1d95af4a219166be38e9b2734d8bcae85010245ceb5a2d8782f8f65906bce4f951159eaf5bd1e9b8bccd98cf71
7
+ data.tar.gz: e8d1074c9ad878cb2a7b7e3791b377f0fbf573ead9d9432ad178d1e4c4af4de80d337455d7fb438fd0e5ad15e2bcfa0f5091254674b7c5d45145ac8ce9e5f6e1
@@ -5,20 +5,16 @@ Metrics/CyclomaticComplexity:
5
5
  Enabled: false
6
6
  Metrics/PerceivedComplexity:
7
7
  Enabled: false
8
- #Style/MutableConstant:
9
- # Exclude:
10
- # - 'test/key_test.rb'
11
-
12
8
  Metrics/LineLength:
13
9
  Max: 160
14
10
 
15
11
  Metrics/MethodLength:
16
12
  Max: 24
17
13
 
18
- Style/AlignParameters:
14
+ Layout/AlignParameters:
19
15
  Enabled: false
20
16
 
21
- Style/AlignArray:
17
+ Layout/AlignArray:
22
18
  Enabled: false
23
19
 
24
20
  # Configuration parameters: Exclude.
@@ -30,7 +26,7 @@ Style/Documentation:
30
26
  # Offense count: 16
31
27
  # Cop supports --auto-correct.
32
28
  # Configuration parameters: EnforcedStyle, SupportedStyles.
33
- Style/FirstParameterIndentation:
29
+ Layout/FirstParameterIndentation:
34
30
  Exclude:
35
31
  - 'lib/argon2.rb'
36
32
  - 'test/low_level_test.rb'
@@ -41,13 +37,13 @@ Style/HashSyntax:
41
37
 
42
38
  # Offense count: 1
43
39
  # Cop supports --auto-correct.
44
- Style/IndentArray:
40
+ Layout/IndentArray:
45
41
  Exclude:
46
42
  - 'lib/argon2/errors.rb'
47
43
 
48
44
  # Offense count: 44
49
45
  # Cop supports --auto-correct.
50
- Style/LeadingCommentSpace:
46
+ Layout/LeadingCommentSpace:
51
47
  Exclude:
52
48
  - 'ext/argon2_wrap/extconf.rb'
53
49
 
@@ -60,7 +56,7 @@ Style/StringLiterals:
60
56
  Style/WordArray:
61
57
  MinSize: 33
62
58
 
63
- Style/MultilineMethodCallBraceLayout:
59
+ Layout/MultilineMethodCallBraceLayout:
64
60
  Exclude:
65
61
  - 'lib/argon2.rb'
66
62
  - 'test/low_level_test.rb'
@@ -1,5 +1,7 @@
1
1
  language: ruby
2
+ sudo: required
2
3
  rvm:
4
+ - 2.5.0
3
5
  - 2.4.0
4
6
  - 2.3.3
5
7
  - jruby-9000
@@ -10,4 +12,3 @@ install: bin/setup
10
12
  script:
11
13
  - cd ext/argon2_wrap/ && make test && cd ../..
12
14
  - bundle exec rake test
13
- - CODECLIMATE_REPO_TOKEN=2b619b81040453ecbcf1cf0869e1238c4bbaab666a42e7dd94d762c747c0f51a bundle exec codeclimate-test-reporter
@@ -1,3 +1,7 @@
1
+ ## v1.1.5: 2018-04-30
2
+ - Documentation updates
3
+ - Pulled latest reference
4
+
1
5
  ## v1.1.2: 2017-02-25
2
6
  - Fix build on SmartOS
3
7
 
data/README.md CHANGED
@@ -5,7 +5,7 @@ This Ruby Gem provides FFI bindings, and a simplified interface, to the Argon2 a
5
5
 
6
6
  [![Build Status](https://travis-ci.org/technion/ruby-argon2.svg?branch=master)](https://travis-ci.org/technion/ruby-argon2)
7
7
  [![Code Climate](https://codeclimate.com/github/technion/ruby-argon2/badges/gpa.svg)](https://codeclimate.com/github/technion/ruby-argon2)
8
- [![Test Coverage](https://codeclimate.com/github/technion/ruby-argon2/badges/coverage.svg)](https://codeclimate.com/github/technion/ruby-argon2/coverage)
8
+ [![Coverage Status](https://coveralls.io/repos/github/technion/ruby-argon2/badge.svg)](https://coveralls.io/github/technion/ruby-argon2)
9
9
 
10
10
  ## Design
11
11
 
@@ -44,6 +44,7 @@ hasher = Argon2::Password.new
44
44
  hasher.create("password")
45
45
  ```
46
46
 
47
+ If you follow this pattern, it is important to create a new `Argon2::Password` every time you generate a hash, in order to ensure a unique salt. See [issue 23](https://github.com/technion/ruby-argon2/issues/23) for more information.
47
48
  Alternatively, use this shortcut:
48
49
 
49
50
  ```ruby
@@ -1,5 +1,5 @@
1
- # coding: utf-8
2
1
  lib = File.expand_path('../lib', __FILE__)
2
+
3
3
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
4
  require 'argon2/version'
5
5
 
@@ -24,9 +24,9 @@ Gem::Specification.new do |spec|
24
24
  spec.add_dependency 'ffi-compiler', '~> 0.1'
25
25
 
26
26
  spec.add_development_dependency "bundler", '~> 1.10', '>= 1.10.5'
27
- spec.add_development_dependency "rake", '~> 10.4', '>= 10.4.2'
27
+ spec.add_development_dependency "coveralls", '~> 0.8'
28
28
  spec.add_development_dependency "minitest", '~> 5.8'
29
- spec.add_development_dependency "rubocop", '~> 0.35'
30
- spec.add_development_dependency "codeclimate-test-reporter", '~> 1.0'
29
+ spec.add_development_dependency "rake", '~> 10.4', '>= 10.4.2'
30
+ spec.add_development_dependency "rubocop", '~> 0.49'
31
31
  spec.extensions << 'ext/argon2_wrap/extconf.rb'
32
32
  end
@@ -1,3 +1,10 @@
1
+ # 20171227
2
+ * Added ABI version number
3
+ * AVX2/AVX-512F optimizations of BLAMKA
4
+ * Set Argon2 version number from the command line
5
+ * New bindings
6
+ * Minor bug and warning fixes (no security issue)
7
+
1
8
  # 20161029
2
9
 
3
10
  * Argon2id added
@@ -20,7 +20,7 @@ BENCH = bench
20
20
  GENKAT = genkat
21
21
 
22
22
  # Increment on an ABI breaking change
23
- ABI_VERSION = 0
23
+ ABI_VERSION = 1
24
24
 
25
25
  DIST = phc-winner-argon2
26
26
 
@@ -123,7 +123,7 @@ INST_BINARY = $(DESTDIR)$(PREFIX)/$(BINARY_REL)
123
123
 
124
124
  .PHONY: clean dist format $(GENKAT) all install
125
125
 
126
- all: clean $(RUN) libs
126
+ all: $(RUN) libs
127
127
  libs: $(LIBRARIES)
128
128
 
129
129
  $(RUN): $(SRC) $(SRC_RUN)
@@ -256,10 +256,14 @@ their documentation):
256
256
  * [OCaml](https://github.com/Khady/ocaml-argon2) by [@Khady](https://github.com/Khady)
257
257
  * [Python (native)](https://pypi.python.org/pypi/argon2), by [@flamewow](https://github.com/flamewow)
258
258
  * [Python (ffi)](https://pypi.python.org/pypi/argon2_cffi), by [@hynek](https://github.com/hynek)
259
+ * [Python (ffi, with keyed hashing)](https://github.com/thusoy/porridge), by [@thusoy](https://github.com/thusoy)
260
+ * [R](https://cran.r-project.org/package=argon2) by [@wrathematics](https://github.com/wrathematics)
259
261
  * [Ruby](https://github.com/technion/ruby-argon2) by [@technion](https://github.com/technion)
260
262
  * [Rust](https://github.com/quininer/argon2-rs) by [@quininer](https://github.com/quininer)
261
263
  * [C#/.NET CoreCLR](https://github.com/kmaragon/Konscious.Security.Cryptography) by [@kmaragon](https://github.com/kmaragon)
262
264
  * [Perl](https://github.com/Leont/crypt-argon2) by [@leont](https://github.com/Leont)
265
+ * [mruby](https://github.com/Asmod4n/mruby-argon2) by [@Asmod4n](https://github.com/Asmod4n)
266
+ * [Swift](https://github.com/ImKcat/CatCrypto) by [@ImKcat](https://github.com/ImKcat)
263
267
 
264
268
 
265
269
  ## Test suite
@@ -4,7 +4,7 @@
4
4
  * Copyright 2015
5
5
  * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
6
6
  *
7
- * You may use this work under the terms of a Creative Commons CC0 1.0
7
+ * You may use this work under the terms of a Creative Commons CC0 1.0
8
8
  * License/Waiver or the Apache Public License 2.0, at your option. The terms of
9
9
  * these licenses can be found at:
10
10
  *
@@ -29,10 +29,13 @@ extern "C" {
29
29
  /* Symbols visibility control */
30
30
  #ifdef A2_VISCTL
31
31
  #define ARGON2_PUBLIC __attribute__((visibility("default")))
32
+ #define ARGON2_LOCAL __attribute__ ((visibility ("hidden")))
32
33
  #elif _MSC_VER
33
34
  #define ARGON2_PUBLIC __declspec(dllexport)
35
+ #define ARGON2_LOCAL
34
36
  #else
35
37
  #define ARGON2_PUBLIC
38
+ #define ARGON2_LOCAL
36
39
  #endif
37
40
 
38
41
  /*
@@ -267,8 +270,7 @@ ARGON2_PUBLIC int argon2i_hash_encoded(const uint32_t t_cost,
267
270
  const size_t encodedlen);
268
271
 
269
272
  /**
270
- * Hashes a password with Argon2i, producing a raw hash by allocating memory at
271
- * @hash
273
+ * Hashes a password with Argon2i, producing a raw hash at @hash
272
274
  * @param t_cost Number of iterations
273
275
  * @param m_cost Sets memory usage to m_cost kibibytes
274
276
  * @param parallelism Number of threads and compute lanes
@@ -4,7 +4,7 @@
4
4
  * Copyright 2015
5
5
  * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
6
6
  *
7
- * You may use this work under the terms of a Creative Commons CC0 1.0
7
+ * You may use this work under the terms of a Creative Commons CC0 1.0
8
8
  * License/Waiver or the Apache Public License 2.0, at your option. The terms of
9
9
  * these licenses can be found at:
10
10
  *
@@ -4,7 +4,7 @@
4
4
  * Copyright 2015
5
5
  * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
6
6
  *
7
- * You may use this work under the terms of a Creative Commons CC0 1.0
7
+ * You may use this work under the terms of a Creative Commons CC0 1.0
8
8
  * License/Waiver or the Apache Public License 2.0, at your option. The terms of
9
9
  * these licenses can be found at:
10
10
  *
@@ -45,7 +45,7 @@ static uint64_t rdtsc(void) {
45
45
  }
46
46
 
47
47
  /*
48
- * Benchmarks Argon2 with salt length 16, password length 16, t_cost 1,
48
+ * Benchmarks Argon2 with salt length 16, password length 16, t_cost 3,
49
49
  and different m_cost and threads
50
50
  */
51
51
  static void benchmark() {
@@ -4,7 +4,7 @@
4
4
  * Copyright 2015
5
5
  * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
6
6
  *
7
- * You may use this work under the terms of a Creative Commons CC0 1.0
7
+ * You may use this work under the terms of a Creative Commons CC0 1.0
8
8
  * License/Waiver or the Apache Public License 2.0, at your option. The terms of
9
9
  * these licenses can be found at:
10
10
  *
@@ -4,7 +4,7 @@
4
4
  * Copyright 2015
5
5
  * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
6
6
  *
7
- * You may use this work under the terms of a Creative Commons CC0 1.0
7
+ * You may use this work under the terms of a Creative Commons CC0 1.0
8
8
  * License/Waiver or the Apache Public License 2.0, at your option. The terms of
9
9
  * these licenses can be found at:
10
10
  *
@@ -18,9 +18,7 @@
18
18
  #ifndef PORTABLE_BLAKE2_H
19
19
  #define PORTABLE_BLAKE2_H
20
20
 
21
- #include <stddef.h>
22
- #include <stdint.h>
23
- #include <limits.h>
21
+ #include <argon2.h>
24
22
 
25
23
  #if defined(__cplusplus)
26
24
  extern "C" {
@@ -69,19 +67,19 @@ enum {
69
67
  };
70
68
 
71
69
  /* Streaming API */
72
- int blake2b_init(blake2b_state *S, size_t outlen);
73
- int blake2b_init_key(blake2b_state *S, size_t outlen, const void *key,
70
+ ARGON2_LOCAL int blake2b_init(blake2b_state *S, size_t outlen);
71
+ ARGON2_LOCAL int blake2b_init_key(blake2b_state *S, size_t outlen, const void *key,
74
72
  size_t keylen);
75
- int blake2b_init_param(blake2b_state *S, const blake2b_param *P);
76
- int blake2b_update(blake2b_state *S, const void *in, size_t inlen);
77
- int blake2b_final(blake2b_state *S, void *out, size_t outlen);
73
+ ARGON2_LOCAL int blake2b_init_param(blake2b_state *S, const blake2b_param *P);
74
+ ARGON2_LOCAL int blake2b_update(blake2b_state *S, const void *in, size_t inlen);
75
+ ARGON2_LOCAL int blake2b_final(blake2b_state *S, void *out, size_t outlen);
78
76
 
79
77
  /* Simple API */
80
- int blake2b(void *out, size_t outlen, const void *in, size_t inlen,
81
- const void *key, size_t keylen);
78
+ ARGON2_LOCAL int blake2b(void *out, size_t outlen, const void *in, size_t inlen,
79
+ const void *key, size_t keylen);
82
80
 
83
81
  /* Argon2 Team - Begin Code */
84
- int blake2b_long(void *out, size_t outlen, const void *in, size_t inlen);
82
+ ARGON2_LOCAL int blake2b_long(void *out, size_t outlen, const void *in, size_t inlen);
85
83
  /* Argon2 Team - End Code */
86
84
 
87
85
  #if defined(__cplusplus)
@@ -4,7 +4,7 @@
4
4
  * Copyright 2015
5
5
  * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
6
6
  *
7
- * You may use this work under the terms of a Creative Commons CC0 1.0
7
+ * You may use this work under the terms of a Creative Commons CC0 1.0
8
8
  * License/Waiver or the Apache Public License 2.0, at your option. The terms of
9
9
  * these licenses can be found at:
10
10
  *
@@ -4,7 +4,7 @@
4
4
  * Copyright 2015
5
5
  * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
6
6
  *
7
- * You may use this work under the terms of a Creative Commons CC0 1.0
7
+ * You may use this work under the terms of a Creative Commons CC0 1.0
8
8
  * License/Waiver or the Apache Public License 2.0, at your option. The terms of
9
9
  * these licenses can be found at:
10
10
  *
@@ -29,6 +29,8 @@
29
29
  #include <x86intrin.h>
30
30
  #endif
31
31
 
32
+ #if !defined(__AVX512F__)
33
+ #if !defined(__AVX2__)
32
34
  #if !defined(__XOP__)
33
35
  #if defined(__SSSE3__)
34
36
  #define r16 \
@@ -176,5 +178,294 @@ static BLAKE2_INLINE __m128i fBlaMka(__m128i x, __m128i y) {
176
178
  \
177
179
  UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
178
180
  } while ((void)0, 0)
181
+ #else /* __AVX2__ */
179
182
 
180
- #endif
183
+ #include <immintrin.h>
184
+
185
+ #define rotr32(x) _mm256_shuffle_epi32(x, _MM_SHUFFLE(2, 3, 0, 1))
186
+ #define rotr24(x) _mm256_shuffle_epi8(x, _mm256_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10, 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10))
187
+ #define rotr16(x) _mm256_shuffle_epi8(x, _mm256_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9, 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9))
188
+ #define rotr63(x) _mm256_xor_si256(_mm256_srli_epi64((x), 63), _mm256_add_epi64((x), (x)))
189
+
190
+ #define G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
191
+ do { \
192
+ __m256i ml = _mm256_mul_epu32(A0, B0); \
193
+ ml = _mm256_add_epi64(ml, ml); \
194
+ A0 = _mm256_add_epi64(A0, _mm256_add_epi64(B0, ml)); \
195
+ D0 = _mm256_xor_si256(D0, A0); \
196
+ D0 = rotr32(D0); \
197
+ \
198
+ ml = _mm256_mul_epu32(C0, D0); \
199
+ ml = _mm256_add_epi64(ml, ml); \
200
+ C0 = _mm256_add_epi64(C0, _mm256_add_epi64(D0, ml)); \
201
+ \
202
+ B0 = _mm256_xor_si256(B0, C0); \
203
+ B0 = rotr24(B0); \
204
+ \
205
+ ml = _mm256_mul_epu32(A1, B1); \
206
+ ml = _mm256_add_epi64(ml, ml); \
207
+ A1 = _mm256_add_epi64(A1, _mm256_add_epi64(B1, ml)); \
208
+ D1 = _mm256_xor_si256(D1, A1); \
209
+ D1 = rotr32(D1); \
210
+ \
211
+ ml = _mm256_mul_epu32(C1, D1); \
212
+ ml = _mm256_add_epi64(ml, ml); \
213
+ C1 = _mm256_add_epi64(C1, _mm256_add_epi64(D1, ml)); \
214
+ \
215
+ B1 = _mm256_xor_si256(B1, C1); \
216
+ B1 = rotr24(B1); \
217
+ } while((void)0, 0);
218
+
219
+ #define G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
220
+ do { \
221
+ __m256i ml = _mm256_mul_epu32(A0, B0); \
222
+ ml = _mm256_add_epi64(ml, ml); \
223
+ A0 = _mm256_add_epi64(A0, _mm256_add_epi64(B0, ml)); \
224
+ D0 = _mm256_xor_si256(D0, A0); \
225
+ D0 = rotr16(D0); \
226
+ \
227
+ ml = _mm256_mul_epu32(C0, D0); \
228
+ ml = _mm256_add_epi64(ml, ml); \
229
+ C0 = _mm256_add_epi64(C0, _mm256_add_epi64(D0, ml)); \
230
+ B0 = _mm256_xor_si256(B0, C0); \
231
+ B0 = rotr63(B0); \
232
+ \
233
+ ml = _mm256_mul_epu32(A1, B1); \
234
+ ml = _mm256_add_epi64(ml, ml); \
235
+ A1 = _mm256_add_epi64(A1, _mm256_add_epi64(B1, ml)); \
236
+ D1 = _mm256_xor_si256(D1, A1); \
237
+ D1 = rotr16(D1); \
238
+ \
239
+ ml = _mm256_mul_epu32(C1, D1); \
240
+ ml = _mm256_add_epi64(ml, ml); \
241
+ C1 = _mm256_add_epi64(C1, _mm256_add_epi64(D1, ml)); \
242
+ B1 = _mm256_xor_si256(B1, C1); \
243
+ B1 = rotr63(B1); \
244
+ } while((void)0, 0);
245
+
246
+ #define DIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
247
+ do { \
248
+ B0 = _mm256_permute4x64_epi64(B0, _MM_SHUFFLE(0, 3, 2, 1)); \
249
+ C0 = _mm256_permute4x64_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
250
+ D0 = _mm256_permute4x64_epi64(D0, _MM_SHUFFLE(2, 1, 0, 3)); \
251
+ \
252
+ B1 = _mm256_permute4x64_epi64(B1, _MM_SHUFFLE(0, 3, 2, 1)); \
253
+ C1 = _mm256_permute4x64_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
254
+ D1 = _mm256_permute4x64_epi64(D1, _MM_SHUFFLE(2, 1, 0, 3)); \
255
+ } while((void)0, 0);
256
+
257
+ #define DIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
258
+ do { \
259
+ __m256i tmp1 = _mm256_blend_epi32(B0, B1, 0xCC); \
260
+ __m256i tmp2 = _mm256_blend_epi32(B0, B1, 0x33); \
261
+ B1 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
262
+ B0 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
263
+ \
264
+ tmp1 = C0; \
265
+ C0 = C1; \
266
+ C1 = tmp1; \
267
+ \
268
+ tmp1 = _mm256_blend_epi32(D0, D1, 0xCC); \
269
+ tmp2 = _mm256_blend_epi32(D0, D1, 0x33); \
270
+ D0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
271
+ D1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
272
+ } while(0);
273
+
274
+ #define UNDIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
275
+ do { \
276
+ B0 = _mm256_permute4x64_epi64(B0, _MM_SHUFFLE(2, 1, 0, 3)); \
277
+ C0 = _mm256_permute4x64_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
278
+ D0 = _mm256_permute4x64_epi64(D0, _MM_SHUFFLE(0, 3, 2, 1)); \
279
+ \
280
+ B1 = _mm256_permute4x64_epi64(B1, _MM_SHUFFLE(2, 1, 0, 3)); \
281
+ C1 = _mm256_permute4x64_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
282
+ D1 = _mm256_permute4x64_epi64(D1, _MM_SHUFFLE(0, 3, 2, 1)); \
283
+ } while((void)0, 0);
284
+
285
+ #define UNDIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
286
+ do { \
287
+ __m256i tmp1 = _mm256_blend_epi32(B0, B1, 0xCC); \
288
+ __m256i tmp2 = _mm256_blend_epi32(B0, B1, 0x33); \
289
+ B0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
290
+ B1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
291
+ \
292
+ tmp1 = C0; \
293
+ C0 = C1; \
294
+ C1 = tmp1; \
295
+ \
296
+ tmp1 = _mm256_blend_epi32(D0, D1, 0x33); \
297
+ tmp2 = _mm256_blend_epi32(D0, D1, 0xCC); \
298
+ D0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
299
+ D1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
300
+ } while((void)0, 0);
301
+
302
+ #define BLAKE2_ROUND_1(A0, A1, B0, B1, C0, C1, D0, D1) \
303
+ do{ \
304
+ G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
305
+ G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
306
+ \
307
+ DIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
308
+ \
309
+ G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
310
+ G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
311
+ \
312
+ UNDIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
313
+ } while((void)0, 0);
314
+
315
+ #define BLAKE2_ROUND_2(A0, A1, B0, B1, C0, C1, D0, D1) \
316
+ do{ \
317
+ G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
318
+ G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
319
+ \
320
+ DIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
321
+ \
322
+ G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
323
+ G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
324
+ \
325
+ UNDIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
326
+ } while((void)0, 0);
327
+
328
+ #endif /* __AVX2__ */
329
+
330
+ #else /* __AVX512F__ */
331
+
332
+ #include <immintrin.h>
333
+
334
+ #define ror64(x, n) _mm512_ror_epi64((x), (n))
335
+
336
+ static __m512i muladd(__m512i x, __m512i y)
337
+ {
338
+ __m512i z = _mm512_mul_epu32(x, y);
339
+ return _mm512_add_epi64(_mm512_add_epi64(x, y), _mm512_add_epi64(z, z));
340
+ }
341
+
342
+ #define G1(A0, B0, C0, D0, A1, B1, C1, D1) \
343
+ do { \
344
+ A0 = muladd(A0, B0); \
345
+ A1 = muladd(A1, B1); \
346
+ \
347
+ D0 = _mm512_xor_si512(D0, A0); \
348
+ D1 = _mm512_xor_si512(D1, A1); \
349
+ \
350
+ D0 = ror64(D0, 32); \
351
+ D1 = ror64(D1, 32); \
352
+ \
353
+ C0 = muladd(C0, D0); \
354
+ C1 = muladd(C1, D1); \
355
+ \
356
+ B0 = _mm512_xor_si512(B0, C0); \
357
+ B1 = _mm512_xor_si512(B1, C1); \
358
+ \
359
+ B0 = ror64(B0, 24); \
360
+ B1 = ror64(B1, 24); \
361
+ } while ((void)0, 0)
362
+
363
+ #define G2(A0, B0, C0, D0, A1, B1, C1, D1) \
364
+ do { \
365
+ A0 = muladd(A0, B0); \
366
+ A1 = muladd(A1, B1); \
367
+ \
368
+ D0 = _mm512_xor_si512(D0, A0); \
369
+ D1 = _mm512_xor_si512(D1, A1); \
370
+ \
371
+ D0 = ror64(D0, 16); \
372
+ D1 = ror64(D1, 16); \
373
+ \
374
+ C0 = muladd(C0, D0); \
375
+ C1 = muladd(C1, D1); \
376
+ \
377
+ B0 = _mm512_xor_si512(B0, C0); \
378
+ B1 = _mm512_xor_si512(B1, C1); \
379
+ \
380
+ B0 = ror64(B0, 63); \
381
+ B1 = ror64(B1, 63); \
382
+ } while ((void)0, 0)
383
+
384
+ #define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
385
+ do { \
386
+ B0 = _mm512_permutex_epi64(B0, _MM_SHUFFLE(0, 3, 2, 1)); \
387
+ B1 = _mm512_permutex_epi64(B1, _MM_SHUFFLE(0, 3, 2, 1)); \
388
+ \
389
+ C0 = _mm512_permutex_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
390
+ C1 = _mm512_permutex_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
391
+ \
392
+ D0 = _mm512_permutex_epi64(D0, _MM_SHUFFLE(2, 1, 0, 3)); \
393
+ D1 = _mm512_permutex_epi64(D1, _MM_SHUFFLE(2, 1, 0, 3)); \
394
+ } while ((void)0, 0)
395
+
396
+ #define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
397
+ do { \
398
+ B0 = _mm512_permutex_epi64(B0, _MM_SHUFFLE(2, 1, 0, 3)); \
399
+ B1 = _mm512_permutex_epi64(B1, _MM_SHUFFLE(2, 1, 0, 3)); \
400
+ \
401
+ C0 = _mm512_permutex_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
402
+ C1 = _mm512_permutex_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
403
+ \
404
+ D0 = _mm512_permutex_epi64(D0, _MM_SHUFFLE(0, 3, 2, 1)); \
405
+ D1 = _mm512_permutex_epi64(D1, _MM_SHUFFLE(0, 3, 2, 1)); \
406
+ } while ((void)0, 0)
407
+
408
+ #define BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1) \
409
+ do { \
410
+ G1(A0, B0, C0, D0, A1, B1, C1, D1); \
411
+ G2(A0, B0, C0, D0, A1, B1, C1, D1); \
412
+ \
413
+ DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
414
+ \
415
+ G1(A0, B0, C0, D0, A1, B1, C1, D1); \
416
+ G2(A0, B0, C0, D0, A1, B1, C1, D1); \
417
+ \
418
+ UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
419
+ } while ((void)0, 0)
420
+
421
+ #define SWAP_HALVES(A0, A1) \
422
+ do { \
423
+ __m512i t0, t1; \
424
+ t0 = _mm512_shuffle_i64x2(A0, A1, _MM_SHUFFLE(1, 0, 1, 0)); \
425
+ t1 = _mm512_shuffle_i64x2(A0, A1, _MM_SHUFFLE(3, 2, 3, 2)); \
426
+ A0 = t0; \
427
+ A1 = t1; \
428
+ } while((void)0, 0)
429
+
430
+ #define SWAP_QUARTERS(A0, A1) \
431
+ do { \
432
+ SWAP_HALVES(A0, A1); \
433
+ A0 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A0); \
434
+ A1 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A1); \
435
+ } while((void)0, 0)
436
+
437
+ #define UNSWAP_QUARTERS(A0, A1) \
438
+ do { \
439
+ A0 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A0); \
440
+ A1 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A1); \
441
+ SWAP_HALVES(A0, A1); \
442
+ } while((void)0, 0)
443
+
444
+ #define BLAKE2_ROUND_1(A0, C0, B0, D0, A1, C1, B1, D1) \
445
+ do { \
446
+ SWAP_HALVES(A0, B0); \
447
+ SWAP_HALVES(C0, D0); \
448
+ SWAP_HALVES(A1, B1); \
449
+ SWAP_HALVES(C1, D1); \
450
+ BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1); \
451
+ SWAP_HALVES(A0, B0); \
452
+ SWAP_HALVES(C0, D0); \
453
+ SWAP_HALVES(A1, B1); \
454
+ SWAP_HALVES(C1, D1); \
455
+ } while ((void)0, 0)
456
+
457
+ #define BLAKE2_ROUND_2(A0, A1, B0, B1, C0, C1, D0, D1) \
458
+ do { \
459
+ SWAP_QUARTERS(A0, A1); \
460
+ SWAP_QUARTERS(B0, B1); \
461
+ SWAP_QUARTERS(C0, C1); \
462
+ SWAP_QUARTERS(D0, D1); \
463
+ BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1); \
464
+ UNSWAP_QUARTERS(A0, A1); \
465
+ UNSWAP_QUARTERS(B0, B1); \
466
+ UNSWAP_QUARTERS(C0, C1); \
467
+ UNSWAP_QUARTERS(D0, D1); \
468
+ } while ((void)0, 0)
469
+
470
+ #endif /* __AVX512F__ */
471
+ #endif /* BLAKE_ROUND_MKA_OPT_H */