argon2 1.1.4 → 1.1.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.rubocop.yml +6 -10
- data/.travis.yml +2 -1
- data/Changelog.md +4 -0
- data/README.md +2 -1
- data/argon2.gemspec +4 -4
- data/ext/phc-winner-argon2/CHANGELOG.md +7 -0
- data/ext/phc-winner-argon2/Makefile +2 -2
- data/ext/phc-winner-argon2/README.md +4 -0
- data/ext/phc-winner-argon2/include/argon2.h +5 -3
- data/ext/phc-winner-argon2/src/argon2.c +1 -1
- data/ext/phc-winner-argon2/src/bench.c +2 -2
- data/ext/phc-winner-argon2/src/blake2/blake2-impl.h +1 -1
- data/ext/phc-winner-argon2/src/blake2/blake2.h +10 -12
- data/ext/phc-winner-argon2/src/blake2/blake2b.c +1 -1
- data/ext/phc-winner-argon2/src/blake2/blamka-round-opt.h +293 -2
- data/ext/phc-winner-argon2/src/blake2/blamka-round-ref.h +2 -2
- data/ext/phc-winner-argon2/src/core.c +1 -2
- data/ext/phc-winner-argon2/src/core.h +3 -9
- data/ext/phc-winner-argon2/src/encoding.c +1 -1
- data/ext/phc-winner-argon2/src/encoding.h +1 -1
- data/ext/phc-winner-argon2/src/genkat.c +3 -4
- data/ext/phc-winner-argon2/src/genkat.h +1 -1
- data/ext/phc-winner-argon2/src/opt.c +90 -2
- data/ext/phc-winner-argon2/src/ref.c +1 -1
- data/ext/phc-winner-argon2/src/run.c +43 -23
- data/ext/phc-winner-argon2/src/test.c +5 -6
- data/ext/phc-winner-argon2/src/thread.c +1 -1
- data/ext/phc-winner-argon2/src/thread.h +2 -2
- data/lib/argon2.rb +1 -0
- data/lib/argon2/constants.rb +2 -0
- data/lib/argon2/engine.rb +1 -0
- data/lib/argon2/errors.rb +4 -2
- data/lib/argon2/version.rb +3 -1
- metadata +19 -18
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 9425c5c639ac3e940ffffe3f9baa5967976ad2caa49d0716db0de43aa6d41b66
|
4
|
+
data.tar.gz: 607eff42d6c915f2528d5e61f95d264bdace086515a250188e0f937397b684b1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 60b5ff68f0e29fa1c40dc2e9a10e21f585dacd1d95af4a219166be38e9b2734d8bcae85010245ceb5a2d8782f8f65906bce4f951159eaf5bd1e9b8bccd98cf71
|
7
|
+
data.tar.gz: e8d1074c9ad878cb2a7b7e3791b377f0fbf573ead9d9432ad178d1e4c4af4de80d337455d7fb438fd0e5ad15e2bcfa0f5091254674b7c5d45145ac8ce9e5f6e1
|
data/.rubocop.yml
CHANGED
@@ -5,20 +5,16 @@ Metrics/CyclomaticComplexity:
|
|
5
5
|
Enabled: false
|
6
6
|
Metrics/PerceivedComplexity:
|
7
7
|
Enabled: false
|
8
|
-
#Style/MutableConstant:
|
9
|
-
# Exclude:
|
10
|
-
# - 'test/key_test.rb'
|
11
|
-
|
12
8
|
Metrics/LineLength:
|
13
9
|
Max: 160
|
14
10
|
|
15
11
|
Metrics/MethodLength:
|
16
12
|
Max: 24
|
17
13
|
|
18
|
-
|
14
|
+
Layout/AlignParameters:
|
19
15
|
Enabled: false
|
20
16
|
|
21
|
-
|
17
|
+
Layout/AlignArray:
|
22
18
|
Enabled: false
|
23
19
|
|
24
20
|
# Configuration parameters: Exclude.
|
@@ -30,7 +26,7 @@ Style/Documentation:
|
|
30
26
|
# Offense count: 16
|
31
27
|
# Cop supports --auto-correct.
|
32
28
|
# Configuration parameters: EnforcedStyle, SupportedStyles.
|
33
|
-
|
29
|
+
Layout/FirstParameterIndentation:
|
34
30
|
Exclude:
|
35
31
|
- 'lib/argon2.rb'
|
36
32
|
- 'test/low_level_test.rb'
|
@@ -41,13 +37,13 @@ Style/HashSyntax:
|
|
41
37
|
|
42
38
|
# Offense count: 1
|
43
39
|
# Cop supports --auto-correct.
|
44
|
-
|
40
|
+
Layout/IndentArray:
|
45
41
|
Exclude:
|
46
42
|
- 'lib/argon2/errors.rb'
|
47
43
|
|
48
44
|
# Offense count: 44
|
49
45
|
# Cop supports --auto-correct.
|
50
|
-
|
46
|
+
Layout/LeadingCommentSpace:
|
51
47
|
Exclude:
|
52
48
|
- 'ext/argon2_wrap/extconf.rb'
|
53
49
|
|
@@ -60,7 +56,7 @@ Style/StringLiterals:
|
|
60
56
|
Style/WordArray:
|
61
57
|
MinSize: 33
|
62
58
|
|
63
|
-
|
59
|
+
Layout/MultilineMethodCallBraceLayout:
|
64
60
|
Exclude:
|
65
61
|
- 'lib/argon2.rb'
|
66
62
|
- 'test/low_level_test.rb'
|
data/.travis.yml
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
language: ruby
|
2
|
+
sudo: required
|
2
3
|
rvm:
|
4
|
+
- 2.5.0
|
3
5
|
- 2.4.0
|
4
6
|
- 2.3.3
|
5
7
|
- jruby-9000
|
@@ -10,4 +12,3 @@ install: bin/setup
|
|
10
12
|
script:
|
11
13
|
- cd ext/argon2_wrap/ && make test && cd ../..
|
12
14
|
- bundle exec rake test
|
13
|
-
- CODECLIMATE_REPO_TOKEN=2b619b81040453ecbcf1cf0869e1238c4bbaab666a42e7dd94d762c747c0f51a bundle exec codeclimate-test-reporter
|
data/Changelog.md
CHANGED
data/README.md
CHANGED
@@ -5,7 +5,7 @@ This Ruby Gem provides FFI bindings, and a simplified interface, to the Argon2 a
|
|
5
5
|
|
6
6
|
[![Build Status](https://travis-ci.org/technion/ruby-argon2.svg?branch=master)](https://travis-ci.org/technion/ruby-argon2)
|
7
7
|
[![Code Climate](https://codeclimate.com/github/technion/ruby-argon2/badges/gpa.svg)](https://codeclimate.com/github/technion/ruby-argon2)
|
8
|
-
[![
|
8
|
+
[![Coverage Status](https://coveralls.io/repos/github/technion/ruby-argon2/badge.svg)](https://coveralls.io/github/technion/ruby-argon2)
|
9
9
|
|
10
10
|
## Design
|
11
11
|
|
@@ -44,6 +44,7 @@ hasher = Argon2::Password.new
|
|
44
44
|
hasher.create("password")
|
45
45
|
```
|
46
46
|
|
47
|
+
If you follow this pattern, it is important to create a new `Argon2::Password` every time you generate a hash, in order to ensure a unique salt. See [issue 23](https://github.com/technion/ruby-argon2/issues/23) for more information.
|
47
48
|
Alternatively, use this shotcut:
|
48
49
|
|
49
50
|
```ruby
|
data/argon2.gemspec
CHANGED
@@ -1,5 +1,5 @@
|
|
1
|
-
# coding: utf-8
|
2
1
|
lib = File.expand_path('../lib', __FILE__)
|
2
|
+
|
3
3
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
4
|
require 'argon2/version'
|
5
5
|
|
@@ -24,9 +24,9 @@ Gem::Specification.new do |spec|
|
|
24
24
|
spec.add_dependency 'ffi-compiler', '~> 0.1'
|
25
25
|
|
26
26
|
spec.add_development_dependency "bundler", '~> 1.10', '>= 1.10.5'
|
27
|
-
spec.add_development_dependency "
|
27
|
+
spec.add_development_dependency "coveralls", '~> 0.8'
|
28
28
|
spec.add_development_dependency "minitest", '~> 5.8'
|
29
|
-
spec.add_development_dependency "
|
30
|
-
spec.add_development_dependency "
|
29
|
+
spec.add_development_dependency "rake", '~> 10.4', '>= 10.4.2'
|
30
|
+
spec.add_development_dependency "rubocop", '~> 0.49'
|
31
31
|
spec.extensions << 'ext/argon2_wrap/extconf.rb'
|
32
32
|
end
|
@@ -20,7 +20,7 @@ BENCH = bench
|
|
20
20
|
GENKAT = genkat
|
21
21
|
|
22
22
|
# Increment on an ABI breaking change
|
23
|
-
ABI_VERSION =
|
23
|
+
ABI_VERSION = 1
|
24
24
|
|
25
25
|
DIST = phc-winner-argon2
|
26
26
|
|
@@ -123,7 +123,7 @@ INST_BINARY = $(DESTDIR)$(PREFIX)/$(BINARY_REL)
|
|
123
123
|
|
124
124
|
.PHONY: clean dist format $(GENKAT) all install
|
125
125
|
|
126
|
-
all:
|
126
|
+
all: $(RUN) libs
|
127
127
|
libs: $(LIBRARIES)
|
128
128
|
|
129
129
|
$(RUN): $(SRC) $(SRC_RUN)
|
@@ -256,10 +256,14 @@ their documentation):
|
|
256
256
|
* [OCaml](https://github.com/Khady/ocaml-argon2) by [@Khady](https://github.com/Khady)
|
257
257
|
* [Python (native)](https://pypi.python.org/pypi/argon2), by [@flamewow](https://github.com/flamewow)
|
258
258
|
* [Python (ffi)](https://pypi.python.org/pypi/argon2_cffi), by [@hynek](https://github.com/hynek)
|
259
|
+
* [Python (ffi, with keyed hashing)](https://github.com/thusoy/porridge), by [@thusoy](https://github.com/thusoy)
|
260
|
+
* [R](https://cran.r-project.org/package=argon2) by [@wrathematics](https://github.com/wrathematics)
|
259
261
|
* [Ruby](https://github.com/technion/ruby-argon2) by [@technion](https://github.com/technion)
|
260
262
|
* [Rust](https://github.com/quininer/argon2-rs) by [@quininer](https://github.com/quininer)
|
261
263
|
* [C#/.NET CoreCLR](https://github.com/kmaragon/Konscious.Security.Cryptography) by [@kmaragon](https://github.com/kmaragon)
|
262
264
|
* [Perl](https://github.com/Leont/crypt-argon2) by [@leont](https://github.com/Leont)
|
265
|
+
* [mruby](https://github.com/Asmod4n/mruby-argon2) by [@Asmod4n](https://github.com/Asmod4n)
|
266
|
+
* [Swift](https://github.com/ImKcat/CatCrypto) by [@ImKcat](https://github.com/ImKcat)
|
263
267
|
|
264
268
|
|
265
269
|
## Test suite
|
@@ -4,7 +4,7 @@
|
|
4
4
|
* Copyright 2015
|
5
5
|
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
6
6
|
*
|
7
|
-
* You may use this work under the terms of a Creative Commons CC0 1.0
|
7
|
+
* You may use this work under the terms of a Creative Commons CC0 1.0
|
8
8
|
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
9
9
|
* these licenses can be found at:
|
10
10
|
*
|
@@ -29,10 +29,13 @@ extern "C" {
|
|
29
29
|
/* Symbols visibility control */
|
30
30
|
#ifdef A2_VISCTL
|
31
31
|
#define ARGON2_PUBLIC __attribute__((visibility("default")))
|
32
|
+
#define ARGON2_LOCAL __attribute__ ((visibility ("hidden")))
|
32
33
|
#elif _MSC_VER
|
33
34
|
#define ARGON2_PUBLIC __declspec(dllexport)
|
35
|
+
#define ARGON2_LOCAL
|
34
36
|
#else
|
35
37
|
#define ARGON2_PUBLIC
|
38
|
+
#define ARGON2_LOCAL
|
36
39
|
#endif
|
37
40
|
|
38
41
|
/*
|
@@ -267,8 +270,7 @@ ARGON2_PUBLIC int argon2i_hash_encoded(const uint32_t t_cost,
|
|
267
270
|
const size_t encodedlen);
|
268
271
|
|
269
272
|
/**
|
270
|
-
* Hashes a password with Argon2i, producing a raw hash
|
271
|
-
* @hash
|
273
|
+
* Hashes a password with Argon2i, producing a raw hash at @hash
|
272
274
|
* @param t_cost Number of iterations
|
273
275
|
* @param m_cost Sets memory usage to m_cost kibibytes
|
274
276
|
* @param parallelism Number of threads and compute lanes
|
@@ -4,7 +4,7 @@
|
|
4
4
|
* Copyright 2015
|
5
5
|
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
6
6
|
*
|
7
|
-
* You may use this work under the terms of a Creative Commons CC0 1.0
|
7
|
+
* You may use this work under the terms of a Creative Commons CC0 1.0
|
8
8
|
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
9
9
|
* these licenses can be found at:
|
10
10
|
*
|
@@ -4,7 +4,7 @@
|
|
4
4
|
* Copyright 2015
|
5
5
|
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
6
6
|
*
|
7
|
-
* You may use this work under the terms of a Creative Commons CC0 1.0
|
7
|
+
* You may use this work under the terms of a Creative Commons CC0 1.0
|
8
8
|
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
9
9
|
* these licenses can be found at:
|
10
10
|
*
|
@@ -45,7 +45,7 @@ static uint64_t rdtsc(void) {
|
|
45
45
|
}
|
46
46
|
|
47
47
|
/*
|
48
|
-
* Benchmarks Argon2 with salt length 16, password length 16, t_cost
|
48
|
+
* Benchmarks Argon2 with salt length 16, password length 16, t_cost 3,
|
49
49
|
and different m_cost and threads
|
50
50
|
*/
|
51
51
|
static void benchmark() {
|
@@ -4,7 +4,7 @@
|
|
4
4
|
* Copyright 2015
|
5
5
|
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
6
6
|
*
|
7
|
-
* You may use this work under the terms of a Creative Commons CC0 1.0
|
7
|
+
* You may use this work under the terms of a Creative Commons CC0 1.0
|
8
8
|
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
9
9
|
* these licenses can be found at:
|
10
10
|
*
|
@@ -4,7 +4,7 @@
|
|
4
4
|
* Copyright 2015
|
5
5
|
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
6
6
|
*
|
7
|
-
* You may use this work under the terms of a Creative Commons CC0 1.0
|
7
|
+
* You may use this work under the terms of a Creative Commons CC0 1.0
|
8
8
|
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
9
9
|
* these licenses can be found at:
|
10
10
|
*
|
@@ -18,9 +18,7 @@
|
|
18
18
|
#ifndef PORTABLE_BLAKE2_H
|
19
19
|
#define PORTABLE_BLAKE2_H
|
20
20
|
|
21
|
-
#include <
|
22
|
-
#include <stdint.h>
|
23
|
-
#include <limits.h>
|
21
|
+
#include <argon2.h>
|
24
22
|
|
25
23
|
#if defined(__cplusplus)
|
26
24
|
extern "C" {
|
@@ -69,19 +67,19 @@ enum {
|
|
69
67
|
};
|
70
68
|
|
71
69
|
/* Streaming API */
|
72
|
-
int blake2b_init(blake2b_state *S, size_t outlen);
|
73
|
-
int blake2b_init_key(blake2b_state *S, size_t outlen, const void *key,
|
70
|
+
ARGON2_LOCAL int blake2b_init(blake2b_state *S, size_t outlen);
|
71
|
+
ARGON2_LOCAL int blake2b_init_key(blake2b_state *S, size_t outlen, const void *key,
|
74
72
|
size_t keylen);
|
75
|
-
int blake2b_init_param(blake2b_state *S, const blake2b_param *P);
|
76
|
-
int blake2b_update(blake2b_state *S, const void *in, size_t inlen);
|
77
|
-
int blake2b_final(blake2b_state *S, void *out, size_t outlen);
|
73
|
+
ARGON2_LOCAL int blake2b_init_param(blake2b_state *S, const blake2b_param *P);
|
74
|
+
ARGON2_LOCAL int blake2b_update(blake2b_state *S, const void *in, size_t inlen);
|
75
|
+
ARGON2_LOCAL int blake2b_final(blake2b_state *S, void *out, size_t outlen);
|
78
76
|
|
79
77
|
/* Simple API */
|
80
|
-
int blake2b(void *out, size_t outlen, const void *in, size_t inlen,
|
81
|
-
|
78
|
+
ARGON2_LOCAL int blake2b(void *out, size_t outlen, const void *in, size_t inlen,
|
79
|
+
const void *key, size_t keylen);
|
82
80
|
|
83
81
|
/* Argon2 Team - Begin Code */
|
84
|
-
int blake2b_long(void *out, size_t outlen, const void *in, size_t inlen);
|
82
|
+
ARGON2_LOCAL int blake2b_long(void *out, size_t outlen, const void *in, size_t inlen);
|
85
83
|
/* Argon2 Team - End Code */
|
86
84
|
|
87
85
|
#if defined(__cplusplus)
|
@@ -4,7 +4,7 @@
|
|
4
4
|
* Copyright 2015
|
5
5
|
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
6
6
|
*
|
7
|
-
* You may use this work under the terms of a Creative Commons CC0 1.0
|
7
|
+
* You may use this work under the terms of a Creative Commons CC0 1.0
|
8
8
|
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
9
9
|
* these licenses can be found at:
|
10
10
|
*
|
@@ -4,7 +4,7 @@
|
|
4
4
|
* Copyright 2015
|
5
5
|
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
6
6
|
*
|
7
|
-
* You may use this work under the terms of a Creative Commons CC0 1.0
|
7
|
+
* You may use this work under the terms of a Creative Commons CC0 1.0
|
8
8
|
* License/Waiver or the Apache Public License 2.0, at your option. The terms of
|
9
9
|
* these licenses can be found at:
|
10
10
|
*
|
@@ -29,6 +29,8 @@
|
|
29
29
|
#include <x86intrin.h>
|
30
30
|
#endif
|
31
31
|
|
32
|
+
#if !defined(__AVX512F__)
|
33
|
+
#if !defined(__AVX2__)
|
32
34
|
#if !defined(__XOP__)
|
33
35
|
#if defined(__SSSE3__)
|
34
36
|
#define r16 \
|
@@ -176,5 +178,294 @@ static BLAKE2_INLINE __m128i fBlaMka(__m128i x, __m128i y) {
|
|
176
178
|
\
|
177
179
|
UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
|
178
180
|
} while ((void)0, 0)
|
181
|
+
#else /* __AVX2__ */
|
179
182
|
|
180
|
-
#
|
183
|
+
#include <immintrin.h>
|
184
|
+
|
185
|
+
#define rotr32(x) _mm256_shuffle_epi32(x, _MM_SHUFFLE(2, 3, 0, 1))
|
186
|
+
#define rotr24(x) _mm256_shuffle_epi8(x, _mm256_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10, 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10))
|
187
|
+
#define rotr16(x) _mm256_shuffle_epi8(x, _mm256_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9, 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9))
|
188
|
+
#define rotr63(x) _mm256_xor_si256(_mm256_srli_epi64((x), 63), _mm256_add_epi64((x), (x)))
|
189
|
+
|
190
|
+
#define G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
191
|
+
do { \
|
192
|
+
__m256i ml = _mm256_mul_epu32(A0, B0); \
|
193
|
+
ml = _mm256_add_epi64(ml, ml); \
|
194
|
+
A0 = _mm256_add_epi64(A0, _mm256_add_epi64(B0, ml)); \
|
195
|
+
D0 = _mm256_xor_si256(D0, A0); \
|
196
|
+
D0 = rotr32(D0); \
|
197
|
+
\
|
198
|
+
ml = _mm256_mul_epu32(C0, D0); \
|
199
|
+
ml = _mm256_add_epi64(ml, ml); \
|
200
|
+
C0 = _mm256_add_epi64(C0, _mm256_add_epi64(D0, ml)); \
|
201
|
+
\
|
202
|
+
B0 = _mm256_xor_si256(B0, C0); \
|
203
|
+
B0 = rotr24(B0); \
|
204
|
+
\
|
205
|
+
ml = _mm256_mul_epu32(A1, B1); \
|
206
|
+
ml = _mm256_add_epi64(ml, ml); \
|
207
|
+
A1 = _mm256_add_epi64(A1, _mm256_add_epi64(B1, ml)); \
|
208
|
+
D1 = _mm256_xor_si256(D1, A1); \
|
209
|
+
D1 = rotr32(D1); \
|
210
|
+
\
|
211
|
+
ml = _mm256_mul_epu32(C1, D1); \
|
212
|
+
ml = _mm256_add_epi64(ml, ml); \
|
213
|
+
C1 = _mm256_add_epi64(C1, _mm256_add_epi64(D1, ml)); \
|
214
|
+
\
|
215
|
+
B1 = _mm256_xor_si256(B1, C1); \
|
216
|
+
B1 = rotr24(B1); \
|
217
|
+
} while((void)0, 0);
|
218
|
+
|
219
|
+
#define G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
220
|
+
do { \
|
221
|
+
__m256i ml = _mm256_mul_epu32(A0, B0); \
|
222
|
+
ml = _mm256_add_epi64(ml, ml); \
|
223
|
+
A0 = _mm256_add_epi64(A0, _mm256_add_epi64(B0, ml)); \
|
224
|
+
D0 = _mm256_xor_si256(D0, A0); \
|
225
|
+
D0 = rotr16(D0); \
|
226
|
+
\
|
227
|
+
ml = _mm256_mul_epu32(C0, D0); \
|
228
|
+
ml = _mm256_add_epi64(ml, ml); \
|
229
|
+
C0 = _mm256_add_epi64(C0, _mm256_add_epi64(D0, ml)); \
|
230
|
+
B0 = _mm256_xor_si256(B0, C0); \
|
231
|
+
B0 = rotr63(B0); \
|
232
|
+
\
|
233
|
+
ml = _mm256_mul_epu32(A1, B1); \
|
234
|
+
ml = _mm256_add_epi64(ml, ml); \
|
235
|
+
A1 = _mm256_add_epi64(A1, _mm256_add_epi64(B1, ml)); \
|
236
|
+
D1 = _mm256_xor_si256(D1, A1); \
|
237
|
+
D1 = rotr16(D1); \
|
238
|
+
\
|
239
|
+
ml = _mm256_mul_epu32(C1, D1); \
|
240
|
+
ml = _mm256_add_epi64(ml, ml); \
|
241
|
+
C1 = _mm256_add_epi64(C1, _mm256_add_epi64(D1, ml)); \
|
242
|
+
B1 = _mm256_xor_si256(B1, C1); \
|
243
|
+
B1 = rotr63(B1); \
|
244
|
+
} while((void)0, 0);
|
245
|
+
|
246
|
+
#define DIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
|
247
|
+
do { \
|
248
|
+
B0 = _mm256_permute4x64_epi64(B0, _MM_SHUFFLE(0, 3, 2, 1)); \
|
249
|
+
C0 = _mm256_permute4x64_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
|
250
|
+
D0 = _mm256_permute4x64_epi64(D0, _MM_SHUFFLE(2, 1, 0, 3)); \
|
251
|
+
\
|
252
|
+
B1 = _mm256_permute4x64_epi64(B1, _MM_SHUFFLE(0, 3, 2, 1)); \
|
253
|
+
C1 = _mm256_permute4x64_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
|
254
|
+
D1 = _mm256_permute4x64_epi64(D1, _MM_SHUFFLE(2, 1, 0, 3)); \
|
255
|
+
} while((void)0, 0);
|
256
|
+
|
257
|
+
#define DIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
258
|
+
do { \
|
259
|
+
__m256i tmp1 = _mm256_blend_epi32(B0, B1, 0xCC); \
|
260
|
+
__m256i tmp2 = _mm256_blend_epi32(B0, B1, 0x33); \
|
261
|
+
B1 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
|
262
|
+
B0 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
|
263
|
+
\
|
264
|
+
tmp1 = C0; \
|
265
|
+
C0 = C1; \
|
266
|
+
C1 = tmp1; \
|
267
|
+
\
|
268
|
+
tmp1 = _mm256_blend_epi32(D0, D1, 0xCC); \
|
269
|
+
tmp2 = _mm256_blend_epi32(D0, D1, 0x33); \
|
270
|
+
D0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
|
271
|
+
D1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
|
272
|
+
} while(0);
|
273
|
+
|
274
|
+
#define UNDIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
|
275
|
+
do { \
|
276
|
+
B0 = _mm256_permute4x64_epi64(B0, _MM_SHUFFLE(2, 1, 0, 3)); \
|
277
|
+
C0 = _mm256_permute4x64_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
|
278
|
+
D0 = _mm256_permute4x64_epi64(D0, _MM_SHUFFLE(0, 3, 2, 1)); \
|
279
|
+
\
|
280
|
+
B1 = _mm256_permute4x64_epi64(B1, _MM_SHUFFLE(2, 1, 0, 3)); \
|
281
|
+
C1 = _mm256_permute4x64_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
|
282
|
+
D1 = _mm256_permute4x64_epi64(D1, _MM_SHUFFLE(0, 3, 2, 1)); \
|
283
|
+
} while((void)0, 0);
|
284
|
+
|
285
|
+
#define UNDIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
286
|
+
do { \
|
287
|
+
__m256i tmp1 = _mm256_blend_epi32(B0, B1, 0xCC); \
|
288
|
+
__m256i tmp2 = _mm256_blend_epi32(B0, B1, 0x33); \
|
289
|
+
B0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
|
290
|
+
B1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
|
291
|
+
\
|
292
|
+
tmp1 = C0; \
|
293
|
+
C0 = C1; \
|
294
|
+
C1 = tmp1; \
|
295
|
+
\
|
296
|
+
tmp1 = _mm256_blend_epi32(D0, D1, 0x33); \
|
297
|
+
tmp2 = _mm256_blend_epi32(D0, D1, 0xCC); \
|
298
|
+
D0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
|
299
|
+
D1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
|
300
|
+
} while((void)0, 0);
|
301
|
+
|
302
|
+
#define BLAKE2_ROUND_1(A0, A1, B0, B1, C0, C1, D0, D1) \
|
303
|
+
do{ \
|
304
|
+
G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
305
|
+
G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
306
|
+
\
|
307
|
+
DIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
|
308
|
+
\
|
309
|
+
G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
310
|
+
G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
311
|
+
\
|
312
|
+
UNDIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
|
313
|
+
} while((void)0, 0);
|
314
|
+
|
315
|
+
#define BLAKE2_ROUND_2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
316
|
+
do{ \
|
317
|
+
G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
318
|
+
G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
319
|
+
\
|
320
|
+
DIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
321
|
+
\
|
322
|
+
G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
323
|
+
G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
324
|
+
\
|
325
|
+
UNDIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
326
|
+
} while((void)0, 0);
|
327
|
+
|
328
|
+
#endif /* __AVX2__ */
|
329
|
+
|
330
|
+
#else /* __AVX512F__ */
|
331
|
+
|
332
|
+
#include <immintrin.h>
|
333
|
+
|
334
|
+
#define ror64(x, n) _mm512_ror_epi64((x), (n))
|
335
|
+
|
336
|
+
static __m512i muladd(__m512i x, __m512i y)
|
337
|
+
{
|
338
|
+
__m512i z = _mm512_mul_epu32(x, y);
|
339
|
+
return _mm512_add_epi64(_mm512_add_epi64(x, y), _mm512_add_epi64(z, z));
|
340
|
+
}
|
341
|
+
|
342
|
+
#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \
|
343
|
+
do { \
|
344
|
+
A0 = muladd(A0, B0); \
|
345
|
+
A1 = muladd(A1, B1); \
|
346
|
+
\
|
347
|
+
D0 = _mm512_xor_si512(D0, A0); \
|
348
|
+
D1 = _mm512_xor_si512(D1, A1); \
|
349
|
+
\
|
350
|
+
D0 = ror64(D0, 32); \
|
351
|
+
D1 = ror64(D1, 32); \
|
352
|
+
\
|
353
|
+
C0 = muladd(C0, D0); \
|
354
|
+
C1 = muladd(C1, D1); \
|
355
|
+
\
|
356
|
+
B0 = _mm512_xor_si512(B0, C0); \
|
357
|
+
B1 = _mm512_xor_si512(B1, C1); \
|
358
|
+
\
|
359
|
+
B0 = ror64(B0, 24); \
|
360
|
+
B1 = ror64(B1, 24); \
|
361
|
+
} while ((void)0, 0)
|
362
|
+
|
363
|
+
#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \
|
364
|
+
do { \
|
365
|
+
A0 = muladd(A0, B0); \
|
366
|
+
A1 = muladd(A1, B1); \
|
367
|
+
\
|
368
|
+
D0 = _mm512_xor_si512(D0, A0); \
|
369
|
+
D1 = _mm512_xor_si512(D1, A1); \
|
370
|
+
\
|
371
|
+
D0 = ror64(D0, 16); \
|
372
|
+
D1 = ror64(D1, 16); \
|
373
|
+
\
|
374
|
+
C0 = muladd(C0, D0); \
|
375
|
+
C1 = muladd(C1, D1); \
|
376
|
+
\
|
377
|
+
B0 = _mm512_xor_si512(B0, C0); \
|
378
|
+
B1 = _mm512_xor_si512(B1, C1); \
|
379
|
+
\
|
380
|
+
B0 = ror64(B0, 63); \
|
381
|
+
B1 = ror64(B1, 63); \
|
382
|
+
} while ((void)0, 0)
|
383
|
+
|
384
|
+
#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
|
385
|
+
do { \
|
386
|
+
B0 = _mm512_permutex_epi64(B0, _MM_SHUFFLE(0, 3, 2, 1)); \
|
387
|
+
B1 = _mm512_permutex_epi64(B1, _MM_SHUFFLE(0, 3, 2, 1)); \
|
388
|
+
\
|
389
|
+
C0 = _mm512_permutex_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
|
390
|
+
C1 = _mm512_permutex_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
|
391
|
+
\
|
392
|
+
D0 = _mm512_permutex_epi64(D0, _MM_SHUFFLE(2, 1, 0, 3)); \
|
393
|
+
D1 = _mm512_permutex_epi64(D1, _MM_SHUFFLE(2, 1, 0, 3)); \
|
394
|
+
} while ((void)0, 0)
|
395
|
+
|
396
|
+
#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
|
397
|
+
do { \
|
398
|
+
B0 = _mm512_permutex_epi64(B0, _MM_SHUFFLE(2, 1, 0, 3)); \
|
399
|
+
B1 = _mm512_permutex_epi64(B1, _MM_SHUFFLE(2, 1, 0, 3)); \
|
400
|
+
\
|
401
|
+
C0 = _mm512_permutex_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
|
402
|
+
C1 = _mm512_permutex_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
|
403
|
+
\
|
404
|
+
D0 = _mm512_permutex_epi64(D0, _MM_SHUFFLE(0, 3, 2, 1)); \
|
405
|
+
D1 = _mm512_permutex_epi64(D1, _MM_SHUFFLE(0, 3, 2, 1)); \
|
406
|
+
} while ((void)0, 0)
|
407
|
+
|
408
|
+
#define BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1) \
|
409
|
+
do { \
|
410
|
+
G1(A0, B0, C0, D0, A1, B1, C1, D1); \
|
411
|
+
G2(A0, B0, C0, D0, A1, B1, C1, D1); \
|
412
|
+
\
|
413
|
+
DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
|
414
|
+
\
|
415
|
+
G1(A0, B0, C0, D0, A1, B1, C1, D1); \
|
416
|
+
G2(A0, B0, C0, D0, A1, B1, C1, D1); \
|
417
|
+
\
|
418
|
+
UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
|
419
|
+
} while ((void)0, 0)
|
420
|
+
|
421
|
+
#define SWAP_HALVES(A0, A1) \
|
422
|
+
do { \
|
423
|
+
__m512i t0, t1; \
|
424
|
+
t0 = _mm512_shuffle_i64x2(A0, A1, _MM_SHUFFLE(1, 0, 1, 0)); \
|
425
|
+
t1 = _mm512_shuffle_i64x2(A0, A1, _MM_SHUFFLE(3, 2, 3, 2)); \
|
426
|
+
A0 = t0; \
|
427
|
+
A1 = t1; \
|
428
|
+
} while((void)0, 0)
|
429
|
+
|
430
|
+
#define SWAP_QUARTERS(A0, A1) \
|
431
|
+
do { \
|
432
|
+
SWAP_HALVES(A0, A1); \
|
433
|
+
A0 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A0); \
|
434
|
+
A1 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A1); \
|
435
|
+
} while((void)0, 0)
|
436
|
+
|
437
|
+
#define UNSWAP_QUARTERS(A0, A1) \
|
438
|
+
do { \
|
439
|
+
A0 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A0); \
|
440
|
+
A1 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A1); \
|
441
|
+
SWAP_HALVES(A0, A1); \
|
442
|
+
} while((void)0, 0)
|
443
|
+
|
444
|
+
#define BLAKE2_ROUND_1(A0, C0, B0, D0, A1, C1, B1, D1) \
|
445
|
+
do { \
|
446
|
+
SWAP_HALVES(A0, B0); \
|
447
|
+
SWAP_HALVES(C0, D0); \
|
448
|
+
SWAP_HALVES(A1, B1); \
|
449
|
+
SWAP_HALVES(C1, D1); \
|
450
|
+
BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1); \
|
451
|
+
SWAP_HALVES(A0, B0); \
|
452
|
+
SWAP_HALVES(C0, D0); \
|
453
|
+
SWAP_HALVES(A1, B1); \
|
454
|
+
SWAP_HALVES(C1, D1); \
|
455
|
+
} while ((void)0, 0)
|
456
|
+
|
457
|
+
#define BLAKE2_ROUND_2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
458
|
+
do { \
|
459
|
+
SWAP_QUARTERS(A0, A1); \
|
460
|
+
SWAP_QUARTERS(B0, B1); \
|
461
|
+
SWAP_QUARTERS(C0, C1); \
|
462
|
+
SWAP_QUARTERS(D0, D1); \
|
463
|
+
BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1); \
|
464
|
+
UNSWAP_QUARTERS(A0, A1); \
|
465
|
+
UNSWAP_QUARTERS(B0, B1); \
|
466
|
+
UNSWAP_QUARTERS(C0, C1); \
|
467
|
+
UNSWAP_QUARTERS(D0, D1); \
|
468
|
+
} while ((void)0, 0)
|
469
|
+
|
470
|
+
#endif /* __AVX512F__ */
|
471
|
+
#endif /* BLAKE_ROUND_MKA_OPT_H */
|