blake2b 0.9.0 → 0.10.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +39 -5
- data/blake2b.gemspec +1 -1
- data/ext/blake2b_ext/blake2-config.h +72 -0
- data/ext/blake2b_ext/blake2b-load-sse2.h +68 -0
- data/ext/blake2b_ext/blake2b-load-sse41.h +402 -0
- data/ext/blake2b_ext/blake2b-ref.c +92 -98
- data/ext/blake2b_ext/blake2b-round.h +157 -0
- data/ext/blake2b_ext/extconf.rb +1 -1
- data/ext/blake2b_ext/rbext.c +5 -5
- data/performance/performance_suite.rb +61 -0
- metadata +6 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1dc7b3e51fa897480c2b5c8e255b2b0fba72fde3945612bbcdd333e79b5254da
|
4
|
+
data.tar.gz: 06637f83a411f3b24b9b31b09b997e1cbc2fe264d583d52fe646d97c7f88b63e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5ae81bc780bf5f4391bcd90f85cabefb3c1c735555df20a3dec205cbed741c298631566c0f05bf850fa13a50074a7311a061f84fa7a2d17c33f58600be25a19f
|
7
|
+
data.tar.gz: 5d4bd5931b54044d5729fea12b1d65f22d7643a111a1967d28a2907b1e86aa070855f06194c4fcd4d9fcc2f20fea84319d4af6022ef54660f1efd2efb44ff2c4
|
data/README.md
CHANGED
@@ -4,19 +4,19 @@ BLAKE2 is a cryptographic hash function faster than MD5, SHA-1, SHA-2, and SHA-3
|
|
4
4
|
|
5
5
|
More info at: [https://blake2.net](https://blake2.net).
|
6
6
|
|
7
|
-
##
|
7
|
+
## Summary
|
8
8
|
|
9
|
-
This gem is a C-extension
|
9
|
+
This gem is a C-extension to enable using BLAKE2b in Ruby. This BLAKE2b implementation (or just BLAKE2) is optimized for 64-bit platforms with SSE support (excluding NEON-enabled ARMs). It produces digests of any size between 1 and 64 bytes.
|
10
10
|
|
11
11
|
The C code for this gem is taken from the [official reference C implementation](https://github.com/BLAKE2/BLAKE2) as of commit [ca4c89314abff54e3806b44e4a08164f8204f09a](https://github.com/BLAKE2/BLAKE2/tree/ca4c89314abff54e3806b44e4a08164f8204f09a).
|
12
12
|
|
13
|
-
##
|
13
|
+
## Install
|
14
14
|
|
15
15
|
```
|
16
16
|
gem install blake2b
|
17
17
|
```
|
18
18
|
|
19
|
-
##
|
19
|
+
## Usage
|
20
20
|
|
21
21
|
``` ruby
|
22
22
|
require 'blake2b'
|
@@ -60,7 +60,41 @@ Blake2b.bytes(input, key, out_len)
|
|
60
60
|
|
61
61
|
```
|
62
62
|
|
63
|
-
##
|
63
|
+
## Performance
|
64
|
+
|
65
|
+
`Blake2b` really shines on larger inputs. Here are some benchmarks on various input sizes. You can find the performance suite used for these benchmarks at `performance/performance_suite.rb`. All tests were run on an iMac 27" Late 2014, 4GHz Core i7 CPU (4790K) w/ SSE4.1 + SSE4.2, 32GB DDR3 RAM.
|
66
|
+
|
67
|
+
### 1KB (1M digests)
|
68
|
+
|
69
|
+
```
|
70
|
+
MD5 result: 2.694545999998809 seconds.
|
71
|
+
SHA2 result: 4.037195000011707 seconds.
|
72
|
+
SHA512 result: 3.213850000000093 seconds.
|
73
|
+
BLAKE2s result: 5.6867979999951785 seconds.
|
74
|
+
BLAKE2b result: 4.375018999999156 seconds.
|
75
|
+
```
|
76
|
+
|
77
|
+
### 50KB (500k digests)
|
78
|
+
|
79
|
+
```
|
80
|
+
MD5 result: 34.33997299999464 seconds.
|
81
|
+
SHA2 result: 50.161426999999094 seconds.
|
82
|
+
SHA512 result: 35.24845699999423 seconds.
|
83
|
+
BLAKE2s result: 64.8592859999917 seconds.
|
84
|
+
BLAKE2b result: 30.783814999987953 seconds.
|
85
|
+
```
|
86
|
+
|
87
|
+
### 250KB (500k digests)
|
88
|
+
|
89
|
+
```
|
90
|
+
MD5 result: 67.89016799999808 seconds.
|
91
|
+
SHA2 result: 103.09026799999992 seconds.
|
92
|
+
SHA512 result: 72.46762200001103 seconds.
|
93
|
+
BLAKE2s result: 133.5229810000019 seconds.
|
94
|
+
BLAKE2b result: 64.30263599999307 seconds.
|
95
|
+
```
|
96
|
+
|
97
|
+
## Development
|
64
98
|
|
65
99
|
After checking out the repo, run `bundle` to install dependencies. Then,
|
66
100
|
run `rake full` to build and test, or `rake test` to only run the tests.
|
data/blake2b.gemspec
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# coding: utf-8
|
2
2
|
Gem::Specification.new do |spec|
|
3
3
|
spec.name = "blake2b"
|
4
|
-
spec.version = "0.9.0"
|
4
|
+
spec.version = "0.10.0"
|
5
5
|
spec.authors = ["Franck Verrot", "Mauricio Gomes"]
|
6
6
|
spec.email = ["mauricio@edge14.com"]
|
7
7
|
spec.homepage = "https://github.com/mgomes/blake2b"
|
@@ -0,0 +1,72 @@
|
|
1
|
+
/*
|
2
|
+
BLAKE2 reference source code package - optimized C implementations
|
3
|
+
|
4
|
+
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
|
5
|
+
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
|
6
|
+
your option. The terms of these licenses can be found at:
|
7
|
+
|
8
|
+
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
|
9
|
+
- OpenSSL license : https://www.openssl.org/source/license.html
|
10
|
+
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
|
12
|
+
More information about the BLAKE2 hash function can be found at
|
13
|
+
https://blake2.net.
|
14
|
+
*/
|
15
|
+
#ifndef BLAKE2_CONFIG_H
|
16
|
+
#define BLAKE2_CONFIG_H
|
17
|
+
|
18
|
+
/* These don't work everywhere */
|
19
|
+
#if defined(__SSE2__) || defined(__x86_64__) || defined(__amd64__)
|
20
|
+
#define HAVE_SSE2
|
21
|
+
#endif
|
22
|
+
|
23
|
+
#if defined(__SSSE3__)
|
24
|
+
#define HAVE_SSSE3
|
25
|
+
#endif
|
26
|
+
|
27
|
+
#if defined(__SSE4_1__)
|
28
|
+
#define HAVE_SSE41
|
29
|
+
#endif
|
30
|
+
|
31
|
+
#if defined(__AVX__)
|
32
|
+
#define HAVE_AVX
|
33
|
+
#endif
|
34
|
+
|
35
|
+
#if defined(__XOP__)
|
36
|
+
#define HAVE_XOP
|
37
|
+
#endif
|
38
|
+
|
39
|
+
|
40
|
+
#ifdef HAVE_AVX2
|
41
|
+
#ifndef HAVE_AVX
|
42
|
+
#define HAVE_AVX
|
43
|
+
#endif
|
44
|
+
#endif
|
45
|
+
|
46
|
+
#ifdef HAVE_XOP
|
47
|
+
#ifndef HAVE_AVX
|
48
|
+
#define HAVE_AVX
|
49
|
+
#endif
|
50
|
+
#endif
|
51
|
+
|
52
|
+
#ifdef HAVE_AVX
|
53
|
+
#ifndef HAVE_SSE41
|
54
|
+
#define HAVE_SSE41
|
55
|
+
#endif
|
56
|
+
#endif
|
57
|
+
|
58
|
+
#ifdef HAVE_SSE41
|
59
|
+
#ifndef HAVE_SSSE3
|
60
|
+
#define HAVE_SSSE3
|
61
|
+
#endif
|
62
|
+
#endif
|
63
|
+
|
64
|
+
#ifdef HAVE_SSSE3
|
65
|
+
#define HAVE_SSE2
|
66
|
+
#endif
|
67
|
+
|
68
|
+
#if !defined(HAVE_SSE2)
|
69
|
+
#error "This code requires at least SSE2."
|
70
|
+
#endif
|
71
|
+
|
72
|
+
#endif
|
@@ -0,0 +1,68 @@
|
|
1
|
+
/*
|
2
|
+
BLAKE2 reference source code package - optimized C implementations
|
3
|
+
|
4
|
+
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
|
5
|
+
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
|
6
|
+
your option. The terms of these licenses can be found at:
|
7
|
+
|
8
|
+
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
|
9
|
+
- OpenSSL license : https://www.openssl.org/source/license.html
|
10
|
+
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
|
12
|
+
More information about the BLAKE2 hash function can be found at
|
13
|
+
https://blake2.net.
|
14
|
+
*/
|
15
|
+
#ifndef BLAKE2B_LOAD_SSE2_H
|
16
|
+
#define BLAKE2B_LOAD_SSE2_H
|
17
|
+
|
18
|
+
#define LOAD_MSG_0_1(b0, b1) b0 = _mm_set_epi64x(m2, m0); b1 = _mm_set_epi64x(m6, m4)
|
19
|
+
#define LOAD_MSG_0_2(b0, b1) b0 = _mm_set_epi64x(m3, m1); b1 = _mm_set_epi64x(m7, m5)
|
20
|
+
#define LOAD_MSG_0_3(b0, b1) b0 = _mm_set_epi64x(m10, m8); b1 = _mm_set_epi64x(m14, m12)
|
21
|
+
#define LOAD_MSG_0_4(b0, b1) b0 = _mm_set_epi64x(m11, m9); b1 = _mm_set_epi64x(m15, m13)
|
22
|
+
#define LOAD_MSG_1_1(b0, b1) b0 = _mm_set_epi64x(m4, m14); b1 = _mm_set_epi64x(m13, m9)
|
23
|
+
#define LOAD_MSG_1_2(b0, b1) b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m6, m15)
|
24
|
+
#define LOAD_MSG_1_3(b0, b1) b0 = _mm_set_epi64x(m0, m1); b1 = _mm_set_epi64x(m5, m11)
|
25
|
+
#define LOAD_MSG_1_4(b0, b1) b0 = _mm_set_epi64x(m2, m12); b1 = _mm_set_epi64x(m3, m7)
|
26
|
+
#define LOAD_MSG_2_1(b0, b1) b0 = _mm_set_epi64x(m12, m11); b1 = _mm_set_epi64x(m15, m5)
|
27
|
+
#define LOAD_MSG_2_2(b0, b1) b0 = _mm_set_epi64x(m0, m8); b1 = _mm_set_epi64x(m13, m2)
|
28
|
+
#define LOAD_MSG_2_3(b0, b1) b0 = _mm_set_epi64x(m3, m10); b1 = _mm_set_epi64x(m9, m7)
|
29
|
+
#define LOAD_MSG_2_4(b0, b1) b0 = _mm_set_epi64x(m6, m14); b1 = _mm_set_epi64x(m4, m1)
|
30
|
+
#define LOAD_MSG_3_1(b0, b1) b0 = _mm_set_epi64x(m3, m7); b1 = _mm_set_epi64x(m11, m13)
|
31
|
+
#define LOAD_MSG_3_2(b0, b1) b0 = _mm_set_epi64x(m1, m9); b1 = _mm_set_epi64x(m14, m12)
|
32
|
+
#define LOAD_MSG_3_3(b0, b1) b0 = _mm_set_epi64x(m5, m2); b1 = _mm_set_epi64x(m15, m4)
|
33
|
+
#define LOAD_MSG_3_4(b0, b1) b0 = _mm_set_epi64x(m10, m6); b1 = _mm_set_epi64x(m8, m0)
|
34
|
+
#define LOAD_MSG_4_1(b0, b1) b0 = _mm_set_epi64x(m5, m9); b1 = _mm_set_epi64x(m10, m2)
|
35
|
+
#define LOAD_MSG_4_2(b0, b1) b0 = _mm_set_epi64x(m7, m0); b1 = _mm_set_epi64x(m15, m4)
|
36
|
+
#define LOAD_MSG_4_3(b0, b1) b0 = _mm_set_epi64x(m11, m14); b1 = _mm_set_epi64x(m3, m6)
|
37
|
+
#define LOAD_MSG_4_4(b0, b1) b0 = _mm_set_epi64x(m12, m1); b1 = _mm_set_epi64x(m13, m8)
|
38
|
+
#define LOAD_MSG_5_1(b0, b1) b0 = _mm_set_epi64x(m6, m2); b1 = _mm_set_epi64x(m8, m0)
|
39
|
+
#define LOAD_MSG_5_2(b0, b1) b0 = _mm_set_epi64x(m10, m12); b1 = _mm_set_epi64x(m3, m11)
|
40
|
+
#define LOAD_MSG_5_3(b0, b1) b0 = _mm_set_epi64x(m7, m4); b1 = _mm_set_epi64x(m1, m15)
|
41
|
+
#define LOAD_MSG_5_4(b0, b1) b0 = _mm_set_epi64x(m5, m13); b1 = _mm_set_epi64x(m9, m14)
|
42
|
+
#define LOAD_MSG_6_1(b0, b1) b0 = _mm_set_epi64x(m1, m12); b1 = _mm_set_epi64x(m4, m14)
|
43
|
+
#define LOAD_MSG_6_2(b0, b1) b0 = _mm_set_epi64x(m15, m5); b1 = _mm_set_epi64x(m10, m13)
|
44
|
+
#define LOAD_MSG_6_3(b0, b1) b0 = _mm_set_epi64x(m6, m0); b1 = _mm_set_epi64x(m8, m9)
|
45
|
+
#define LOAD_MSG_6_4(b0, b1) b0 = _mm_set_epi64x(m3, m7); b1 = _mm_set_epi64x(m11, m2)
|
46
|
+
#define LOAD_MSG_7_1(b0, b1) b0 = _mm_set_epi64x(m7, m13); b1 = _mm_set_epi64x(m3, m12)
|
47
|
+
#define LOAD_MSG_7_2(b0, b1) b0 = _mm_set_epi64x(m14, m11); b1 = _mm_set_epi64x(m9, m1)
|
48
|
+
#define LOAD_MSG_7_3(b0, b1) b0 = _mm_set_epi64x(m15, m5); b1 = _mm_set_epi64x(m2, m8)
|
49
|
+
#define LOAD_MSG_7_4(b0, b1) b0 = _mm_set_epi64x(m4, m0); b1 = _mm_set_epi64x(m10, m6)
|
50
|
+
#define LOAD_MSG_8_1(b0, b1) b0 = _mm_set_epi64x(m14, m6); b1 = _mm_set_epi64x(m0, m11)
|
51
|
+
#define LOAD_MSG_8_2(b0, b1) b0 = _mm_set_epi64x(m9, m15); b1 = _mm_set_epi64x(m8, m3)
|
52
|
+
#define LOAD_MSG_8_3(b0, b1) b0 = _mm_set_epi64x(m13, m12); b1 = _mm_set_epi64x(m10, m1)
|
53
|
+
#define LOAD_MSG_8_4(b0, b1) b0 = _mm_set_epi64x(m7, m2); b1 = _mm_set_epi64x(m5, m4)
|
54
|
+
#define LOAD_MSG_9_1(b0, b1) b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m1, m7)
|
55
|
+
#define LOAD_MSG_9_2(b0, b1) b0 = _mm_set_epi64x(m4, m2); b1 = _mm_set_epi64x(m5, m6)
|
56
|
+
#define LOAD_MSG_9_3(b0, b1) b0 = _mm_set_epi64x(m9, m15); b1 = _mm_set_epi64x(m13, m3)
|
57
|
+
#define LOAD_MSG_9_4(b0, b1) b0 = _mm_set_epi64x(m14, m11); b1 = _mm_set_epi64x(m0, m12)
|
58
|
+
#define LOAD_MSG_10_1(b0, b1) b0 = _mm_set_epi64x(m2, m0); b1 = _mm_set_epi64x(m6, m4)
|
59
|
+
#define LOAD_MSG_10_2(b0, b1) b0 = _mm_set_epi64x(m3, m1); b1 = _mm_set_epi64x(m7, m5)
|
60
|
+
#define LOAD_MSG_10_3(b0, b1) b0 = _mm_set_epi64x(m10, m8); b1 = _mm_set_epi64x(m14, m12)
|
61
|
+
#define LOAD_MSG_10_4(b0, b1) b0 = _mm_set_epi64x(m11, m9); b1 = _mm_set_epi64x(m15, m13)
|
62
|
+
#define LOAD_MSG_11_1(b0, b1) b0 = _mm_set_epi64x(m4, m14); b1 = _mm_set_epi64x(m13, m9)
|
63
|
+
#define LOAD_MSG_11_2(b0, b1) b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m6, m15)
|
64
|
+
#define LOAD_MSG_11_3(b0, b1) b0 = _mm_set_epi64x(m0, m1); b1 = _mm_set_epi64x(m5, m11)
|
65
|
+
#define LOAD_MSG_11_4(b0, b1) b0 = _mm_set_epi64x(m2, m12); b1 = _mm_set_epi64x(m3, m7)
|
66
|
+
|
67
|
+
|
68
|
+
#endif
|
@@ -0,0 +1,402 @@
|
|
1
|
+
/*
|
2
|
+
BLAKE2 reference source code package - optimized C implementations
|
3
|
+
|
4
|
+
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
|
5
|
+
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
|
6
|
+
your option. The terms of these licenses can be found at:
|
7
|
+
|
8
|
+
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
|
9
|
+
- OpenSSL license : https://www.openssl.org/source/license.html
|
10
|
+
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
|
12
|
+
More information about the BLAKE2 hash function can be found at
|
13
|
+
https://blake2.net.
|
14
|
+
*/
|
15
|
+
#ifndef BLAKE2B_LOAD_SSE41_H
|
16
|
+
#define BLAKE2B_LOAD_SSE41_H
|
17
|
+
|
18
|
+
#define LOAD_MSG_0_1(b0, b1) \
|
19
|
+
do \
|
20
|
+
{ \
|
21
|
+
b0 = _mm_unpacklo_epi64(m0, m1); \
|
22
|
+
b1 = _mm_unpacklo_epi64(m2, m3); \
|
23
|
+
} while(0)
|
24
|
+
|
25
|
+
|
26
|
+
#define LOAD_MSG_0_2(b0, b1) \
|
27
|
+
do \
|
28
|
+
{ \
|
29
|
+
b0 = _mm_unpackhi_epi64(m0, m1); \
|
30
|
+
b1 = _mm_unpackhi_epi64(m2, m3); \
|
31
|
+
} while(0)
|
32
|
+
|
33
|
+
|
34
|
+
#define LOAD_MSG_0_3(b0, b1) \
|
35
|
+
do \
|
36
|
+
{ \
|
37
|
+
b0 = _mm_unpacklo_epi64(m4, m5); \
|
38
|
+
b1 = _mm_unpacklo_epi64(m6, m7); \
|
39
|
+
} while(0)
|
40
|
+
|
41
|
+
|
42
|
+
#define LOAD_MSG_0_4(b0, b1) \
|
43
|
+
do \
|
44
|
+
{ \
|
45
|
+
b0 = _mm_unpackhi_epi64(m4, m5); \
|
46
|
+
b1 = _mm_unpackhi_epi64(m6, m7); \
|
47
|
+
} while(0)
|
48
|
+
|
49
|
+
|
50
|
+
#define LOAD_MSG_1_1(b0, b1) \
|
51
|
+
do \
|
52
|
+
{ \
|
53
|
+
b0 = _mm_unpacklo_epi64(m7, m2); \
|
54
|
+
b1 = _mm_unpackhi_epi64(m4, m6); \
|
55
|
+
} while(0)
|
56
|
+
|
57
|
+
|
58
|
+
#define LOAD_MSG_1_2(b0, b1) \
|
59
|
+
do \
|
60
|
+
{ \
|
61
|
+
b0 = _mm_unpacklo_epi64(m5, m4); \
|
62
|
+
b1 = _mm_alignr_epi8(m3, m7, 8); \
|
63
|
+
} while(0)
|
64
|
+
|
65
|
+
|
66
|
+
#define LOAD_MSG_1_3(b0, b1) \
|
67
|
+
do \
|
68
|
+
{ \
|
69
|
+
b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)); \
|
70
|
+
b1 = _mm_unpackhi_epi64(m5, m2); \
|
71
|
+
} while(0)
|
72
|
+
|
73
|
+
|
74
|
+
#define LOAD_MSG_1_4(b0, b1) \
|
75
|
+
do \
|
76
|
+
{ \
|
77
|
+
b0 = _mm_unpacklo_epi64(m6, m1); \
|
78
|
+
b1 = _mm_unpackhi_epi64(m3, m1); \
|
79
|
+
} while(0)
|
80
|
+
|
81
|
+
|
82
|
+
#define LOAD_MSG_2_1(b0, b1) \
|
83
|
+
do \
|
84
|
+
{ \
|
85
|
+
b0 = _mm_alignr_epi8(m6, m5, 8); \
|
86
|
+
b1 = _mm_unpackhi_epi64(m2, m7); \
|
87
|
+
} while(0)
|
88
|
+
|
89
|
+
|
90
|
+
#define LOAD_MSG_2_2(b0, b1) \
|
91
|
+
do \
|
92
|
+
{ \
|
93
|
+
b0 = _mm_unpacklo_epi64(m4, m0); \
|
94
|
+
b1 = _mm_blend_epi16(m1, m6, 0xF0); \
|
95
|
+
} while(0)
|
96
|
+
|
97
|
+
|
98
|
+
#define LOAD_MSG_2_3(b0, b1) \
|
99
|
+
do \
|
100
|
+
{ \
|
101
|
+
b0 = _mm_blend_epi16(m5, m1, 0xF0); \
|
102
|
+
b1 = _mm_unpackhi_epi64(m3, m4); \
|
103
|
+
} while(0)
|
104
|
+
|
105
|
+
|
106
|
+
#define LOAD_MSG_2_4(b0, b1) \
|
107
|
+
do \
|
108
|
+
{ \
|
109
|
+
b0 = _mm_unpacklo_epi64(m7, m3); \
|
110
|
+
b1 = _mm_alignr_epi8(m2, m0, 8); \
|
111
|
+
} while(0)
|
112
|
+
|
113
|
+
|
114
|
+
#define LOAD_MSG_3_1(b0, b1) \
|
115
|
+
do \
|
116
|
+
{ \
|
117
|
+
b0 = _mm_unpackhi_epi64(m3, m1); \
|
118
|
+
b1 = _mm_unpackhi_epi64(m6, m5); \
|
119
|
+
} while(0)
|
120
|
+
|
121
|
+
|
122
|
+
#define LOAD_MSG_3_2(b0, b1) \
|
123
|
+
do \
|
124
|
+
{ \
|
125
|
+
b0 = _mm_unpackhi_epi64(m4, m0); \
|
126
|
+
b1 = _mm_unpacklo_epi64(m6, m7); \
|
127
|
+
} while(0)
|
128
|
+
|
129
|
+
|
130
|
+
#define LOAD_MSG_3_3(b0, b1) \
|
131
|
+
do \
|
132
|
+
{ \
|
133
|
+
b0 = _mm_blend_epi16(m1, m2, 0xF0); \
|
134
|
+
b1 = _mm_blend_epi16(m2, m7, 0xF0); \
|
135
|
+
} while(0)
|
136
|
+
|
137
|
+
|
138
|
+
#define LOAD_MSG_3_4(b0, b1) \
|
139
|
+
do \
|
140
|
+
{ \
|
141
|
+
b0 = _mm_unpacklo_epi64(m3, m5); \
|
142
|
+
b1 = _mm_unpacklo_epi64(m0, m4); \
|
143
|
+
} while(0)
|
144
|
+
|
145
|
+
|
146
|
+
#define LOAD_MSG_4_1(b0, b1) \
|
147
|
+
do \
|
148
|
+
{ \
|
149
|
+
b0 = _mm_unpackhi_epi64(m4, m2); \
|
150
|
+
b1 = _mm_unpacklo_epi64(m1, m5); \
|
151
|
+
} while(0)
|
152
|
+
|
153
|
+
|
154
|
+
#define LOAD_MSG_4_2(b0, b1) \
|
155
|
+
do \
|
156
|
+
{ \
|
157
|
+
b0 = _mm_blend_epi16(m0, m3, 0xF0); \
|
158
|
+
b1 = _mm_blend_epi16(m2, m7, 0xF0); \
|
159
|
+
} while(0)
|
160
|
+
|
161
|
+
|
162
|
+
#define LOAD_MSG_4_3(b0, b1) \
|
163
|
+
do \
|
164
|
+
{ \
|
165
|
+
b0 = _mm_blend_epi16(m7, m5, 0xF0); \
|
166
|
+
b1 = _mm_blend_epi16(m3, m1, 0xF0); \
|
167
|
+
} while(0)
|
168
|
+
|
169
|
+
|
170
|
+
#define LOAD_MSG_4_4(b0, b1) \
|
171
|
+
do \
|
172
|
+
{ \
|
173
|
+
b0 = _mm_alignr_epi8(m6, m0, 8); \
|
174
|
+
b1 = _mm_blend_epi16(m4, m6, 0xF0); \
|
175
|
+
} while(0)
|
176
|
+
|
177
|
+
|
178
|
+
#define LOAD_MSG_5_1(b0, b1) \
|
179
|
+
do \
|
180
|
+
{ \
|
181
|
+
b0 = _mm_unpacklo_epi64(m1, m3); \
|
182
|
+
b1 = _mm_unpacklo_epi64(m0, m4); \
|
183
|
+
} while(0)
|
184
|
+
|
185
|
+
|
186
|
+
#define LOAD_MSG_5_2(b0, b1) \
|
187
|
+
do \
|
188
|
+
{ \
|
189
|
+
b0 = _mm_unpacklo_epi64(m6, m5); \
|
190
|
+
b1 = _mm_unpackhi_epi64(m5, m1); \
|
191
|
+
} while(0)
|
192
|
+
|
193
|
+
|
194
|
+
#define LOAD_MSG_5_3(b0, b1) \
|
195
|
+
do \
|
196
|
+
{ \
|
197
|
+
b0 = _mm_blend_epi16(m2, m3, 0xF0); \
|
198
|
+
b1 = _mm_unpackhi_epi64(m7, m0); \
|
199
|
+
} while(0)
|
200
|
+
|
201
|
+
|
202
|
+
#define LOAD_MSG_5_4(b0, b1) \
|
203
|
+
do \
|
204
|
+
{ \
|
205
|
+
b0 = _mm_unpackhi_epi64(m6, m2); \
|
206
|
+
b1 = _mm_blend_epi16(m7, m4, 0xF0); \
|
207
|
+
} while(0)
|
208
|
+
|
209
|
+
|
210
|
+
#define LOAD_MSG_6_1(b0, b1) \
|
211
|
+
do \
|
212
|
+
{ \
|
213
|
+
b0 = _mm_blend_epi16(m6, m0, 0xF0); \
|
214
|
+
b1 = _mm_unpacklo_epi64(m7, m2); \
|
215
|
+
} while(0)
|
216
|
+
|
217
|
+
|
218
|
+
#define LOAD_MSG_6_2(b0, b1) \
|
219
|
+
do \
|
220
|
+
{ \
|
221
|
+
b0 = _mm_unpackhi_epi64(m2, m7); \
|
222
|
+
b1 = _mm_alignr_epi8(m5, m6, 8); \
|
223
|
+
} while(0)
|
224
|
+
|
225
|
+
|
226
|
+
#define LOAD_MSG_6_3(b0, b1) \
|
227
|
+
do \
|
228
|
+
{ \
|
229
|
+
b0 = _mm_unpacklo_epi64(m0, m3); \
|
230
|
+
b1 = _mm_shuffle_epi32(m4, _MM_SHUFFLE(1,0,3,2)); \
|
231
|
+
} while(0)
|
232
|
+
|
233
|
+
|
234
|
+
#define LOAD_MSG_6_4(b0, b1) \
|
235
|
+
do \
|
236
|
+
{ \
|
237
|
+
b0 = _mm_unpackhi_epi64(m3, m1); \
|
238
|
+
b1 = _mm_blend_epi16(m1, m5, 0xF0); \
|
239
|
+
} while(0)
|
240
|
+
|
241
|
+
|
242
|
+
#define LOAD_MSG_7_1(b0, b1) \
|
243
|
+
do \
|
244
|
+
{ \
|
245
|
+
b0 = _mm_unpackhi_epi64(m6, m3); \
|
246
|
+
b1 = _mm_blend_epi16(m6, m1, 0xF0); \
|
247
|
+
} while(0)
|
248
|
+
|
249
|
+
|
250
|
+
#define LOAD_MSG_7_2(b0, b1) \
|
251
|
+
do \
|
252
|
+
{ \
|
253
|
+
b0 = _mm_alignr_epi8(m7, m5, 8); \
|
254
|
+
b1 = _mm_unpackhi_epi64(m0, m4); \
|
255
|
+
} while(0)
|
256
|
+
|
257
|
+
|
258
|
+
#define LOAD_MSG_7_3(b0, b1) \
|
259
|
+
do \
|
260
|
+
{ \
|
261
|
+
b0 = _mm_unpackhi_epi64(m2, m7); \
|
262
|
+
b1 = _mm_unpacklo_epi64(m4, m1); \
|
263
|
+
} while(0)
|
264
|
+
|
265
|
+
|
266
|
+
#define LOAD_MSG_7_4(b0, b1) \
|
267
|
+
do \
|
268
|
+
{ \
|
269
|
+
b0 = _mm_unpacklo_epi64(m0, m2); \
|
270
|
+
b1 = _mm_unpacklo_epi64(m3, m5); \
|
271
|
+
} while(0)
|
272
|
+
|
273
|
+
|
274
|
+
#define LOAD_MSG_8_1(b0, b1) \
|
275
|
+
do \
|
276
|
+
{ \
|
277
|
+
b0 = _mm_unpacklo_epi64(m3, m7); \
|
278
|
+
b1 = _mm_alignr_epi8(m0, m5, 8); \
|
279
|
+
} while(0)
|
280
|
+
|
281
|
+
|
282
|
+
#define LOAD_MSG_8_2(b0, b1) \
|
283
|
+
do \
|
284
|
+
{ \
|
285
|
+
b0 = _mm_unpackhi_epi64(m7, m4); \
|
286
|
+
b1 = _mm_alignr_epi8(m4, m1, 8); \
|
287
|
+
} while(0)
|
288
|
+
|
289
|
+
|
290
|
+
#define LOAD_MSG_8_3(b0, b1) \
|
291
|
+
do \
|
292
|
+
{ \
|
293
|
+
b0 = m6; \
|
294
|
+
b1 = _mm_alignr_epi8(m5, m0, 8); \
|
295
|
+
} while(0)
|
296
|
+
|
297
|
+
|
298
|
+
#define LOAD_MSG_8_4(b0, b1) \
|
299
|
+
do \
|
300
|
+
{ \
|
301
|
+
b0 = _mm_blend_epi16(m1, m3, 0xF0); \
|
302
|
+
b1 = m2; \
|
303
|
+
} while(0)
|
304
|
+
|
305
|
+
|
306
|
+
#define LOAD_MSG_9_1(b0, b1) \
|
307
|
+
do \
|
308
|
+
{ \
|
309
|
+
b0 = _mm_unpacklo_epi64(m5, m4); \
|
310
|
+
b1 = _mm_unpackhi_epi64(m3, m0); \
|
311
|
+
} while(0)
|
312
|
+
|
313
|
+
|
314
|
+
#define LOAD_MSG_9_2(b0, b1) \
|
315
|
+
do \
|
316
|
+
{ \
|
317
|
+
b0 = _mm_unpacklo_epi64(m1, m2); \
|
318
|
+
b1 = _mm_blend_epi16(m3, m2, 0xF0); \
|
319
|
+
} while(0)
|
320
|
+
|
321
|
+
|
322
|
+
#define LOAD_MSG_9_3(b0, b1) \
|
323
|
+
do \
|
324
|
+
{ \
|
325
|
+
b0 = _mm_unpackhi_epi64(m7, m4); \
|
326
|
+
b1 = _mm_unpackhi_epi64(m1, m6); \
|
327
|
+
} while(0)
|
328
|
+
|
329
|
+
|
330
|
+
#define LOAD_MSG_9_4(b0, b1) \
|
331
|
+
do \
|
332
|
+
{ \
|
333
|
+
b0 = _mm_alignr_epi8(m7, m5, 8); \
|
334
|
+
b1 = _mm_unpacklo_epi64(m6, m0); \
|
335
|
+
} while(0)
|
336
|
+
|
337
|
+
|
338
|
+
#define LOAD_MSG_10_1(b0, b1) \
|
339
|
+
do \
|
340
|
+
{ \
|
341
|
+
b0 = _mm_unpacklo_epi64(m0, m1); \
|
342
|
+
b1 = _mm_unpacklo_epi64(m2, m3); \
|
343
|
+
} while(0)
|
344
|
+
|
345
|
+
|
346
|
+
#define LOAD_MSG_10_2(b0, b1) \
|
347
|
+
do \
|
348
|
+
{ \
|
349
|
+
b0 = _mm_unpackhi_epi64(m0, m1); \
|
350
|
+
b1 = _mm_unpackhi_epi64(m2, m3); \
|
351
|
+
} while(0)
|
352
|
+
|
353
|
+
|
354
|
+
#define LOAD_MSG_10_3(b0, b1) \
|
355
|
+
do \
|
356
|
+
{ \
|
357
|
+
b0 = _mm_unpacklo_epi64(m4, m5); \
|
358
|
+
b1 = _mm_unpacklo_epi64(m6, m7); \
|
359
|
+
} while(0)
|
360
|
+
|
361
|
+
|
362
|
+
#define LOAD_MSG_10_4(b0, b1) \
|
363
|
+
do \
|
364
|
+
{ \
|
365
|
+
b0 = _mm_unpackhi_epi64(m4, m5); \
|
366
|
+
b1 = _mm_unpackhi_epi64(m6, m7); \
|
367
|
+
} while(0)
|
368
|
+
|
369
|
+
|
370
|
+
#define LOAD_MSG_11_1(b0, b1) \
|
371
|
+
do \
|
372
|
+
{ \
|
373
|
+
b0 = _mm_unpacklo_epi64(m7, m2); \
|
374
|
+
b1 = _mm_unpackhi_epi64(m4, m6); \
|
375
|
+
} while(0)
|
376
|
+
|
377
|
+
|
378
|
+
#define LOAD_MSG_11_2(b0, b1) \
|
379
|
+
do \
|
380
|
+
{ \
|
381
|
+
b0 = _mm_unpacklo_epi64(m5, m4); \
|
382
|
+
b1 = _mm_alignr_epi8(m3, m7, 8); \
|
383
|
+
} while(0)
|
384
|
+
|
385
|
+
|
386
|
+
#define LOAD_MSG_11_3(b0, b1) \
|
387
|
+
do \
|
388
|
+
{ \
|
389
|
+
b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)); \
|
390
|
+
b1 = _mm_unpackhi_epi64(m5, m2); \
|
391
|
+
} while(0)
|
392
|
+
|
393
|
+
|
394
|
+
#define LOAD_MSG_11_4(b0, b1) \
|
395
|
+
do \
|
396
|
+
{ \
|
397
|
+
b0 = _mm_unpacklo_epi64(m6, m1); \
|
398
|
+
b1 = _mm_unpackhi_epi64(m3, m1); \
|
399
|
+
} while(0)
|
400
|
+
|
401
|
+
|
402
|
+
#endif
|
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
BLAKE2 reference source code package - reference C implementations
|
2
|
+
BLAKE2 reference source code package - optimized C implementations
|
3
3
|
|
4
4
|
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
|
5
5
|
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
|
@@ -20,6 +20,27 @@
|
|
20
20
|
#include "blake2.h"
|
21
21
|
#include "blake2-impl.h"
|
22
22
|
|
23
|
+
#include "blake2-config.h"
|
24
|
+
|
25
|
+
#ifdef _MSC_VER
|
26
|
+
#include <intrin.h> /* for _mm_set_epi64x */
|
27
|
+
#endif
|
28
|
+
#include <emmintrin.h>
|
29
|
+
#if defined(HAVE_SSSE3)
|
30
|
+
#include <tmmintrin.h>
|
31
|
+
#endif
|
32
|
+
#if defined(HAVE_SSE41)
|
33
|
+
#include <smmintrin.h>
|
34
|
+
#endif
|
35
|
+
#if defined(HAVE_AVX)
|
36
|
+
#include <immintrin.h>
|
37
|
+
#endif
|
38
|
+
#if defined(HAVE_XOP)
|
39
|
+
#include <x86intrin.h>
|
40
|
+
#endif
|
41
|
+
|
42
|
+
#include "blake2b-round.h"
|
43
|
+
|
23
44
|
static const uint64_t blake2b_IV[8] =
|
24
45
|
{
|
25
46
|
0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL,
|
@@ -28,29 +49,12 @@ static const uint64_t blake2b_IV[8] =
|
|
28
49
|
0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL
|
29
50
|
};
|
30
51
|
|
31
|
-
|
32
|
-
{
|
33
|
-
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } ,
|
34
|
-
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } ,
|
35
|
-
{ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } ,
|
36
|
-
{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } ,
|
37
|
-
{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } ,
|
38
|
-
{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } ,
|
39
|
-
{ 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } ,
|
40
|
-
{ 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } ,
|
41
|
-
{ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } ,
|
42
|
-
{ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } ,
|
43
|
-
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } ,
|
44
|
-
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }
|
45
|
-
};
|
46
|
-
|
47
|
-
|
52
|
+
/* Some helper functions */
|
48
53
|
static void blake2b_set_lastnode( blake2b_state *S )
|
49
54
|
{
|
50
55
|
S->f[1] = (uint64_t)-1;
|
51
56
|
}
|
52
57
|
|
53
|
-
/* Some helper functions, not necessarily useful */
|
54
58
|
static int blake2b_is_lastblock( const blake2b_state *S )
|
55
59
|
{
|
56
60
|
return S->f[0] != 0;
|
@@ -69,32 +73,25 @@ static void blake2b_increment_counter( blake2b_state *S, const uint64_t inc )
|
|
69
73
|
S->t[1] += ( S->t[0] < inc );
|
70
74
|
}
|
71
75
|
|
72
|
-
static void blake2b_init0( blake2b_state *S )
|
73
|
-
{
|
74
|
-
size_t i;
|
75
|
-
memset( S, 0, sizeof( blake2b_state ) );
|
76
|
-
|
77
|
-
for( i = 0; i < 8; ++i ) S->h[i] = blake2b_IV[i];
|
78
|
-
}
|
79
|
-
|
80
76
|
/* init xors IV with input parameter block */
|
81
77
|
int blake2b_init_param( blake2b_state *S, const blake2b_param *P )
|
82
78
|
{
|
83
|
-
const uint8_t *p = ( const uint8_t * )( P );
|
84
79
|
size_t i;
|
85
|
-
|
86
|
-
|
87
|
-
|
80
|
+
/*blake2b_init0( S ); */
|
81
|
+
const unsigned char * v = ( const unsigned char * )( blake2b_IV );
|
82
|
+
const unsigned char * p = ( const unsigned char * )( P );
|
83
|
+
unsigned char * h = ( unsigned char * )( S->h );
|
88
84
|
/* IV XOR ParamBlock */
|
89
|
-
|
90
|
-
|
85
|
+
memset( S, 0, sizeof( blake2b_state ) );
|
86
|
+
|
87
|
+
for( i = 0; i < BLAKE2B_OUTBYTES; ++i ) h[i] = v[i] ^ p[i];
|
91
88
|
|
92
89
|
S->outlen = P->digest_length;
|
93
90
|
return 0;
|
94
91
|
}
|
95
92
|
|
96
93
|
|
97
|
-
|
94
|
+
/* Some sort of default parameter block initialization, for sequential blake2b */
|
98
95
|
int blake2b_init( blake2b_state *S, size_t outlen )
|
99
96
|
{
|
100
97
|
blake2b_param P[1];
|
@@ -113,17 +110,17 @@ int blake2b_init( blake2b_state *S, size_t outlen )
|
|
113
110
|
memset( P->reserved, 0, sizeof( P->reserved ) );
|
114
111
|
memset( P->salt, 0, sizeof( P->salt ) );
|
115
112
|
memset( P->personal, 0, sizeof( P->personal ) );
|
113
|
+
|
116
114
|
return blake2b_init_param( S, P );
|
117
115
|
}
|
118
116
|
|
119
|
-
|
120
117
|
int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen )
|
121
118
|
{
|
122
119
|
blake2b_param P[1];
|
123
120
|
|
124
121
|
if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1;
|
125
122
|
|
126
|
-
if (
|
123
|
+
if ( ( !keylen ) || keylen > BLAKE2B_KEYBYTES ) return -1;
|
127
124
|
|
128
125
|
P->digest_length = (uint8_t)outlen;
|
129
126
|
P->key_length = (uint8_t)keylen;
|
@@ -138,7 +135,8 @@ int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t k
|
|
138
135
|
memset( P->salt, 0, sizeof( P->salt ) );
|
139
136
|
memset( P->personal, 0, sizeof( P->personal ) );
|
140
137
|
|
141
|
-
if( blake2b_init_param( S, P ) < 0 )
|
138
|
+
if( blake2b_init_param( S, P ) < 0 )
|
139
|
+
return 0;
|
142
140
|
|
143
141
|
{
|
144
142
|
uint8_t block[BLAKE2B_BLOCKBYTES];
|
@@ -150,53 +148,53 @@ int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t k
|
|
150
148
|
return 0;
|
151
149
|
}
|
152
150
|
|
153
|
-
#define G(r,i,a,b,c,d) \
|
154
|
-
do { \
|
155
|
-
a = a + b + m[blake2b_sigma[r][2*i+0]]; \
|
156
|
-
d = rotr64(d ^ a, 32); \
|
157
|
-
c = c + d; \
|
158
|
-
b = rotr64(b ^ c, 24); \
|
159
|
-
a = a + b + m[blake2b_sigma[r][2*i+1]]; \
|
160
|
-
d = rotr64(d ^ a, 16); \
|
161
|
-
c = c + d; \
|
162
|
-
b = rotr64(b ^ c, 63); \
|
163
|
-
} while(0)
|
164
|
-
|
165
|
-
#define ROUND(r) \
|
166
|
-
do { \
|
167
|
-
G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
|
168
|
-
G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
|
169
|
-
G(r,2,v[ 2],v[ 6],v[10],v[14]); \
|
170
|
-
G(r,3,v[ 3],v[ 7],v[11],v[15]); \
|
171
|
-
G(r,4,v[ 0],v[ 5],v[10],v[15]); \
|
172
|
-
G(r,5,v[ 1],v[ 6],v[11],v[12]); \
|
173
|
-
G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
|
174
|
-
G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \
|
175
|
-
} while(0)
|
176
|
-
|
177
151
|
static void blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] )
|
178
152
|
{
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
153
|
+
__m128i row1l, row1h;
|
154
|
+
__m128i row2l, row2h;
|
155
|
+
__m128i row3l, row3h;
|
156
|
+
__m128i row4l, row4h;
|
157
|
+
__m128i b0, b1;
|
158
|
+
__m128i t0, t1;
|
159
|
+
#if defined(HAVE_SSSE3) && !defined(HAVE_XOP)
|
160
|
+
const __m128i r16 = _mm_setr_epi8( 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9 );
|
161
|
+
const __m128i r24 = _mm_setr_epi8( 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10 );
|
162
|
+
#endif
|
163
|
+
#if defined(HAVE_SSE41)
|
164
|
+
const __m128i m0 = LOADU( block + 00 );
|
165
|
+
const __m128i m1 = LOADU( block + 16 );
|
166
|
+
const __m128i m2 = LOADU( block + 32 );
|
167
|
+
const __m128i m3 = LOADU( block + 48 );
|
168
|
+
const __m128i m4 = LOADU( block + 64 );
|
169
|
+
const __m128i m5 = LOADU( block + 80 );
|
170
|
+
const __m128i m6 = LOADU( block + 96 );
|
171
|
+
const __m128i m7 = LOADU( block + 112 );
|
172
|
+
#else
|
173
|
+
const uint64_t m0 = load64(block + 0 * sizeof(uint64_t));
|
174
|
+
const uint64_t m1 = load64(block + 1 * sizeof(uint64_t));
|
175
|
+
const uint64_t m2 = load64(block + 2 * sizeof(uint64_t));
|
176
|
+
const uint64_t m3 = load64(block + 3 * sizeof(uint64_t));
|
177
|
+
const uint64_t m4 = load64(block + 4 * sizeof(uint64_t));
|
178
|
+
const uint64_t m5 = load64(block + 5 * sizeof(uint64_t));
|
179
|
+
const uint64_t m6 = load64(block + 6 * sizeof(uint64_t));
|
180
|
+
const uint64_t m7 = load64(block + 7 * sizeof(uint64_t));
|
181
|
+
const uint64_t m8 = load64(block + 8 * sizeof(uint64_t));
|
182
|
+
const uint64_t m9 = load64(block + 9 * sizeof(uint64_t));
|
183
|
+
const uint64_t m10 = load64(block + 10 * sizeof(uint64_t));
|
184
|
+
const uint64_t m11 = load64(block + 11 * sizeof(uint64_t));
|
185
|
+
const uint64_t m12 = load64(block + 12 * sizeof(uint64_t));
|
186
|
+
const uint64_t m13 = load64(block + 13 * sizeof(uint64_t));
|
187
|
+
const uint64_t m14 = load64(block + 14 * sizeof(uint64_t));
|
188
|
+
const uint64_t m15 = load64(block + 15 * sizeof(uint64_t));
|
189
|
+
#endif
|
190
|
+
row1l = LOADU( &S->h[0] );
|
191
|
+
row1h = LOADU( &S->h[2] );
|
192
|
+
row2l = LOADU( &S->h[4] );
|
193
|
+
row2h = LOADU( &S->h[6] );
|
194
|
+
row3l = LOADU( &blake2b_IV[0] );
|
195
|
+
row3h = LOADU( &blake2b_IV[2] );
|
196
|
+
row4l = _mm_xor_si128( LOADU( &blake2b_IV[4] ), LOADU( &S->t[0] ) );
|
197
|
+
row4h = _mm_xor_si128( LOADU( &blake2b_IV[6] ), LOADU( &S->f[0] ) );
|
200
198
|
ROUND( 0 );
|
201
199
|
ROUND( 1 );
|
202
200
|
ROUND( 2 );
|
@@ -209,14 +207,16 @@ static void blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOC
|
|
209
207
|
ROUND( 9 );
|
210
208
|
ROUND( 10 );
|
211
209
|
ROUND( 11 );
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
210
|
+
row1l = _mm_xor_si128( row3l, row1l );
|
211
|
+
row1h = _mm_xor_si128( row3h, row1h );
|
212
|
+
STOREU( &S->h[0], _mm_xor_si128( LOADU( &S->h[0] ), row1l ) );
|
213
|
+
STOREU( &S->h[2], _mm_xor_si128( LOADU( &S->h[2] ), row1h ) );
|
214
|
+
row2l = _mm_xor_si128( row4l, row2l );
|
215
|
+
row2h = _mm_xor_si128( row4h, row2h );
|
216
|
+
STOREU( &S->h[4], _mm_xor_si128( LOADU( &S->h[4] ), row2l ) );
|
217
|
+
STOREU( &S->h[6], _mm_xor_si128( LOADU( &S->h[6] ), row2h ) );
|
216
218
|
}
|
217
219
|
|
218
|
-
#undef G
|
219
|
-
#undef ROUND
|
220
220
|
|
221
221
|
int blake2b_update( blake2b_state *S, const void *pin, size_t inlen )
|
222
222
|
{
|
@@ -245,11 +245,9 @@ int blake2b_update( blake2b_state *S, const void *pin, size_t inlen )
|
|
245
245
|
return 0;
|
246
246
|
}
|
247
247
|
|
248
|
+
|
248
249
|
int blake2b_final( blake2b_state *S, void *out, size_t outlen )
|
249
250
|
{
|
250
|
-
uint8_t buffer[BLAKE2B_OUTBYTES] = {0};
|
251
|
-
size_t i;
|
252
|
-
|
253
251
|
if( out == NULL || outlen < S->outlen )
|
254
252
|
return -1;
|
255
253
|
|
@@ -261,15 +259,11 @@ int blake2b_final( blake2b_state *S, void *out, size_t outlen )
|
|
261
259
|
memset( S->buf + S->buflen, 0, BLAKE2B_BLOCKBYTES - S->buflen ); /* Padding */
|
262
260
|
blake2b_compress( S, S->buf );
|
263
261
|
|
264
|
-
|
265
|
-
store64( buffer + sizeof( S->h[i] ) * i, S->h[i] );
|
266
|
-
|
267
|
-
memcpy( out, buffer, S->outlen );
|
268
|
-
secure_zero_memory(buffer, sizeof(buffer));
|
262
|
+
memcpy( out, &S->h[0], S->outlen );
|
269
263
|
return 0;
|
270
264
|
}
|
271
265
|
|
272
|
-
|
266
|
+
|
273
267
|
int blake2b( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen )
|
274
268
|
{
|
275
269
|
blake2b_state S[1];
|
@@ -285,7 +279,7 @@ int blake2b( void *out, size_t outlen, const void *in, size_t inlen, const void
|
|
285
279
|
|
286
280
|
if( keylen > BLAKE2B_KEYBYTES ) return -1;
|
287
281
|
|
288
|
-
if( keylen > 0 )
|
282
|
+
if( keylen )
|
289
283
|
{
|
290
284
|
if( blake2b_init_key( S, outlen, key, keylen ) < 0 ) return -1;
|
291
285
|
}
|
@@ -0,0 +1,157 @@
|
|
1
|
+
/*
|
2
|
+
BLAKE2 reference source code package - optimized C implementations
|
3
|
+
|
4
|
+
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
|
5
|
+
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
|
6
|
+
your option. The terms of these licenses can be found at:
|
7
|
+
|
8
|
+
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
|
9
|
+
- OpenSSL license : https://www.openssl.org/source/license.html
|
10
|
+
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
|
12
|
+
More information about the BLAKE2 hash function can be found at
|
13
|
+
https://blake2.net.
|
14
|
+
*/
|
15
|
+
#ifndef BLAKE2B_ROUND_H
|
16
|
+
#define BLAKE2B_ROUND_H
|
17
|
+
|
18
|
+
#define LOADU(p) _mm_loadu_si128( (const __m128i *)(p) )
|
19
|
+
#define STOREU(p,r) _mm_storeu_si128((__m128i *)(p), r)
|
20
|
+
|
21
|
+
#define TOF(reg) _mm_castsi128_ps((reg))
|
22
|
+
#define TOI(reg) _mm_castps_si128((reg))
|
23
|
+
|
24
|
+
#define LIKELY(x) __builtin_expect((x),1)
|
25
|
+
|
26
|
+
|
27
|
+
/* Microarchitecture-specific macros */
|
28
|
+
#ifndef HAVE_XOP
|
29
|
+
#ifdef HAVE_SSSE3
|
30
|
+
#define _mm_roti_epi64(x, c) \
|
31
|
+
(-(c) == 32) ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2,3,0,1)) \
|
32
|
+
: (-(c) == 24) ? _mm_shuffle_epi8((x), r24) \
|
33
|
+
: (-(c) == 16) ? _mm_shuffle_epi8((x), r16) \
|
34
|
+
: (-(c) == 63) ? _mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_add_epi64((x), (x))) \
|
35
|
+
: _mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_slli_epi64((x), 64-(-(c))))
|
36
|
+
#else
|
37
|
+
#define _mm_roti_epi64(r, c) _mm_xor_si128(_mm_srli_epi64( (r), -(c) ),_mm_slli_epi64( (r), 64-(-(c)) ))
|
38
|
+
#endif
|
39
|
+
#else
|
40
|
+
/* ... */
|
41
|
+
#endif
|
42
|
+
|
43
|
+
|
44
|
+
|
45
|
+
#define G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \
|
46
|
+
row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \
|
47
|
+
row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \
|
48
|
+
\
|
49
|
+
row4l = _mm_xor_si128(row4l, row1l); \
|
50
|
+
row4h = _mm_xor_si128(row4h, row1h); \
|
51
|
+
\
|
52
|
+
row4l = _mm_roti_epi64(row4l, -32); \
|
53
|
+
row4h = _mm_roti_epi64(row4h, -32); \
|
54
|
+
\
|
55
|
+
row3l = _mm_add_epi64(row3l, row4l); \
|
56
|
+
row3h = _mm_add_epi64(row3h, row4h); \
|
57
|
+
\
|
58
|
+
row2l = _mm_xor_si128(row2l, row3l); \
|
59
|
+
row2h = _mm_xor_si128(row2h, row3h); \
|
60
|
+
\
|
61
|
+
row2l = _mm_roti_epi64(row2l, -24); \
|
62
|
+
row2h = _mm_roti_epi64(row2h, -24); \
|
63
|
+
|
64
|
+
#define G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \
|
65
|
+
row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \
|
66
|
+
row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \
|
67
|
+
\
|
68
|
+
row4l = _mm_xor_si128(row4l, row1l); \
|
69
|
+
row4h = _mm_xor_si128(row4h, row1h); \
|
70
|
+
\
|
71
|
+
row4l = _mm_roti_epi64(row4l, -16); \
|
72
|
+
row4h = _mm_roti_epi64(row4h, -16); \
|
73
|
+
\
|
74
|
+
row3l = _mm_add_epi64(row3l, row4l); \
|
75
|
+
row3h = _mm_add_epi64(row3h, row4h); \
|
76
|
+
\
|
77
|
+
row2l = _mm_xor_si128(row2l, row3l); \
|
78
|
+
row2h = _mm_xor_si128(row2h, row3h); \
|
79
|
+
\
|
80
|
+
row2l = _mm_roti_epi64(row2l, -63); \
|
81
|
+
row2h = _mm_roti_epi64(row2h, -63); \
|
82
|
+
|
83
|
+
#if defined(HAVE_SSSE3)
|
84
|
+
#define DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \
|
85
|
+
t0 = _mm_alignr_epi8(row2h, row2l, 8); \
|
86
|
+
t1 = _mm_alignr_epi8(row2l, row2h, 8); \
|
87
|
+
row2l = t0; \
|
88
|
+
row2h = t1; \
|
89
|
+
\
|
90
|
+
t0 = row3l; \
|
91
|
+
row3l = row3h; \
|
92
|
+
row3h = t0; \
|
93
|
+
\
|
94
|
+
t0 = _mm_alignr_epi8(row4h, row4l, 8); \
|
95
|
+
t1 = _mm_alignr_epi8(row4l, row4h, 8); \
|
96
|
+
row4l = t1; \
|
97
|
+
row4h = t0;
|
98
|
+
|
99
|
+
#define UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \
|
100
|
+
t0 = _mm_alignr_epi8(row2l, row2h, 8); \
|
101
|
+
t1 = _mm_alignr_epi8(row2h, row2l, 8); \
|
102
|
+
row2l = t0; \
|
103
|
+
row2h = t1; \
|
104
|
+
\
|
105
|
+
t0 = row3l; \
|
106
|
+
row3l = row3h; \
|
107
|
+
row3h = t0; \
|
108
|
+
\
|
109
|
+
t0 = _mm_alignr_epi8(row4l, row4h, 8); \
|
110
|
+
t1 = _mm_alignr_epi8(row4h, row4l, 8); \
|
111
|
+
row4l = t1; \
|
112
|
+
row4h = t0;
|
113
|
+
#else
|
114
|
+
|
115
|
+
#define DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \
|
116
|
+
t0 = row4l;\
|
117
|
+
t1 = row2l;\
|
118
|
+
row4l = row3l;\
|
119
|
+
row3l = row3h;\
|
120
|
+
row3h = row4l;\
|
121
|
+
row4l = _mm_unpackhi_epi64(row4h, _mm_unpacklo_epi64(t0, t0)); \
|
122
|
+
row4h = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(row4h, row4h)); \
|
123
|
+
row2l = _mm_unpackhi_epi64(row2l, _mm_unpacklo_epi64(row2h, row2h)); \
|
124
|
+
row2h = _mm_unpackhi_epi64(row2h, _mm_unpacklo_epi64(t1, t1))
|
125
|
+
|
126
|
+
#define UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \
|
127
|
+
t0 = row3l;\
|
128
|
+
row3l = row3h;\
|
129
|
+
row3h = t0;\
|
130
|
+
t0 = row2l;\
|
131
|
+
t1 = row4l;\
|
132
|
+
row2l = _mm_unpackhi_epi64(row2h, _mm_unpacklo_epi64(row2l, row2l)); \
|
133
|
+
row2h = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(row2h, row2h)); \
|
134
|
+
row4l = _mm_unpackhi_epi64(row4l, _mm_unpacklo_epi64(row4h, row4h)); \
|
135
|
+
row4h = _mm_unpackhi_epi64(row4h, _mm_unpacklo_epi64(t1, t1))
|
136
|
+
|
137
|
+
#endif
|
138
|
+
|
139
|
+
#if defined(HAVE_SSE41)
|
140
|
+
#include "blake2b-load-sse41.h"
|
141
|
+
#else
|
142
|
+
#include "blake2b-load-sse2.h"
|
143
|
+
#endif
|
144
|
+
|
145
|
+
#define ROUND(r) \
|
146
|
+
LOAD_MSG_ ##r ##_1(b0, b1); \
|
147
|
+
G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
|
148
|
+
LOAD_MSG_ ##r ##_2(b0, b1); \
|
149
|
+
G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
|
150
|
+
DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); \
|
151
|
+
LOAD_MSG_ ##r ##_3(b0, b1); \
|
152
|
+
G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
|
153
|
+
LOAD_MSG_ ##r ##_4(b0, b1); \
|
154
|
+
G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
|
155
|
+
UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h);
|
156
|
+
|
157
|
+
#endif
|
data/ext/blake2b_ext/extconf.rb
CHANGED
data/ext/blake2b_ext/rbext.c
CHANGED
@@ -34,7 +34,7 @@ static VALUE blake2_alloc(VALUE klass) {
|
|
34
34
|
VALUE m_blake2_initialize(VALUE self, VALUE _len, VALUE _key) {
|
35
35
|
Blake2 *blake2;
|
36
36
|
Data_Get_Struct(self, Blake2, blake2);
|
37
|
-
int i;
|
37
|
+
unsigned int i;
|
38
38
|
|
39
39
|
ID bytes_method = rb_intern("bytes");
|
40
40
|
blake2->to_hex = ID2SYM(rb_intern("to_hex"));
|
@@ -44,7 +44,7 @@ VALUE m_blake2_initialize(VALUE self, VALUE _len, VALUE _key) {
|
|
44
44
|
blake2->key_length = RARRAY_LEN(key_bytes_ary);
|
45
45
|
blake2->key_bytes = (uint8_t*)malloc(blake2->key_length * sizeof(uint8_t));
|
46
46
|
|
47
|
-
for(i = 0;
|
47
|
+
for(i = 0; i < blake2->key_length; i++) {
|
48
48
|
VALUE byte = rb_ary_entry(key_bytes_ary, i);
|
49
49
|
blake2->key_bytes[i] = NUM2INT(byte);
|
50
50
|
}
|
@@ -61,7 +61,7 @@ VALUE m_blake2_digest(VALUE self, VALUE _input, VALUE _representation) {
|
|
61
61
|
|
62
62
|
char *input = RSTRING_PTR(_input);
|
63
63
|
uint64_t input_length = RSTRING_LEN(_input);
|
64
|
-
int i;
|
64
|
+
unsigned int i;
|
65
65
|
|
66
66
|
Data_Get_Struct(self, Blake2, blake2);
|
67
67
|
|
@@ -73,14 +73,14 @@ VALUE m_blake2_digest(VALUE self, VALUE _input, VALUE _representation) {
|
|
73
73
|
if(_representation == blake2->to_bytes) {
|
74
74
|
result = rb_ary_new2(blake2->output_length);
|
75
75
|
|
76
|
-
for(i = 0;
|
76
|
+
for(i = 0; i < blake2->output_length; i++) {
|
77
77
|
rb_ary_push(result, INT2NUM(blake2->output[i]));
|
78
78
|
}
|
79
79
|
} else if(_representation == blake2->to_hex) {
|
80
80
|
unsigned long ary_len = blake2->output_length * (unsigned)sizeof(char) * 2;
|
81
81
|
char *c_str = (char*)malloc(ary_len + 1);
|
82
82
|
|
83
|
-
for(i = 0;
|
83
|
+
for(i = 0; i < blake2->output_length; i++) {
|
84
84
|
sprintf(c_str + (i * 2), "%02x", blake2->output[i]);
|
85
85
|
}
|
86
86
|
c_str[ary_len] = 0;
|
@@ -0,0 +1,61 @@
|
|
1
|
+
require 'digest'
|
2
|
+
require 'blake2'
|
3
|
+
require 'blake2b'
|
4
|
+
|
5
|
+
sample_string = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec quis fermentum mauris, laoreet ultricies ipsum. Phasellus iaculis lacinia bibendum. Aenean eu lectus vitae nisi pellentesque condimentum. Cras imperdiet risus ut interdum dignissim. Nam ultricies vulputate varius. Morbi vehicula mi sit amet velit cursus, eu blandit dolor venenatis. Nunc vitae varius leo. Mauris metus nibh, ultrices nec odio in, viverra luctus purus. Duis luctus, dolor vel sodales semper, enim mauris sagittis dolor, at vehicula ligula ante eu lorem. Morbi porttitor lorem id turpis facilisis volutpat. Sed elementum porttitor sem, a ornare ligula. Integer tincidunt aliquam suscipit. Sed aliquam ligula id enim fringilla, vel ornare ante bibendum. Integer tincidunt, augue id condimentum fermentum, dolor urna molestie massa, sed congue enim quam eget arcu. Quisque feugiat purus sit amet porttitor tincidunt.
|
6
|
+
|
7
|
+
Fusce odio libero, lobortis quis ornare sit amet, dignissim sed erat. Praesent a iaculis ex. Ut libero amet."
|
8
|
+
|
9
|
+
run_count = 500_000
|
10
|
+
|
11
|
+
### MD5
|
12
|
+
starting = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
13
|
+
run_count.times do |i|
|
14
|
+
Digest::MD5.hexdigest("#{i}#{sample_string}")
|
15
|
+
end
|
16
|
+
ending = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
17
|
+
|
18
|
+
puts "MD5 result: #{ending - starting} seconds."
|
19
|
+
|
20
|
+
|
21
|
+
### SHA2
|
22
|
+
starting = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
23
|
+
run_count.times do |i|
|
24
|
+
Digest::SHA2.hexdigest("#{i}#{sample_string}")
|
25
|
+
end
|
26
|
+
ending = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
27
|
+
|
28
|
+
puts "SHA2 result: #{ending - starting} seconds."
|
29
|
+
|
30
|
+
|
31
|
+
### SHA512
|
32
|
+
starting = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
33
|
+
run_count.times do |i|
|
34
|
+
Digest::SHA512.hexdigest("#{i}#{sample_string}")
|
35
|
+
end
|
36
|
+
ending = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
37
|
+
|
38
|
+
puts "SHA512 result: #{ending - starting} seconds."
|
39
|
+
|
40
|
+
|
41
|
+
|
42
|
+
### BLAKE2s
|
43
|
+
starting = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
44
|
+
unkeyed = Blake2::Key.none
|
45
|
+
run_count.times do |i|
|
46
|
+
Blake2.new(32, unkeyed).digest("#{i}#{sample_string}", :to_hex)
|
47
|
+
end
|
48
|
+
ending = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
49
|
+
|
50
|
+
puts "BLAKE2s result: #{ending - starting} seconds."
|
51
|
+
|
52
|
+
|
53
|
+
### BLAKE2b
|
54
|
+
starting = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
55
|
+
unkeyed = Blake2b::Key.none
|
56
|
+
run_count.times do |i|
|
57
|
+
Blake2b.new(32, unkeyed).digest("#{i}#{sample_string}", :to_hex)
|
58
|
+
end
|
59
|
+
ending = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
60
|
+
|
61
|
+
puts "BLAKE2b result: #{ending - starting} seconds."
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: blake2b
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.0
|
4
|
+
version: 0.10.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Franck Verrot
|
@@ -83,13 +83,18 @@ files:
|
|
83
83
|
- README.md
|
84
84
|
- Rakefile
|
85
85
|
- blake2b.gemspec
|
86
|
+
- ext/blake2b_ext/blake2-config.h
|
86
87
|
- ext/blake2b_ext/blake2-impl.h
|
87
88
|
- ext/blake2b_ext/blake2.h
|
89
|
+
- ext/blake2b_ext/blake2b-load-sse2.h
|
90
|
+
- ext/blake2b_ext/blake2b-load-sse41.h
|
88
91
|
- ext/blake2b_ext/blake2b-ref.c
|
92
|
+
- ext/blake2b_ext/blake2b-round.h
|
89
93
|
- ext/blake2b_ext/extconf.rb
|
90
94
|
- ext/blake2b_ext/rbext.c
|
91
95
|
- lib/blake2b.rb
|
92
96
|
- lib/blake2b/key.rb
|
97
|
+
- performance/performance_suite.rb
|
93
98
|
homepage: https://github.com/mgomes/blake2b
|
94
99
|
licenses:
|
95
100
|
- GPL-3.0
|