ed25519_blake2b 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (61) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +9 -0
  3. data/CODE_OF_CONDUCT.md +74 -0
  4. data/Gemfile +6 -0
  5. data/Gemfile.lock +23 -0
  6. data/LICENSE +21 -0
  7. data/README.md +39 -0
  8. data/Rakefile +13 -0
  9. data/bin/console +14 -0
  10. data/bin/setup +8 -0
  11. data/ed25519_blake2b.gemspec +31 -0
  12. data/ext/ed25519_blake2b/blake2-config.h +72 -0
  13. data/ext/ed25519_blake2b/blake2-impl.h +160 -0
  14. data/ext/ed25519_blake2b/blake2.h +195 -0
  15. data/ext/ed25519_blake2b/blake2b-load-sse2.h +68 -0
  16. data/ext/ed25519_blake2b/blake2b-load-sse41.h +402 -0
  17. data/ext/ed25519_blake2b/blake2b-ref.c +373 -0
  18. data/ext/ed25519_blake2b/blake2b-round.h +157 -0
  19. data/ext/ed25519_blake2b/curve25519-donna-32bit.h +579 -0
  20. data/ext/ed25519_blake2b/curve25519-donna-64bit.h +413 -0
  21. data/ext/ed25519_blake2b/curve25519-donna-helpers.h +67 -0
  22. data/ext/ed25519_blake2b/curve25519-donna-sse2.h +1112 -0
  23. data/ext/ed25519_blake2b/ed25519-donna-32bit-sse2.h +513 -0
  24. data/ext/ed25519_blake2b/ed25519-donna-32bit-tables.h +61 -0
  25. data/ext/ed25519_blake2b/ed25519-donna-64bit-sse2.h +436 -0
  26. data/ext/ed25519_blake2b/ed25519-donna-64bit-tables.h +53 -0
  27. data/ext/ed25519_blake2b/ed25519-donna-64bit-x86-32bit.h +435 -0
  28. data/ext/ed25519_blake2b/ed25519-donna-64bit-x86.h +351 -0
  29. data/ext/ed25519_blake2b/ed25519-donna-basepoint-table.h +259 -0
  30. data/ext/ed25519_blake2b/ed25519-donna-batchverify.h +275 -0
  31. data/ext/ed25519_blake2b/ed25519-donna-impl-base.h +364 -0
  32. data/ext/ed25519_blake2b/ed25519-donna-impl-sse2.h +390 -0
  33. data/ext/ed25519_blake2b/ed25519-donna-portable-identify.h +103 -0
  34. data/ext/ed25519_blake2b/ed25519-donna-portable.h +135 -0
  35. data/ext/ed25519_blake2b/ed25519-donna.h +115 -0
  36. data/ext/ed25519_blake2b/ed25519-hash-custom.c +28 -0
  37. data/ext/ed25519_blake2b/ed25519-hash-custom.h +30 -0
  38. data/ext/ed25519_blake2b/ed25519-hash.h +219 -0
  39. data/ext/ed25519_blake2b/ed25519-randombytes-custom.h +10 -0
  40. data/ext/ed25519_blake2b/ed25519-randombytes.h +91 -0
  41. data/ext/ed25519_blake2b/ed25519.c +150 -0
  42. data/ext/ed25519_blake2b/ed25519.h +30 -0
  43. data/ext/ed25519_blake2b/extconf.rb +3 -0
  44. data/ext/ed25519_blake2b/fuzz/README.md +173 -0
  45. data/ext/ed25519_blake2b/fuzz/build-nix.php +134 -0
  46. data/ext/ed25519_blake2b/fuzz/curve25519-ref10.c +1272 -0
  47. data/ext/ed25519_blake2b/fuzz/curve25519-ref10.h +8 -0
  48. data/ext/ed25519_blake2b/fuzz/ed25519-donna-sse2.c +3 -0
  49. data/ext/ed25519_blake2b/fuzz/ed25519-donna.c +1 -0
  50. data/ext/ed25519_blake2b/fuzz/ed25519-donna.h +34 -0
  51. data/ext/ed25519_blake2b/fuzz/ed25519-ref10.c +4647 -0
  52. data/ext/ed25519_blake2b/fuzz/ed25519-ref10.h +9 -0
  53. data/ext/ed25519_blake2b/fuzz/fuzz-curve25519.c +172 -0
  54. data/ext/ed25519_blake2b/fuzz/fuzz-ed25519.c +219 -0
  55. data/ext/ed25519_blake2b/modm-donna-32bit.h +469 -0
  56. data/ext/ed25519_blake2b/modm-donna-64bit.h +361 -0
  57. data/ext/ed25519_blake2b/rbext.c +25 -0
  58. data/ext/ed25519_blake2b/regression.h +1024 -0
  59. data/lib/ed25519_blake2b/ed25519_blake2b.rb +4 -0
  60. data/lib/ed25519_blake2b/version.rb +3 -0
  61. metadata +147 -0
@@ -0,0 +1,195 @@
1
+ /*
2
+ BLAKE2 reference source code package - reference C implementations
3
+
4
+ Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
5
+ terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
6
+ your option. The terms of these licenses can be found at:
7
+
8
+ - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
9
+ - OpenSSL license : https://www.openssl.org/source/license.html
10
+ - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
11
+
12
+ More information about the BLAKE2 hash function can be found at
13
+ https://blake2.net.
14
+ */
15
+ #ifndef BLAKE2_H
16
+ #define BLAKE2_H
17
+
18
+ #include <stddef.h>
19
+ #include <stdint.h>
20
+
21
+ #if defined(_MSC_VER) /* BLAKE2_PACKED(x): emit declaration x with 1-byte packing; MSVC form brackets x with pack(push, 1) / pack(pop) pragmas */
22
+ #define BLAKE2_PACKED(x) __pragma(pack(push, 1)) x __pragma(pack(pop))
23
+ #else /* all other compilers: append the GCC-style packed attribute after x */
24
+ #define BLAKE2_PACKED(x) x __attribute__((packed))
25
+ #endif
26
+
27
+ #if defined(__cplusplus)
28
+ extern "C" {
29
+ #endif
30
+
31
+ enum blake2s_constant
32
+ {
33
+ BLAKE2S_BLOCKBYTES = 64,
34
+ BLAKE2S_OUTBYTES = 32,
35
+ BLAKE2S_KEYBYTES = 32,
36
+ BLAKE2S_SALTBYTES = 8,
37
+ BLAKE2S_PERSONALBYTES = 8
38
+ };
39
+
40
+ enum blake2b_constant
41
+ {
42
+ BLAKE2B_BLOCKBYTES = 128,
43
+ BLAKE2B_OUTBYTES = 64,
44
+ BLAKE2B_KEYBYTES = 64,
45
+ BLAKE2B_SALTBYTES = 16,
46
+ BLAKE2B_PERSONALBYTES = 16
47
+ };
48
+
49
+ typedef struct blake2s_state__
50
+ {
51
+ uint32_t h[8];
52
+ uint32_t t[2];
53
+ uint32_t f[2];
54
+ uint8_t buf[BLAKE2S_BLOCKBYTES];
55
+ size_t buflen;
56
+ size_t outlen;
57
+ uint8_t last_node;
58
+ } blake2s_state;
59
+
60
+ typedef struct blake2b_state__
61
+ {
62
+ uint64_t h[8];
63
+ uint64_t t[2];
64
+ uint64_t f[2];
65
+ uint8_t buf[BLAKE2B_BLOCKBYTES];
66
+ size_t buflen;
67
+ size_t outlen;
68
+ uint8_t last_node;
69
+ } blake2b_state;
70
+
71
+ typedef struct blake2sp_state__
72
+ {
73
+ blake2s_state S[8][1];
74
+ blake2s_state R[1];
75
+ uint8_t buf[8 * BLAKE2S_BLOCKBYTES];
76
+ size_t buflen;
77
+ size_t outlen;
78
+ } blake2sp_state;
79
+
80
+ typedef struct blake2bp_state__
81
+ {
82
+ blake2b_state S[4][1];
83
+ blake2b_state R[1];
84
+ uint8_t buf[4 * BLAKE2B_BLOCKBYTES];
85
+ size_t buflen;
86
+ size_t outlen;
87
+ } blake2bp_state;
88
+
89
+
90
+ BLAKE2_PACKED(struct blake2s_param__
91
+ { /* BLAKE2s parameter block; each trailing number is the cumulative size in bytes after that field (32 in total, checked by BLAKE2_DUMMY_1) */
92
+ uint8_t digest_length; /* 1 */
93
+ uint8_t key_length; /* 2 */
94
+ uint8_t fanout; /* 3 */
95
+ uint8_t depth; /* 4 */
96
+ uint32_t leaf_length; /* 8 */
97
+ uint32_t node_offset; /* 12 */
98
+ uint16_t xof_length; /* 14 */
99
+ uint8_t node_depth; /* 15 */
100
+ uint8_t inner_length; /* 16 */
101
+ /* uint8_t reserved[0]; -- no reserved bytes in this layout (blake2b_param has reserved[14] at this position) */
102
+ uint8_t salt[BLAKE2S_SALTBYTES]; /* 24 */
103
+ uint8_t personal[BLAKE2S_PERSONALBYTES]; /* 32 */
104
+ });
105
+
106
+ typedef struct blake2s_param__ blake2s_param;
107
+
108
+ BLAKE2_PACKED(struct blake2b_param__
109
+ { /* BLAKE2b parameter block; each trailing number is the cumulative size in bytes after that field (64 in total, checked by BLAKE2_DUMMY_2) */
110
+ uint8_t digest_length; /* 1 */
111
+ uint8_t key_length; /* 2 */
112
+ uint8_t fanout; /* 3 */
113
+ uint8_t depth; /* 4 */
114
+ uint32_t leaf_length; /* 8 */
115
+ uint32_t node_offset; /* 12 */
116
+ uint32_t xof_length; /* 16 */
117
+ uint8_t node_depth; /* 17 */
118
+ uint8_t inner_length; /* 18 */
119
+ uint8_t reserved[14]; /* 32 */
120
+ uint8_t salt[BLAKE2B_SALTBYTES]; /* 48 */
121
+ uint8_t personal[BLAKE2B_PERSONALBYTES]; /* 64 */
122
+ });
123
+
124
+ typedef struct blake2b_param__ blake2b_param;
125
+
126
+ typedef struct blake2xs_state__
127
+ {
128
+ blake2s_state S[1];
129
+ blake2s_param P[1];
130
+ } blake2xs_state;
131
+
132
+ typedef struct blake2xb_state__
133
+ {
134
+ blake2b_state S[1];
135
+ blake2b_param P[1];
136
+ } blake2xb_state;
137
+
138
+ /* Padded structs result in a compile-time error: if a param struct's sizeof differs from the expected size, the comparison yields 0 and the enumerator's constant expression divides by zero. The *_OUTBYTES constants are used as the expected sizes because they equal them (32 and 64). */
139
+ enum {
140
+ BLAKE2_DUMMY_1 = 1/(sizeof(blake2s_param) == BLAKE2S_OUTBYTES),
141
+ BLAKE2_DUMMY_2 = 1/(sizeof(blake2b_param) == BLAKE2B_OUTBYTES)
142
+ };
143
+
144
+ /* Streaming API */
145
+ int blake2s_init( blake2s_state *S, size_t outlen );
146
+ int blake2s_init_key( blake2s_state *S, size_t outlen, const void *key, size_t keylen );
147
+ int blake2s_init_param( blake2s_state *S, const blake2s_param *P );
148
+ int blake2s_update( blake2s_state *S, const void *in, size_t inlen );
149
+ int blake2s_final( blake2s_state *S, void *out, size_t outlen );
150
+
151
+ int blake2b_init( blake2b_state *S, size_t outlen );
152
+ int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen );
153
+ int blake2b_init_param( blake2b_state *S, const blake2b_param *P );
154
+ int blake2b_update( blake2b_state *S, const void *in, size_t inlen );
155
+ int blake2b_final( blake2b_state *S, void *out, size_t outlen );
156
+
157
+ int blake2sp_init( blake2sp_state *S, size_t outlen );
158
+ int blake2sp_init_key( blake2sp_state *S, size_t outlen, const void *key, size_t keylen );
159
+ int blake2sp_update( blake2sp_state *S, const void *in, size_t inlen );
160
+ int blake2sp_final( blake2sp_state *S, void *out, size_t outlen );
161
+
162
+ int blake2bp_init( blake2bp_state *S, size_t outlen );
163
+ int blake2bp_init_key( blake2bp_state *S, size_t outlen, const void *key, size_t keylen );
164
+ int blake2bp_update( blake2bp_state *S, const void *in, size_t inlen );
165
+ int blake2bp_final( blake2bp_state *S, void *out, size_t outlen );
166
+
167
+ /* Variable output length API */
168
+ int blake2xs_init( blake2xs_state *S, const size_t outlen );
169
+ int blake2xs_init_key( blake2xs_state *S, const size_t outlen, const void *key, size_t keylen );
170
+ int blake2xs_update( blake2xs_state *S, const void *in, size_t inlen );
171
+ int blake2xs_final(blake2xs_state *S, void *out, size_t outlen);
172
+
173
+ int blake2xb_init( blake2xb_state *S, const size_t outlen );
174
+ int blake2xb_init_key( blake2xb_state *S, const size_t outlen, const void *key, size_t keylen );
175
+ int blake2xb_update( blake2xb_state *S, const void *in, size_t inlen );
176
+ int blake2xb_final(blake2xb_state *S, void *out, size_t outlen);
177
+
178
+ /* Simple API */
179
+ int blake2s( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );
180
+ int blake2b( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );
181
+
182
+ int blake2sp( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );
183
+ int blake2bp( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );
184
+
185
+ int blake2xs( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );
186
+ int blake2xb( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );
187
+
188
+ /* This is simply an alias for blake2b */
189
+ int blake2( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );
190
+
191
+ #if defined(__cplusplus)
192
+ }
193
+ #endif
194
+
195
+ #endif
@@ -0,0 +1,68 @@
1
+ /*
2
+ BLAKE2 reference source code package - optimized C implementations
3
+
4
+ Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
5
+ terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
6
+ your option. The terms of these licenses can be found at:
7
+
8
+ - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
9
+ - OpenSSL license : https://www.openssl.org/source/license.html
10
+ - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
11
+
12
+ More information about the BLAKE2 hash function can be found at
13
+ https://blake2.net.
14
+ */
15
+ #ifndef BLAKE2B_LOAD_SSE2_H
16
+ #define BLAKE2B_LOAD_SSE2_H
17
+
18
+ #define LOAD_MSG_0_1(b0, b1) b0 = _mm_set_epi64x(m2, m0); b1 = _mm_set_epi64x(m6, m4)
19
+ #define LOAD_MSG_0_2(b0, b1) b0 = _mm_set_epi64x(m3, m1); b1 = _mm_set_epi64x(m7, m5)
20
+ #define LOAD_MSG_0_3(b0, b1) b0 = _mm_set_epi64x(m10, m8); b1 = _mm_set_epi64x(m14, m12)
21
+ #define LOAD_MSG_0_4(b0, b1) b0 = _mm_set_epi64x(m11, m9); b1 = _mm_set_epi64x(m15, m13)
22
+ #define LOAD_MSG_1_1(b0, b1) b0 = _mm_set_epi64x(m4, m14); b1 = _mm_set_epi64x(m13, m9)
23
+ #define LOAD_MSG_1_2(b0, b1) b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m6, m15)
24
+ #define LOAD_MSG_1_3(b0, b1) b0 = _mm_set_epi64x(m0, m1); b1 = _mm_set_epi64x(m5, m11)
25
+ #define LOAD_MSG_1_4(b0, b1) b0 = _mm_set_epi64x(m2, m12); b1 = _mm_set_epi64x(m3, m7)
26
+ #define LOAD_MSG_2_1(b0, b1) b0 = _mm_set_epi64x(m12, m11); b1 = _mm_set_epi64x(m15, m5)
27
+ #define LOAD_MSG_2_2(b0, b1) b0 = _mm_set_epi64x(m0, m8); b1 = _mm_set_epi64x(m13, m2)
28
+ #define LOAD_MSG_2_3(b0, b1) b0 = _mm_set_epi64x(m3, m10); b1 = _mm_set_epi64x(m9, m7)
29
+ #define LOAD_MSG_2_4(b0, b1) b0 = _mm_set_epi64x(m6, m14); b1 = _mm_set_epi64x(m4, m1)
30
+ #define LOAD_MSG_3_1(b0, b1) b0 = _mm_set_epi64x(m3, m7); b1 = _mm_set_epi64x(m11, m13)
31
+ #define LOAD_MSG_3_2(b0, b1) b0 = _mm_set_epi64x(m1, m9); b1 = _mm_set_epi64x(m14, m12)
32
+ #define LOAD_MSG_3_3(b0, b1) b0 = _mm_set_epi64x(m5, m2); b1 = _mm_set_epi64x(m15, m4)
33
+ #define LOAD_MSG_3_4(b0, b1) b0 = _mm_set_epi64x(m10, m6); b1 = _mm_set_epi64x(m8, m0)
34
+ #define LOAD_MSG_4_1(b0, b1) b0 = _mm_set_epi64x(m5, m9); b1 = _mm_set_epi64x(m10, m2)
35
+ #define LOAD_MSG_4_2(b0, b1) b0 = _mm_set_epi64x(m7, m0); b1 = _mm_set_epi64x(m15, m4)
36
+ #define LOAD_MSG_4_3(b0, b1) b0 = _mm_set_epi64x(m11, m14); b1 = _mm_set_epi64x(m3, m6)
37
+ #define LOAD_MSG_4_4(b0, b1) b0 = _mm_set_epi64x(m12, m1); b1 = _mm_set_epi64x(m13, m8)
38
+ #define LOAD_MSG_5_1(b0, b1) b0 = _mm_set_epi64x(m6, m2); b1 = _mm_set_epi64x(m8, m0)
39
+ #define LOAD_MSG_5_2(b0, b1) b0 = _mm_set_epi64x(m10, m12); b1 = _mm_set_epi64x(m3, m11)
40
+ #define LOAD_MSG_5_3(b0, b1) b0 = _mm_set_epi64x(m7, m4); b1 = _mm_set_epi64x(m1, m15)
41
+ #define LOAD_MSG_5_4(b0, b1) b0 = _mm_set_epi64x(m5, m13); b1 = _mm_set_epi64x(m9, m14)
42
+ #define LOAD_MSG_6_1(b0, b1) b0 = _mm_set_epi64x(m1, m12); b1 = _mm_set_epi64x(m4, m14)
43
+ #define LOAD_MSG_6_2(b0, b1) b0 = _mm_set_epi64x(m15, m5); b1 = _mm_set_epi64x(m10, m13)
44
+ #define LOAD_MSG_6_3(b0, b1) b0 = _mm_set_epi64x(m6, m0); b1 = _mm_set_epi64x(m8, m9)
45
+ #define LOAD_MSG_6_4(b0, b1) b0 = _mm_set_epi64x(m3, m7); b1 = _mm_set_epi64x(m11, m2)
46
+ #define LOAD_MSG_7_1(b0, b1) b0 = _mm_set_epi64x(m7, m13); b1 = _mm_set_epi64x(m3, m12)
47
+ #define LOAD_MSG_7_2(b0, b1) b0 = _mm_set_epi64x(m14, m11); b1 = _mm_set_epi64x(m9, m1)
48
+ #define LOAD_MSG_7_3(b0, b1) b0 = _mm_set_epi64x(m15, m5); b1 = _mm_set_epi64x(m2, m8)
49
+ #define LOAD_MSG_7_4(b0, b1) b0 = _mm_set_epi64x(m4, m0); b1 = _mm_set_epi64x(m10, m6)
50
+ #define LOAD_MSG_8_1(b0, b1) b0 = _mm_set_epi64x(m14, m6); b1 = _mm_set_epi64x(m0, m11)
51
+ #define LOAD_MSG_8_2(b0, b1) b0 = _mm_set_epi64x(m9, m15); b1 = _mm_set_epi64x(m8, m3)
52
+ #define LOAD_MSG_8_3(b0, b1) b0 = _mm_set_epi64x(m13, m12); b1 = _mm_set_epi64x(m10, m1)
53
+ #define LOAD_MSG_8_4(b0, b1) b0 = _mm_set_epi64x(m7, m2); b1 = _mm_set_epi64x(m5, m4)
54
+ #define LOAD_MSG_9_1(b0, b1) b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m1, m7)
55
+ #define LOAD_MSG_9_2(b0, b1) b0 = _mm_set_epi64x(m4, m2); b1 = _mm_set_epi64x(m5, m6)
56
+ #define LOAD_MSG_9_3(b0, b1) b0 = _mm_set_epi64x(m9, m15); b1 = _mm_set_epi64x(m13, m3)
57
+ #define LOAD_MSG_9_4(b0, b1) b0 = _mm_set_epi64x(m14, m11); b1 = _mm_set_epi64x(m0, m12)
58
+ #define LOAD_MSG_10_1(b0, b1) b0 = _mm_set_epi64x(m2, m0); b1 = _mm_set_epi64x(m6, m4)
59
+ #define LOAD_MSG_10_2(b0, b1) b0 = _mm_set_epi64x(m3, m1); b1 = _mm_set_epi64x(m7, m5)
60
+ #define LOAD_MSG_10_3(b0, b1) b0 = _mm_set_epi64x(m10, m8); b1 = _mm_set_epi64x(m14, m12)
61
+ #define LOAD_MSG_10_4(b0, b1) b0 = _mm_set_epi64x(m11, m9); b1 = _mm_set_epi64x(m15, m13)
62
+ #define LOAD_MSG_11_1(b0, b1) b0 = _mm_set_epi64x(m4, m14); b1 = _mm_set_epi64x(m13, m9)
63
+ #define LOAD_MSG_11_2(b0, b1) b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m6, m15)
64
+ #define LOAD_MSG_11_3(b0, b1) b0 = _mm_set_epi64x(m0, m1); b1 = _mm_set_epi64x(m5, m11)
65
+ #define LOAD_MSG_11_4(b0, b1) b0 = _mm_set_epi64x(m2, m12); b1 = _mm_set_epi64x(m3, m7)
66
+
67
+
68
+ #endif
@@ -0,0 +1,402 @@
1
+ /*
2
+ BLAKE2 reference source code package - optimized C implementations
3
+
4
+ Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
5
+ terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
6
+ your option. The terms of these licenses can be found at:
7
+
8
+ - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
9
+ - OpenSSL license : https://www.openssl.org/source/license.html
10
+ - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
11
+
12
+ More information about the BLAKE2 hash function can be found at
13
+ https://blake2.net.
14
+ */
15
+ #ifndef BLAKE2B_LOAD_SSE41_H
16
+ #define BLAKE2B_LOAD_SSE41_H
17
+
18
+ #define LOAD_MSG_0_1(b0, b1) \
19
+ do \
20
+ { \
21
+ b0 = _mm_unpacklo_epi64(m0, m1); \
22
+ b1 = _mm_unpacklo_epi64(m2, m3); \
23
+ } while(0)
24
+
25
+
26
+ #define LOAD_MSG_0_2(b0, b1) \
27
+ do \
28
+ { \
29
+ b0 = _mm_unpackhi_epi64(m0, m1); \
30
+ b1 = _mm_unpackhi_epi64(m2, m3); \
31
+ } while(0)
32
+
33
+
34
+ #define LOAD_MSG_0_3(b0, b1) \
35
+ do \
36
+ { \
37
+ b0 = _mm_unpacklo_epi64(m4, m5); \
38
+ b1 = _mm_unpacklo_epi64(m6, m7); \
39
+ } while(0)
40
+
41
+
42
+ #define LOAD_MSG_0_4(b0, b1) \
43
+ do \
44
+ { \
45
+ b0 = _mm_unpackhi_epi64(m4, m5); \
46
+ b1 = _mm_unpackhi_epi64(m6, m7); \
47
+ } while(0)
48
+
49
+
50
+ #define LOAD_MSG_1_1(b0, b1) \
51
+ do \
52
+ { \
53
+ b0 = _mm_unpacklo_epi64(m7, m2); \
54
+ b1 = _mm_unpackhi_epi64(m4, m6); \
55
+ } while(0)
56
+
57
+
58
+ #define LOAD_MSG_1_2(b0, b1) \
59
+ do \
60
+ { \
61
+ b0 = _mm_unpacklo_epi64(m5, m4); \
62
+ b1 = _mm_alignr_epi8(m3, m7, 8); \
63
+ } while(0)
64
+
65
+
66
+ #define LOAD_MSG_1_3(b0, b1) \
67
+ do \
68
+ { \
69
+ b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)); \
70
+ b1 = _mm_unpackhi_epi64(m5, m2); \
71
+ } while(0)
72
+
73
+
74
+ #define LOAD_MSG_1_4(b0, b1) \
75
+ do \
76
+ { \
77
+ b0 = _mm_unpacklo_epi64(m6, m1); \
78
+ b1 = _mm_unpackhi_epi64(m3, m1); \
79
+ } while(0)
80
+
81
+
82
+ #define LOAD_MSG_2_1(b0, b1) \
83
+ do \
84
+ { \
85
+ b0 = _mm_alignr_epi8(m6, m5, 8); \
86
+ b1 = _mm_unpackhi_epi64(m2, m7); \
87
+ } while(0)
88
+
89
+
90
+ #define LOAD_MSG_2_2(b0, b1) \
91
+ do \
92
+ { \
93
+ b0 = _mm_unpacklo_epi64(m4, m0); \
94
+ b1 = _mm_blend_epi16(m1, m6, 0xF0); \
95
+ } while(0)
96
+
97
+
98
+ #define LOAD_MSG_2_3(b0, b1) \
99
+ do \
100
+ { \
101
+ b0 = _mm_blend_epi16(m5, m1, 0xF0); \
102
+ b1 = _mm_unpackhi_epi64(m3, m4); \
103
+ } while(0)
104
+
105
+
106
+ #define LOAD_MSG_2_4(b0, b1) \
107
+ do \
108
+ { \
109
+ b0 = _mm_unpacklo_epi64(m7, m3); \
110
+ b1 = _mm_alignr_epi8(m2, m0, 8); \
111
+ } while(0)
112
+
113
+
114
+ #define LOAD_MSG_3_1(b0, b1) \
115
+ do \
116
+ { \
117
+ b0 = _mm_unpackhi_epi64(m3, m1); \
118
+ b1 = _mm_unpackhi_epi64(m6, m5); \
119
+ } while(0)
120
+
121
+
122
+ #define LOAD_MSG_3_2(b0, b1) \
123
+ do \
124
+ { \
125
+ b0 = _mm_unpackhi_epi64(m4, m0); \
126
+ b1 = _mm_unpacklo_epi64(m6, m7); \
127
+ } while(0)
128
+
129
+
130
+ #define LOAD_MSG_3_3(b0, b1) \
131
+ do \
132
+ { \
133
+ b0 = _mm_blend_epi16(m1, m2, 0xF0); \
134
+ b1 = _mm_blend_epi16(m2, m7, 0xF0); \
135
+ } while(0)
136
+
137
+
138
+ #define LOAD_MSG_3_4(b0, b1) \
139
+ do \
140
+ { \
141
+ b0 = _mm_unpacklo_epi64(m3, m5); \
142
+ b1 = _mm_unpacklo_epi64(m0, m4); \
143
+ } while(0)
144
+
145
+
146
+ #define LOAD_MSG_4_1(b0, b1) \
147
+ do \
148
+ { \
149
+ b0 = _mm_unpackhi_epi64(m4, m2); \
150
+ b1 = _mm_unpacklo_epi64(m1, m5); \
151
+ } while(0)
152
+
153
+
154
+ #define LOAD_MSG_4_2(b0, b1) \
155
+ do \
156
+ { \
157
+ b0 = _mm_blend_epi16(m0, m3, 0xF0); \
158
+ b1 = _mm_blend_epi16(m2, m7, 0xF0); \
159
+ } while(0)
160
+
161
+
162
+ #define LOAD_MSG_4_3(b0, b1) \
163
+ do \
164
+ { \
165
+ b0 = _mm_blend_epi16(m7, m5, 0xF0); \
166
+ b1 = _mm_blend_epi16(m3, m1, 0xF0); \
167
+ } while(0)
168
+
169
+
170
+ #define LOAD_MSG_4_4(b0, b1) \
171
+ do \
172
+ { \
173
+ b0 = _mm_alignr_epi8(m6, m0, 8); \
174
+ b1 = _mm_blend_epi16(m4, m6, 0xF0); \
175
+ } while(0)
176
+
177
+
178
+ #define LOAD_MSG_5_1(b0, b1) \
179
+ do \
180
+ { \
181
+ b0 = _mm_unpacklo_epi64(m1, m3); \
182
+ b1 = _mm_unpacklo_epi64(m0, m4); \
183
+ } while(0)
184
+
185
+
186
+ #define LOAD_MSG_5_2(b0, b1) \
187
+ do \
188
+ { \
189
+ b0 = _mm_unpacklo_epi64(m6, m5); \
190
+ b1 = _mm_unpackhi_epi64(m5, m1); \
191
+ } while(0)
192
+
193
+
194
+ #define LOAD_MSG_5_3(b0, b1) \
195
+ do \
196
+ { \
197
+ b0 = _mm_blend_epi16(m2, m3, 0xF0); \
198
+ b1 = _mm_unpackhi_epi64(m7, m0); \
199
+ } while(0)
200
+
201
+
202
+ #define LOAD_MSG_5_4(b0, b1) \
203
+ do \
204
+ { \
205
+ b0 = _mm_unpackhi_epi64(m6, m2); \
206
+ b1 = _mm_blend_epi16(m7, m4, 0xF0); \
207
+ } while(0)
208
+
209
+
210
+ #define LOAD_MSG_6_1(b0, b1) \
211
+ do \
212
+ { \
213
+ b0 = _mm_blend_epi16(m6, m0, 0xF0); \
214
+ b1 = _mm_unpacklo_epi64(m7, m2); \
215
+ } while(0)
216
+
217
+
218
+ #define LOAD_MSG_6_2(b0, b1) \
219
+ do \
220
+ { \
221
+ b0 = _mm_unpackhi_epi64(m2, m7); \
222
+ b1 = _mm_alignr_epi8(m5, m6, 8); \
223
+ } while(0)
224
+
225
+
226
+ #define LOAD_MSG_6_3(b0, b1) \
227
+ do \
228
+ { \
229
+ b0 = _mm_unpacklo_epi64(m0, m3); \
230
+ b1 = _mm_shuffle_epi32(m4, _MM_SHUFFLE(1,0,3,2)); \
231
+ } while(0)
232
+
233
+
234
+ #define LOAD_MSG_6_4(b0, b1) \
235
+ do \
236
+ { \
237
+ b0 = _mm_unpackhi_epi64(m3, m1); \
238
+ b1 = _mm_blend_epi16(m1, m5, 0xF0); \
239
+ } while(0)
240
+
241
+
242
+ #define LOAD_MSG_7_1(b0, b1) \
243
+ do \
244
+ { \
245
+ b0 = _mm_unpackhi_epi64(m6, m3); \
246
+ b1 = _mm_blend_epi16(m6, m1, 0xF0); \
247
+ } while(0)
248
+
249
+
250
+ #define LOAD_MSG_7_2(b0, b1) \
251
+ do \
252
+ { \
253
+ b0 = _mm_alignr_epi8(m7, m5, 8); \
254
+ b1 = _mm_unpackhi_epi64(m0, m4); \
255
+ } while(0)
256
+
257
+
258
+ #define LOAD_MSG_7_3(b0, b1) \
259
+ do \
260
+ { \
261
+ b0 = _mm_unpackhi_epi64(m2, m7); \
262
+ b1 = _mm_unpacklo_epi64(m4, m1); \
263
+ } while(0)
264
+
265
+
266
+ #define LOAD_MSG_7_4(b0, b1) \
267
+ do \
268
+ { \
269
+ b0 = _mm_unpacklo_epi64(m0, m2); \
270
+ b1 = _mm_unpacklo_epi64(m3, m5); \
271
+ } while(0)
272
+
273
+
274
+ #define LOAD_MSG_8_1(b0, b1) \
275
+ do \
276
+ { \
277
+ b0 = _mm_unpacklo_epi64(m3, m7); \
278
+ b1 = _mm_alignr_epi8(m0, m5, 8); \
279
+ } while(0)
280
+
281
+
282
+ #define LOAD_MSG_8_2(b0, b1) \
283
+ do \
284
+ { \
285
+ b0 = _mm_unpackhi_epi64(m7, m4); \
286
+ b1 = _mm_alignr_epi8(m4, m1, 8); \
287
+ } while(0)
288
+
289
+
290
+ #define LOAD_MSG_8_3(b0, b1) \
291
+ do \
292
+ { \
293
+ b0 = m6; \
294
+ b1 = _mm_alignr_epi8(m5, m0, 8); \
295
+ } while(0)
296
+
297
+
298
+ #define LOAD_MSG_8_4(b0, b1) \
299
+ do \
300
+ { \
301
+ b0 = _mm_blend_epi16(m1, m3, 0xF0); \
302
+ b1 = m2; \
303
+ } while(0)
304
+
305
+
306
+ #define LOAD_MSG_9_1(b0, b1) \
307
+ do \
308
+ { \
309
+ b0 = _mm_unpacklo_epi64(m5, m4); \
310
+ b1 = _mm_unpackhi_epi64(m3, m0); \
311
+ } while(0)
312
+
313
+
314
+ #define LOAD_MSG_9_2(b0, b1) \
315
+ do \
316
+ { \
317
+ b0 = _mm_unpacklo_epi64(m1, m2); \
318
+ b1 = _mm_blend_epi16(m3, m2, 0xF0); \
319
+ } while(0)
320
+
321
+
322
+ #define LOAD_MSG_9_3(b0, b1) \
323
+ do \
324
+ { \
325
+ b0 = _mm_unpackhi_epi64(m7, m4); \
326
+ b1 = _mm_unpackhi_epi64(m1, m6); \
327
+ } while(0)
328
+
329
+
330
+ #define LOAD_MSG_9_4(b0, b1) \
331
+ do \
332
+ { \
333
+ b0 = _mm_alignr_epi8(m7, m5, 8); \
334
+ b1 = _mm_unpacklo_epi64(m6, m0); \
335
+ } while(0)
336
+
337
+
338
+ #define LOAD_MSG_10_1(b0, b1) \
339
+ do \
340
+ { \
341
+ b0 = _mm_unpacklo_epi64(m0, m1); \
342
+ b1 = _mm_unpacklo_epi64(m2, m3); \
343
+ } while(0)
344
+
345
+
346
+ #define LOAD_MSG_10_2(b0, b1) \
347
+ do \
348
+ { \
349
+ b0 = _mm_unpackhi_epi64(m0, m1); \
350
+ b1 = _mm_unpackhi_epi64(m2, m3); \
351
+ } while(0)
352
+
353
+
354
+ #define LOAD_MSG_10_3(b0, b1) \
355
+ do \
356
+ { \
357
+ b0 = _mm_unpacklo_epi64(m4, m5); \
358
+ b1 = _mm_unpacklo_epi64(m6, m7); \
359
+ } while(0)
360
+
361
+
362
+ #define LOAD_MSG_10_4(b0, b1) \
363
+ do \
364
+ { \
365
+ b0 = _mm_unpackhi_epi64(m4, m5); \
366
+ b1 = _mm_unpackhi_epi64(m6, m7); \
367
+ } while(0)
368
+
369
+
370
+ #define LOAD_MSG_11_1(b0, b1) \
371
+ do \
372
+ { \
373
+ b0 = _mm_unpacklo_epi64(m7, m2); \
374
+ b1 = _mm_unpackhi_epi64(m4, m6); \
375
+ } while(0)
376
+
377
+
378
+ #define LOAD_MSG_11_2(b0, b1) \
379
+ do \
380
+ { \
381
+ b0 = _mm_unpacklo_epi64(m5, m4); \
382
+ b1 = _mm_alignr_epi8(m3, m7, 8); \
383
+ } while(0)
384
+
385
+
386
+ #define LOAD_MSG_11_3(b0, b1) \
387
+ do \
388
+ { \
389
+ b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)); \
390
+ b1 = _mm_unpackhi_epi64(m5, m2); \
391
+ } while(0)
392
+
393
+
394
+ #define LOAD_MSG_11_4(b0, b1) \
395
+ do \
396
+ { \
397
+ b0 = _mm_unpacklo_epi64(m6, m1); \
398
+ b1 = _mm_unpackhi_epi64(m3, m1); \
399
+ } while(0)
400
+
401
+
402
+ #endif