ed25519_blake2b 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +9 -0
  3. data/CODE_OF_CONDUCT.md +74 -0
  4. data/Gemfile +6 -0
  5. data/Gemfile.lock +23 -0
  6. data/LICENSE +21 -0
  7. data/README.md +39 -0
  8. data/Rakefile +13 -0
  9. data/bin/console +14 -0
  10. data/bin/setup +8 -0
  11. data/ed25519_blake2b.gemspec +31 -0
  12. data/ext/ed25519_blake2b/blake2-config.h +72 -0
  13. data/ext/ed25519_blake2b/blake2-impl.h +160 -0
  14. data/ext/ed25519_blake2b/blake2.h +195 -0
  15. data/ext/ed25519_blake2b/blake2b-load-sse2.h +68 -0
  16. data/ext/ed25519_blake2b/blake2b-load-sse41.h +402 -0
  17. data/ext/ed25519_blake2b/blake2b-ref.c +373 -0
  18. data/ext/ed25519_blake2b/blake2b-round.h +157 -0
  19. data/ext/ed25519_blake2b/curve25519-donna-32bit.h +579 -0
  20. data/ext/ed25519_blake2b/curve25519-donna-64bit.h +413 -0
  21. data/ext/ed25519_blake2b/curve25519-donna-helpers.h +67 -0
  22. data/ext/ed25519_blake2b/curve25519-donna-sse2.h +1112 -0
  23. data/ext/ed25519_blake2b/ed25519-donna-32bit-sse2.h +513 -0
  24. data/ext/ed25519_blake2b/ed25519-donna-32bit-tables.h +61 -0
  25. data/ext/ed25519_blake2b/ed25519-donna-64bit-sse2.h +436 -0
  26. data/ext/ed25519_blake2b/ed25519-donna-64bit-tables.h +53 -0
  27. data/ext/ed25519_blake2b/ed25519-donna-64bit-x86-32bit.h +435 -0
  28. data/ext/ed25519_blake2b/ed25519-donna-64bit-x86.h +351 -0
  29. data/ext/ed25519_blake2b/ed25519-donna-basepoint-table.h +259 -0
  30. data/ext/ed25519_blake2b/ed25519-donna-batchverify.h +275 -0
  31. data/ext/ed25519_blake2b/ed25519-donna-impl-base.h +364 -0
  32. data/ext/ed25519_blake2b/ed25519-donna-impl-sse2.h +390 -0
  33. data/ext/ed25519_blake2b/ed25519-donna-portable-identify.h +103 -0
  34. data/ext/ed25519_blake2b/ed25519-donna-portable.h +135 -0
  35. data/ext/ed25519_blake2b/ed25519-donna.h +115 -0
  36. data/ext/ed25519_blake2b/ed25519-hash-custom.c +28 -0
  37. data/ext/ed25519_blake2b/ed25519-hash-custom.h +30 -0
  38. data/ext/ed25519_blake2b/ed25519-hash.h +219 -0
  39. data/ext/ed25519_blake2b/ed25519-randombytes-custom.h +10 -0
  40. data/ext/ed25519_blake2b/ed25519-randombytes.h +91 -0
  41. data/ext/ed25519_blake2b/ed25519.c +150 -0
  42. data/ext/ed25519_blake2b/ed25519.h +30 -0
  43. data/ext/ed25519_blake2b/extconf.rb +3 -0
  44. data/ext/ed25519_blake2b/fuzz/README.md +173 -0
  45. data/ext/ed25519_blake2b/fuzz/build-nix.php +134 -0
  46. data/ext/ed25519_blake2b/fuzz/curve25519-ref10.c +1272 -0
  47. data/ext/ed25519_blake2b/fuzz/curve25519-ref10.h +8 -0
  48. data/ext/ed25519_blake2b/fuzz/ed25519-donna-sse2.c +3 -0
  49. data/ext/ed25519_blake2b/fuzz/ed25519-donna.c +1 -0
  50. data/ext/ed25519_blake2b/fuzz/ed25519-donna.h +34 -0
  51. data/ext/ed25519_blake2b/fuzz/ed25519-ref10.c +4647 -0
  52. data/ext/ed25519_blake2b/fuzz/ed25519-ref10.h +9 -0
  53. data/ext/ed25519_blake2b/fuzz/fuzz-curve25519.c +172 -0
  54. data/ext/ed25519_blake2b/fuzz/fuzz-ed25519.c +219 -0
  55. data/ext/ed25519_blake2b/modm-donna-32bit.h +469 -0
  56. data/ext/ed25519_blake2b/modm-donna-64bit.h +361 -0
  57. data/ext/ed25519_blake2b/rbext.c +25 -0
  58. data/ext/ed25519_blake2b/regression.h +1024 -0
  59. data/lib/ed25519_blake2b/ed25519_blake2b.rb +4 -0
  60. data/lib/ed25519_blake2b/version.rb +3 -0
  61. metadata +147 -0
@@ -0,0 +1,195 @@
1
+ /*
2
+ BLAKE2 reference source code package - reference C implementations
3
+
4
+ Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
5
+ terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
6
+ your option. The terms of these licenses can be found at:
7
+
8
+ - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
9
+ - OpenSSL license : https://www.openssl.org/source/license.html
10
+ - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
11
+
12
+ More information about the BLAKE2 hash function can be found at
13
+ https://blake2.net.
14
+ */
15
+ #ifndef BLAKE2_H
16
+ #define BLAKE2_H
17
+
18
+ #include <stddef.h>
19
+ #include <stdint.h>
20
+ /* BLAKE2_PACKED(x) declares x without padding: pack(push, 1)/pack(pop) pragmas under _MSC_VER, __attribute__((packed)) otherwise. */
21
+ #if defined(_MSC_VER)
22
+ #define BLAKE2_PACKED(x) __pragma(pack(push, 1)) x __pragma(pack(pop))
23
+ #else
24
+ #define BLAKE2_PACKED(x) x __attribute__((packed))
25
+ #endif
26
+
27
+ #if defined(__cplusplus)
28
+ extern "C" {
29
+ #endif
30
+ /* BLAKE2s size constants, in bytes going by their names; BLOCKBYTES, SALTBYTES and PERSONALBYTES size arrays declared later in this header. */
31
+ enum blake2s_constant
32
+ {
33
+ BLAKE2S_BLOCKBYTES = 64,
34
+ BLAKE2S_OUTBYTES = 32, /* also the value sizeof(blake2s_param) is checked against in BLAKE2_DUMMY_1 */
35
+ BLAKE2S_KEYBYTES = 32, /* not referenced elsewhere in this header; presumably the maximum key length - confirm in the implementation */
36
+ BLAKE2S_SALTBYTES = 8,
37
+ BLAKE2S_PERSONALBYTES = 8
38
+ };
39
+ /* BLAKE2b size constants, in bytes going by their names; BLOCKBYTES, SALTBYTES and PERSONALBYTES size arrays declared later in this header. */
40
+ enum blake2b_constant
41
+ {
42
+ BLAKE2B_BLOCKBYTES = 128,
43
+ BLAKE2B_OUTBYTES = 64, /* also the value sizeof(blake2b_param) is checked against in BLAKE2_DUMMY_2 */
44
+ BLAKE2B_KEYBYTES = 64, /* not referenced elsewhere in this header; presumably the maximum key length - confirm in the implementation */
45
+ BLAKE2B_SALTBYTES = 16,
46
+ BLAKE2B_PERSONALBYTES = 16
47
+ };
48
+ /* Streaming state for BLAKE2s. NOTE(review): field roles (h chaining value, t counter, f finalization flags) are inferred from names only - confirm in the BLAKE2s implementation. */
49
+ typedef struct blake2s_state__
50
+ {
51
+ uint32_t h[8];
52
+ uint32_t t[2];
53
+ uint32_t f[2];
54
+ uint8_t buf[BLAKE2S_BLOCKBYTES]; /* 64 bytes, i.e. one BLAKE2S_BLOCKBYTES block */
55
+ size_t buflen; /* presumably the number of bytes currently held in buf - confirm */
56
+ size_t outlen;
57
+ uint8_t last_node;
58
+ } blake2s_state;
59
+ /* Streaming state for BLAKE2b. NOTE(review): field roles (h chaining value, t counter, f finalization flags) are inferred from names only - confirm in the BLAKE2b implementation. */
60
+ typedef struct blake2b_state__
61
+ {
62
+ uint64_t h[8];
63
+ uint64_t t[2];
64
+ uint64_t f[2];
65
+ uint8_t buf[BLAKE2B_BLOCKBYTES]; /* 128 bytes, i.e. one BLAKE2B_BLOCKBYTES block */
66
+ size_t buflen; /* presumably the number of bytes currently held in buf - confirm */
67
+ size_t outlen;
68
+ uint8_t last_node;
69
+ } blake2b_state;
70
+ /* BLAKE2sp state: eight blake2s_state instances (S) plus one more (R). NOTE(review): S/R look like parallel leaves and a root node - confirm in the BLAKE2sp implementation. */
71
+ typedef struct blake2sp_state__
72
+ {
73
+ blake2s_state S[8][1];
74
+ blake2s_state R[1];
75
+ uint8_t buf[8 * BLAKE2S_BLOCKBYTES]; /* 8 * 64 = 512 bytes */
76
+ size_t buflen;
77
+ size_t outlen;
78
+ } blake2sp_state;
79
+ /* BLAKE2bp state: four blake2b_state instances (S) plus one more (R). NOTE(review): S/R look like parallel leaves and a root node - confirm in the BLAKE2bp implementation. */
80
+ typedef struct blake2bp_state__
81
+ {
82
+ blake2b_state S[4][1];
83
+ blake2b_state R[1];
84
+ uint8_t buf[4 * BLAKE2B_BLOCKBYTES]; /* 4 * 128 = 512 bytes */
85
+ size_t buflen;
86
+ size_t outlen;
87
+ } blake2bp_state;
88
+
89
+ /* Packed BLAKE2s parameter block. The number in each field comment is the cumulative size in bytes after that field; the fields add up to 32 bytes. */
90
+ BLAKE2_PACKED(struct blake2s_param__
91
+ {
92
+ uint8_t digest_length; /* 1 */
93
+ uint8_t key_length; /* 2 */
94
+ uint8_t fanout; /* 3 */
95
+ uint8_t depth; /* 4 */
96
+ uint32_t leaf_length; /* 8 */
97
+ uint32_t node_offset; /* 12 */
98
+ uint16_t xof_length; /* 14 */
99
+ uint8_t node_depth; /* 15 */
100
+ uint8_t inner_length; /* 16 */
101
+ /* uint8_t reserved[0]; */
102
+ uint8_t salt[BLAKE2S_SALTBYTES]; /* 24 */
103
+ uint8_t personal[BLAKE2S_PERSONALBYTES]; /* 32 */
104
+ });
105
+
106
+ typedef struct blake2s_param__ blake2s_param;
107
+ /* Packed BLAKE2b parameter block. The number in each field comment is the cumulative size in bytes after that field; the fields add up to 64 bytes. */
108
+ BLAKE2_PACKED(struct blake2b_param__
109
+ {
110
+ uint8_t digest_length; /* 1 */
111
+ uint8_t key_length; /* 2 */
112
+ uint8_t fanout; /* 3 */
113
+ uint8_t depth; /* 4 */
114
+ uint32_t leaf_length; /* 8 */
115
+ uint32_t node_offset; /* 12 */
116
+ uint32_t xof_length; /* 16 */
117
+ uint8_t node_depth; /* 17 */
118
+ uint8_t inner_length; /* 18 */
119
+ uint8_t reserved[14]; /* 32 */
120
+ uint8_t salt[BLAKE2B_SALTBYTES]; /* 48 */
121
+ uint8_t personal[BLAKE2B_PERSONALBYTES]; /* 64 */
122
+ });
123
+
124
+ typedef struct blake2b_param__ blake2b_param;
125
+ /* BLAKE2xs state: one blake2s_state (S) bundled with one blake2s_param block (P). */
126
+ typedef struct blake2xs_state__
127
+ {
128
+ blake2s_state S[1];
129
+ blake2s_param P[1];
130
+ } blake2xs_state;
131
+ /* BLAKE2xb state: one blake2b_state (S) bundled with one blake2b_param block (P). */
132
+ typedef struct blake2xb_state__
133
+ {
134
+ blake2b_state S[1];
135
+ blake2b_param P[1];
136
+ } blake2xb_state;
137
+
138
+ /* Compile-time size check: if a parameter struct is padded, its sizeof differs from 32 (BLAKE2S_OUTBYTES) or 64 (BLAKE2B_OUTBYTES), the comparison yields 0, and the constant expression 1/0 fails to compile. */
139
+ enum {
140
+ BLAKE2_DUMMY_1 = 1/(sizeof(blake2s_param) == BLAKE2S_OUTBYTES),
141
+ BLAKE2_DUMMY_2 = 1/(sizeof(blake2b_param) == BLAKE2B_OUTBYTES)
142
+ };
143
+
144
+ /* Streaming API: *_init, *_init_key and *_init_param take the state S to set up, *_update takes an input buffer (in, inlen), *_final takes an output buffer (out, outlen). The meaning of the int return value is not stated in this header. */
145
+ int blake2s_init( blake2s_state *S, size_t outlen );
146
+ int blake2s_init_key( blake2s_state *S, size_t outlen, const void *key, size_t keylen );
147
+ int blake2s_init_param( blake2s_state *S, const blake2s_param *P );
148
+ int blake2s_update( blake2s_state *S, const void *in, size_t inlen );
149
+ int blake2s_final( blake2s_state *S, void *out, size_t outlen );
150
+ /* BLAKE2b: same call shapes as the BLAKE2s functions, operating on blake2b_state / blake2b_param. */
151
+ int blake2b_init( blake2b_state *S, size_t outlen );
152
+ int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen );
153
+ int blake2b_init_param( blake2b_state *S, const blake2b_param *P );
154
+ int blake2b_update( blake2b_state *S, const void *in, size_t inlen );
155
+ int blake2b_final( blake2b_state *S, void *out, size_t outlen );
156
+ /* BLAKE2sp: no *_init_param variant is declared. */
157
+ int blake2sp_init( blake2sp_state *S, size_t outlen );
158
+ int blake2sp_init_key( blake2sp_state *S, size_t outlen, const void *key, size_t keylen );
159
+ int blake2sp_update( blake2sp_state *S, const void *in, size_t inlen );
160
+ int blake2sp_final( blake2sp_state *S, void *out, size_t outlen );
161
+ /* BLAKE2bp: no *_init_param variant is declared. */
162
+ int blake2bp_init( blake2bp_state *S, size_t outlen );
163
+ int blake2bp_init_key( blake2bp_state *S, size_t outlen, const void *key, size_t keylen );
164
+ int blake2bp_update( blake2bp_state *S, const void *in, size_t inlen );
165
+ int blake2bp_final( blake2bp_state *S, void *out, size_t outlen );
166
+
167
+ /* Variable output length API (blake2xs_* and blake2xb_*): init, init_key, update and final only; outlen is a parameter of both the init calls and the final call. */
168
+ int blake2xs_init( blake2xs_state *S, const size_t outlen );
169
+ int blake2xs_init_key( blake2xs_state *S, const size_t outlen, const void *key, size_t keylen );
170
+ int blake2xs_update( blake2xs_state *S, const void *in, size_t inlen );
171
+ int blake2xs_final(blake2xs_state *S, void *out, size_t outlen);
172
+
173
+ int blake2xb_init( blake2xb_state *S, const size_t outlen );
174
+ int blake2xb_init_key( blake2xb_state *S, const size_t outlen, const void *key, size_t keylen );
175
+ int blake2xb_update( blake2xb_state *S, const void *in, size_t inlen );
176
+ int blake2xb_final(blake2xb_state *S, void *out, size_t outlen);
177
+
178
+ /* Simple API: one call per variant, all with the same signature - output buffer (out, outlen), input (in, inlen), key (key, keylen). NOTE(review): whether key may be NULL with keylen 0 is not visible in this header - confirm in the implementation. */
179
+ int blake2s( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );
180
+ int blake2b( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );
181
+
182
+ int blake2sp( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );
183
+ int blake2bp( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );
184
+
185
+ int blake2xs( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );
186
+ int blake2xb( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );
187
+
188
+ /* This is simply an alias for blake2b */
189
+ int blake2( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );
190
+
191
+ #if defined(__cplusplus)
192
+ }
193
+ #endif
194
+
195
+ #endif
@@ -0,0 +1,68 @@
1
+ /*
2
+ BLAKE2 reference source code package - optimized C implementations
3
+
4
+ Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
5
+ terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
6
+ your option. The terms of these licenses can be found at:
7
+
8
+ - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
9
+ - OpenSSL license : https://www.openssl.org/source/license.html
10
+ - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
11
+
12
+ More information about the BLAKE2 hash function can be found at
13
+ https://blake2.net.
14
+ */
15
+ #ifndef BLAKE2B_LOAD_SSE2_H
16
+ #define BLAKE2B_LOAD_SSE2_H
17
+ /* LOAD_MSG_<r>_<i>(b0, b1), SSE2 variants: _mm_set_epi64x(hi, lo) builds b0 and b1 from the 64-bit values m0..m15, which must be in scope where the macro is expanded. Each macro expands to two bare statements (no do { } while(0) wrapper), so use it only as a full statement inside braces. */
18
+ #define LOAD_MSG_0_1(b0, b1) b0 = _mm_set_epi64x(m2, m0); b1 = _mm_set_epi64x(m6, m4)
19
+ #define LOAD_MSG_0_2(b0, b1) b0 = _mm_set_epi64x(m3, m1); b1 = _mm_set_epi64x(m7, m5)
20
+ #define LOAD_MSG_0_3(b0, b1) b0 = _mm_set_epi64x(m10, m8); b1 = _mm_set_epi64x(m14, m12)
21
+ #define LOAD_MSG_0_4(b0, b1) b0 = _mm_set_epi64x(m11, m9); b1 = _mm_set_epi64x(m15, m13)
22
+ #define LOAD_MSG_1_1(b0, b1) b0 = _mm_set_epi64x(m4, m14); b1 = _mm_set_epi64x(m13, m9)
23
+ #define LOAD_MSG_1_2(b0, b1) b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m6, m15)
24
+ #define LOAD_MSG_1_3(b0, b1) b0 = _mm_set_epi64x(m0, m1); b1 = _mm_set_epi64x(m5, m11)
25
+ #define LOAD_MSG_1_4(b0, b1) b0 = _mm_set_epi64x(m2, m12); b1 = _mm_set_epi64x(m3, m7)
26
+ #define LOAD_MSG_2_1(b0, b1) b0 = _mm_set_epi64x(m12, m11); b1 = _mm_set_epi64x(m15, m5)
27
+ #define LOAD_MSG_2_2(b0, b1) b0 = _mm_set_epi64x(m0, m8); b1 = _mm_set_epi64x(m13, m2)
28
+ #define LOAD_MSG_2_3(b0, b1) b0 = _mm_set_epi64x(m3, m10); b1 = _mm_set_epi64x(m9, m7)
29
+ #define LOAD_MSG_2_4(b0, b1) b0 = _mm_set_epi64x(m6, m14); b1 = _mm_set_epi64x(m4, m1)
30
+ #define LOAD_MSG_3_1(b0, b1) b0 = _mm_set_epi64x(m3, m7); b1 = _mm_set_epi64x(m11, m13)
31
+ #define LOAD_MSG_3_2(b0, b1) b0 = _mm_set_epi64x(m1, m9); b1 = _mm_set_epi64x(m14, m12)
32
+ #define LOAD_MSG_3_3(b0, b1) b0 = _mm_set_epi64x(m5, m2); b1 = _mm_set_epi64x(m15, m4)
33
+ #define LOAD_MSG_3_4(b0, b1) b0 = _mm_set_epi64x(m10, m6); b1 = _mm_set_epi64x(m8, m0)
34
+ #define LOAD_MSG_4_1(b0, b1) b0 = _mm_set_epi64x(m5, m9); b1 = _mm_set_epi64x(m10, m2)
35
+ #define LOAD_MSG_4_2(b0, b1) b0 = _mm_set_epi64x(m7, m0); b1 = _mm_set_epi64x(m15, m4)
36
+ #define LOAD_MSG_4_3(b0, b1) b0 = _mm_set_epi64x(m11, m14); b1 = _mm_set_epi64x(m3, m6)
37
+ #define LOAD_MSG_4_4(b0, b1) b0 = _mm_set_epi64x(m12, m1); b1 = _mm_set_epi64x(m13, m8)
38
+ #define LOAD_MSG_5_1(b0, b1) b0 = _mm_set_epi64x(m6, m2); b1 = _mm_set_epi64x(m8, m0)
39
+ #define LOAD_MSG_5_2(b0, b1) b0 = _mm_set_epi64x(m10, m12); b1 = _mm_set_epi64x(m3, m11)
40
+ #define LOAD_MSG_5_3(b0, b1) b0 = _mm_set_epi64x(m7, m4); b1 = _mm_set_epi64x(m1, m15)
41
+ #define LOAD_MSG_5_4(b0, b1) b0 = _mm_set_epi64x(m5, m13); b1 = _mm_set_epi64x(m9, m14)
42
+ #define LOAD_MSG_6_1(b0, b1) b0 = _mm_set_epi64x(m1, m12); b1 = _mm_set_epi64x(m4, m14)
43
+ #define LOAD_MSG_6_2(b0, b1) b0 = _mm_set_epi64x(m15, m5); b1 = _mm_set_epi64x(m10, m13)
44
+ #define LOAD_MSG_6_3(b0, b1) b0 = _mm_set_epi64x(m6, m0); b1 = _mm_set_epi64x(m8, m9)
45
+ #define LOAD_MSG_6_4(b0, b1) b0 = _mm_set_epi64x(m3, m7); b1 = _mm_set_epi64x(m11, m2)
46
+ #define LOAD_MSG_7_1(b0, b1) b0 = _mm_set_epi64x(m7, m13); b1 = _mm_set_epi64x(m3, m12)
47
+ #define LOAD_MSG_7_2(b0, b1) b0 = _mm_set_epi64x(m14, m11); b1 = _mm_set_epi64x(m9, m1)
48
+ #define LOAD_MSG_7_3(b0, b1) b0 = _mm_set_epi64x(m15, m5); b1 = _mm_set_epi64x(m2, m8)
49
+ #define LOAD_MSG_7_4(b0, b1) b0 = _mm_set_epi64x(m4, m0); b1 = _mm_set_epi64x(m10, m6)
50
+ #define LOAD_MSG_8_1(b0, b1) b0 = _mm_set_epi64x(m14, m6); b1 = _mm_set_epi64x(m0, m11)
51
+ #define LOAD_MSG_8_2(b0, b1) b0 = _mm_set_epi64x(m9, m15); b1 = _mm_set_epi64x(m8, m3)
52
+ #define LOAD_MSG_8_3(b0, b1) b0 = _mm_set_epi64x(m13, m12); b1 = _mm_set_epi64x(m10, m1)
53
+ #define LOAD_MSG_8_4(b0, b1) b0 = _mm_set_epi64x(m7, m2); b1 = _mm_set_epi64x(m5, m4)
54
+ #define LOAD_MSG_9_1(b0, b1) b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m1, m7)
55
+ #define LOAD_MSG_9_2(b0, b1) b0 = _mm_set_epi64x(m4, m2); b1 = _mm_set_epi64x(m5, m6)
56
+ #define LOAD_MSG_9_3(b0, b1) b0 = _mm_set_epi64x(m9, m15); b1 = _mm_set_epi64x(m13, m3)
57
+ #define LOAD_MSG_9_4(b0, b1) b0 = _mm_set_epi64x(m14, m11); b1 = _mm_set_epi64x(m0, m12)
58
+ #define LOAD_MSG_10_1(b0, b1) b0 = _mm_set_epi64x(m2, m0); b1 = _mm_set_epi64x(m6, m4) /* same expansion as LOAD_MSG_0_1 */
59
+ #define LOAD_MSG_10_2(b0, b1) b0 = _mm_set_epi64x(m3, m1); b1 = _mm_set_epi64x(m7, m5) /* same expansion as LOAD_MSG_0_2 */
60
+ #define LOAD_MSG_10_3(b0, b1) b0 = _mm_set_epi64x(m10, m8); b1 = _mm_set_epi64x(m14, m12) /* same expansion as LOAD_MSG_0_3 */
61
+ #define LOAD_MSG_10_4(b0, b1) b0 = _mm_set_epi64x(m11, m9); b1 = _mm_set_epi64x(m15, m13) /* same expansion as LOAD_MSG_0_4 */
62
+ #define LOAD_MSG_11_1(b0, b1) b0 = _mm_set_epi64x(m4, m14); b1 = _mm_set_epi64x(m13, m9) /* same expansion as LOAD_MSG_1_1 */
63
+ #define LOAD_MSG_11_2(b0, b1) b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m6, m15) /* same expansion as LOAD_MSG_1_2 */
64
+ #define LOAD_MSG_11_3(b0, b1) b0 = _mm_set_epi64x(m0, m1); b1 = _mm_set_epi64x(m5, m11) /* same expansion as LOAD_MSG_1_3 */
65
+ #define LOAD_MSG_11_4(b0, b1) b0 = _mm_set_epi64x(m2, m12); b1 = _mm_set_epi64x(m3, m7) /* same expansion as LOAD_MSG_1_4 */
66
+
67
+
68
+ #endif
@@ -0,0 +1,402 @@
1
+ /*
2
+ BLAKE2 reference source code package - optimized C implementations
3
+
4
+ Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
5
+ terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
6
+ your option. The terms of these licenses can be found at:
7
+
8
+ - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
9
+ - OpenSSL license : https://www.openssl.org/source/license.html
10
+ - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
11
+
12
+ More information about the BLAKE2 hash function can be found at
13
+ https://blake2.net.
14
+ */
15
+ #ifndef BLAKE2B_LOAD_SSE41_H
16
+ #define BLAKE2B_LOAD_SSE41_H
17
+ /* LOAD_MSG_<r>_<i>(b0, b1), SSE4.1 variants: each assigns b0 and b1 from the 128-bit values m0..m7, which must be in scope where the macro is expanded, inside a do { } while(0) wrapper. Index 0 uses only _mm_unpacklo_epi64(a, b) = {low a, low b} and _mm_unpackhi_epi64(a, b) = {high a, high b} (low lane listed first). */
18
+ #define LOAD_MSG_0_1(b0, b1) \
19
+ do \
20
+ { \
21
+ b0 = _mm_unpacklo_epi64(m0, m1); \
22
+ b1 = _mm_unpacklo_epi64(m2, m3); \
23
+ } while(0)
24
+
25
+
26
+ #define LOAD_MSG_0_2(b0, b1) \
27
+ do \
28
+ { \
29
+ b0 = _mm_unpackhi_epi64(m0, m1); \
30
+ b1 = _mm_unpackhi_epi64(m2, m3); \
31
+ } while(0)
32
+
33
+
34
+ #define LOAD_MSG_0_3(b0, b1) \
35
+ do \
36
+ { \
37
+ b0 = _mm_unpacklo_epi64(m4, m5); \
38
+ b1 = _mm_unpacklo_epi64(m6, m7); \
39
+ } while(0)
40
+
41
+
42
+ #define LOAD_MSG_0_4(b0, b1) \
43
+ do \
44
+ { \
45
+ b0 = _mm_unpackhi_epi64(m4, m5); \
46
+ b1 = _mm_unpackhi_epi64(m6, m7); \
47
+ } while(0)
48
+
49
+ /* LOAD_MSG_1_*: besides the unpacks, _mm_alignr_epi8(a, b, 8) yields {high half of b, low half of a} and _mm_shuffle_epi32(x, _MM_SHUFFLE(1,0,3,2)) swaps the two 64-bit halves of x (low lane listed first). */
50
+ #define LOAD_MSG_1_1(b0, b1) \
51
+ do \
52
+ { \
53
+ b0 = _mm_unpacklo_epi64(m7, m2); \
54
+ b1 = _mm_unpackhi_epi64(m4, m6); \
55
+ } while(0)
56
+
57
+
58
+ #define LOAD_MSG_1_2(b0, b1) \
59
+ do \
60
+ { \
61
+ b0 = _mm_unpacklo_epi64(m5, m4); \
62
+ b1 = _mm_alignr_epi8(m3, m7, 8); \
63
+ } while(0)
64
+
65
+
66
+ #define LOAD_MSG_1_3(b0, b1) \
67
+ do \
68
+ { \
69
+ b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)); \
70
+ b1 = _mm_unpackhi_epi64(m5, m2); \
71
+ } while(0)
72
+
73
+
74
+ #define LOAD_MSG_1_4(b0, b1) \
75
+ do \
76
+ { \
77
+ b0 = _mm_unpacklo_epi64(m6, m1); \
78
+ b1 = _mm_unpackhi_epi64(m3, m1); \
79
+ } while(0)
80
+
81
+ /* LOAD_MSG_2_*: _mm_blend_epi16(a, b, 0xF0) yields {low half of a, high half of b}; _mm_alignr_epi8(a, b, 8) yields {high half of b, low half of a} (low lane listed first). */
82
+ #define LOAD_MSG_2_1(b0, b1) \
83
+ do \
84
+ { \
85
+ b0 = _mm_alignr_epi8(m6, m5, 8); \
86
+ b1 = _mm_unpackhi_epi64(m2, m7); \
87
+ } while(0)
88
+
89
+
90
+ #define LOAD_MSG_2_2(b0, b1) \
91
+ do \
92
+ { \
93
+ b0 = _mm_unpacklo_epi64(m4, m0); \
94
+ b1 = _mm_blend_epi16(m1, m6, 0xF0); \
95
+ } while(0)
96
+
97
+
98
+ #define LOAD_MSG_2_3(b0, b1) \
99
+ do \
100
+ { \
101
+ b0 = _mm_blend_epi16(m5, m1, 0xF0); \
102
+ b1 = _mm_unpackhi_epi64(m3, m4); \
103
+ } while(0)
104
+
105
+
106
+ #define LOAD_MSG_2_4(b0, b1) \
107
+ do \
108
+ { \
109
+ b0 = _mm_unpacklo_epi64(m7, m3); \
110
+ b1 = _mm_alignr_epi8(m2, m0, 8); \
111
+ } while(0)
112
+
113
+ /* LOAD_MSG_3_*: unpack and blend only; _mm_blend_epi16(a, b, 0xF0) keeps the low 64 bits of a and the high 64 bits of b. */
114
+ #define LOAD_MSG_3_1(b0, b1) \
115
+ do \
116
+ { \
117
+ b0 = _mm_unpackhi_epi64(m3, m1); \
118
+ b1 = _mm_unpackhi_epi64(m6, m5); \
119
+ } while(0)
120
+
121
+
122
+ #define LOAD_MSG_3_2(b0, b1) \
123
+ do \
124
+ { \
125
+ b0 = _mm_unpackhi_epi64(m4, m0); \
126
+ b1 = _mm_unpacklo_epi64(m6, m7); \
127
+ } while(0)
128
+
129
+
130
+ #define LOAD_MSG_3_3(b0, b1) \
131
+ do \
132
+ { \
133
+ b0 = _mm_blend_epi16(m1, m2, 0xF0); \
134
+ b1 = _mm_blend_epi16(m2, m7, 0xF0); \
135
+ } while(0)
136
+
137
+
138
+ #define LOAD_MSG_3_4(b0, b1) \
139
+ do \
140
+ { \
141
+ b0 = _mm_unpacklo_epi64(m3, m5); \
142
+ b1 = _mm_unpacklo_epi64(m0, m4); \
143
+ } while(0)
144
+
145
+ /* LOAD_MSG_4_*: mostly blends; _mm_blend_epi16(a, b, 0xF0) yields {low half of a, high half of b} and _mm_alignr_epi8(a, b, 8) yields {high half of b, low half of a} (low lane listed first). */
146
+ #define LOAD_MSG_4_1(b0, b1) \
147
+ do \
148
+ { \
149
+ b0 = _mm_unpackhi_epi64(m4, m2); \
150
+ b1 = _mm_unpacklo_epi64(m1, m5); \
151
+ } while(0)
152
+
153
+
154
+ #define LOAD_MSG_4_2(b0, b1) \
155
+ do \
156
+ { \
157
+ b0 = _mm_blend_epi16(m0, m3, 0xF0); \
158
+ b1 = _mm_blend_epi16(m2, m7, 0xF0); \
159
+ } while(0)
160
+
161
+
162
+ #define LOAD_MSG_4_3(b0, b1) \
163
+ do \
164
+ { \
165
+ b0 = _mm_blend_epi16(m7, m5, 0xF0); \
166
+ b1 = _mm_blend_epi16(m3, m1, 0xF0); \
167
+ } while(0)
168
+
169
+
170
+ #define LOAD_MSG_4_4(b0, b1) \
171
+ do \
172
+ { \
173
+ b0 = _mm_alignr_epi8(m6, m0, 8); \
174
+ b1 = _mm_blend_epi16(m4, m6, 0xF0); \
175
+ } while(0)
176
+
177
+ /* LOAD_MSG_5_*: unpack and blend only; _mm_blend_epi16(a, b, 0xF0) keeps the low 64 bits of a and the high 64 bits of b. */
178
+ #define LOAD_MSG_5_1(b0, b1) \
179
+ do \
180
+ { \
181
+ b0 = _mm_unpacklo_epi64(m1, m3); \
182
+ b1 = _mm_unpacklo_epi64(m0, m4); \
183
+ } while(0)
184
+
185
+
186
+ #define LOAD_MSG_5_2(b0, b1) \
187
+ do \
188
+ { \
189
+ b0 = _mm_unpacklo_epi64(m6, m5); \
190
+ b1 = _mm_unpackhi_epi64(m5, m1); \
191
+ } while(0)
192
+
193
+
194
+ #define LOAD_MSG_5_3(b0, b1) \
195
+ do \
196
+ { \
197
+ b0 = _mm_blend_epi16(m2, m3, 0xF0); \
198
+ b1 = _mm_unpackhi_epi64(m7, m0); \
199
+ } while(0)
200
+
201
+
202
+ #define LOAD_MSG_5_4(b0, b1) \
203
+ do \
204
+ { \
205
+ b0 = _mm_unpackhi_epi64(m6, m2); \
206
+ b1 = _mm_blend_epi16(m7, m4, 0xF0); \
207
+ } while(0)
208
+
209
+ /* LOAD_MSG_6_*: uses blend, unpack, alignr and shuffle; _mm_shuffle_epi32(x, _MM_SHUFFLE(1,0,3,2)) swaps the two 64-bit halves of x, _mm_alignr_epi8(a, b, 8) yields {high half of b, low half of a} (low lane listed first). */
210
+ #define LOAD_MSG_6_1(b0, b1) \
211
+ do \
212
+ { \
213
+ b0 = _mm_blend_epi16(m6, m0, 0xF0); \
214
+ b1 = _mm_unpacklo_epi64(m7, m2); \
215
+ } while(0)
216
+
217
+
218
+ #define LOAD_MSG_6_2(b0, b1) \
219
+ do \
220
+ { \
221
+ b0 = _mm_unpackhi_epi64(m2, m7); \
222
+ b1 = _mm_alignr_epi8(m5, m6, 8); \
223
+ } while(0)
224
+
225
+
226
+ #define LOAD_MSG_6_3(b0, b1) \
227
+ do \
228
+ { \
229
+ b0 = _mm_unpacklo_epi64(m0, m3); \
230
+ b1 = _mm_shuffle_epi32(m4, _MM_SHUFFLE(1,0,3,2)); \
231
+ } while(0)
232
+
233
+
234
+ #define LOAD_MSG_6_4(b0, b1) \
235
+ do \
236
+ { \
237
+ b0 = _mm_unpackhi_epi64(m3, m1); \
238
+ b1 = _mm_blend_epi16(m1, m5, 0xF0); \
239
+ } while(0)
240
+
241
+ /* LOAD_MSG_7_*: unpack, one blend and one alignr; _mm_blend_epi16(a, b, 0xF0) yields {low half of a, high half of b}, _mm_alignr_epi8(a, b, 8) yields {high half of b, low half of a} (low lane listed first). */
242
+ #define LOAD_MSG_7_1(b0, b1) \
243
+ do \
244
+ { \
245
+ b0 = _mm_unpackhi_epi64(m6, m3); \
246
+ b1 = _mm_blend_epi16(m6, m1, 0xF0); \
247
+ } while(0)
248
+
249
+
250
+ #define LOAD_MSG_7_2(b0, b1) \
251
+ do \
252
+ { \
253
+ b0 = _mm_alignr_epi8(m7, m5, 8); \
254
+ b1 = _mm_unpackhi_epi64(m0, m4); \
255
+ } while(0)
256
+
257
+
258
+ #define LOAD_MSG_7_3(b0, b1) \
259
+ do \
260
+ { \
261
+ b0 = _mm_unpackhi_epi64(m2, m7); \
262
+ b1 = _mm_unpacklo_epi64(m4, m1); \
263
+ } while(0)
264
+
265
+
266
+ #define LOAD_MSG_7_4(b0, b1) \
267
+ do \
268
+ { \
269
+ b0 = _mm_unpacklo_epi64(m0, m2); \
270
+ b1 = _mm_unpacklo_epi64(m3, m5); \
271
+ } while(0)
272
+
273
+ /* LOAD_MSG_8_*: the only group with plain copies (b0 = m6 in _3, b1 = m2 in _4); _mm_alignr_epi8(a, b, 8) yields {high half of b, low half of a} (low lane listed first). */
274
+ #define LOAD_MSG_8_1(b0, b1) \
275
+ do \
276
+ { \
277
+ b0 = _mm_unpacklo_epi64(m3, m7); \
278
+ b1 = _mm_alignr_epi8(m0, m5, 8); \
279
+ } while(0)
280
+
281
+
282
+ #define LOAD_MSG_8_2(b0, b1) \
283
+ do \
284
+ { \
285
+ b0 = _mm_unpackhi_epi64(m7, m4); \
286
+ b1 = _mm_alignr_epi8(m4, m1, 8); \
287
+ } while(0)
288
+
289
+
290
+ #define LOAD_MSG_8_3(b0, b1) \
291
+ do \
292
+ { \
293
+ b0 = m6; \
294
+ b1 = _mm_alignr_epi8(m5, m0, 8); \
295
+ } while(0)
296
+
297
+
298
+ #define LOAD_MSG_8_4(b0, b1) \
299
+ do \
300
+ { \
301
+ b0 = _mm_blend_epi16(m1, m3, 0xF0); \
302
+ b1 = m2; \
303
+ } while(0)
304
+
305
+ /* LOAD_MSG_9_*: unpack, one blend and one alignr; _mm_blend_epi16(a, b, 0xF0) yields {low half of a, high half of b}, _mm_alignr_epi8(a, b, 8) yields {high half of b, low half of a} (low lane listed first). */
306
+ #define LOAD_MSG_9_1(b0, b1) \
307
+ do \
308
+ { \
309
+ b0 = _mm_unpacklo_epi64(m5, m4); \
310
+ b1 = _mm_unpackhi_epi64(m3, m0); \
311
+ } while(0)
312
+
313
+
314
+ #define LOAD_MSG_9_2(b0, b1) \
315
+ do \
316
+ { \
317
+ b0 = _mm_unpacklo_epi64(m1, m2); \
318
+ b1 = _mm_blend_epi16(m3, m2, 0xF0); \
319
+ } while(0)
320
+
321
+
322
+ #define LOAD_MSG_9_3(b0, b1) \
323
+ do \
324
+ { \
325
+ b0 = _mm_unpackhi_epi64(m7, m4); \
326
+ b1 = _mm_unpackhi_epi64(m1, m6); \
327
+ } while(0)
328
+
329
+
330
+ #define LOAD_MSG_9_4(b0, b1) \
331
+ do \
332
+ { \
333
+ b0 = _mm_alignr_epi8(m7, m5, 8); \
334
+ b1 = _mm_unpacklo_epi64(m6, m0); \
335
+ } while(0)
336
+
337
+ /* LOAD_MSG_10_*: the four bodies are identical to LOAD_MSG_0_1 .. LOAD_MSG_0_4 in this header. */
338
+ #define LOAD_MSG_10_1(b0, b1) \
339
+ do \
340
+ { \
341
+ b0 = _mm_unpacklo_epi64(m0, m1); \
342
+ b1 = _mm_unpacklo_epi64(m2, m3); \
343
+ } while(0)
344
+
345
+
346
+ #define LOAD_MSG_10_2(b0, b1) \
347
+ do \
348
+ { \
349
+ b0 = _mm_unpackhi_epi64(m0, m1); \
350
+ b1 = _mm_unpackhi_epi64(m2, m3); \
351
+ } while(0)
352
+
353
+
354
+ #define LOAD_MSG_10_3(b0, b1) \
355
+ do \
356
+ { \
357
+ b0 = _mm_unpacklo_epi64(m4, m5); \
358
+ b1 = _mm_unpacklo_epi64(m6, m7); \
359
+ } while(0)
360
+
361
+
362
+ #define LOAD_MSG_10_4(b0, b1) \
363
+ do \
364
+ { \
365
+ b0 = _mm_unpackhi_epi64(m4, m5); \
366
+ b1 = _mm_unpackhi_epi64(m6, m7); \
367
+ } while(0)
368
+
369
+ /* LOAD_MSG_11_*: the four bodies are identical to LOAD_MSG_1_1 .. LOAD_MSG_1_4 in this header. */
370
+ #define LOAD_MSG_11_1(b0, b1) \
371
+ do \
372
+ { \
373
+ b0 = _mm_unpacklo_epi64(m7, m2); \
374
+ b1 = _mm_unpackhi_epi64(m4, m6); \
375
+ } while(0)
376
+
377
+
378
+ #define LOAD_MSG_11_2(b0, b1) \
379
+ do \
380
+ { \
381
+ b0 = _mm_unpacklo_epi64(m5, m4); \
382
+ b1 = _mm_alignr_epi8(m3, m7, 8); \
383
+ } while(0)
384
+
385
+
386
+ #define LOAD_MSG_11_3(b0, b1) \
387
+ do \
388
+ { \
389
+ b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)); \
390
+ b1 = _mm_unpackhi_epi64(m5, m2); \
391
+ } while(0)
392
+
393
+
394
+ #define LOAD_MSG_11_4(b0, b1) \
395
+ do \
396
+ { \
397
+ b0 = _mm_unpacklo_epi64(m6, m1); \
398
+ b1 = _mm_unpackhi_epi64(m3, m1); \
399
+ } while(0)
400
+
401
+
402
+ #endif