digest-blake2b 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,68 @@
1
+ /*
2
+ BLAKE2 reference source code package - optimized C implementations
3
+
4
+ Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
5
+ terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
6
+ your option. The terms of these licenses can be found at:
7
+
8
+ - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
9
+ - OpenSSL license : https://www.openssl.org/source/license.html
10
+ - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
11
+
12
+ More information about the BLAKE2 hash function can be found at
13
+ https://blake2.net.
14
+ */
15
+ #ifndef BLAKE2B_LOAD_SSE2_H
16
+ #define BLAKE2B_LOAD_SSE2_H
17
+
18
/*
 * LOAD_MSG_0_1 .. LOAD_MSG_0_4 (SSE2 variant).
 *
 * Each macro assigns b0 and b1 from _mm_set_epi64x(hi, lo); per the Intel
 * intrinsics reference the first argument becomes the upper 64-bit lane and
 * the second the lower lane.  m0..m15 are not parameters and must be visible
 * at the expansion site (presumably the sixteen 64-bit message words of the
 * block being compressed -- confirm against the including file).
 *
 * Both assignments sit inside do { } while (0) so that "MACRO(x, y);" is a
 * single statement, including under an unbraced if/else or loop.
 */
#define LOAD_MSG_0_1(b0, b1) \
  do { \
    (b0) = _mm_set_epi64x(m2, m0); \
    (b1) = _mm_set_epi64x(m6, m4); \
  } while (0)

#define LOAD_MSG_0_2(b0, b1) \
  do { \
    (b0) = _mm_set_epi64x(m3, m1); \
    (b1) = _mm_set_epi64x(m7, m5); \
  } while (0)

#define LOAD_MSG_0_3(b0, b1) \
  do { \
    (b0) = _mm_set_epi64x(m10, m8); \
    (b1) = _mm_set_epi64x(m14, m12); \
  } while (0)

#define LOAD_MSG_0_4(b0, b1) \
  do { \
    (b0) = _mm_set_epi64x(m11, m9); \
    (b1) = _mm_set_epi64x(m15, m13); \
  } while (0)
22
/*
 * LOAD_MSG_1_1 .. LOAD_MSG_1_4 (SSE2 variant).
 *
 * Each macro assigns b0 and b1 from _mm_set_epi64x(hi, lo); per the Intel
 * intrinsics reference the first argument becomes the upper 64-bit lane and
 * the second the lower lane.  m0..m15 are not parameters and must be visible
 * at the expansion site (presumably the sixteen 64-bit message words of the
 * block being compressed -- confirm against the including file).
 *
 * Both assignments sit inside do { } while (0) so that "MACRO(x, y);" is a
 * single statement, including under an unbraced if/else or loop.
 */
#define LOAD_MSG_1_1(b0, b1) \
  do { \
    (b0) = _mm_set_epi64x(m4, m14); \
    (b1) = _mm_set_epi64x(m13, m9); \
  } while (0)

#define LOAD_MSG_1_2(b0, b1) \
  do { \
    (b0) = _mm_set_epi64x(m8, m10); \
    (b1) = _mm_set_epi64x(m6, m15); \
  } while (0)

#define LOAD_MSG_1_3(b0, b1) \
  do { \
    (b0) = _mm_set_epi64x(m0, m1); \
    (b1) = _mm_set_epi64x(m5, m11); \
  } while (0)

#define LOAD_MSG_1_4(b0, b1) \
  do { \
    (b0) = _mm_set_epi64x(m2, m12); \
    (b1) = _mm_set_epi64x(m3, m7); \
  } while (0)
26
/*
 * LOAD_MSG_2_1 .. LOAD_MSG_2_4 (SSE2 variant).
 *
 * Each macro assigns b0 and b1 from _mm_set_epi64x(hi, lo); per the Intel
 * intrinsics reference the first argument becomes the upper 64-bit lane and
 * the second the lower lane.  m0..m15 are not parameters and must be visible
 * at the expansion site (presumably the sixteen 64-bit message words of the
 * block being compressed -- confirm against the including file).
 *
 * Both assignments sit inside do { } while (0) so that "MACRO(x, y);" is a
 * single statement, including under an unbraced if/else or loop.
 */
#define LOAD_MSG_2_1(b0, b1) \
  do { \
    (b0) = _mm_set_epi64x(m12, m11); \
    (b1) = _mm_set_epi64x(m15, m5); \
  } while (0)

#define LOAD_MSG_2_2(b0, b1) \
  do { \
    (b0) = _mm_set_epi64x(m0, m8); \
    (b1) = _mm_set_epi64x(m13, m2); \
  } while (0)

#define LOAD_MSG_2_3(b0, b1) \
  do { \
    (b0) = _mm_set_epi64x(m3, m10); \
    (b1) = _mm_set_epi64x(m9, m7); \
  } while (0)

#define LOAD_MSG_2_4(b0, b1) \
  do { \
    (b0) = _mm_set_epi64x(m6, m14); \
    (b1) = _mm_set_epi64x(m4, m1); \
  } while (0)
30
/*
 * LOAD_MSG_3_1 .. LOAD_MSG_3_4 (SSE2 variant).
 *
 * Each macro assigns b0 and b1 from _mm_set_epi64x(hi, lo); per the Intel
 * intrinsics reference the first argument becomes the upper 64-bit lane and
 * the second the lower lane.  m0..m15 are not parameters and must be visible
 * at the expansion site (presumably the sixteen 64-bit message words of the
 * block being compressed -- confirm against the including file).
 *
 * Both assignments sit inside do { } while (0) so that "MACRO(x, y);" is a
 * single statement, including under an unbraced if/else or loop.
 */
#define LOAD_MSG_3_1(b0, b1) \
  do { \
    (b0) = _mm_set_epi64x(m3, m7); \
    (b1) = _mm_set_epi64x(m11, m13); \
  } while (0)

#define LOAD_MSG_3_2(b0, b1) \
  do { \
    (b0) = _mm_set_epi64x(m1, m9); \
    (b1) = _mm_set_epi64x(m14, m12); \
  } while (0)

#define LOAD_MSG_3_3(b0, b1) \
  do { \
    (b0) = _mm_set_epi64x(m5, m2); \
    (b1) = _mm_set_epi64x(m15, m4); \
  } while (0)

#define LOAD_MSG_3_4(b0, b1) \
  do { \
    (b0) = _mm_set_epi64x(m10, m6); \
    (b1) = _mm_set_epi64x(m8, m0); \
  } while (0)
34
/*
 * LOAD_MSG_4_1 .. LOAD_MSG_4_4 (SSE2 variant).
 *
 * Each macro assigns b0 and b1 from _mm_set_epi64x(hi, lo); per the Intel
 * intrinsics reference the first argument becomes the upper 64-bit lane and
 * the second the lower lane.  m0..m15 are not parameters and must be visible
 * at the expansion site (presumably the sixteen 64-bit message words of the
 * block being compressed -- confirm against the including file).
 *
 * Both assignments sit inside do { } while (0) so that "MACRO(x, y);" is a
 * single statement, including under an unbraced if/else or loop.
 */
#define LOAD_MSG_4_1(b0, b1) \
  do { \
    (b0) = _mm_set_epi64x(m5, m9); \
    (b1) = _mm_set_epi64x(m10, m2); \
  } while (0)

#define LOAD_MSG_4_2(b0, b1) \
  do { \
    (b0) = _mm_set_epi64x(m7, m0); \
    (b1) = _mm_set_epi64x(m15, m4); \
  } while (0)

#define LOAD_MSG_4_3(b0, b1) \
  do { \
    (b0) = _mm_set_epi64x(m11, m14); \
    (b1) = _mm_set_epi64x(m3, m6); \
  } while (0)

#define LOAD_MSG_4_4(b0, b1) \
  do { \
    (b0) = _mm_set_epi64x(m12, m1); \
    (b1) = _mm_set_epi64x(m13, m8); \
  } while (0)
38
/*
 * LOAD_MSG_5_1 .. LOAD_MSG_5_4 (SSE2 variant).
 *
 * Each macro assigns b0 and b1 from _mm_set_epi64x(hi, lo); per the Intel
 * intrinsics reference the first argument becomes the upper 64-bit lane and
 * the second the lower lane.  m0..m15 are not parameters and must be visible
 * at the expansion site (presumably the sixteen 64-bit message words of the
 * block being compressed -- confirm against the including file).
 *
 * Both assignments sit inside do { } while (0) so that "MACRO(x, y);" is a
 * single statement, including under an unbraced if/else or loop.
 */
#define LOAD_MSG_5_1(b0, b1) \
  do { \
    (b0) = _mm_set_epi64x(m6, m2); \
    (b1) = _mm_set_epi64x(m8, m0); \
  } while (0)

#define LOAD_MSG_5_2(b0, b1) \
  do { \
    (b0) = _mm_set_epi64x(m10, m12); \
    (b1) = _mm_set_epi64x(m3, m11); \
  } while (0)

#define LOAD_MSG_5_3(b0, b1) \
  do { \
    (b0) = _mm_set_epi64x(m7, m4); \
    (b1) = _mm_set_epi64x(m1, m15); \
  } while (0)

#define LOAD_MSG_5_4(b0, b1) \
  do { \
    (b0) = _mm_set_epi64x(m5, m13); \
    (b1) = _mm_set_epi64x(m9, m14); \
  } while (0)
42
/*
 * LOAD_MSG_6_1 .. LOAD_MSG_6_4 (SSE2 variant).
 *
 * Each macro assigns b0 and b1 from _mm_set_epi64x(hi, lo); per the Intel
 * intrinsics reference the first argument becomes the upper 64-bit lane and
 * the second the lower lane.  m0..m15 are not parameters and must be visible
 * at the expansion site (presumably the sixteen 64-bit message words of the
 * block being compressed -- confirm against the including file).
 *
 * Both assignments sit inside do { } while (0) so that "MACRO(x, y);" is a
 * single statement, including under an unbraced if/else or loop.
 */
#define LOAD_MSG_6_1(b0, b1) \
  do { \
    (b0) = _mm_set_epi64x(m1, m12); \
    (b1) = _mm_set_epi64x(m4, m14); \
  } while (0)

#define LOAD_MSG_6_2(b0, b1) \
  do { \
    (b0) = _mm_set_epi64x(m15, m5); \
    (b1) = _mm_set_epi64x(m10, m13); \
  } while (0)

#define LOAD_MSG_6_3(b0, b1) \
  do { \
    (b0) = _mm_set_epi64x(m6, m0); \
    (b1) = _mm_set_epi64x(m8, m9); \
  } while (0)

#define LOAD_MSG_6_4(b0, b1) \
  do { \
    (b0) = _mm_set_epi64x(m3, m7); \
    (b1) = _mm_set_epi64x(m11, m2); \
  } while (0)
46
/*
 * LOAD_MSG_7_1 .. LOAD_MSG_7_4 (SSE2 variant).
 *
 * Each macro assigns b0 and b1 from _mm_set_epi64x(hi, lo); per the Intel
 * intrinsics reference the first argument becomes the upper 64-bit lane and
 * the second the lower lane.  m0..m15 are not parameters and must be visible
 * at the expansion site (presumably the sixteen 64-bit message words of the
 * block being compressed -- confirm against the including file).
 *
 * Both assignments sit inside do { } while (0) so that "MACRO(x, y);" is a
 * single statement, including under an unbraced if/else or loop.
 */
#define LOAD_MSG_7_1(b0, b1) \
  do { \
    (b0) = _mm_set_epi64x(m7, m13); \
    (b1) = _mm_set_epi64x(m3, m12); \
  } while (0)

#define LOAD_MSG_7_2(b0, b1) \
  do { \
    (b0) = _mm_set_epi64x(m14, m11); \
    (b1) = _mm_set_epi64x(m9, m1); \
  } while (0)

#define LOAD_MSG_7_3(b0, b1) \
  do { \
    (b0) = _mm_set_epi64x(m15, m5); \
    (b1) = _mm_set_epi64x(m2, m8); \
  } while (0)

#define LOAD_MSG_7_4(b0, b1) \
  do { \
    (b0) = _mm_set_epi64x(m4, m0); \
    (b1) = _mm_set_epi64x(m10, m6); \
  } while (0)
50
/*
 * LOAD_MSG_8_1 .. LOAD_MSG_8_4 (SSE2 variant).
 *
 * Each macro assigns b0 and b1 from _mm_set_epi64x(hi, lo); per the Intel
 * intrinsics reference the first argument becomes the upper 64-bit lane and
 * the second the lower lane.  m0..m15 are not parameters and must be visible
 * at the expansion site (presumably the sixteen 64-bit message words of the
 * block being compressed -- confirm against the including file).
 *
 * Both assignments sit inside do { } while (0) so that "MACRO(x, y);" is a
 * single statement, including under an unbraced if/else or loop.
 */
#define LOAD_MSG_8_1(b0, b1) \
  do { \
    (b0) = _mm_set_epi64x(m14, m6); \
    (b1) = _mm_set_epi64x(m0, m11); \
  } while (0)

#define LOAD_MSG_8_2(b0, b1) \
  do { \
    (b0) = _mm_set_epi64x(m9, m15); \
    (b1) = _mm_set_epi64x(m8, m3); \
  } while (0)

#define LOAD_MSG_8_3(b0, b1) \
  do { \
    (b0) = _mm_set_epi64x(m13, m12); \
    (b1) = _mm_set_epi64x(m10, m1); \
  } while (0)

#define LOAD_MSG_8_4(b0, b1) \
  do { \
    (b0) = _mm_set_epi64x(m7, m2); \
    (b1) = _mm_set_epi64x(m5, m4); \
  } while (0)
54
/*
 * LOAD_MSG_9_1 .. LOAD_MSG_9_4 (SSE2 variant).
 *
 * Each macro assigns b0 and b1 from _mm_set_epi64x(hi, lo); per the Intel
 * intrinsics reference the first argument becomes the upper 64-bit lane and
 * the second the lower lane.  m0..m15 are not parameters and must be visible
 * at the expansion site (presumably the sixteen 64-bit message words of the
 * block being compressed -- confirm against the including file).
 *
 * Both assignments sit inside do { } while (0) so that "MACRO(x, y);" is a
 * single statement, including under an unbraced if/else or loop.
 */
#define LOAD_MSG_9_1(b0, b1) \
  do { \
    (b0) = _mm_set_epi64x(m8, m10); \
    (b1) = _mm_set_epi64x(m1, m7); \
  } while (0)

#define LOAD_MSG_9_2(b0, b1) \
  do { \
    (b0) = _mm_set_epi64x(m4, m2); \
    (b1) = _mm_set_epi64x(m5, m6); \
  } while (0)

#define LOAD_MSG_9_3(b0, b1) \
  do { \
    (b0) = _mm_set_epi64x(m9, m15); \
    (b1) = _mm_set_epi64x(m13, m3); \
  } while (0)

#define LOAD_MSG_9_4(b0, b1) \
  do { \
    (b0) = _mm_set_epi64x(m14, m11); \
    (b1) = _mm_set_epi64x(m0, m12); \
  } while (0)
58
/*
 * LOAD_MSG_10_1 .. LOAD_MSG_10_4 (SSE2 variant).
 *
 * Each macro assigns b0 and b1 from _mm_set_epi64x(hi, lo); per the Intel
 * intrinsics reference the first argument becomes the upper 64-bit lane and
 * the second the lower lane.  m0..m15 are not parameters and must be visible
 * at the expansion site (presumably the sixteen 64-bit message words of the
 * block being compressed -- confirm against the including file).
 * The operand selection is identical to LOAD_MSG_0_1 .. LOAD_MSG_0_4.
 *
 * Both assignments sit inside do { } while (0) so that "MACRO(x, y);" is a
 * single statement, including under an unbraced if/else or loop.
 */
#define LOAD_MSG_10_1(b0, b1) \
  do { \
    (b0) = _mm_set_epi64x(m2, m0); \
    (b1) = _mm_set_epi64x(m6, m4); \
  } while (0)

#define LOAD_MSG_10_2(b0, b1) \
  do { \
    (b0) = _mm_set_epi64x(m3, m1); \
    (b1) = _mm_set_epi64x(m7, m5); \
  } while (0)

#define LOAD_MSG_10_3(b0, b1) \
  do { \
    (b0) = _mm_set_epi64x(m10, m8); \
    (b1) = _mm_set_epi64x(m14, m12); \
  } while (0)

#define LOAD_MSG_10_4(b0, b1) \
  do { \
    (b0) = _mm_set_epi64x(m11, m9); \
    (b1) = _mm_set_epi64x(m15, m13); \
  } while (0)
62
/*
 * LOAD_MSG_11_1 .. LOAD_MSG_11_4 (SSE2 variant).
 *
 * Each macro assigns b0 and b1 from _mm_set_epi64x(hi, lo); per the Intel
 * intrinsics reference the first argument becomes the upper 64-bit lane and
 * the second the lower lane.  m0..m15 are not parameters and must be visible
 * at the expansion site (presumably the sixteen 64-bit message words of the
 * block being compressed -- confirm against the including file).
 * The operand selection is identical to LOAD_MSG_1_1 .. LOAD_MSG_1_4.
 *
 * Both assignments sit inside do { } while (0) so that "MACRO(x, y);" is a
 * single statement, including under an unbraced if/else or loop.
 */
#define LOAD_MSG_11_1(b0, b1) \
  do { \
    (b0) = _mm_set_epi64x(m4, m14); \
    (b1) = _mm_set_epi64x(m13, m9); \
  } while (0)

#define LOAD_MSG_11_2(b0, b1) \
  do { \
    (b0) = _mm_set_epi64x(m8, m10); \
    (b1) = _mm_set_epi64x(m6, m15); \
  } while (0)

#define LOAD_MSG_11_3(b0, b1) \
  do { \
    (b0) = _mm_set_epi64x(m0, m1); \
    (b1) = _mm_set_epi64x(m5, m11); \
  } while (0)

#define LOAD_MSG_11_4(b0, b1) \
  do { \
    (b0) = _mm_set_epi64x(m2, m12); \
    (b1) = _mm_set_epi64x(m3, m7); \
  } while (0)
66
+
67
+
68
+ #endif
@@ -0,0 +1,402 @@
1
+ /*
2
+ BLAKE2 reference source code package - optimized C implementations
3
+
4
+ Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
5
+ terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
6
+ your option. The terms of these licenses can be found at:
7
+
8
+ - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
9
+ - OpenSSL license : https://www.openssl.org/source/license.html
10
+ - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
11
+
12
+ More information about the BLAKE2 hash function can be found at
13
+ https://blake2.net.
14
+ */
15
+ #ifndef BLAKE2B_LOAD_SSE41_H
16
+ #define BLAKE2B_LOAD_SSE41_H
17
+
18
/*
 * LOAD_MSG_0_1 .. LOAD_MSG_0_4 (SSE4.1 variant).
 *
 * m0..m7 are not parameters; they must be visible at the expansion site.
 * The trailing "lo=/hi=" notes assume m<k> holds 64-bit message word w(2k) in
 * its low lane and w(2k+1) in its high lane -- TODO confirm against the
 * including file.  Macro arguments are parenthesized.
 */
#define LOAD_MSG_0_1(b0, b1) \
  do { \
    (b0) = _mm_unpacklo_epi64(m0, m1); /* lo=w0  hi=w2  */ \
    (b1) = _mm_unpacklo_epi64(m2, m3); /* lo=w4  hi=w6  */ \
  } while (0)

#define LOAD_MSG_0_2(b0, b1) \
  do { \
    (b0) = _mm_unpackhi_epi64(m0, m1); /* lo=w1  hi=w3  */ \
    (b1) = _mm_unpackhi_epi64(m2, m3); /* lo=w5  hi=w7  */ \
  } while (0)

#define LOAD_MSG_0_3(b0, b1) \
  do { \
    (b0) = _mm_unpacklo_epi64(m4, m5); /* lo=w8  hi=w10 */ \
    (b1) = _mm_unpacklo_epi64(m6, m7); /* lo=w12 hi=w14 */ \
  } while (0)

#define LOAD_MSG_0_4(b0, b1) \
  do { \
    (b0) = _mm_unpackhi_epi64(m4, m5); /* lo=w9  hi=w11 */ \
    (b1) = _mm_unpackhi_epi64(m6, m7); /* lo=w13 hi=w15 */ \
  } while (0)
48
+
49
+
50
/*
 * LOAD_MSG_1_1 .. LOAD_MSG_1_4 (SSE4.1 variant).
 *
 * m0..m7 are not parameters; they must be visible at the expansion site.
 * The trailing "lo=/hi=" notes assume m<k> holds 64-bit message word w(2k) in
 * its low lane and w(2k+1) in its high lane -- TODO confirm against the
 * including file.  Macro arguments are parenthesized.
 */
#define LOAD_MSG_1_1(b0, b1) \
  do { \
    (b0) = _mm_unpacklo_epi64(m7, m2); /* lo=w14 hi=w4  */ \
    (b1) = _mm_unpackhi_epi64(m4, m6); /* lo=w9  hi=w13 */ \
  } while (0)

#define LOAD_MSG_1_2(b0, b1) \
  do { \
    (b0) = _mm_unpacklo_epi64(m5, m4); /* lo=w10 hi=w8  */ \
    (b1) = _mm_alignr_epi8(m3, m7, 8); /* lo=w15 hi=w6  */ \
  } while (0)

#define LOAD_MSG_1_3(b0, b1) \
  do { \
    (b0) = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)); /* lo=w1 hi=w0 */ \
    (b1) = _mm_unpackhi_epi64(m5, m2); /* lo=w11 hi=w5  */ \
  } while (0)

#define LOAD_MSG_1_4(b0, b1) \
  do { \
    (b0) = _mm_unpacklo_epi64(m6, m1); /* lo=w12 hi=w2  */ \
    (b1) = _mm_unpackhi_epi64(m3, m1); /* lo=w7  hi=w3  */ \
  } while (0)
80
+
81
+
82
/*
 * LOAD_MSG_2_1 .. LOAD_MSG_2_4 (SSE4.1 variant).
 *
 * m0..m7 are not parameters; they must be visible at the expansion site.
 * The trailing "lo=/hi=" notes assume m<k> holds 64-bit message word w(2k) in
 * its low lane and w(2k+1) in its high lane -- TODO confirm against the
 * including file.  Macro arguments are parenthesized.
 */
#define LOAD_MSG_2_1(b0, b1) \
  do { \
    (b0) = _mm_alignr_epi8(m6, m5, 8); /* lo=w11 hi=w12 */ \
    (b1) = _mm_unpackhi_epi64(m2, m7); /* lo=w5  hi=w15 */ \
  } while (0)

#define LOAD_MSG_2_2(b0, b1) \
  do { \
    (b0) = _mm_unpacklo_epi64(m4, m0); /* lo=w8  hi=w0  */ \
    (b1) = _mm_blend_epi16(m1, m6, 0xF0); /* lo=w2 hi=w13 */ \
  } while (0)

#define LOAD_MSG_2_3(b0, b1) \
  do { \
    (b0) = _mm_blend_epi16(m5, m1, 0xF0); /* lo=w10 hi=w3 */ \
    (b1) = _mm_unpackhi_epi64(m3, m4); /* lo=w7  hi=w9  */ \
  } while (0)

#define LOAD_MSG_2_4(b0, b1) \
  do { \
    (b0) = _mm_unpacklo_epi64(m7, m3); /* lo=w14 hi=w6  */ \
    (b1) = _mm_alignr_epi8(m2, m0, 8); /* lo=w1  hi=w4  */ \
  } while (0)
112
+
113
+
114
/*
 * LOAD_MSG_3_1 .. LOAD_MSG_3_4 (SSE4.1 variant).
 *
 * m0..m7 are not parameters; they must be visible at the expansion site.
 * The trailing "lo=/hi=" notes assume m<k> holds 64-bit message word w(2k) in
 * its low lane and w(2k+1) in its high lane -- TODO confirm against the
 * including file.  Macro arguments are parenthesized.
 */
#define LOAD_MSG_3_1(b0, b1) \
  do { \
    (b0) = _mm_unpackhi_epi64(m3, m1); /* lo=w7  hi=w3  */ \
    (b1) = _mm_unpackhi_epi64(m6, m5); /* lo=w13 hi=w11 */ \
  } while (0)

#define LOAD_MSG_3_2(b0, b1) \
  do { \
    (b0) = _mm_unpackhi_epi64(m4, m0); /* lo=w9  hi=w1  */ \
    (b1) = _mm_unpacklo_epi64(m6, m7); /* lo=w12 hi=w14 */ \
  } while (0)

#define LOAD_MSG_3_3(b0, b1) \
  do { \
    (b0) = _mm_blend_epi16(m1, m2, 0xF0); /* lo=w2 hi=w5  */ \
    (b1) = _mm_blend_epi16(m2, m7, 0xF0); /* lo=w4 hi=w15 */ \
  } while (0)

#define LOAD_MSG_3_4(b0, b1) \
  do { \
    (b0) = _mm_unpacklo_epi64(m3, m5); /* lo=w6  hi=w10 */ \
    (b1) = _mm_unpacklo_epi64(m0, m4); /* lo=w0  hi=w8  */ \
  } while (0)
144
+
145
+
146
/*
 * LOAD_MSG_4_1 .. LOAD_MSG_4_4 (SSE4.1 variant).
 *
 * m0..m7 are not parameters; they must be visible at the expansion site.
 * The trailing "lo=/hi=" notes assume m<k> holds 64-bit message word w(2k) in
 * its low lane and w(2k+1) in its high lane -- TODO confirm against the
 * including file.  Macro arguments are parenthesized.
 */
#define LOAD_MSG_4_1(b0, b1) \
  do { \
    (b0) = _mm_unpackhi_epi64(m4, m2); /* lo=w9  hi=w5  */ \
    (b1) = _mm_unpacklo_epi64(m1, m5); /* lo=w2  hi=w10 */ \
  } while (0)

#define LOAD_MSG_4_2(b0, b1) \
  do { \
    (b0) = _mm_blend_epi16(m0, m3, 0xF0); /* lo=w0 hi=w7  */ \
    (b1) = _mm_blend_epi16(m2, m7, 0xF0); /* lo=w4 hi=w15 */ \
  } while (0)

#define LOAD_MSG_4_3(b0, b1) \
  do { \
    (b0) = _mm_blend_epi16(m7, m5, 0xF0); /* lo=w14 hi=w11 */ \
    (b1) = _mm_blend_epi16(m3, m1, 0xF0); /* lo=w6  hi=w3  */ \
  } while (0)

#define LOAD_MSG_4_4(b0, b1) \
  do { \
    (b0) = _mm_alignr_epi8(m6, m0, 8); /* lo=w1 hi=w12 */ \
    (b1) = _mm_blend_epi16(m4, m6, 0xF0); /* lo=w8 hi=w13 */ \
  } while (0)
176
+
177
+
178
/*
 * LOAD_MSG_5_1 .. LOAD_MSG_5_4 (SSE4.1 variant).
 *
 * m0..m7 are not parameters; they must be visible at the expansion site.
 * The trailing "lo=/hi=" notes assume m<k> holds 64-bit message word w(2k) in
 * its low lane and w(2k+1) in its high lane -- TODO confirm against the
 * including file.  Macro arguments are parenthesized.
 */
#define LOAD_MSG_5_1(b0, b1) \
  do { \
    (b0) = _mm_unpacklo_epi64(m1, m3); /* lo=w2  hi=w6  */ \
    (b1) = _mm_unpacklo_epi64(m0, m4); /* lo=w0  hi=w8  */ \
  } while (0)

#define LOAD_MSG_5_2(b0, b1) \
  do { \
    (b0) = _mm_unpacklo_epi64(m6, m5); /* lo=w12 hi=w10 */ \
    (b1) = _mm_unpackhi_epi64(m5, m1); /* lo=w11 hi=w3  */ \
  } while (0)

#define LOAD_MSG_5_3(b0, b1) \
  do { \
    (b0) = _mm_blend_epi16(m2, m3, 0xF0); /* lo=w4 hi=w7 */ \
    (b1) = _mm_unpackhi_epi64(m7, m0); /* lo=w15 hi=w1  */ \
  } while (0)

#define LOAD_MSG_5_4(b0, b1) \
  do { \
    (b0) = _mm_unpackhi_epi64(m6, m2); /* lo=w13 hi=w5  */ \
    (b1) = _mm_blend_epi16(m7, m4, 0xF0); /* lo=w14 hi=w9 */ \
  } while (0)
208
+
209
+
210
/*
 * LOAD_MSG_6_1 .. LOAD_MSG_6_4 (SSE4.1 variant).
 *
 * m0..m7 are not parameters; they must be visible at the expansion site.
 * The trailing "lo=/hi=" notes assume m<k> holds 64-bit message word w(2k) in
 * its low lane and w(2k+1) in its high lane -- TODO confirm against the
 * including file.  Macro arguments are parenthesized.
 */
#define LOAD_MSG_6_1(b0, b1) \
  do { \
    (b0) = _mm_blend_epi16(m6, m0, 0xF0); /* lo=w12 hi=w1 */ \
    (b1) = _mm_unpacklo_epi64(m7, m2); /* lo=w14 hi=w4  */ \
  } while (0)

#define LOAD_MSG_6_2(b0, b1) \
  do { \
    (b0) = _mm_unpackhi_epi64(m2, m7); /* lo=w5  hi=w15 */ \
    (b1) = _mm_alignr_epi8(m5, m6, 8); /* lo=w13 hi=w10 */ \
  } while (0)

#define LOAD_MSG_6_3(b0, b1) \
  do { \
    (b0) = _mm_unpacklo_epi64(m0, m3); /* lo=w0  hi=w6  */ \
    (b1) = _mm_shuffle_epi32(m4, _MM_SHUFFLE(1,0,3,2)); /* lo=w9 hi=w8 */ \
  } while (0)

#define LOAD_MSG_6_4(b0, b1) \
  do { \
    (b0) = _mm_unpackhi_epi64(m3, m1); /* lo=w7  hi=w3  */ \
    (b1) = _mm_blend_epi16(m1, m5, 0xF0); /* lo=w2 hi=w11 */ \
  } while (0)
240
+
241
+
242
/*
 * LOAD_MSG_7_1 .. LOAD_MSG_7_4 (SSE4.1 variant).
 *
 * m0..m7 are not parameters; they must be visible at the expansion site.
 * The trailing "lo=/hi=" notes assume m<k> holds 64-bit message word w(2k) in
 * its low lane and w(2k+1) in its high lane -- TODO confirm against the
 * including file.  Macro arguments are parenthesized.
 */
#define LOAD_MSG_7_1(b0, b1) \
  do { \
    (b0) = _mm_unpackhi_epi64(m6, m3); /* lo=w13 hi=w7  */ \
    (b1) = _mm_blend_epi16(m6, m1, 0xF0); /* lo=w12 hi=w3 */ \
  } while (0)

#define LOAD_MSG_7_2(b0, b1) \
  do { \
    (b0) = _mm_alignr_epi8(m7, m5, 8); /* lo=w11 hi=w14 */ \
    (b1) = _mm_unpackhi_epi64(m0, m4); /* lo=w1  hi=w9  */ \
  } while (0)

#define LOAD_MSG_7_3(b0, b1) \
  do { \
    (b0) = _mm_unpackhi_epi64(m2, m7); /* lo=w5  hi=w15 */ \
    (b1) = _mm_unpacklo_epi64(m4, m1); /* lo=w8  hi=w2  */ \
  } while (0)

#define LOAD_MSG_7_4(b0, b1) \
  do { \
    (b0) = _mm_unpacklo_epi64(m0, m2); /* lo=w0  hi=w4  */ \
    (b1) = _mm_unpacklo_epi64(m3, m5); /* lo=w6  hi=w10 */ \
  } while (0)
272
+
273
+
274
/*
 * LOAD_MSG_8_1 .. LOAD_MSG_8_4 (SSE4.1 variant).
 *
 * m0..m7 are not parameters; they must be visible at the expansion site.
 * The trailing "lo=/hi=" notes assume m<k> holds 64-bit message word w(2k) in
 * its low lane and w(2k+1) in its high lane -- TODO confirm against the
 * including file.  Macro arguments are parenthesized.
 */
#define LOAD_MSG_8_1(b0, b1) \
  do { \
    (b0) = _mm_unpacklo_epi64(m3, m7); /* lo=w6  hi=w14 */ \
    (b1) = _mm_alignr_epi8(m0, m5, 8); /* lo=w11 hi=w0  */ \
  } while (0)

#define LOAD_MSG_8_2(b0, b1) \
  do { \
    (b0) = _mm_unpackhi_epi64(m7, m4); /* lo=w15 hi=w9  */ \
    (b1) = _mm_alignr_epi8(m4, m1, 8); /* lo=w3  hi=w8  */ \
  } while (0)

#define LOAD_MSG_8_3(b0, b1) \
  do { \
    (b0) = m6;                         /* lo=w12 hi=w13 */ \
    (b1) = _mm_alignr_epi8(m5, m0, 8); /* lo=w1  hi=w10 */ \
  } while (0)

#define LOAD_MSG_8_4(b0, b1) \
  do { \
    (b0) = _mm_blend_epi16(m1, m3, 0xF0); /* lo=w2 hi=w7 */ \
    (b1) = m2;                            /* lo=w4 hi=w5 */ \
  } while (0)
304
+
305
+
306
/*
 * LOAD_MSG_9_1 .. LOAD_MSG_9_4 (SSE4.1 variant).
 *
 * m0..m7 are not parameters; they must be visible at the expansion site.
 * The trailing "lo=/hi=" notes assume m<k> holds 64-bit message word w(2k) in
 * its low lane and w(2k+1) in its high lane -- TODO confirm against the
 * including file.  Macro arguments are parenthesized.
 */
#define LOAD_MSG_9_1(b0, b1) \
  do { \
    (b0) = _mm_unpacklo_epi64(m5, m4); /* lo=w10 hi=w8  */ \
    (b1) = _mm_unpackhi_epi64(m3, m0); /* lo=w7  hi=w1  */ \
  } while (0)

#define LOAD_MSG_9_2(b0, b1) \
  do { \
    (b0) = _mm_unpacklo_epi64(m1, m2); /* lo=w2  hi=w4  */ \
    (b1) = _mm_blend_epi16(m3, m2, 0xF0); /* lo=w6 hi=w5 */ \
  } while (0)

#define LOAD_MSG_9_3(b0, b1) \
  do { \
    (b0) = _mm_unpackhi_epi64(m7, m4); /* lo=w15 hi=w9  */ \
    (b1) = _mm_unpackhi_epi64(m1, m6); /* lo=w3  hi=w13 */ \
  } while (0)

#define LOAD_MSG_9_4(b0, b1) \
  do { \
    (b0) = _mm_alignr_epi8(m7, m5, 8); /* lo=w11 hi=w14 */ \
    (b1) = _mm_unpacklo_epi64(m6, m0); /* lo=w12 hi=w0  */ \
  } while (0)
336
+
337
+
338
/*
 * LOAD_MSG_10_1 .. LOAD_MSG_10_4 (SSE4.1 variant).
 *
 * m0..m7 are not parameters; they must be visible at the expansion site.
 * The trailing "lo=/hi=" notes assume m<k> holds 64-bit message word w(2k) in
 * its low lane and w(2k+1) in its high lane -- TODO confirm against the
 * including file.  Macro arguments are parenthesized.
 * The operations are identical to LOAD_MSG_0_1 .. LOAD_MSG_0_4.
 */
#define LOAD_MSG_10_1(b0, b1) \
  do { \
    (b0) = _mm_unpacklo_epi64(m0, m1); /* lo=w0  hi=w2  */ \
    (b1) = _mm_unpacklo_epi64(m2, m3); /* lo=w4  hi=w6  */ \
  } while (0)

#define LOAD_MSG_10_2(b0, b1) \
  do { \
    (b0) = _mm_unpackhi_epi64(m0, m1); /* lo=w1  hi=w3  */ \
    (b1) = _mm_unpackhi_epi64(m2, m3); /* lo=w5  hi=w7  */ \
  } while (0)

#define LOAD_MSG_10_3(b0, b1) \
  do { \
    (b0) = _mm_unpacklo_epi64(m4, m5); /* lo=w8  hi=w10 */ \
    (b1) = _mm_unpacklo_epi64(m6, m7); /* lo=w12 hi=w14 */ \
  } while (0)

#define LOAD_MSG_10_4(b0, b1) \
  do { \
    (b0) = _mm_unpackhi_epi64(m4, m5); /* lo=w9  hi=w11 */ \
    (b1) = _mm_unpackhi_epi64(m6, m7); /* lo=w13 hi=w15 */ \
  } while (0)
368
+
369
+
370
/*
 * LOAD_MSG_11_1 .. LOAD_MSG_11_4 (SSE4.1 variant).
 *
 * m0..m7 are not parameters; they must be visible at the expansion site.
 * The trailing "lo=/hi=" notes assume m<k> holds 64-bit message word w(2k) in
 * its low lane and w(2k+1) in its high lane -- TODO confirm against the
 * including file.  Macro arguments are parenthesized.
 * The operations are identical to LOAD_MSG_1_1 .. LOAD_MSG_1_4.
 */
#define LOAD_MSG_11_1(b0, b1) \
  do { \
    (b0) = _mm_unpacklo_epi64(m7, m2); /* lo=w14 hi=w4  */ \
    (b1) = _mm_unpackhi_epi64(m4, m6); /* lo=w9  hi=w13 */ \
  } while (0)

#define LOAD_MSG_11_2(b0, b1) \
  do { \
    (b0) = _mm_unpacklo_epi64(m5, m4); /* lo=w10 hi=w8  */ \
    (b1) = _mm_alignr_epi8(m3, m7, 8); /* lo=w15 hi=w6  */ \
  } while (0)

#define LOAD_MSG_11_3(b0, b1) \
  do { \
    (b0) = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)); /* lo=w1 hi=w0 */ \
    (b1) = _mm_unpackhi_epi64(m5, m2); /* lo=w11 hi=w5  */ \
  } while (0)

#define LOAD_MSG_11_4(b0, b1) \
  do { \
    (b0) = _mm_unpacklo_epi64(m6, m1); /* lo=w12 hi=w2  */ \
    (b1) = _mm_unpackhi_epi64(m3, m1); /* lo=w7  hi=w3  */ \
  } while (0)
400
+
401
+
402
+ #endif