uncle_blake3 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. checksums.yaml +7 -0
  2. data/LICENSE.md +27 -0
  3. data/README.md +89 -0
  4. data/ext/Rakefile +55 -0
  5. data/ext/binding/uncle_blake3.c +41 -0
  6. data/ext/blake3/c/Makefile.testing +82 -0
  7. data/ext/blake3/c/README.md +316 -0
  8. data/ext/blake3/c/blake3.c +616 -0
  9. data/ext/blake3/c/blake3.h +60 -0
  10. data/ext/blake3/c/blake3_avx2.c +326 -0
  11. data/ext/blake3/c/blake3_avx2_x86-64_unix.S +1815 -0
  12. data/ext/blake3/c/blake3_avx2_x86-64_windows_gnu.S +1817 -0
  13. data/ext/blake3/c/blake3_avx2_x86-64_windows_msvc.asm +1828 -0
  14. data/ext/blake3/c/blake3_avx512.c +1207 -0
  15. data/ext/blake3/c/blake3_avx512_x86-64_unix.S +2585 -0
  16. data/ext/blake3/c/blake3_avx512_x86-64_windows_gnu.S +2615 -0
  17. data/ext/blake3/c/blake3_avx512_x86-64_windows_msvc.asm +2634 -0
  18. data/ext/blake3/c/blake3_dispatch.c +276 -0
  19. data/ext/blake3/c/blake3_impl.h +282 -0
  20. data/ext/blake3/c/blake3_neon.c +351 -0
  21. data/ext/blake3/c/blake3_portable.c +160 -0
  22. data/ext/blake3/c/blake3_sse2.c +566 -0
  23. data/ext/blake3/c/blake3_sse2_x86-64_unix.S +2291 -0
  24. data/ext/blake3/c/blake3_sse2_x86-64_windows_gnu.S +2332 -0
  25. data/ext/blake3/c/blake3_sse2_x86-64_windows_msvc.asm +2350 -0
  26. data/ext/blake3/c/blake3_sse41.c +560 -0
  27. data/ext/blake3/c/blake3_sse41_x86-64_unix.S +2028 -0
  28. data/ext/blake3/c/blake3_sse41_x86-64_windows_gnu.S +2069 -0
  29. data/ext/blake3/c/blake3_sse41_x86-64_windows_msvc.asm +2089 -0
  30. data/ext/blake3/c/example.c +37 -0
  31. data/ext/blake3/c/main.c +166 -0
  32. data/ext/blake3/c/test.py +97 -0
  33. data/lib/uncle_blake3/binding.rb +20 -0
  34. data/lib/uncle_blake3/build/loader.rb +40 -0
  35. data/lib/uncle_blake3/build/platform.rb +37 -0
  36. data/lib/uncle_blake3/build.rb +4 -0
  37. data/lib/uncle_blake3/digest.rb +119 -0
  38. data/lib/uncle_blake3/version.rb +5 -0
  39. data/lib/uncle_blake3.rb +7 -0
  40. metadata +112 -0
@@ -0,0 +1,2028 @@
1
+ #if defined(__ELF__) && defined(__linux__) /* only when building an ELF object for Linux */
2
+ .section .note.GNU-stack,"",%progbits /* empty .note.GNU-stack section (no flags); by GNU toolchain convention this marks the object as not needing an executable stack */
3
+ #endif
4
+
5
+ #if defined(__ELF__) && defined(__CET__) && defined(__has_include) /* ELF target, CET enabled, and the preprocessor supports __has_include */
6
+ #if __has_include(<cet.h>) /* include the header only if it is actually available */
7
+ #include <cet.h> /* presumably supplies _CET_ENDBR, which this file tests for right after -- confirm against the toolchain header */
8
+ #endif
9
+ #endif
10
+
11
+ #if !defined(_CET_ENDBR) /* fallback: nothing above defined _CET_ENDBR, so define it as empty; its use at function entry then expands to nothing */
12
+ #define _CET_ENDBR
13
+ #endif
14
+
15
+ .intel_syntax noprefix /* Intel operand order (destination first), register names without a % prefix */
16
+ .global blake3_hash_many_sse41 /* each entry point is exported under its plain name ... */
17
+ .global _blake3_hash_many_sse41 /* ... and with a leading underscore; presumably for platforms whose C symbols carry a '_' prefix (e.g. Mach-O) -- confirm */
18
+ .global blake3_compress_in_place_sse41
19
+ .global _blake3_compress_in_place_sse41
20
+ .global blake3_compress_xof_sse41
21
+ .global _blake3_compress_xof_sse41
22
+ #ifdef __APPLE__ /* pick the directive used to enter the code section */
23
+ .text
24
+ #else
25
+ .section .text
26
+ #endif
27
+ .p2align 6 /* align what follows to a 2^6 = 64-byte boundary */
28
+ _blake3_hash_many_sse41:
29
+ blake3_hash_many_sse41:
30
+ _CET_ENDBR
31
+ push r15
32
+ push r14
33
+ push r13
34
+ push r12
35
+ push rbx
36
+ push rbp
37
+ mov rbp, rsp
38
+ sub rsp, 360
39
+ and rsp, 0xFFFFFFFFFFFFFFC0
40
+ neg r9d
41
+ movd xmm0, r9d
42
+ pshufd xmm0, xmm0, 0x00
43
+ movdqa xmmword ptr [rsp+0x130], xmm0
44
+ movdqa xmm1, xmm0
45
+ pand xmm1, xmmword ptr [ADD0+rip]
46
+ pand xmm0, xmmword ptr [ADD1+rip]
47
+ movdqa xmmword ptr [rsp+0x150], xmm0
48
+ movd xmm0, r8d
49
+ pshufd xmm0, xmm0, 0x00
50
+ paddd xmm0, xmm1
51
+ movdqa xmmword ptr [rsp+0x110], xmm0
52
+ pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip]
53
+ pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip]
54
+ pcmpgtd xmm1, xmm0
55
+ shr r8, 32
56
+ movd xmm2, r8d
57
+ pshufd xmm2, xmm2, 0x00
58
+ psubd xmm2, xmm1
59
+ movdqa xmmword ptr [rsp+0x120], xmm2
60
+ mov rbx, qword ptr [rbp+0x50]
61
+ mov r15, rdx
62
+ shl r15, 6
63
+ movzx r13d, byte ptr [rbp+0x38]
64
+ movzx r12d, byte ptr [rbp+0x48]
65
+ cmp rsi, 4
66
+ jc 3f
67
+ 2:
68
+ movdqu xmm3, xmmword ptr [rcx]
69
+ pshufd xmm0, xmm3, 0x00
70
+ pshufd xmm1, xmm3, 0x55
71
+ pshufd xmm2, xmm3, 0xAA
72
+ pshufd xmm3, xmm3, 0xFF
73
+ movdqu xmm7, xmmword ptr [rcx+0x10]
74
+ pshufd xmm4, xmm7, 0x00
75
+ pshufd xmm5, xmm7, 0x55
76
+ pshufd xmm6, xmm7, 0xAA
77
+ pshufd xmm7, xmm7, 0xFF
78
+ mov r8, qword ptr [rdi]
79
+ mov r9, qword ptr [rdi+0x8]
80
+ mov r10, qword ptr [rdi+0x10]
81
+ mov r11, qword ptr [rdi+0x18]
82
+ movzx eax, byte ptr [rbp+0x40]
83
+ or eax, r13d
84
+ xor edx, edx
85
+ 9:
86
+ mov r14d, eax
87
+ or eax, r12d
88
+ add rdx, 64
89
+ cmp rdx, r15
90
+ cmovne eax, r14d
91
+ movdqu xmm8, xmmword ptr [r8+rdx-0x40]
92
+ movdqu xmm9, xmmword ptr [r9+rdx-0x40]
93
+ movdqu xmm10, xmmword ptr [r10+rdx-0x40]
94
+ movdqu xmm11, xmmword ptr [r11+rdx-0x40]
95
+ movdqa xmm12, xmm8
96
+ punpckldq xmm8, xmm9
97
+ punpckhdq xmm12, xmm9
98
+ movdqa xmm14, xmm10
99
+ punpckldq xmm10, xmm11
100
+ punpckhdq xmm14, xmm11
101
+ movdqa xmm9, xmm8
102
+ punpcklqdq xmm8, xmm10
103
+ punpckhqdq xmm9, xmm10
104
+ movdqa xmm13, xmm12
105
+ punpcklqdq xmm12, xmm14
106
+ punpckhqdq xmm13, xmm14
107
+ movdqa xmmword ptr [rsp], xmm8
108
+ movdqa xmmword ptr [rsp+0x10], xmm9
109
+ movdqa xmmword ptr [rsp+0x20], xmm12
110
+ movdqa xmmword ptr [rsp+0x30], xmm13
111
+ movdqu xmm8, xmmword ptr [r8+rdx-0x30]
112
+ movdqu xmm9, xmmword ptr [r9+rdx-0x30]
113
+ movdqu xmm10, xmmword ptr [r10+rdx-0x30]
114
+ movdqu xmm11, xmmword ptr [r11+rdx-0x30]
115
+ movdqa xmm12, xmm8
116
+ punpckldq xmm8, xmm9
117
+ punpckhdq xmm12, xmm9
118
+ movdqa xmm14, xmm10
119
+ punpckldq xmm10, xmm11
120
+ punpckhdq xmm14, xmm11
121
+ movdqa xmm9, xmm8
122
+ punpcklqdq xmm8, xmm10
123
+ punpckhqdq xmm9, xmm10
124
+ movdqa xmm13, xmm12
125
+ punpcklqdq xmm12, xmm14
126
+ punpckhqdq xmm13, xmm14
127
+ movdqa xmmword ptr [rsp+0x40], xmm8
128
+ movdqa xmmword ptr [rsp+0x50], xmm9
129
+ movdqa xmmword ptr [rsp+0x60], xmm12
130
+ movdqa xmmword ptr [rsp+0x70], xmm13
131
+ movdqu xmm8, xmmword ptr [r8+rdx-0x20]
132
+ movdqu xmm9, xmmword ptr [r9+rdx-0x20]
133
+ movdqu xmm10, xmmword ptr [r10+rdx-0x20]
134
+ movdqu xmm11, xmmword ptr [r11+rdx-0x20]
135
+ movdqa xmm12, xmm8
136
+ punpckldq xmm8, xmm9
137
+ punpckhdq xmm12, xmm9
138
+ movdqa xmm14, xmm10
139
+ punpckldq xmm10, xmm11
140
+ punpckhdq xmm14, xmm11
141
+ movdqa xmm9, xmm8
142
+ punpcklqdq xmm8, xmm10
143
+ punpckhqdq xmm9, xmm10
144
+ movdqa xmm13, xmm12
145
+ punpcklqdq xmm12, xmm14
146
+ punpckhqdq xmm13, xmm14
147
+ movdqa xmmword ptr [rsp+0x80], xmm8
148
+ movdqa xmmword ptr [rsp+0x90], xmm9
149
+ movdqa xmmword ptr [rsp+0xA0], xmm12
150
+ movdqa xmmword ptr [rsp+0xB0], xmm13
151
+ movdqu xmm8, xmmword ptr [r8+rdx-0x10]
152
+ movdqu xmm9, xmmword ptr [r9+rdx-0x10]
153
+ movdqu xmm10, xmmword ptr [r10+rdx-0x10]
154
+ movdqu xmm11, xmmword ptr [r11+rdx-0x10]
155
+ movdqa xmm12, xmm8
156
+ punpckldq xmm8, xmm9
157
+ punpckhdq xmm12, xmm9
158
+ movdqa xmm14, xmm10
159
+ punpckldq xmm10, xmm11
160
+ punpckhdq xmm14, xmm11
161
+ movdqa xmm9, xmm8
162
+ punpcklqdq xmm8, xmm10
163
+ punpckhqdq xmm9, xmm10
164
+ movdqa xmm13, xmm12
165
+ punpcklqdq xmm12, xmm14
166
+ punpckhqdq xmm13, xmm14
167
+ movdqa xmmword ptr [rsp+0xC0], xmm8
168
+ movdqa xmmword ptr [rsp+0xD0], xmm9
169
+ movdqa xmmword ptr [rsp+0xE0], xmm12
170
+ movdqa xmmword ptr [rsp+0xF0], xmm13
171
+ movdqa xmm9, xmmword ptr [BLAKE3_IV_1+rip]
172
+ movdqa xmm10, xmmword ptr [BLAKE3_IV_2+rip]
173
+ movdqa xmm11, xmmword ptr [BLAKE3_IV_3+rip]
174
+ movdqa xmm12, xmmword ptr [rsp+0x110]
175
+ movdqa xmm13, xmmword ptr [rsp+0x120]
176
+ movdqa xmm14, xmmword ptr [BLAKE3_BLOCK_LEN+rip]
177
+ movd xmm15, eax
178
+ pshufd xmm15, xmm15, 0x00
179
+ prefetcht0 [r8+rdx+0x80]
180
+ prefetcht0 [r9+rdx+0x80]
181
+ prefetcht0 [r10+rdx+0x80]
182
+ prefetcht0 [r11+rdx+0x80]
183
+ paddd xmm0, xmmword ptr [rsp]
184
+ paddd xmm1, xmmword ptr [rsp+0x20]
185
+ paddd xmm2, xmmword ptr [rsp+0x40]
186
+ paddd xmm3, xmmword ptr [rsp+0x60]
187
+ paddd xmm0, xmm4
188
+ paddd xmm1, xmm5
189
+ paddd xmm2, xmm6
190
+ paddd xmm3, xmm7
191
+ pxor xmm12, xmm0
192
+ pxor xmm13, xmm1
193
+ pxor xmm14, xmm2
194
+ pxor xmm15, xmm3
195
+ movdqa xmm8, xmmword ptr [ROT16+rip]
196
+ pshufb xmm12, xmm8
197
+ pshufb xmm13, xmm8
198
+ pshufb xmm14, xmm8
199
+ pshufb xmm15, xmm8
200
+ movdqa xmm8, xmmword ptr [BLAKE3_IV_0+rip]
201
+ paddd xmm8, xmm12
202
+ paddd xmm9, xmm13
203
+ paddd xmm10, xmm14
204
+ paddd xmm11, xmm15
205
+ pxor xmm4, xmm8
206
+ pxor xmm5, xmm9
207
+ pxor xmm6, xmm10
208
+ pxor xmm7, xmm11
209
+ movdqa xmmword ptr [rsp+0x100], xmm8
210
+ movdqa xmm8, xmm4
211
+ psrld xmm8, 12
212
+ pslld xmm4, 20
213
+ por xmm4, xmm8
214
+ movdqa xmm8, xmm5
215
+ psrld xmm8, 12
216
+ pslld xmm5, 20
217
+ por xmm5, xmm8
218
+ movdqa xmm8, xmm6
219
+ psrld xmm8, 12
220
+ pslld xmm6, 20
221
+ por xmm6, xmm8
222
+ movdqa xmm8, xmm7
223
+ psrld xmm8, 12
224
+ pslld xmm7, 20
225
+ por xmm7, xmm8
226
+ paddd xmm0, xmmword ptr [rsp+0x10]
227
+ paddd xmm1, xmmword ptr [rsp+0x30]
228
+ paddd xmm2, xmmword ptr [rsp+0x50]
229
+ paddd xmm3, xmmword ptr [rsp+0x70]
230
+ paddd xmm0, xmm4
231
+ paddd xmm1, xmm5
232
+ paddd xmm2, xmm6
233
+ paddd xmm3, xmm7
234
+ pxor xmm12, xmm0
235
+ pxor xmm13, xmm1
236
+ pxor xmm14, xmm2
237
+ pxor xmm15, xmm3
238
+ movdqa xmm8, xmmword ptr [ROT8+rip]
239
+ pshufb xmm12, xmm8
240
+ pshufb xmm13, xmm8
241
+ pshufb xmm14, xmm8
242
+ pshufb xmm15, xmm8
243
+ movdqa xmm8, xmmword ptr [rsp+0x100]
244
+ paddd xmm8, xmm12
245
+ paddd xmm9, xmm13
246
+ paddd xmm10, xmm14
247
+ paddd xmm11, xmm15
248
+ pxor xmm4, xmm8
249
+ pxor xmm5, xmm9
250
+ pxor xmm6, xmm10
251
+ pxor xmm7, xmm11
252
+ movdqa xmmword ptr [rsp+0x100], xmm8
253
+ movdqa xmm8, xmm4
254
+ psrld xmm8, 7
255
+ pslld xmm4, 25
256
+ por xmm4, xmm8
257
+ movdqa xmm8, xmm5
258
+ psrld xmm8, 7
259
+ pslld xmm5, 25
260
+ por xmm5, xmm8
261
+ movdqa xmm8, xmm6
262
+ psrld xmm8, 7
263
+ pslld xmm6, 25
264
+ por xmm6, xmm8
265
+ movdqa xmm8, xmm7
266
+ psrld xmm8, 7
267
+ pslld xmm7, 25
268
+ por xmm7, xmm8
269
+ paddd xmm0, xmmword ptr [rsp+0x80]
270
+ paddd xmm1, xmmword ptr [rsp+0xA0]
271
+ paddd xmm2, xmmword ptr [rsp+0xC0]
272
+ paddd xmm3, xmmword ptr [rsp+0xE0]
273
+ paddd xmm0, xmm5
274
+ paddd xmm1, xmm6
275
+ paddd xmm2, xmm7
276
+ paddd xmm3, xmm4
277
+ pxor xmm15, xmm0
278
+ pxor xmm12, xmm1
279
+ pxor xmm13, xmm2
280
+ pxor xmm14, xmm3
281
+ movdqa xmm8, xmmword ptr [ROT16+rip]
282
+ pshufb xmm15, xmm8
283
+ pshufb xmm12, xmm8
284
+ pshufb xmm13, xmm8
285
+ pshufb xmm14, xmm8
286
+ paddd xmm10, xmm15
287
+ paddd xmm11, xmm12
288
+ movdqa xmm8, xmmword ptr [rsp+0x100]
289
+ paddd xmm8, xmm13
290
+ paddd xmm9, xmm14
291
+ pxor xmm5, xmm10
292
+ pxor xmm6, xmm11
293
+ pxor xmm7, xmm8
294
+ pxor xmm4, xmm9
295
+ movdqa xmmword ptr [rsp+0x100], xmm8
296
+ movdqa xmm8, xmm5
297
+ psrld xmm8, 12
298
+ pslld xmm5, 20
299
+ por xmm5, xmm8
300
+ movdqa xmm8, xmm6
301
+ psrld xmm8, 12
302
+ pslld xmm6, 20
303
+ por xmm6, xmm8
304
+ movdqa xmm8, xmm7
305
+ psrld xmm8, 12
306
+ pslld xmm7, 20
307
+ por xmm7, xmm8
308
+ movdqa xmm8, xmm4
309
+ psrld xmm8, 12
310
+ pslld xmm4, 20
311
+ por xmm4, xmm8
312
+ paddd xmm0, xmmword ptr [rsp+0x90]
313
+ paddd xmm1, xmmword ptr [rsp+0xB0]
314
+ paddd xmm2, xmmword ptr [rsp+0xD0]
315
+ paddd xmm3, xmmword ptr [rsp+0xF0]
316
+ paddd xmm0, xmm5
317
+ paddd xmm1, xmm6
318
+ paddd xmm2, xmm7
319
+ paddd xmm3, xmm4
320
+ pxor xmm15, xmm0
321
+ pxor xmm12, xmm1
322
+ pxor xmm13, xmm2
323
+ pxor xmm14, xmm3
324
+ movdqa xmm8, xmmword ptr [ROT8+rip]
325
+ pshufb xmm15, xmm8
326
+ pshufb xmm12, xmm8
327
+ pshufb xmm13, xmm8
328
+ pshufb xmm14, xmm8
329
+ paddd xmm10, xmm15
330
+ paddd xmm11, xmm12
331
+ movdqa xmm8, xmmword ptr [rsp+0x100]
332
+ paddd xmm8, xmm13
333
+ paddd xmm9, xmm14
334
+ pxor xmm5, xmm10
335
+ pxor xmm6, xmm11
336
+ pxor xmm7, xmm8
337
+ pxor xmm4, xmm9
338
+ movdqa xmmword ptr [rsp+0x100], xmm8
339
+ movdqa xmm8, xmm5
340
+ psrld xmm8, 7
341
+ pslld xmm5, 25
342
+ por xmm5, xmm8
343
+ movdqa xmm8, xmm6
344
+ psrld xmm8, 7
345
+ pslld xmm6, 25
346
+ por xmm6, xmm8
347
+ movdqa xmm8, xmm7
348
+ psrld xmm8, 7
349
+ pslld xmm7, 25
350
+ por xmm7, xmm8
351
+ movdqa xmm8, xmm4
352
+ psrld xmm8, 7
353
+ pslld xmm4, 25
354
+ por xmm4, xmm8
355
+ paddd xmm0, xmmword ptr [rsp+0x20]
356
+ paddd xmm1, xmmword ptr [rsp+0x30]
357
+ paddd xmm2, xmmword ptr [rsp+0x70]
358
+ paddd xmm3, xmmword ptr [rsp+0x40]
359
+ paddd xmm0, xmm4
360
+ paddd xmm1, xmm5
361
+ paddd xmm2, xmm6
362
+ paddd xmm3, xmm7
363
+ pxor xmm12, xmm0
364
+ pxor xmm13, xmm1
365
+ pxor xmm14, xmm2
366
+ pxor xmm15, xmm3
367
+ movdqa xmm8, xmmword ptr [ROT16+rip]
368
+ pshufb xmm12, xmm8
369
+ pshufb xmm13, xmm8
370
+ pshufb xmm14, xmm8
371
+ pshufb xmm15, xmm8
372
+ movdqa xmm8, xmmword ptr [rsp+0x100]
373
+ paddd xmm8, xmm12
374
+ paddd xmm9, xmm13
375
+ paddd xmm10, xmm14
376
+ paddd xmm11, xmm15
377
+ pxor xmm4, xmm8
378
+ pxor xmm5, xmm9
379
+ pxor xmm6, xmm10
380
+ pxor xmm7, xmm11
381
+ movdqa xmmword ptr [rsp+0x100], xmm8
382
+ movdqa xmm8, xmm4
383
+ psrld xmm8, 12
384
+ pslld xmm4, 20
385
+ por xmm4, xmm8
386
+ movdqa xmm8, xmm5
387
+ psrld xmm8, 12
388
+ pslld xmm5, 20
389
+ por xmm5, xmm8
390
+ movdqa xmm8, xmm6
391
+ psrld xmm8, 12
392
+ pslld xmm6, 20
393
+ por xmm6, xmm8
394
+ movdqa xmm8, xmm7
395
+ psrld xmm8, 12
396
+ pslld xmm7, 20
397
+ por xmm7, xmm8
398
+ paddd xmm0, xmmword ptr [rsp+0x60]
399
+ paddd xmm1, xmmword ptr [rsp+0xA0]
400
+ paddd xmm2, xmmword ptr [rsp]
401
+ paddd xmm3, xmmword ptr [rsp+0xD0]
402
+ paddd xmm0, xmm4
403
+ paddd xmm1, xmm5
404
+ paddd xmm2, xmm6
405
+ paddd xmm3, xmm7
406
+ pxor xmm12, xmm0
407
+ pxor xmm13, xmm1
408
+ pxor xmm14, xmm2
409
+ pxor xmm15, xmm3
410
+ movdqa xmm8, xmmword ptr [ROT8+rip]
411
+ pshufb xmm12, xmm8
412
+ pshufb xmm13, xmm8
413
+ pshufb xmm14, xmm8
414
+ pshufb xmm15, xmm8
415
+ movdqa xmm8, xmmword ptr [rsp+0x100]
416
+ paddd xmm8, xmm12
417
+ paddd xmm9, xmm13
418
+ paddd xmm10, xmm14
419
+ paddd xmm11, xmm15
420
+ pxor xmm4, xmm8
421
+ pxor xmm5, xmm9
422
+ pxor xmm6, xmm10
423
+ pxor xmm7, xmm11
424
+ movdqa xmmword ptr [rsp+0x100], xmm8
425
+ movdqa xmm8, xmm4
426
+ psrld xmm8, 7
427
+ pslld xmm4, 25
428
+ por xmm4, xmm8
429
+ movdqa xmm8, xmm5
430
+ psrld xmm8, 7
431
+ pslld xmm5, 25
432
+ por xmm5, xmm8
433
+ movdqa xmm8, xmm6
434
+ psrld xmm8, 7
435
+ pslld xmm6, 25
436
+ por xmm6, xmm8
437
+ movdqa xmm8, xmm7
438
+ psrld xmm8, 7
439
+ pslld xmm7, 25
440
+ por xmm7, xmm8
441
+ paddd xmm0, xmmword ptr [rsp+0x10]
442
+ paddd xmm1, xmmword ptr [rsp+0xC0]
443
+ paddd xmm2, xmmword ptr [rsp+0x90]
444
+ paddd xmm3, xmmword ptr [rsp+0xF0]
445
+ paddd xmm0, xmm5
446
+ paddd xmm1, xmm6
447
+ paddd xmm2, xmm7
448
+ paddd xmm3, xmm4
449
+ pxor xmm15, xmm0
450
+ pxor xmm12, xmm1
451
+ pxor xmm13, xmm2
452
+ pxor xmm14, xmm3
453
+ movdqa xmm8, xmmword ptr [ROT16+rip]
454
+ pshufb xmm15, xmm8
455
+ pshufb xmm12, xmm8
456
+ pshufb xmm13, xmm8
457
+ pshufb xmm14, xmm8
458
+ paddd xmm10, xmm15
459
+ paddd xmm11, xmm12
460
+ movdqa xmm8, xmmword ptr [rsp+0x100]
461
+ paddd xmm8, xmm13
462
+ paddd xmm9, xmm14
463
+ pxor xmm5, xmm10
464
+ pxor xmm6, xmm11
465
+ pxor xmm7, xmm8
466
+ pxor xmm4, xmm9
467
+ movdqa xmmword ptr [rsp+0x100], xmm8
468
+ movdqa xmm8, xmm5
469
+ psrld xmm8, 12
470
+ pslld xmm5, 20
471
+ por xmm5, xmm8
472
+ movdqa xmm8, xmm6
473
+ psrld xmm8, 12
474
+ pslld xmm6, 20
475
+ por xmm6, xmm8
476
+ movdqa xmm8, xmm7
477
+ psrld xmm8, 12
478
+ pslld xmm7, 20
479
+ por xmm7, xmm8
480
+ movdqa xmm8, xmm4
481
+ psrld xmm8, 12
482
+ pslld xmm4, 20
483
+ por xmm4, xmm8
484
+ paddd xmm0, xmmword ptr [rsp+0xB0]
485
+ paddd xmm1, xmmword ptr [rsp+0x50]
486
+ paddd xmm2, xmmword ptr [rsp+0xE0]
487
+ paddd xmm3, xmmword ptr [rsp+0x80]
488
+ paddd xmm0, xmm5
489
+ paddd xmm1, xmm6
490
+ paddd xmm2, xmm7
491
+ paddd xmm3, xmm4
492
+ pxor xmm15, xmm0
493
+ pxor xmm12, xmm1
494
+ pxor xmm13, xmm2
495
+ pxor xmm14, xmm3
496
+ movdqa xmm8, xmmword ptr [ROT8+rip]
497
+ pshufb xmm15, xmm8
498
+ pshufb xmm12, xmm8
499
+ pshufb xmm13, xmm8
500
+ pshufb xmm14, xmm8
501
+ paddd xmm10, xmm15
502
+ paddd xmm11, xmm12
503
+ movdqa xmm8, xmmword ptr [rsp+0x100]
504
+ paddd xmm8, xmm13
505
+ paddd xmm9, xmm14
506
+ pxor xmm5, xmm10
507
+ pxor xmm6, xmm11
508
+ pxor xmm7, xmm8
509
+ pxor xmm4, xmm9
510
+ movdqa xmmword ptr [rsp+0x100], xmm8
511
+ movdqa xmm8, xmm5
512
+ psrld xmm8, 7
513
+ pslld xmm5, 25
514
+ por xmm5, xmm8
515
+ movdqa xmm8, xmm6
516
+ psrld xmm8, 7
517
+ pslld xmm6, 25
518
+ por xmm6, xmm8
519
+ movdqa xmm8, xmm7
520
+ psrld xmm8, 7
521
+ pslld xmm7, 25
522
+ por xmm7, xmm8
523
+ movdqa xmm8, xmm4
524
+ psrld xmm8, 7
525
+ pslld xmm4, 25
526
+ por xmm4, xmm8
527
+ paddd xmm0, xmmword ptr [rsp+0x30]
528
+ paddd xmm1, xmmword ptr [rsp+0xA0]
529
+ paddd xmm2, xmmword ptr [rsp+0xD0]
530
+ paddd xmm3, xmmword ptr [rsp+0x70]
531
+ paddd xmm0, xmm4
532
+ paddd xmm1, xmm5
533
+ paddd xmm2, xmm6
534
+ paddd xmm3, xmm7
535
+ pxor xmm12, xmm0
536
+ pxor xmm13, xmm1
537
+ pxor xmm14, xmm2
538
+ pxor xmm15, xmm3
539
+ movdqa xmm8, xmmword ptr [ROT16+rip]
540
+ pshufb xmm12, xmm8
541
+ pshufb xmm13, xmm8
542
+ pshufb xmm14, xmm8
543
+ pshufb xmm15, xmm8
544
+ movdqa xmm8, xmmword ptr [rsp+0x100]
545
+ paddd xmm8, xmm12
546
+ paddd xmm9, xmm13
547
+ paddd xmm10, xmm14
548
+ paddd xmm11, xmm15
549
+ pxor xmm4, xmm8
550
+ pxor xmm5, xmm9
551
+ pxor xmm6, xmm10
552
+ pxor xmm7, xmm11
553
+ movdqa xmmword ptr [rsp+0x100], xmm8
554
+ movdqa xmm8, xmm4
555
+ psrld xmm8, 12
556
+ pslld xmm4, 20
557
+ por xmm4, xmm8
558
+ movdqa xmm8, xmm5
559
+ psrld xmm8, 12
560
+ pslld xmm5, 20
561
+ por xmm5, xmm8
562
+ movdqa xmm8, xmm6
563
+ psrld xmm8, 12
564
+ pslld xmm6, 20
565
+ por xmm6, xmm8
566
+ movdqa xmm8, xmm7
567
+ psrld xmm8, 12
568
+ pslld xmm7, 20
569
+ por xmm7, xmm8
570
+ paddd xmm0, xmmword ptr [rsp+0x40]
571
+ paddd xmm1, xmmword ptr [rsp+0xC0]
572
+ paddd xmm2, xmmword ptr [rsp+0x20]
573
+ paddd xmm3, xmmword ptr [rsp+0xE0]
574
+ paddd xmm0, xmm4
575
+ paddd xmm1, xmm5
576
+ paddd xmm2, xmm6
577
+ paddd xmm3, xmm7
578
+ pxor xmm12, xmm0
579
+ pxor xmm13, xmm1
580
+ pxor xmm14, xmm2
581
+ pxor xmm15, xmm3
582
+ movdqa xmm8, xmmword ptr [ROT8+rip]
583
+ pshufb xmm12, xmm8
584
+ pshufb xmm13, xmm8
585
+ pshufb xmm14, xmm8
586
+ pshufb xmm15, xmm8
587
+ movdqa xmm8, xmmword ptr [rsp+0x100]
588
+ paddd xmm8, xmm12
589
+ paddd xmm9, xmm13
590
+ paddd xmm10, xmm14
591
+ paddd xmm11, xmm15
592
+ pxor xmm4, xmm8
593
+ pxor xmm5, xmm9
594
+ pxor xmm6, xmm10
595
+ pxor xmm7, xmm11
596
+ movdqa xmmword ptr [rsp+0x100], xmm8
597
+ movdqa xmm8, xmm4
598
+ psrld xmm8, 7
599
+ pslld xmm4, 25
600
+ por xmm4, xmm8
601
+ movdqa xmm8, xmm5
602
+ psrld xmm8, 7
603
+ pslld xmm5, 25
604
+ por xmm5, xmm8
605
+ movdqa xmm8, xmm6
606
+ psrld xmm8, 7
607
+ pslld xmm6, 25
608
+ por xmm6, xmm8
609
+ movdqa xmm8, xmm7
610
+ psrld xmm8, 7
611
+ pslld xmm7, 25
612
+ por xmm7, xmm8
613
+ paddd xmm0, xmmword ptr [rsp+0x60]
614
+ paddd xmm1, xmmword ptr [rsp+0x90]
615
+ paddd xmm2, xmmword ptr [rsp+0xB0]
616
+ paddd xmm3, xmmword ptr [rsp+0x80]
617
+ paddd xmm0, xmm5
618
+ paddd xmm1, xmm6
619
+ paddd xmm2, xmm7
620
+ paddd xmm3, xmm4
621
+ pxor xmm15, xmm0
622
+ pxor xmm12, xmm1
623
+ pxor xmm13, xmm2
624
+ pxor xmm14, xmm3
625
+ movdqa xmm8, xmmword ptr [ROT16+rip]
626
+ pshufb xmm15, xmm8
627
+ pshufb xmm12, xmm8
628
+ pshufb xmm13, xmm8
629
+ pshufb xmm14, xmm8
630
+ paddd xmm10, xmm15
631
+ paddd xmm11, xmm12
632
+ movdqa xmm8, xmmword ptr [rsp+0x100]
633
+ paddd xmm8, xmm13
634
+ paddd xmm9, xmm14
635
+ pxor xmm5, xmm10
636
+ pxor xmm6, xmm11
637
+ pxor xmm7, xmm8
638
+ pxor xmm4, xmm9
639
+ movdqa xmmword ptr [rsp+0x100], xmm8
640
+ movdqa xmm8, xmm5
641
+ psrld xmm8, 12
642
+ pslld xmm5, 20
643
+ por xmm5, xmm8
644
+ movdqa xmm8, xmm6
645
+ psrld xmm8, 12
646
+ pslld xmm6, 20
647
+ por xmm6, xmm8
648
+ movdqa xmm8, xmm7
649
+ psrld xmm8, 12
650
+ pslld xmm7, 20
651
+ por xmm7, xmm8
652
+ movdqa xmm8, xmm4
653
+ psrld xmm8, 12
654
+ pslld xmm4, 20
655
+ por xmm4, xmm8
656
+ paddd xmm0, xmmword ptr [rsp+0x50]
657
+ paddd xmm1, xmmword ptr [rsp]
658
+ paddd xmm2, xmmword ptr [rsp+0xF0]
659
+ paddd xmm3, xmmword ptr [rsp+0x10]
660
+ paddd xmm0, xmm5
661
+ paddd xmm1, xmm6
662
+ paddd xmm2, xmm7
663
+ paddd xmm3, xmm4
664
+ pxor xmm15, xmm0
665
+ pxor xmm12, xmm1
666
+ pxor xmm13, xmm2
667
+ pxor xmm14, xmm3
668
+ movdqa xmm8, xmmword ptr [ROT8+rip]
669
+ pshufb xmm15, xmm8
670
+ pshufb xmm12, xmm8
671
+ pshufb xmm13, xmm8
672
+ pshufb xmm14, xmm8
673
+ paddd xmm10, xmm15
674
+ paddd xmm11, xmm12
675
+ movdqa xmm8, xmmword ptr [rsp+0x100]
676
+ paddd xmm8, xmm13
677
+ paddd xmm9, xmm14
678
+ pxor xmm5, xmm10
679
+ pxor xmm6, xmm11
680
+ pxor xmm7, xmm8
681
+ pxor xmm4, xmm9
682
+ movdqa xmmword ptr [rsp+0x100], xmm8
683
+ movdqa xmm8, xmm5
684
+ psrld xmm8, 7
685
+ pslld xmm5, 25
686
+ por xmm5, xmm8
687
+ movdqa xmm8, xmm6
688
+ psrld xmm8, 7
689
+ pslld xmm6, 25
690
+ por xmm6, xmm8
691
+ movdqa xmm8, xmm7
692
+ psrld xmm8, 7
693
+ pslld xmm7, 25
694
+ por xmm7, xmm8
695
+ movdqa xmm8, xmm4
696
+ psrld xmm8, 7
697
+ pslld xmm4, 25
698
+ por xmm4, xmm8
699
+ paddd xmm0, xmmword ptr [rsp+0xA0]
700
+ paddd xmm1, xmmword ptr [rsp+0xC0]
701
+ paddd xmm2, xmmword ptr [rsp+0xE0]
702
+ paddd xmm3, xmmword ptr [rsp+0xD0]
703
+ paddd xmm0, xmm4
704
+ paddd xmm1, xmm5
705
+ paddd xmm2, xmm6
706
+ paddd xmm3, xmm7
707
+ pxor xmm12, xmm0
708
+ pxor xmm13, xmm1
709
+ pxor xmm14, xmm2
710
+ pxor xmm15, xmm3
711
+ movdqa xmm8, xmmword ptr [ROT16+rip]
712
+ pshufb xmm12, xmm8
713
+ pshufb xmm13, xmm8
714
+ pshufb xmm14, xmm8
715
+ pshufb xmm15, xmm8
716
+ movdqa xmm8, xmmword ptr [rsp+0x100]
717
+ paddd xmm8, xmm12
718
+ paddd xmm9, xmm13
719
+ paddd xmm10, xmm14
720
+ paddd xmm11, xmm15
721
+ pxor xmm4, xmm8
722
+ pxor xmm5, xmm9
723
+ pxor xmm6, xmm10
724
+ pxor xmm7, xmm11
725
+ movdqa xmmword ptr [rsp+0x100], xmm8
726
+ movdqa xmm8, xmm4
727
+ psrld xmm8, 12
728
+ pslld xmm4, 20
729
+ por xmm4, xmm8
730
+ movdqa xmm8, xmm5
731
+ psrld xmm8, 12
732
+ pslld xmm5, 20
733
+ por xmm5, xmm8
734
+ movdqa xmm8, xmm6
735
+ psrld xmm8, 12
736
+ pslld xmm6, 20
737
+ por xmm6, xmm8
738
+ movdqa xmm8, xmm7
739
+ psrld xmm8, 12
740
+ pslld xmm7, 20
741
+ por xmm7, xmm8
742
+ paddd xmm0, xmmword ptr [rsp+0x70]
743
+ paddd xmm1, xmmword ptr [rsp+0x90]
744
+ paddd xmm2, xmmword ptr [rsp+0x30]
745
+ paddd xmm3, xmmword ptr [rsp+0xF0]
746
+ paddd xmm0, xmm4
747
+ paddd xmm1, xmm5
748
+ paddd xmm2, xmm6
749
+ paddd xmm3, xmm7
750
+ pxor xmm12, xmm0
751
+ pxor xmm13, xmm1
752
+ pxor xmm14, xmm2
753
+ pxor xmm15, xmm3
754
+ movdqa xmm8, xmmword ptr [ROT8+rip]
755
+ pshufb xmm12, xmm8
756
+ pshufb xmm13, xmm8
757
+ pshufb xmm14, xmm8
758
+ pshufb xmm15, xmm8
759
+ movdqa xmm8, xmmword ptr [rsp+0x100]
760
+ paddd xmm8, xmm12
761
+ paddd xmm9, xmm13
762
+ paddd xmm10, xmm14
763
+ paddd xmm11, xmm15
764
+ pxor xmm4, xmm8
765
+ pxor xmm5, xmm9
766
+ pxor xmm6, xmm10
767
+ pxor xmm7, xmm11
768
+ movdqa xmmword ptr [rsp+0x100], xmm8
769
+ movdqa xmm8, xmm4
770
+ psrld xmm8, 7
771
+ pslld xmm4, 25
772
+ por xmm4, xmm8
773
+ movdqa xmm8, xmm5
774
+ psrld xmm8, 7
775
+ pslld xmm5, 25
776
+ por xmm5, xmm8
777
+ movdqa xmm8, xmm6
778
+ psrld xmm8, 7
779
+ pslld xmm6, 25
780
+ por xmm6, xmm8
781
+ movdqa xmm8, xmm7
782
+ psrld xmm8, 7
783
+ pslld xmm7, 25
784
+ por xmm7, xmm8
785
+ paddd xmm0, xmmword ptr [rsp+0x40]
786
+ paddd xmm1, xmmword ptr [rsp+0xB0]
787
+ paddd xmm2, xmmword ptr [rsp+0x50]
788
+ paddd xmm3, xmmword ptr [rsp+0x10]
789
+ paddd xmm0, xmm5
790
+ paddd xmm1, xmm6
791
+ paddd xmm2, xmm7
792
+ paddd xmm3, xmm4
793
+ pxor xmm15, xmm0
794
+ pxor xmm12, xmm1
795
+ pxor xmm13, xmm2
796
+ pxor xmm14, xmm3
797
+ movdqa xmm8, xmmword ptr [ROT16+rip]
798
+ pshufb xmm15, xmm8
799
+ pshufb xmm12, xmm8
800
+ pshufb xmm13, xmm8
801
+ pshufb xmm14, xmm8
802
+ paddd xmm10, xmm15
803
+ paddd xmm11, xmm12
804
+ movdqa xmm8, xmmword ptr [rsp+0x100]
805
+ paddd xmm8, xmm13
806
+ paddd xmm9, xmm14
807
+ pxor xmm5, xmm10
808
+ pxor xmm6, xmm11
809
+ pxor xmm7, xmm8
810
+ pxor xmm4, xmm9
811
+ movdqa xmmword ptr [rsp+0x100], xmm8
812
+ movdqa xmm8, xmm5
813
+ psrld xmm8, 12
814
+ pslld xmm5, 20
815
+ por xmm5, xmm8
816
+ movdqa xmm8, xmm6
817
+ psrld xmm8, 12
818
+ pslld xmm6, 20
819
+ por xmm6, xmm8
820
+ movdqa xmm8, xmm7
821
+ psrld xmm8, 12
822
+ pslld xmm7, 20
823
+ por xmm7, xmm8
824
+ movdqa xmm8, xmm4
825
+ psrld xmm8, 12
826
+ pslld xmm4, 20
827
+ por xmm4, xmm8
828
+ paddd xmm0, xmmword ptr [rsp]
829
+ paddd xmm1, xmmword ptr [rsp+0x20]
830
+ paddd xmm2, xmmword ptr [rsp+0x80]
831
+ paddd xmm3, xmmword ptr [rsp+0x60]
832
+ paddd xmm0, xmm5
833
+ paddd xmm1, xmm6
834
+ paddd xmm2, xmm7
835
+ paddd xmm3, xmm4
836
+ pxor xmm15, xmm0
837
+ pxor xmm12, xmm1
838
+ pxor xmm13, xmm2
839
+ pxor xmm14, xmm3
840
+ movdqa xmm8, xmmword ptr [ROT8+rip]
841
+ pshufb xmm15, xmm8
842
+ pshufb xmm12, xmm8
843
+ pshufb xmm13, xmm8
844
+ pshufb xmm14, xmm8
845
+ paddd xmm10, xmm15
846
+ paddd xmm11, xmm12
847
+ movdqa xmm8, xmmword ptr [rsp+0x100]
848
+ paddd xmm8, xmm13
849
+ paddd xmm9, xmm14
850
+ pxor xmm5, xmm10
851
+ pxor xmm6, xmm11
852
+ pxor xmm7, xmm8
853
+ pxor xmm4, xmm9
854
+ movdqa xmmword ptr [rsp+0x100], xmm8
855
+ movdqa xmm8, xmm5
856
+ psrld xmm8, 7
857
+ pslld xmm5, 25
858
+ por xmm5, xmm8
859
+ movdqa xmm8, xmm6
860
+ psrld xmm8, 7
861
+ pslld xmm6, 25
862
+ por xmm6, xmm8
863
+ movdqa xmm8, xmm7
864
+ psrld xmm8, 7
865
+ pslld xmm7, 25
866
+ por xmm7, xmm8
867
+ movdqa xmm8, xmm4
868
+ psrld xmm8, 7
869
+ pslld xmm4, 25
870
+ por xmm4, xmm8
871
+ paddd xmm0, xmmword ptr [rsp+0xC0]
872
+ paddd xmm1, xmmword ptr [rsp+0x90]
873
+ paddd xmm2, xmmword ptr [rsp+0xF0]
874
+ paddd xmm3, xmmword ptr [rsp+0xE0]
875
+ paddd xmm0, xmm4
876
+ paddd xmm1, xmm5
877
+ paddd xmm2, xmm6
878
+ paddd xmm3, xmm7
879
+ pxor xmm12, xmm0
880
+ pxor xmm13, xmm1
881
+ pxor xmm14, xmm2
882
+ pxor xmm15, xmm3
883
+ movdqa xmm8, xmmword ptr [ROT16+rip]
884
+ pshufb xmm12, xmm8
885
+ pshufb xmm13, xmm8
886
+ pshufb xmm14, xmm8
887
+ pshufb xmm15, xmm8
888
+ movdqa xmm8, xmmword ptr [rsp+0x100]
889
+ paddd xmm8, xmm12
890
+ paddd xmm9, xmm13
891
+ paddd xmm10, xmm14
892
+ paddd xmm11, xmm15
893
+ pxor xmm4, xmm8
894
+ pxor xmm5, xmm9
895
+ pxor xmm6, xmm10
896
+ pxor xmm7, xmm11
897
+ movdqa xmmword ptr [rsp+0x100], xmm8
898
+ movdqa xmm8, xmm4
899
+ psrld xmm8, 12
900
+ pslld xmm4, 20
901
+ por xmm4, xmm8
902
+ movdqa xmm8, xmm5
903
+ psrld xmm8, 12
904
+ pslld xmm5, 20
905
+ por xmm5, xmm8
906
+ movdqa xmm8, xmm6
907
+ psrld xmm8, 12
908
+ pslld xmm6, 20
909
+ por xmm6, xmm8
910
+ movdqa xmm8, xmm7
911
+ psrld xmm8, 12
912
+ pslld xmm7, 20
913
+ por xmm7, xmm8
914
+ paddd xmm0, xmmword ptr [rsp+0xD0]
915
+ paddd xmm1, xmmword ptr [rsp+0xB0]
916
+ paddd xmm2, xmmword ptr [rsp+0xA0]
917
+ paddd xmm3, xmmword ptr [rsp+0x80]
918
+ paddd xmm0, xmm4
919
+ paddd xmm1, xmm5
920
+ paddd xmm2, xmm6
921
+ paddd xmm3, xmm7
922
+ pxor xmm12, xmm0
923
+ pxor xmm13, xmm1
924
+ pxor xmm14, xmm2
925
+ pxor xmm15, xmm3
926
+ movdqa xmm8, xmmword ptr [ROT8+rip]
927
+ pshufb xmm12, xmm8
928
+ pshufb xmm13, xmm8
929
+ pshufb xmm14, xmm8
930
+ pshufb xmm15, xmm8
931
+ movdqa xmm8, xmmword ptr [rsp+0x100]
932
+ paddd xmm8, xmm12
933
+ paddd xmm9, xmm13
934
+ paddd xmm10, xmm14
935
+ paddd xmm11, xmm15
936
+ pxor xmm4, xmm8
937
+ pxor xmm5, xmm9
938
+ pxor xmm6, xmm10
939
+ pxor xmm7, xmm11
940
+ movdqa xmmword ptr [rsp+0x100], xmm8
941
+ movdqa xmm8, xmm4
942
+ psrld xmm8, 7
943
+ pslld xmm4, 25
944
+ por xmm4, xmm8
945
+ movdqa xmm8, xmm5
946
+ psrld xmm8, 7
947
+ pslld xmm5, 25
948
+ por xmm5, xmm8
949
+ movdqa xmm8, xmm6
950
+ psrld xmm8, 7
951
+ pslld xmm6, 25
952
+ por xmm6, xmm8
953
+ movdqa xmm8, xmm7
954
+ psrld xmm8, 7
955
+ pslld xmm7, 25
956
+ por xmm7, xmm8
957
+ paddd xmm0, xmmword ptr [rsp+0x70]
958
+ paddd xmm1, xmmword ptr [rsp+0x50]
959
+ paddd xmm2, xmmword ptr [rsp]
960
+ paddd xmm3, xmmword ptr [rsp+0x60]
961
+ paddd xmm0, xmm5
962
+ paddd xmm1, xmm6
963
+ paddd xmm2, xmm7
964
+ paddd xmm3, xmm4
965
+ pxor xmm15, xmm0
966
+ pxor xmm12, xmm1
967
+ pxor xmm13, xmm2
968
+ pxor xmm14, xmm3
969
+ movdqa xmm8, xmmword ptr [ROT16+rip]
970
+ pshufb xmm15, xmm8
971
+ pshufb xmm12, xmm8
972
+ pshufb xmm13, xmm8
973
+ pshufb xmm14, xmm8
974
+ paddd xmm10, xmm15
975
+ paddd xmm11, xmm12
976
+ movdqa xmm8, xmmword ptr [rsp+0x100]
977
+ paddd xmm8, xmm13
978
+ paddd xmm9, xmm14
979
+ pxor xmm5, xmm10
980
+ pxor xmm6, xmm11
981
+ pxor xmm7, xmm8
982
+ pxor xmm4, xmm9
983
+ movdqa xmmword ptr [rsp+0x100], xmm8
984
+ movdqa xmm8, xmm5
985
+ psrld xmm8, 12
986
+ pslld xmm5, 20
987
+ por xmm5, xmm8
988
+ movdqa xmm8, xmm6
989
+ psrld xmm8, 12
990
+ pslld xmm6, 20
991
+ por xmm6, xmm8
992
+ movdqa xmm8, xmm7
993
+ psrld xmm8, 12
994
+ pslld xmm7, 20
995
+ por xmm7, xmm8
996
+ movdqa xmm8, xmm4
997
+ psrld xmm8, 12
998
+ pslld xmm4, 20
999
+ por xmm4, xmm8
1000
+ paddd xmm0, xmmword ptr [rsp+0x20]
1001
+ paddd xmm1, xmmword ptr [rsp+0x30]
1002
+ paddd xmm2, xmmword ptr [rsp+0x10]
1003
+ paddd xmm3, xmmword ptr [rsp+0x40]
1004
+ paddd xmm0, xmm5
1005
+ paddd xmm1, xmm6
1006
+ paddd xmm2, xmm7
1007
+ paddd xmm3, xmm4
1008
+ pxor xmm15, xmm0
1009
+ pxor xmm12, xmm1
1010
+ pxor xmm13, xmm2
1011
+ pxor xmm14, xmm3
1012
+ movdqa xmm8, xmmword ptr [ROT8+rip]
1013
+ pshufb xmm15, xmm8
1014
+ pshufb xmm12, xmm8
1015
+ pshufb xmm13, xmm8
1016
+ pshufb xmm14, xmm8
1017
+ paddd xmm10, xmm15
1018
+ paddd xmm11, xmm12
1019
+ movdqa xmm8, xmmword ptr [rsp+0x100]
1020
+ paddd xmm8, xmm13
1021
+ paddd xmm9, xmm14
1022
+ pxor xmm5, xmm10
1023
+ pxor xmm6, xmm11
1024
+ pxor xmm7, xmm8
1025
+ pxor xmm4, xmm9
1026
+ movdqa xmmword ptr [rsp+0x100], xmm8
1027
+ movdqa xmm8, xmm5
1028
+ psrld xmm8, 7
1029
+ pslld xmm5, 25
1030
+ por xmm5, xmm8
1031
+ movdqa xmm8, xmm6
1032
+ psrld xmm8, 7
1033
+ pslld xmm6, 25
1034
+ por xmm6, xmm8
1035
+ movdqa xmm8, xmm7
1036
+ psrld xmm8, 7
1037
+ pslld xmm7, 25
1038
+ por xmm7, xmm8
1039
+ movdqa xmm8, xmm4
1040
+ psrld xmm8, 7
1041
+ pslld xmm4, 25
1042
+ por xmm4, xmm8
1043
+ paddd xmm0, xmmword ptr [rsp+0x90]
1044
+ paddd xmm1, xmmword ptr [rsp+0xB0]
1045
+ paddd xmm2, xmmword ptr [rsp+0x80]
1046
+ paddd xmm3, xmmword ptr [rsp+0xF0]
1047
+ paddd xmm0, xmm4
1048
+ paddd xmm1, xmm5
1049
+ paddd xmm2, xmm6
1050
+ paddd xmm3, xmm7
1051
+ pxor xmm12, xmm0
1052
+ pxor xmm13, xmm1
1053
+ pxor xmm14, xmm2
1054
+ pxor xmm15, xmm3
1055
+ movdqa xmm8, xmmword ptr [ROT16+rip]
1056
+ pshufb xmm12, xmm8
1057
+ pshufb xmm13, xmm8
1058
+ pshufb xmm14, xmm8
1059
+ pshufb xmm15, xmm8
1060
+ movdqa xmm8, xmmword ptr [rsp+0x100]
1061
+ paddd xmm8, xmm12
1062
+ paddd xmm9, xmm13
1063
+ paddd xmm10, xmm14
1064
+ paddd xmm11, xmm15
1065
+ pxor xmm4, xmm8
1066
+ pxor xmm5, xmm9
1067
+ pxor xmm6, xmm10
1068
+ pxor xmm7, xmm11
1069
+ movdqa xmmword ptr [rsp+0x100], xmm8
1070
+ movdqa xmm8, xmm4
1071
+ psrld xmm8, 12
1072
+ pslld xmm4, 20
1073
+ por xmm4, xmm8
1074
+ movdqa xmm8, xmm5
1075
+ psrld xmm8, 12
1076
+ pslld xmm5, 20
1077
+ por xmm5, xmm8
1078
+ movdqa xmm8, xmm6
1079
+ psrld xmm8, 12
1080
+ pslld xmm6, 20
1081
+ por xmm6, xmm8
1082
+ movdqa xmm8, xmm7
1083
+ psrld xmm8, 12
1084
+ pslld xmm7, 20
1085
+ por xmm7, xmm8
1086
+ paddd xmm0, xmmword ptr [rsp+0xE0]
1087
+ paddd xmm1, xmmword ptr [rsp+0x50]
1088
+ paddd xmm2, xmmword ptr [rsp+0xC0]
1089
+ paddd xmm3, xmmword ptr [rsp+0x10]
1090
+ paddd xmm0, xmm4
1091
+ paddd xmm1, xmm5
1092
+ paddd xmm2, xmm6
1093
+ paddd xmm3, xmm7
1094
+ pxor xmm12, xmm0
1095
+ pxor xmm13, xmm1
1096
+ pxor xmm14, xmm2
1097
+ pxor xmm15, xmm3
1098
+ movdqa xmm8, xmmword ptr [ROT8+rip]
1099
+ pshufb xmm12, xmm8
1100
+ pshufb xmm13, xmm8
1101
+ pshufb xmm14, xmm8
1102
+ pshufb xmm15, xmm8
1103
+ movdqa xmm8, xmmword ptr [rsp+0x100]
1104
+ paddd xmm8, xmm12
1105
+ paddd xmm9, xmm13
1106
+ paddd xmm10, xmm14
1107
+ paddd xmm11, xmm15
1108
+ pxor xmm4, xmm8
1109
+ pxor xmm5, xmm9
1110
+ pxor xmm6, xmm10
1111
+ pxor xmm7, xmm11
1112
+ movdqa xmmword ptr [rsp+0x100], xmm8
1113
+ movdqa xmm8, xmm4
1114
+ psrld xmm8, 7
1115
+ pslld xmm4, 25
1116
+ por xmm4, xmm8
1117
+ movdqa xmm8, xmm5
1118
+ psrld xmm8, 7
1119
+ pslld xmm5, 25
1120
+ por xmm5, xmm8
1121
+ movdqa xmm8, xmm6
1122
+ psrld xmm8, 7
1123
+ pslld xmm6, 25
1124
+ por xmm6, xmm8
1125
+ movdqa xmm8, xmm7
1126
+ psrld xmm8, 7
1127
+ pslld xmm7, 25
1128
+ por xmm7, xmm8
1129
+ paddd xmm0, xmmword ptr [rsp+0xD0]
1130
+ paddd xmm1, xmmword ptr [rsp]
1131
+ paddd xmm2, xmmword ptr [rsp+0x20]
1132
+ paddd xmm3, xmmword ptr [rsp+0x40]
1133
+ paddd xmm0, xmm5
1134
+ paddd xmm1, xmm6
1135
+ paddd xmm2, xmm7
1136
+ paddd xmm3, xmm4
1137
+ pxor xmm15, xmm0
1138
+ pxor xmm12, xmm1
1139
+ pxor xmm13, xmm2
1140
+ pxor xmm14, xmm3
1141
+ movdqa xmm8, xmmword ptr [ROT16+rip]
1142
+ pshufb xmm15, xmm8
1143
+ pshufb xmm12, xmm8
1144
+ pshufb xmm13, xmm8
1145
+ pshufb xmm14, xmm8
1146
+ paddd xmm10, xmm15
1147
+ paddd xmm11, xmm12
1148
+ movdqa xmm8, xmmword ptr [rsp+0x100]
1149
+ paddd xmm8, xmm13
1150
+ paddd xmm9, xmm14
1151
+ pxor xmm5, xmm10
1152
+ pxor xmm6, xmm11
1153
+ pxor xmm7, xmm8
1154
+ pxor xmm4, xmm9
1155
+ movdqa xmmword ptr [rsp+0x100], xmm8
1156
+ movdqa xmm8, xmm5
1157
+ psrld xmm8, 12
1158
+ pslld xmm5, 20
1159
+ por xmm5, xmm8
1160
+ movdqa xmm8, xmm6
1161
+ psrld xmm8, 12
1162
+ pslld xmm6, 20
1163
+ por xmm6, xmm8
1164
+ movdqa xmm8, xmm7
1165
+ psrld xmm8, 12
1166
+ pslld xmm7, 20
1167
+ por xmm7, xmm8
1168
+ movdqa xmm8, xmm4
1169
+ psrld xmm8, 12
1170
+ pslld xmm4, 20
1171
+ por xmm4, xmm8
1172
+ paddd xmm0, xmmword ptr [rsp+0x30]
1173
+ paddd xmm1, xmmword ptr [rsp+0xA0]
1174
+ paddd xmm2, xmmword ptr [rsp+0x60]
1175
+ paddd xmm3, xmmword ptr [rsp+0x70]
1176
+ paddd xmm0, xmm5
1177
+ paddd xmm1, xmm6
1178
+ paddd xmm2, xmm7
1179
+ paddd xmm3, xmm4
1180
+ pxor xmm15, xmm0
1181
+ pxor xmm12, xmm1
1182
+ pxor xmm13, xmm2
1183
+ pxor xmm14, xmm3
1184
+ movdqa xmm8, xmmword ptr [ROT8+rip]
1185
+ pshufb xmm15, xmm8
1186
+ pshufb xmm12, xmm8
1187
+ pshufb xmm13, xmm8
1188
+ pshufb xmm14, xmm8
1189
+ paddd xmm10, xmm15
1190
+ paddd xmm11, xmm12
1191
+ movdqa xmm8, xmmword ptr [rsp+0x100]
1192
+ paddd xmm8, xmm13
1193
+ paddd xmm9, xmm14
1194
+ pxor xmm5, xmm10
1195
+ pxor xmm6, xmm11
1196
+ pxor xmm7, xmm8
1197
+ pxor xmm4, xmm9
1198
+ movdqa xmmword ptr [rsp+0x100], xmm8
1199
+ movdqa xmm8, xmm5
1200
+ psrld xmm8, 7
1201
+ pslld xmm5, 25
1202
+ por xmm5, xmm8
1203
+ movdqa xmm8, xmm6
1204
+ psrld xmm8, 7
1205
+ pslld xmm6, 25
1206
+ por xmm6, xmm8
1207
+ movdqa xmm8, xmm7
1208
+ psrld xmm8, 7
1209
+ pslld xmm7, 25
1210
+ por xmm7, xmm8
1211
+ movdqa xmm8, xmm4
1212
+ psrld xmm8, 7
1213
+ pslld xmm4, 25
1214
+ por xmm4, xmm8
1215
+ paddd xmm0, xmmword ptr [rsp+0xB0]
1216
+ paddd xmm1, xmmword ptr [rsp+0x50]
1217
+ paddd xmm2, xmmword ptr [rsp+0x10]
1218
+ paddd xmm3, xmmword ptr [rsp+0x80]
1219
+ paddd xmm0, xmm4
1220
+ paddd xmm1, xmm5
1221
+ paddd xmm2, xmm6
1222
+ paddd xmm3, xmm7
1223
+ pxor xmm12, xmm0
1224
+ pxor xmm13, xmm1
1225
+ pxor xmm14, xmm2
1226
+ pxor xmm15, xmm3
1227
+ movdqa xmm8, xmmword ptr [ROT16+rip]
1228
+ pshufb xmm12, xmm8
1229
+ pshufb xmm13, xmm8
1230
+ pshufb xmm14, xmm8
1231
+ pshufb xmm15, xmm8
1232
+ movdqa xmm8, xmmword ptr [rsp+0x100]
1233
+ paddd xmm8, xmm12
1234
+ paddd xmm9, xmm13
1235
+ paddd xmm10, xmm14
1236
+ paddd xmm11, xmm15
1237
+ pxor xmm4, xmm8
1238
+ pxor xmm5, xmm9
1239
+ pxor xmm6, xmm10
1240
+ pxor xmm7, xmm11
1241
+ movdqa xmmword ptr [rsp+0x100], xmm8
1242
+ movdqa xmm8, xmm4
1243
+ psrld xmm8, 12
1244
+ pslld xmm4, 20
1245
+ por xmm4, xmm8
1246
+ movdqa xmm8, xmm5
1247
+ psrld xmm8, 12
1248
+ pslld xmm5, 20
1249
+ por xmm5, xmm8
1250
+ movdqa xmm8, xmm6
1251
+ psrld xmm8, 12
1252
+ pslld xmm6, 20
1253
+ por xmm6, xmm8
1254
+ movdqa xmm8, xmm7
1255
+ psrld xmm8, 12
1256
+ pslld xmm7, 20
1257
+ por xmm7, xmm8
1258
+ paddd xmm0, xmmword ptr [rsp+0xF0]
1259
+ paddd xmm1, xmmword ptr [rsp]
1260
+ paddd xmm2, xmmword ptr [rsp+0x90]
1261
+ paddd xmm3, xmmword ptr [rsp+0x60]
1262
+ paddd xmm0, xmm4
1263
+ paddd xmm1, xmm5
1264
+ paddd xmm2, xmm6
1265
+ paddd xmm3, xmm7
1266
+ pxor xmm12, xmm0
1267
+ pxor xmm13, xmm1
1268
+ pxor xmm14, xmm2
1269
+ pxor xmm15, xmm3
1270
+ movdqa xmm8, xmmword ptr [ROT8+rip]
1271
+ pshufb xmm12, xmm8
1272
+ pshufb xmm13, xmm8
1273
+ pshufb xmm14, xmm8
1274
+ pshufb xmm15, xmm8
1275
+ movdqa xmm8, xmmword ptr [rsp+0x100]
1276
+ paddd xmm8, xmm12
1277
+ paddd xmm9, xmm13
1278
+ paddd xmm10, xmm14
1279
+ paddd xmm11, xmm15
1280
+ pxor xmm4, xmm8
1281
+ pxor xmm5, xmm9
1282
+ pxor xmm6, xmm10
1283
+ pxor xmm7, xmm11
1284
+ movdqa xmmword ptr [rsp+0x100], xmm8
1285
+ movdqa xmm8, xmm4
1286
+ psrld xmm8, 7
1287
+ pslld xmm4, 25
1288
+ por xmm4, xmm8
1289
+ movdqa xmm8, xmm5
1290
+ psrld xmm8, 7
1291
+ pslld xmm5, 25
1292
+ por xmm5, xmm8
1293
+ movdqa xmm8, xmm6
1294
+ psrld xmm8, 7
1295
+ pslld xmm6, 25
1296
+ por xmm6, xmm8
1297
+ movdqa xmm8, xmm7
1298
+ psrld xmm8, 7
1299
+ pslld xmm7, 25
1300
+ por xmm7, xmm8
1301
+ paddd xmm0, xmmword ptr [rsp+0xE0]
1302
+ paddd xmm1, xmmword ptr [rsp+0x20]
1303
+ paddd xmm2, xmmword ptr [rsp+0x30]
1304
+ paddd xmm3, xmmword ptr [rsp+0x70]
1305
+ paddd xmm0, xmm5
1306
+ paddd xmm1, xmm6
1307
+ paddd xmm2, xmm7
1308
+ paddd xmm3, xmm4
1309
+ pxor xmm15, xmm0
1310
+ pxor xmm12, xmm1
1311
+ pxor xmm13, xmm2
1312
+ pxor xmm14, xmm3
1313
+ movdqa xmm8, xmmword ptr [ROT16+rip]
1314
+ pshufb xmm15, xmm8
1315
+ pshufb xmm12, xmm8
1316
+ pshufb xmm13, xmm8
1317
+ pshufb xmm14, xmm8
1318
+ paddd xmm10, xmm15
1319
+ paddd xmm11, xmm12
1320
+ movdqa xmm8, xmmword ptr [rsp+0x100]
1321
+ paddd xmm8, xmm13
1322
+ paddd xmm9, xmm14
1323
+ pxor xmm5, xmm10
1324
+ pxor xmm6, xmm11
1325
+ pxor xmm7, xmm8
1326
+ pxor xmm4, xmm9
1327
+ movdqa xmmword ptr [rsp+0x100], xmm8
1328
+ movdqa xmm8, xmm5
1329
+ psrld xmm8, 12
1330
+ pslld xmm5, 20
1331
+ por xmm5, xmm8
1332
+ movdqa xmm8, xmm6
1333
+ psrld xmm8, 12
1334
+ pslld xmm6, 20
1335
+ por xmm6, xmm8
1336
+ movdqa xmm8, xmm7
1337
+ psrld xmm8, 12
1338
+ pslld xmm7, 20
1339
+ por xmm7, xmm8
1340
+ movdqa xmm8, xmm4
1341
+ psrld xmm8, 12
1342
+ pslld xmm4, 20
1343
+ por xmm4, xmm8
1344
+ paddd xmm0, xmmword ptr [rsp+0xA0]
1345
+ paddd xmm1, xmmword ptr [rsp+0xC0]
1346
+ paddd xmm2, xmmword ptr [rsp+0x40]
1347
+ paddd xmm3, xmmword ptr [rsp+0xD0]
1348
+ paddd xmm0, xmm5
1349
+ paddd xmm1, xmm6
1350
+ paddd xmm2, xmm7
1351
+ paddd xmm3, xmm4
1352
+ pxor xmm15, xmm0
1353
+ pxor xmm12, xmm1
1354
+ pxor xmm13, xmm2
1355
+ pxor xmm14, xmm3
1356
+ movdqa xmm8, xmmword ptr [ROT8+rip]
1357
+ pshufb xmm15, xmm8
1358
+ pshufb xmm12, xmm8
1359
+ pshufb xmm13, xmm8
1360
+ pshufb xmm14, xmm8
1361
+ paddd xmm10, xmm15
1362
+ paddd xmm11, xmm12
1363
+ movdqa xmm8, xmmword ptr [rsp+0x100]
1364
+ paddd xmm8, xmm13
1365
+ paddd xmm9, xmm14
1366
+ pxor xmm5, xmm10
1367
+ pxor xmm6, xmm11
1368
+ pxor xmm7, xmm8
1369
+ pxor xmm4, xmm9
1370
+ pxor xmm0, xmm8
1371
+ pxor xmm1, xmm9
1372
+ pxor xmm2, xmm10
1373
+ pxor xmm3, xmm11
1374
+ movdqa xmm8, xmm5
1375
+ psrld xmm8, 7
1376
+ pslld xmm5, 25
1377
+ por xmm5, xmm8
1378
+ movdqa xmm8, xmm6
1379
+ psrld xmm8, 7
1380
+ pslld xmm6, 25
1381
+ por xmm6, xmm8
1382
+ movdqa xmm8, xmm7
1383
+ psrld xmm8, 7
1384
+ pslld xmm7, 25
1385
+ por xmm7, xmm8
1386
+ movdqa xmm8, xmm4
1387
+ psrld xmm8, 7
1388
+ pslld xmm4, 25
1389
+ por xmm4, xmm8
1390
+ pxor xmm4, xmm12
1391
+ pxor xmm5, xmm13
1392
+ pxor xmm6, xmm14
1393
+ pxor xmm7, xmm15
1394
+ mov eax, r13d
1395
+ jne 9b
1396
+ movdqa xmm9, xmm0
1397
+ punpckldq xmm0, xmm1
1398
+ punpckhdq xmm9, xmm1
1399
+ movdqa xmm11, xmm2
1400
+ punpckldq xmm2, xmm3
1401
+ punpckhdq xmm11, xmm3
1402
+ movdqa xmm1, xmm0
1403
+ punpcklqdq xmm0, xmm2
1404
+ punpckhqdq xmm1, xmm2
1405
+ movdqa xmm3, xmm9
1406
+ punpcklqdq xmm9, xmm11
1407
+ punpckhqdq xmm3, xmm11
1408
+ movdqu xmmword ptr [rbx], xmm0
1409
+ movdqu xmmword ptr [rbx+0x20], xmm1
1410
+ movdqu xmmword ptr [rbx+0x40], xmm9
1411
+ movdqu xmmword ptr [rbx+0x60], xmm3
1412
+ movdqa xmm9, xmm4
1413
+ punpckldq xmm4, xmm5
1414
+ punpckhdq xmm9, xmm5
1415
+ movdqa xmm11, xmm6
1416
+ punpckldq xmm6, xmm7
1417
+ punpckhdq xmm11, xmm7
1418
+ movdqa xmm5, xmm4
1419
+ punpcklqdq xmm4, xmm6
1420
+ punpckhqdq xmm5, xmm6
1421
+ movdqa xmm7, xmm9
1422
+ punpcklqdq xmm9, xmm11
1423
+ punpckhqdq xmm7, xmm11
1424
+ movdqu xmmword ptr [rbx+0x10], xmm4
1425
+ movdqu xmmword ptr [rbx+0x30], xmm5
1426
+ movdqu xmmword ptr [rbx+0x50], xmm9
1427
+ movdqu xmmword ptr [rbx+0x70], xmm7
1428
+ movdqa xmm1, xmmword ptr [rsp+0x110]
1429
+ movdqa xmm0, xmm1
1430
+ paddd xmm1, xmmword ptr [rsp+0x150]
1431
+ movdqa xmmword ptr [rsp+0x110], xmm1
1432
+ pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip]
1433
+ pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip]
1434
+ pcmpgtd xmm0, xmm1
1435
+ movdqa xmm1, xmmword ptr [rsp+0x120]
1436
+ psubd xmm1, xmm0
1437
+ movdqa xmmword ptr [rsp+0x120], xmm1
1438
+ add rbx, 128
1439
+ add rdi, 32
1440
+ sub rsi, 4
1441
+ cmp rsi, 4
1442
+ jnc 2b
1443
+ test rsi, rsi
1444
+ jnz 3f
1445
+ 4:
1446
+ mov rsp, rbp
1447
+ pop rbp
1448
+ pop rbx
1449
+ pop r12
1450
+ pop r13
1451
+ pop r14
1452
+ pop r15
1453
+ ret
1454
+ .p2align 5
1455
+ 3:
1456
+ test esi, 0x2
1457
+ je 3f
1458
+ movups xmm0, xmmword ptr [rcx]
1459
+ movups xmm1, xmmword ptr [rcx+0x10]
1460
+ movaps xmm8, xmm0
1461
+ movaps xmm9, xmm1
1462
+ movd xmm13, dword ptr [rsp+0x110]
1463
+ pinsrd xmm13, dword ptr [rsp+0x120], 1
1464
+ pinsrd xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
1465
+ movaps xmmword ptr [rsp], xmm13
1466
+ movd xmm14, dword ptr [rsp+0x114]
1467
+ pinsrd xmm14, dword ptr [rsp+0x124], 1
1468
+ pinsrd xmm14, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
1469
+ movaps xmmword ptr [rsp+0x10], xmm14
1470
+ mov r8, qword ptr [rdi]
1471
+ mov r9, qword ptr [rdi+0x8]
1472
+ movzx eax, byte ptr [rbp+0x40]
1473
+ or eax, r13d
1474
+ xor edx, edx
1475
+ 2:
1476
+ mov r14d, eax
1477
+ or eax, r12d
1478
+ add rdx, 64
1479
+ cmp rdx, r15
1480
+ cmovne eax, r14d
1481
+ movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
1482
+ movaps xmm10, xmm2
1483
+ movups xmm4, xmmword ptr [r8+rdx-0x40]
1484
+ movups xmm5, xmmword ptr [r8+rdx-0x30]
1485
+ movaps xmm3, xmm4
1486
+ shufps xmm4, xmm5, 136
1487
+ shufps xmm3, xmm5, 221
1488
+ movaps xmm5, xmm3
1489
+ movups xmm6, xmmword ptr [r8+rdx-0x20]
1490
+ movups xmm7, xmmword ptr [r8+rdx-0x10]
1491
+ movaps xmm3, xmm6
1492
+ shufps xmm6, xmm7, 136
1493
+ pshufd xmm6, xmm6, 0x93
1494
+ shufps xmm3, xmm7, 221
1495
+ pshufd xmm7, xmm3, 0x93
1496
+ movups xmm12, xmmword ptr [r9+rdx-0x40]
1497
+ movups xmm13, xmmword ptr [r9+rdx-0x30]
1498
+ movaps xmm11, xmm12
1499
+ shufps xmm12, xmm13, 136
1500
+ shufps xmm11, xmm13, 221
1501
+ movaps xmm13, xmm11
1502
+ movups xmm14, xmmword ptr [r9+rdx-0x20]
1503
+ movups xmm15, xmmword ptr [r9+rdx-0x10]
1504
+ movaps xmm11, xmm14
1505
+ shufps xmm14, xmm15, 136
1506
+ pshufd xmm14, xmm14, 0x93
1507
+ shufps xmm11, xmm15, 221
1508
+ pshufd xmm15, xmm11, 0x93
1509
+ movaps xmm3, xmmword ptr [rsp]
1510
+ movaps xmm11, xmmword ptr [rsp+0x10]
1511
+ pinsrd xmm3, eax, 3
1512
+ pinsrd xmm11, eax, 3
1513
+ mov al, 7
1514
+ 9:
1515
+ paddd xmm0, xmm4
1516
+ paddd xmm8, xmm12
1517
+ movaps xmmword ptr [rsp+0x20], xmm4
1518
+ movaps xmmword ptr [rsp+0x30], xmm12
1519
+ paddd xmm0, xmm1
1520
+ paddd xmm8, xmm9
1521
+ pxor xmm3, xmm0
1522
+ pxor xmm11, xmm8
1523
+ movaps xmm12, xmmword ptr [ROT16+rip]
1524
+ pshufb xmm3, xmm12
1525
+ pshufb xmm11, xmm12
1526
+ paddd xmm2, xmm3
1527
+ paddd xmm10, xmm11
1528
+ pxor xmm1, xmm2
1529
+ pxor xmm9, xmm10
1530
+ movdqa xmm4, xmm1
1531
+ pslld xmm1, 20
1532
+ psrld xmm4, 12
1533
+ por xmm1, xmm4
1534
+ movdqa xmm4, xmm9
1535
+ pslld xmm9, 20
1536
+ psrld xmm4, 12
1537
+ por xmm9, xmm4
1538
+ paddd xmm0, xmm5
1539
+ paddd xmm8, xmm13
1540
+ movaps xmmword ptr [rsp+0x40], xmm5
1541
+ movaps xmmword ptr [rsp+0x50], xmm13
1542
+ paddd xmm0, xmm1
1543
+ paddd xmm8, xmm9
1544
+ pxor xmm3, xmm0
1545
+ pxor xmm11, xmm8
1546
+ movaps xmm13, xmmword ptr [ROT8+rip]
1547
+ pshufb xmm3, xmm13
1548
+ pshufb xmm11, xmm13
1549
+ paddd xmm2, xmm3
1550
+ paddd xmm10, xmm11
1551
+ pxor xmm1, xmm2
1552
+ pxor xmm9, xmm10
1553
+ movdqa xmm4, xmm1
1554
+ pslld xmm1, 25
1555
+ psrld xmm4, 7
1556
+ por xmm1, xmm4
1557
+ movdqa xmm4, xmm9
1558
+ pslld xmm9, 25
1559
+ psrld xmm4, 7
1560
+ por xmm9, xmm4
1561
+ pshufd xmm0, xmm0, 0x93
1562
+ pshufd xmm8, xmm8, 0x93
1563
+ pshufd xmm3, xmm3, 0x4E
1564
+ pshufd xmm11, xmm11, 0x4E
1565
+ pshufd xmm2, xmm2, 0x39
1566
+ pshufd xmm10, xmm10, 0x39
1567
+ paddd xmm0, xmm6
1568
+ paddd xmm8, xmm14
1569
+ paddd xmm0, xmm1
1570
+ paddd xmm8, xmm9
1571
+ pxor xmm3, xmm0
1572
+ pxor xmm11, xmm8
1573
+ pshufb xmm3, xmm12
1574
+ pshufb xmm11, xmm12
1575
+ paddd xmm2, xmm3
1576
+ paddd xmm10, xmm11
1577
+ pxor xmm1, xmm2
1578
+ pxor xmm9, xmm10
1579
+ movdqa xmm4, xmm1
1580
+ pslld xmm1, 20
1581
+ psrld xmm4, 12
1582
+ por xmm1, xmm4
1583
+ movdqa xmm4, xmm9
1584
+ pslld xmm9, 20
1585
+ psrld xmm4, 12
1586
+ por xmm9, xmm4
1587
+ paddd xmm0, xmm7
1588
+ paddd xmm8, xmm15
1589
+ paddd xmm0, xmm1
1590
+ paddd xmm8, xmm9
1591
+ pxor xmm3, xmm0
1592
+ pxor xmm11, xmm8
1593
+ pshufb xmm3, xmm13
1594
+ pshufb xmm11, xmm13
1595
+ paddd xmm2, xmm3
1596
+ paddd xmm10, xmm11
1597
+ pxor xmm1, xmm2
1598
+ pxor xmm9, xmm10
1599
+ movdqa xmm4, xmm1
1600
+ pslld xmm1, 25
1601
+ psrld xmm4, 7
1602
+ por xmm1, xmm4
1603
+ movdqa xmm4, xmm9
1604
+ pslld xmm9, 25
1605
+ psrld xmm4, 7
1606
+ por xmm9, xmm4
1607
+ pshufd xmm0, xmm0, 0x39
1608
+ pshufd xmm8, xmm8, 0x39
1609
+ pshufd xmm3, xmm3, 0x4E
1610
+ pshufd xmm11, xmm11, 0x4E
1611
+ pshufd xmm2, xmm2, 0x93
1612
+ pshufd xmm10, xmm10, 0x93
1613
+ dec al
1614
+ je 9f
1615
+ movdqa xmm12, xmmword ptr [rsp+0x20]
1616
+ movdqa xmm5, xmmword ptr [rsp+0x40]
1617
+ pshufd xmm13, xmm12, 0x0F
1618
+ shufps xmm12, xmm5, 214
1619
+ pshufd xmm4, xmm12, 0x39
1620
+ movdqa xmm12, xmm6
1621
+ shufps xmm12, xmm7, 250
1622
+ pblendw xmm13, xmm12, 0xCC
1623
+ movdqa xmm12, xmm7
1624
+ punpcklqdq xmm12, xmm5
1625
+ pblendw xmm12, xmm6, 0xC0
1626
+ pshufd xmm12, xmm12, 0x78
1627
+ punpckhdq xmm5, xmm7
1628
+ punpckldq xmm6, xmm5
1629
+ pshufd xmm7, xmm6, 0x1E
1630
+ movdqa xmmword ptr [rsp+0x20], xmm13
1631
+ movdqa xmmword ptr [rsp+0x40], xmm12
1632
+ movdqa xmm5, xmmword ptr [rsp+0x30]
1633
+ movdqa xmm13, xmmword ptr [rsp+0x50]
1634
+ pshufd xmm6, xmm5, 0x0F
1635
+ shufps xmm5, xmm13, 214
1636
+ pshufd xmm12, xmm5, 0x39
1637
+ movdqa xmm5, xmm14
1638
+ shufps xmm5, xmm15, 250
1639
+ pblendw xmm6, xmm5, 0xCC
1640
+ movdqa xmm5, xmm15
1641
+ punpcklqdq xmm5, xmm13
1642
+ pblendw xmm5, xmm14, 0xC0
1643
+ pshufd xmm5, xmm5, 0x78
1644
+ punpckhdq xmm13, xmm15
1645
+ punpckldq xmm14, xmm13
1646
+ pshufd xmm15, xmm14, 0x1E
1647
+ movdqa xmm13, xmm6
1648
+ movdqa xmm14, xmm5
1649
+ movdqa xmm5, xmmword ptr [rsp+0x20]
1650
+ movdqa xmm6, xmmword ptr [rsp+0x40]
1651
+ jmp 9b
1652
+ 9:
1653
+ pxor xmm0, xmm2
1654
+ pxor xmm1, xmm3
1655
+ pxor xmm8, xmm10
1656
+ pxor xmm9, xmm11
1657
+ mov eax, r13d
1658
+ cmp rdx, r15
1659
+ jne 2b
1660
+ movups xmmword ptr [rbx], xmm0
1661
+ movups xmmword ptr [rbx+0x10], xmm1
1662
+ movups xmmword ptr [rbx+0x20], xmm8
1663
+ movups xmmword ptr [rbx+0x30], xmm9
1664
+ movdqa xmm0, xmmword ptr [rsp+0x130]
1665
+ movdqa xmm1, xmmword ptr [rsp+0x110]
1666
+ movdqa xmm2, xmmword ptr [rsp+0x120]
1667
+ movdqu xmm3, xmmword ptr [rsp+0x118]
1668
+ movdqu xmm4, xmmword ptr [rsp+0x128]
1669
+ blendvps xmm1, xmm3, xmm0
1670
+ blendvps xmm2, xmm4, xmm0
1671
+ movdqa xmmword ptr [rsp+0x110], xmm1
1672
+ movdqa xmmword ptr [rsp+0x120], xmm2
1673
+ add rdi, 16
1674
+ add rbx, 64
1675
+ sub rsi, 2
1676
+ 3:
1677
+ test esi, 0x1
1678
+ je 4b
1679
+ movups xmm0, xmmword ptr [rcx]
1680
+ movups xmm1, xmmword ptr [rcx+0x10]
1681
+ movd xmm13, dword ptr [rsp+0x110]
1682
+ pinsrd xmm13, dword ptr [rsp+0x120], 1
1683
+ pinsrd xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
1684
+ movaps xmm14, xmmword ptr [ROT8+rip]
1685
+ movaps xmm15, xmmword ptr [ROT16+rip]
1686
+ mov r8, qword ptr [rdi]
1687
+ movzx eax, byte ptr [rbp+0x40]
1688
+ or eax, r13d
1689
+ xor edx, edx
1690
+ 2:
1691
+ mov r14d, eax
1692
+ or eax, r12d
1693
+ add rdx, 64
1694
+ cmp rdx, r15
1695
+ cmovne eax, r14d
1696
+ movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
1697
+ movaps xmm3, xmm13
1698
+ pinsrd xmm3, eax, 3
1699
+ movups xmm4, xmmword ptr [r8+rdx-0x40]
1700
+ movups xmm5, xmmword ptr [r8+rdx-0x30]
1701
+ movaps xmm8, xmm4
1702
+ shufps xmm4, xmm5, 136
1703
+ shufps xmm8, xmm5, 221
1704
+ movaps xmm5, xmm8
1705
+ movups xmm6, xmmword ptr [r8+rdx-0x20]
1706
+ movups xmm7, xmmword ptr [r8+rdx-0x10]
1707
+ movaps xmm8, xmm6
1708
+ shufps xmm6, xmm7, 136
1709
+ pshufd xmm6, xmm6, 0x93
1710
+ shufps xmm8, xmm7, 221
1711
+ pshufd xmm7, xmm8, 0x93
1712
+ mov al, 7
1713
+ 9:
1714
+ paddd xmm0, xmm4
1715
+ paddd xmm0, xmm1
1716
+ pxor xmm3, xmm0
1717
+ pshufb xmm3, xmm15
1718
+ paddd xmm2, xmm3
1719
+ pxor xmm1, xmm2
1720
+ movdqa xmm11, xmm1
1721
+ pslld xmm1, 20
1722
+ psrld xmm11, 12
1723
+ por xmm1, xmm11
1724
+ paddd xmm0, xmm5
1725
+ paddd xmm0, xmm1
1726
+ pxor xmm3, xmm0
1727
+ pshufb xmm3, xmm14
1728
+ paddd xmm2, xmm3
1729
+ pxor xmm1, xmm2
1730
+ movdqa xmm11, xmm1
1731
+ pslld xmm1, 25
1732
+ psrld xmm11, 7
1733
+ por xmm1, xmm11
1734
+ pshufd xmm0, xmm0, 0x93
1735
+ pshufd xmm3, xmm3, 0x4E
1736
+ pshufd xmm2, xmm2, 0x39
1737
+ paddd xmm0, xmm6
1738
+ paddd xmm0, xmm1
1739
+ pxor xmm3, xmm0
1740
+ pshufb xmm3, xmm15
1741
+ paddd xmm2, xmm3
1742
+ pxor xmm1, xmm2
1743
+ movdqa xmm11, xmm1
1744
+ pslld xmm1, 20
1745
+ psrld xmm11, 12
1746
+ por xmm1, xmm11
1747
+ paddd xmm0, xmm7
1748
+ paddd xmm0, xmm1
1749
+ pxor xmm3, xmm0
1750
+ pshufb xmm3, xmm14
1751
+ paddd xmm2, xmm3
1752
+ pxor xmm1, xmm2
1753
+ movdqa xmm11, xmm1
1754
+ pslld xmm1, 25
1755
+ psrld xmm11, 7
1756
+ por xmm1, xmm11
1757
+ pshufd xmm0, xmm0, 0x39
1758
+ pshufd xmm3, xmm3, 0x4E
1759
+ pshufd xmm2, xmm2, 0x93
1760
+ dec al
1761
+ jz 9f
1762
+ movdqa xmm8, xmm4
1763
+ shufps xmm8, xmm5, 214
1764
+ pshufd xmm9, xmm4, 0x0F
1765
+ pshufd xmm4, xmm8, 0x39
1766
+ movdqa xmm8, xmm6
1767
+ shufps xmm8, xmm7, 250
1768
+ pblendw xmm9, xmm8, 0xCC
1769
+ movdqa xmm8, xmm7
1770
+ punpcklqdq xmm8, xmm5
1771
+ pblendw xmm8, xmm6, 0xC0
1772
+ pshufd xmm8, xmm8, 0x78
1773
+ punpckhdq xmm5, xmm7
1774
+ punpckldq xmm6, xmm5
1775
+ pshufd xmm7, xmm6, 0x1E
1776
+ movdqa xmm5, xmm9
1777
+ movdqa xmm6, xmm8
1778
+ jmp 9b
1779
+ 9:
1780
+ pxor xmm0, xmm2
1781
+ pxor xmm1, xmm3
1782
+ mov eax, r13d
1783
+ cmp rdx, r15
1784
+ jne 2b
1785
+ movups xmmword ptr [rbx], xmm0
1786
+ movups xmmword ptr [rbx+0x10], xmm1
1787
+ jmp 4b
1788
+
1789
+ .p2align 6
1790
+ blake3_compress_in_place_sse41: /* Compress one 64-byte block and write the new 32-byte state back over [rdi]. In: rdi = 32-byte state (read, then overwritten), rsi = 64-byte message block, dl = block length, rcx = 64-bit counter, r8b = flags. NOTE(review): argument names assumed from the C prototype in blake3_impl.h -- confirm. */
1791
+ _blake3_compress_in_place_sse41: /* underscore-prefixed alias of the same entry point. Clobbers al, rdx, r8, xmm0-xmm9, xmm11, xmm14, xmm15 and flags; uses no stack. */
1792
+ _CET_ENDBR /* macro defined outside this chunk; presumably the CET landing pad -- confirm */
1793
+ movups xmm0, xmmword ptr [rdi] /* row 0 = state words 0..3 (unaligned load) */
1794
+ movups xmm1, xmmword ptr [rdi+0x10] /* row 1 = state words 4..7 */
1795
+ movaps xmm2, xmmword ptr [BLAKE3_IV+rip] /* row 2 = IV constants */
1796
+ movzx edx, dl /* zero-extend block length: do not trust bits above the low byte of the argument register */
1797
+ movzx r8d, r8b /* zero-extend flags for the same reason (matches the movzx pair in blake3_compress_xof_sse41) */
1798
+ movq xmm3, rcx /* row 3 lanes 0,1 = counter low/high; lanes 2,3 zeroed */
1799
+ pinsrd xmm3, edx, 2 /* row 3 lane 2 = block length */
1800
+ pinsrd xmm3, r8d, 3 /* row 3 lane 3 = flags */
1801
+ movups xmm4, xmmword ptr [rsi] /* message words 0..3 */
1802
+ movups xmm5, xmmword ptr [rsi+0x10] /* message words 4..7 */
1803
+ movaps xmm8, xmm4
1804
+ shufps xmm4, xmm5, 136 /* xmm4 = even-indexed words m0,m2,m4,m6 */
1805
+ shufps xmm8, xmm5, 221 /* xmm8 = odd-indexed words m1,m3,m5,m7 */
1806
+ movaps xmm5, xmm8
1807
+ movups xmm6, xmmword ptr [rsi+0x20] /* message words 8..11 */
1808
+ movups xmm7, xmmword ptr [rsi+0x30] /* message words 12..15 */
1809
+ movaps xmm8, xmm6
1810
+ shufps xmm6, xmm7, 136 /* even words m8,m10,m12,m14 */
1811
+ pshufd xmm6, xmm6, 0x93 /* rotate lanes up by one to line up with the diagonal step */
1812
+ shufps xmm8, xmm7, 221 /* odd words m9,m11,m13,m15 */
1813
+ pshufd xmm7, xmm8, 0x93 /* same lane rotation for the odd words */
1814
+ movaps xmm14, xmmword ptr [ROT8+rip] /* pshufb mask: rotate each dword right by 8 */
1815
+ movaps xmm15, xmmword ptr [ROT16+rip] /* pshufb mask: rotate each dword by 16 */
1816
+ mov al, 7 /* round counter: the loop body below runs 7 times */
1817
+ 9: /* top of one round; rows are a=xmm0, b=xmm1, c=xmm2, d=xmm3 */
1818
+ paddd xmm0, xmm4 /* column step, first half: a += m (xmm4) */
1819
+ paddd xmm0, xmm1 /* a += b */
1820
+ pxor xmm3, xmm0 /* d ^= a */
1821
+ pshufb xmm3, xmm15 /* d = d rotated by 16 */
1822
+ paddd xmm2, xmm3 /* c += d */
1823
+ pxor xmm1, xmm2 /* b ^= c */
1824
+ movdqa xmm11, xmm1
1825
+ pslld xmm1, 20
1826
+ psrld xmm11, 12
1827
+ por xmm1, xmm11 /* b = b rotated right by 12 (shift pair + or) */
1828
+ paddd xmm0, xmm5 /* column step, second half: a += m (xmm5) */
1829
+ paddd xmm0, xmm1 /* a += b */
1830
+ pxor xmm3, xmm0 /* d ^= a */
1831
+ pshufb xmm3, xmm14 /* d = d rotated right by 8 */
1832
+ paddd xmm2, xmm3 /* c += d */
1833
+ pxor xmm1, xmm2 /* b ^= c */
1834
+ movdqa xmm11, xmm1
1835
+ pslld xmm1, 25
1836
+ psrld xmm11, 7
1837
+ por xmm1, xmm11 /* b = b rotated right by 7 */
1838
+ pshufd xmm0, xmm0, 0x93 /* rotate lanes of a up by one: columns become diagonals */
1839
+ pshufd xmm3, xmm3, 0x4E /* swap the 64-bit halves of d */
1840
+ pshufd xmm2, xmm2, 0x39 /* rotate lanes of c down by one */
1841
+ paddd xmm0, xmm6 /* diagonal step, first half: a += m (xmm6) */
1842
+ paddd xmm0, xmm1
1843
+ pxor xmm3, xmm0
1844
+ pshufb xmm3, xmm15 /* d rotated by 16 */
1845
+ paddd xmm2, xmm3
1846
+ pxor xmm1, xmm2
1847
+ movdqa xmm11, xmm1
1848
+ pslld xmm1, 20
1849
+ psrld xmm11, 12
1850
+ por xmm1, xmm11 /* b rotated right by 12 */
1851
+ paddd xmm0, xmm7 /* diagonal step, second half: a += m (xmm7) */
1852
+ paddd xmm0, xmm1
1853
+ pxor xmm3, xmm0
1854
+ pshufb xmm3, xmm14 /* d rotated right by 8 */
1855
+ paddd xmm2, xmm3
1856
+ pxor xmm1, xmm2
1857
+ movdqa xmm11, xmm1
1858
+ pslld xmm1, 25
1859
+ psrld xmm11, 7
1860
+ por xmm1, xmm11 /* b rotated right by 7 */
1861
+ pshufd xmm0, xmm0, 0x39 /* undo the lane rotation of a */
1862
+ pshufd xmm3, xmm3, 0x4E /* undo the half swap of d */
1863
+ pshufd xmm2, xmm2, 0x93 /* undo the lane rotation of c */
1864
+ dec al
1865
+ jz 9f /* after the 7th round skip the message reshuffle and finish */
1866
+ movdqa xmm8, xmm4 /* reorder the message vectors xmm4-xmm7 for the next round; xmm8 and xmm9 are scratch */
1867
+ shufps xmm8, xmm5, 214
1868
+ pshufd xmm9, xmm4, 0x0F
1869
+ pshufd xmm4, xmm8, 0x39 /* next-round xmm4 */
1870
+ movdqa xmm8, xmm6
1871
+ shufps xmm8, xmm7, 250
1872
+ pblendw xmm9, xmm8, 0xCC /* xmm9 = next-round xmm5 */
1873
+ movdqa xmm8, xmm7
1874
+ punpcklqdq xmm8, xmm5
1875
+ pblendw xmm8, xmm6, 0xC0
1876
+ pshufd xmm8, xmm8, 0x78 /* xmm8 = next-round xmm6 */
1877
+ punpckhdq xmm5, xmm7
1878
+ punpckldq xmm6, xmm5
1879
+ pshufd xmm7, xmm6, 0x1E /* next-round xmm7 */
1880
+ movdqa xmm5, xmm9
1881
+ movdqa xmm6, xmm8
1882
+ jmp 9b
1883
+ 9: /* finalization */
1884
+ pxor xmm0, xmm2 /* new state words 0..3 = row 0 ^ row 2 */
1885
+ pxor xmm1, xmm3 /* new state words 4..7 = row 1 ^ row 3 */
1886
+ movups xmmword ptr [rdi], xmm0 /* write the result back over the input state */
1887
+ movups xmmword ptr [rdi+0x10], xmm1
1888
+ ret
1889
+
1890
+ .p2align 6
1891
+ blake3_compress_xof_sse41: /* Compress one 64-byte block and store all 64 bytes of output at [r9]; the state at [rdi] is only read. In: rdi = 32-byte state, rsi = 64-byte message block, dl = block length, rcx = 64-bit counter, r8b = flags, r9 = 64-byte output buffer. NOTE(review): argument names assumed from the C prototype in blake3_impl.h -- confirm. */
1892
+ _blake3_compress_xof_sse41: /* underscore-prefixed alias of the same entry point. Clobbers rax, rdx, xmm0-xmm9, xmm11, xmm14, xmm15 and flags; uses no stack. */
1893
+ _CET_ENDBR /* macro defined outside this chunk; presumably the CET landing pad -- confirm */
1894
+ movups xmm0, xmmword ptr [rdi] /* row 0 = state words 0..3 (unaligned load) */
1895
+ movups xmm1, xmmword ptr [rdi+0x10] /* row 1 = state words 4..7 */
1896
+ movaps xmm2, xmmword ptr [BLAKE3_IV+rip] /* row 2 = IV constants */
1897
+ movzx eax, r8b /* zero-extend flags so stale upper register bits cannot reach the state */
1898
+ movzx edx, dl /* zero-extend block length likewise */
1899
+ shl rax, 32
1900
+ add rdx, rax /* rdx = block length in the low dword, flags in the high dword */
1901
+ movq xmm3, rcx /* low qword = counter */
1902
+ movq xmm4, rdx
1903
+ punpcklqdq xmm3, xmm4 /* row 3 = counter low, counter high, block length, flags */
1904
+ movups xmm4, xmmword ptr [rsi] /* message words 0..3 */
1905
+ movups xmm5, xmmword ptr [rsi+0x10] /* message words 4..7 */
1906
+ movaps xmm8, xmm4
1907
+ shufps xmm4, xmm5, 136 /* xmm4 = even-indexed words m0,m2,m4,m6 */
1908
+ shufps xmm8, xmm5, 221 /* xmm8 = odd-indexed words m1,m3,m5,m7 */
1909
+ movaps xmm5, xmm8
1910
+ movups xmm6, xmmword ptr [rsi+0x20] /* message words 8..11 */
1911
+ movups xmm7, xmmword ptr [rsi+0x30] /* message words 12..15 */
1912
+ movaps xmm8, xmm6
1913
+ shufps xmm6, xmm7, 136 /* even words m8,m10,m12,m14 */
1914
+ pshufd xmm6, xmm6, 0x93 /* rotate lanes up by one to line up with the diagonal step */
1915
+ shufps xmm8, xmm7, 221 /* odd words m9,m11,m13,m15 */
1916
+ pshufd xmm7, xmm8, 0x93 /* same lane rotation for the odd words */
1917
+ movaps xmm14, xmmword ptr [ROT8+rip] /* pshufb mask: rotate each dword right by 8 */
1918
+ movaps xmm15, xmmword ptr [ROT16+rip] /* pshufb mask: rotate each dword by 16 */
1919
+ mov al, 7 /* round counter: the loop body below runs 7 times (overwrites the low byte of rax, whose value is no longer needed) */
1920
+ 9: /* top of one round; rows are a=xmm0, b=xmm1, c=xmm2, d=xmm3 */
1921
+ paddd xmm0, xmm4 /* column step, first half: a += m (xmm4) */
1922
+ paddd xmm0, xmm1 /* a += b */
1923
+ pxor xmm3, xmm0 /* d ^= a */
1924
+ pshufb xmm3, xmm15 /* d = d rotated by 16 */
1925
+ paddd xmm2, xmm3 /* c += d */
1926
+ pxor xmm1, xmm2 /* b ^= c */
1927
+ movdqa xmm11, xmm1
1928
+ pslld xmm1, 20
1929
+ psrld xmm11, 12
1930
+ por xmm1, xmm11 /* b = b rotated right by 12 (shift pair + or) */
1931
+ paddd xmm0, xmm5 /* column step, second half: a += m (xmm5) */
1932
+ paddd xmm0, xmm1 /* a += b */
1933
+ pxor xmm3, xmm0 /* d ^= a */
1934
+ pshufb xmm3, xmm14 /* d = d rotated right by 8 */
1935
+ paddd xmm2, xmm3 /* c += d */
1936
+ pxor xmm1, xmm2 /* b ^= c */
1937
+ movdqa xmm11, xmm1
1938
+ pslld xmm1, 25
1939
+ psrld xmm11, 7
1940
+ por xmm1, xmm11 /* b = b rotated right by 7 */
1941
+ pshufd xmm0, xmm0, 0x93 /* rotate lanes of a up by one: columns become diagonals */
1942
+ pshufd xmm3, xmm3, 0x4E /* swap the 64-bit halves of d */
1943
+ pshufd xmm2, xmm2, 0x39 /* rotate lanes of c down by one */
1944
+ paddd xmm0, xmm6 /* diagonal step, first half: a += m (xmm6) */
1945
+ paddd xmm0, xmm1
1946
+ pxor xmm3, xmm0
1947
+ pshufb xmm3, xmm15 /* d rotated by 16 */
1948
+ paddd xmm2, xmm3
1949
+ pxor xmm1, xmm2
1950
+ movdqa xmm11, xmm1
1951
+ pslld xmm1, 20
1952
+ psrld xmm11, 12
1953
+ por xmm1, xmm11 /* b rotated right by 12 */
1954
+ paddd xmm0, xmm7 /* diagonal step, second half: a += m (xmm7) */
1955
+ paddd xmm0, xmm1
1956
+ pxor xmm3, xmm0
1957
+ pshufb xmm3, xmm14 /* d rotated right by 8 */
1958
+ paddd xmm2, xmm3
1959
+ pxor xmm1, xmm2
1960
+ movdqa xmm11, xmm1
1961
+ pslld xmm1, 25
1962
+ psrld xmm11, 7
1963
+ por xmm1, xmm11 /* b rotated right by 7 */
1964
+ pshufd xmm0, xmm0, 0x39 /* undo the lane rotation of a */
1965
+ pshufd xmm3, xmm3, 0x4E /* undo the half swap of d */
1966
+ pshufd xmm2, xmm2, 0x93 /* undo the lane rotation of c */
1967
+ dec al
1968
+ jz 9f /* after the 7th round skip the message reshuffle and finish */
1969
+ movdqa xmm8, xmm4 /* reorder the message vectors xmm4-xmm7 for the next round; xmm8 and xmm9 are scratch */
1970
+ shufps xmm8, xmm5, 214
1971
+ pshufd xmm9, xmm4, 0x0F
1972
+ pshufd xmm4, xmm8, 0x39 /* next-round xmm4 */
1973
+ movdqa xmm8, xmm6
1974
+ shufps xmm8, xmm7, 250
1975
+ pblendw xmm9, xmm8, 0xCC /* xmm9 = next-round xmm5 */
1976
+ movdqa xmm8, xmm7
1977
+ punpcklqdq xmm8, xmm5
1978
+ pblendw xmm8, xmm6, 0xC0
1979
+ pshufd xmm8, xmm8, 0x78 /* xmm8 = next-round xmm6 */
1980
+ punpckhdq xmm5, xmm7
1981
+ punpckldq xmm6, xmm5
1982
+ pshufd xmm7, xmm6, 0x1E /* next-round xmm7 */
1983
+ movdqa xmm5, xmm9
1984
+ movdqa xmm6, xmm8
1985
+ jmp 9b
1986
+ 9: /* finalization: produce 64 output bytes */
1987
+ movdqu xmm4, xmmword ptr [rdi] /* reload the original input state words 0..3 (message registers are free now) */
1988
+ movdqu xmm5, xmmword ptr [rdi+0x10] /* original input state words 4..7 */
1989
+ pxor xmm0, xmm2 /* out words 0..3 = row 0 ^ row 2 */
1990
+ pxor xmm1, xmm3 /* out words 4..7 = row 1 ^ row 3 */
1991
+ pxor xmm2, xmm4 /* out words 8..11 = row 2 ^ input state words 0..3 */
1992
+ pxor xmm3, xmm5 /* out words 12..15 = row 3 ^ input state words 4..7 */
1993
+ movups xmmword ptr [r9], xmm0 /* unaligned stores: no alignment assumed for the output buffer */
1994
+ movups xmmword ptr [r9+0x10], xmm1
1995
+ movups xmmword ptr [r9+0x20], xmm2
1996
+ movups xmmword ptr [r9+0x30], xmm3
1997
+ ret
1998
+
1999
+
2000
+ #ifdef __APPLE__
2001
+ .static_data /* constant tables go in the static data section when __APPLE__ is defined */
2002
+ #else
2003
+ .section .rodata /* otherwise the tables live in .rodata */
2004
+ #endif
2005
+ .p2align 6 /* 64-byte alignment; every table below is 16 bytes, so each label stays 16-byte aligned for the movaps/movdqa loads */
2006
+ BLAKE3_IV: /* four 32-bit IV words; the compress routines load this as state row 2 */
2007
+ .long 0x6A09E667, 0xBB67AE85
2008
+ .long 0x3C6EF372, 0xA54FF53A
2009
+ ROT16: /* pshufb control: swaps the 16-bit halves of each dword, i.e. rotates each 32-bit lane by 16 */
2010
+ .byte 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13
2011
+ ROT8: /* pshufb control: result byte i takes source byte i+1 within each dword, i.e. rotates each 32-bit lane right by 8 */
2012
+ .byte 1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8, 13, 14, 15, 12
2013
+ ADD0: /* lane values 0,1,2,3; referenced outside this chunk -- presumably per-lane counter offsets, confirm */
2014
+ .long 0, 1, 2, 3
2015
+ ADD1: /* value 4 in every lane; referenced outside this chunk -- presumably the per-iteration counter step, confirm */
2016
+ .long 4, 4, 4, 4
2017
+ BLAKE3_IV_0: /* first IV word replicated into all four lanes */
2018
+ .long 0x6A09E667, 0x6A09E667, 0x6A09E667, 0x6A09E667
2019
+ BLAKE3_IV_1: /* second IV word replicated into all four lanes */
2020
+ .long 0xBB67AE85, 0xBB67AE85, 0xBB67AE85, 0xBB67AE85
2021
+ BLAKE3_IV_2: /* third IV word replicated into all four lanes */
2022
+ .long 0x3C6EF372, 0x3C6EF372, 0x3C6EF372, 0x3C6EF372
2023
+ BLAKE3_IV_3: /* fourth IV word replicated into all four lanes */
2024
+ .long 0xA54FF53A, 0xA54FF53A, 0xA54FF53A, 0xA54FF53A
2025
+ BLAKE3_BLOCK_LEN: /* the value 64 in every lane; the partial-batch paths also read its first dword with pinsrd */
2026
+ .long 64, 64, 64, 64
2027
+ CMP_MSB_MASK: /* sign bit of every lane; xor-ed into both operands before pcmpgtd so the signed compare orders them as unsigned values (used to detect counter carry) */
2028
+ .long 0x80000000, 0x80000000, 0x80000000, 0x80000000