sha3 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of sha3 might be problematic. Click here for more details.

@@ -0,0 +1,766 @@
1
+ #
2
+ # The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
3
+ # Michaël Peeters and Gilles Van Assche. For more information, feedback or
4
+ # questions, please refer to our website: http://keccak.noekeon.org/
5
+ #
6
+ # Implementation by Ronny Van Keer,
7
+ # hereby denoted as "the implementer".
8
+ #
9
+ # To the extent possible under law, the implementer has waived all copyright
10
+ # and related or neighboring rights to the source code in this file.
11
+ # http://creativecommons.org/publicdomain/zero/1.0/
12
+ #
13
+
14
+ .text
15
+
16
+
17
+ #// --- defines
18
+
19
+ .equ UseSIMD, 1  # 1: the mXorState macros and KeccakInitializeState use SSE (movdqu/pxor); 0: 64-bit integer registers only
20
+
21
+
22
+ .equ _ba, 0*8  # byte offsets of the 25 64-bit state lanes; first letter = row (b,g,k,m,s), second = column (a,e,i,o,u)
23
+ .equ _be, 1*8
24
+ .equ _bi, 2*8
25
+ .equ _bo, 3*8
26
+ .equ _bu, 4*8
27
+ .equ _ga, 5*8  # row g
28
+ .equ _ge, 6*8
29
+ .equ _gi, 7*8
30
+ .equ _go, 8*8
31
+ .equ _gu, 9*8
32
+ .equ _ka, 10*8  # row k
33
+ .equ _ke, 11*8
34
+ .equ _ki, 12*8
35
+ .equ _ko, 13*8
36
+ .equ _ku, 14*8
37
+ .equ _ma, 15*8  # row m
38
+ .equ _me, 16*8
39
+ .equ _mi, 17*8
40
+ .equ _mo, 18*8
41
+ .equ _mu, 19*8
42
+ .equ _sa, 20*8  # row s
43
+ .equ _se, 21*8
44
+ .equ _si, 22*8
45
+ .equ _so, 23*8
46
+ .equ _su, 24*8
47
+
48
+
49
+ # arguments: the first three integer argument registers of the System V AMD64 calling convention
50
+ .equ apState, %rdi  # pointer to the 25-lane state
51
+ .equ apInput, %rsi  # pointer to the words being absorbed
52
+ .equ aNbrWords, %rdx  # number of 64-bit words to absorb (read by KeccakAbsorb only)
53
+
54
+ # register used while XORing input into the state, before the permutation starts
55
+ .equ xpState, %r9  # running state pointer in KeccakAbsorb; same register as rDu
56
+
57
+ # registers used by the round macro
58
+ .equ rT1, %rax  # scratch
59
+ .equ rpState, %rdi  # same register as apState
60
+ .equ rpStack, %rsp  # second state buffer, reserved on the stack by mKeccakPermutation
61
+
62
+ .equ rDa, %rbx  # rDa..rDu: per-column D words that get XORed into every lane of the column; %rbx is callee-saved
63
+ .equ rDe, %rcx
64
+ .equ rDi, %rdx  # same register as aNbrWords
65
+ .equ rDo, %r8
66
+ .equ rDu, %r9
67
+
68
+ .equ rBa, %r10  # rBa..rBu: the five lanes of the output row currently being computed
69
+ .equ rBe, %r11
70
+ .equ rBi, %r12  # %r12-%r14 are callee-saved
71
+ .equ rBo, %r13
72
+ .equ rBu, %r14
73
+
74
+ .equ rCa, %rsi  # rCa..rCu: column parities; rCa is the same register as apInput
75
+ .equ rCe, %rbp  # callee-saved
76
+ .equ rCi, rBi  # Ci shares its register with Bi
77
+ .equ rCo, rBo  # Co shares its register with Bo
78
+ .equ rCu, %r15  # callee-saved
79
+
80
+ .macro mKeccakRound iState, oState, rc, lastRound  # one round: reads the 25 lanes at iState, writes 25 new lanes to oState, XORs rc into lane ba
81
+
82
+ movq rCe, rDa  # entry: rCa/rCe/rCu = parities of columns a/e/u of iState, rDi = lane si, rDo = lane so
83
+ shld $1, rDa, rDa  # shld with the same source and destination is a rotate left; Da = rol(Ce, 1) ^ Cu
84
+
85
+ movq _bi(\iState), rCi  # Ci = bi ^ ki ^ (gi ^ mi ^ si); the si term arrives in rDi
86
+ xorq _gi(\iState), rDi
87
+ xorq _ki(\iState), rCi
88
+ xorq rCu, rDa  # Da complete
89
+ xorq _mi(\iState), rDi
90
+ xorq rDi, rCi  # Ci complete
91
+
92
+ movq rCi, rDe  # De = rol(Ci, 1) ^ Ca
93
+ shld $1, rDe, rDe
94
+
95
+ movq _bo(\iState), rCo  # Co = bo ^ ko ^ (go ^ mo ^ so); the so term arrives in rDo
96
+ xorq _go(\iState), rDo
97
+ xorq _ko(\iState), rCo
98
+ xorq rCa, rDe  # De complete
99
+ xorq _mo(\iState), rDo
100
+ xorq rDo, rCo  # Co complete
101
+
102
+ movq rCo, rDi  # Di = rol(Co, 1) ^ Ce
103
+ shld $1, rDi, rDi
104
+
105
+ movq rCu, rDo  # Do = rol(Cu, 1) ^ Ci
106
+ xorq rCe, rDi
107
+ shld $1, rDo, rDo
108
+
109
+ movq rCa, rDu  # Du = rol(Ca, 1) ^ Co
110
+ xorq rCi, rDo
111
+ shld $1, rDu, rDu
112
+
113
+ movq _ba(\iState), rBa  # output row b, built from input lanes ba, ge, ki, mo, su (each XORed with its D word, then rotated)
114
+ movq _ge(\iState), rBe
115
+ xorq rCo, rDu  # Du complete; last read of Co before its register is reloaded as Bo
116
+ movq _ki(\iState), rBi  # overwrites Ci (same register), which is no longer needed
117
+ movq _mo(\iState), rBo
118
+ movq _su(\iState), rBu
119
+ xorq rDe, rBe
120
+ shld $44, rBe, rBe
121
+ xorq rDi, rBi
122
+ xorq rDa, rBa
123
+ shld $43, rBi, rBi
124
+
125
+ movq rBe, rCa  # new ba = Ba ^ (Be | Bi) ^ rc
126
+ movq $\rc, rT1  # the round constant is folded into lane ba only
127
+ orq rBi, rCa
128
+ xorq rBa, rT1
129
+ xorq rT1, rCa
130
+ movq rCa, _ba(\oState)  # rCa now holds the new ba: the start of the next round's Ca
131
+
132
+ xorq rDu, rBu
133
+ shld $14, rBu, rBu
134
+ movq rBa, rCu
135
+ andq rBe, rCu
136
+ xorq rBu, rCu
137
+ movq rCu, _bu(\oState)  # rCu now holds the new bu: the start of the next round's Cu
138
+
139
+ xorq rDo, rBo
140
+ shld $21, rBo, rBo
141
+ movq rBo, rT1
142
+ andq rBu, rT1
143
+ xorq rBi, rT1
144
+ movq rT1, _bi(\oState)
145
+
146
+ notq rBi  # NOTE(review): the or/and/not mix in the row formulas presumably matches the complemented lanes written by KeccakInitializeState - confirm
147
+ orq rBa, rBu
148
+ orq rBo, rBi
149
+ xorq rBo, rBu
150
+ xorq rBe, rBi
151
+ movq rBu, _bo(\oState)
152
+ movq rBi, _be(\oState)
153
+ .if \lastRound == 0
154
+ movq rBi, rCe  # start of the next round's Ce; parity bookkeeping is skipped when lastRound is non-zero
155
+ .endif
156
+
157
+
158
+ movq _gu(\iState), rBe  # output row g, built from input lanes bo, gu, ka, me, si
159
+ xorq rDu, rBe
160
+ movq _ka(\iState), rBi
161
+ shld $20, rBe, rBe
162
+ xorq rDa, rBi
163
+ shld $3, rBi, rBi
164
+ movq _bo(\iState), rBa
165
+ movq rBe, rT1
166
+ orq rBi, rT1
167
+ xorq rDo, rBa
168
+ movq _me(\iState), rBo
169
+ movq _si(\iState), rBu
170
+ shld $28, rBa, rBa
171
+ xorq rBa, rT1
172
+ movq rT1, _ga(\oState)
173
+ .if \lastRound == 0
174
+ xor rT1, rCa  # Ca ^= new ga
175
+ .endif
176
+
177
+ xorq rDe, rBo
178
+ shld $45, rBo, rBo
179
+ movq rBi, rT1
180
+ andq rBo, rT1
181
+ xorq rBe, rT1
182
+ movq rT1, _ge(\oState)
183
+ .if \lastRound == 0
184
+ xorq rT1, rCe  # Ce ^= new ge
185
+ .endif
186
+
187
+ xorq rDi, rBu
188
+ shld $61, rBu, rBu
189
+ movq rBu, rT1
190
+ orq rBa, rT1
191
+ xorq rBo, rT1
192
+ movq rT1, _go(\oState)
193
+
194
+ andq rBe, rBa
195
+ xorq rBu, rBa
196
+ movq rBa, _gu(\oState)
197
+ notq rBu
198
+ .if \lastRound == 0
199
+ xorq rBa, rCu  # Cu ^= new gu
200
+ .endif
201
+
202
+ orq rBu, rBo
203
+ xorq rBi, rBo
204
+ movq rBo, _gi(\oState)
205
+
206
+
207
+ movq _be(\iState), rBa  # output row k, built from input lanes be, gi, ko, mu, sa
208
+ movq _gi(\iState), rBe
209
+ movq _ko(\iState), rBi
210
+ movq _mu(\iState), rBo
211
+ movq _sa(\iState), rBu
212
+ xorq rDi, rBe
213
+ shld $6, rBe, rBe
214
+ xorq rDo, rBi
215
+ shld $25, rBi, rBi
216
+ movq rBe, rT1
217
+ orq rBi, rT1
218
+ xorq rDe, rBa
219
+ shld $1, rBa, rBa
220
+ xorq rBa, rT1
221
+ movq rT1, _ka(\oState)
222
+ .if \lastRound == 0
223
+ xor rT1, rCa  # Ca ^= new ka
224
+ .endif
225
+
226
+ xorq rDu, rBo
227
+ shld $8, rBo, rBo
228
+ movq rBi, rT1
229
+ andq rBo, rT1
230
+ xorq rBe, rT1
231
+ movq rT1, _ke(\oState)
232
+ .if \lastRound == 0
233
+ xorq rT1, rCe  # Ce ^= new ke
234
+ .endif
235
+
236
+ xorq rDa, rBu
237
+ shld $18, rBu, rBu
238
+ notq rBo
239
+ movq rBo, rT1
240
+ andq rBu, rT1
241
+ xorq rBi, rT1
242
+ movq rT1, _ki(\oState)
243
+
244
+ movq rBu, rT1
245
+ orq rBa, rT1
246
+ xorq rBo, rT1
247
+ movq rT1, _ko(\oState)
248
+
249
+ andq rBe, rBa
250
+ xorq rBu, rBa
251
+ movq rBa, _ku(\oState)
252
+ .if \lastRound == 0
253
+ xorq rBa, rCu  # Cu ^= new ku
254
+ .endif
255
+
256
+ movq _ga(\iState), rBe  # output row m, built from input lanes bu, ga, ke, mi, so
257
+ xorq rDa, rBe
258
+ movq _ke(\iState), rBi
259
+ shld $36, rBe, rBe
260
+ xorq rDe, rBi
261
+ movq _bu(\iState), rBa
262
+ shld $10, rBi, rBi
263
+ movq rBe, rT1
264
+ movq _mi(\iState), rBo
265
+ andq rBi, rT1
266
+ xorq rDu, rBa
267
+ movq _so(\iState), rBu
268
+ shld $27, rBa, rBa
269
+ xorq rBa, rT1
270
+ movq rT1, _ma(\oState)
271
+ .if \lastRound == 0
272
+ xor rT1, rCa  # Ca ^= new ma
273
+ .endif
274
+
275
+ xorq rDi, rBo
276
+ shld $15, rBo, rBo
277
+ movq rBi, rT1
278
+ orq rBo, rT1
279
+ xorq rBe, rT1
280
+ movq rT1, _me(\oState)
281
+ .if \lastRound == 0
282
+ xorq rT1, rCe  # Ce ^= new me
283
+ .endif
284
+
285
+ xorq rDo, rBu
286
+ shld $56, rBu, rBu
287
+ notq rBo
288
+ movq rBo, rT1
289
+ orq rBu, rT1
290
+ xorq rBi, rT1
291
+ movq rT1, _mi(\oState)
292
+
293
+ orq rBa, rBe
294
+ xorq rBu, rBe
295
+ movq rBe, _mu(\oState)
296
+
297
+ andq rBa, rBu
298
+ xorq rBo, rBu
299
+ movq rBu, _mo(\oState)
300
+ .if \lastRound == 0
301
+ xorq rBe, rCu  # Cu ^= new mu (rBe still holds the value stored to mu above)
302
+ .endif
303
+
304
+
305
+ movq _bi(\iState), rBa  # output row s, built from input lanes bi, go, ku, ma, se; each D register becomes scratch after its last read
306
+ movq _go(\iState), rBe
307
+ movq _ku(\iState), rBi
308
+ xorq rDi, rBa  # last read of Di
309
+ movq _ma(\iState), rBo
310
+ shld $62, rBa, rBa
311
+ xorq rDo, rBe  # last read of Do
312
+ movq _se(\iState), rBu
313
+ shld $55, rBe, rBe
314
+
315
+ xorq rDu, rBi  # last read of Du
316
+ movq rBa, rDu  # rDu reused as a temporary for the new su
317
+ xorq rDe, rBu  # last read of De
318
+ shld $2, rBu, rBu
319
+ andq rBe, rDu
320
+ xorq rBu, rDu
321
+ movq rDu, _su(\oState)
322
+
323
+ shld $39, rBi, rBi
324
+ .if \lastRound == 0
325
+ xorq rDu, rCu  # Cu ^= new su; Cu is now the full parity of column u of oState
326
+ .endif
327
+ notq rBe
328
+ xorq rDa, rBo  # last read of Da
329
+ movq rBe, rDa  # rDa reused as a temporary for the new sa
330
+ andq rBi, rDa
331
+ xorq rBa, rDa
332
+ movq rDa, _sa(\oState)
333
+ .if \lastRound == 0
334
+ xor rDa, rCa  # Ca ^= new sa; Ca is now the full parity of column a of oState
335
+ .endif
336
+
337
+ shld $41, rBo, rBo
338
+ movq rBi, rDe  # rDe reused as a temporary for the new se
339
+ orq rBo, rDe
340
+ xorq rBe, rDe
341
+ movq rDe, _se(\oState)
342
+ .if \lastRound == 0
343
+ xorq rDe, rCe  # Ce ^= new se; Ce is now the full parity of column e of oState
344
+ .endif
345
+
346
+ movq rBo, rDi  # rDi and rDo end the round holding the new si and so, as the next round's entry code expects
347
+ movq rBu, rDo
348
+ andq rBu, rDi
349
+ orq rBa, rDo
350
+ xorq rBi, rDi
351
+ xorq rBo, rDo
352
+ movq rDi, _si(\oState)
353
+ movq rDo, _so(\oState)
354
+
355
+ .endm
356
+
357
#
# mKeccakPermutation
#   Applies the 24 Keccak rounds to the state addressed by rpState.
#   A second 25-lane buffer is reserved on the stack; the rounds alternate
#   rpState -> rpStack -> rpState ..., and because the round count is even the
#   final result is written back to rpState.
#   Overwrites every register aliased as rT1, rD*, rB* and rC*, so the caller
#   must already have saved the callee-saved ones (see mPushRegs).
#
        .macro  mKeccakPermutation

        subq    $8*25, %rsp                     # scratch state: 25 lanes of 8 bytes

        # Ca = ba ^ ga ^ ka ^ ma ^ sa
        movq    _ba(rpState), rCa
        xorq    _ga(rpState), rCa
        xorq    _ka(rpState), rCa
        xorq    _ma(rpState), rCa
        xorq    _sa(rpState), rCa

        # Ce = be ^ ge ^ ke ^ me ^ se
        movq    _be(rpState), rCe
        xorq    _ge(rpState), rCe
        xorq    _ke(rpState), rCe
        xorq    _me(rpState), rCe
        xorq    _se(rpState), rCe

        # Cu = bu ^ gu ^ ku ^ mu ^ su
        movq    _bu(rpState), rCu
        xorq    _gu(rpState), rCu
        xorq    _ku(rpState), rCu
        xorq    _mu(rpState), rCu
        xorq    _su(rpState), rCu

        # Columns i and o are summed inside the round itself; it only needs
        # lanes si and so preloaded into rDi and rDo.
        movq    _si(rpState), rDi
        movq    _so(rpState), rDo

        # Rounds 1-6
        mKeccakRound    rpState, rpStack, 0x0000000000000001, 0
        mKeccakRound    rpStack, rpState, 0x0000000000008082, 0
        mKeccakRound    rpState, rpStack, 0x800000000000808a, 0
        mKeccakRound    rpStack, rpState, 0x8000000080008000, 0
        mKeccakRound    rpState, rpStack, 0x000000000000808b, 0
        mKeccakRound    rpStack, rpState, 0x0000000080000001, 0

        # Rounds 7-12
        mKeccakRound    rpState, rpStack, 0x8000000080008081, 0
        mKeccakRound    rpStack, rpState, 0x8000000000008009, 0
        mKeccakRound    rpState, rpStack, 0x000000000000008a, 0
        mKeccakRound    rpStack, rpState, 0x0000000000000088, 0
        mKeccakRound    rpState, rpStack, 0x0000000080008009, 0
        mKeccakRound    rpStack, rpState, 0x000000008000000a, 0

        # Rounds 13-18
        mKeccakRound    rpState, rpStack, 0x000000008000808b, 0
        mKeccakRound    rpStack, rpState, 0x800000000000008b, 0
        mKeccakRound    rpState, rpStack, 0x8000000000008089, 0
        mKeccakRound    rpStack, rpState, 0x8000000000008003, 0
        mKeccakRound    rpState, rpStack, 0x8000000000008002, 0
        mKeccakRound    rpStack, rpState, 0x8000000000000080, 0

        # Rounds 19-24; the final round is flagged so it skips the parity
        # bookkeeping meant for a following round.
        mKeccakRound    rpState, rpStack, 0x000000000000800a, 0
        mKeccakRound    rpStack, rpState, 0x800000008000000a, 0
        mKeccakRound    rpState, rpStack, 0x8000000080008081, 0
        mKeccakRound    rpStack, rpState, 0x8000000000008080, 0
        mKeccakRound    rpState, rpStack, 0x0000000080000001, 0
        mKeccakRound    rpStack, rpState, 0x8000000080008008, 1

        addq    $8*25, %rsp                     # release the scratch state

        .endm
415
+
416
#
# mPushRegs
#   Saves the six callee-saved registers that the round code overwrites
#   (%rbx, %rbp, %r12-%r15).  mPopRegs restores them in the reverse order.
#
        .macro  mPushRegs

        push    %rbx
        push    %rbp
        push    %r12
        push    %r13
        push    %r14
        push    %r15

        .endm
426
+
427
+
428
#
# mPopRegs
#   Restores the registers saved by mPushRegs; the pops run in the exact
#   reverse of the push order (%r15 first, %rbx last).
#
        .macro  mPopRegs

        pop     %r15
        pop     %r14
        pop     %r13
        pop     %r12
        pop     %rbp
        pop     %rbx

        .endm
438
+
439
+
440
#
# mXorState128 input, state, offset
#   XORs the 16 bytes at offset(input) into the 16 bytes at offset(state).
#   Neither address needs any particular alignment.
#   Clobbers: UseSIMD == 0: %rax, %rcx, flags
#             UseSIMD != 0: %xmm0, %xmm1
#
        .macro  mXorState128 input, state, offset
    .if UseSIMD == 0
        movq    \offset(\input), %rax
        movq    \offset+8(\input), %rcx
        xorq    %rax, \offset(\state)
        xorq    %rcx, \offset+8(\state)
    .else
        movdqu  \offset(\input), %xmm0
        # A memory operand on pxor must be 16-byte aligned or the instruction
        # faults, so the state is loaded with movdqu and XORed from a register.
        movdqu  \offset(\state), %xmm1
        pxor    %xmm1, %xmm0
        movdqu  %xmm0, \offset(\state)
    .endif
        .endm
452
+
453
#
# mXorState256 input, state, offset
#   XORs the 32 bytes at offset(input) into the 32 bytes at offset(state).
#   Neither address needs any particular alignment.
#   Clobbers: UseSIMD == 0: %rax, %rcx, %r8, %r10, flags
#             UseSIMD != 0: %xmm0-%xmm3
#
        .macro  mXorState256 input, state, offset
    .if UseSIMD == 0
        movq    \offset(\input), %rax
        movq    \offset+8(\input), %r10
        movq    \offset+16(\input), %rcx
        movq    \offset+24(\input), %r8
        xorq    %rax, \offset(\state)
        xorq    %r10, \offset+8(\state)
        xorq    %rcx, \offset+16(\state)
        xorq    %r8, \offset+24(\state)
    .else
        movdqu  \offset(\input), %xmm0
        movdqu  \offset+16(\input), %xmm1
        # A memory operand on pxor must be 16-byte aligned or the instruction
        # faults, so the state is loaded with movdqu and XORed from registers.
        movdqu  \offset(\state), %xmm2
        movdqu  \offset+16(\state), %xmm3
        pxor    %xmm2, %xmm0
        pxor    %xmm3, %xmm1
        movdqu  %xmm0, \offset(\state)
        movdqu  %xmm1, \offset+16(\state)
    .endif
        .endm
472
+
473
#
# mXorState512 input, state, offset
#   XORs the 64 bytes at offset(input) into the 64 bytes at offset(state).
#   On the SIMD path neither address needs any particular alignment.
#   Clobbers: UseSIMD == 0: whatever two mXorState256 expansions clobber
#             UseSIMD != 0: %xmm0-%xmm7
#
        .macro  mXorState512 input, state, offset
    .if UseSIMD == 0
        mXorState256    \input, \state, \offset
        mXorState256    \input, \state, \offset+32
    .else
        movdqu  \offset(\input), %xmm0
        movdqu  \offset+16(\input), %xmm1
        movdqu  \offset+32(\input), %xmm2
        movdqu  \offset+48(\input), %xmm3
        # A memory operand on pxor must be 16-byte aligned or the instruction
        # faults, so the state is loaded with movdqu and XORed from registers.
        movdqu  \offset(\state), %xmm4
        movdqu  \offset+16(\state), %xmm5
        movdqu  \offset+32(\state), %xmm6
        movdqu  \offset+48(\state), %xmm7
        pxor    %xmm4, %xmm0
        pxor    %xmm5, %xmm1
        pxor    %xmm6, %xmm2
        pxor    %xmm7, %xmm3
        movdqu  %xmm0, \offset(\state)
        movdqu  %xmm1, \offset+16(\state)
        movdqu  %xmm2, \offset+32(\state)
        movdqu  %xmm3, \offset+48(\state)
    .endif
        .endm
492
+
493
+ # -------------------------------------------------------------------------
494
+
495
#
# KeccakPermutation
#   Applies the Keccak permutation in place to the 25-lane state.
#   In:       apState (%rdi) = pointer to the state
#   Out:      no defined return value (%rax is used as scratch)
#   Clobbers: %rax, %rcx, %rdx, %rsi, %r8-%r11, flags; %rbx, %rbp and
#             %r12-%r15 are saved and restored around the permutation
#   Stack:    six pushes plus the 200-byte scratch state of mKeccakPermutation
#
        .align  2
        .global KeccakPermutation
        .type   KeccakPermutation, %function
KeccakPermutation:

        mPushRegs
        mKeccakPermutation
        mPopRegs
        ret

        # The size expression is ". - label", so it has to follow the body;
        # placed ahead of the label it evaluates to a negative value.
        .size   KeccakPermutation, .-KeccakPermutation
505
+
506
+ # -------------------------------------------------------------------------
507
+
508
#
# KeccakAbsorb576bits
#   XORs 576 bits (9 lanes, 72 bytes) of input into the start of the state,
#   then applies the permutation in place.
#   In:       apState (%rdi) = pointer to the state
#             apInput (%rsi) = pointer to at least 72 readable bytes
#   Out:      no defined return value (%rax is used as scratch)
#   Clobbers: caller-saved registers used by mXorState512 and by the
#             permutation; callee-saved registers are preserved
#
        .align  2
        .global KeccakAbsorb576bits
        .type   KeccakAbsorb576bits, %function
KeccakAbsorb576bits:

        mXorState512    apInput, apState, 0     # lanes 0-7
        movq    64(apInput), %rax               # lane 8
        xorq    %rax, 64(apState)
        mPushRegs
        mKeccakPermutation
        mPopRegs
        ret

        # The size expression is ". - label", so it has to follow the body;
        # placed ahead of the label it evaluates to a negative value.
        .size   KeccakAbsorb576bits, .-KeccakAbsorb576bits
521
+
522
+ # -------------------------------------------------------------------------
523
+
524
#
# KeccakAbsorb832bits
#   XORs 832 bits (13 lanes, 104 bytes) of input into the start of the state,
#   then applies the permutation in place.
#   In:       apState (%rdi) = pointer to the state
#             apInput (%rsi) = pointer to at least 104 readable bytes
#   Out:      no defined return value (%rax is used as scratch)
#   Clobbers: caller-saved registers used by the mXorState macros and by the
#             permutation; callee-saved registers are preserved
#
        .align  2
        .global KeccakAbsorb832bits
        .type   KeccakAbsorb832bits, %function
KeccakAbsorb832bits:

        mXorState512    apInput, apState, 0     # lanes 0-7
        mXorState256    apInput, apState, 64    # lanes 8-11
        movq    96(apInput), %rax               # lane 12
        xorq    %rax, 96(apState)
        mPushRegs
        mKeccakPermutation
        mPopRegs
        ret

        # The size expression is ". - label", so it has to follow the body;
        # placed ahead of the label it evaluates to a negative value.
        .size   KeccakAbsorb832bits, .-KeccakAbsorb832bits
538
+
539
+ # -------------------------------------------------------------------------
540
+
541
#
# KeccakAbsorb1024bits
#   XORs 1024 bits (16 lanes, 128 bytes) of input into the start of the
#   state, then applies the permutation in place.
#   In:       apState (%rdi) = pointer to the state
#             apInput (%rsi) = pointer to at least 128 readable bytes
#   Out:      no defined return value (%rax is used as scratch)
#   Clobbers: caller-saved registers used by mXorState512 and by the
#             permutation; callee-saved registers are preserved
#
        .align  2
        .global KeccakAbsorb1024bits
        .type   KeccakAbsorb1024bits, %function
KeccakAbsorb1024bits:

        mXorState512    apInput, apState, 0     # lanes 0-7
        mXorState512    apInput, apState, 64    # lanes 8-15
        mPushRegs
        mKeccakPermutation
        mPopRegs
        ret

        # The size expression is ". - label", so it has to follow the body;
        # placed ahead of the label it evaluates to a negative value.
        .size   KeccakAbsorb1024bits, .-KeccakAbsorb1024bits
553
+
554
+ # -------------------------------------------------------------------------
555
+
556
#
# KeccakAbsorb1088bits
#   XORs 1088 bits (17 lanes, 136 bytes) of input into the start of the
#   state, then applies the permutation in place.
#   In:       apState (%rdi) = pointer to the state
#             apInput (%rsi) = pointer to at least 136 readable bytes
#   Out:      no defined return value (%rax is used as scratch)
#   Clobbers: caller-saved registers used by mXorState512 and by the
#             permutation; callee-saved registers are preserved
#
        .align  2
        .global KeccakAbsorb1088bits
        .type   KeccakAbsorb1088bits, %function
KeccakAbsorb1088bits:

        mXorState512    apInput, apState, 0     # lanes 0-7
        mXorState512    apInput, apState, 64    # lanes 8-15
        movq    128(apInput), %rax              # lane 16
        xorq    %rax, 128(apState)
        mPushRegs
        mKeccakPermutation
        mPopRegs
        ret

        # The size expression is ". - label", so it has to follow the body;
        # placed ahead of the label it evaluates to a negative value.
        .size   KeccakAbsorb1088bits, .-KeccakAbsorb1088bits
570
+
571
+ # -------------------------------------------------------------------------
572
+
573
#
# KeccakAbsorb1152bits
#   XORs 1152 bits (18 lanes, 144 bytes) of input into the start of the
#   state, then applies the permutation in place.
#   In:       apState (%rdi) = pointer to the state
#             apInput (%rsi) = pointer to at least 144 readable bytes
#   Out:      no defined return value (%rax is used as scratch)
#   Clobbers: caller-saved registers used by the mXorState macros and by the
#             permutation; callee-saved registers are preserved
#
        .align  2
        .global KeccakAbsorb1152bits
        .type   KeccakAbsorb1152bits, %function
KeccakAbsorb1152bits:

        mXorState512    apInput, apState, 0     # lanes 0-7
        mXorState512    apInput, apState, 64    # lanes 8-15
        mXorState128    apInput, apState, 128   # lanes 16-17
        mPushRegs
        mKeccakPermutation
        mPopRegs
        ret

        # The size expression is ". - label", so it has to follow the body;
        # placed ahead of the label it evaluates to a negative value.
        .size   KeccakAbsorb1152bits, .-KeccakAbsorb1152bits
586
+
587
+ # -------------------------------------------------------------------------
588
+
589
#
# KeccakAbsorb1344bits
#   XORs 1344 bits (21 lanes, 168 bytes) of input into the start of the
#   state, then applies the permutation in place.
#   In:       apState (%rdi) = pointer to the state
#             apInput (%rsi) = pointer to at least 168 readable bytes
#   Out:      no defined return value (%rax is used as scratch)
#   Clobbers: caller-saved registers used by the mXorState macros and by the
#             permutation; callee-saved registers are preserved
#
        .align  2
        .global KeccakAbsorb1344bits
        .type   KeccakAbsorb1344bits, %function
KeccakAbsorb1344bits:

        mXorState512    apInput, apState, 0     # lanes 0-7
        mXorState512    apInput, apState, 64    # lanes 8-15
        mXorState256    apInput, apState, 128   # lanes 16-19
        movq    160(apInput), %rax              # lane 20
        xorq    %rax, 160(apState)
        mPushRegs
        mKeccakPermutation
        mPopRegs
        ret

        # The size expression is ". - label", so it has to follow the body;
        # placed ahead of the label it evaluates to a negative value.
        .size   KeccakAbsorb1344bits, .-KeccakAbsorb1344bits
604
+
605
+ # -------------------------------------------------------------------------
606
+
607
#
# KeccakAbsorb
#   XORs aNbrWords 64-bit words of input into the start of the state, then
#   applies the permutation in place.
#   In:       apState   (%rdi) = pointer to the state
#             apInput   (%rsi) = pointer to the input words
#             aNbrWords (%rdx) = word count; only bits 0-4 are examined, and
#                                each set bit absorbs a chunk of that many words
#   Out:      no defined return value (%rax is used as scratch)
#   Clobbers: caller-saved registers used by the mXorState macros and by the
#             permutation (including %rsi and %r9); callee-saved registers
#             are preserved
#   NOTE(review): the state has 25 lanes, so a count above 25 would XOR past
#   its end; callers presumably never pass one - confirm.
#
        .align  2
        .global KeccakAbsorb
        .type   KeccakAbsorb, %function
KeccakAbsorb:

        movq    apState, xpState                # xpState advances through the state; apState stays fixed for the permutation

        testq   $16, aNbrWords                  # 16-word chunk
        jz      .LxorInputToState8
        mXorState512    apInput, xpState, 0
        mXorState512    apInput, xpState, 64
        addq    $128, apInput
        addq    $128, xpState

.LxorInputToState8:
        testq   $8, aNbrWords                   # 8-word chunk
        jz      .LxorInputToState4
        mXorState512    apInput, xpState, 0
        addq    $64, apInput
        addq    $64, xpState

.LxorInputToState4:
        testq   $4, aNbrWords                   # 4-word chunk
        jz      .LxorInputToState2
        mXorState256    apInput, xpState, 0
        addq    $32, apInput
        addq    $32, xpState

.LxorInputToState2:
        testq   $2, aNbrWords                   # 2-word chunk
        jz      .LxorInputToState1
        mXorState128    apInput, xpState, 0
        addq    $16, apInput
        addq    $16, xpState

.LxorInputToState1:
        testq   $1, aNbrWords                   # final single word
        jz      .LxorInputToStateDone
        movq    (apInput), %rax
        xorq    %rax, (xpState)

.LxorInputToStateDone:

        mPushRegs
        mKeccakPermutation
        mPopRegs
        ret

        # The size expression is ". - label", so it has to follow the body;
        # placed ahead of the label it evaluates to a negative value.
        .size   KeccakAbsorb, .-KeccakAbsorb
655
+
656
+ # -------------------------------------------------------------------------
657
+
658
#
# KeccakInitializeState
#   Writes the initial value of all 25 state lanes: lanes 1, 2, 8, 12, 17 and
#   20 are set to all-ones, every other lane to zero.  Both UseSIMD variants
#   store the same values.
#   NOTE(review): the all-ones lanes presumably implement a complemented-lane
#   representation of the all-zero Keccak state; KeccakExtract1024bits inverts
#   lanes 1, 2, 8 and 12 on output, which is consistent with that - confirm.
#   In:       apState (%rdi) = pointer to the state (200 writable bytes)
#   Out:      %rax = 0 as a side effect of the zeroing
#   Clobbers: %rax, %rcx, flags, and %xmm0 when UseSIMD != 0
#
        .align  2
        .global KeccakInitializeState
        .type   KeccakInitializeState, %function
KeccakInitializeState:
        xorq    %rax, %rax                      # %rax = 0
        xorq    %rcx, %rcx
        notq    %rcx                            # %rcx = all-ones

    .if UseSIMD == 0
        movq    %rax,  0*8(apState)
        movq    %rcx,  1*8(apState)
        movq    %rcx,  2*8(apState)
        movq    %rax,  3*8(apState)
        movq    %rax,  4*8(apState)
        movq    %rax,  5*8(apState)
        movq    %rax,  6*8(apState)
        movq    %rax,  7*8(apState)
        movq    %rcx,  8*8(apState)
        movq    %rax,  9*8(apState)
        movq    %rax, 10*8(apState)
        movq    %rax, 11*8(apState)
        movq    %rcx, 12*8(apState)
        movq    %rax, 13*8(apState)
        movq    %rax, 14*8(apState)
        movq    %rax, 15*8(apState)
        movq    %rax, 16*8(apState)
        movq    %rcx, 17*8(apState)
        movq    %rax, 18*8(apState)
        movq    %rax, 19*8(apState)
        movq    %rcx, 20*8(apState)
        movq    %rax, 21*8(apState)
        movq    %rax, 22*8(apState)
        movq    %rax, 23*8(apState)
        movq    %rax, 24*8(apState)
    .else
        pxor    %xmm0, %xmm0                    # 16 zero bytes: clears two adjacent lanes per store

        movq    %rax,   0*8(apState)
        movq    %rcx,   1*8(apState)
        movq    %rcx,   2*8(apState)
        movq    %rax,   3*8(apState)
        movdqu  %xmm0,  4*8(apState)            # lanes 4-5
        movdqu  %xmm0,  6*8(apState)            # lanes 6-7
        movq    %rcx,   8*8(apState)
        movq    %rax,   9*8(apState)
        movdqu  %xmm0, 10*8(apState)            # lanes 10-11
        movq    %rcx,  12*8(apState)
        movq    %rax,  13*8(apState)
        movdqu  %xmm0, 14*8(apState)            # lanes 14-15
        movq    %rax,  16*8(apState)
        movq    %rcx,  17*8(apState)
        movdqu  %xmm0, 18*8(apState)            # lanes 18-19
        movq    %rcx,  20*8(apState)
        movq    %rax,  21*8(apState)
        movdqu  %xmm0, 22*8(apState)            # lanes 22-23
        movq    %rax,  24*8(apState)
    .endif
        ret

        # The size expression is ". - label", so it has to follow the body;
        # placed ahead of the label it evaluates to a negative value.
        .size   KeccakInitializeState, .-KeccakInitializeState
717
+
718
+ # -------------------------------------------------------------------------
719
+
720
#
# KeccakExtract1024bits
#   Copies the first 16 lanes (128 bytes) of the state to an output buffer,
#   bitwise-inverting lanes 1, 2, 8 and 12 on the way.  The state itself is
#   not modified.
#   NOTE(review): the inverted lanes match four of the lanes that
#   KeccakInitializeState sets to all-ones, so this presumably undoes a
#   complemented-lane representation - confirm.
#   In:       apState (%rdi) = pointer to the state
#             %rsi           = pointer to 128 writable bytes
#   Out:      no defined return value (%rax is used as scratch)
#   Clobbers: %rax, %rcx, %rdx, %r8, flags
#
        .align  2
        .global KeccakExtract1024bits
        .type   KeccakExtract1024bits, %function
KeccakExtract1024bits:

        # lanes 0-3; lanes 1 and 2 are inverted
        movq    0*8(apState), %rax
        movq    1*8(apState), %rcx
        movq    2*8(apState), %rdx
        movq    3*8(apState), %r8
        notq    %rcx
        notq    %rdx
        movq    %rax, 0*8(%rsi)
        movq    %rcx, 1*8(%rsi)
        movq    %rdx, 2*8(%rsi)
        movq    %r8,  3*8(%rsi)

        # lanes 4-7; copied unchanged
        movq    4*8(apState), %rax
        movq    5*8(apState), %rcx
        movq    6*8(apState), %rdx
        movq    7*8(apState), %r8
        movq    %rax, 4*8(%rsi)
        movq    %rcx, 5*8(%rsi)
        movq    %rdx, 6*8(%rsi)
        movq    %r8,  7*8(%rsi)

        # lanes 8-11; lane 8 is inverted
        movq    8*8(apState), %rax
        movq    9*8(apState), %rcx
        movq    10*8(apState), %rdx
        movq    11*8(apState), %r8
        notq    %rax
        movq    %rax, 8*8(%rsi)
        movq    %rcx, 9*8(%rsi)
        movq    %rdx, 10*8(%rsi)
        movq    %r8,  11*8(%rsi)

        # lanes 12-15; lane 12 is inverted
        movq    12*8(apState), %rax
        movq    13*8(apState), %rcx
        movq    14*8(apState), %rdx
        movq    15*8(apState), %r8
        notq    %rax
        movq    %rax, 12*8(%rsi)
        movq    %rcx, 13*8(%rsi)
        movq    %rdx, 14*8(%rsi)
        movq    %r8,  15*8(%rsi)
        ret

        # The size expression is ". - label", so it has to follow the body;
        # placed ahead of the label it evaluates to a negative value.
        .size   KeccakExtract1024bits, .-KeccakExtract1024bits
766
+