rbnacl-libsodium 1.0.11 → 1.0.13

Sign up to get free protection for your applications and to get access to all the features.
Files changed (465) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGES.md +4 -0
  3. data/lib/rbnacl/libsodium/version.rb +1 -1
  4. data/vendor/libsodium/AUTHORS +45 -33
  5. data/vendor/libsodium/ChangeLog +63 -2
  6. data/vendor/libsodium/LICENSE +1 -1
  7. data/vendor/libsodium/Makefile.am +1 -0
  8. data/vendor/libsodium/Makefile.in +17 -14
  9. data/vendor/libsodium/README.markdown +1 -0
  10. data/vendor/libsodium/THANKS +38 -4
  11. data/vendor/libsodium/aclocal.m4 +25 -24
  12. data/vendor/libsodium/autogen.sh +12 -7
  13. data/vendor/libsodium/autom4te.cache/output.0 +5066 -2281
  14. data/vendor/libsodium/autom4te.cache/output.1 +1013 -600
  15. data/vendor/libsodium/autom4te.cache/output.2 +5066 -2281
  16. data/vendor/libsodium/autom4te.cache/requests +426 -1584
  17. data/vendor/libsodium/autom4te.cache/traces.0 +2044 -876
  18. data/vendor/libsodium/autom4te.cache/traces.1 +996 -523
  19. data/vendor/libsodium/autom4te.cache/traces.2 +1996 -828
  20. data/vendor/libsodium/build-aux/compile +5 -4
  21. data/vendor/libsodium/build-aux/config.guess +120 -68
  22. data/vendor/libsodium/build-aux/config.sub +51 -22
  23. data/vendor/libsodium/build-aux/depcomp +3 -3
  24. data/vendor/libsodium/build-aux/install-sh +2 -2
  25. data/vendor/libsodium/build-aux/missing +3 -3
  26. data/vendor/libsodium/build-aux/test-driver +3 -3
  27. data/vendor/libsodium/builds/msvc/properties/Win32.props +4 -1
  28. data/vendor/libsodium/builds/msvc/properties/x64.props +4 -1
  29. data/vendor/libsodium/builds/msvc/resource.h +14 -0
  30. data/vendor/libsodium/builds/msvc/resource.rc +63 -0
  31. data/vendor/libsodium/builds/msvc/version.h +7 -4
  32. data/vendor/libsodium/builds/msvc/vs2010/libsodium.import.props +1 -1
  33. data/vendor/libsodium/builds/msvc/vs2010/libsodium/libsodium.props +10 -5
  34. data/vendor/libsodium/builds/msvc/vs2010/libsodium/libsodium.vcxproj +182 -139
  35. data/vendor/libsodium/builds/msvc/vs2010/libsodium/libsodium.vcxproj.filters +632 -470
  36. data/vendor/libsodium/builds/msvc/vs2012/libsodium.import.props +1 -1
  37. data/vendor/libsodium/builds/msvc/vs2012/libsodium/libsodium.props +10 -5
  38. data/vendor/libsodium/builds/msvc/vs2012/libsodium/libsodium.vcxproj +182 -129
  39. data/vendor/libsodium/builds/msvc/vs2012/libsodium/libsodium.vcxproj.filters +632 -470
  40. data/vendor/libsodium/builds/msvc/vs2013/libsodium.import.props +1 -1
  41. data/vendor/libsodium/builds/msvc/vs2013/libsodium/libsodium.props +10 -5
  42. data/vendor/libsodium/builds/msvc/vs2013/libsodium/libsodium.vcxproj +182 -129
  43. data/vendor/libsodium/builds/msvc/vs2013/libsodium/libsodium.vcxproj.filters +632 -470
  44. data/vendor/libsodium/builds/msvc/vs2015/libsodium.import.props +1 -1
  45. data/vendor/libsodium/builds/msvc/vs2015/libsodium/libsodium.props +10 -5
  46. data/vendor/libsodium/builds/msvc/vs2015/libsodium/libsodium.vcxproj +181 -118
  47. data/vendor/libsodium/builds/msvc/vs2015/libsodium/libsodium.vcxproj.filters +632 -470
  48. data/vendor/libsodium/builds/msvc/vs2017/libsodium.import.props +52 -0
  49. data/vendor/libsodium/builds/msvc/vs2017/libsodium.import.xml +17 -0
  50. data/vendor/libsodium/builds/msvc/vs2017/libsodium.sln +52 -0
  51. data/vendor/libsodium/builds/msvc/vs2017/libsodium/libsodium.props +48 -0
  52. data/vendor/libsodium/builds/msvc/vs2017/libsodium/libsodium.vcxproj +320 -0
  53. data/vendor/libsodium/builds/msvc/vs2017/libsodium/libsodium.vcxproj.filters +962 -0
  54. data/vendor/libsodium/builds/msvc/vs2017/libsodium/libsodium.xml +15 -0
  55. data/vendor/libsodium/configure +1002 -589
  56. data/vendor/libsodium/configure.ac +48 -21
  57. data/vendor/libsodium/contrib/Findsodium.cmake +267 -0
  58. data/vendor/libsodium/contrib/Makefile.am +3 -0
  59. data/vendor/libsodium/contrib/Makefile.in +483 -0
  60. data/vendor/libsodium/dist-build/Makefile.in +11 -7
  61. data/vendor/libsodium/dist-build/android-armv8-a.sh +1 -1
  62. data/vendor/libsodium/dist-build/android-build.sh +25 -17
  63. data/vendor/libsodium/dist-build/android-mips32.sh +1 -1
  64. data/vendor/libsodium/dist-build/android-mips64.sh +1 -1
  65. data/vendor/libsodium/dist-build/android-x86_64.sh +1 -1
  66. data/vendor/libsodium/dist-build/emscripten-symbols.def +150 -2
  67. data/vendor/libsodium/dist-build/emscripten-wasm.sh +132 -0
  68. data/vendor/libsodium/dist-build/emscripten.sh +8 -6
  69. data/vendor/libsodium/dist-build/ios.sh +29 -5
  70. data/vendor/libsodium/libsodium.vcxproj +139 -77
  71. data/vendor/libsodium/libsodium.vcxproj.filters +315 -144
  72. data/vendor/libsodium/m4/ax_check_catchable_segv.m4 +42 -0
  73. data/vendor/libsodium/m4/ax_check_compile_flag.m4 +6 -4
  74. data/vendor/libsodium/m4/ax_check_define.m4 +3 -3
  75. data/vendor/libsodium/m4/ax_check_gnu_make.m4 +31 -25
  76. data/vendor/libsodium/m4/ax_check_link_flag.m4 +8 -6
  77. data/vendor/libsodium/m4/ax_pthread.m4 +275 -275
  78. data/vendor/libsodium/m4/ax_valgrind_check.m4 +92 -41
  79. data/vendor/libsodium/m4/pkg.m4 +1 -1
  80. data/vendor/libsodium/msvc-scripts/Makefile.in +11 -7
  81. data/vendor/libsodium/msvc-scripts/process.bat +4 -3
  82. data/vendor/libsodium/packaging/dotnet-core/README.md +59 -0
  83. data/vendor/libsodium/packaging/dotnet-core/desktop.targets +16 -0
  84. data/vendor/libsodium/packaging/dotnet-core/libsodium.props +33 -0
  85. data/vendor/libsodium/packaging/dotnet-core/prepare.py +262 -0
  86. data/vendor/libsodium/packaging/dotnet-core/recipes/alpine-x64 +3 -0
  87. data/vendor/libsodium/packaging/dotnet-core/recipes/build +9 -0
  88. data/vendor/libsodium/packaging/dotnet-core/recipes/centos-x64 +3 -0
  89. data/vendor/libsodium/packaging/dotnet-core/recipes/debian-x64 +4 -0
  90. data/vendor/libsodium/packaging/dotnet-core/recipes/fedora-x64 +3 -0
  91. data/vendor/libsodium/packaging/dotnet-core/recipes/opensuse-x64 +3 -0
  92. data/vendor/libsodium/packaging/dotnet-core/recipes/pack +5 -0
  93. data/vendor/libsodium/packaging/dotnet-core/recipes/test +27 -0
  94. data/vendor/libsodium/packaging/dotnet-core/recipes/ubuntu-x64 +4 -0
  95. data/vendor/libsodium/packaging/nuget/package.config +1 -1
  96. data/vendor/libsodium/packaging/nuget/package.gsl +3 -3
  97. data/vendor/libsodium/src/Makefile.in +11 -7
  98. data/vendor/libsodium/src/libsodium/Makefile.am +113 -98
  99. data/vendor/libsodium/src/libsodium/Makefile.in +1034 -1236
  100. data/vendor/libsodium/src/libsodium/crypto_aead/aes256gcm/aesni/aead_aes256gcm_aesni.c +31 -12
  101. data/vendor/libsodium/src/libsodium/crypto_aead/chacha20poly1305/sodium/aead_chacha20poly1305.c +31 -10
  102. data/vendor/libsodium/src/libsodium/crypto_aead/xchacha20poly1305/sodium/aead_xchacha20poly1305.c +153 -0
  103. data/vendor/libsodium/src/libsodium/crypto_auth/crypto_auth.c +7 -0
  104. data/vendor/libsodium/src/libsodium/crypto_auth/hmacsha256/{cp/hmac_hmacsha256.c → auth_hmacsha256.c} +43 -35
  105. data/vendor/libsodium/src/libsodium/crypto_auth/hmacsha512/{cp/hmac_hmacsha512.c → auth_hmacsha512.c} +43 -35
  106. data/vendor/libsodium/src/libsodium/crypto_auth/hmacsha512256/{cp/hmac_hmacsha512256.c → auth_hmacsha512256.c} +48 -9
  107. data/vendor/libsodium/src/libsodium/crypto_box/crypto_box_easy.c +4 -3
  108. data/vendor/libsodium/src/libsodium/crypto_box/crypto_box_seal.c +2 -1
  109. data/vendor/libsodium/src/libsodium/crypto_box/curve25519xchacha20poly1305/box_curve25519xchacha20poly1305.c +197 -0
  110. data/vendor/libsodium/src/libsodium/crypto_box/curve25519xchacha20poly1305/box_seal_curve25519xchacha20poly1305.c +79 -0
  111. data/vendor/libsodium/src/libsodium/crypto_box/curve25519xsalsa20poly1305/box_curve25519xsalsa20poly1305.c +150 -0
  112. data/vendor/libsodium/src/libsodium/crypto_core/curve25519/ref10/curve25519_ref10.c +1156 -662
  113. data/vendor/libsodium/src/libsodium/crypto_core/hchacha20/core_hchacha20.c +12 -5
  114. data/vendor/libsodium/src/libsodium/crypto_core/hsalsa20/{core_hsalsa20_api.c → core_hsalsa20.c} +0 -0
  115. data/vendor/libsodium/src/libsodium/crypto_core/hsalsa20/ref2/core_hsalsa20_ref2.c +95 -0
  116. data/vendor/libsodium/src/libsodium/crypto_core/salsa/ref/core_salsa_ref.c +195 -0
  117. data/vendor/libsodium/src/libsodium/crypto_generichash/{blake2/generichash_blake2_api.c → blake2b/generichash_blake2.c} +7 -0
  118. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2b/ref/blake2.h +109 -0
  119. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2b/ref/blake2b-compress-avx2.c +49 -0
  120. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2b/ref/blake2b-compress-avx2.h +140 -0
  121. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2b/ref/blake2b-compress-ref.c +92 -0
  122. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2b/ref/blake2b-compress-sse41.c +87 -0
  123. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2b/ref/blake2b-compress-sse41.h +103 -0
  124. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2b/ref/blake2b-compress-ssse3.c +90 -0
  125. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2b/ref/blake2b-compress-ssse3.h +103 -0
  126. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2b/ref/blake2b-load-avx2.h +340 -0
  127. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2b/ref/blake2b-load-sse2.h +164 -0
  128. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2b/ref/blake2b-load-sse41.h +307 -0
  129. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2b/ref/blake2b-ref.c +494 -0
  130. data/vendor/libsodium/src/libsodium/crypto_generichash/{blake2 → blake2b}/ref/generichash_blake2b.c +22 -26
  131. data/vendor/libsodium/src/libsodium/crypto_generichash/crypto_generichash.c +7 -0
  132. data/vendor/libsodium/src/libsodium/crypto_hash/sha256/cp/hash_sha256_cp.c +254 -0
  133. data/vendor/libsodium/src/libsodium/crypto_hash/sha256/{hash_sha256_api.c → hash_sha256.c} +4 -2
  134. data/vendor/libsodium/src/libsodium/crypto_hash/sha512/cp/hash_sha512_cp.c +280 -0
  135. data/vendor/libsodium/src/libsodium/crypto_hash/sha512/{hash_sha512_api.c → hash_sha512.c} +4 -2
  136. data/vendor/libsodium/src/libsodium/crypto_kdf/blake2b/kdf_blake2b.c +52 -0
  137. data/vendor/libsodium/src/libsodium/crypto_kdf/crypto_kdf.c +49 -0
  138. data/vendor/libsodium/src/libsodium/crypto_kx/crypto_kx.c +136 -0
  139. data/vendor/libsodium/src/libsodium/crypto_onetimeauth/crypto_onetimeauth.c +6 -0
  140. data/vendor/libsodium/src/libsodium/crypto_onetimeauth/poly1305/donna/poly1305_donna.c +34 -27
  141. data/vendor/libsodium/src/libsodium/crypto_onetimeauth/poly1305/donna/poly1305_donna.h +1 -1
  142. data/vendor/libsodium/src/libsodium/crypto_onetimeauth/poly1305/donna/poly1305_donna32.h +203 -156
  143. data/vendor/libsodium/src/libsodium/crypto_onetimeauth/poly1305/donna/poly1305_donna64.h +178 -134
  144. data/vendor/libsodium/src/libsodium/crypto_onetimeauth/poly1305/onetimeauth_poly1305.c +22 -4
  145. data/vendor/libsodium/src/libsodium/crypto_onetimeauth/poly1305/onetimeauth_poly1305.h +10 -12
  146. data/vendor/libsodium/src/libsodium/crypto_onetimeauth/poly1305/sse2/poly1305_sse2.c +564 -315
  147. data/vendor/libsodium/src/libsodium/crypto_onetimeauth/poly1305/sse2/poly1305_sse2.h +1 -1
  148. data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/argon2-core.c +131 -84
  149. data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/argon2-core.h +23 -18
  150. data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/argon2-encoding.c +163 -145
  151. data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/argon2-encoding.h +2 -1
  152. data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/argon2-fill-block-avx2.c +247 -0
  153. data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/argon2-fill-block-ref.c +42 -29
  154. data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/argon2-fill-block-ssse3.c +71 -47
  155. data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/argon2.c +100 -65
  156. data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/argon2.h +77 -23
  157. data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/blake2b-long.c +30 -31
  158. data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/blamka-round-avx2.h +150 -0
  159. data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/blamka-round-ref.h +28 -26
  160. data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/blamka-round-ssse3.h +102 -99
  161. data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/pwhash_argon2i.c +90 -41
  162. data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/pwhash_argon2id.c +213 -0
  163. data/vendor/libsodium/src/libsodium/crypto_pwhash/crypto_pwhash.c +72 -4
  164. data/vendor/libsodium/src/libsodium/crypto_pwhash/scryptsalsa208sha256/crypto_scrypt-common.c +34 -37
  165. data/vendor/libsodium/src/libsodium/crypto_pwhash/scryptsalsa208sha256/crypto_scrypt.h +27 -32
  166. data/vendor/libsodium/src/libsodium/crypto_pwhash/scryptsalsa208sha256/nosse/pwhash_scryptsalsa208sha256_nosse.c +120 -86
  167. data/vendor/libsodium/src/libsodium/crypto_pwhash/scryptsalsa208sha256/pbkdf2-sha256.c +16 -13
  168. data/vendor/libsodium/src/libsodium/crypto_pwhash/scryptsalsa208sha256/pbkdf2-sha256.h +4 -4
  169. data/vendor/libsodium/src/libsodium/crypto_pwhash/scryptsalsa208sha256/pwhash_scryptsalsa208sha256.c +98 -50
  170. data/vendor/libsodium/src/libsodium/crypto_pwhash/scryptsalsa208sha256/scrypt_platform.c +23 -18
  171. data/vendor/libsodium/src/libsodium/crypto_pwhash/scryptsalsa208sha256/sse/pwhash_scryptsalsa208sha256_sse.c +105 -105
  172. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/donna_c64/curve25519_donna_c64.c +395 -330
  173. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/ref10/x25519_ref10.c +225 -198
  174. data/vendor/libsodium/src/libsodium/crypto_secretbox/crypto_secretbox.c +7 -0
  175. data/vendor/libsodium/src/libsodium/crypto_secretbox/crypto_secretbox_easy.c +6 -5
  176. data/vendor/libsodium/src/libsodium/crypto_secretbox/xchacha20poly1305/secretbox_xchacha20poly1305.c +170 -0
  177. data/vendor/libsodium/src/libsodium/crypto_secretbox/xsalsa20poly1305/secretbox_xsalsa20poly1305.c +83 -0
  178. data/vendor/libsodium/src/libsodium/crypto_shorthash/crypto_shorthash.c +7 -0
  179. data/vendor/libsodium/src/libsodium/crypto_shorthash/siphash24/ref/shorthash_siphash24_ref.c +65 -0
  180. data/vendor/libsodium/src/libsodium/crypto_shorthash/siphash24/ref/shorthash_siphash_ref.h +24 -0
  181. data/vendor/libsodium/src/libsodium/crypto_shorthash/siphash24/ref/shorthash_siphashx24_ref.c +71 -0
  182. data/vendor/libsodium/src/libsodium/crypto_shorthash/siphash24/{shorthash_siphash24_api.c → shorthash_siphash24.c} +0 -0
  183. data/vendor/libsodium/src/libsodium/crypto_shorthash/siphash24/shorthash_siphashx24.c +11 -0
  184. data/vendor/libsodium/src/libsodium/crypto_sign/crypto_sign.c +33 -0
  185. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/ed25519_ref10.h +18 -0
  186. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/keypair.c +18 -13
  187. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/obsolete.c +29 -26
  188. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/open.c +75 -36
  189. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/sign.c +39 -15
  190. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/sign_ed25519.c +91 -0
  191. data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/nacl/afternm_aes128ctr.c +174 -0
  192. data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/nacl/beforenm_aes128ctr.c +66 -0
  193. data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/nacl/common.h +766 -0
  194. data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/nacl/consts.h +28 -0
  195. data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/nacl/consts_aes128ctr.c +28 -0
  196. data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/nacl/int128.h +50 -0
  197. data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/nacl/int128_aes128ctr.c +149 -0
  198. data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/nacl/stream_aes128ctr_nacl.c +31 -0
  199. data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/nacl/xor_afternm_aes128ctr.c +195 -0
  200. data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/{stream_aes128ctr_api.c → stream_aes128ctr.c} +6 -3
  201. data/vendor/libsodium/src/libsodium/crypto_stream/chacha20/dolbeau/chacha20_dolbeau-avx2.c +179 -0
  202. data/vendor/libsodium/src/libsodium/crypto_stream/chacha20/dolbeau/chacha20_dolbeau-avx2.h +8 -0
  203. data/vendor/libsodium/src/libsodium/crypto_stream/chacha20/dolbeau/chacha20_dolbeau-ssse3.c +173 -0
  204. data/vendor/libsodium/src/libsodium/crypto_stream/chacha20/dolbeau/chacha20_dolbeau-ssse3.h +8 -0
  205. data/vendor/libsodium/src/libsodium/crypto_stream/chacha20/dolbeau/u0.h +86 -0
  206. data/vendor/libsodium/src/libsodium/crypto_stream/chacha20/dolbeau/u1.h +98 -0
  207. data/vendor/libsodium/src/libsodium/crypto_stream/chacha20/dolbeau/u4.h +175 -0
  208. data/vendor/libsodium/src/libsodium/crypto_stream/chacha20/dolbeau/u8.h +357 -0
  209. data/vendor/libsodium/src/libsodium/crypto_stream/chacha20/ref/{stream_chacha20_ref.c → chacha20_ref.c} +93 -94
  210. data/vendor/libsodium/src/libsodium/crypto_stream/chacha20/ref/chacha20_ref.h +8 -0
  211. data/vendor/libsodium/src/libsodium/crypto_stream/chacha20/stream_chacha20.c +39 -7
  212. data/vendor/libsodium/src/libsodium/crypto_stream/crypto_stream.c +7 -0
  213. data/vendor/libsodium/src/libsodium/crypto_stream/salsa20/ref/salsa20_ref.c +120 -0
  214. data/vendor/libsodium/src/libsodium/crypto_stream/salsa20/ref/salsa20_ref.h +8 -0
  215. data/vendor/libsodium/src/libsodium/crypto_stream/salsa20/stream_salsa20.c +93 -0
  216. data/vendor/libsodium/src/libsodium/crypto_stream/salsa20/stream_salsa20.h +16 -0
  217. data/vendor/libsodium/src/libsodium/crypto_stream/salsa20/{amd64_xmm6/stream_salsa20_amd64_xmm6.S → xmm6/salsa20_xmm6-asm.S} +20 -12
  218. data/vendor/libsodium/src/libsodium/crypto_stream/salsa20/xmm6/salsa20_xmm6.c +31 -0
  219. data/vendor/libsodium/src/libsodium/crypto_stream/salsa20/xmm6/salsa20_xmm6.h +8 -0
  220. data/vendor/libsodium/src/libsodium/crypto_stream/salsa20/xmm6int/salsa20_xmm6int-avx2.c +131 -0
  221. data/vendor/libsodium/src/libsodium/crypto_stream/salsa20/xmm6int/salsa20_xmm6int-avx2.h +8 -0
  222. data/vendor/libsodium/src/libsodium/crypto_stream/salsa20/xmm6int/salsa20_xmm6int-sse2.c +122 -0
  223. data/vendor/libsodium/src/libsodium/crypto_stream/salsa20/xmm6int/salsa20_xmm6int-sse2.h +8 -0
  224. data/vendor/libsodium/src/libsodium/crypto_stream/salsa20/xmm6int/u0.h +195 -0
  225. data/vendor/libsodium/src/libsodium/crypto_stream/salsa20/xmm6int/u1.h +207 -0
  226. data/vendor/libsodium/src/libsodium/crypto_stream/salsa20/xmm6int/u4.h +547 -0
  227. data/vendor/libsodium/src/libsodium/crypto_stream/salsa20/xmm6int/u8.h +476 -0
  228. data/vendor/libsodium/src/libsodium/crypto_stream/salsa2012/ref/stream_salsa2012_ref.c +106 -0
  229. data/vendor/libsodium/src/libsodium/crypto_stream/salsa2012/stream_salsa2012.c +20 -0
  230. data/vendor/libsodium/src/libsodium/crypto_stream/salsa208/ref/stream_salsa208_ref.c +106 -0
  231. data/vendor/libsodium/src/libsodium/crypto_stream/salsa208/stream_salsa208.c +20 -0
  232. data/vendor/libsodium/src/libsodium/crypto_stream/xchacha20/stream_xchacha20.c +63 -0
  233. data/vendor/libsodium/src/libsodium/crypto_stream/xsalsa20/stream_xsalsa20.c +60 -0
  234. data/vendor/libsodium/src/libsodium/crypto_verify/sodium/verify.c +61 -0
  235. data/vendor/libsodium/src/libsodium/include/Makefile.am +8 -6
  236. data/vendor/libsodium/src/libsodium/include/Makefile.in +29 -21
  237. data/vendor/libsodium/src/libsodium/include/sodium.h +15 -4
  238. data/vendor/libsodium/src/libsodium/include/sodium/crypto_aead_aes256gcm.h +4 -0
  239. data/vendor/libsodium/src/libsodium/include/sodium/crypto_aead_chacha20poly1305.h +6 -0
  240. data/vendor/libsodium/src/libsodium/include/sodium/crypto_aead_xchacha20poly1305.h +91 -0
  241. data/vendor/libsodium/src/libsodium/include/sodium/crypto_auth.h +3 -0
  242. data/vendor/libsodium/src/libsodium/include/sodium/crypto_auth_hmacsha256.h +5 -0
  243. data/vendor/libsodium/src/libsodium/include/sodium/crypto_auth_hmacsha512.h +4 -0
  244. data/vendor/libsodium/src/libsodium/include/sodium/crypto_auth_hmacsha512256.h +4 -0
  245. data/vendor/libsodium/src/libsodium/include/sodium/crypto_box_curve25519xchacha20poly1305.h +153 -0
  246. data/vendor/libsodium/src/libsodium/include/sodium/crypto_generichash.h +4 -0
  247. data/vendor/libsodium/src/libsodium/include/sodium/crypto_generichash_blake2b.h +3 -4
  248. data/vendor/libsodium/src/libsodium/include/sodium/crypto_hash_sha256.h +4 -3
  249. data/vendor/libsodium/src/libsodium/include/sodium/crypto_hash_sha512.h +4 -3
  250. data/vendor/libsodium/src/libsodium/include/sodium/crypto_kdf.h +51 -0
  251. data/vendor/libsodium/src/libsodium/include/sodium/crypto_kdf_blake2b.h +42 -0
  252. data/vendor/libsodium/src/libsodium/include/sodium/crypto_kx.h +64 -0
  253. data/vendor/libsodium/src/libsodium/include/sodium/crypto_onetimeauth.h +4 -0
  254. data/vendor/libsodium/src/libsodium/include/sodium/crypto_onetimeauth_poly1305.h +11 -9
  255. data/vendor/libsodium/src/libsodium/include/sodium/crypto_pwhash.h +37 -1
  256. data/vendor/libsodium/src/libsodium/include/sodium/crypto_pwhash_argon2i.h +40 -10
  257. data/vendor/libsodium/src/libsodium/include/sodium/crypto_pwhash_argon2id.h +116 -0
  258. data/vendor/libsodium/src/libsodium/include/sodium/crypto_pwhash_scryptsalsa208sha256.h +37 -4
  259. data/vendor/libsodium/src/libsodium/include/sodium/crypto_scalarmult_curve25519.h +0 -4
  260. data/vendor/libsodium/src/libsodium/include/sodium/crypto_secretbox.h +3 -0
  261. data/vendor/libsodium/src/libsodium/include/sodium/crypto_secretbox_xchacha20poly1305.h +62 -0
  262. data/vendor/libsodium/src/libsodium/include/sodium/crypto_secretbox_xsalsa20poly1305.h +5 -1
  263. data/vendor/libsodium/src/libsodium/include/sodium/crypto_shorthash.h +3 -0
  264. data/vendor/libsodium/src/libsodium/include/sodium/crypto_shorthash_siphash24.h +18 -0
  265. data/vendor/libsodium/src/libsodium/include/sodium/crypto_sign.h +22 -0
  266. data/vendor/libsodium/src/libsodium/include/sodium/crypto_sign_ed25519.h +28 -0
  267. data/vendor/libsodium/src/libsodium/include/sodium/crypto_stream.h +3 -0
  268. data/vendor/libsodium/src/libsodium/include/sodium/crypto_stream_aes128ctr.h +10 -5
  269. data/vendor/libsodium/src/libsodium/include/sodium/crypto_stream_chacha20.h +14 -3
  270. data/vendor/libsodium/src/libsodium/include/sodium/crypto_stream_salsa20.h +4 -0
  271. data/vendor/libsodium/src/libsodium/include/sodium/crypto_stream_salsa2012.h +3 -0
  272. data/vendor/libsodium/src/libsodium/include/sodium/crypto_stream_salsa208.h +3 -0
  273. data/vendor/libsodium/src/libsodium/include/sodium/crypto_stream_xchacha20.h +53 -0
  274. data/vendor/libsodium/src/libsodium/include/sodium/crypto_stream_xsalsa20.h +4 -0
  275. data/vendor/libsodium/src/libsodium/include/sodium/private/common.h +84 -17
  276. data/vendor/libsodium/src/libsodium/include/sodium/private/implementations.h +11 -0
  277. data/vendor/libsodium/src/libsodium/include/sodium/private/sse2_64_32.h +50 -0
  278. data/vendor/libsodium/src/libsodium/include/sodium/randombytes.h +10 -2
  279. data/vendor/libsodium/src/libsodium/include/sodium/utils.h +4 -5
  280. data/vendor/libsodium/src/libsodium/include/sodium/version.h.in +4 -0
  281. data/vendor/libsodium/src/libsodium/randombytes/randombytes.c +47 -19
  282. data/vendor/libsodium/src/libsodium/randombytes/salsa20/randombytes_salsa20_random.c +30 -50
  283. data/vendor/libsodium/src/libsodium/randombytes/sysrandom/randombytes_sysrandom.c +25 -15
  284. data/vendor/libsodium/src/libsodium/sodium/core.c +25 -23
  285. data/vendor/libsodium/src/libsodium/sodium/runtime.c +66 -57
  286. data/vendor/libsodium/src/libsodium/sodium/utils.c +120 -106
  287. data/vendor/libsodium/src/libsodium/sodium/version.c +10 -0
  288. data/vendor/libsodium/test/Makefile.in +11 -7
  289. data/vendor/libsodium/test/default/Makefile.am +65 -5
  290. data/vendor/libsodium/test/default/Makefile.in +243 -78
  291. data/vendor/libsodium/test/default/aead_aes256gcm.c +2 -2
  292. data/vendor/libsodium/test/default/aead_xchacha20poly1305.c +188 -0
  293. data/vendor/libsodium/test/default/aead_xchacha20poly1305.exp +51 -0
  294. data/vendor/libsodium/test/default/auth.c +11 -7
  295. data/vendor/libsodium/test/default/auth2.c +15 -12
  296. data/vendor/libsodium/test/default/auth3.c +18 -15
  297. data/vendor/libsodium/test/default/auth5.c +3 -2
  298. data/vendor/libsodium/test/default/auth6.c +4 -3
  299. data/vendor/libsodium/test/default/auth7.c +3 -2
  300. data/vendor/libsodium/test/default/box.c +57 -52
  301. data/vendor/libsodium/test/default/box2.c +41 -36
  302. data/vendor/libsodium/test/default/box7.c +4 -3
  303. data/vendor/libsodium/test/default/box8.c +4 -3
  304. data/vendor/libsodium/test/default/box_easy.c +36 -32
  305. data/vendor/libsodium/test/default/box_easy2.c +41 -34
  306. data/vendor/libsodium/test/default/box_seal.c +7 -6
  307. data/vendor/libsodium/test/default/box_seed.c +10 -8
  308. data/vendor/libsodium/test/default/chacha20.c +18 -3
  309. data/vendor/libsodium/test/default/chacha20.exp +45 -0
  310. data/vendor/libsodium/test/default/cmptest.h +1 -0
  311. data/vendor/libsodium/test/default/core1.c +10 -9
  312. data/vendor/libsodium/test/default/core2.c +13 -12
  313. data/vendor/libsodium/test/default/core3.c +13 -12
  314. data/vendor/libsodium/test/default/core4.c +11 -12
  315. data/vendor/libsodium/test/default/core5.c +13 -12
  316. data/vendor/libsodium/test/default/core6.c +15 -13
  317. data/vendor/libsodium/test/default/ed25519_convert.c +12 -9
  318. data/vendor/libsodium/test/default/hash.c +10 -6
  319. data/vendor/libsodium/test/default/hash3.c +3 -2
  320. data/vendor/libsodium/test/default/index-wasm.html.tpl +118 -0
  321. data/vendor/libsodium/test/default/kdf.c +61 -0
  322. data/vendor/libsodium/test/default/kdf.exp +77 -0
  323. data/vendor/libsodium/test/default/keygen.c +64 -0
  324. data/vendor/libsodium/test/default/keygen.exp +1 -0
  325. data/vendor/libsodium/test/default/kx.c +119 -0
  326. data/vendor/libsodium/test/default/kx.exp +7 -0
  327. data/vendor/libsodium/test/default/nacl-test-wrapper.sh +9 -2
  328. data/vendor/libsodium/test/default/onetimeauth.c +26 -23
  329. data/vendor/libsodium/test/default/onetimeauth2.c +22 -20
  330. data/vendor/libsodium/test/default/onetimeauth7.c +3 -2
  331. data/vendor/libsodium/test/default/pwhash.c +209 -157
  332. data/vendor/libsodium/test/default/pwhash_argon2id.c +388 -0
  333. data/vendor/libsodium/test/default/pwhash_argon2id.exp +15 -0
  334. data/vendor/libsodium/test/default/pwhash_scrypt.c +232 -224
  335. data/vendor/libsodium/test/default/pwhash_scrypt.exp +2 -1
  336. data/vendor/libsodium/test/default/pwhash_scrypt_ll.c +39 -41
  337. data/vendor/libsodium/test/default/randombytes.c +34 -13
  338. data/vendor/libsodium/test/default/randombytes.exp +1 -0
  339. data/vendor/libsodium/test/default/scalarmult.c +21 -18
  340. data/vendor/libsodium/test/default/scalarmult2.c +8 -6
  341. data/vendor/libsodium/test/default/scalarmult5.c +13 -10
  342. data/vendor/libsodium/test/default/scalarmult6.c +17 -14
  343. data/vendor/libsodium/test/default/scalarmult7.c +9 -10
  344. data/vendor/libsodium/test/default/secretbox.c +39 -36
  345. data/vendor/libsodium/test/default/secretbox2.c +28 -25
  346. data/vendor/libsodium/test/default/secretbox7.c +3 -2
  347. data/vendor/libsodium/test/default/secretbox8.c +4 -3
  348. data/vendor/libsodium/test/default/secretbox_easy.c +40 -37
  349. data/vendor/libsodium/test/default/secretbox_easy2.c +19 -18
  350. data/vendor/libsodium/test/default/shorthash.c +4 -4
  351. data/vendor/libsodium/test/default/sign.c +70 -13
  352. data/vendor/libsodium/test/default/sign.exp +2 -0
  353. data/vendor/libsodium/test/default/siphashx24.c +33 -0
  354. data/vendor/libsodium/test/default/siphashx24.exp +64 -0
  355. data/vendor/libsodium/test/default/sodium_core.c +9 -8
  356. data/vendor/libsodium/test/default/sodium_utils.c +52 -46
  357. data/vendor/libsodium/test/default/sodium_utils2.c +17 -8
  358. data/vendor/libsodium/test/default/sodium_utils3.c +15 -6
  359. data/vendor/libsodium/test/default/sodium_version.c +7 -1
  360. data/vendor/libsodium/test/default/stream.c +31 -18
  361. data/vendor/libsodium/test/default/stream.exp +65 -0
  362. data/vendor/libsodium/test/default/stream2.c +13 -9
  363. data/vendor/libsodium/test/default/stream3.c +12 -10
  364. data/vendor/libsodium/test/default/stream4.c +30 -27
  365. data/vendor/libsodium/test/default/verify1.c +5 -4
  366. data/vendor/libsodium/test/default/xchacha20.c +376 -0
  367. data/vendor/libsodium/test/default/xchacha20.exp +5 -0
  368. data/vendor/libsodium/test/quirks/quirks.h +4 -3
  369. metadata +140 -111
  370. data/vendor/libsodium/autom4te.cache/output.3 +0 -17240
  371. data/vendor/libsodium/autom4te.cache/output.4 +0 -17517
  372. data/vendor/libsodium/autom4te.cache/output.5 +0 -18535
  373. data/vendor/libsodium/autom4te.cache/output.6 +0 -19077
  374. data/vendor/libsodium/autom4te.cache/output.7 +0 -19837
  375. data/vendor/libsodium/autom4te.cache/traces.3 +0 -2833
  376. data/vendor/libsodium/autom4te.cache/traces.4 +0 -2951
  377. data/vendor/libsodium/autom4te.cache/traces.5 +0 -3042
  378. data/vendor/libsodium/autom4te.cache/traces.6 +0 -3194
  379. data/vendor/libsodium/autom4te.cache/traces.7 +0 -3614
  380. data/vendor/libsodium/builds/msvc/properties/ARM.props +0 -20
  381. data/vendor/libsodium/compile +0 -347
  382. data/vendor/libsodium/config.guess +0 -1568
  383. data/vendor/libsodium/config.sub +0 -1793
  384. data/vendor/libsodium/depcomp +0 -791
  385. data/vendor/libsodium/install-sh +0 -527
  386. data/vendor/libsodium/ltmain.sh +0 -9655
  387. data/vendor/libsodium/missing +0 -215
  388. data/vendor/libsodium/src/libsodium/crypto_auth/hmacsha256/auth_hmacsha256_api.c +0 -16
  389. data/vendor/libsodium/src/libsodium/crypto_auth/hmacsha256/cp/verify_hmacsha256.c +0 -11
  390. data/vendor/libsodium/src/libsodium/crypto_auth/hmacsha512/auth_hmacsha512_api.c +0 -16
  391. data/vendor/libsodium/src/libsodium/crypto_auth/hmacsha512/cp/verify_hmacsha512.c +0 -12
  392. data/vendor/libsodium/src/libsodium/crypto_auth/hmacsha512256/auth_hmacsha512256_api.c +0 -16
  393. data/vendor/libsodium/src/libsodium/crypto_auth/hmacsha512256/cp/verify_hmacsha512256.c +0 -14
  394. data/vendor/libsodium/src/libsodium/crypto_box/curve25519xsalsa20poly1305/box_curve25519xsalsa20poly1305_api.c +0 -41
  395. data/vendor/libsodium/src/libsodium/crypto_box/curve25519xsalsa20poly1305/ref/after_curve25519xsalsa20poly1305.c +0 -22
  396. data/vendor/libsodium/src/libsodium/crypto_box/curve25519xsalsa20poly1305/ref/before_curve25519xsalsa20poly1305.c +0 -18
  397. data/vendor/libsodium/src/libsodium/crypto_box/curve25519xsalsa20poly1305/ref/box_curve25519xsalsa20poly1305.c +0 -42
  398. data/vendor/libsodium/src/libsodium/crypto_box/curve25519xsalsa20poly1305/ref/keypair_curve25519xsalsa20poly1305.c +0 -29
  399. data/vendor/libsodium/src/libsodium/crypto_core/hchacha20/core_hchacha20.h +0 -28
  400. data/vendor/libsodium/src/libsodium/crypto_core/hsalsa20/ref2/core_hsalsa20.c +0 -100
  401. data/vendor/libsodium/src/libsodium/crypto_core/salsa20/core_salsa20_api.c +0 -21
  402. data/vendor/libsodium/src/libsodium/crypto_core/salsa20/ref/core_salsa20.c +0 -126
  403. data/vendor/libsodium/src/libsodium/crypto_core/salsa2012/core_salsa2012_api.c +0 -21
  404. data/vendor/libsodium/src/libsodium/crypto_core/salsa2012/ref/core_salsa2012.c +0 -126
  405. data/vendor/libsodium/src/libsodium/crypto_core/salsa208/core_salsa208_api.c +0 -21
  406. data/vendor/libsodium/src/libsodium/crypto_core/salsa208/ref/core_salsa208.c +0 -126
  407. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2-impl.h +0 -48
  408. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2.h +0 -97
  409. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-compress-avx2.c +0 -45
  410. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-compress-avx2.h +0 -123
  411. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-compress-ref.c +0 -94
  412. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-compress-sse41.c +0 -80
  413. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-compress-sse41.h +0 -97
  414. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-compress-ssse3.c +0 -87
  415. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-compress-ssse3.h +0 -97
  416. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-load-avx2.h +0 -339
  417. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-load-sse2.h +0 -66
  418. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-load-sse41.h +0 -400
  419. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-ref.c +0 -456
  420. data/vendor/libsodium/src/libsodium/crypto_hash/sha256/cp/hash_sha256.c +0 -269
  421. data/vendor/libsodium/src/libsodium/crypto_hash/sha512/cp/hash_sha512.c +0 -298
  422. data/vendor/libsodium/src/libsodium/crypto_pwhash/argon2/argon2-impl.h +0 -40
  423. data/vendor/libsodium/src/libsodium/crypto_secretbox/xsalsa20poly1305/ref/box_xsalsa20poly1305.c +0 -35
  424. data/vendor/libsodium/src/libsodium/crypto_secretbox/xsalsa20poly1305/secretbox_xsalsa20poly1305_api.c +0 -26
  425. data/vendor/libsodium/src/libsodium/crypto_shorthash/siphash24/ref/shorthash_siphash24.c +0 -72
  426. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/description +0 -1
  427. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/sign_ed25519_api.c +0 -39
  428. data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/portable/afternm_aes128ctr.c +0 -159
  429. data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/portable/beforenm_aes128ctr.c +0 -59
  430. data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/portable/common.h +0 -771
  431. data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/portable/consts.h +0 -28
  432. data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/portable/consts_aes128ctr.c +0 -14
  433. data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/portable/int128.h +0 -56
  434. data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/portable/int128_aes128ctr.c +0 -131
  435. data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/portable/stream_aes128ctr.c +0 -29
  436. data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/portable/types.h +0 -10
  437. data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/portable/xor_afternm_aes128ctr.c +0 -180
  438. data/vendor/libsodium/src/libsodium/crypto_stream/chacha20/ref/stream_chacha20_ref.h +0 -28
  439. data/vendor/libsodium/src/libsodium/crypto_stream/chacha20/vec/stream_chacha20_vec.c +0 -336
  440. data/vendor/libsodium/src/libsodium/crypto_stream/chacha20/vec/stream_chacha20_vec.h +0 -28
  441. data/vendor/libsodium/src/libsodium/crypto_stream/salsa20/ref/stream_salsa20_ref.c +0 -55
  442. data/vendor/libsodium/src/libsodium/crypto_stream/salsa20/ref/xor_salsa20_ref.c +0 -63
  443. data/vendor/libsodium/src/libsodium/crypto_stream/salsa20/stream_salsa20_api.c +0 -19
  444. data/vendor/libsodium/src/libsodium/crypto_stream/salsa2012/ref/stream_salsa2012.c +0 -51
  445. data/vendor/libsodium/src/libsodium/crypto_stream/salsa2012/ref/xor_salsa2012.c +0 -54
  446. data/vendor/libsodium/src/libsodium/crypto_stream/salsa2012/stream_salsa2012_api.c +0 -11
  447. data/vendor/libsodium/src/libsodium/crypto_stream/salsa208/ref/stream_salsa208.c +0 -51
  448. data/vendor/libsodium/src/libsodium/crypto_stream/salsa208/ref/xor_salsa208.c +0 -54
  449. data/vendor/libsodium/src/libsodium/crypto_stream/salsa208/stream_salsa208_api.c +0 -11
  450. data/vendor/libsodium/src/libsodium/crypto_stream/xsalsa20/ref/stream_xsalsa20.c +0 -24
  451. data/vendor/libsodium/src/libsodium/crypto_stream/xsalsa20/ref/xor_xsalsa20.c +0 -35
  452. data/vendor/libsodium/src/libsodium/crypto_stream/xsalsa20/stream_xsalsa20_api.c +0 -11
  453. data/vendor/libsodium/src/libsodium/crypto_verify/16/ref/verify_16.c +0 -17
  454. data/vendor/libsodium/src/libsodium/crypto_verify/16/verify_16_api.c +0 -6
  455. data/vendor/libsodium/src/libsodium/crypto_verify/32/ref/verify_32.c +0 -17
  456. data/vendor/libsodium/src/libsodium/crypto_verify/32/verify_32_api.c +0 -6
  457. data/vendor/libsodium/src/libsodium/crypto_verify/64/ref/verify_64.c +0 -17
  458. data/vendor/libsodium/src/libsodium/crypto_verify/64/verify_64_api.c +0 -6
  459. data/vendor/libsodium/src/libsodium/include/sodium/crypto_int32.h +0 -8
  460. data/vendor/libsodium/src/libsodium/include/sodium/crypto_int64.h +0 -8
  461. data/vendor/libsodium/src/libsodium/include/sodium/crypto_uint16.h +0 -8
  462. data/vendor/libsodium/src/libsodium/include/sodium/crypto_uint32.h +0 -8
  463. data/vendor/libsodium/src/libsodium/include/sodium/crypto_uint64.h +0 -8
  464. data/vendor/libsodium/src/libsodium/include/sodium/crypto_uint8.h +0 -8
  465. data/vendor/libsodium/test-driver +0 -139
@@ -1,23 +1,24 @@
1
1
  /*
2
- poly1305 implementation using 64 bit * 64 bit = 128 bit multiplication and 128 bit addition
2
+ poly1305 implementation using 64 bit * 64 bit = 128 bit multiplication
3
+ and 128 bit addition
3
4
  */
4
5
 
5
6
  #if defined(__SIZEOF_INT128__)
6
7
  typedef unsigned __int128 uint128_t;
7
8
  #else
8
- typedef unsigned uint128_t __attribute__ ((mode(TI)));
9
+ typedef unsigned uint128_t __attribute__((mode(TI)));
9
10
  #endif
10
11
 
11
- #define MUL(out, x, y) out = ((uint128_t)x * y)
12
+ #define MUL(out, x, y) out = ((uint128_t) x * y)
12
13
  #define ADD(out, in) out += in
13
14
  #define ADDLO(out, in) out += in
14
- #define SHR(in, shift) (unsigned long long)(in >> (shift))
15
- #define LO(in) (unsigned long long)(in)
15
+ #define SHR(in, shift) (unsigned long long) (in >> (shift))
16
+ #define LO(in) (unsigned long long) (in)
16
17
 
17
18
  #if defined(_MSC_VER)
18
19
  # define POLY1305_NOINLINE __declspec(noinline)
19
20
  #elif defined(__GNUC__)
20
- # define POLY1305_NOINLINE __attribute__ ((noinline))
21
+ # define POLY1305_NOINLINE __attribute__((noinline))
21
22
  #else
22
23
  # define POLY1305_NOINLINE
23
24
  #endif
@@ -28,154 +29,197 @@ typedef unsigned uint128_t __attribute__ ((mode(TI)));
28
29
 
29
30
  /* 17 + sizeof(unsigned long long) + 8*sizeof(unsigned long long) */
30
31
  typedef struct poly1305_state_internal_t {
31
- unsigned long long r[3];
32
- unsigned long long h[3];
33
- unsigned long long pad[2];
34
- unsigned long long leftover;
35
- unsigned char buffer[poly1305_block_size];
36
- unsigned char final;
32
+ unsigned long long r[3];
33
+ unsigned long long h[3];
34
+ unsigned long long pad[2];
35
+ unsigned long long leftover;
36
+ unsigned char buffer[poly1305_block_size];
37
+ unsigned char final;
37
38
  } poly1305_state_internal_t;
38
39
 
39
40
  static void
40
41
  poly1305_init(poly1305_state_internal_t *st, const unsigned char key[32])
41
42
  {
42
- unsigned long long t0,t1;
43
+ unsigned long long t0, t1;
43
44
 
44
- /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
45
- t0 = LOAD64_LE(&key[0]);
46
- t1 = LOAD64_LE(&key[8]);
45
+ /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
46
+ t0 = LOAD64_LE(&key[0]);
47
+ t1 = LOAD64_LE(&key[8]);
47
48
 
48
- st->r[0] = ( t0 ) & 0xffc0fffffff;
49
- st->r[1] = ((t0 >> 44) | (t1 << 20)) & 0xfffffc0ffff;
50
- st->r[2] = ((t1 >> 24) ) & 0x00ffffffc0f;
49
+ st->r[0] = (t0) &0xffc0fffffff;
50
+ st->r[1] = ((t0 >> 44) | (t1 << 20)) & 0xfffffc0ffff;
51
+ st->r[2] = ((t1 >> 24)) & 0x00ffffffc0f;
51
52
 
52
- /* h = 0 */
53
- st->h[0] = 0;
54
- st->h[1] = 0;
55
- st->h[2] = 0;
53
+ /* h = 0 */
54
+ st->h[0] = 0;
55
+ st->h[1] = 0;
56
+ st->h[2] = 0;
56
57
 
57
- /* save pad for later */
58
- st->pad[0] = LOAD64_LE(&key[16]);
59
- st->pad[1] = LOAD64_LE(&key[24]);
58
+ /* save pad for later */
59
+ st->pad[0] = LOAD64_LE(&key[16]);
60
+ st->pad[1] = LOAD64_LE(&key[24]);
60
61
 
61
- st->leftover = 0;
62
- st->final = 0;
62
+ st->leftover = 0;
63
+ st->final = 0;
63
64
  }
64
65
 
65
66
  static void
66
- poly1305_blocks(poly1305_state_internal_t *st, const unsigned char *m, unsigned long long bytes)
67
+ poly1305_blocks(poly1305_state_internal_t *st, const unsigned char *m,
68
+ unsigned long long bytes)
67
69
  {
68
- const unsigned long long hibit = (st->final) ? 0ULL : (1ULL << 40); /* 1 << 128 */
69
- unsigned long long r0,r1,r2;
70
- unsigned long long s1,s2;
71
- unsigned long long h0,h1,h2;
72
- unsigned long long c;
73
- uint128_t d0,d1,d2,d;
74
-
75
- r0 = st->r[0];
76
- r1 = st->r[1];
77
- r2 = st->r[2];
78
-
79
- h0 = st->h[0];
80
- h1 = st->h[1];
81
- h2 = st->h[2];
82
-
83
- s1 = r1 * (5 << 2);
84
- s2 = r2 * (5 << 2);
85
-
86
- while (bytes >= poly1305_block_size) {
87
- unsigned long long t0,t1;
88
-
89
- /* h += m[i] */
90
- t0 = LOAD64_LE(&m[0]);
91
- t1 = LOAD64_LE(&m[8]);
92
-
93
- h0 += (( t0 ) & 0xfffffffffff);
94
- h1 += (((t0 >> 44) | (t1 << 20)) & 0xfffffffffff);
95
- h2 += (((t1 >> 24) ) & 0x3ffffffffff) | hibit;
96
-
97
- /* h *= r */
98
- MUL(d0, h0, r0); MUL(d, h1, s2); ADD(d0, d); MUL(d, h2, s1); ADD(d0, d);
99
- MUL(d1, h0, r1); MUL(d, h1, r0); ADD(d1, d); MUL(d, h2, s2); ADD(d1, d);
100
- MUL(d2, h0, r2); MUL(d, h1, r1); ADD(d2, d); MUL(d, h2, r0); ADD(d2, d);
101
-
102
- /* (partial) h %= p */
103
- c = SHR(d0, 44); h0 = LO(d0) & 0xfffffffffff;
104
- ADDLO(d1, c); c = SHR(d1, 44); h1 = LO(d1) & 0xfffffffffff;
105
- ADDLO(d2, c); c = SHR(d2, 42); h2 = LO(d2) & 0x3ffffffffff;
106
- h0 += c * 5; c = (h0 >> 44); h0 = h0 & 0xfffffffffff;
107
- h1 += c;
108
-
109
- m += poly1305_block_size;
110
- bytes -= poly1305_block_size;
111
- }
70
+ const unsigned long long hibit =
71
+ (st->final) ? 0ULL : (1ULL << 40); /* 1 << 128 */
72
+ unsigned long long r0, r1, r2;
73
+ unsigned long long s1, s2;
74
+ unsigned long long h0, h1, h2;
75
+ unsigned long long c;
76
+ uint128_t d0, d1, d2, d;
77
+
78
+ r0 = st->r[0];
79
+ r1 = st->r[1];
80
+ r2 = st->r[2];
81
+
82
+ h0 = st->h[0];
83
+ h1 = st->h[1];
84
+ h2 = st->h[2];
85
+
86
+ s1 = r1 * (5 << 2);
87
+ s2 = r2 * (5 << 2);
88
+
89
+ while (bytes >= poly1305_block_size) {
90
+ unsigned long long t0, t1;
91
+
92
+ /* h += m[i] */
93
+ t0 = LOAD64_LE(&m[0]);
94
+ t1 = LOAD64_LE(&m[8]);
95
+
96
+ h0 += ((t0) &0xfffffffffff);
97
+ h1 += (((t0 >> 44) | (t1 << 20)) & 0xfffffffffff);
98
+ h2 += (((t1 >> 24)) & 0x3ffffffffff) | hibit;
99
+
100
+ /* h *= r */
101
+ MUL(d0, h0, r0);
102
+ MUL(d, h1, s2);
103
+ ADD(d0, d);
104
+ MUL(d, h2, s1);
105
+ ADD(d0, d);
106
+ MUL(d1, h0, r1);
107
+ MUL(d, h1, r0);
108
+ ADD(d1, d);
109
+ MUL(d, h2, s2);
110
+ ADD(d1, d);
111
+ MUL(d2, h0, r2);
112
+ MUL(d, h1, r1);
113
+ ADD(d2, d);
114
+ MUL(d, h2, r0);
115
+ ADD(d2, d);
116
+
117
+ /* (partial) h %= p */
118
+ c = SHR(d0, 44);
119
+ h0 = LO(d0) & 0xfffffffffff;
120
+ ADDLO(d1, c);
121
+ c = SHR(d1, 44);
122
+ h1 = LO(d1) & 0xfffffffffff;
123
+ ADDLO(d2, c);
124
+ c = SHR(d2, 42);
125
+ h2 = LO(d2) & 0x3ffffffffff;
126
+ h0 += c * 5;
127
+ c = (h0 >> 44);
128
+ h0 = h0 & 0xfffffffffff;
129
+ h1 += c;
112
130
 
113
- st->h[0] = h0;
114
- st->h[1] = h1;
115
- st->h[2] = h2;
116
- }
131
+ m += poly1305_block_size;
132
+ bytes -= poly1305_block_size;
133
+ }
117
134
 
135
+ st->h[0] = h0;
136
+ st->h[1] = h1;
137
+ st->h[2] = h2;
138
+ }
118
139
 
119
140
  static POLY1305_NOINLINE void
120
141
  poly1305_finish(poly1305_state_internal_t *st, unsigned char mac[16])
121
142
  {
122
- unsigned long long h0,h1,h2,c;
123
- unsigned long long g0,g1,g2;
124
- unsigned long long t0,t1;
125
-
126
- /* process the remaining block */
127
- if (st->leftover) {
128
- unsigned long long i = st->leftover;
129
- st->buffer[i] = 1;
130
- for (i = i + 1; i < poly1305_block_size; i++)
131
- st->buffer[i] = 0;
132
- st->final = 1;
133
- poly1305_blocks(st, st->buffer, poly1305_block_size);
134
- }
143
+ unsigned long long h0, h1, h2, c;
144
+ unsigned long long g0, g1, g2;
145
+ unsigned long long t0, t1;
135
146
 
136
- /* fully carry h */
137
- h0 = st->h[0];
138
- h1 = st->h[1];
139
- h2 = st->h[2];
140
-
141
- c = (h1 >> 44); h1 &= 0xfffffffffff;
142
- h2 += c; c = (h2 >> 42); h2 &= 0x3ffffffffff;
143
- h0 += c * 5; c = (h0 >> 44); h0 &= 0xfffffffffff;
144
- h1 += c; c = (h1 >> 44); h1 &= 0xfffffffffff;
145
- h2 += c; c = (h2 >> 42); h2 &= 0x3ffffffffff;
146
- h0 += c * 5; c = (h0 >> 44); h0 &= 0xfffffffffff;
147
- h1 += c;
147
+ /* process the remaining block */
148
+ if (st->leftover) {
149
+ unsigned long long i = st->leftover;
148
150
 
149
- /* compute h + -p */
150
- g0 = h0 + 5; c = (g0 >> 44); g0 &= 0xfffffffffff;
151
- g1 = h1 + c; c = (g1 >> 44); g1 &= 0xfffffffffff;
152
- g2 = h2 + c - (1ULL << 42);
153
-
154
- /* select h if h < p, or h + -p if h >= p */
155
- c = (g2 >> ((sizeof(unsigned long long) * 8) - 1)) - 1;
156
- g0 &= c;
157
- g1 &= c;
158
- g2 &= c;
159
- c = ~c;
160
- h0 = (h0 & c) | g0;
161
- h1 = (h1 & c) | g1;
162
- h2 = (h2 & c) | g2;
163
-
164
- /* h = (h + pad) */
165
- t0 = st->pad[0];
166
- t1 = st->pad[1];
167
-
168
- h0 += (( t0 ) & 0xfffffffffff) ; c = (h0 >> 44); h0 &= 0xfffffffffff;
169
- h1 += (((t0 >> 44) | (t1 << 20)) & 0xfffffffffff) + c; c = (h1 >> 44); h1 &= 0xfffffffffff;
170
- h2 += (((t1 >> 24) ) & 0x3ffffffffff) + c; h2 &= 0x3ffffffffff;
171
-
172
- /* mac = h % (2^128) */
173
- h0 = ((h0 ) | (h1 << 44));
174
- h1 = ((h1 >> 20) | (h2 << 24));
175
-
176
- STORE64_LE(&mac[0], h0);
177
- STORE64_LE(&mac[8], h1);
178
-
179
- /* zero out the state */
180
- sodium_memzero((void *)st, sizeof *st);
151
+ st->buffer[i] = 1;
152
+
153
+ for (i = i + 1; i < poly1305_block_size; i++) {
154
+ st->buffer[i] = 0;
155
+ }
156
+ st->final = 1;
157
+ poly1305_blocks(st, st->buffer, poly1305_block_size);
158
+ }
159
+
160
+ /* fully carry h */
161
+ h0 = st->h[0];
162
+ h1 = st->h[1];
163
+ h2 = st->h[2];
164
+
165
+ c = (h1 >> 44);
166
+ h1 &= 0xfffffffffff;
167
+ h2 += c;
168
+ c = (h2 >> 42);
169
+ h2 &= 0x3ffffffffff;
170
+ h0 += c * 5;
171
+ c = (h0 >> 44);
172
+ h0 &= 0xfffffffffff;
173
+ h1 += c;
174
+ c = (h1 >> 44);
175
+ h1 &= 0xfffffffffff;
176
+ h2 += c;
177
+ c = (h2 >> 42);
178
+ h2 &= 0x3ffffffffff;
179
+ h0 += c * 5;
180
+ c = (h0 >> 44);
181
+ h0 &= 0xfffffffffff;
182
+ h1 += c;
183
+
184
+ /* compute h + -p */
185
+ g0 = h0 + 5;
186
+ c = (g0 >> 44);
187
+ g0 &= 0xfffffffffff;
188
+ g1 = h1 + c;
189
+ c = (g1 >> 44);
190
+ g1 &= 0xfffffffffff;
191
+ g2 = h2 + c - (1ULL << 42);
192
+
193
+ /* select h if h < p, or h + -p if h >= p */
194
+ c = (g2 >> ((sizeof(unsigned long long) * 8) - 1)) - 1;
195
+ g0 &= c;
196
+ g1 &= c;
197
+ g2 &= c;
198
+ c = ~c;
199
+ h0 = (h0 & c) | g0;
200
+ h1 = (h1 & c) | g1;
201
+ h2 = (h2 & c) | g2;
202
+
203
+ /* h = (h + pad) */
204
+ t0 = st->pad[0];
205
+ t1 = st->pad[1];
206
+
207
+ h0 += ((t0) &0xfffffffffff);
208
+ c = (h0 >> 44);
209
+ h0 &= 0xfffffffffff;
210
+ h1 += (((t0 >> 44) | (t1 << 20)) & 0xfffffffffff) + c;
211
+ c = (h1 >> 44);
212
+ h1 &= 0xfffffffffff;
213
+ h2 += (((t1 >> 24)) & 0x3ffffffffff) + c;
214
+ h2 &= 0x3ffffffffff;
215
+
216
+ /* mac = h % (2^128) */
217
+ h0 = ((h0) | (h1 << 44));
218
+ h1 = ((h1 >> 20) | (h2 << 24));
219
+
220
+ STORE64_LE(&mac[0], h0);
221
+ STORE64_LE(&mac[8], h1);
222
+
223
+ /* zero out the state */
224
+ sodium_memzero((void *) st, sizeof *st);
181
225
  }
@@ -1,7 +1,10 @@
1
1
 
2
- #include "crypto_onetimeauth_poly1305.h"
3
2
  #include "onetimeauth_poly1305.h"
3
+ #include "crypto_onetimeauth_poly1305.h"
4
+ #include "private/common.h"
5
+ #include "randombytes.h"
4
6
  #include "runtime.h"
7
+
5
8
  #include "donna/poly1305_donna.h"
6
9
  #if defined(HAVE_TI_MODE) && defined(HAVE_EMMINTRIN_H)
7
10
  # include "sse2/poly1305_sse2.h"
@@ -20,7 +23,7 @@ crypto_onetimeauth_poly1305(unsigned char *out, const unsigned char *in,
20
23
  int
21
24
  crypto_onetimeauth_poly1305_verify(const unsigned char *h,
22
25
  const unsigned char *in,
23
- unsigned long long inlen,
26
+ unsigned long long inlen,
24
27
  const unsigned char *k)
25
28
  {
26
29
  return implementation->onetimeauth_verify(h, in, inlen, k);
@@ -49,15 +52,30 @@ crypto_onetimeauth_poly1305_final(crypto_onetimeauth_poly1305_state *state,
49
52
  }
50
53
 
51
54
  size_t
52
- crypto_onetimeauth_poly1305_bytes(void) {
55
+ crypto_onetimeauth_poly1305_bytes(void)
56
+ {
53
57
  return crypto_onetimeauth_poly1305_BYTES;
54
58
  }
55
59
 
56
60
  size_t
57
- crypto_onetimeauth_poly1305_keybytes(void) {
61
+ crypto_onetimeauth_poly1305_keybytes(void)
62
+ {
58
63
  return crypto_onetimeauth_poly1305_KEYBYTES;
59
64
  }
60
65
 
66
+ size_t
67
+ crypto_onetimeauth_poly1305_statebytes(void)
68
+ {
69
+ return sizeof(crypto_onetimeauth_poly1305_state);
70
+ }
71
+
72
+ void
73
+ crypto_onetimeauth_poly1305_keygen(
74
+ unsigned char k[crypto_onetimeauth_poly1305_KEYBYTES])
75
+ {
76
+ randombytes_buf(k, crypto_onetimeauth_poly1305_KEYBYTES);
77
+ }
78
+
61
79
  int
62
80
  _crypto_onetimeauth_poly1305_pick_best_implementation(void)
63
81
  {
@@ -2,22 +2,20 @@
2
2
  #ifndef onetimeauth_poly1305_H
3
3
  #define onetimeauth_poly1305_H
4
4
 
5
+ #include "crypto_onetimeauth_poly1305.h"
6
+
5
7
  typedef struct crypto_onetimeauth_poly1305_implementation {
6
- int (*onetimeauth)(unsigned char *out,
7
- const unsigned char *in,
8
- unsigned long long inlen,
9
- const unsigned char *k);
10
- int (*onetimeauth_verify)(const unsigned char *h,
11
- const unsigned char *in,
12
- unsigned long long inlen,
13
- const unsigned char *k);
8
+ int (*onetimeauth)(unsigned char *out, const unsigned char *in,
9
+ unsigned long long inlen, const unsigned char *k);
10
+ int (*onetimeauth_verify)(const unsigned char *h, const unsigned char *in,
11
+ unsigned long long inlen, const unsigned char *k);
14
12
  int (*onetimeauth_init)(crypto_onetimeauth_poly1305_state *state,
15
- const unsigned char *key);
13
+ const unsigned char * key);
16
14
  int (*onetimeauth_update)(crypto_onetimeauth_poly1305_state *state,
17
- const unsigned char *in,
18
- unsigned long long inlen);
15
+ const unsigned char * in,
16
+ unsigned long long inlen);
19
17
  int (*onetimeauth_final)(crypto_onetimeauth_poly1305_state *state,
20
- unsigned char *out);
18
+ unsigned char * out);
21
19
  } crypto_onetimeauth_poly1305_implementation;
22
20
 
23
21
  #endif
@@ -2,67 +2,73 @@
2
2
  #include <stdint.h>
3
3
  #include <string.h>
4
4
 
5
+ #include "../onetimeauth_poly1305.h"
5
6
  #include "crypto_verify_16.h"
6
- #include "utils.h"
7
7
  #include "poly1305_sse2.h"
8
- #include "../onetimeauth_poly1305.h"
8
+ #include "private/common.h"
9
+ #include "private/sse2_64_32.h"
10
+ #include "utils.h"
9
11
 
10
12
  #if defined(HAVE_TI_MODE) && defined(HAVE_EMMINTRIN_H)
11
13
 
12
- #pragma GCC target("sse2")
14
+ # ifdef __GNUC__
15
+ # pragma GCC target("sse2")
16
+ # endif
13
17
 
14
- #include <emmintrin.h>
18
+ # include <emmintrin.h>
15
19
 
16
20
  typedef __m128i xmmi;
17
21
 
18
- #if defined(__SIZEOF_INT128__)
22
+ # if defined(__SIZEOF_INT128__)
19
23
  typedef unsigned __int128 uint128_t;
20
- #else
21
- typedef unsigned uint128_t __attribute__ ((mode(TI)));
22
- #endif
24
+ # else
25
+ typedef unsigned uint128_t __attribute__((mode(TI)));
26
+ # endif
23
27
 
24
- #if defined(_MSC_VER)
25
- # define POLY1305_NOINLINE __declspec(noinline)
26
- #elif defined(__GNUC__)
27
- # define POLY1305_NOINLINE __attribute__ ((noinline))
28
- #else
29
- # define POLY1305_NOINLINE
30
- #endif
28
+ # if defined(_MSC_VER)
29
+ # define POLY1305_NOINLINE __declspec(noinline)
30
+ # elif defined(__GNUC__)
31
+ # define POLY1305_NOINLINE __attribute__((noinline))
32
+ # else
33
+ # define POLY1305_NOINLINE
34
+ # endif
31
35
 
32
- #define poly1305_block_size 32
36
+ # define poly1305_block_size 32
33
37
 
34
38
  enum poly1305_state_flags_t {
35
- poly1305_started = 1,
36
- poly1305_final_shift8 = 4,
39
+ poly1305_started = 1,
40
+ poly1305_final_shift8 = 4,
37
41
  poly1305_final_shift16 = 8,
38
- poly1305_final_r2_r = 16, /* use [r^2,r] for the final block */
39
- poly1305_final_r_1 = 32, /* use [r,1] for the final block */
42
+ poly1305_final_r2_r = 16, /* use [r^2,r] for the final block */
43
+ poly1305_final_r_1 = 32, /* use [r,1] for the final block */
40
44
  };
41
45
 
42
46
  typedef struct poly1305_state_internal_t {
43
47
  union {
44
48
  uint64_t h[3];
45
49
  uint32_t hh[10];
46
- }; /* 40 bytes */
47
- uint32_t R[5]; /* 20 bytes */
48
- uint32_t R2[5]; /* 20 bytes */
49
- uint32_t R4[5]; /* 20 bytes */
50
- uint64_t pad[2]; /* 16 bytes */
51
- uint64_t flags; /* 8 bytes */
52
- unsigned long long leftover; /* 8 bytes */
53
- unsigned char buffer[poly1305_block_size]; /* 32 bytes */
54
- } poly1305_state_internal_t; /* 164 bytes total */
50
+ }; /* 40 bytes */
51
+ uint32_t R[5]; /* 20 bytes */
52
+ uint32_t R2[5]; /* 20 bytes */
53
+ uint32_t R4[5]; /* 20 bytes */
54
+ uint64_t pad[2]; /* 16 bytes */
55
+ uint64_t flags; /* 8 bytes */
56
+ unsigned long long leftover; /* 8 bytes */
57
+ unsigned char buffer[poly1305_block_size]; /* 32 bytes */
58
+ } poly1305_state_internal_t; /* 164 bytes total */
55
59
 
56
60
  /*
57
- * _mm_loadl_epi64() is turned into a simple MOVQ. So, unaligned accesses are totally fine, even though this intrinsic requires a __m128i* input.
61
+ * _mm_loadl_epi64() is turned into a simple MOVQ. So, unaligned accesses are
62
+ * totally fine, even though this intrinsic requires a __m128i* input.
58
63
  * This confuses dynamic analysis, so force alignment, only in debug mode.
59
64
  */
60
- #ifdef DEBUG
65
+ # ifdef DEBUG
61
66
  static xmmi
62
67
  _fakealign_mm_loadl_epi64(const void *m)
63
68
  {
64
69
  xmmi tmp;
65
70
  memcpy(&tmp, m, 8);
71
+
66
72
  return _mm_loadl_epi64(&tmp);
67
73
  }
68
74
  # define _mm_loadl_epi64(X) _fakealign_mm_loadl_epi64(X)
@@ -70,51 +76,71 @@ _fakealign_mm_loadl_epi64(const void *m)
70
76
 
71
77
  /* copy 0-31 bytes */
72
78
  static inline void
73
- poly1305_block_copy31(unsigned char *dst, const unsigned char *src, unsigned long long bytes)
79
+ poly1305_block_copy31(unsigned char *dst, const unsigned char *src,
80
+ unsigned long long bytes)
74
81
  {
75
82
  if (bytes & 16) {
76
83
  _mm_store_si128((xmmi *) (void *) dst,
77
84
  _mm_loadu_si128((const xmmi *) (const void *) src));
78
- src += 16; dst += 16;
85
+ src += 16;
86
+ dst += 16;
87
+ }
88
+ if (bytes & 8) {
89
+ memcpy(dst, src, 8);
90
+ src += 8;
91
+ dst += 8;
92
+ }
93
+ if (bytes & 4) {
94
+ memcpy(dst, src, 4);
95
+ src += 4;
96
+ dst += 4;
97
+ }
98
+ if (bytes & 2) {
99
+ memcpy(dst, src, 2);
100
+ src += 2;
101
+ dst += 2;
102
+ }
103
+ if (bytes & 1) {
104
+ *dst = *src;
79
105
  }
80
- if (bytes & 8) { memcpy(dst, src, 8); src += 8; dst += 8; }
81
- if (bytes & 4) { memcpy(dst, src, 4); src += 4; dst += 4; }
82
- if (bytes & 2) { memcpy(dst, src, 2); src += 2; dst += 2; }
83
- if (bytes & 1) { *dst = *src; }
84
106
  }
85
107
 
86
108
  static POLY1305_NOINLINE void
87
- poly1305_init_ext(poly1305_state_internal_t *st,
88
- const unsigned char key[32], unsigned long long bytes)
109
+ poly1305_init_ext(poly1305_state_internal_t *st, const unsigned char key[32],
110
+ unsigned long long bytes)
89
111
  {
90
- uint32_t *R;
91
- uint128_t d[3];
92
- uint64_t r0,r1,r2;
93
- uint64_t rt0,rt1,rt2,st2,c;
94
- uint64_t t0,t1;
112
+ uint32_t *R;
113
+ uint128_t d[3];
114
+ uint64_t r0, r1, r2;
115
+ uint64_t rt0, rt1, rt2, st2, c;
116
+ uint64_t t0, t1;
95
117
  unsigned long long i;
96
118
 
97
- if (!bytes) bytes = ~(unsigned long long)0;
98
-
119
+ if (!bytes) {
120
+ bytes = ~(unsigned long long) 0;
121
+ }
99
122
  /* H = 0 */
100
- _mm_storeu_si128((xmmi *)(void *)&st->hh[0], _mm_setzero_si128());
101
- _mm_storeu_si128((xmmi *)(void *)&st->hh[4], _mm_setzero_si128());
102
- _mm_storeu_si128((xmmi *)(void *)&st->hh[8], _mm_setzero_si128());
123
+ _mm_storeu_si128((xmmi *) (void *) &st->hh[0], _mm_setzero_si128());
124
+ _mm_storeu_si128((xmmi *) (void *) &st->hh[4], _mm_setzero_si128());
125
+ _mm_storeu_si128((xmmi *) (void *) &st->hh[8], _mm_setzero_si128());
103
126
 
104
127
  /* clamp key */
105
128
  memcpy(&t0, key, 8);
106
129
  memcpy(&t1, key + 8, 8);
107
- r0 = t0 & 0xffc0fffffff; t0 >>= 44; t0 |= t1 << 20;
108
- r1 = t0 & 0xfffffc0ffff; t1 >>= 24;
130
+ r0 = t0 & 0xffc0fffffff;
131
+ t0 >>= 44;
132
+ t0 |= t1 << 20;
133
+ r1 = t0 & 0xfffffc0ffff;
134
+ t1 >>= 24;
109
135
  r2 = t1 & 0x00ffffffc0f;
110
136
 
111
137
  /* r^1 */
112
- R = st->R;
113
- R[0] = (uint32_t)( r0 ) & 0x3ffffff;
114
- R[1] = (uint32_t)(( r0 >> 26) | ( r1 << 18)) & 0x3ffffff;
115
- R[2] = (uint32_t)(( r1 >> 8) ) & 0x3ffffff;
116
- R[3] = (uint32_t)(( r1 >> 34) | ( r2 << 10)) & 0x3ffffff;
117
- R[4] = (uint32_t)(( r2 >> 16) );
138
+ R = st->R;
139
+ R[0] = (uint32_t)(r0) &0x3ffffff;
140
+ R[1] = (uint32_t)((r0 >> 26) | (r1 << 18)) & 0x3ffffff;
141
+ R[2] = (uint32_t)((r1 >> 8)) & 0x3ffffff;
142
+ R[3] = (uint32_t)((r1 >> 34) | (r2 << 10)) & 0x3ffffff;
143
+ R[4] = (uint32_t)((r2 >> 16));
118
144
 
119
145
  /* save pad */
120
146
  memcpy(&st->pad[0], key + 16, 8);
@@ -138,24 +164,37 @@ poly1305_init_ext(poly1305_state_internal_t *st,
138
164
  }
139
165
  }
140
166
  st2 = rt2 * (5 << 2);
141
- d[0] = ((uint128_t)rt0 * rt0) + ((uint128_t)(rt1 * 2) * st2);
142
- d[1] = ((uint128_t)rt2 * st2) + ((uint128_t)(rt0 * 2) * rt1);
143
- d[2] = ((uint128_t)rt1 * rt1) + ((uint128_t)(rt2 * 2) * rt0);
144
- rt0 = (uint64_t)d[0] & 0xfffffffffff; c = (uint64_t)(d[0] >> 44);
145
- d[1] += c ; rt1 = (uint64_t)d[1] & 0xfffffffffff; c = (uint64_t)(d[1] >> 44);
146
- d[2] += c ; rt2 = (uint64_t)d[2] & 0x3ffffffffff; c = (uint64_t)(d[2] >> 42);
147
- rt0 += c * 5; c = (rt0 >> 44); rt0 = rt0 & 0xfffffffffff;
148
- rt1 += c ; c = (rt1 >> 44); rt1 = rt1 & 0xfffffffffff;
149
- rt2 += c ; /* even if rt2 overflows, it will still fit in rp4 safely, and is safe to multiply with */
150
-
151
- R[0] = (uint32_t)( rt0 ) & 0x3ffffff;
167
+
168
+ d[0] = ((uint128_t) rt0 * rt0) + ((uint128_t)(rt1 * 2) * st2);
169
+ d[1] = ((uint128_t) rt2 * st2) + ((uint128_t)(rt0 * 2) * rt1);
170
+ d[2] = ((uint128_t) rt1 * rt1) + ((uint128_t)(rt2 * 2) * rt0);
171
+
172
+ rt0 = (uint64_t) d[0] & 0xfffffffffff;
173
+ c = (uint64_t)(d[0] >> 44);
174
+ d[1] += c;
175
+
176
+ rt1 = (uint64_t) d[1] & 0xfffffffffff;
177
+ c = (uint64_t)(d[1] >> 44);
178
+ d[2] += c;
179
+
180
+ rt2 = (uint64_t) d[2] & 0x3ffffffffff;
181
+ c = (uint64_t)(d[2] >> 42);
182
+ rt0 += c * 5;
183
+ c = (rt0 >> 44);
184
+ rt0 = rt0 & 0xfffffffffff;
185
+ rt1 += c;
186
+ c = (rt1 >> 44);
187
+ rt1 = rt1 & 0xfffffffffff;
188
+ rt2 += c; /* even if rt2 overflows, it will still fit in rp4 safely, and
189
+ is safe to multiply with */
190
+
191
+ R[0] = (uint32_t)(rt0) &0x3ffffff;
152
192
  R[1] = (uint32_t)((rt0 >> 26) | (rt1 << 18)) & 0x3ffffff;
153
- R[2] = (uint32_t)((rt1 >> 8) ) & 0x3ffffff;
193
+ R[2] = (uint32_t)((rt1 >> 8)) & 0x3ffffff;
154
194
  R[3] = (uint32_t)((rt1 >> 34) | (rt2 << 10)) & 0x3ffffff;
155
- R[4] = (uint32_t)((rt2 >> 16) );
195
+ R[4] = (uint32_t)((rt2 >> 16));
156
196
  }
157
-
158
- st->flags = 0;
197
+ st->flags = 0;
159
198
  st->leftover = 0U;
160
199
  }
161
200
 
@@ -163,25 +202,35 @@ static POLY1305_NOINLINE void
163
202
  poly1305_blocks(poly1305_state_internal_t *st, const unsigned char *m,
164
203
  unsigned long long bytes)
165
204
  {
166
- CRYPTO_ALIGN(64) xmmi HIBIT = _mm_shuffle_epi32(_mm_cvtsi32_si128(1 << 24), _MM_SHUFFLE(1,0,1,0));
167
- const xmmi MMASK = _mm_shuffle_epi32(_mm_cvtsi32_si128((1 << 26) - 1), _MM_SHUFFLE(1,0,1,0));
168
- const xmmi FIVE = _mm_shuffle_epi32(_mm_cvtsi32_si128(5), _MM_SHUFFLE(1,0,1,0));
169
- xmmi H0,H1,H2,H3,H4;
170
- xmmi T0,T1,T2,T3,T4,T5,T6,T7,T8;
171
- xmmi M0,M1,M2,M3,M4;
172
- xmmi M5,M6,M7,M8;
173
- xmmi C1,C2;
174
- xmmi R20,R21,R22,R23,R24,S21,S22,S23,S24;
175
- xmmi R40,R41,R42,R43,R44,S41,S42,S43,S44;
176
-
177
- if (st->flags & poly1305_final_shift8) HIBIT = _mm_srli_si128(HIBIT, 8);
178
- if (st->flags & poly1305_final_shift16) HIBIT = _mm_setzero_si128();
179
-
205
+ CRYPTO_ALIGN(64)
206
+ xmmi HIBIT =
207
+ _mm_shuffle_epi32(_mm_cvtsi32_si128(1 << 24), _MM_SHUFFLE(1, 0, 1, 0));
208
+ const xmmi MMASK = _mm_shuffle_epi32(_mm_cvtsi32_si128((1 << 26) - 1),
209
+ _MM_SHUFFLE(1, 0, 1, 0));
210
+ const xmmi FIVE =
211
+ _mm_shuffle_epi32(_mm_cvtsi32_si128(5), _MM_SHUFFLE(1, 0, 1, 0));
212
+ xmmi H0, H1, H2, H3, H4;
213
+ xmmi T0, T1, T2, T3, T4, T5, T6, T7, T8;
214
+ xmmi M0, M1, M2, M3, M4;
215
+ xmmi M5, M6, M7, M8;
216
+ xmmi C1, C2;
217
+ xmmi R20, R21, R22, R23, R24, S21, S22, S23, S24;
218
+ xmmi R40, R41, R42, R43, R44, S41, S42, S43, S44;
219
+
220
+ if (st->flags & poly1305_final_shift8) {
221
+ HIBIT = _mm_srli_si128(HIBIT, 8);
222
+ }
223
+ if (st->flags & poly1305_final_shift16) {
224
+ HIBIT = _mm_setzero_si128();
225
+ }
180
226
  if (!(st->flags & poly1305_started)) {
181
227
  /* H = [Mx,My] */
182
-
183
- T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((const xmmi *)(const void *)(m + 0)), _mm_loadl_epi64((const xmmi *)(const void *)(m + 16)));
184
- T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((const xmmi *)(const void *)(m + 8)), _mm_loadl_epi64((const xmmi *)(const void *)(m + 24)));
228
+ T5 = _mm_unpacklo_epi64(
229
+ _mm_loadl_epi64((const xmmi *) (const void *) (m + 0)),
230
+ _mm_loadl_epi64((const xmmi *) (const void *) (m + 16)));
231
+ T6 = _mm_unpacklo_epi64(
232
+ _mm_loadl_epi64((const xmmi *) (const void *) (m + 8)),
233
+ _mm_loadl_epi64((const xmmi *) (const void *) (m + 24)));
185
234
  H0 = _mm_and_si128(MMASK, T5);
186
235
  H1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
187
236
  T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12));
@@ -193,49 +242,47 @@ poly1305_blocks(poly1305_state_internal_t *st, const unsigned char *m,
193
242
  bytes -= 32;
194
243
  st->flags |= poly1305_started;
195
244
  } else {
196
- T0 = _mm_loadu_si128((const xmmi *)(const void *)&st->hh[0]);
197
- T1 = _mm_loadu_si128((const xmmi *)(const void *)&st->hh[4]);
198
- T2 = _mm_loadu_si128((const xmmi *)(const void *)&st->hh[8]);
199
- H0 = _mm_shuffle_epi32(T0, _MM_SHUFFLE(1,1,0,0));
200
- H1 = _mm_shuffle_epi32(T0, _MM_SHUFFLE(3,3,2,2));
201
- H2 = _mm_shuffle_epi32(T1, _MM_SHUFFLE(1,1,0,0));
202
- H3 = _mm_shuffle_epi32(T1, _MM_SHUFFLE(3,3,2,2));
203
- H4 = _mm_shuffle_epi32(T2, _MM_SHUFFLE(1,1,0,0));
245
+ T0 = _mm_loadu_si128((const xmmi *) (const void *) &st->hh[0]);
246
+ T1 = _mm_loadu_si128((const xmmi *) (const void *) &st->hh[4]);
247
+ T2 = _mm_loadu_si128((const xmmi *) (const void *) &st->hh[8]);
248
+ H0 = _mm_shuffle_epi32(T0, _MM_SHUFFLE(1, 1, 0, 0));
249
+ H1 = _mm_shuffle_epi32(T0, _MM_SHUFFLE(3, 3, 2, 2));
250
+ H2 = _mm_shuffle_epi32(T1, _MM_SHUFFLE(1, 1, 0, 0));
251
+ H3 = _mm_shuffle_epi32(T1, _MM_SHUFFLE(3, 3, 2, 2));
252
+ H4 = _mm_shuffle_epi32(T2, _MM_SHUFFLE(1, 1, 0, 0));
204
253
  }
205
-
206
- if (st->flags & (poly1305_final_r2_r|poly1305_final_r_1)) {
254
+ if (st->flags & (poly1305_final_r2_r | poly1305_final_r_1)) {
207
255
  if (st->flags & poly1305_final_r2_r) {
208
256
  /* use [r^2, r] */
209
- T2 = _mm_loadu_si128((const xmmi *)(const void *)&st->R[0]);
210
- T3 = _mm_cvtsi32_si128(st->R[4]);
211
- T0 = _mm_loadu_si128((const xmmi *)(const void *)&st->R2[0]);
212
- T1 = _mm_cvtsi32_si128(st->R2[4]);
213
- T4 = _mm_unpacklo_epi32(T0, T2);
214
- T5 = _mm_unpackhi_epi32(T0, T2);
257
+ T2 = _mm_loadu_si128((const xmmi *) (const void *) &st->R[0]);
258
+ T3 = _mm_cvtsi32_si128(st->R[4]);
259
+ T0 = _mm_loadu_si128((const xmmi *) (const void *) &st->R2[0]);
260
+ T1 = _mm_cvtsi32_si128(st->R2[4]);
261
+ T4 = _mm_unpacklo_epi32(T0, T2);
262
+ T5 = _mm_unpackhi_epi32(T0, T2);
215
263
  R24 = _mm_unpacklo_epi64(T1, T3);
216
264
  } else {
217
265
  /* use [r^1, 1] */
218
- T0 = _mm_loadu_si128((const xmmi *)(const void *)&st->R[0]);
219
- T1 = _mm_cvtsi32_si128(st->R[4]);
220
- T2 = _mm_cvtsi32_si128(1);
221
- T4 = _mm_unpacklo_epi32(T0, T2);
222
- T5 = _mm_unpackhi_epi32(T0, T2);
266
+ T0 = _mm_loadu_si128((const xmmi *) (const void *) &st->R[0]);
267
+ T1 = _mm_cvtsi32_si128(st->R[4]);
268
+ T2 = _mm_cvtsi32_si128(1);
269
+ T4 = _mm_unpacklo_epi32(T0, T2);
270
+ T5 = _mm_unpackhi_epi32(T0, T2);
223
271
  R24 = T1;
224
272
  }
225
-
226
- R20 = _mm_shuffle_epi32(T4, _MM_SHUFFLE(1,1,0,0));
227
- R21 = _mm_shuffle_epi32(T4, _MM_SHUFFLE(3,3,2,2));
228
- R22 = _mm_shuffle_epi32(T5, _MM_SHUFFLE(1,1,0,0));
229
- R23 = _mm_shuffle_epi32(T5, _MM_SHUFFLE(3,3,2,2));
273
+ R20 = _mm_shuffle_epi32(T4, _MM_SHUFFLE(1, 1, 0, 0));
274
+ R21 = _mm_shuffle_epi32(T4, _MM_SHUFFLE(3, 3, 2, 2));
275
+ R22 = _mm_shuffle_epi32(T5, _MM_SHUFFLE(1, 1, 0, 0));
276
+ R23 = _mm_shuffle_epi32(T5, _MM_SHUFFLE(3, 3, 2, 2));
230
277
  } else {
231
278
  /* use [r^2, r^2] */
232
- T0 = _mm_loadu_si128((const xmmi *)(const void *)&st->R2[0]);
233
- T1 = _mm_cvtsi32_si128(st->R2[4]);
234
- R20 = _mm_shuffle_epi32(T0, _MM_SHUFFLE(0,0,0,0));
235
- R21 = _mm_shuffle_epi32(T0, _MM_SHUFFLE(1,1,1,1));
236
- R22 = _mm_shuffle_epi32(T0, _MM_SHUFFLE(2,2,2,2));
237
- R23 = _mm_shuffle_epi32(T0, _MM_SHUFFLE(3,3,3,3));
238
- R24 = _mm_shuffle_epi32(T1, _MM_SHUFFLE(0,0,0,0));
279
+ T0 = _mm_loadu_si128((const xmmi *) (const void *) &st->R2[0]);
280
+ T1 = _mm_cvtsi32_si128(st->R2[4]);
281
+ R20 = _mm_shuffle_epi32(T0, _MM_SHUFFLE(0, 0, 0, 0));
282
+ R21 = _mm_shuffle_epi32(T0, _MM_SHUFFLE(1, 1, 1, 1));
283
+ R22 = _mm_shuffle_epi32(T0, _MM_SHUFFLE(2, 2, 2, 2));
284
+ R23 = _mm_shuffle_epi32(T0, _MM_SHUFFLE(3, 3, 3, 3));
285
+ R24 = _mm_shuffle_epi32(T1, _MM_SHUFFLE(0, 0, 0, 0));
239
286
  }
240
287
  S21 = _mm_mul_epu32(R21, FIVE);
241
288
  S22 = _mm_mul_epu32(R22, FIVE);
@@ -243,74 +290,120 @@ poly1305_blocks(poly1305_state_internal_t *st, const unsigned char *m,
243
290
  S24 = _mm_mul_epu32(R24, FIVE);
244
291
 
245
292
  if (bytes >= 64) {
246
- T0 = _mm_loadu_si128((const xmmi *)(const void *)&st->R4[0]);
247
- T1 = _mm_cvtsi32_si128(st->R4[4]);
248
- R40 = _mm_shuffle_epi32(T0, _MM_SHUFFLE(0,0,0,0));
249
- R41 = _mm_shuffle_epi32(T0, _MM_SHUFFLE(1,1,1,1));
250
- R42 = _mm_shuffle_epi32(T0, _MM_SHUFFLE(2,2,2,2));
251
- R43 = _mm_shuffle_epi32(T0, _MM_SHUFFLE(3,3,3,3));
252
- R44 = _mm_shuffle_epi32(T1, _MM_SHUFFLE(0,0,0,0));
293
+ T0 = _mm_loadu_si128((const xmmi *) (const void *) &st->R4[0]);
294
+ T1 = _mm_cvtsi32_si128(st->R4[4]);
295
+ R40 = _mm_shuffle_epi32(T0, _MM_SHUFFLE(0, 0, 0, 0));
296
+ R41 = _mm_shuffle_epi32(T0, _MM_SHUFFLE(1, 1, 1, 1));
297
+ R42 = _mm_shuffle_epi32(T0, _MM_SHUFFLE(2, 2, 2, 2));
298
+ R43 = _mm_shuffle_epi32(T0, _MM_SHUFFLE(3, 3, 3, 3));
299
+ R44 = _mm_shuffle_epi32(T1, _MM_SHUFFLE(0, 0, 0, 0));
253
300
  S41 = _mm_mul_epu32(R41, FIVE);
254
301
  S42 = _mm_mul_epu32(R42, FIVE);
255
302
  S43 = _mm_mul_epu32(R43, FIVE);
256
303
  S44 = _mm_mul_epu32(R44, FIVE);
257
304
 
258
305
  while (bytes >= 64) {
259
- xmmi v00,v01,v02,v03,v04;
260
- xmmi v10,v11,v12,v13,v14;
261
- xmmi v20,v21,v22,v23,v24;
262
- xmmi v30,v31,v32,v33,v34;
263
- xmmi v40,v41,v42,v43,v44;
264
- xmmi T14,T15;
306
+ xmmi v00, v01, v02, v03, v04;
307
+ xmmi v10, v11, v12, v13, v14;
308
+ xmmi v20, v21, v22, v23, v24;
309
+ xmmi v30, v31, v32, v33, v34;
310
+ xmmi v40, v41, v42, v43, v44;
311
+ xmmi T14, T15;
265
312
 
266
313
  /* H *= [r^4,r^4], preload [Mx,My] */
267
314
  T15 = S42;
268
- T0 = H4; T0 = _mm_mul_epu32(T0, S41);
269
- v01 = H3; v01 = _mm_mul_epu32(v01, T15);
315
+ T0 = H4;
316
+ T0 = _mm_mul_epu32(T0, S41);
317
+ v01 = H3;
318
+ v01 = _mm_mul_epu32(v01, T15);
270
319
  T14 = S43;
271
- T1 = H4; T1 = _mm_mul_epu32(T1 , T15);
272
- v11 = H3; v11 = _mm_mul_epu32(v11, T14);
273
- T2 = H4; T2 = _mm_mul_epu32(T2 , T14); T0 = _mm_add_epi64(T0, v01);
320
+ T1 = H4;
321
+ T1 = _mm_mul_epu32(T1, T15);
322
+ v11 = H3;
323
+ v11 = _mm_mul_epu32(v11, T14);
324
+ T2 = H4;
325
+ T2 = _mm_mul_epu32(T2, T14);
326
+ T0 = _mm_add_epi64(T0, v01);
274
327
  T15 = S44;
275
- v02 = H2; v02 = _mm_mul_epu32(v02, T14);
276
- T3 = H4; T3 = _mm_mul_epu32(T3 , T15); T1 = _mm_add_epi64(T1, v11);
277
- v03 = H1; v03 = _mm_mul_epu32(v03, T15);
278
- v12 = H2; v12 = _mm_mul_epu32(v12, T15); T0 = _mm_add_epi64(T0, v02);
328
+ v02 = H2;
329
+ v02 = _mm_mul_epu32(v02, T14);
330
+ T3 = H4;
331
+ T3 = _mm_mul_epu32(T3, T15);
332
+ T1 = _mm_add_epi64(T1, v11);
333
+ v03 = H1;
334
+ v03 = _mm_mul_epu32(v03, T15);
335
+ v12 = H2;
336
+ v12 = _mm_mul_epu32(v12, T15);
337
+ T0 = _mm_add_epi64(T0, v02);
279
338
  T14 = R40;
280
- v21 = H3; v21 = _mm_mul_epu32(v21, T15);
281
- v31 = H3; v31 = _mm_mul_epu32(v31, T14); T0 = _mm_add_epi64(T0, v03);
282
- T4 = H4; T4 = _mm_mul_epu32(T4 , T14); T1 = _mm_add_epi64(T1, v12);
283
- v04 = H0; v04 = _mm_mul_epu32(v04, T14); T2 = _mm_add_epi64(T2, v21);
284
- v13 = H1; v13 = _mm_mul_epu32(v13, T14); T3 = _mm_add_epi64(T3, v31);
339
+ v21 = H3;
340
+ v21 = _mm_mul_epu32(v21, T15);
341
+ v31 = H3;
342
+ v31 = _mm_mul_epu32(v31, T14);
343
+ T0 = _mm_add_epi64(T0, v03);
344
+ T4 = H4;
345
+ T4 = _mm_mul_epu32(T4, T14);
346
+ T1 = _mm_add_epi64(T1, v12);
347
+ v04 = H0;
348
+ v04 = _mm_mul_epu32(v04, T14);
349
+ T2 = _mm_add_epi64(T2, v21);
350
+ v13 = H1;
351
+ v13 = _mm_mul_epu32(v13, T14);
352
+ T3 = _mm_add_epi64(T3, v31);
285
353
  T15 = R41;
286
- v22 = H2; v22 = _mm_mul_epu32(v22, T14);
287
- v32 = H2; v32 = _mm_mul_epu32(v32, T15); T0 = _mm_add_epi64(T0, v04);
288
- v41 = H3; v41 = _mm_mul_epu32(v41, T15); T1 = _mm_add_epi64(T1, v13);
289
- v14 = H0; v14 = _mm_mul_epu32(v14, T15); T2 = _mm_add_epi64(T2, v22);
354
+ v22 = H2;
355
+ v22 = _mm_mul_epu32(v22, T14);
356
+ v32 = H2;
357
+ v32 = _mm_mul_epu32(v32, T15);
358
+ T0 = _mm_add_epi64(T0, v04);
359
+ v41 = H3;
360
+ v41 = _mm_mul_epu32(v41, T15);
361
+ T1 = _mm_add_epi64(T1, v13);
362
+ v14 = H0;
363
+ v14 = _mm_mul_epu32(v14, T15);
364
+ T2 = _mm_add_epi64(T2, v22);
290
365
  T14 = R42;
291
- T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((const xmmi *)(const void *)(m + 0)), _mm_loadl_epi64((const xmmi *)(const void *)(m + 16)));
292
- v23 = H1; v23 = _mm_mul_epu32(v23, T15); T3 = _mm_add_epi64(T3, v32);
293
- v33 = H1; v33 = _mm_mul_epu32(v33, T14); T4 = _mm_add_epi64(T4, v41);
294
- v42 = H2; v42 = _mm_mul_epu32(v42, T14); T1 = _mm_add_epi64(T1, v14);
366
+ T5 = _mm_unpacklo_epi64(
367
+ _mm_loadl_epi64((const xmmi *) (const void *) (m + 0)),
368
+ _mm_loadl_epi64((const xmmi *) (const void *) (m + 16)));
369
+ v23 = H1;
370
+ v23 = _mm_mul_epu32(v23, T15);
371
+ T3 = _mm_add_epi64(T3, v32);
372
+ v33 = H1;
373
+ v33 = _mm_mul_epu32(v33, T14);
374
+ T4 = _mm_add_epi64(T4, v41);
375
+ v42 = H2;
376
+ v42 = _mm_mul_epu32(v42, T14);
377
+ T1 = _mm_add_epi64(T1, v14);
295
378
  T15 = R43;
296
- T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((const xmmi *)(const void *)(m + 8)), _mm_loadl_epi64((const xmmi *)(const void *)(m + 24)));
297
- v24 = H0; v24 = _mm_mul_epu32(v24, T14); T2 = _mm_add_epi64(T2, v23);
298
- v34 = H0; v34 = _mm_mul_epu32(v34, T15); T3 = _mm_add_epi64(T3, v33);
299
- M0 = _mm_and_si128(MMASK, T5);
300
- v43 = H1; v43 = _mm_mul_epu32(v43, T15); T4 = _mm_add_epi64(T4, v42);
301
- M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
302
- v44 = H0; v44 = _mm_mul_epu32(v44, R44); T2 = _mm_add_epi64(T2, v24);
303
- T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12));
304
- T3 = _mm_add_epi64(T3, v34);
305
- M3 = _mm_and_si128(MMASK, _mm_srli_epi64(T6, 14));
306
- T4 = _mm_add_epi64(T4, v43);
307
- M2 = _mm_and_si128(MMASK, T5);
308
- T4 = _mm_add_epi64(T4, v44);
309
- M4 = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT);
379
+ T6 = _mm_unpacklo_epi64(
380
+ _mm_loadl_epi64((const xmmi *) (const void *) (m + 8)),
381
+ _mm_loadl_epi64((const xmmi *) (const void *) (m + 24)));
382
+ v24 = H0;
383
+ v24 = _mm_mul_epu32(v24, T14);
384
+ T2 = _mm_add_epi64(T2, v23);
385
+ v34 = H0;
386
+ v34 = _mm_mul_epu32(v34, T15);
387
+ T3 = _mm_add_epi64(T3, v33);
388
+ M0 = _mm_and_si128(MMASK, T5);
389
+ v43 = H1;
390
+ v43 = _mm_mul_epu32(v43, T15);
391
+ T4 = _mm_add_epi64(T4, v42);
392
+ M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
393
+ v44 = H0;
394
+ v44 = _mm_mul_epu32(v44, R44);
395
+ T2 = _mm_add_epi64(T2, v24);
396
+ T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12));
397
+ T3 = _mm_add_epi64(T3, v34);
398
+ M3 = _mm_and_si128(MMASK, _mm_srli_epi64(T6, 14));
399
+ T4 = _mm_add_epi64(T4, v43);
400
+ M2 = _mm_and_si128(MMASK, T5);
401
+ T4 = _mm_add_epi64(T4, v44);
402
+ M4 = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT);
310
403
 
311
404
  /* H += [Mx',My'] */
312
- T5 = _mm_loadu_si128((const xmmi *)(const void *)(m + 32));
313
- T6 = _mm_loadu_si128((const xmmi *)(const void *)(m + 48));
405
+ T5 = _mm_loadu_si128((const xmmi *) (const void *) (m + 32));
406
+ T6 = _mm_loadu_si128((const xmmi *) (const void *) (m + 48));
314
407
  T7 = _mm_unpacklo_epi32(T5, T6);
315
408
  T8 = _mm_unpackhi_epi32(T5, T6);
316
409
  M5 = _mm_unpacklo_epi32(T7, _mm_setzero_si128());
@@ -328,46 +421,110 @@ poly1305_blocks(poly1305_state_internal_t *st, const unsigned char *m,
328
421
 
329
422
  /* H += [Mx,My]*[r^2,r^2] */
330
423
  T15 = S22;
331
- v00 = M4; v00 = _mm_mul_epu32(v00, S21);
332
- v01 = M3; v01 = _mm_mul_epu32(v01, T15);
424
+ v00 = M4;
425
+ v00 = _mm_mul_epu32(v00, S21);
426
+ v01 = M3;
427
+ v01 = _mm_mul_epu32(v01, T15);
333
428
  T14 = S23;
334
- v10 = M4; v10 = _mm_mul_epu32(v10, T15);
335
- v11 = M3; v11 = _mm_mul_epu32(v11, T14); T0 = _mm_add_epi64(T0, v00);
336
- v20 = M4; v20 = _mm_mul_epu32(v20, T14); T0 = _mm_add_epi64(T0, v01);
429
+ v10 = M4;
430
+ v10 = _mm_mul_epu32(v10, T15);
431
+ v11 = M3;
432
+ v11 = _mm_mul_epu32(v11, T14);
433
+ T0 = _mm_add_epi64(T0, v00);
434
+ v20 = M4;
435
+ v20 = _mm_mul_epu32(v20, T14);
436
+ T0 = _mm_add_epi64(T0, v01);
337
437
  T15 = S24;
338
- v02 = M2; v02 = _mm_mul_epu32(v02, T14); T1 = _mm_add_epi64(T1, v10);
339
- v30 = M4; v30 = _mm_mul_epu32(v30, T15); T1 = _mm_add_epi64(T1, v11);
340
- v03 = M1; v03 = _mm_mul_epu32(v03, T15); T2 = _mm_add_epi64(T2, v20);
341
- v12 = M2; v12 = _mm_mul_epu32(v12, T15); T0 = _mm_add_epi64(T0, v02);
438
+ v02 = M2;
439
+ v02 = _mm_mul_epu32(v02, T14);
440
+ T1 = _mm_add_epi64(T1, v10);
441
+ v30 = M4;
442
+ v30 = _mm_mul_epu32(v30, T15);
443
+ T1 = _mm_add_epi64(T1, v11);
444
+ v03 = M1;
445
+ v03 = _mm_mul_epu32(v03, T15);
446
+ T2 = _mm_add_epi64(T2, v20);
447
+ v12 = M2;
448
+ v12 = _mm_mul_epu32(v12, T15);
449
+ T0 = _mm_add_epi64(T0, v02);
342
450
  T14 = R20;
343
- v21 = M3; v21 = _mm_mul_epu32(v21, T15); T3 = _mm_add_epi64(T3, v30);
344
- v31 = M3; v31 = _mm_mul_epu32(v31, T14); T0 = _mm_add_epi64(T0, v03);
345
- v40 = M4; v40 = _mm_mul_epu32(v40, T14); T1 = _mm_add_epi64(T1, v12);
346
- v04 = M0; v04 = _mm_mul_epu32(v04, T14); T2 = _mm_add_epi64(T2, v21);
347
- v13 = M1; v13 = _mm_mul_epu32(v13, T14); T3 = _mm_add_epi64(T3, v31);
451
+ v21 = M3;
452
+ v21 = _mm_mul_epu32(v21, T15);
453
+ T3 = _mm_add_epi64(T3, v30);
454
+ v31 = M3;
455
+ v31 = _mm_mul_epu32(v31, T14);
456
+ T0 = _mm_add_epi64(T0, v03);
457
+ v40 = M4;
458
+ v40 = _mm_mul_epu32(v40, T14);
459
+ T1 = _mm_add_epi64(T1, v12);
460
+ v04 = M0;
461
+ v04 = _mm_mul_epu32(v04, T14);
462
+ T2 = _mm_add_epi64(T2, v21);
463
+ v13 = M1;
464
+ v13 = _mm_mul_epu32(v13, T14);
465
+ T3 = _mm_add_epi64(T3, v31);
348
466
  T15 = R21;
349
- v22 = M2; v22 = _mm_mul_epu32(v22, T14); T4 = _mm_add_epi64(T4, v40);
350
- v32 = M2; v32 = _mm_mul_epu32(v32, T15); T0 = _mm_add_epi64(T0, v04);
351
- v41 = M3; v41 = _mm_mul_epu32(v41, T15); T1 = _mm_add_epi64(T1, v13);
352
- v14 = M0; v14 = _mm_mul_epu32(v14, T15); T2 = _mm_add_epi64(T2, v22);
467
+ v22 = M2;
468
+ v22 = _mm_mul_epu32(v22, T14);
469
+ T4 = _mm_add_epi64(T4, v40);
470
+ v32 = M2;
471
+ v32 = _mm_mul_epu32(v32, T15);
472
+ T0 = _mm_add_epi64(T0, v04);
473
+ v41 = M3;
474
+ v41 = _mm_mul_epu32(v41, T15);
475
+ T1 = _mm_add_epi64(T1, v13);
476
+ v14 = M0;
477
+ v14 = _mm_mul_epu32(v14, T15);
478
+ T2 = _mm_add_epi64(T2, v22);
353
479
  T14 = R22;
354
- v23 = M1; v23 = _mm_mul_epu32(v23, T15); T3 = _mm_add_epi64(T3, v32);
355
- v33 = M1; v33 = _mm_mul_epu32(v33, T14); T4 = _mm_add_epi64(T4, v41);
356
- v42 = M2; v42 = _mm_mul_epu32(v42, T14); T1 = _mm_add_epi64(T1, v14);
480
+ v23 = M1;
481
+ v23 = _mm_mul_epu32(v23, T15);
482
+ T3 = _mm_add_epi64(T3, v32);
483
+ v33 = M1;
484
+ v33 = _mm_mul_epu32(v33, T14);
485
+ T4 = _mm_add_epi64(T4, v41);
486
+ v42 = M2;
487
+ v42 = _mm_mul_epu32(v42, T14);
488
+ T1 = _mm_add_epi64(T1, v14);
357
489
  T15 = R23;
358
- v24 = M0; v24 = _mm_mul_epu32(v24, T14); T2 = _mm_add_epi64(T2, v23);
359
- v34 = M0; v34 = _mm_mul_epu32(v34, T15); T3 = _mm_add_epi64(T3, v33);
360
- v43 = M1; v43 = _mm_mul_epu32(v43, T15); T4 = _mm_add_epi64(T4, v42);
361
- v44 = M0; v44 = _mm_mul_epu32(v44, R24); T2 = _mm_add_epi64(T2, v24);
362
- T3 = _mm_add_epi64(T3, v34);
363
- T4 = _mm_add_epi64(T4, v43);
364
- T4 = _mm_add_epi64(T4, v44);
490
+ v24 = M0;
491
+ v24 = _mm_mul_epu32(v24, T14);
492
+ T2 = _mm_add_epi64(T2, v23);
493
+ v34 = M0;
494
+ v34 = _mm_mul_epu32(v34, T15);
495
+ T3 = _mm_add_epi64(T3, v33);
496
+ v43 = M1;
497
+ v43 = _mm_mul_epu32(v43, T15);
498
+ T4 = _mm_add_epi64(T4, v42);
499
+ v44 = M0;
500
+ v44 = _mm_mul_epu32(v44, R24);
501
+ T2 = _mm_add_epi64(T2, v24);
502
+ T3 = _mm_add_epi64(T3, v34);
503
+ T4 = _mm_add_epi64(T4, v43);
504
+ T4 = _mm_add_epi64(T4, v44);
365
505
 
366
506
  /* reduce */
367
- C1 = _mm_srli_epi64(T0, 26); C2 = _mm_srli_epi64(T3, 26); T0 = _mm_and_si128(T0, MMASK); T3 = _mm_and_si128(T3, MMASK); T1 = _mm_add_epi64(T1, C1); T4 = _mm_add_epi64(T4, C2);
368
- C1 = _mm_srli_epi64(T1, 26); C2 = _mm_srli_epi64(T4, 26); T1 = _mm_and_si128(T1, MMASK); T4 = _mm_and_si128(T4, MMASK); T2 = _mm_add_epi64(T2, C1); T0 = _mm_add_epi64(T0, _mm_mul_epu32(C2, FIVE));
369
- C1 = _mm_srli_epi64(T2, 26); C2 = _mm_srli_epi64(T0, 26); T2 = _mm_and_si128(T2, MMASK); T0 = _mm_and_si128(T0, MMASK); T3 = _mm_add_epi64(T3, C1); T1 = _mm_add_epi64(T1, C2);
370
- C1 = _mm_srli_epi64(T3, 26); T3 = _mm_and_si128(T3, MMASK); T4 = _mm_add_epi64(T4, C1);
507
+ C1 = _mm_srli_epi64(T0, 26);
508
+ C2 = _mm_srli_epi64(T3, 26);
509
+ T0 = _mm_and_si128(T0, MMASK);
510
+ T3 = _mm_and_si128(T3, MMASK);
511
+ T1 = _mm_add_epi64(T1, C1);
512
+ T4 = _mm_add_epi64(T4, C2);
513
+ C1 = _mm_srli_epi64(T1, 26);
514
+ C2 = _mm_srli_epi64(T4, 26);
515
+ T1 = _mm_and_si128(T1, MMASK);
516
+ T4 = _mm_and_si128(T4, MMASK);
517
+ T2 = _mm_add_epi64(T2, C1);
518
+ T0 = _mm_add_epi64(T0, _mm_mul_epu32(C2, FIVE));
519
+ C1 = _mm_srli_epi64(T2, 26);
520
+ C2 = _mm_srli_epi64(T0, 26);
521
+ T2 = _mm_and_si128(T2, MMASK);
522
+ T0 = _mm_and_si128(T0, MMASK);
523
+ T3 = _mm_add_epi64(T3, C1);
524
+ T1 = _mm_add_epi64(T1, C2);
525
+ C1 = _mm_srli_epi64(T3, 26);
526
+ T3 = _mm_and_si128(T3, MMASK);
527
+ T4 = _mm_add_epi64(T4, C1);
371
528
 
372
529
  /* Final: H = (H*[r^4,r^4] + [Mx,My]*[r^2,r^2] + [Mx',My']) */
373
530
  H0 = T0;
@@ -382,54 +539,96 @@ poly1305_blocks(poly1305_state_internal_t *st, const unsigned char *m,
382
539
  }
383
540
 
384
541
  if (bytes >= 32) {
385
- xmmi v01,v02,v03,v04;
386
- xmmi v11,v12,v13,v14;
387
- xmmi v21,v22,v23,v24;
388
- xmmi v31,v32,v33,v34;
389
- xmmi v41,v42,v43,v44;
390
- xmmi T14,T15;
542
+ xmmi v01, v02, v03, v04;
543
+ xmmi v11, v12, v13, v14;
544
+ xmmi v21, v22, v23, v24;
545
+ xmmi v31, v32, v33, v34;
546
+ xmmi v41, v42, v43, v44;
547
+ xmmi T14, T15;
391
548
 
392
549
  /* H *= [r^2,r^2] */
393
550
  T15 = S22;
394
- T0 = H4; T0 = _mm_mul_epu32(T0, S21);
395
- v01 = H3; v01 = _mm_mul_epu32(v01, T15);
551
+ T0 = H4;
552
+ T0 = _mm_mul_epu32(T0, S21);
553
+ v01 = H3;
554
+ v01 = _mm_mul_epu32(v01, T15);
396
555
  T14 = S23;
397
- T1 = H4; T1 = _mm_mul_epu32(T1 , T15);
398
- v11 = H3; v11 = _mm_mul_epu32(v11, T14);
399
- T2 = H4; T2 = _mm_mul_epu32(T2 , T14); T0 = _mm_add_epi64(T0, v01);
556
+ T1 = H4;
557
+ T1 = _mm_mul_epu32(T1, T15);
558
+ v11 = H3;
559
+ v11 = _mm_mul_epu32(v11, T14);
560
+ T2 = H4;
561
+ T2 = _mm_mul_epu32(T2, T14);
562
+ T0 = _mm_add_epi64(T0, v01);
400
563
  T15 = S24;
401
- v02 = H2; v02 = _mm_mul_epu32(v02, T14);
402
- T3 = H4; T3 = _mm_mul_epu32(T3 , T15); T1 = _mm_add_epi64(T1, v11);
403
- v03 = H1; v03 = _mm_mul_epu32(v03, T15);
404
- v12 = H2; v12 = _mm_mul_epu32(v12, T15); T0 = _mm_add_epi64(T0, v02);
564
+ v02 = H2;
565
+ v02 = _mm_mul_epu32(v02, T14);
566
+ T3 = H4;
567
+ T3 = _mm_mul_epu32(T3, T15);
568
+ T1 = _mm_add_epi64(T1, v11);
569
+ v03 = H1;
570
+ v03 = _mm_mul_epu32(v03, T15);
571
+ v12 = H2;
572
+ v12 = _mm_mul_epu32(v12, T15);
573
+ T0 = _mm_add_epi64(T0, v02);
405
574
  T14 = R20;
406
- v21 = H3; v21 = _mm_mul_epu32(v21, T15);
407
- v31 = H3; v31 = _mm_mul_epu32(v31, T14); T0 = _mm_add_epi64(T0, v03);
408
- T4 = H4; T4 = _mm_mul_epu32(T4 , T14); T1 = _mm_add_epi64(T1, v12);
409
- v04 = H0; v04 = _mm_mul_epu32(v04, T14); T2 = _mm_add_epi64(T2, v21);
410
- v13 = H1; v13 = _mm_mul_epu32(v13, T14); T3 = _mm_add_epi64(T3, v31);
575
+ v21 = H3;
576
+ v21 = _mm_mul_epu32(v21, T15);
577
+ v31 = H3;
578
+ v31 = _mm_mul_epu32(v31, T14);
579
+ T0 = _mm_add_epi64(T0, v03);
580
+ T4 = H4;
581
+ T4 = _mm_mul_epu32(T4, T14);
582
+ T1 = _mm_add_epi64(T1, v12);
583
+ v04 = H0;
584
+ v04 = _mm_mul_epu32(v04, T14);
585
+ T2 = _mm_add_epi64(T2, v21);
586
+ v13 = H1;
587
+ v13 = _mm_mul_epu32(v13, T14);
588
+ T3 = _mm_add_epi64(T3, v31);
411
589
  T15 = R21;
412
- v22 = H2; v22 = _mm_mul_epu32(v22, T14);
413
- v32 = H2; v32 = _mm_mul_epu32(v32, T15); T0 = _mm_add_epi64(T0, v04);
414
- v41 = H3; v41 = _mm_mul_epu32(v41, T15); T1 = _mm_add_epi64(T1, v13);
415
- v14 = H0; v14 = _mm_mul_epu32(v14, T15); T2 = _mm_add_epi64(T2, v22);
590
+ v22 = H2;
591
+ v22 = _mm_mul_epu32(v22, T14);
592
+ v32 = H2;
593
+ v32 = _mm_mul_epu32(v32, T15);
594
+ T0 = _mm_add_epi64(T0, v04);
595
+ v41 = H3;
596
+ v41 = _mm_mul_epu32(v41, T15);
597
+ T1 = _mm_add_epi64(T1, v13);
598
+ v14 = H0;
599
+ v14 = _mm_mul_epu32(v14, T15);
600
+ T2 = _mm_add_epi64(T2, v22);
416
601
  T14 = R22;
417
- v23 = H1; v23 = _mm_mul_epu32(v23, T15); T3 = _mm_add_epi64(T3, v32);
418
- v33 = H1; v33 = _mm_mul_epu32(v33, T14); T4 = _mm_add_epi64(T4, v41);
419
- v42 = H2; v42 = _mm_mul_epu32(v42, T14); T1 = _mm_add_epi64(T1, v14);
602
+ v23 = H1;
603
+ v23 = _mm_mul_epu32(v23, T15);
604
+ T3 = _mm_add_epi64(T3, v32);
605
+ v33 = H1;
606
+ v33 = _mm_mul_epu32(v33, T14);
607
+ T4 = _mm_add_epi64(T4, v41);
608
+ v42 = H2;
609
+ v42 = _mm_mul_epu32(v42, T14);
610
+ T1 = _mm_add_epi64(T1, v14);
420
611
  T15 = R23;
421
- v24 = H0; v24 = _mm_mul_epu32(v24, T14); T2 = _mm_add_epi64(T2, v23);
422
- v34 = H0; v34 = _mm_mul_epu32(v34, T15); T3 = _mm_add_epi64(T3, v33);
423
- v43 = H1; v43 = _mm_mul_epu32(v43, T15); T4 = _mm_add_epi64(T4, v42);
424
- v44 = H0; v44 = _mm_mul_epu32(v44, R24); T2 = _mm_add_epi64(T2, v24);
425
- T3 = _mm_add_epi64(T3, v34);
426
- T4 = _mm_add_epi64(T4, v43);
427
- T4 = _mm_add_epi64(T4, v44);
612
+ v24 = H0;
613
+ v24 = _mm_mul_epu32(v24, T14);
614
+ T2 = _mm_add_epi64(T2, v23);
615
+ v34 = H0;
616
+ v34 = _mm_mul_epu32(v34, T15);
617
+ T3 = _mm_add_epi64(T3, v33);
618
+ v43 = H1;
619
+ v43 = _mm_mul_epu32(v43, T15);
620
+ T4 = _mm_add_epi64(T4, v42);
621
+ v44 = H0;
622
+ v44 = _mm_mul_epu32(v44, R24);
623
+ T2 = _mm_add_epi64(T2, v24);
624
+ T3 = _mm_add_epi64(T3, v34);
625
+ T4 = _mm_add_epi64(T4, v43);
626
+ T4 = _mm_add_epi64(T4, v44);
428
627
 
429
628
  /* H += [Mx,My] */
430
629
  if (m) {
431
- T5 = _mm_loadu_si128((const xmmi *)(const void *)(m + 0));
432
- T6 = _mm_loadu_si128((const xmmi *)(const void *)(m + 16));
630
+ T5 = _mm_loadu_si128((const xmmi *) (const void *) (m + 0));
631
+ T6 = _mm_loadu_si128((const xmmi *) (const void *) (m + 16));
433
632
  T7 = _mm_unpacklo_epi32(T5, T6);
434
633
  T8 = _mm_unpackhi_epi32(T5, T6);
435
634
  M0 = _mm_unpacklo_epi32(T7, _mm_setzero_si128());
@@ -447,10 +646,27 @@ poly1305_blocks(poly1305_state_internal_t *st, const unsigned char *m,
447
646
  }
448
647
 
449
648
  /* reduce */
450
- C1 = _mm_srli_epi64(T0, 26); C2 = _mm_srli_epi64(T3, 26); T0 = _mm_and_si128(T0, MMASK); T3 = _mm_and_si128(T3, MMASK); T1 = _mm_add_epi64(T1, C1); T4 = _mm_add_epi64(T4, C2);
451
- C1 = _mm_srli_epi64(T1, 26); C2 = _mm_srli_epi64(T4, 26); T1 = _mm_and_si128(T1, MMASK); T4 = _mm_and_si128(T4, MMASK); T2 = _mm_add_epi64(T2, C1); T0 = _mm_add_epi64(T0, _mm_mul_epu32(C2, FIVE));
452
- C1 = _mm_srli_epi64(T2, 26); C2 = _mm_srli_epi64(T0, 26); T2 = _mm_and_si128(T2, MMASK); T0 = _mm_and_si128(T0, MMASK); T3 = _mm_add_epi64(T3, C1); T1 = _mm_add_epi64(T1, C2);
453
- C1 = _mm_srli_epi64(T3, 26); T3 = _mm_and_si128(T3, MMASK); T4 = _mm_add_epi64(T4, C1);
649
+ C1 = _mm_srli_epi64(T0, 26);
650
+ C2 = _mm_srli_epi64(T3, 26);
651
+ T0 = _mm_and_si128(T0, MMASK);
652
+ T3 = _mm_and_si128(T3, MMASK);
653
+ T1 = _mm_add_epi64(T1, C1);
654
+ T4 = _mm_add_epi64(T4, C2);
655
+ C1 = _mm_srli_epi64(T1, 26);
656
+ C2 = _mm_srli_epi64(T4, 26);
657
+ T1 = _mm_and_si128(T1, MMASK);
658
+ T4 = _mm_and_si128(T4, MMASK);
659
+ T2 = _mm_add_epi64(T2, C1);
660
+ T0 = _mm_add_epi64(T0, _mm_mul_epu32(C2, FIVE));
661
+ C1 = _mm_srli_epi64(T2, 26);
662
+ C2 = _mm_srli_epi64(T0, 26);
663
+ T2 = _mm_and_si128(T2, MMASK);
664
+ T0 = _mm_and_si128(T0, MMASK);
665
+ T3 = _mm_add_epi64(T3, C1);
666
+ T1 = _mm_add_epi64(T1, C2);
667
+ C1 = _mm_srli_epi64(T3, 26);
668
+ T3 = _mm_and_si128(T3, MMASK);
669
+ T4 = _mm_add_epi64(T4, C1);
454
670
 
455
671
  /* H = (H*[r^2,r^2] + [Mx,My]) */
456
672
  H0 = T0;
@@ -461,19 +677,19 @@ poly1305_blocks(poly1305_state_internal_t *st, const unsigned char *m,
461
677
  }
462
678
 
463
679
  if (m) {
464
- T0 = _mm_shuffle_epi32(H0, _MM_SHUFFLE(0,0,2,0));
465
- T1 = _mm_shuffle_epi32(H1, _MM_SHUFFLE(0,0,2,0));
466
- T2 = _mm_shuffle_epi32(H2, _MM_SHUFFLE(0,0,2,0));
467
- T3 = _mm_shuffle_epi32(H3, _MM_SHUFFLE(0,0,2,0));
468
- T4 = _mm_shuffle_epi32(H4, _MM_SHUFFLE(0,0,2,0));
680
+ T0 = _mm_shuffle_epi32(H0, _MM_SHUFFLE(0, 0, 2, 0));
681
+ T1 = _mm_shuffle_epi32(H1, _MM_SHUFFLE(0, 0, 2, 0));
682
+ T2 = _mm_shuffle_epi32(H2, _MM_SHUFFLE(0, 0, 2, 0));
683
+ T3 = _mm_shuffle_epi32(H3, _MM_SHUFFLE(0, 0, 2, 0));
684
+ T4 = _mm_shuffle_epi32(H4, _MM_SHUFFLE(0, 0, 2, 0));
469
685
  T0 = _mm_unpacklo_epi64(T0, T1);
470
686
  T1 = _mm_unpacklo_epi64(T2, T3);
471
- _mm_storeu_si128((xmmi *)(void *)&st->hh[0], T0);
472
- _mm_storeu_si128((xmmi *)(void *)&st->hh[4], T1);
473
- _mm_storel_epi64((xmmi *)(void *)&st->hh[8], T4);
687
+ _mm_storeu_si128((xmmi *) (void *) &st->hh[0], T0);
688
+ _mm_storeu_si128((xmmi *) (void *) &st->hh[4], T1);
689
+ _mm_storel_epi64((xmmi *) (void *) &st->hh[8], T4);
474
690
  } else {
475
- uint32_t t0,t1,t2,t3,t4,b;
476
- uint64_t h0,h1,h2,g0,g1,g2,c,nc;
691
+ uint32_t t0, t1, t2, t3, t4, b;
692
+ uint64_t h0, h1, h2, g0, g1, g2, c, nc;
477
693
 
478
694
  /* H = H[0]+H[1] */
479
695
  T0 = H0;
@@ -488,29 +704,52 @@ poly1305_blocks(poly1305_state_internal_t *st, const unsigned char *m,
488
704
  T3 = _mm_add_epi64(T3, _mm_srli_si128(T3, 8));
489
705
  T4 = _mm_add_epi64(T4, _mm_srli_si128(T4, 8));
490
706
 
491
- t0 = _mm_cvtsi128_si32(T0) ; b = (t0 >> 26); t0 &= 0x3ffffff;
492
- t1 = _mm_cvtsi128_si32(T1) + b; b = (t1 >> 26); t1 &= 0x3ffffff;
493
- t2 = _mm_cvtsi128_si32(T2) + b; b = (t2 >> 26); t2 &= 0x3ffffff;
494
- t3 = _mm_cvtsi128_si32(T3) + b; b = (t3 >> 26); t3 &= 0x3ffffff;
707
+ t0 = _mm_cvtsi128_si32(T0);
708
+ b = (t0 >> 26);
709
+ t0 &= 0x3ffffff;
710
+ t1 = _mm_cvtsi128_si32(T1) + b;
711
+ b = (t1 >> 26);
712
+ t1 &= 0x3ffffff;
713
+ t2 = _mm_cvtsi128_si32(T2) + b;
714
+ b = (t2 >> 26);
715
+ t2 &= 0x3ffffff;
716
+ t3 = _mm_cvtsi128_si32(T3) + b;
717
+ b = (t3 >> 26);
718
+ t3 &= 0x3ffffff;
495
719
  t4 = _mm_cvtsi128_si32(T4) + b;
496
720
 
497
721
  /* everything except t4 is in range, so this is all safe */
498
- h0 = (((uint64_t)t0 ) | ((uint64_t)t1 << 26) ) & 0xfffffffffffull;
499
- h1 = (((uint64_t)t1 >> 18) | ((uint64_t)t2 << 8) | ((uint64_t)t3 << 34)) & 0xfffffffffffull;
500
- h2 = (((uint64_t)t3 >> 10) | ((uint64_t)t4 << 16) );
501
-
502
- c = (h2 >> 42); h2 &= 0x3ffffffffff;
503
- h0 += c * 5; c = (h0 >> 44); h0 &= 0xfffffffffff;
504
- h1 += c; c = (h1 >> 44); h1 &= 0xfffffffffff;
505
- h2 += c; c = (h2 >> 42); h2 &= 0x3ffffffffff;
506
- h0 += c * 5; c = (h0 >> 44); h0 &= 0xfffffffffff;
722
+ h0 = (((uint64_t) t0) | ((uint64_t) t1 << 26)) & 0xfffffffffffull;
723
+ h1 = (((uint64_t) t1 >> 18) | ((uint64_t) t2 << 8) |
724
+ ((uint64_t) t3 << 34)) &
725
+ 0xfffffffffffull;
726
+ h2 = (((uint64_t) t3 >> 10) | ((uint64_t) t4 << 16));
727
+
728
+ c = (h2 >> 42);
729
+ h2 &= 0x3ffffffffff;
730
+ h0 += c * 5;
731
+ c = (h0 >> 44);
732
+ h0 &= 0xfffffffffff;
733
+ h1 += c;
734
+ c = (h1 >> 44);
735
+ h1 &= 0xfffffffffff;
736
+ h2 += c;
737
+ c = (h2 >> 42);
738
+ h2 &= 0x3ffffffffff;
739
+ h0 += c * 5;
740
+ c = (h0 >> 44);
741
+ h0 &= 0xfffffffffff;
507
742
  h1 += c;
508
743
 
509
- g0 = h0 + 5; c = (g0 >> 44); g0 &= 0xfffffffffff;
510
- g1 = h1 + c; c = (g1 >> 44); g1 &= 0xfffffffffff;
511
- g2 = h2 + c - ((uint64_t)1 << 42);
744
+ g0 = h0 + 5;
745
+ c = (g0 >> 44);
746
+ g0 &= 0xfffffffffff;
747
+ g1 = h1 + c;
748
+ c = (g1 >> 44);
749
+ g1 &= 0xfffffffffff;
750
+ g2 = h2 + c - ((uint64_t) 1 << 42);
512
751
 
513
- c = (g2 >> 63) - 1;
752
+ c = (g2 >> 63) - 1;
514
753
  nc = ~c;
515
754
  h0 = (h0 & nc) | (g0 & c);
516
755
  h1 = (h1 & nc) | (g1 & c);
@@ -532,15 +771,18 @@ poly1305_update(poly1305_state_internal_t *st, const unsigned char *m,
532
771
  if (st->leftover) {
533
772
  unsigned long long want = (poly1305_block_size - st->leftover);
534
773
 
535
- if (want > bytes)
774
+ if (want > bytes) {
536
775
  want = bytes;
537
- for (i = 0; i < want; i++)
776
+ }
777
+ for (i = 0; i < want; i++) {
538
778
  st->buffer[st->leftover + i] = m[i];
779
+ }
539
780
  bytes -= want;
540
781
  m += want;
541
782
  st->leftover += want;
542
- if (st->leftover < poly1305_block_size)
783
+ if (st->leftover < poly1305_block_size) {
543
784
  return;
785
+ }
544
786
  poly1305_blocks(st, st->buffer, poly1305_block_size);
545
787
  st->leftover = 0;
546
788
  }
@@ -567,13 +809,17 @@ static POLY1305_NOINLINE void
567
809
  poly1305_finish_ext(poly1305_state_internal_t *st, const unsigned char *m,
568
810
  unsigned long long leftover, unsigned char mac[16])
569
811
  {
570
- uint64_t h0,h1,h2;
812
+ uint64_t h0, h1, h2;
571
813
 
572
814
  if (leftover) {
573
- CRYPTO_ALIGN(16) unsigned char final[32] = {0};
815
+ CRYPTO_ALIGN(16) unsigned char final[32] = { 0 };
816
+
574
817
  poly1305_block_copy31(final, m, leftover);
575
- if (leftover != 16) final[leftover] = 1;
576
- st->flags |= (leftover >= 16) ? poly1305_final_shift8 : poly1305_final_shift16;
818
+ if (leftover != 16) {
819
+ final[leftover] = 1;
820
+ }
821
+ st->flags |=
822
+ (leftover >= 16) ? poly1305_final_shift8 : poly1305_final_shift16;
577
823
  poly1305_blocks(st, final, 32);
578
824
  }
579
825
 
@@ -592,14 +838,15 @@ poly1305_finish_ext(poly1305_state_internal_t *st, const unsigned char *m,
592
838
  h2 = st->h[2];
593
839
 
594
840
  /* pad */
595
- h0 = ((h0 ) | (h1 << 44));
841
+ h0 = ((h0) | (h1 << 44));
596
842
  h1 = ((h1 >> 20) | (h2 << 24));
597
843
  #ifdef HAVE_AMD64_ASM
598
- __asm__ __volatile__("addq %2, %0 ;\n"
599
- "adcq %3, %1 ;\n"
600
- : "+r"(h0), "+r"(h1)
601
- : "r"(st->pad[0]), "r"(st->pad[1])
602
- : "flags", "cc");
844
+ __asm__ __volatile__(
845
+ "addq %2, %0 ;\n"
846
+ "adcq %3, %1 ;\n"
847
+ : "+r"(h0), "+r"(h1)
848
+ : "r"(st->pad[0]), "r"(st->pad[1])
849
+ : "flags", "cc");
603
850
  #else
604
851
  {
605
852
  uint128_t h;
@@ -607,22 +854,22 @@ poly1305_finish_ext(poly1305_state_internal_t *st, const unsigned char *m,
607
854
  memcpy(&h, &st->pad[0], 16);
608
855
  h += ((uint128_t) h1 << 64) | h0;
609
856
  h0 = (uint64_t) h;
610
- h1 = (uint64_t) (h >> 64);
857
+ h1 = (uint64_t)(h >> 64);
611
858
  }
612
859
  #endif
613
- _mm_storeu_si128((xmmi *)(void *)st + 0, _mm_setzero_si128());
614
- _mm_storeu_si128((xmmi *)(void *)st + 1, _mm_setzero_si128());
615
- _mm_storeu_si128((xmmi *)(void *)st + 2, _mm_setzero_si128());
616
- _mm_storeu_si128((xmmi *)(void *)st + 3, _mm_setzero_si128());
617
- _mm_storeu_si128((xmmi *)(void *)st + 4, _mm_setzero_si128());
618
- _mm_storeu_si128((xmmi *)(void *)st + 5, _mm_setzero_si128());
619
- _mm_storeu_si128((xmmi *)(void *)st + 6, _mm_setzero_si128());
620
- _mm_storeu_si128((xmmi *)(void *)st + 7, _mm_setzero_si128());
860
+ _mm_storeu_si128((xmmi *) (void *) st + 0, _mm_setzero_si128());
861
+ _mm_storeu_si128((xmmi *) (void *) st + 1, _mm_setzero_si128());
862
+ _mm_storeu_si128((xmmi *) (void *) st + 2, _mm_setzero_si128());
863
+ _mm_storeu_si128((xmmi *) (void *) st + 3, _mm_setzero_si128());
864
+ _mm_storeu_si128((xmmi *) (void *) st + 4, _mm_setzero_si128());
865
+ _mm_storeu_si128((xmmi *) (void *) st + 5, _mm_setzero_si128());
866
+ _mm_storeu_si128((xmmi *) (void *) st + 6, _mm_setzero_si128());
867
+ _mm_storeu_si128((xmmi *) (void *) st + 7, _mm_setzero_si128());
621
868
 
622
869
  memcpy(&mac[0], &h0, 8);
623
870
  memcpy(&mac[8], &h1, 8);
624
871
 
625
- sodium_memzero((void *)st, sizeof *st);
872
+ sodium_memzero((void *) st, sizeof *st);
626
873
  }
627
874
 
628
875
  static void
@@ -635,19 +882,19 @@ static int
635
882
  crypto_onetimeauth_poly1305_sse2_init(crypto_onetimeauth_poly1305_state *state,
636
883
  const unsigned char *key)
637
884
  {
638
- (void) sizeof(int[sizeof (crypto_onetimeauth_poly1305_state) >=
639
- sizeof (poly1305_state_internal_t) ? 1 : -1]);
640
- poly1305_init_ext((poly1305_state_internal_t *)(void *) state, key, 0U);
885
+ COMPILER_ASSERT(sizeof(crypto_onetimeauth_poly1305_state) >=
886
+ sizeof(poly1305_state_internal_t));
887
+ poly1305_init_ext((poly1305_state_internal_t *) (void *) state, key, 0U);
641
888
 
642
889
  return 0;
643
890
  }
644
891
 
645
892
  static int
646
- crypto_onetimeauth_poly1305_sse2_update(crypto_onetimeauth_poly1305_state *state,
647
- const unsigned char *in,
648
- unsigned long long inlen)
893
+ crypto_onetimeauth_poly1305_sse2_update(
894
+ crypto_onetimeauth_poly1305_state *state, const unsigned char *in,
895
+ unsigned long long inlen)
649
896
  {
650
- poly1305_update((poly1305_state_internal_t *)(void *) state, in, inlen);
897
+ poly1305_update((poly1305_state_internal_t *) (void *) state, in, inlen);
651
898
 
652
899
  return 0;
653
900
  }
@@ -656,18 +903,18 @@ static int
656
903
  crypto_onetimeauth_poly1305_sse2_final(crypto_onetimeauth_poly1305_state *state,
657
904
  unsigned char *out)
658
905
  {
659
- poly1305_finish((poly1305_state_internal_t *)(void *) state, out);
906
+ poly1305_finish((poly1305_state_internal_t *) (void *) state, out);
660
907
 
661
908
  return 0;
662
909
  }
663
910
 
664
911
  static int
665
912
  crypto_onetimeauth_poly1305_sse2(unsigned char *out, const unsigned char *m,
666
- unsigned long long inlen,
913
+ unsigned long long inlen,
667
914
  const unsigned char *key)
668
915
  {
669
916
  CRYPTO_ALIGN(64) poly1305_state_internal_t st;
670
- unsigned long long blocks;
917
+ unsigned long long blocks;
671
918
 
672
919
  poly1305_init_ext(&st, key, inlen);
673
920
  blocks = inlen & ~31;
@@ -684,23 +931,25 @@ crypto_onetimeauth_poly1305_sse2(unsigned char *out, const unsigned char *m,
684
931
  static int
685
932
  crypto_onetimeauth_poly1305_sse2_verify(const unsigned char *h,
686
933
  const unsigned char *in,
687
- unsigned long long inlen,
934
+ unsigned long long inlen,
688
935
  const unsigned char *k)
689
936
  {
690
937
  unsigned char correct[16];
691
938
 
692
- crypto_onetimeauth_poly1305_sse2(correct,in,inlen,k);
939
+ crypto_onetimeauth_poly1305_sse2(correct, in, inlen, k);
693
940
 
694
- return crypto_verify_16(h,correct);
941
+ return crypto_verify_16(h, correct);
695
942
  }
696
943
 
697
944
  struct crypto_onetimeauth_poly1305_implementation
698
- crypto_onetimeauth_poly1305_sse2_implementation = {
699
- SODIUM_C99(.onetimeauth =) crypto_onetimeauth_poly1305_sse2,
700
- SODIUM_C99(.onetimeauth_verify =) crypto_onetimeauth_poly1305_sse2_verify,
701
- SODIUM_C99(.onetimeauth_init =) crypto_onetimeauth_poly1305_sse2_init,
702
- SODIUM_C99(.onetimeauth_update =) crypto_onetimeauth_poly1305_sse2_update,
703
- SODIUM_C99(.onetimeauth_final =) crypto_onetimeauth_poly1305_sse2_final
704
- };
945
+ crypto_onetimeauth_poly1305_sse2_implementation = {
946
+ SODIUM_C99(.onetimeauth =) crypto_onetimeauth_poly1305_sse2,
947
+ SODIUM_C99(.onetimeauth_verify =)
948
+ crypto_onetimeauth_poly1305_sse2_verify,
949
+ SODIUM_C99(.onetimeauth_init =) crypto_onetimeauth_poly1305_sse2_init,
950
+ SODIUM_C99(.onetimeauth_update =)
951
+ crypto_onetimeauth_poly1305_sse2_update,
952
+ SODIUM_C99(.onetimeauth_final =) crypto_onetimeauth_poly1305_sse2_final
953
+ };
705
954
 
706
955
  #endif