rbnacl-libsodium 1.0.6 → 1.0.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (243) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGES.md +4 -0
  3. data/lib/rbnacl/libsodium/version.rb +1 -1
  4. data/vendor/libsodium/AUTHORS +10 -3
  5. data/vendor/libsodium/ChangeLog +19 -1
  6. data/vendor/libsodium/Makefile.in +1 -0
  7. data/vendor/libsodium/README.markdown +1 -1
  8. data/vendor/libsodium/THANKS +1 -0
  9. data/vendor/libsodium/autogen.sh +24 -4
  10. data/vendor/libsodium/autom4te.cache/output.1 +208 -24
  11. data/vendor/libsodium/autom4te.cache/output.5 +208 -24
  12. data/vendor/libsodium/autom4te.cache/requests +869 -869
  13. data/vendor/libsodium/autom4te.cache/traces.1 +426 -406
  14. data/vendor/libsodium/autom4te.cache/traces.5 +255 -245
  15. data/vendor/libsodium/builds/msvc/version.h +2 -2
  16. data/vendor/libsodium/builds/msvc/vs2010/libsodium/libsodium.vcxproj +14 -44
  17. data/vendor/libsodium/builds/msvc/vs2010/libsodium/libsodium.vcxproj.filters +36 -132
  18. data/vendor/libsodium/builds/msvc/vs2012/libsodium/libsodium.vcxproj +14 -44
  19. data/vendor/libsodium/builds/msvc/vs2012/libsodium/libsodium.vcxproj.filters +36 -132
  20. data/vendor/libsodium/builds/msvc/vs2013/libsodium/libsodium.vcxproj +14 -44
  21. data/vendor/libsodium/builds/msvc/vs2013/libsodium/libsodium.vcxproj.filters +36 -132
  22. data/vendor/libsodium/builds/msvc/vs2015/libsodium/libsodium.vcxproj +14 -44
  23. data/vendor/libsodium/builds/msvc/vs2015/libsodium/libsodium.vcxproj.filters +62 -158
  24. data/vendor/libsodium/configure +208 -24
  25. data/vendor/libsodium/configure.ac +70 -15
  26. data/vendor/libsodium/dist-build/Makefile.in +1 -0
  27. data/vendor/libsodium/dist-build/emscripten.sh +52 -19
  28. data/vendor/libsodium/dist-build/ios.sh +8 -8
  29. data/vendor/libsodium/dist-build/msys2-win32.sh +2 -1
  30. data/vendor/libsodium/dist-build/msys2-win64.sh +2 -1
  31. data/vendor/libsodium/dist-build/osx.sh +2 -2
  32. data/vendor/libsodium/examples/box.c +3 -1
  33. data/vendor/libsodium/examples/box_detached.c +4 -2
  34. data/vendor/libsodium/examples/utils.h +3 -1
  35. data/vendor/libsodium/libsodium.vcxproj +13 -13
  36. data/vendor/libsodium/libsodium.vcxproj.filters +31 -35
  37. data/vendor/libsodium/msvc-scripts/Makefile.in +1 -0
  38. data/vendor/libsodium/msvc-scripts/process.bat +2 -2
  39. data/vendor/libsodium/src/Makefile.in +1 -0
  40. data/vendor/libsodium/src/libsodium/Makefile.am +43 -45
  41. data/vendor/libsodium/src/libsodium/Makefile.in +250 -249
  42. data/vendor/libsodium/src/libsodium/crypto_aead/aes256gcm/aesni/aead_aes256gcm_aesni.c +28 -22
  43. data/vendor/libsodium/src/libsodium/crypto_auth/hmacsha256/cp/hmac_hmacsha256.c +3 -4
  44. data/vendor/libsodium/src/libsodium/crypto_auth/hmacsha256/cp/verify_hmacsha256.c +3 -3
  45. data/vendor/libsodium/src/libsodium/crypto_auth/hmacsha512/cp/hmac_hmacsha512.c +3 -4
  46. data/vendor/libsodium/src/libsodium/crypto_auth/hmacsha512/cp/verify_hmacsha512.c +4 -4
  47. data/vendor/libsodium/src/libsodium/crypto_auth/hmacsha512256/cp/hmac_hmacsha512256.c +4 -4
  48. data/vendor/libsodium/src/libsodium/crypto_auth/hmacsha512256/cp/verify_hmacsha512256.c +6 -4
  49. data/vendor/libsodium/src/libsodium/crypto_box/crypto_box_easy.c +6 -2
  50. data/vendor/libsodium/src/libsodium/crypto_box/curve25519xsalsa20poly1305/ref/after_curve25519xsalsa20poly1305.c +3 -3
  51. data/vendor/libsodium/src/libsodium/crypto_box/curve25519xsalsa20poly1305/ref/before_curve25519xsalsa20poly1305.c +5 -3
  52. data/vendor/libsodium/src/libsodium/crypto_box/curve25519xsalsa20poly1305/ref/box_curve25519xsalsa20poly1305.c +13 -9
  53. data/vendor/libsodium/src/libsodium/crypto_box/curve25519xsalsa20poly1305/ref/keypair_curve25519xsalsa20poly1305.c +3 -3
  54. data/vendor/libsodium/src/libsodium/crypto_core/hsalsa20/ref2/core_hsalsa20.c +2 -2
  55. data/vendor/libsodium/src/libsodium/crypto_core/salsa20/ref/core_salsa20.c +2 -2
  56. data/vendor/libsodium/src/libsodium/crypto_core/salsa2012/ref/core_salsa2012.c +2 -2
  57. data/vendor/libsodium/src/libsodium/crypto_core/salsa208/ref/core_salsa208.c +2 -2
  58. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2.h +2 -2
  59. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/blake2b-ref.c +36 -26
  60. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/generichash_blake2b.c +1 -1
  61. data/vendor/libsodium/src/libsodium/crypto_generichash/crypto_generichash.c +1 -2
  62. data/vendor/libsodium/src/libsodium/crypto_hash/sha256/cp/hash_sha256.c +2 -3
  63. data/vendor/libsodium/src/libsodium/crypto_hash/sha512/cp/hash_sha512.c +2 -3
  64. data/vendor/libsodium/src/libsodium/crypto_onetimeauth/poly1305/donna/{auth_poly1305_donna.c → poly1305_donna.c} +18 -11
  65. data/vendor/libsodium/src/libsodium/crypto_onetimeauth/poly1305/donna/poly1305_donna.h +1 -5
  66. data/vendor/libsodium/src/libsodium/crypto_onetimeauth/poly1305/donna/poly1305_donna32.h +10 -9
  67. data/vendor/libsodium/src/libsodium/crypto_onetimeauth/poly1305/donna/poly1305_donna64.h +17 -9
  68. data/vendor/libsodium/src/libsodium/crypto_onetimeauth/poly1305/onetimeauth_poly1305.c +10 -1
  69. data/vendor/libsodium/src/libsodium/crypto_onetimeauth/poly1305/sse2/poly1305_sse2.c +708 -0
  70. data/vendor/libsodium/src/libsodium/crypto_onetimeauth/poly1305/sse2/poly1305_sse2.h +31 -0
  71. data/vendor/libsodium/src/libsodium/crypto_pwhash/scryptsalsa208sha256/crypto_scrypt.h +1 -0
  72. data/vendor/libsodium/src/libsodium/crypto_scalarmult/crypto_scalarmult.c +11 -12
  73. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/donna_c64/{smult_curve25519_donna_c64.c → curve25519_donna_c64.c} +32 -15
  74. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/donna_c64/curve25519_donna_c64.h +9 -0
  75. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/ref10/curve25519_ref10.c +73 -0
  76. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/ref10/curve25519_ref10.h +9 -0
  77. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/ref10/fe_frombytes_curve25519_ref10.c +1 -0
  78. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/consts.S +25 -0
  79. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/consts_namespace.h +20 -0
  80. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/curve25519_sandy2x.c +114 -0
  81. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/curve25519_sandy2x.h +9 -0
  82. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/fe.h +25 -0
  83. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/fe51.h +33 -0
  84. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/fe51_invert.c +57 -0
  85. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/fe51_mul.S +189 -0
  86. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/fe51_namespace.h +16 -0
  87. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/fe51_nsquare.S +165 -0
  88. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/fe51_pack.S +219 -0
  89. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/fe_frombytes_sandy2x.c +76 -0
  90. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/ladder.S +1432 -0
  91. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/ladder.h +18 -0
  92. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/ladder_base.S +1287 -0
  93. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/ladder_base.h +18 -0
  94. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/ladder_base_namespace.h +8 -0
  95. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/ladder_namespace.h +8 -0
  96. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/sandy2x/sandy2x.S +17 -0
  97. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/scalarmult_curve25519.c +67 -0
  98. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/scalarmult_curve25519.h +11 -0
  99. data/vendor/libsodium/src/libsodium/crypto_secretbox/xsalsa20poly1305/ref/box_xsalsa20poly1305.c +3 -3
  100. data/vendor/libsodium/src/libsodium/crypto_shorthash/siphash24/ref/shorthash_siphash24.c +3 -2
  101. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/ge.h +2 -0
  102. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/ge_double_scalarmult.c +42 -0
  103. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/keypair.c +7 -7
  104. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/obsolete.c +113 -0
  105. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/open.c +10 -7
  106. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/sign.c +10 -10
  107. data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/portable/afternm_aes128ctr.c +2 -2
  108. data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/portable/beforenm_aes128ctr.c +2 -2
  109. data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/portable/stream_aes128ctr.c +10 -9
  110. data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/portable/xor_afternm_aes128ctr.c +2 -3
  111. data/vendor/libsodium/src/libsodium/crypto_stream/chacha20/ref/stream_chacha20_ref.c +72 -68
  112. data/vendor/libsodium/src/libsodium/crypto_stream/chacha20/ref/{api.h → stream_chacha20_ref.h} +5 -0
  113. data/vendor/libsodium/src/libsodium/crypto_stream/chacha20/{stream_chacha20_api.c → stream_chacha20.c} +27 -7
  114. data/vendor/libsodium/src/libsodium/crypto_stream/chacha20/stream_chacha20.h +22 -0
  115. data/vendor/libsodium/src/libsodium/crypto_stream/chacha20/vec/stream_chacha20_vec.c +329 -0
  116. data/vendor/libsodium/src/libsodium/crypto_stream/chacha20/vec/stream_chacha20_vec.h +27 -0
  117. data/vendor/libsodium/src/libsodium/crypto_stream/salsa20/amd64_xmm6/stream_salsa20_amd64_xmm6.S +1 -1
  118. data/vendor/libsodium/src/libsodium/crypto_stream/salsa20/ref/stream_salsa20_ref.c +2 -2
  119. data/vendor/libsodium/src/libsodium/crypto_stream/salsa20/ref/xor_salsa20_ref.c +1 -1
  120. data/vendor/libsodium/src/libsodium/crypto_stream/salsa2012/ref/stream_salsa2012.c +2 -2
  121. data/vendor/libsodium/src/libsodium/crypto_stream/salsa2012/ref/xor_salsa2012.c +2 -2
  122. data/vendor/libsodium/src/libsodium/crypto_stream/salsa208/ref/stream_salsa208.c +2 -2
  123. data/vendor/libsodium/src/libsodium/crypto_stream/salsa208/ref/xor_salsa208.c +2 -2
  124. data/vendor/libsodium/src/libsodium/crypto_stream/xsalsa20/ref/stream_xsalsa20.c +2 -2
  125. data/vendor/libsodium/src/libsodium/crypto_stream/xsalsa20/ref/xor_xsalsa20.c +4 -4
  126. data/vendor/libsodium/src/libsodium/crypto_verify/16/ref/verify_16.c +14 -21
  127. data/vendor/libsodium/src/libsodium/crypto_verify/32/ref/verify_32.c +14 -37
  128. data/vendor/libsodium/src/libsodium/crypto_verify/64/ref/verify_64.c +14 -69
  129. data/vendor/libsodium/src/libsodium/include/Makefile.in +1 -0
  130. data/vendor/libsodium/src/libsodium/include/sodium/crypto_box.h +8 -4
  131. data/vendor/libsodium/src/libsodium/include/sodium/crypto_box_curve25519xsalsa20poly1305.h +4 -2
  132. data/vendor/libsodium/src/libsodium/include/sodium/crypto_generichash_blake2b.h +2 -2
  133. data/vendor/libsodium/src/libsodium/include/sodium/crypto_onetimeauth_poly1305.h +2 -3
  134. data/vendor/libsodium/src/libsodium/include/sodium/crypto_scalarmult.h +2 -1
  135. data/vendor/libsodium/src/libsodium/include/sodium/crypto_scalarmult_curve25519.h +6 -1
  136. data/vendor/libsodium/src/libsodium/include/sodium/crypto_stream_chacha20.h +5 -0
  137. data/vendor/libsodium/src/libsodium/include/sodium/randombytes_nativeclient.h +6 -20
  138. data/vendor/libsodium/src/libsodium/include/sodium/randombytes_salsa20_random.h +1 -28
  139. data/vendor/libsodium/src/libsodium/include/sodium/randombytes_sysrandom.h +1 -27
  140. data/vendor/libsodium/src/libsodium/include/sodium/runtime.h +3 -0
  141. data/vendor/libsodium/src/libsodium/include/sodium/utils.h +6 -0
  142. data/vendor/libsodium/src/libsodium/randombytes/nativeclient/randombytes_nativeclient.c +3 -3
  143. data/vendor/libsodium/src/libsodium/randombytes/salsa20/randombytes_salsa20_random.c +58 -51
  144. data/vendor/libsodium/src/libsodium/randombytes/sysrandom/randombytes_sysrandom.c +23 -18
  145. data/vendor/libsodium/src/libsodium/sodium/core.c +4 -0
  146. data/vendor/libsodium/src/libsodium/sodium/runtime.c +34 -15
  147. data/vendor/libsodium/src/libsodium/sodium/utils.c +104 -14
  148. data/vendor/libsodium/test/Makefile.in +1 -0
  149. data/vendor/libsodium/test/default/Makefile.in +1 -0
  150. data/vendor/libsodium/test/default/auth.c +5 -5
  151. data/vendor/libsodium/test/default/auth2.c +3 -3
  152. data/vendor/libsodium/test/default/auth3.c +3 -3
  153. data/vendor/libsodium/test/default/auth5.c +4 -4
  154. data/vendor/libsodium/test/default/auth6.c +3 -3
  155. data/vendor/libsodium/test/default/auth7.c +4 -4
  156. data/vendor/libsodium/test/default/box.c +13 -9
  157. data/vendor/libsodium/test/default/box2.c +11 -8
  158. data/vendor/libsodium/test/default/box7.c +11 -9
  159. data/vendor/libsodium/test/default/box8.c +32 -19
  160. data/vendor/libsodium/test/default/box_easy.c +31 -10
  161. data/vendor/libsodium/test/default/box_easy.exp +3 -19
  162. data/vendor/libsodium/test/default/box_easy2.c +30 -15
  163. data/vendor/libsodium/test/default/box_seed.c +1 -1
  164. data/vendor/libsodium/test/default/cmptest.h +22 -1
  165. data/vendor/libsodium/test/default/core1.c +6 -5
  166. data/vendor/libsodium/test/default/core2.c +6 -5
  167. data/vendor/libsodium/test/default/core3.c +8 -7
  168. data/vendor/libsodium/test/default/core4.c +11 -8
  169. data/vendor/libsodium/test/default/core5.c +7 -5
  170. data/vendor/libsodium/test/default/core6.c +8 -6
  171. data/vendor/libsodium/test/default/generichash.c +0 -15
  172. data/vendor/libsodium/test/default/generichash.exp +0 -1
  173. data/vendor/libsodium/test/default/generichash3.c +1 -21
  174. data/vendor/libsodium/test/default/generichash3.exp +1 -1
  175. data/vendor/libsodium/test/default/hash.c +3 -3
  176. data/vendor/libsodium/test/default/hash3.c +2 -2
  177. data/vendor/libsodium/test/default/index.html.tpl +84 -0
  178. data/vendor/libsodium/test/default/onetimeauth.c +3 -3
  179. data/vendor/libsodium/test/default/onetimeauth2.c +5 -4
  180. data/vendor/libsodium/test/default/onetimeauth7.c +4 -4
  181. data/vendor/libsodium/test/default/pre.js.inc +14 -6
  182. data/vendor/libsodium/test/default/randombytes.c +2 -2
  183. data/vendor/libsodium/test/default/scalarmult.c +19 -6
  184. data/vendor/libsodium/test/default/scalarmult.exp +1 -0
  185. data/vendor/libsodium/test/default/scalarmult2.c +2 -2
  186. data/vendor/libsodium/test/default/scalarmult5.c +6 -4
  187. data/vendor/libsodium/test/default/scalarmult6.c +5 -3
  188. data/vendor/libsodium/test/default/scalarmult7.c +11 -7
  189. data/vendor/libsodium/test/default/secretbox.c +7 -6
  190. data/vendor/libsodium/test/default/secretbox2.c +7 -6
  191. data/vendor/libsodium/test/default/secretbox7.c +5 -5
  192. data/vendor/libsodium/test/default/secretbox8.c +5 -5
  193. data/vendor/libsodium/test/default/secretbox_easy.c +27 -17
  194. data/vendor/libsodium/test/default/secretbox_easy.exp +7 -95
  195. data/vendor/libsodium/test/default/secretbox_easy2.c +22 -10
  196. data/vendor/libsodium/test/default/sign.c +5 -0
  197. data/vendor/libsodium/test/default/sodium_utils.c +73 -0
  198. data/vendor/libsodium/test/default/sodium_utils.exp +6 -0
  199. data/vendor/libsodium/test/default/stream.c +7 -6
  200. data/vendor/libsodium/test/default/stream2.c +4 -4
  201. data/vendor/libsodium/test/default/stream3.c +6 -5
  202. data/vendor/libsodium/test/default/stream4.c +7 -6
  203. data/vendor/libsodium/test/default/verify1.c +61 -18
  204. data/vendor/libsodium/test/default/verify1.exp +2 -6
  205. metadata +37 -44
  206. data/vendor/libsodium/src/libsodium/crypto_auth/hmacsha256/cp/api.h +0 -9
  207. data/vendor/libsodium/src/libsodium/crypto_auth/hmacsha512/cp/api.h +0 -9
  208. data/vendor/libsodium/src/libsodium/crypto_auth/hmacsha512256/cp/api.h +0 -9
  209. data/vendor/libsodium/src/libsodium/crypto_box/curve25519xsalsa20poly1305/ref/api.h +0 -20
  210. data/vendor/libsodium/src/libsodium/crypto_core/hsalsa20/ref2/api.h +0 -10
  211. data/vendor/libsodium/src/libsodium/crypto_core/salsa20/ref/api.h +0 -10
  212. data/vendor/libsodium/src/libsodium/crypto_core/salsa2012/ref/api.h +0 -10
  213. data/vendor/libsodium/src/libsodium/crypto_core/salsa208/ref/api.h +0 -10
  214. data/vendor/libsodium/src/libsodium/crypto_generichash/blake2/ref/api.h +0 -2
  215. data/vendor/libsodium/src/libsodium/crypto_hash/sha256/cp/api.h +0 -10
  216. data/vendor/libsodium/src/libsodium/crypto_hash/sha512/cp/api.h +0 -10
  217. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/donna_c64/api.h +0 -6
  218. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/donna_c64/base_curve25519_donna_c64.c +0 -13
  219. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/ref10/api.h +0 -5
  220. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/ref10/base_curve25519_ref10.c +0 -14
  221. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/ref10/scalarmult_curve25519_ref10.c +0 -54
  222. data/vendor/libsodium/src/libsodium/crypto_scalarmult/curve25519/scalarmult_curve25519_api.c +0 -14
  223. data/vendor/libsodium/src/libsodium/crypto_secretbox/xsalsa20poly1305/ref/api.h +0 -11
  224. data/vendor/libsodium/src/libsodium/crypto_shorthash/siphash24/ref/api.h +0 -7
  225. data/vendor/libsodium/src/libsodium/crypto_sign/ed25519/ref10/api.h +0 -15
  226. data/vendor/libsodium/src/libsodium/crypto_sign/edwards25519sha512batch/ref/api.h +0 -12
  227. data/vendor/libsodium/src/libsodium/crypto_sign/edwards25519sha512batch/ref/fe25519.h +0 -54
  228. data/vendor/libsodium/src/libsodium/crypto_sign/edwards25519sha512batch/ref/fe25519_edwards25519sha512batch.c +0 -348
  229. data/vendor/libsodium/src/libsodium/crypto_sign/edwards25519sha512batch/ref/ge25519.h +0 -34
  230. data/vendor/libsodium/src/libsodium/crypto_sign/edwards25519sha512batch/ref/ge25519_edwards25519sha512batch.c +0 -230
  231. data/vendor/libsodium/src/libsodium/crypto_sign/edwards25519sha512batch/ref/sc25519.h +0 -51
  232. data/vendor/libsodium/src/libsodium/crypto_sign/edwards25519sha512batch/ref/sc25519_edwards25519sha512batch.c +0 -150
  233. data/vendor/libsodium/src/libsodium/crypto_sign/edwards25519sha512batch/ref/sign_edwards25519sha512batch.c +0 -106
  234. data/vendor/libsodium/src/libsodium/crypto_sign/edwards25519sha512batch/sign_edwards25519sha512batch_api.c +0 -16
  235. data/vendor/libsodium/src/libsodium/crypto_stream/aes128ctr/portable/api.h +0 -13
  236. data/vendor/libsodium/src/libsodium/crypto_stream/salsa20/amd64_xmm6/api.h +0 -1
  237. data/vendor/libsodium/src/libsodium/crypto_stream/salsa20/ref/api.h +0 -5
  238. data/vendor/libsodium/src/libsodium/crypto_stream/salsa2012/ref/api.h +0 -10
  239. data/vendor/libsodium/src/libsodium/crypto_stream/salsa208/ref/api.h +0 -9
  240. data/vendor/libsodium/src/libsodium/crypto_stream/xsalsa20/ref/api.h +0 -11
  241. data/vendor/libsodium/src/libsodium/crypto_verify/16/ref/api.h +0 -2
  242. data/vendor/libsodium/src/libsodium/crypto_verify/32/ref/api.h +0 -2
  243. data/vendor/libsodium/src/libsodium/crypto_verify/64/ref/api.h +0 -2
@@ -1,7 +1,11 @@
1
1
 
2
2
  #include "crypto_onetimeauth_poly1305.h"
3
- #include "donna/poly1305_donna.h"
4
3
  #include "onetimeauth_poly1305.h"
4
+ #include "runtime.h"
5
+ #include "donna/poly1305_donna.h"
6
+ #if defined(HAVE_TI_MODE) && defined(HAVE_EMMINTRIN_H)
7
+ # include "sse2/poly1305_sse2.h"
8
+ #endif
5
9
 
6
10
  static const crypto_onetimeauth_poly1305_implementation *implementation =
7
11
  &crypto_onetimeauth_poly1305_donna_implementation;
@@ -58,5 +62,10 @@ int
58
62
  _crypto_onetimeauth_poly1305_pick_best_implementation(void)
59
63
  {
60
64
  implementation = &crypto_onetimeauth_poly1305_donna_implementation;
65
+ #if defined(HAVE_TI_MODE) && defined(HAVE_EMMINTRIN_H)
66
+ if (sodium_runtime_has_sse2()) {
67
+ implementation = &crypto_onetimeauth_poly1305_sse2_implementation;
68
+ }
69
+ #endif
61
70
  return 0;
62
71
  }
@@ -0,0 +1,708 @@
1
+
2
+ #include <stdint.h>
3
+ #include <string.h>
4
+
5
+ #include "crypto_verify_16.h"
6
+ #include "utils.h"
7
+ #include "poly1305_sse2.h"
8
+ #include "../onetimeauth_poly1305.h"
9
+
10
+ #if defined(HAVE_TI_MODE) && defined(HAVE_EMMINTRIN_H)
11
+
12
+ #pragma GCC target("sse2")
13
+
14
+ #include <emmintrin.h>
15
+
16
+ typedef __m128i xmmi;
17
+
18
/*
 * 128-bit unsigned integer used for the wide products in this file.
 * Prefer the native __int128 keyword when the compiler advertises it,
 * otherwise fall back to the GCC "TI" machine-mode attribute.
 */
#ifdef __SIZEOF_INT128__
typedef unsigned __int128 uint128_t;
#else
typedef unsigned uint128_t __attribute__((mode(TI)));
#endif

/*
 * POLY1305_NOINLINE: ask the compiler to keep a function out of line.
 * Expands to nothing on compilers with no known spelling for it.
 */
#if defined(_MSC_VER)
# define POLY1305_NOINLINE __declspec(noinline)
#elif defined(__GNUC__)
# define POLY1305_NOINLINE __attribute__((noinline))
#else
# define POLY1305_NOINLINE
#endif
31
+
32
+ #define poly1305_block_size 32
33
+
34
/*
 * Bit flags stored in the `flags` member of the Poly1305 state and tested
 * by poly1305_blocks().  Each value is a distinct single bit.
 */
enum poly1305_state_flags_t {
    poly1305_started       = 1 << 0, /* set by poly1305_blocks() once it has loaded the first 32 bytes into H */
    poly1305_final_shift8  = 1 << 2, /* poly1305_blocks() shifts HIBIT right by 8 bytes */
    poly1305_final_shift16 = 1 << 3, /* poly1305_blocks() replaces HIBIT with zero */
    poly1305_final_r2_r    = 1 << 4, /* use [r^2,r] for the final block */
    poly1305_final_r_1     = 1 << 5  /* use [r,1] for the final block */
};
41
+
42
+ typedef struct poly1305_state_internal_t { /* working state passed to poly1305_init_ext() and poly1305_blocks() */
43
+ union {
44
+ uint64_t h[3]; /* same storage as hh, viewed as 64-bit words */
45
+ uint32_t hh[10]; /* accumulator storage: zeroed by poly1305_init_ext(), reloaded as H0..H4 by poly1305_blocks() */
46
+ }; /* 40 bytes */
47
+ uint32_t R[5]; /* 20 bytes: clamped r, four limbs masked to 26 bits plus a top limb */
48
+ uint32_t R2[5]; /* 20 bytes: r^2 in the same limb layout (only filled when the length hint exceeds 16) */
49
+ uint32_t R4[5]; /* 20 bytes: r^4 in the same limb layout (only filled when the length hint is at least 96) */
50
+ uint64_t pad[2]; /* 16 bytes: copy of key[16..31], saved by poly1305_init_ext() */
51
+ uint64_t flags; /* 8 bytes: bitwise OR of poly1305_state_flags_t values */
52
+ unsigned long long leftover; /* 8 bytes: reset to 0 by poly1305_init_ext(); presumably the number of bytes pending in buffer -- confirm against the update code */
53
+ unsigned char buffer[poly1305_block_size]; /* 32 bytes */
54
+ } poly1305_state_internal_t; /* members sum to 164 bytes; sizeof can be larger (168 where uint64_t is 8-byte aligned, because 4 padding bytes then precede pad) */
55
+
56
/*
 * _mm_loadl_epi64() only reads 8 bytes (it becomes a plain MOVQ), so handing
 * it an unaligned address is harmless even though its prototype takes a
 * __m128i pointer.  Dynamic analysis tools still complain about the
 * misaligned pointer, so in DEBUG builds every call is routed through a
 * wrapper that first copies the 8 bytes into a properly aligned temporary.
 */
#ifdef DEBUG
static xmmi
_fakealign_mm_loadl_epi64(const void *m)
{
    xmmi aligned_copy;

    /* Only the low 8 bytes are filled; they are the only ones the load reads. */
    memcpy(&aligned_copy, m, 8);

    /* The macro below is not defined yet, so this is the real intrinsic. */
    return _mm_loadl_epi64(&aligned_copy);
}
# define _mm_loadl_epi64(X) _fakealign_mm_loadl_epi64(X)
#endif
70
+
71
/*
 * Copy between 0 and 31 bytes from src to dst.
 *
 * Only the low five bits of `bytes` are honoured: one bit per chunk size
 * (16, 8, 4, 2, 1), copied in that order.  Callers must therefore pass a
 * count below 32; higher bits are silently ignored.
 *
 * Every chunk, including the 16-byte one, goes through a fixed-size memcpy,
 * so neither pointer needs any particular alignment.  (An aligned 128-bit
 * store would fault on a dst that is not 16-byte aligned, and this function
 * cannot know how its caller's buffer is aligned.)  The regions must not
 * overlap.
 */
static inline void
poly1305_block_copy31(unsigned char *dst, const unsigned char *src, unsigned long long bytes)
{
    if (bytes & 16) { memcpy(dst, src, 16); src += 16; dst += 16; }
    if (bytes & 8)  { memcpy(dst, src, 8);  src += 8;  dst += 8; }
    if (bytes & 4)  { memcpy(dst, src, 4);  src += 4;  dst += 4; }
    if (bytes & 2)  { memcpy(dst, src, 2);  src += 2;  dst += 2; }
    if (bytes & 1)  { *dst = *src; }
}
85
+
86
+ static POLY1305_NOINLINE void
87
+ poly1305_init_ext(poly1305_state_internal_t *st,
88
+ const unsigned char key[32], unsigned long long bytes)
89
+ {
90
+ uint32_t *R;
91
+ uint128_t d[3],m0;
92
+ uint64_t r0,r1,r2;
93
+ uint32_t rp0,rp1,rp2,rp3,rp4;
94
+ uint64_t rt0,rt1,rt2,st2,c;
95
+ uint64_t t0,t1;
96
+ unsigned long long i;
97
+
98
+ if (!bytes) bytes = ~(unsigned long long)0;
99
+
100
+ /* H = 0 */
101
+ _mm_storeu_si128((xmmi *)(void *)&st->hh[0], _mm_setzero_si128());
102
+ _mm_storeu_si128((xmmi *)(void *)&st->hh[4], _mm_setzero_si128());
103
+ _mm_storeu_si128((xmmi *)(void *)&st->hh[8], _mm_setzero_si128());
104
+
105
+ /* clamp key */
106
+ memcpy(&t0, key, 8);
107
+ memcpy(&t1, key + 8, 8);
108
+ r0 = t0 & 0xffc0fffffff; t0 >>= 44; t0 |= t1 << 20;
109
+ r1 = t0 & 0xfffffc0ffff; t1 >>= 24;
110
+ r2 = t1 & 0x00ffffffc0f;
111
+
112
+ /* r^1 */
113
+ R = st->R;
114
+ R[0] = (uint32_t)( r0 ) & 0x3ffffff;
115
+ R[1] = (uint32_t)(( r0 >> 26) | ( r1 << 18)) & 0x3ffffff;
116
+ R[2] = (uint32_t)(( r1 >> 8) ) & 0x3ffffff;
117
+ R[3] = (uint32_t)(( r1 >> 34) | ( r2 << 10)) & 0x3ffffff;
118
+ R[4] = (uint32_t)(( r2 >> 16) );
119
+
120
+ /* save pad */
121
+ memcpy(&st->pad[0], key + 16, 8);
122
+ memcpy(&st->pad[1], key + 24, 8);
123
+
124
+ rt0 = r0;
125
+ rt1 = r1;
126
+ rt2 = r2;
127
+
128
+ /* r^2, r^4 */
129
+ for (i = 0; i < 2; i++) {
130
+ if (i == 0) {
131
+ R = st->R2;
132
+ if (bytes <= 16) {
133
+ break;
134
+ }
135
+ } else if (i == 1) {
136
+ R = st->R4;
137
+ if (bytes < 96) {
138
+ break;
139
+ }
140
+ }
141
+ st2 = rt2 * (5 << 2);
142
+ d[0] = ((uint128_t)rt0 * rt0) + ((uint128_t)(rt1 * 2) * st2);
143
+ d[1] = ((uint128_t)rt2 * st2) + ((uint128_t)(rt0 * 2) * rt1);
144
+ d[2] = ((uint128_t)rt1 * rt1) + ((uint128_t)(rt2 * 2) * rt0);
145
+ rt0 = (uint64_t)d[0] & 0xfffffffffff; c = (uint64_t)(d[0] >> 44);
146
+ d[1] += c ; rt1 = (uint64_t)d[1] & 0xfffffffffff; c = (uint64_t)(d[1] >> 44);
147
+ d[2] += c ; rt2 = (uint64_t)d[2] & 0x3ffffffffff; c = (uint64_t)(d[2] >> 42);
148
+ rt0 += c * 5; c = (rt0 >> 44); rt0 = rt0 & 0xfffffffffff;
149
+ rt1 += c ; c = (rt1 >> 44); rt1 = rt1 & 0xfffffffffff;
150
+ rt2 += c ; /* even if rt2 overflows, it will still fit in rp4 safely, and is safe to multiply with */
151
+
152
+ R[0] = (uint32_t)( rt0 ) & 0x3ffffff;
153
+ R[1] = (uint32_t)((rt0 >> 26) | (rt1 << 18)) & 0x3ffffff;
154
+ R[2] = (uint32_t)((rt1 >> 8) ) & 0x3ffffff;
155
+ R[3] = (uint32_t)((rt1 >> 34) | (rt2 << 10)) & 0x3ffffff;
156
+ R[4] = (uint32_t)((rt2 >> 16) );
157
+ }
158
+
159
+ st->flags = 0;
160
+ st->leftover = 0U;
161
+ }
162
+
163
+ static POLY1305_NOINLINE void
164
+ poly1305_blocks(poly1305_state_internal_t *st, const unsigned char *m,
165
+ unsigned long long bytes)
166
+ {
167
+ CRYPTO_ALIGN(64) xmmi HIBIT = _mm_shuffle_epi32(_mm_cvtsi32_si128(1 << 24), _MM_SHUFFLE(1,0,1,0));
168
+ const xmmi MMASK = _mm_shuffle_epi32(_mm_cvtsi32_si128((1 << 26) - 1), _MM_SHUFFLE(1,0,1,0));
169
+ const xmmi FIVE = _mm_shuffle_epi32(_mm_cvtsi32_si128(5), _MM_SHUFFLE(1,0,1,0));
170
+ xmmi H0,H1,H2,H3,H4;
171
+ xmmi T0,T1,T2,T3,T4,T5,T6,T7,T8;
172
+ xmmi M0,M1,M2,M3,M4;
173
+ xmmi M5,M6,M7,M8,M9;
174
+ xmmi C1,C2;
175
+ xmmi R20,R21,R22,R23,R24,S21,S22,S23,S24;
176
+ xmmi R40,R41,R42,R43,R44,S41,S42,S43,S44;
177
+
178
+ if (st->flags & poly1305_final_shift8) HIBIT = _mm_srli_si128(HIBIT, 8);
179
+ if (st->flags & poly1305_final_shift16) HIBIT = _mm_setzero_si128();
180
+
181
+ if (!(st->flags & poly1305_started)) {
182
+ /* H = [Mx,My] */
183
+
184
+ T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((const xmmi *)(const void *)(m + 0)), _mm_loadl_epi64((const xmmi *)(const void *)(m + 16)));
185
+ T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((const xmmi *)(const void *)(m + 8)), _mm_loadl_epi64((const xmmi *)(const void *)(m + 24)));
186
+ H0 = _mm_and_si128(MMASK, T5);
187
+ H1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
188
+ T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12));
189
+ H2 = _mm_and_si128(MMASK, T5);
190
+ H3 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
191
+ H4 = _mm_srli_epi64(T6, 40);
192
+ H4 = _mm_or_si128(H4, HIBIT);
193
+ m += 32;
194
+ bytes -= 32;
195
+ st->flags |= poly1305_started;
196
+ } else {
197
+ T0 = _mm_loadu_si128((const xmmi *)(const void *)&st->hh[0]);
198
+ T1 = _mm_loadu_si128((const xmmi *)(const void *)&st->hh[4]);
199
+ T2 = _mm_loadu_si128((const xmmi *)(const void *)&st->hh[8]);
200
+ H0 = _mm_shuffle_epi32(T0, _MM_SHUFFLE(1,1,0,0));
201
+ H1 = _mm_shuffle_epi32(T0, _MM_SHUFFLE(3,3,2,2));
202
+ H2 = _mm_shuffle_epi32(T1, _MM_SHUFFLE(1,1,0,0));
203
+ H3 = _mm_shuffle_epi32(T1, _MM_SHUFFLE(3,3,2,2));
204
+ H4 = _mm_shuffle_epi32(T2, _MM_SHUFFLE(1,1,0,0));
205
+ }
206
+
207
+ if (st->flags & (poly1305_final_r2_r|poly1305_final_r_1)) {
208
+ if (st->flags & poly1305_final_r2_r) {
209
+ /* use [r^2, r] */
210
+ T2 = _mm_loadu_si128((const xmmi *)(const void *)&st->R[0]);
211
+ T3 = _mm_cvtsi32_si128(st->R[4]);
212
+ T0 = _mm_loadu_si128((const xmmi *)(const void *)&st->R2[0]);
213
+ T1 = _mm_cvtsi32_si128(st->R2[4]);
214
+ T4 = _mm_unpacklo_epi32(T0, T2);
215
+ T5 = _mm_unpackhi_epi32(T0, T2);
216
+ R24 = _mm_unpacklo_epi64(T1, T3);
217
+ } else {
218
+ /* use [r^1, 1] */
219
+ T0 = _mm_loadu_si128((const xmmi *)(const void *)&st->R[0]);
220
+ T1 = _mm_cvtsi32_si128(st->R[4]);
221
+ T2 = _mm_cvtsi32_si128(1);
222
+ T4 = _mm_unpacklo_epi32(T0, T2);
223
+ T5 = _mm_unpackhi_epi32(T0, T2);
224
+ R24 = T1;
225
+ }
226
+
227
+ R20 = _mm_shuffle_epi32(T4, _MM_SHUFFLE(1,1,0,0));
228
+ R21 = _mm_shuffle_epi32(T4, _MM_SHUFFLE(3,3,2,2));
229
+ R22 = _mm_shuffle_epi32(T5, _MM_SHUFFLE(1,1,0,0));
230
+ R23 = _mm_shuffle_epi32(T5, _MM_SHUFFLE(3,3,2,2));
231
+ } else {
232
+ /* use [r^2, r^2] */
233
+ T0 = _mm_loadu_si128((const xmmi *)(const void *)&st->R2[0]);
234
+ T1 = _mm_cvtsi32_si128(st->R2[4]);
235
+ R20 = _mm_shuffle_epi32(T0, _MM_SHUFFLE(0,0,0,0));
236
+ R21 = _mm_shuffle_epi32(T0, _MM_SHUFFLE(1,1,1,1));
237
+ R22 = _mm_shuffle_epi32(T0, _MM_SHUFFLE(2,2,2,2));
238
+ R23 = _mm_shuffle_epi32(T0, _MM_SHUFFLE(3,3,3,3));
239
+ R24 = _mm_shuffle_epi32(T1, _MM_SHUFFLE(0,0,0,0));
240
+ }
241
+ S21 = _mm_mul_epu32(R21, FIVE);
242
+ S22 = _mm_mul_epu32(R22, FIVE);
243
+ S23 = _mm_mul_epu32(R23, FIVE);
244
+ S24 = _mm_mul_epu32(R24, FIVE);
245
+
246
+ if (bytes >= 64) {
247
+ T0 = _mm_loadu_si128((const xmmi *)(const void *)&st->R4[0]);
248
+ T1 = _mm_cvtsi32_si128(st->R4[4]);
249
+ R40 = _mm_shuffle_epi32(T0, _MM_SHUFFLE(0,0,0,0));
250
+ R41 = _mm_shuffle_epi32(T0, _MM_SHUFFLE(1,1,1,1));
251
+ R42 = _mm_shuffle_epi32(T0, _MM_SHUFFLE(2,2,2,2));
252
+ R43 = _mm_shuffle_epi32(T0, _MM_SHUFFLE(3,3,3,3));
253
+ R44 = _mm_shuffle_epi32(T1, _MM_SHUFFLE(0,0,0,0));
254
+ S41 = _mm_mul_epu32(R41, FIVE);
255
+ S42 = _mm_mul_epu32(R42, FIVE);
256
+ S43 = _mm_mul_epu32(R43, FIVE);
257
+ S44 = _mm_mul_epu32(R44, FIVE);
258
+
259
+ while (bytes >= 64) {
260
+ xmmi v00,v01,v02,v03,v04;
261
+ xmmi v10,v11,v12,v13,v14;
262
+ xmmi v20,v21,v22,v23,v24;
263
+ xmmi v30,v31,v32,v33,v34;
264
+ xmmi v40,v41,v42,v43,v44;
265
+ xmmi T14,T15;
266
+
267
+ /* H *= [r^4,r^4], preload [Mx,My] */
268
+ T15 = S42;
269
+ T0 = H4; T0 = _mm_mul_epu32(T0, S41);
270
+ v01 = H3; v01 = _mm_mul_epu32(v01, T15);
271
+ T14 = S43;
272
+ T1 = H4; T1 = _mm_mul_epu32(T1 , T15);
273
+ v11 = H3; v11 = _mm_mul_epu32(v11, T14);
274
+ T2 = H4; T2 = _mm_mul_epu32(T2 , T14); T0 = _mm_add_epi64(T0, v01);
275
+ T15 = S44;
276
+ v02 = H2; v02 = _mm_mul_epu32(v02, T14);
277
+ T3 = H4; T3 = _mm_mul_epu32(T3 , T15); T1 = _mm_add_epi64(T1, v11);
278
+ v03 = H1; v03 = _mm_mul_epu32(v03, T15);
279
+ v12 = H2; v12 = _mm_mul_epu32(v12, T15); T0 = _mm_add_epi64(T0, v02);
280
+ T14 = R40;
281
+ v21 = H3; v21 = _mm_mul_epu32(v21, T15);
282
+ v31 = H3; v31 = _mm_mul_epu32(v31, T14); T0 = _mm_add_epi64(T0, v03);
283
+ T4 = H4; T4 = _mm_mul_epu32(T4 , T14); T1 = _mm_add_epi64(T1, v12);
284
+ v04 = H0; v04 = _mm_mul_epu32(v04, T14); T2 = _mm_add_epi64(T2, v21);
285
+ v13 = H1; v13 = _mm_mul_epu32(v13, T14); T3 = _mm_add_epi64(T3, v31);
286
+ T15 = R41;
287
+ v22 = H2; v22 = _mm_mul_epu32(v22, T14);
288
+ v32 = H2; v32 = _mm_mul_epu32(v32, T15); T0 = _mm_add_epi64(T0, v04);
289
+ v41 = H3; v41 = _mm_mul_epu32(v41, T15); T1 = _mm_add_epi64(T1, v13);
290
+ v14 = H0; v14 = _mm_mul_epu32(v14, T15); T2 = _mm_add_epi64(T2, v22);
291
+ T14 = R42;
292
+ T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((const xmmi *)(const void *)(m + 0)), _mm_loadl_epi64((const xmmi *)(const void *)(m + 16)));
293
+ v23 = H1; v23 = _mm_mul_epu32(v23, T15); T3 = _mm_add_epi64(T3, v32);
294
+ v33 = H1; v33 = _mm_mul_epu32(v33, T14); T4 = _mm_add_epi64(T4, v41);
295
+ v42 = H2; v42 = _mm_mul_epu32(v42, T14); T1 = _mm_add_epi64(T1, v14);
296
+ T15 = R43;
297
+ T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((const xmmi *)(const void *)(m + 8)), _mm_loadl_epi64((const xmmi *)(const void *)(m + 24)));
298
+ v24 = H0; v24 = _mm_mul_epu32(v24, T14); T2 = _mm_add_epi64(T2, v23);
299
+ v34 = H0; v34 = _mm_mul_epu32(v34, T15); T3 = _mm_add_epi64(T3, v33);
300
+ M0 = _mm_and_si128(MMASK, T5);
301
+ v43 = H1; v43 = _mm_mul_epu32(v43, T15); T4 = _mm_add_epi64(T4, v42);
302
+ M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
303
+ v44 = H0; v44 = _mm_mul_epu32(v44, R44); T2 = _mm_add_epi64(T2, v24);
304
+ T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12));
305
+ T3 = _mm_add_epi64(T3, v34);
306
+ M3 = _mm_and_si128(MMASK, _mm_srli_epi64(T6, 14));
307
+ T4 = _mm_add_epi64(T4, v43);
308
+ M2 = _mm_and_si128(MMASK, T5);
309
+ T4 = _mm_add_epi64(T4, v44);
310
+ M4 = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT);
311
+
312
+ /* H += [Mx',My'] */
313
+ T5 = _mm_loadu_si128((const xmmi *)(const void *)(m + 32));
314
+ T6 = _mm_loadu_si128((const xmmi *)(const void *)(m + 48));
315
+ T7 = _mm_unpacklo_epi32(T5, T6);
316
+ T8 = _mm_unpackhi_epi32(T5, T6);
317
+ M5 = _mm_unpacklo_epi32(T7, _mm_setzero_si128());
318
+ M6 = _mm_unpackhi_epi32(T7, _mm_setzero_si128());
319
+ M7 = _mm_unpacklo_epi32(T8, _mm_setzero_si128());
320
+ M8 = _mm_unpackhi_epi32(T8, _mm_setzero_si128());
321
+ M6 = _mm_slli_epi64(M6, 6);
322
+ M7 = _mm_slli_epi64(M7, 12);
323
+ M8 = _mm_slli_epi64(M8, 18);
324
+ T0 = _mm_add_epi64(T0, M5);
325
+ T1 = _mm_add_epi64(T1, M6);
326
+ T2 = _mm_add_epi64(T2, M7);
327
+ T3 = _mm_add_epi64(T3, M8);
328
+ T4 = _mm_add_epi64(T4, HIBIT);
329
+
330
+ /* H += [Mx,My]*[r^2,r^2] */
331
+ T15 = S22;
332
+ v00 = M4; v00 = _mm_mul_epu32(v00, S21);
333
+ v01 = M3; v01 = _mm_mul_epu32(v01, T15);
334
+ T14 = S23;
335
+ v10 = M4; v10 = _mm_mul_epu32(v10, T15);
336
+ v11 = M3; v11 = _mm_mul_epu32(v11, T14); T0 = _mm_add_epi64(T0, v00);
337
+ v20 = M4; v20 = _mm_mul_epu32(v20, T14); T0 = _mm_add_epi64(T0, v01);
338
+ T15 = S24;
339
+ v02 = M2; v02 = _mm_mul_epu32(v02, T14); T1 = _mm_add_epi64(T1, v10);
340
+ v30 = M4; v30 = _mm_mul_epu32(v30, T15); T1 = _mm_add_epi64(T1, v11);
341
+ v03 = M1; v03 = _mm_mul_epu32(v03, T15); T2 = _mm_add_epi64(T2, v20);
342
+ v12 = M2; v12 = _mm_mul_epu32(v12, T15); T0 = _mm_add_epi64(T0, v02);
343
+ T14 = R20;
344
+ v21 = M3; v21 = _mm_mul_epu32(v21, T15); T3 = _mm_add_epi64(T3, v30);
345
+ v31 = M3; v31 = _mm_mul_epu32(v31, T14); T0 = _mm_add_epi64(T0, v03);
346
+ v40 = M4; v40 = _mm_mul_epu32(v40, T14); T1 = _mm_add_epi64(T1, v12);
347
+ v04 = M0; v04 = _mm_mul_epu32(v04, T14); T2 = _mm_add_epi64(T2, v21);
348
+ v13 = M1; v13 = _mm_mul_epu32(v13, T14); T3 = _mm_add_epi64(T3, v31);
349
+ T15 = R21;
350
+ v22 = M2; v22 = _mm_mul_epu32(v22, T14); T4 = _mm_add_epi64(T4, v40);
351
+ v32 = M2; v32 = _mm_mul_epu32(v32, T15); T0 = _mm_add_epi64(T0, v04);
352
+ v41 = M3; v41 = _mm_mul_epu32(v41, T15); T1 = _mm_add_epi64(T1, v13);
353
+ v14 = M0; v14 = _mm_mul_epu32(v14, T15); T2 = _mm_add_epi64(T2, v22);
354
+ T14 = R22;
355
+ v23 = M1; v23 = _mm_mul_epu32(v23, T15); T3 = _mm_add_epi64(T3, v32);
356
+ v33 = M1; v33 = _mm_mul_epu32(v33, T14); T4 = _mm_add_epi64(T4, v41);
357
+ v42 = M2; v42 = _mm_mul_epu32(v42, T14); T1 = _mm_add_epi64(T1, v14);
358
+ T15 = R23;
359
+ v24 = M0; v24 = _mm_mul_epu32(v24, T14); T2 = _mm_add_epi64(T2, v23);
360
+ v34 = M0; v34 = _mm_mul_epu32(v34, T15); T3 = _mm_add_epi64(T3, v33);
361
+ v43 = M1; v43 = _mm_mul_epu32(v43, T15); T4 = _mm_add_epi64(T4, v42);
362
+ v44 = M0; v44 = _mm_mul_epu32(v44, R24); T2 = _mm_add_epi64(T2, v24);
363
+ T3 = _mm_add_epi64(T3, v34);
364
+ T4 = _mm_add_epi64(T4, v43);
365
+ T4 = _mm_add_epi64(T4, v44);
366
+
367
+ /* reduce */
368
+ C1 = _mm_srli_epi64(T0, 26); C2 = _mm_srli_epi64(T3, 26); T0 = _mm_and_si128(T0, MMASK); T3 = _mm_and_si128(T3, MMASK); T1 = _mm_add_epi64(T1, C1); T4 = _mm_add_epi64(T4, C2);
369
+ C1 = _mm_srli_epi64(T1, 26); C2 = _mm_srli_epi64(T4, 26); T1 = _mm_and_si128(T1, MMASK); T4 = _mm_and_si128(T4, MMASK); T2 = _mm_add_epi64(T2, C1); T0 = _mm_add_epi64(T0, _mm_mul_epu32(C2, FIVE));
370
+ C1 = _mm_srli_epi64(T2, 26); C2 = _mm_srli_epi64(T0, 26); T2 = _mm_and_si128(T2, MMASK); T0 = _mm_and_si128(T0, MMASK); T3 = _mm_add_epi64(T3, C1); T1 = _mm_add_epi64(T1, C2);
371
+ C1 = _mm_srli_epi64(T3, 26); T3 = _mm_and_si128(T3, MMASK); T4 = _mm_add_epi64(T4, C1);
372
+
373
+ /* Final: H = (H*[r^4,r^4] + [Mx,My]*[r^2,r^2] + [Mx',My']) */
374
+ H0 = T0;
375
+ H1 = T1;
376
+ H2 = T2;
377
+ H3 = T3;
378
+ H4 = T4;
379
+
380
+ m += 64;
381
+ bytes -= 64;
382
+ }
383
+ }
384
+
385
+ if (bytes >= 32) {
386
+ xmmi v01,v02,v03,v04;
387
+ xmmi v11,v12,v13,v14;
388
+ xmmi v21,v22,v23,v24;
389
+ xmmi v31,v32,v33,v34;
390
+ xmmi v41,v42,v43,v44;
391
+ xmmi T14,T15;
392
+
393
+ /* H *= [r^2,r^2] */
394
+ T15 = S22;
395
+ T0 = H4; T0 = _mm_mul_epu32(T0, S21);
396
+ v01 = H3; v01 = _mm_mul_epu32(v01, T15);
397
+ T14 = S23;
398
+ T1 = H4; T1 = _mm_mul_epu32(T1 , T15);
399
+ v11 = H3; v11 = _mm_mul_epu32(v11, T14);
400
+ T2 = H4; T2 = _mm_mul_epu32(T2 , T14); T0 = _mm_add_epi64(T0, v01);
401
+ T15 = S24;
402
+ v02 = H2; v02 = _mm_mul_epu32(v02, T14);
403
+ T3 = H4; T3 = _mm_mul_epu32(T3 , T15); T1 = _mm_add_epi64(T1, v11);
404
+ v03 = H1; v03 = _mm_mul_epu32(v03, T15);
405
+ v12 = H2; v12 = _mm_mul_epu32(v12, T15); T0 = _mm_add_epi64(T0, v02);
406
+ T14 = R20;
407
+ v21 = H3; v21 = _mm_mul_epu32(v21, T15);
408
+ v31 = H3; v31 = _mm_mul_epu32(v31, T14); T0 = _mm_add_epi64(T0, v03);
409
+ T4 = H4; T4 = _mm_mul_epu32(T4 , T14); T1 = _mm_add_epi64(T1, v12);
410
+ v04 = H0; v04 = _mm_mul_epu32(v04, T14); T2 = _mm_add_epi64(T2, v21);
411
+ v13 = H1; v13 = _mm_mul_epu32(v13, T14); T3 = _mm_add_epi64(T3, v31);
412
+ T15 = R21;
413
+ v22 = H2; v22 = _mm_mul_epu32(v22, T14);
414
+ v32 = H2; v32 = _mm_mul_epu32(v32, T15); T0 = _mm_add_epi64(T0, v04);
415
+ v41 = H3; v41 = _mm_mul_epu32(v41, T15); T1 = _mm_add_epi64(T1, v13);
416
+ v14 = H0; v14 = _mm_mul_epu32(v14, T15); T2 = _mm_add_epi64(T2, v22);
417
+ T14 = R22;
418
+ v23 = H1; v23 = _mm_mul_epu32(v23, T15); T3 = _mm_add_epi64(T3, v32);
419
+ v33 = H1; v33 = _mm_mul_epu32(v33, T14); T4 = _mm_add_epi64(T4, v41);
420
+ v42 = H2; v42 = _mm_mul_epu32(v42, T14); T1 = _mm_add_epi64(T1, v14);
421
+ T15 = R23;
422
+ v24 = H0; v24 = _mm_mul_epu32(v24, T14); T2 = _mm_add_epi64(T2, v23);
423
+ v34 = H0; v34 = _mm_mul_epu32(v34, T15); T3 = _mm_add_epi64(T3, v33);
424
+ v43 = H1; v43 = _mm_mul_epu32(v43, T15); T4 = _mm_add_epi64(T4, v42);
425
+ v44 = H0; v44 = _mm_mul_epu32(v44, R24); T2 = _mm_add_epi64(T2, v24);
426
+ T3 = _mm_add_epi64(T3, v34);
427
+ T4 = _mm_add_epi64(T4, v43);
428
+ T4 = _mm_add_epi64(T4, v44);
429
+
430
+ /* H += [Mx,My] */
431
+ if (m) {
432
+ T5 = _mm_loadu_si128((const xmmi *)(const void *)(m + 0));
433
+ T6 = _mm_loadu_si128((const xmmi *)(const void *)(m + 16));
434
+ T7 = _mm_unpacklo_epi32(T5, T6);
435
+ T8 = _mm_unpackhi_epi32(T5, T6);
436
+ M0 = _mm_unpacklo_epi32(T7, _mm_setzero_si128());
437
+ M1 = _mm_unpackhi_epi32(T7, _mm_setzero_si128());
438
+ M2 = _mm_unpacklo_epi32(T8, _mm_setzero_si128());
439
+ M3 = _mm_unpackhi_epi32(T8, _mm_setzero_si128());
440
+ M1 = _mm_slli_epi64(M1, 6);
441
+ M2 = _mm_slli_epi64(M2, 12);
442
+ M3 = _mm_slli_epi64(M3, 18);
443
+ T0 = _mm_add_epi64(T0, M0);
444
+ T1 = _mm_add_epi64(T1, M1);
445
+ T2 = _mm_add_epi64(T2, M2);
446
+ T3 = _mm_add_epi64(T3, M3);
447
+ T4 = _mm_add_epi64(T4, HIBIT);
448
+ }
449
+
450
+ /* reduce */
451
+ C1 = _mm_srli_epi64(T0, 26); C2 = _mm_srli_epi64(T3, 26); T0 = _mm_and_si128(T0, MMASK); T3 = _mm_and_si128(T3, MMASK); T1 = _mm_add_epi64(T1, C1); T4 = _mm_add_epi64(T4, C2);
452
+ C1 = _mm_srli_epi64(T1, 26); C2 = _mm_srli_epi64(T4, 26); T1 = _mm_and_si128(T1, MMASK); T4 = _mm_and_si128(T4, MMASK); T2 = _mm_add_epi64(T2, C1); T0 = _mm_add_epi64(T0, _mm_mul_epu32(C2, FIVE));
453
+ C1 = _mm_srli_epi64(T2, 26); C2 = _mm_srli_epi64(T0, 26); T2 = _mm_and_si128(T2, MMASK); T0 = _mm_and_si128(T0, MMASK); T3 = _mm_add_epi64(T3, C1); T1 = _mm_add_epi64(T1, C2);
454
+ C1 = _mm_srli_epi64(T3, 26); T3 = _mm_and_si128(T3, MMASK); T4 = _mm_add_epi64(T4, C1);
455
+
456
+ /* H = (H*[r^2,r^2] + [Mx,My]) */
457
+ H0 = T0;
458
+ H1 = T1;
459
+ H2 = T2;
460
+ H3 = T3;
461
+ H4 = T4;
462
+ }
463
+
464
+ if (m) {
465
+ T0 = _mm_shuffle_epi32(H0, _MM_SHUFFLE(0,0,2,0));
466
+ T1 = _mm_shuffle_epi32(H1, _MM_SHUFFLE(0,0,2,0));
467
+ T2 = _mm_shuffle_epi32(H2, _MM_SHUFFLE(0,0,2,0));
468
+ T3 = _mm_shuffle_epi32(H3, _MM_SHUFFLE(0,0,2,0));
469
+ T4 = _mm_shuffle_epi32(H4, _MM_SHUFFLE(0,0,2,0));
470
+ T0 = _mm_unpacklo_epi64(T0, T1);
471
+ T1 = _mm_unpacklo_epi64(T2, T3);
472
+ _mm_storeu_si128((xmmi *)(void *)&st->hh[0], T0);
473
+ _mm_storeu_si128((xmmi *)(void *)&st->hh[4], T1);
474
+ _mm_storel_epi64((xmmi *)(void *)&st->hh[8], T4);
475
+ } else {
476
+ uint32_t t0,t1,t2,t3,t4,b;
477
+ uint64_t h0,h1,h2,g0,g1,g2,c,nc;
478
+
479
+ /* H = H[0]+H[1] */
480
+ T0 = H0;
481
+ T1 = H1;
482
+ T2 = H2;
483
+ T3 = H3;
484
+ T4 = H4;
485
+
486
+ T0 = _mm_add_epi64(T0, _mm_srli_si128(T0, 8));
487
+ T1 = _mm_add_epi64(T1, _mm_srli_si128(T1, 8));
488
+ T2 = _mm_add_epi64(T2, _mm_srli_si128(T2, 8));
489
+ T3 = _mm_add_epi64(T3, _mm_srli_si128(T3, 8));
490
+ T4 = _mm_add_epi64(T4, _mm_srli_si128(T4, 8));
491
+
492
+ t0 = _mm_cvtsi128_si32(T0) ; b = (t0 >> 26); t0 &= 0x3ffffff;
493
+ t1 = _mm_cvtsi128_si32(T1) + b; b = (t1 >> 26); t1 &= 0x3ffffff;
494
+ t2 = _mm_cvtsi128_si32(T2) + b; b = (t2 >> 26); t2 &= 0x3ffffff;
495
+ t3 = _mm_cvtsi128_si32(T3) + b; b = (t3 >> 26); t3 &= 0x3ffffff;
496
+ t4 = _mm_cvtsi128_si32(T4) + b;
497
+
498
+ /* everything except t4 is in range, so this is all safe */
499
+ h0 = (((uint64_t)t0 ) | ((uint64_t)t1 << 26) ) & 0xfffffffffffull;
500
+ h1 = (((uint64_t)t1 >> 18) | ((uint64_t)t2 << 8) | ((uint64_t)t3 << 34)) & 0xfffffffffffull;
501
+ h2 = (((uint64_t)t3 >> 10) | ((uint64_t)t4 << 16) );
502
+
503
+ c = (h2 >> 42); h2 &= 0x3ffffffffff;
504
+ h0 += c * 5; c = (h0 >> 44); h0 &= 0xfffffffffff;
505
+ h1 += c; c = (h1 >> 44); h1 &= 0xfffffffffff;
506
+ h2 += c; c = (h2 >> 42); h2 &= 0x3ffffffffff;
507
+ h0 += c * 5; c = (h0 >> 44); h0 &= 0xfffffffffff;
508
+ h1 += c;
509
+
510
+ g0 = h0 + 5; c = (g0 >> 44); g0 &= 0xfffffffffff;
511
+ g1 = h1 + c; c = (g1 >> 44); g1 &= 0xfffffffffff;
512
+ g2 = h2 + c - ((uint64_t)1 << 42);
513
+
514
+ c = (g2 >> 63) - 1;
515
+ nc = ~c;
516
+ h0 = (h0 & nc) | (g0 & c);
517
+ h1 = (h1 & nc) | (g1 & c);
518
+ h2 = (h2 & nc) | (g2 & c);
519
+
520
+ st->h[0] = h0;
521
+ st->h[1] = h1;
522
+ st->h[2] = h2;
523
+ }
524
+ }
525
+
526
+ static void
527
+ poly1305_update(poly1305_state_internal_t *st, const unsigned char *m,
528
+ unsigned long long bytes)
529
+ {
530
+ unsigned long long i;
531
+
532
+ /* handle leftover */
533
+ if (st->leftover) {
534
+ unsigned long long want = (poly1305_block_size - st->leftover);
535
+
536
+ if (want > bytes)
537
+ want = bytes;
538
+ for (i = 0; i < want; i++)
539
+ st->buffer[st->leftover + i] = m[i];
540
+ bytes -= want;
541
+ m += want;
542
+ st->leftover += want;
543
+ if (st->leftover < poly1305_block_size)
544
+ return;
545
+ poly1305_blocks(st, st->buffer, poly1305_block_size);
546
+ st->leftover = 0;
547
+ }
548
+
549
+ /* process full blocks */
550
+ if (bytes >= poly1305_block_size) {
551
+ unsigned long long want = (bytes & ~(poly1305_block_size - 1));
552
+
553
+ poly1305_blocks(st, m, want);
554
+ m += want;
555
+ bytes -= want;
556
+ }
557
+
558
+ /* store leftover */
559
+ if (bytes) {
560
+ for (i = 0; i < bytes; i++) {
561
+ st->buffer[st->leftover + i] = m[i];
562
+ }
563
+ st->leftover += bytes;
564
+ }
565
+ }
566
+
567
+ static POLY1305_NOINLINE void
568
+ poly1305_finish_ext(poly1305_state_internal_t *st, const unsigned char *m,
569
+ unsigned long long leftover, unsigned char mac[16])
570
+ {
571
+ uint64_t h0,h1,h2;
572
+ uint64_t t0,t1,c;
573
+
574
+ if (leftover) {
575
+ CRYPTO_ALIGN(16) unsigned char final[32] = {0};
576
+ poly1305_block_copy31(final, m, leftover);
577
+ if (leftover != 16) final[leftover] = 1;
578
+ st->flags |= (leftover >= 16) ? poly1305_final_shift8 : poly1305_final_shift16;
579
+ poly1305_blocks(st, final, 32);
580
+ }
581
+
582
+ if (st->flags & poly1305_started) {
583
+ /* finalize, H *= [r^2,r], or H *= [r,1] */
584
+ if (!leftover || (leftover > 16)) {
585
+ st->flags |= poly1305_final_r2_r;
586
+ } else {
587
+ st->flags |= poly1305_final_r_1;
588
+ }
589
+ poly1305_blocks(st, NULL, 32);
590
+ }
591
+
592
+ h0 = st->h[0];
593
+ h1 = st->h[1];
594
+ h2 = st->h[2];
595
+
596
+ /* pad */
597
+ h0 = ((h0 ) | (h1 << 44));
598
+ h1 = ((h1 >> 20) | (h2 << 24));
599
+ #ifdef HAVE_AMD64_ASM
600
+ __asm__ __volatile__("addq %2, %0 ;\n"
601
+ "adcq %3, %1 ;\n"
602
+ : "+r"(h0), "+r"(h1)
603
+ : "r"(st->pad[0]), "r"(st->pad[1])
604
+ : "flags", "cc");
605
+ #else
606
+ {
607
+ uint128_t h;
608
+
609
+ memcpy(&h, &st->pad[0], 16);
610
+ h += ((uint128_t) h1 << 64) | h0;
611
+ h0 = (uint64_t) h;
612
+ h1 = (uint64_t) (h >> 64);
613
+ }
614
+ #endif
615
+ _mm_storeu_si128((xmmi *)(void *)st + 0, _mm_setzero_si128());
616
+ _mm_storeu_si128((xmmi *)(void *)st + 1, _mm_setzero_si128());
617
+ _mm_storeu_si128((xmmi *)(void *)st + 2, _mm_setzero_si128());
618
+ _mm_storeu_si128((xmmi *)(void *)st + 3, _mm_setzero_si128());
619
+ _mm_storeu_si128((xmmi *)(void *)st + 4, _mm_setzero_si128());
620
+ _mm_storeu_si128((xmmi *)(void *)st + 5, _mm_setzero_si128());
621
+ _mm_storeu_si128((xmmi *)(void *)st + 6, _mm_setzero_si128());
622
+ _mm_storeu_si128((xmmi *)(void *)st + 7, _mm_setzero_si128());
623
+
624
+ memcpy(&mac[0], &h0, 8);
625
+ memcpy(&mac[8], &h1, 8);
626
+
627
+ sodium_memzero((void *)st, sizeof *st);
628
+ }
629
+
630
+ static void
631
+ poly1305_finish(poly1305_state_internal_t *st, unsigned char mac[16])
632
+ {
633
+ poly1305_finish_ext(st, st->buffer, st->leftover, mac);
634
+ }
635
+
636
+ static int
637
+ crypto_onetimeauth_poly1305_sse2_init(crypto_onetimeauth_poly1305_state *state,
638
+ const unsigned char *key)
639
+ {
640
+ (void) sizeof(int[sizeof (crypto_onetimeauth_poly1305_state) >=
641
+ sizeof (poly1305_state_internal_t) ? 1 : -1]);
642
+ poly1305_init_ext((poly1305_state_internal_t *)(void *) state, key, 0U);
643
+
644
+ return 0;
645
+ }
646
+
647
+ static int
648
+ crypto_onetimeauth_poly1305_sse2_update(crypto_onetimeauth_poly1305_state *state,
649
+ const unsigned char *in,
650
+ unsigned long long inlen)
651
+ {
652
+ poly1305_update((poly1305_state_internal_t *)(void *) state, in, inlen);
653
+
654
+ return 0;
655
+ }
656
+
657
+ static int
658
+ crypto_onetimeauth_poly1305_sse2_final(crypto_onetimeauth_poly1305_state *state,
659
+ unsigned char *out)
660
+ {
661
+ poly1305_finish((poly1305_state_internal_t *)(void *) state, out);
662
+
663
+ return 0;
664
+ }
665
+
666
+ static int
667
+ crypto_onetimeauth_poly1305_sse2(unsigned char *out, const unsigned char *m,
668
+ unsigned long long inlen,
669
+ const unsigned char *key)
670
+ {
671
+ CRYPTO_ALIGN(64) poly1305_state_internal_t st;
672
+ unsigned long long blocks;
673
+
674
+ poly1305_init_ext(&st, key, inlen);
675
+ blocks = inlen & ~31;
676
+ if (blocks > 0) {
677
+ poly1305_blocks(&st, m, blocks);
678
+ m += blocks;
679
+ inlen -= blocks;
680
+ }
681
+ poly1305_finish_ext(&st, m, inlen, out);
682
+
683
+ return 0;
684
+ }
685
+
686
/*
 * Recompute the tag of `in` under key `k` and compare it with `h` using
 * crypto_verify_16(); that function's result is returned unchanged.
 */
static int
crypto_onetimeauth_poly1305_sse2_verify(const unsigned char *h,
                                        const unsigned char *in,
                                        unsigned long long inlen,
                                        const unsigned char *k)
{
    unsigned char expected[16];

    crypto_onetimeauth_poly1305_sse2(expected, in, inlen, k);

    return crypto_verify_16(h, expected);
}
698
+
699
+ struct crypto_onetimeauth_poly1305_implementation
700
+ crypto_onetimeauth_poly1305_sse2_implementation = {
701
+ SODIUM_C99(.onetimeauth =) crypto_onetimeauth_poly1305_sse2,
702
+ SODIUM_C99(.onetimeauth_verify =) crypto_onetimeauth_poly1305_sse2_verify,
703
+ SODIUM_C99(.onetimeauth_init =) crypto_onetimeauth_poly1305_sse2_init,
704
+ SODIUM_C99(.onetimeauth_update =) crypto_onetimeauth_poly1305_sse2_update,
705
+ SODIUM_C99(.onetimeauth_final =) crypto_onetimeauth_poly1305_sse2_final
706
+ };
707
+
708
+ #endif