ring-native 0.0.0

Files changed (261)
  1. checksums.yaml +7 -0
  2. data/.gitignore +9 -0
  3. data/Gemfile +3 -0
  4. data/README.md +22 -0
  5. data/Rakefile +1 -0
  6. data/ext/ring/extconf.rb +29 -0
  7. data/lib/ring/native.rb +8 -0
  8. data/lib/ring/native/version.rb +5 -0
  9. data/ring-native.gemspec +25 -0
  10. data/vendor/ring/BUILDING.md +40 -0
  11. data/vendor/ring/Cargo.toml +43 -0
  12. data/vendor/ring/LICENSE +185 -0
  13. data/vendor/ring/Makefile +35 -0
  14. data/vendor/ring/PORTING.md +163 -0
  15. data/vendor/ring/README.md +113 -0
  16. data/vendor/ring/STYLE.md +197 -0
  17. data/vendor/ring/appveyor.yml +27 -0
  18. data/vendor/ring/build.rs +108 -0
  19. data/vendor/ring/crypto/aes/aes.c +1142 -0
  20. data/vendor/ring/crypto/aes/aes_test.Windows.vcxproj +25 -0
  21. data/vendor/ring/crypto/aes/aes_test.cc +93 -0
  22. data/vendor/ring/crypto/aes/asm/aes-586.pl +2368 -0
  23. data/vendor/ring/crypto/aes/asm/aes-armv4.pl +1249 -0
  24. data/vendor/ring/crypto/aes/asm/aes-x86_64.pl +2246 -0
  25. data/vendor/ring/crypto/aes/asm/aesni-x86.pl +1318 -0
  26. data/vendor/ring/crypto/aes/asm/aesni-x86_64.pl +2084 -0
  27. data/vendor/ring/crypto/aes/asm/aesv8-armx.pl +675 -0
  28. data/vendor/ring/crypto/aes/asm/bsaes-armv7.pl +1364 -0
  29. data/vendor/ring/crypto/aes/asm/bsaes-x86_64.pl +1565 -0
  30. data/vendor/ring/crypto/aes/asm/vpaes-x86.pl +841 -0
  31. data/vendor/ring/crypto/aes/asm/vpaes-x86_64.pl +1116 -0
  32. data/vendor/ring/crypto/aes/internal.h +87 -0
  33. data/vendor/ring/crypto/aes/mode_wrappers.c +61 -0
  34. data/vendor/ring/crypto/bn/add.c +394 -0
  35. data/vendor/ring/crypto/bn/asm/armv4-mont.pl +694 -0
  36. data/vendor/ring/crypto/bn/asm/armv8-mont.pl +1503 -0
  37. data/vendor/ring/crypto/bn/asm/bn-586.pl +774 -0
  38. data/vendor/ring/crypto/bn/asm/co-586.pl +287 -0
  39. data/vendor/ring/crypto/bn/asm/rsaz-avx2.pl +1882 -0
  40. data/vendor/ring/crypto/bn/asm/x86-mont.pl +592 -0
  41. data/vendor/ring/crypto/bn/asm/x86_64-gcc.c +599 -0
  42. data/vendor/ring/crypto/bn/asm/x86_64-mont.pl +1393 -0
  43. data/vendor/ring/crypto/bn/asm/x86_64-mont5.pl +3507 -0
  44. data/vendor/ring/crypto/bn/bn.c +352 -0
  45. data/vendor/ring/crypto/bn/bn_asn1.c +74 -0
  46. data/vendor/ring/crypto/bn/bn_test.Windows.vcxproj +25 -0
  47. data/vendor/ring/crypto/bn/bn_test.cc +1696 -0
  48. data/vendor/ring/crypto/bn/cmp.c +200 -0
  49. data/vendor/ring/crypto/bn/convert.c +433 -0
  50. data/vendor/ring/crypto/bn/ctx.c +311 -0
  51. data/vendor/ring/crypto/bn/div.c +594 -0
  52. data/vendor/ring/crypto/bn/exponentiation.c +1335 -0
  53. data/vendor/ring/crypto/bn/gcd.c +711 -0
  54. data/vendor/ring/crypto/bn/generic.c +1019 -0
  55. data/vendor/ring/crypto/bn/internal.h +316 -0
  56. data/vendor/ring/crypto/bn/montgomery.c +516 -0
  57. data/vendor/ring/crypto/bn/mul.c +888 -0
  58. data/vendor/ring/crypto/bn/prime.c +829 -0
  59. data/vendor/ring/crypto/bn/random.c +334 -0
  60. data/vendor/ring/crypto/bn/rsaz_exp.c +262 -0
  61. data/vendor/ring/crypto/bn/rsaz_exp.h +53 -0
  62. data/vendor/ring/crypto/bn/shift.c +276 -0
  63. data/vendor/ring/crypto/bytestring/bytestring_test.Windows.vcxproj +25 -0
  64. data/vendor/ring/crypto/bytestring/bytestring_test.cc +421 -0
  65. data/vendor/ring/crypto/bytestring/cbb.c +399 -0
  66. data/vendor/ring/crypto/bytestring/cbs.c +227 -0
  67. data/vendor/ring/crypto/bytestring/internal.h +46 -0
  68. data/vendor/ring/crypto/chacha/chacha_generic.c +140 -0
  69. data/vendor/ring/crypto/chacha/chacha_vec.c +323 -0
  70. data/vendor/ring/crypto/chacha/chacha_vec_arm.S +1447 -0
  71. data/vendor/ring/crypto/chacha/chacha_vec_arm_generate.go +153 -0
  72. data/vendor/ring/crypto/cipher/cipher_test.Windows.vcxproj +25 -0
  73. data/vendor/ring/crypto/cipher/e_aes.c +390 -0
  74. data/vendor/ring/crypto/cipher/e_chacha20poly1305.c +208 -0
  75. data/vendor/ring/crypto/cipher/internal.h +173 -0
  76. data/vendor/ring/crypto/cipher/test/aes_128_gcm_tests.txt +543 -0
  77. data/vendor/ring/crypto/cipher/test/aes_128_key_wrap_tests.txt +9 -0
  78. data/vendor/ring/crypto/cipher/test/aes_256_gcm_tests.txt +475 -0
  79. data/vendor/ring/crypto/cipher/test/aes_256_key_wrap_tests.txt +23 -0
  80. data/vendor/ring/crypto/cipher/test/chacha20_poly1305_old_tests.txt +422 -0
  81. data/vendor/ring/crypto/cipher/test/chacha20_poly1305_tests.txt +484 -0
  82. data/vendor/ring/crypto/cipher/test/cipher_test.txt +100 -0
  83. data/vendor/ring/crypto/constant_time_test.Windows.vcxproj +25 -0
  84. data/vendor/ring/crypto/constant_time_test.c +304 -0
  85. data/vendor/ring/crypto/cpu-arm-asm.S +32 -0
  86. data/vendor/ring/crypto/cpu-arm.c +199 -0
  87. data/vendor/ring/crypto/cpu-intel.c +261 -0
  88. data/vendor/ring/crypto/crypto.c +151 -0
  89. data/vendor/ring/crypto/curve25519/asm/x25519-arm.S +2118 -0
  90. data/vendor/ring/crypto/curve25519/curve25519.c +4888 -0
  91. data/vendor/ring/crypto/curve25519/x25519_test.cc +128 -0
  92. data/vendor/ring/crypto/digest/md32_common.h +181 -0
  93. data/vendor/ring/crypto/ec/asm/p256-x86_64-asm.pl +2725 -0
  94. data/vendor/ring/crypto/ec/ec.c +193 -0
  95. data/vendor/ring/crypto/ec/ec_curves.c +61 -0
  96. data/vendor/ring/crypto/ec/ec_key.c +228 -0
  97. data/vendor/ring/crypto/ec/ec_montgomery.c +114 -0
  98. data/vendor/ring/crypto/ec/example_mul.Windows.vcxproj +25 -0
  99. data/vendor/ring/crypto/ec/internal.h +243 -0
  100. data/vendor/ring/crypto/ec/oct.c +253 -0
  101. data/vendor/ring/crypto/ec/p256-64.c +1794 -0
  102. data/vendor/ring/crypto/ec/p256-x86_64-table.h +9548 -0
  103. data/vendor/ring/crypto/ec/p256-x86_64.c +509 -0
  104. data/vendor/ring/crypto/ec/simple.c +1007 -0
  105. data/vendor/ring/crypto/ec/util-64.c +183 -0
  106. data/vendor/ring/crypto/ec/wnaf.c +508 -0
  107. data/vendor/ring/crypto/ecdh/ecdh.c +155 -0
  108. data/vendor/ring/crypto/ecdsa/ecdsa.c +304 -0
  109. data/vendor/ring/crypto/ecdsa/ecdsa_asn1.c +193 -0
  110. data/vendor/ring/crypto/ecdsa/ecdsa_test.Windows.vcxproj +25 -0
  111. data/vendor/ring/crypto/ecdsa/ecdsa_test.cc +327 -0
  112. data/vendor/ring/crypto/header_removed.h +17 -0
  113. data/vendor/ring/crypto/internal.h +495 -0
  114. data/vendor/ring/crypto/libring.Windows.vcxproj +101 -0
  115. data/vendor/ring/crypto/mem.c +98 -0
  116. data/vendor/ring/crypto/modes/asm/aesni-gcm-x86_64.pl +1045 -0
  117. data/vendor/ring/crypto/modes/asm/ghash-armv4.pl +517 -0
  118. data/vendor/ring/crypto/modes/asm/ghash-x86.pl +1393 -0
  119. data/vendor/ring/crypto/modes/asm/ghash-x86_64.pl +1741 -0
  120. data/vendor/ring/crypto/modes/asm/ghashv8-armx.pl +422 -0
  121. data/vendor/ring/crypto/modes/ctr.c +226 -0
  122. data/vendor/ring/crypto/modes/gcm.c +1206 -0
  123. data/vendor/ring/crypto/modes/gcm_test.Windows.vcxproj +25 -0
  124. data/vendor/ring/crypto/modes/gcm_test.c +348 -0
  125. data/vendor/ring/crypto/modes/internal.h +299 -0
  126. data/vendor/ring/crypto/perlasm/arm-xlate.pl +170 -0
  127. data/vendor/ring/crypto/perlasm/readme +100 -0
  128. data/vendor/ring/crypto/perlasm/x86_64-xlate.pl +1164 -0
  129. data/vendor/ring/crypto/perlasm/x86asm.pl +292 -0
  130. data/vendor/ring/crypto/perlasm/x86gas.pl +263 -0
  131. data/vendor/ring/crypto/perlasm/x86masm.pl +200 -0
  132. data/vendor/ring/crypto/perlasm/x86nasm.pl +187 -0
  133. data/vendor/ring/crypto/poly1305/poly1305.c +331 -0
  134. data/vendor/ring/crypto/poly1305/poly1305_arm.c +301 -0
  135. data/vendor/ring/crypto/poly1305/poly1305_arm_asm.S +2015 -0
  136. data/vendor/ring/crypto/poly1305/poly1305_test.Windows.vcxproj +25 -0
  137. data/vendor/ring/crypto/poly1305/poly1305_test.cc +80 -0
  138. data/vendor/ring/crypto/poly1305/poly1305_test.txt +52 -0
  139. data/vendor/ring/crypto/poly1305/poly1305_vec.c +892 -0
  140. data/vendor/ring/crypto/rand/asm/rdrand-x86_64.pl +75 -0
  141. data/vendor/ring/crypto/rand/internal.h +32 -0
  142. data/vendor/ring/crypto/rand/rand.c +189 -0
  143. data/vendor/ring/crypto/rand/urandom.c +219 -0
  144. data/vendor/ring/crypto/rand/windows.c +56 -0
  145. data/vendor/ring/crypto/refcount_c11.c +66 -0
  146. data/vendor/ring/crypto/refcount_lock.c +53 -0
  147. data/vendor/ring/crypto/refcount_test.Windows.vcxproj +25 -0
  148. data/vendor/ring/crypto/refcount_test.c +58 -0
  149. data/vendor/ring/crypto/rsa/blinding.c +462 -0
  150. data/vendor/ring/crypto/rsa/internal.h +108 -0
  151. data/vendor/ring/crypto/rsa/padding.c +300 -0
  152. data/vendor/ring/crypto/rsa/rsa.c +450 -0
  153. data/vendor/ring/crypto/rsa/rsa_asn1.c +261 -0
  154. data/vendor/ring/crypto/rsa/rsa_impl.c +944 -0
  155. data/vendor/ring/crypto/rsa/rsa_test.Windows.vcxproj +25 -0
  156. data/vendor/ring/crypto/rsa/rsa_test.cc +437 -0
  157. data/vendor/ring/crypto/sha/asm/sha-armv8.pl +436 -0
  158. data/vendor/ring/crypto/sha/asm/sha-x86_64.pl +2390 -0
  159. data/vendor/ring/crypto/sha/asm/sha256-586.pl +1275 -0
  160. data/vendor/ring/crypto/sha/asm/sha256-armv4.pl +735 -0
  161. data/vendor/ring/crypto/sha/asm/sha256-armv8.pl +14 -0
  162. data/vendor/ring/crypto/sha/asm/sha256-x86_64.pl +14 -0
  163. data/vendor/ring/crypto/sha/asm/sha512-586.pl +911 -0
  164. data/vendor/ring/crypto/sha/asm/sha512-armv4.pl +666 -0
  165. data/vendor/ring/crypto/sha/asm/sha512-armv8.pl +14 -0
  166. data/vendor/ring/crypto/sha/asm/sha512-x86_64.pl +14 -0
  167. data/vendor/ring/crypto/sha/sha1.c +271 -0
  168. data/vendor/ring/crypto/sha/sha256.c +204 -0
  169. data/vendor/ring/crypto/sha/sha512.c +355 -0
  170. data/vendor/ring/crypto/test/file_test.cc +326 -0
  171. data/vendor/ring/crypto/test/file_test.h +181 -0
  172. data/vendor/ring/crypto/test/malloc.cc +150 -0
  173. data/vendor/ring/crypto/test/scoped_types.h +95 -0
  174. data/vendor/ring/crypto/test/test.Windows.vcxproj +35 -0
  175. data/vendor/ring/crypto/test/test_util.cc +46 -0
  176. data/vendor/ring/crypto/test/test_util.h +41 -0
  177. data/vendor/ring/crypto/thread_none.c +55 -0
  178. data/vendor/ring/crypto/thread_pthread.c +165 -0
  179. data/vendor/ring/crypto/thread_test.Windows.vcxproj +25 -0
  180. data/vendor/ring/crypto/thread_test.c +200 -0
  181. data/vendor/ring/crypto/thread_win.c +282 -0
  182. data/vendor/ring/examples/checkdigest.rs +103 -0
  183. data/vendor/ring/include/openssl/aes.h +121 -0
  184. data/vendor/ring/include/openssl/arm_arch.h +129 -0
  185. data/vendor/ring/include/openssl/base.h +156 -0
  186. data/vendor/ring/include/openssl/bn.h +794 -0
  187. data/vendor/ring/include/openssl/buffer.h +18 -0
  188. data/vendor/ring/include/openssl/bytestring.h +235 -0
  189. data/vendor/ring/include/openssl/chacha.h +37 -0
  190. data/vendor/ring/include/openssl/cmac.h +76 -0
  191. data/vendor/ring/include/openssl/cpu.h +184 -0
  192. data/vendor/ring/include/openssl/crypto.h +43 -0
  193. data/vendor/ring/include/openssl/curve25519.h +88 -0
  194. data/vendor/ring/include/openssl/ec.h +225 -0
  195. data/vendor/ring/include/openssl/ec_key.h +129 -0
  196. data/vendor/ring/include/openssl/ecdh.h +110 -0
  197. data/vendor/ring/include/openssl/ecdsa.h +156 -0
  198. data/vendor/ring/include/openssl/err.h +201 -0
  199. data/vendor/ring/include/openssl/mem.h +101 -0
  200. data/vendor/ring/include/openssl/obj_mac.h +71 -0
  201. data/vendor/ring/include/openssl/opensslfeatures.h +68 -0
  202. data/vendor/ring/include/openssl/opensslv.h +18 -0
  203. data/vendor/ring/include/openssl/ossl_typ.h +18 -0
  204. data/vendor/ring/include/openssl/poly1305.h +51 -0
  205. data/vendor/ring/include/openssl/rand.h +70 -0
  206. data/vendor/ring/include/openssl/rsa.h +399 -0
  207. data/vendor/ring/include/openssl/thread.h +133 -0
  208. data/vendor/ring/include/openssl/type_check.h +71 -0
  209. data/vendor/ring/mk/Common.props +63 -0
  210. data/vendor/ring/mk/Windows.props +42 -0
  211. data/vendor/ring/mk/WindowsTest.props +18 -0
  212. data/vendor/ring/mk/appveyor.bat +62 -0
  213. data/vendor/ring/mk/bottom_of_makefile.mk +54 -0
  214. data/vendor/ring/mk/ring.mk +266 -0
  215. data/vendor/ring/mk/top_of_makefile.mk +214 -0
  216. data/vendor/ring/mk/travis.sh +40 -0
  217. data/vendor/ring/mk/update-travis-yml.py +229 -0
  218. data/vendor/ring/ring.sln +153 -0
  219. data/vendor/ring/src/aead.rs +682 -0
  220. data/vendor/ring/src/agreement.rs +248 -0
  221. data/vendor/ring/src/c.rs +129 -0
  222. data/vendor/ring/src/constant_time.rs +37 -0
  223. data/vendor/ring/src/der.rs +96 -0
  224. data/vendor/ring/src/digest.rs +690 -0
  225. data/vendor/ring/src/digest_tests.txt +57 -0
  226. data/vendor/ring/src/ecc.rs +28 -0
  227. data/vendor/ring/src/ecc_build.rs +279 -0
  228. data/vendor/ring/src/ecc_curves.rs +117 -0
  229. data/vendor/ring/src/ed25519_tests.txt +2579 -0
  230. data/vendor/ring/src/exe_tests.rs +46 -0
  231. data/vendor/ring/src/ffi.rs +29 -0
  232. data/vendor/ring/src/file_test.rs +187 -0
  233. data/vendor/ring/src/hkdf.rs +153 -0
  234. data/vendor/ring/src/hkdf_tests.txt +59 -0
  235. data/vendor/ring/src/hmac.rs +414 -0
  236. data/vendor/ring/src/hmac_tests.txt +97 -0
  237. data/vendor/ring/src/input.rs +312 -0
  238. data/vendor/ring/src/lib.rs +41 -0
  239. data/vendor/ring/src/pbkdf2.rs +265 -0
  240. data/vendor/ring/src/pbkdf2_tests.txt +113 -0
  241. data/vendor/ring/src/polyfill.rs +57 -0
  242. data/vendor/ring/src/rand.rs +28 -0
  243. data/vendor/ring/src/signature.rs +314 -0
  244. data/vendor/ring/third-party/NIST/README.md +9 -0
  245. data/vendor/ring/third-party/NIST/SHAVS/SHA1LongMsg.rsp +263 -0
  246. data/vendor/ring/third-party/NIST/SHAVS/SHA1Monte.rsp +309 -0
  247. data/vendor/ring/third-party/NIST/SHAVS/SHA1ShortMsg.rsp +267 -0
  248. data/vendor/ring/third-party/NIST/SHAVS/SHA224LongMsg.rsp +263 -0
  249. data/vendor/ring/third-party/NIST/SHAVS/SHA224Monte.rsp +309 -0
  250. data/vendor/ring/third-party/NIST/SHAVS/SHA224ShortMsg.rsp +267 -0
  251. data/vendor/ring/third-party/NIST/SHAVS/SHA256LongMsg.rsp +263 -0
  252. data/vendor/ring/third-party/NIST/SHAVS/SHA256Monte.rsp +309 -0
  253. data/vendor/ring/third-party/NIST/SHAVS/SHA256ShortMsg.rsp +267 -0
  254. data/vendor/ring/third-party/NIST/SHAVS/SHA384LongMsg.rsp +519 -0
  255. data/vendor/ring/third-party/NIST/SHAVS/SHA384Monte.rsp +309 -0
  256. data/vendor/ring/third-party/NIST/SHAVS/SHA384ShortMsg.rsp +523 -0
  257. data/vendor/ring/third-party/NIST/SHAVS/SHA512LongMsg.rsp +519 -0
  258. data/vendor/ring/third-party/NIST/SHAVS/SHA512Monte.rsp +309 -0
  259. data/vendor/ring/third-party/NIST/SHAVS/SHA512ShortMsg.rsp +523 -0
  260. data/vendor/ring/third-party/NIST/sha256sums.txt +1 -0
  261. metadata +333 -0

data/vendor/ring/crypto/poly1305/poly1305_test.Windows.vcxproj
@@ -0,0 +1,25 @@
+ <?xml version="1.0" encoding="utf-8"?>
+ <Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <PropertyGroup Label="Globals">
+ <ProjectGuid>{CD0F021B-E347-4CCA-B5B7-CD1F757E15D6}</ProjectGuid>
+ <TargetName>poly1305_test</TargetName>
+ </PropertyGroup>
+ <ImportGroup Label="PropertySheets">
+ <Import Project="..\..\mk\WindowsTest.props" />
+ </ImportGroup>
+ <PropertyGroup Label="Configuration">
+ <OutDir>$(OutRootDir)test\ring\crypto\poly1305\</OutDir>
+ </PropertyGroup>
+ <ItemGroup>
+ <ClCompile Include="poly1305_test.cc" />
+ </ItemGroup>
+ <ItemGroup>
+ <ProjectReference Include="..\libring.Windows.vcxproj">
+ <Project>{f4c0a1b6-5e09-41c8-8242-3e1f6762fb18}</Project>
+ </ProjectReference>
+ <ProjectReference Include="..\test\test.Windows.vcxproj">
+ <Project>{1dace503-6498-492d-b1ff-f9ee18624443}</Project>
+ </ProjectReference>
+ </ItemGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+ </Project>

data/vendor/ring/crypto/poly1305/poly1305_test.cc
@@ -0,0 +1,80 @@
+ /* Copyright (c) 2015, Google Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
+
+ #include <stdio.h>
+ #include <string.h>
+
+ #include <vector>
+
+ #include <openssl/crypto.h>
+ #include <openssl/poly1305.h>
+
+ #include "../test/file_test.h"
+
+
+ // |CRYPTO_poly1305_finish| requires a 16-byte-aligned output.
+ #if defined(OPENSSL_WINDOWS)
+ // MSVC doesn't support C++11 |alignas|.
+ #define ALIGNED __declspec(align(16))
+ #else
+ #define ALIGNED alignas(16)
+ #endif
+
+ static bool TestPoly1305(FileTest *t, void *arg) {
+ std::vector<uint8_t> key, in, mac;
+ if (!t->GetBytes(&key, "Key") ||
+ !t->GetBytes(&in, "Input") ||
+ !t->GetBytes(&mac, "MAC")) {
+ return false;
+ }
+ if (key.size() != 32 || mac.size() != 16) {
+ t->PrintLine("Invalid test");
+ return false;
+ }
+
+ // Test single-shot operation.
+ poly1305_state state;
+ CRYPTO_poly1305_init(&state, key.data());
+ CRYPTO_poly1305_update(&state, in.data(), in.size());
+ ALIGNED uint8_t out[16];
+ CRYPTO_poly1305_finish(&state, out);
+ if (!t->ExpectBytesEqual(out, 16, mac.data(), mac.size())) {
+ t->PrintLine("Single-shot Poly1305 failed.");
+ return false;
+ }
+
+ // Test streaming byte-by-byte.
+ CRYPTO_poly1305_init(&state, key.data());
+ for (size_t i = 0; i < in.size(); i++) {
+ CRYPTO_poly1305_update(&state, &in[i], 1);
+ }
+ CRYPTO_poly1305_finish(&state, out);
+ if (!t->ExpectBytesEqual(out, 16, mac.data(), mac.size())) {
+ t->PrintLine("Streaming Poly1305 failed.");
+ return false;
+ }
+
+ return true;
+ }
+
+ int main(int argc, char **argv) {
+ CRYPTO_library_init();
+
+ if (argc != 2) {
+ fprintf(stderr, "%s <test file>\n", argv[0]);
+ return 1;
+ }
+
+ return FileTestMain(TestPoly1305, nullptr, argv[1]);
+ }
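
Note for readers: the harness above drives the |CRYPTO_poly1305_*| API through the Key/Input/MAC vectors listed in the next file. The standalone sketch below is not part of the gem; it assumes the same <openssl/poly1305.h> header, linking against libring, and a C++11 compiler for alignas. It computes the first vector (RFC 7539, section 2.5.2) directly:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#include <openssl/crypto.h>
#include <openssl/poly1305.h>

int main(void) {
  CRYPTO_library_init();

  // Key from RFC 7539, section 2.5.2 (85d6be78...49f51b).
  static const uint8_t kKey[32] = {
      0x85, 0xd6, 0xbe, 0x78, 0x57, 0x55, 0x6d, 0x33,
      0x7f, 0x44, 0x52, 0xfe, 0x42, 0xd5, 0x06, 0xa8,
      0x01, 0x03, 0x80, 0x8a, 0xfb, 0x0d, 0xb2, 0xfd,
      0x4a, 0xbf, 0xf6, 0xaf, 0x41, 0x49, 0xf5, 0x1b};
  static const char kMsg[] = "Cryptographic Forum Research Group";

  poly1305_state state;
  CRYPTO_poly1305_init(&state, kKey);
  CRYPTO_poly1305_update(&state, (const uint8_t *)kMsg, strlen(kMsg));

  // CRYPTO_poly1305_finish requires a 16-byte-aligned output buffer.
  alignas(16) uint8_t mac[16];
  CRYPTO_poly1305_finish(&state, mac);

  // Print the tag as lowercase hex, one byte at a time.
  for (size_t i = 0; i < 16; i++) {
    printf("%02x", mac[i]);
  }
  printf("\n");
  return 0;
}

If built and run against libring, this should print a8061dc1305136c6c22b8baf0c0127a9, the MAC recorded for that vector in poly1305_test.txt.
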
data/vendor/ring/crypto/poly1305/poly1305_test.txt
@@ -0,0 +1,52 @@
+ # RFC 7539, section 2.5.2.
+
+ Key = 85d6be7857556d337f4452fe42d506a80103808afb0db2fd4abff6af4149f51b
+ Input = "Cryptographic Forum Research Group"
+ MAC = a8061dc1305136c6c22b8baf0c0127a9
+
+
+ # RFC 7539, section A.3.
+
+ Key = 0000000000000000000000000000000000000000000000000000000000000000
+ Input = 00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
+ MAC = 00000000000000000000000000000000
+
+ Key = 0000000000000000000000000000000036e5f6b5c5e06070f0efca96227a863e
+ Input = 416e79207375626d697373696f6e20746f20746865204945544620696e74656e6465642062792074686520436f6e7472696275746f7220666f72207075626c69636174696f6e20617320616c6c206f722070617274206f6620616e204945544620496e7465726e65742d4472616674206f722052464320616e6420616e792073746174656d656e74206d6164652077697468696e2074686520636f6e74657874206f6620616e204945544620616374697669747920697320636f6e7369646572656420616e20224945544620436f6e747269627574696f6e222e20537563682073746174656d656e747320696e636c756465206f72616c2073746174656d656e747320696e20494554462073657373696f6e732c2061732077656c6c206173207772697474656e20616e6420656c656374726f6e696320636f6d6d756e69636174696f6e73206d61646520617420616e792074696d65206f7220706c6163652c207768696368206172652061646472657373656420746f
+ MAC = 36e5f6b5c5e06070f0efca96227a863e
+
+ Key = 36e5f6b5c5e06070f0efca96227a863e00000000000000000000000000000000
+ Input = 416e79207375626d697373696f6e20746f20746865204945544620696e74656e6465642062792074686520436f6e7472696275746f7220666f72207075626c69636174696f6e20617320616c6c206f722070617274206f6620616e204945544620496e7465726e65742d4472616674206f722052464320616e6420616e792073746174656d656e74206d6164652077697468696e2074686520636f6e74657874206f6620616e204945544620616374697669747920697320636f6e7369646572656420616e20224945544620436f6e747269627574696f6e222e20537563682073746174656d656e747320696e636c756465206f72616c2073746174656d656e747320696e20494554462073657373696f6e732c2061732077656c6c206173207772697474656e20616e6420656c656374726f6e696320636f6d6d756e69636174696f6e73206d61646520617420616e792074696d65206f7220706c6163652c207768696368206172652061646472657373656420746f
+ MAC = f3477e7cd95417af89a6b8794c310cf0
+
+ Key = 1c9240a5eb55d38af333888604f6b5f0473917c1402b80099dca5cbc207075c0
+ Input = 2754776173206272696c6c69672c20616e642074686520736c6974687920746f7665730a446964206779726520616e642067696d626c6520696e2074686520776162653a0a416c6c206d696d737920776572652074686520626f726f676f7665732c0a416e6420746865206d6f6d65207261746873206f757467726162652e
+ MAC = 4541669a7eaaee61e708dc7cbcc5eb62
+
+ Key = 0200000000000000000000000000000000000000000000000000000000000000
+ Input = ffffffffffffffffffffffffffffffff
+ MAC = 03000000000000000000000000000000
+
+ Key = 02000000000000000000000000000000ffffffffffffffffffffffffffffffff
+ Input = 02000000000000000000000000000000
+ MAC = 03000000000000000000000000000000
+
+ Key = 0100000000000000000000000000000000000000000000000000000000000000
+ Input = fffffffffffffffffffffffffffffffff0ffffffffffffffffffffffffffffff11000000000000000000000000000000
+ MAC = 05000000000000000000000000000000
+
+ Key = 0100000000000000000000000000000000000000000000000000000000000000
+ Input = fffffffffffffffffffffffffffffffffbfefefefefefefefefefefefefefefe01010101010101010101010101010101
+ MAC = 00000000000000000000000000000000
+
+ Key = 0200000000000000000000000000000000000000000000000000000000000000
+ Input = fdffffffffffffffffffffffffffffff
+ MAC = faffffffffffffffffffffffffffffff
+
+ Key = 0100000000000000040000000000000000000000000000000000000000000000
+ Input = e33594d7505e43b900000000000000003394d7505e4379cd01000000000000000000000000000000000000000000000001000000000000000000000000000000
+ MAC = 14000000000000005500000000000000
+
+ Key = 0100000000000000040000000000000000000000000000000000000000000000
+ Input = e33594d7505e43b900000000000000003394d7505e4379cd010000000000000000000000000000000000000000000000
+ MAC = 13000000000000000000000000000000
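
Most of the section A.3 entries above are edge cases whose expected tags can be checked by hand. Take the entry whose key is 02 followed by zeros, so the clamped r is 2 and the pad s is 0, and whose input is a single 16-byte block of ff bytes. Poly1305 appends the high bit 2^128 to the block and reduces modulo the prime 2^130 - 5:

  m = 2^128 + (2^128 - 1) = 2^129 - 1
  h = (0 + m) * r mod (2^130 - 5) = (2^130 - 2) mod (2^130 - 5) = 3
  tag = (h + s) mod 2^128 = 3

Serialized little-endian, that is the recorded MAC 03000000000000000000000000000000.
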
data/vendor/ring/crypto/poly1305/poly1305_vec.c
@@ -0,0 +1,892 @@
1
+ /* Copyright (c) 2014, Google Inc.
2
+ *
3
+ * Permission to use, copy, modify, and/or distribute this software for any
4
+ * purpose with or without fee is hereby granted, provided that the above
5
+ * copyright notice and this permission notice appear in all copies.
6
+ *
7
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
10
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
12
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
13
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
14
+
15
+ /* This implementation of poly1305 is by Andrew Moon
16
+ * (https://github.com/floodyberry/poly1305-donna) and released as public
17
+ * domain. It implements SIMD vectorization based on the algorithm described in
18
+ * http://cr.yp.to/papers.html#neoncrypto. Unrolled to 2 powers, i.e. 64 byte
19
+ * block size */
20
+
21
+ #include <openssl/poly1305.h>
22
+
23
+
24
+ #if !defined(OPENSSL_WINDOWS) && defined(OPENSSL_X86_64)
25
+
26
+ #include <emmintrin.h>
27
+
28
+ #define ALIGN(x) __attribute__((aligned(x)))
29
+ /* inline is not a keyword in C89. */
30
+ #define INLINE
31
+ #define U8TO64_LE(m) (*(uint64_t *)(m))
32
+ #define U8TO32_LE(m) (*(uint32_t *)(m))
33
+ #define U64TO8_LE(m, v) (*(uint64_t *)(m)) = v
34
+
35
+ typedef __m128i xmmi;
36
+ typedef unsigned __int128 uint128_t;
37
+
38
+ static const uint32_t ALIGN(16) poly1305_x64_sse2_message_mask[4] = {
39
+ (1 << 26) - 1, 0, (1 << 26) - 1, 0};
40
+ static const uint32_t ALIGN(16) poly1305_x64_sse2_5[4] = {5, 0, 5, 0};
41
+ static const uint32_t ALIGN(16) poly1305_x64_sse2_1shl128[4] = {(1 << 24), 0,
42
+ (1 << 24), 0};
43
+
44
+ static uint128_t INLINE add128(uint128_t a, uint128_t b) { return a + b; }
45
+
46
+ static uint128_t INLINE add128_64(uint128_t a, uint64_t b) { return a + b; }
47
+
48
+ static uint128_t INLINE mul64x64_128(uint64_t a, uint64_t b) {
49
+ return (uint128_t)a * b;
50
+ }
51
+
52
+ static uint64_t INLINE lo128(uint128_t a) { return (uint64_t)a; }
53
+
54
+ static uint64_t INLINE shr128(uint128_t v, const int shift) {
55
+ return (uint64_t)(v >> shift);
56
+ }
57
+
58
+ static uint64_t INLINE shr128_pair(uint64_t hi, uint64_t lo, const int shift) {
59
+ return (uint64_t)((((uint128_t)hi << 64) | lo) >> shift);
60
+ }
61
+
62
+ typedef struct poly1305_power_t {
63
+ union {
64
+ xmmi v;
65
+ uint64_t u[2];
66
+ uint32_t d[4];
67
+ } R20, R21, R22, R23, R24, S21, S22, S23, S24;
68
+ } poly1305_power;
69
+
70
+ typedef struct poly1305_state_internal_t {
71
+ poly1305_power P[2]; /* 288 bytes, top 32 bit halves unused = 144
72
+ bytes of free storage */
73
+ union {
74
+ xmmi H[5]; /* 80 bytes */
75
+ uint64_t HH[10];
76
+ };
77
+ /* uint64_t r0,r1,r2; [24 bytes] */
78
+ /* uint64_t pad0,pad1; [16 bytes] */
79
+ uint64_t started; /* 8 bytes */
80
+ uint64_t leftover; /* 8 bytes */
81
+ uint8_t buffer[64]; /* 64 bytes */
82
+ } poly1305_state_internal; /* 448 bytes total + 63 bytes for
83
+ alignment = 511 bytes raw */
84
+
85
+ static poly1305_state_internal INLINE *poly1305_aligned_state(
86
+ poly1305_state *state) {
87
+ return (poly1305_state_internal *)(((uint64_t)state + 63) & ~63);
88
+ }
89
+
90
+ /* copy 0-63 bytes */
91
+ static void INLINE
92
+ poly1305_block_copy(uint8_t *dst, const uint8_t *src, size_t bytes) {
93
+ size_t offset = src - dst;
94
+ if (bytes & 32) {
95
+ _mm_storeu_si128((xmmi *)(dst + 0),
96
+ _mm_loadu_si128((xmmi *)(dst + offset + 0)));
97
+ _mm_storeu_si128((xmmi *)(dst + 16),
98
+ _mm_loadu_si128((xmmi *)(dst + offset + 16)));
99
+ dst += 32;
100
+ }
101
+ if (bytes & 16) {
102
+ _mm_storeu_si128((xmmi *)dst, _mm_loadu_si128((xmmi *)(dst + offset)));
103
+ dst += 16;
104
+ }
105
+ if (bytes & 8) {
106
+ *(uint64_t *)dst = *(uint64_t *)(dst + offset);
107
+ dst += 8;
108
+ }
109
+ if (bytes & 4) {
110
+ *(uint32_t *)dst = *(uint32_t *)(dst + offset);
111
+ dst += 4;
112
+ }
113
+ if (bytes & 2) {
114
+ *(uint16_t *)dst = *(uint16_t *)(dst + offset);
115
+ dst += 2;
116
+ }
117
+ if (bytes & 1) {
118
+ *(uint8_t *)dst = *(uint8_t *)(dst + offset);
119
+ }
120
+ }
121
+
122
+ /* zero 0-15 bytes */
123
+ static void INLINE poly1305_block_zero(uint8_t *dst, size_t bytes) {
124
+ if (bytes & 8) {
125
+ *(uint64_t *)dst = 0;
126
+ dst += 8;
127
+ }
128
+ if (bytes & 4) {
129
+ *(uint32_t *)dst = 0;
130
+ dst += 4;
131
+ }
132
+ if (bytes & 2) {
133
+ *(uint16_t *)dst = 0;
134
+ dst += 2;
135
+ }
136
+ if (bytes & 1) {
137
+ *(uint8_t *)dst = 0;
138
+ }
139
+ }
140
+
141
+ static size_t INLINE poly1305_min(size_t a, size_t b) {
142
+ return (a < b) ? a : b;
143
+ }
144
+
145
+ void CRYPTO_poly1305_init(poly1305_state *state, const uint8_t key[32]) {
146
+ poly1305_state_internal *st = poly1305_aligned_state(state);
147
+ poly1305_power *p;
148
+ uint64_t r0, r1, r2;
149
+ uint64_t t0, t1;
150
+
151
+ /* clamp key */
152
+ t0 = U8TO64_LE(key + 0);
153
+ t1 = U8TO64_LE(key + 8);
154
+ r0 = t0 & 0xffc0fffffff;
155
+ t0 >>= 44;
156
+ t0 |= t1 << 20;
157
+ r1 = t0 & 0xfffffc0ffff;
158
+ t1 >>= 24;
159
+ r2 = t1 & 0x00ffffffc0f;
160
+
161
+ /* store r in un-used space of st->P[1] */
162
+ p = &st->P[1];
163
+ p->R20.d[1] = (uint32_t)(r0);
164
+ p->R20.d[3] = (uint32_t)(r0 >> 32);
165
+ p->R21.d[1] = (uint32_t)(r1);
166
+ p->R21.d[3] = (uint32_t)(r1 >> 32);
167
+ p->R22.d[1] = (uint32_t)(r2);
168
+ p->R22.d[3] = (uint32_t)(r2 >> 32);
169
+
170
+ /* store pad */
171
+ p->R23.d[1] = U8TO32_LE(key + 16);
172
+ p->R23.d[3] = U8TO32_LE(key + 20);
173
+ p->R24.d[1] = U8TO32_LE(key + 24);
174
+ p->R24.d[3] = U8TO32_LE(key + 28);
175
+
176
+ /* H = 0 */
177
+ st->H[0] = _mm_setzero_si128();
178
+ st->H[1] = _mm_setzero_si128();
179
+ st->H[2] = _mm_setzero_si128();
180
+ st->H[3] = _mm_setzero_si128();
181
+ st->H[4] = _mm_setzero_si128();
182
+
183
+ st->started = 0;
184
+ st->leftover = 0;
185
+ }
186
+
187
+ static void poly1305_first_block(poly1305_state_internal *st,
188
+ const uint8_t *m) {
189
+ const xmmi MMASK = _mm_load_si128((xmmi *)poly1305_x64_sse2_message_mask);
190
+ const xmmi FIVE = _mm_load_si128((xmmi *)poly1305_x64_sse2_5);
191
+ const xmmi HIBIT = _mm_load_si128((xmmi *)poly1305_x64_sse2_1shl128);
192
+ xmmi T5, T6;
193
+ poly1305_power *p;
194
+ uint128_t d[3];
195
+ uint64_t r0, r1, r2;
196
+ uint64_t r20, r21, r22, s22;
197
+ uint64_t pad0, pad1;
198
+ uint64_t c;
199
+ uint64_t i;
200
+
201
+ /* pull out stored info */
202
+ p = &st->P[1];
203
+
204
+ r0 = ((uint64_t)p->R20.d[3] << 32) | (uint64_t)p->R20.d[1];
205
+ r1 = ((uint64_t)p->R21.d[3] << 32) | (uint64_t)p->R21.d[1];
206
+ r2 = ((uint64_t)p->R22.d[3] << 32) | (uint64_t)p->R22.d[1];
207
+ pad0 = ((uint64_t)p->R23.d[3] << 32) | (uint64_t)p->R23.d[1];
208
+ pad1 = ((uint64_t)p->R24.d[3] << 32) | (uint64_t)p->R24.d[1];
209
+
210
+ /* compute powers r^2,r^4 */
211
+ r20 = r0;
212
+ r21 = r1;
213
+ r22 = r2;
214
+ for (i = 0; i < 2; i++) {
215
+ s22 = r22 * (5 << 2);
216
+
217
+ d[0] = add128(mul64x64_128(r20, r20), mul64x64_128(r21 * 2, s22));
218
+ d[1] = add128(mul64x64_128(r22, s22), mul64x64_128(r20 * 2, r21));
219
+ d[2] = add128(mul64x64_128(r21, r21), mul64x64_128(r22 * 2, r20));
220
+
221
+ r20 = lo128(d[0]) & 0xfffffffffff;
222
+ c = shr128(d[0], 44);
223
+ d[1] = add128_64(d[1], c);
224
+ r21 = lo128(d[1]) & 0xfffffffffff;
225
+ c = shr128(d[1], 44);
226
+ d[2] = add128_64(d[2], c);
227
+ r22 = lo128(d[2]) & 0x3ffffffffff;
228
+ c = shr128(d[2], 42);
229
+ r20 += c * 5;
230
+ c = (r20 >> 44);
231
+ r20 = r20 & 0xfffffffffff;
232
+ r21 += c;
233
+
234
+ p->R20.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)(r20)&0x3ffffff),
235
+ _MM_SHUFFLE(1, 0, 1, 0));
236
+ p->R21.v = _mm_shuffle_epi32(
237
+ _mm_cvtsi32_si128((uint32_t)((r20 >> 26) | (r21 << 18)) & 0x3ffffff),
238
+ _MM_SHUFFLE(1, 0, 1, 0));
239
+ p->R22.v =
240
+ _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r21 >> 8)) & 0x3ffffff),
241
+ _MM_SHUFFLE(1, 0, 1, 0));
242
+ p->R23.v = _mm_shuffle_epi32(
243
+ _mm_cvtsi32_si128((uint32_t)((r21 >> 34) | (r22 << 10)) & 0x3ffffff),
244
+ _MM_SHUFFLE(1, 0, 1, 0));
245
+ p->R24.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r22 >> 16))),
246
+ _MM_SHUFFLE(1, 0, 1, 0));
247
+ p->S21.v = _mm_mul_epu32(p->R21.v, FIVE);
248
+ p->S22.v = _mm_mul_epu32(p->R22.v, FIVE);
249
+ p->S23.v = _mm_mul_epu32(p->R23.v, FIVE);
250
+ p->S24.v = _mm_mul_epu32(p->R24.v, FIVE);
251
+ p--;
252
+ }
253
+
254
+ /* put saved info back */
255
+ p = &st->P[1];
256
+ p->R20.d[1] = (uint32_t)(r0);
257
+ p->R20.d[3] = (uint32_t)(r0 >> 32);
258
+ p->R21.d[1] = (uint32_t)(r1);
259
+ p->R21.d[3] = (uint32_t)(r1 >> 32);
260
+ p->R22.d[1] = (uint32_t)(r2);
261
+ p->R22.d[3] = (uint32_t)(r2 >> 32);
262
+ p->R23.d[1] = (uint32_t)(pad0);
263
+ p->R23.d[3] = (uint32_t)(pad0 >> 32);
264
+ p->R24.d[1] = (uint32_t)(pad1);
265
+ p->R24.d[3] = (uint32_t)(pad1 >> 32);
266
+
267
+ /* H = [Mx,My] */
268
+ T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 0)),
269
+ _mm_loadl_epi64((xmmi *)(m + 16)));
270
+ T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 8)),
271
+ _mm_loadl_epi64((xmmi *)(m + 24)));
272
+ st->H[0] = _mm_and_si128(MMASK, T5);
273
+ st->H[1] = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
274
+ T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12));
275
+ st->H[2] = _mm_and_si128(MMASK, T5);
276
+ st->H[3] = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
277
+ st->H[4] = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT);
278
+ }
279
+
280
+ static void poly1305_blocks(poly1305_state_internal *st, const uint8_t *m,
281
+ size_t bytes) {
282
+ const xmmi MMASK = _mm_load_si128((xmmi *)poly1305_x64_sse2_message_mask);
283
+ const xmmi FIVE = _mm_load_si128((xmmi *)poly1305_x64_sse2_5);
284
+ const xmmi HIBIT = _mm_load_si128((xmmi *)poly1305_x64_sse2_1shl128);
285
+
286
+ poly1305_power *p;
287
+ xmmi H0, H1, H2, H3, H4;
288
+ xmmi T0, T1, T2, T3, T4, T5, T6;
289
+ xmmi M0, M1, M2, M3, M4;
290
+ xmmi C1, C2;
291
+
292
+ H0 = st->H[0];
293
+ H1 = st->H[1];
294
+ H2 = st->H[2];
295
+ H3 = st->H[3];
296
+ H4 = st->H[4];
297
+
298
+ while (bytes >= 64) {
299
+ /* H *= [r^4,r^4] */
300
+ p = &st->P[0];
301
+ T0 = _mm_mul_epu32(H0, p->R20.v);
302
+ T1 = _mm_mul_epu32(H0, p->R21.v);
303
+ T2 = _mm_mul_epu32(H0, p->R22.v);
304
+ T3 = _mm_mul_epu32(H0, p->R23.v);
305
+ T4 = _mm_mul_epu32(H0, p->R24.v);
306
+ T5 = _mm_mul_epu32(H1, p->S24.v);
307
+ T6 = _mm_mul_epu32(H1, p->R20.v);
308
+ T0 = _mm_add_epi64(T0, T5);
309
+ T1 = _mm_add_epi64(T1, T6);
310
+ T5 = _mm_mul_epu32(H2, p->S23.v);
311
+ T6 = _mm_mul_epu32(H2, p->S24.v);
312
+ T0 = _mm_add_epi64(T0, T5);
313
+ T1 = _mm_add_epi64(T1, T6);
314
+ T5 = _mm_mul_epu32(H3, p->S22.v);
315
+ T6 = _mm_mul_epu32(H3, p->S23.v);
316
+ T0 = _mm_add_epi64(T0, T5);
317
+ T1 = _mm_add_epi64(T1, T6);
318
+ T5 = _mm_mul_epu32(H4, p->S21.v);
319
+ T6 = _mm_mul_epu32(H4, p->S22.v);
320
+ T0 = _mm_add_epi64(T0, T5);
321
+ T1 = _mm_add_epi64(T1, T6);
322
+ T5 = _mm_mul_epu32(H1, p->R21.v);
323
+ T6 = _mm_mul_epu32(H1, p->R22.v);
324
+ T2 = _mm_add_epi64(T2, T5);
325
+ T3 = _mm_add_epi64(T3, T6);
326
+ T5 = _mm_mul_epu32(H2, p->R20.v);
327
+ T6 = _mm_mul_epu32(H2, p->R21.v);
328
+ T2 = _mm_add_epi64(T2, T5);
329
+ T3 = _mm_add_epi64(T3, T6);
330
+ T5 = _mm_mul_epu32(H3, p->S24.v);
331
+ T6 = _mm_mul_epu32(H3, p->R20.v);
332
+ T2 = _mm_add_epi64(T2, T5);
333
+ T3 = _mm_add_epi64(T3, T6);
334
+ T5 = _mm_mul_epu32(H4, p->S23.v);
335
+ T6 = _mm_mul_epu32(H4, p->S24.v);
336
+ T2 = _mm_add_epi64(T2, T5);
337
+ T3 = _mm_add_epi64(T3, T6);
338
+ T5 = _mm_mul_epu32(H1, p->R23.v);
339
+ T4 = _mm_add_epi64(T4, T5);
340
+ T5 = _mm_mul_epu32(H2, p->R22.v);
341
+ T4 = _mm_add_epi64(T4, T5);
342
+ T5 = _mm_mul_epu32(H3, p->R21.v);
343
+ T4 = _mm_add_epi64(T4, T5);
344
+ T5 = _mm_mul_epu32(H4, p->R20.v);
345
+ T4 = _mm_add_epi64(T4, T5);
346
+
347
+ /* H += [Mx,My]*[r^2,r^2] */
348
+ T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 0)),
349
+ _mm_loadl_epi64((xmmi *)(m + 16)));
350
+ T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 8)),
351
+ _mm_loadl_epi64((xmmi *)(m + 24)));
352
+ M0 = _mm_and_si128(MMASK, T5);
353
+ M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
354
+ T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12));
355
+ M2 = _mm_and_si128(MMASK, T5);
356
+ M3 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
357
+ M4 = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT);
358
+
359
+ p = &st->P[1];
360
+ T5 = _mm_mul_epu32(M0, p->R20.v);
361
+ T6 = _mm_mul_epu32(M0, p->R21.v);
362
+ T0 = _mm_add_epi64(T0, T5);
363
+ T1 = _mm_add_epi64(T1, T6);
364
+ T5 = _mm_mul_epu32(M1, p->S24.v);
365
+ T6 = _mm_mul_epu32(M1, p->R20.v);
366
+ T0 = _mm_add_epi64(T0, T5);
367
+ T1 = _mm_add_epi64(T1, T6);
368
+ T5 = _mm_mul_epu32(M2, p->S23.v);
369
+ T6 = _mm_mul_epu32(M2, p->S24.v);
370
+ T0 = _mm_add_epi64(T0, T5);
371
+ T1 = _mm_add_epi64(T1, T6);
372
+ T5 = _mm_mul_epu32(M3, p->S22.v);
373
+ T6 = _mm_mul_epu32(M3, p->S23.v);
374
+ T0 = _mm_add_epi64(T0, T5);
375
+ T1 = _mm_add_epi64(T1, T6);
376
+ T5 = _mm_mul_epu32(M4, p->S21.v);
377
+ T6 = _mm_mul_epu32(M4, p->S22.v);
378
+ T0 = _mm_add_epi64(T0, T5);
379
+ T1 = _mm_add_epi64(T1, T6);
380
+ T5 = _mm_mul_epu32(M0, p->R22.v);
381
+ T6 = _mm_mul_epu32(M0, p->R23.v);
382
+ T2 = _mm_add_epi64(T2, T5);
383
+ T3 = _mm_add_epi64(T3, T6);
384
+ T5 = _mm_mul_epu32(M1, p->R21.v);
385
+ T6 = _mm_mul_epu32(M1, p->R22.v);
386
+ T2 = _mm_add_epi64(T2, T5);
387
+ T3 = _mm_add_epi64(T3, T6);
388
+ T5 = _mm_mul_epu32(M2, p->R20.v);
389
+ T6 = _mm_mul_epu32(M2, p->R21.v);
390
+ T2 = _mm_add_epi64(T2, T5);
391
+ T3 = _mm_add_epi64(T3, T6);
392
+ T5 = _mm_mul_epu32(M3, p->S24.v);
393
+ T6 = _mm_mul_epu32(M3, p->R20.v);
394
+ T2 = _mm_add_epi64(T2, T5);
395
+ T3 = _mm_add_epi64(T3, T6);
396
+ T5 = _mm_mul_epu32(M4, p->S23.v);
397
+ T6 = _mm_mul_epu32(M4, p->S24.v);
398
+ T2 = _mm_add_epi64(T2, T5);
399
+ T3 = _mm_add_epi64(T3, T6);
400
+ T5 = _mm_mul_epu32(M0, p->R24.v);
401
+ T4 = _mm_add_epi64(T4, T5);
402
+ T5 = _mm_mul_epu32(M1, p->R23.v);
403
+ T4 = _mm_add_epi64(T4, T5);
404
+ T5 = _mm_mul_epu32(M2, p->R22.v);
405
+ T4 = _mm_add_epi64(T4, T5);
406
+ T5 = _mm_mul_epu32(M3, p->R21.v);
407
+ T4 = _mm_add_epi64(T4, T5);
408
+ T5 = _mm_mul_epu32(M4, p->R20.v);
409
+ T4 = _mm_add_epi64(T4, T5);
410
+
411
+ /* H += [Mx,My] */
412
+ T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 32)),
413
+ _mm_loadl_epi64((xmmi *)(m + 48)));
414
+ T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 40)),
415
+ _mm_loadl_epi64((xmmi *)(m + 56)));
416
+ M0 = _mm_and_si128(MMASK, T5);
417
+ M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
418
+ T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12));
419
+ M2 = _mm_and_si128(MMASK, T5);
420
+ M3 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
421
+ M4 = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT);
422
+
423
+ T0 = _mm_add_epi64(T0, M0);
424
+ T1 = _mm_add_epi64(T1, M1);
425
+ T2 = _mm_add_epi64(T2, M2);
426
+ T3 = _mm_add_epi64(T3, M3);
427
+ T4 = _mm_add_epi64(T4, M4);
428
+
429
+ /* reduce */
430
+ C1 = _mm_srli_epi64(T0, 26);
431
+ C2 = _mm_srli_epi64(T3, 26);
432
+ T0 = _mm_and_si128(T0, MMASK);
433
+ T3 = _mm_and_si128(T3, MMASK);
434
+ T1 = _mm_add_epi64(T1, C1);
435
+ T4 = _mm_add_epi64(T4, C2);
436
+ C1 = _mm_srli_epi64(T1, 26);
437
+ C2 = _mm_srli_epi64(T4, 26);
438
+ T1 = _mm_and_si128(T1, MMASK);
439
+ T4 = _mm_and_si128(T4, MMASK);
440
+ T2 = _mm_add_epi64(T2, C1);
441
+ T0 = _mm_add_epi64(T0, _mm_mul_epu32(C2, FIVE));
442
+ C1 = _mm_srli_epi64(T2, 26);
443
+ C2 = _mm_srli_epi64(T0, 26);
444
+ T2 = _mm_and_si128(T2, MMASK);
445
+ T0 = _mm_and_si128(T0, MMASK);
446
+ T3 = _mm_add_epi64(T3, C1);
447
+ T1 = _mm_add_epi64(T1, C2);
448
+ C1 = _mm_srli_epi64(T3, 26);
449
+ T3 = _mm_and_si128(T3, MMASK);
450
+ T4 = _mm_add_epi64(T4, C1);
451
+
452
+ /* H = (H*[r^4,r^4] + [Mx,My]*[r^2,r^2] + [Mx,My]) */
453
+ H0 = T0;
454
+ H1 = T1;
455
+ H2 = T2;
456
+ H3 = T3;
457
+ H4 = T4;
458
+
459
+ m += 64;
460
+ bytes -= 64;
461
+ }
462
+
463
+ st->H[0] = H0;
464
+ st->H[1] = H1;
465
+ st->H[2] = H2;
466
+ st->H[3] = H3;
467
+ st->H[4] = H4;
468
+ }
469
+
470
+ static size_t poly1305_combine(poly1305_state_internal *st, const uint8_t *m,
471
+ size_t bytes) {
472
+ const xmmi MMASK = _mm_load_si128((xmmi *)poly1305_x64_sse2_message_mask);
473
+ const xmmi HIBIT = _mm_load_si128((xmmi *)poly1305_x64_sse2_1shl128);
474
+ const xmmi FIVE = _mm_load_si128((xmmi *)poly1305_x64_sse2_5);
475
+
476
+ poly1305_power *p;
477
+ xmmi H0, H1, H2, H3, H4;
478
+ xmmi M0, M1, M2, M3, M4;
479
+ xmmi T0, T1, T2, T3, T4, T5, T6;
480
+ xmmi C1, C2;
481
+
482
+ uint64_t r0, r1, r2;
483
+ uint64_t t0, t1, t2, t3, t4;
484
+ uint64_t c;
485
+ size_t consumed = 0;
486
+
487
+ H0 = st->H[0];
488
+ H1 = st->H[1];
489
+ H2 = st->H[2];
490
+ H3 = st->H[3];
491
+ H4 = st->H[4];
492
+
493
+ /* p = [r^2,r^2] */
494
+ p = &st->P[1];
495
+
496
+ if (bytes >= 32) {
497
+ /* H *= [r^2,r^2] */
498
+ T0 = _mm_mul_epu32(H0, p->R20.v);
499
+ T1 = _mm_mul_epu32(H0, p->R21.v);
500
+ T2 = _mm_mul_epu32(H0, p->R22.v);
501
+ T3 = _mm_mul_epu32(H0, p->R23.v);
502
+ T4 = _mm_mul_epu32(H0, p->R24.v);
503
+ T5 = _mm_mul_epu32(H1, p->S24.v);
504
+ T6 = _mm_mul_epu32(H1, p->R20.v);
505
+ T0 = _mm_add_epi64(T0, T5);
506
+ T1 = _mm_add_epi64(T1, T6);
507
+ T5 = _mm_mul_epu32(H2, p->S23.v);
508
+ T6 = _mm_mul_epu32(H2, p->S24.v);
509
+ T0 = _mm_add_epi64(T0, T5);
510
+ T1 = _mm_add_epi64(T1, T6);
511
+ T5 = _mm_mul_epu32(H3, p->S22.v);
512
+ T6 = _mm_mul_epu32(H3, p->S23.v);
513
+ T0 = _mm_add_epi64(T0, T5);
514
+ T1 = _mm_add_epi64(T1, T6);
515
+ T5 = _mm_mul_epu32(H4, p->S21.v);
516
+ T6 = _mm_mul_epu32(H4, p->S22.v);
517
+ T0 = _mm_add_epi64(T0, T5);
518
+ T1 = _mm_add_epi64(T1, T6);
519
+ T5 = _mm_mul_epu32(H1, p->R21.v);
520
+ T6 = _mm_mul_epu32(H1, p->R22.v);
521
+ T2 = _mm_add_epi64(T2, T5);
522
+ T3 = _mm_add_epi64(T3, T6);
523
+ T5 = _mm_mul_epu32(H2, p->R20.v);
524
+ T6 = _mm_mul_epu32(H2, p->R21.v);
525
+ T2 = _mm_add_epi64(T2, T5);
526
+ T3 = _mm_add_epi64(T3, T6);
527
+ T5 = _mm_mul_epu32(H3, p->S24.v);
528
+ T6 = _mm_mul_epu32(H3, p->R20.v);
529
+ T2 = _mm_add_epi64(T2, T5);
530
+ T3 = _mm_add_epi64(T3, T6);
531
+ T5 = _mm_mul_epu32(H4, p->S23.v);
532
+ T6 = _mm_mul_epu32(H4, p->S24.v);
533
+ T2 = _mm_add_epi64(T2, T5);
534
+ T3 = _mm_add_epi64(T3, T6);
535
+ T5 = _mm_mul_epu32(H1, p->R23.v);
536
+ T4 = _mm_add_epi64(T4, T5);
537
+ T5 = _mm_mul_epu32(H2, p->R22.v);
538
+ T4 = _mm_add_epi64(T4, T5);
539
+ T5 = _mm_mul_epu32(H3, p->R21.v);
540
+ T4 = _mm_add_epi64(T4, T5);
541
+ T5 = _mm_mul_epu32(H4, p->R20.v);
542
+ T4 = _mm_add_epi64(T4, T5);
543
+
544
+ /* H += [Mx,My] */
545
+ T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 0)),
546
+ _mm_loadl_epi64((xmmi *)(m + 16)));
547
+ T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 8)),
548
+ _mm_loadl_epi64((xmmi *)(m + 24)));
549
+ M0 = _mm_and_si128(MMASK, T5);
550
+ M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
551
+ T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12));
552
+ M2 = _mm_and_si128(MMASK, T5);
553
+ M3 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
554
+ M4 = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT);
555
+
556
+ T0 = _mm_add_epi64(T0, M0);
557
+ T1 = _mm_add_epi64(T1, M1);
558
+ T2 = _mm_add_epi64(T2, M2);
559
+ T3 = _mm_add_epi64(T3, M3);
560
+ T4 = _mm_add_epi64(T4, M4);
561
+
562
+ /* reduce */
563
+ C1 = _mm_srli_epi64(T0, 26);
564
+ C2 = _mm_srli_epi64(T3, 26);
565
+ T0 = _mm_and_si128(T0, MMASK);
566
+ T3 = _mm_and_si128(T3, MMASK);
567
+ T1 = _mm_add_epi64(T1, C1);
568
+ T4 = _mm_add_epi64(T4, C2);
569
+ C1 = _mm_srli_epi64(T1, 26);
570
+ C2 = _mm_srli_epi64(T4, 26);
571
+ T1 = _mm_and_si128(T1, MMASK);
572
+ T4 = _mm_and_si128(T4, MMASK);
573
+ T2 = _mm_add_epi64(T2, C1);
574
+ T0 = _mm_add_epi64(T0, _mm_mul_epu32(C2, FIVE));
575
+ C1 = _mm_srli_epi64(T2, 26);
576
+ C2 = _mm_srli_epi64(T0, 26);
577
+ T2 = _mm_and_si128(T2, MMASK);
578
+ T0 = _mm_and_si128(T0, MMASK);
579
+ T3 = _mm_add_epi64(T3, C1);
580
+ T1 = _mm_add_epi64(T1, C2);
581
+ C1 = _mm_srli_epi64(T3, 26);
582
+ T3 = _mm_and_si128(T3, MMASK);
583
+ T4 = _mm_add_epi64(T4, C1);
584
+
585
+ /* H = (H*[r^2,r^2] + [Mx,My]) */
586
+ H0 = T0;
587
+ H1 = T1;
588
+ H2 = T2;
589
+ H3 = T3;
590
+ H4 = T4;
591
+
592
+ consumed = 32;
593
+ }
594
+
595
+ /* finalize, H *= [r^2,r] */
596
+ r0 = ((uint64_t)p->R20.d[3] << 32) | (uint64_t)p->R20.d[1];
597
+ r1 = ((uint64_t)p->R21.d[3] << 32) | (uint64_t)p->R21.d[1];
598
+ r2 = ((uint64_t)p->R22.d[3] << 32) | (uint64_t)p->R22.d[1];
599
+
600
+ p->R20.d[2] = (uint32_t)(r0)&0x3ffffff;
601
+ p->R21.d[2] = (uint32_t)((r0 >> 26) | (r1 << 18)) & 0x3ffffff;
602
+ p->R22.d[2] = (uint32_t)((r1 >> 8)) & 0x3ffffff;
603
+ p->R23.d[2] = (uint32_t)((r1 >> 34) | (r2 << 10)) & 0x3ffffff;
604
+ p->R24.d[2] = (uint32_t)((r2 >> 16));
605
+ p->S21.d[2] = p->R21.d[2] * 5;
606
+ p->S22.d[2] = p->R22.d[2] * 5;
607
+ p->S23.d[2] = p->R23.d[2] * 5;
608
+ p->S24.d[2] = p->R24.d[2] * 5;
609
+
610
+ /* H *= [r^2,r] */
611
+ T0 = _mm_mul_epu32(H0, p->R20.v);
612
+ T1 = _mm_mul_epu32(H0, p->R21.v);
613
+ T2 = _mm_mul_epu32(H0, p->R22.v);
614
+ T3 = _mm_mul_epu32(H0, p->R23.v);
615
+ T4 = _mm_mul_epu32(H0, p->R24.v);
616
+ T5 = _mm_mul_epu32(H1, p->S24.v);
617
+ T6 = _mm_mul_epu32(H1, p->R20.v);
618
+ T0 = _mm_add_epi64(T0, T5);
619
+ T1 = _mm_add_epi64(T1, T6);
620
+ T5 = _mm_mul_epu32(H2, p->S23.v);
621
+ T6 = _mm_mul_epu32(H2, p->S24.v);
622
+ T0 = _mm_add_epi64(T0, T5);
623
+ T1 = _mm_add_epi64(T1, T6);
624
+ T5 = _mm_mul_epu32(H3, p->S22.v);
625
+ T6 = _mm_mul_epu32(H3, p->S23.v);
626
+ T0 = _mm_add_epi64(T0, T5);
627
+ T1 = _mm_add_epi64(T1, T6);
628
+ T5 = _mm_mul_epu32(H4, p->S21.v);
629
+ T6 = _mm_mul_epu32(H4, p->S22.v);
630
+ T0 = _mm_add_epi64(T0, T5);
631
+ T1 = _mm_add_epi64(T1, T6);
632
+ T5 = _mm_mul_epu32(H1, p->R21.v);
633
+ T6 = _mm_mul_epu32(H1, p->R22.v);
634
+ T2 = _mm_add_epi64(T2, T5);
635
+ T3 = _mm_add_epi64(T3, T6);
636
+ T5 = _mm_mul_epu32(H2, p->R20.v);
637
+ T6 = _mm_mul_epu32(H2, p->R21.v);
638
+ T2 = _mm_add_epi64(T2, T5);
639
+ T3 = _mm_add_epi64(T3, T6);
640
+ T5 = _mm_mul_epu32(H3, p->S24.v);
641
+ T6 = _mm_mul_epu32(H3, p->R20.v);
642
+ T2 = _mm_add_epi64(T2, T5);
643
+ T3 = _mm_add_epi64(T3, T6);
644
+ T5 = _mm_mul_epu32(H4, p->S23.v);
645
+ T6 = _mm_mul_epu32(H4, p->S24.v);
646
+ T2 = _mm_add_epi64(T2, T5);
647
+ T3 = _mm_add_epi64(T3, T6);
648
+ T5 = _mm_mul_epu32(H1, p->R23.v);
649
+ T4 = _mm_add_epi64(T4, T5);
650
+ T5 = _mm_mul_epu32(H2, p->R22.v);
651
+ T4 = _mm_add_epi64(T4, T5);
652
+ T5 = _mm_mul_epu32(H3, p->R21.v);
653
+ T4 = _mm_add_epi64(T4, T5);
654
+ T5 = _mm_mul_epu32(H4, p->R20.v);
655
+ T4 = _mm_add_epi64(T4, T5);
656
+
657
+ C1 = _mm_srli_epi64(T0, 26);
658
+ C2 = _mm_srli_epi64(T3, 26);
659
+ T0 = _mm_and_si128(T0, MMASK);
660
+ T3 = _mm_and_si128(T3, MMASK);
661
+ T1 = _mm_add_epi64(T1, C1);
662
+ T4 = _mm_add_epi64(T4, C2);
663
+ C1 = _mm_srli_epi64(T1, 26);
664
+ C2 = _mm_srli_epi64(T4, 26);
665
+ T1 = _mm_and_si128(T1, MMASK);
666
+ T4 = _mm_and_si128(T4, MMASK);
667
+ T2 = _mm_add_epi64(T2, C1);
668
+ T0 = _mm_add_epi64(T0, _mm_mul_epu32(C2, FIVE));
669
+ C1 = _mm_srli_epi64(T2, 26);
670
+ C2 = _mm_srli_epi64(T0, 26);
671
+ T2 = _mm_and_si128(T2, MMASK);
672
+ T0 = _mm_and_si128(T0, MMASK);
673
+ T3 = _mm_add_epi64(T3, C1);
674
+ T1 = _mm_add_epi64(T1, C2);
675
+ C1 = _mm_srli_epi64(T3, 26);
676
+ T3 = _mm_and_si128(T3, MMASK);
677
+ T4 = _mm_add_epi64(T4, C1);
678
+
679
+ /* H = H[0]+H[1] */
680
+ H0 = _mm_add_epi64(T0, _mm_srli_si128(T0, 8));
681
+ H1 = _mm_add_epi64(T1, _mm_srli_si128(T1, 8));
682
+ H2 = _mm_add_epi64(T2, _mm_srli_si128(T2, 8));
683
+ H3 = _mm_add_epi64(T3, _mm_srli_si128(T3, 8));
684
+ H4 = _mm_add_epi64(T4, _mm_srli_si128(T4, 8));
685
+
686
+ t0 = _mm_cvtsi128_si32(H0);
687
+ c = (t0 >> 26);
688
+ t0 &= 0x3ffffff;
689
+ t1 = _mm_cvtsi128_si32(H1) + c;
690
+ c = (t1 >> 26);
691
+ t1 &= 0x3ffffff;
692
+ t2 = _mm_cvtsi128_si32(H2) + c;
693
+ c = (t2 >> 26);
694
+ t2 &= 0x3ffffff;
695
+ t3 = _mm_cvtsi128_si32(H3) + c;
696
+ c = (t3 >> 26);
697
+ t3 &= 0x3ffffff;
698
+ t4 = _mm_cvtsi128_si32(H4) + c;
699
+ c = (t4 >> 26);
700
+ t4 &= 0x3ffffff;
701
+ t0 = t0 + (c * 5);
702
+ c = (t0 >> 26);
703
+ t0 &= 0x3ffffff;
704
+ t1 = t1 + c;
705
+
706
+ st->HH[0] = ((t0) | (t1 << 26)) & 0xfffffffffffull;
707
+ st->HH[1] = ((t1 >> 18) | (t2 << 8) | (t3 << 34)) & 0xfffffffffffull;
708
+ st->HH[2] = ((t3 >> 10) | (t4 << 16)) & 0x3ffffffffffull;
709
+
710
+ return consumed;
711
+ }
712
+
713
+ void CRYPTO_poly1305_update(poly1305_state *state, const uint8_t *m,
714
+ size_t bytes) {
715
+ poly1305_state_internal *st = poly1305_aligned_state(state);
716
+ size_t want;
717
+
718
+ /* need at least 32 initial bytes to start the accelerated branch */
719
+ if (!st->started) {
720
+ if ((st->leftover == 0) && (bytes > 32)) {
721
+ poly1305_first_block(st, m);
722
+ m += 32;
723
+ bytes -= 32;
724
+ } else {
725
+ want = poly1305_min(32 - st->leftover, bytes);
726
+ poly1305_block_copy(st->buffer + st->leftover, m, want);
727
+ bytes -= want;
728
+ m += want;
729
+ st->leftover += want;
730
+ if ((st->leftover < 32) || (bytes == 0)) {
731
+ return;
732
+ }
733
+ poly1305_first_block(st, st->buffer);
734
+ st->leftover = 0;
735
+ }
736
+ st->started = 1;
737
+ }
738
+
739
+ /* handle leftover */
740
+ if (st->leftover) {
741
+ want = poly1305_min(64 - st->leftover, bytes);
742
+ poly1305_block_copy(st->buffer + st->leftover, m, want);
743
+ bytes -= want;
744
+ m += want;
745
+ st->leftover += want;
746
+ if (st->leftover < 64) {
747
+ return;
748
+ }
749
+ poly1305_blocks(st, st->buffer, 64);
750
+ st->leftover = 0;
751
+ }
752
+
753
+ /* process 64 byte blocks */
754
+ if (bytes >= 64) {
755
+ want = (bytes & ~63);
756
+ poly1305_blocks(st, m, want);
757
+ m += want;
758
+ bytes -= want;
759
+ }
760
+
761
+ if (bytes) {
762
+ poly1305_block_copy(st->buffer + st->leftover, m, bytes);
763
+ st->leftover += bytes;
764
+ }
765
+ }
766
+
767
+ void CRYPTO_poly1305_finish(poly1305_state *state, uint8_t mac[16]) {
768
+ poly1305_state_internal *st = poly1305_aligned_state(state);
769
+ size_t leftover = st->leftover;
770
+ uint8_t *m = st->buffer;
771
+ uint128_t d[3];
772
+ uint64_t h0, h1, h2;
773
+ uint64_t t0, t1;
774
+ uint64_t g0, g1, g2, c, nc;
775
+ uint64_t r0, r1, r2, s1, s2;
776
+ poly1305_power *p;
777
+
778
+ if (st->started) {
779
+ size_t consumed = poly1305_combine(st, m, leftover);
780
+ leftover -= consumed;
781
+ m += consumed;
782
+ }
783
+
784
+ /* st->HH will either be 0 or have the combined result */
785
+ h0 = st->HH[0];
786
+ h1 = st->HH[1];
787
+ h2 = st->HH[2];
788
+
789
+ p = &st->P[1];
790
+ r0 = ((uint64_t)p->R20.d[3] << 32) | (uint64_t)p->R20.d[1];
791
+ r1 = ((uint64_t)p->R21.d[3] << 32) | (uint64_t)p->R21.d[1];
792
+ r2 = ((uint64_t)p->R22.d[3] << 32) | (uint64_t)p->R22.d[1];
793
+ s1 = r1 * (5 << 2);
794
+ s2 = r2 * (5 << 2);
795
+
796
+ if (leftover < 16) {
797
+ goto poly1305_donna_atmost15bytes;
798
+ }
799
+
800
+ poly1305_donna_atleast16bytes:
801
+ t0 = U8TO64_LE(m + 0);
802
+ t1 = U8TO64_LE(m + 8);
803
+ h0 += t0 & 0xfffffffffff;
804
+ t0 = shr128_pair(t1, t0, 44);
805
+ h1 += t0 & 0xfffffffffff;
806
+ h2 += (t1 >> 24) | ((uint64_t)1 << 40);
807
+
808
+ poly1305_donna_mul:
809
+ d[0] = add128(add128(mul64x64_128(h0, r0), mul64x64_128(h1, s2)),
810
+ mul64x64_128(h2, s1));
811
+ d[1] = add128(add128(mul64x64_128(h0, r1), mul64x64_128(h1, r0)),
812
+ mul64x64_128(h2, s2));
813
+ d[2] = add128(add128(mul64x64_128(h0, r2), mul64x64_128(h1, r1)),
814
+ mul64x64_128(h2, r0));
815
+ h0 = lo128(d[0]) & 0xfffffffffff;
816
+ c = shr128(d[0], 44);
817
+ d[1] = add128_64(d[1], c);
818
+ h1 = lo128(d[1]) & 0xfffffffffff;
819
+ c = shr128(d[1], 44);
820
+ d[2] = add128_64(d[2], c);
821
+ h2 = lo128(d[2]) & 0x3ffffffffff;
822
+ c = shr128(d[2], 42);
823
+ h0 += c * 5;
824
+
825
+ m += 16;
826
+ leftover -= 16;
827
+ if (leftover >= 16) {
828
+ goto poly1305_donna_atleast16bytes;
829
+ }
830
+
831
+ /* final bytes */
832
+ poly1305_donna_atmost15bytes:
833
+ if (!leftover) {
834
+ goto poly1305_donna_finish;
835
+ }
836
+
837
+ m[leftover++] = 1;
838
+ poly1305_block_zero(m + leftover, 16 - leftover);
839
+ leftover = 16;
840
+
841
+ t0 = U8TO64_LE(m + 0);
842
+ t1 = U8TO64_LE(m + 8);
843
+ h0 += t0 & 0xfffffffffff;
844
+ t0 = shr128_pair(t1, t0, 44);
845
+ h1 += t0 & 0xfffffffffff;
846
+ h2 += (t1 >> 24);
847
+
848
+ goto poly1305_donna_mul;
849
+
850
+ poly1305_donna_finish:
851
+ c = (h0 >> 44);
852
+ h0 &= 0xfffffffffff;
853
+ h1 += c;
854
+ c = (h1 >> 44);
855
+ h1 &= 0xfffffffffff;
856
+ h2 += c;
857
+ c = (h2 >> 42);
858
+ h2 &= 0x3ffffffffff;
859
+ h0 += c * 5;
860
+
861
+ g0 = h0 + 5;
862
+ c = (g0 >> 44);
863
+ g0 &= 0xfffffffffff;
864
+ g1 = h1 + c;
865
+ c = (g1 >> 44);
866
+ g1 &= 0xfffffffffff;
867
+ g2 = h2 + c - ((uint64_t)1 << 42);
868
+
869
+ c = (g2 >> 63) - 1;
870
+ nc = ~c;
871
+ h0 = (h0 & nc) | (g0 & c);
872
+ h1 = (h1 & nc) | (g1 & c);
873
+ h2 = (h2 & nc) | (g2 & c);
874
+
875
+ /* pad */
876
+ t0 = ((uint64_t)p->R23.d[3] << 32) | (uint64_t)p->R23.d[1];
877
+ t1 = ((uint64_t)p->R24.d[3] << 32) | (uint64_t)p->R24.d[1];
878
+ h0 += (t0 & 0xfffffffffff);
879
+ c = (h0 >> 44);
880
+ h0 &= 0xfffffffffff;
881
+ t0 = shr128_pair(t1, t0, 44);
882
+ h1 += (t0 & 0xfffffffffff) + c;
883
+ c = (h1 >> 44);
884
+ h1 &= 0xfffffffffff;
885
+ t1 = (t1 >> 24);
886
+ h2 += (t1)+c;
887
+
888
+ U64TO8_LE(mac + 0, ((h0) | (h1 << 44)));
889
+ U64TO8_LE(mac + 8, ((h1 >> 20) | (h2 << 24)));
890
+ }
891
+
892
+ #endif /* !OPENSSL_WINDOWS && OPENSSL_X86_64 */
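
A reader's note on the "unrolled to 2 powers, i.e. 64 byte block size" comment at the top of poly1305_vec.c (this summary is not part of the source): Poly1305 absorbs each 16-byte block m_i serially as h = (h + m_i) * r mod (2^130 - 5), so four blocks expand to

  h' = (h + m1)*r^4 + m2*r^3 + m3*r^2 + m4*r   (mod 2^130 - 5).

The implementation evaluates this as two interleaved streams that each advance by r^2, one per SIMD lane:

  lane0' = lane0*r^4 + m1*r^2 + m3
  lane1' = lane1*r^4 + m2*r^2 + m4

which is the "H *= [r^4,r^4]", "H += [Mx,My]*[r^2,r^2]", "H += [Mx,My]" sequence in poly1305_blocks. At the end, poly1305_combine multiplies the lanes by [r^2, r] and adds them ("H = H[0]+H[1]"), recovering the serial Horner result before the final pad addition.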