ring-native 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (261)
  1. checksums.yaml +7 -0
  2. data/.gitignore +9 -0
  3. data/Gemfile +3 -0
  4. data/README.md +22 -0
  5. data/Rakefile +1 -0
  6. data/ext/ring/extconf.rb +29 -0
  7. data/lib/ring/native.rb +8 -0
  8. data/lib/ring/native/version.rb +5 -0
  9. data/ring-native.gemspec +25 -0
  10. data/vendor/ring/BUILDING.md +40 -0
  11. data/vendor/ring/Cargo.toml +43 -0
  12. data/vendor/ring/LICENSE +185 -0
  13. data/vendor/ring/Makefile +35 -0
  14. data/vendor/ring/PORTING.md +163 -0
  15. data/vendor/ring/README.md +113 -0
  16. data/vendor/ring/STYLE.md +197 -0
  17. data/vendor/ring/appveyor.yml +27 -0
  18. data/vendor/ring/build.rs +108 -0
  19. data/vendor/ring/crypto/aes/aes.c +1142 -0
  20. data/vendor/ring/crypto/aes/aes_test.Windows.vcxproj +25 -0
  21. data/vendor/ring/crypto/aes/aes_test.cc +93 -0
  22. data/vendor/ring/crypto/aes/asm/aes-586.pl +2368 -0
  23. data/vendor/ring/crypto/aes/asm/aes-armv4.pl +1249 -0
  24. data/vendor/ring/crypto/aes/asm/aes-x86_64.pl +2246 -0
  25. data/vendor/ring/crypto/aes/asm/aesni-x86.pl +1318 -0
  26. data/vendor/ring/crypto/aes/asm/aesni-x86_64.pl +2084 -0
  27. data/vendor/ring/crypto/aes/asm/aesv8-armx.pl +675 -0
  28. data/vendor/ring/crypto/aes/asm/bsaes-armv7.pl +1364 -0
  29. data/vendor/ring/crypto/aes/asm/bsaes-x86_64.pl +1565 -0
  30. data/vendor/ring/crypto/aes/asm/vpaes-x86.pl +841 -0
  31. data/vendor/ring/crypto/aes/asm/vpaes-x86_64.pl +1116 -0
  32. data/vendor/ring/crypto/aes/internal.h +87 -0
  33. data/vendor/ring/crypto/aes/mode_wrappers.c +61 -0
  34. data/vendor/ring/crypto/bn/add.c +394 -0
  35. data/vendor/ring/crypto/bn/asm/armv4-mont.pl +694 -0
  36. data/vendor/ring/crypto/bn/asm/armv8-mont.pl +1503 -0
  37. data/vendor/ring/crypto/bn/asm/bn-586.pl +774 -0
  38. data/vendor/ring/crypto/bn/asm/co-586.pl +287 -0
  39. data/vendor/ring/crypto/bn/asm/rsaz-avx2.pl +1882 -0
  40. data/vendor/ring/crypto/bn/asm/x86-mont.pl +592 -0
  41. data/vendor/ring/crypto/bn/asm/x86_64-gcc.c +599 -0
  42. data/vendor/ring/crypto/bn/asm/x86_64-mont.pl +1393 -0
  43. data/vendor/ring/crypto/bn/asm/x86_64-mont5.pl +3507 -0
  44. data/vendor/ring/crypto/bn/bn.c +352 -0
  45. data/vendor/ring/crypto/bn/bn_asn1.c +74 -0
  46. data/vendor/ring/crypto/bn/bn_test.Windows.vcxproj +25 -0
  47. data/vendor/ring/crypto/bn/bn_test.cc +1696 -0
  48. data/vendor/ring/crypto/bn/cmp.c +200 -0
  49. data/vendor/ring/crypto/bn/convert.c +433 -0
  50. data/vendor/ring/crypto/bn/ctx.c +311 -0
  51. data/vendor/ring/crypto/bn/div.c +594 -0
  52. data/vendor/ring/crypto/bn/exponentiation.c +1335 -0
  53. data/vendor/ring/crypto/bn/gcd.c +711 -0
  54. data/vendor/ring/crypto/bn/generic.c +1019 -0
  55. data/vendor/ring/crypto/bn/internal.h +316 -0
  56. data/vendor/ring/crypto/bn/montgomery.c +516 -0
  57. data/vendor/ring/crypto/bn/mul.c +888 -0
  58. data/vendor/ring/crypto/bn/prime.c +829 -0
  59. data/vendor/ring/crypto/bn/random.c +334 -0
  60. data/vendor/ring/crypto/bn/rsaz_exp.c +262 -0
  61. data/vendor/ring/crypto/bn/rsaz_exp.h +53 -0
  62. data/vendor/ring/crypto/bn/shift.c +276 -0
  63. data/vendor/ring/crypto/bytestring/bytestring_test.Windows.vcxproj +25 -0
  64. data/vendor/ring/crypto/bytestring/bytestring_test.cc +421 -0
  65. data/vendor/ring/crypto/bytestring/cbb.c +399 -0
  66. data/vendor/ring/crypto/bytestring/cbs.c +227 -0
  67. data/vendor/ring/crypto/bytestring/internal.h +46 -0
  68. data/vendor/ring/crypto/chacha/chacha_generic.c +140 -0
  69. data/vendor/ring/crypto/chacha/chacha_vec.c +323 -0
  70. data/vendor/ring/crypto/chacha/chacha_vec_arm.S +1447 -0
  71. data/vendor/ring/crypto/chacha/chacha_vec_arm_generate.go +153 -0
  72. data/vendor/ring/crypto/cipher/cipher_test.Windows.vcxproj +25 -0
  73. data/vendor/ring/crypto/cipher/e_aes.c +390 -0
  74. data/vendor/ring/crypto/cipher/e_chacha20poly1305.c +208 -0
  75. data/vendor/ring/crypto/cipher/internal.h +173 -0
  76. data/vendor/ring/crypto/cipher/test/aes_128_gcm_tests.txt +543 -0
  77. data/vendor/ring/crypto/cipher/test/aes_128_key_wrap_tests.txt +9 -0
  78. data/vendor/ring/crypto/cipher/test/aes_256_gcm_tests.txt +475 -0
  79. data/vendor/ring/crypto/cipher/test/aes_256_key_wrap_tests.txt +23 -0
  80. data/vendor/ring/crypto/cipher/test/chacha20_poly1305_old_tests.txt +422 -0
  81. data/vendor/ring/crypto/cipher/test/chacha20_poly1305_tests.txt +484 -0
  82. data/vendor/ring/crypto/cipher/test/cipher_test.txt +100 -0
  83. data/vendor/ring/crypto/constant_time_test.Windows.vcxproj +25 -0
  84. data/vendor/ring/crypto/constant_time_test.c +304 -0
  85. data/vendor/ring/crypto/cpu-arm-asm.S +32 -0
  86. data/vendor/ring/crypto/cpu-arm.c +199 -0
  87. data/vendor/ring/crypto/cpu-intel.c +261 -0
  88. data/vendor/ring/crypto/crypto.c +151 -0
  89. data/vendor/ring/crypto/curve25519/asm/x25519-arm.S +2118 -0
  90. data/vendor/ring/crypto/curve25519/curve25519.c +4888 -0
  91. data/vendor/ring/crypto/curve25519/x25519_test.cc +128 -0
  92. data/vendor/ring/crypto/digest/md32_common.h +181 -0
  93. data/vendor/ring/crypto/ec/asm/p256-x86_64-asm.pl +2725 -0
  94. data/vendor/ring/crypto/ec/ec.c +193 -0
  95. data/vendor/ring/crypto/ec/ec_curves.c +61 -0
  96. data/vendor/ring/crypto/ec/ec_key.c +228 -0
  97. data/vendor/ring/crypto/ec/ec_montgomery.c +114 -0
  98. data/vendor/ring/crypto/ec/example_mul.Windows.vcxproj +25 -0
  99. data/vendor/ring/crypto/ec/internal.h +243 -0
  100. data/vendor/ring/crypto/ec/oct.c +253 -0
  101. data/vendor/ring/crypto/ec/p256-64.c +1794 -0
  102. data/vendor/ring/crypto/ec/p256-x86_64-table.h +9548 -0
  103. data/vendor/ring/crypto/ec/p256-x86_64.c +509 -0
  104. data/vendor/ring/crypto/ec/simple.c +1007 -0
  105. data/vendor/ring/crypto/ec/util-64.c +183 -0
  106. data/vendor/ring/crypto/ec/wnaf.c +508 -0
  107. data/vendor/ring/crypto/ecdh/ecdh.c +155 -0
  108. data/vendor/ring/crypto/ecdsa/ecdsa.c +304 -0
  109. data/vendor/ring/crypto/ecdsa/ecdsa_asn1.c +193 -0
  110. data/vendor/ring/crypto/ecdsa/ecdsa_test.Windows.vcxproj +25 -0
  111. data/vendor/ring/crypto/ecdsa/ecdsa_test.cc +327 -0
  112. data/vendor/ring/crypto/header_removed.h +17 -0
  113. data/vendor/ring/crypto/internal.h +495 -0
  114. data/vendor/ring/crypto/libring.Windows.vcxproj +101 -0
  115. data/vendor/ring/crypto/mem.c +98 -0
  116. data/vendor/ring/crypto/modes/asm/aesni-gcm-x86_64.pl +1045 -0
  117. data/vendor/ring/crypto/modes/asm/ghash-armv4.pl +517 -0
  118. data/vendor/ring/crypto/modes/asm/ghash-x86.pl +1393 -0
  119. data/vendor/ring/crypto/modes/asm/ghash-x86_64.pl +1741 -0
  120. data/vendor/ring/crypto/modes/asm/ghashv8-armx.pl +422 -0
  121. data/vendor/ring/crypto/modes/ctr.c +226 -0
  122. data/vendor/ring/crypto/modes/gcm.c +1206 -0
  123. data/vendor/ring/crypto/modes/gcm_test.Windows.vcxproj +25 -0
  124. data/vendor/ring/crypto/modes/gcm_test.c +348 -0
  125. data/vendor/ring/crypto/modes/internal.h +299 -0
  126. data/vendor/ring/crypto/perlasm/arm-xlate.pl +170 -0
  127. data/vendor/ring/crypto/perlasm/readme +100 -0
  128. data/vendor/ring/crypto/perlasm/x86_64-xlate.pl +1164 -0
  129. data/vendor/ring/crypto/perlasm/x86asm.pl +292 -0
  130. data/vendor/ring/crypto/perlasm/x86gas.pl +263 -0
  131. data/vendor/ring/crypto/perlasm/x86masm.pl +200 -0
  132. data/vendor/ring/crypto/perlasm/x86nasm.pl +187 -0
  133. data/vendor/ring/crypto/poly1305/poly1305.c +331 -0
  134. data/vendor/ring/crypto/poly1305/poly1305_arm.c +301 -0
  135. data/vendor/ring/crypto/poly1305/poly1305_arm_asm.S +2015 -0
  136. data/vendor/ring/crypto/poly1305/poly1305_test.Windows.vcxproj +25 -0
  137. data/vendor/ring/crypto/poly1305/poly1305_test.cc +80 -0
  138. data/vendor/ring/crypto/poly1305/poly1305_test.txt +52 -0
  139. data/vendor/ring/crypto/poly1305/poly1305_vec.c +892 -0
  140. data/vendor/ring/crypto/rand/asm/rdrand-x86_64.pl +75 -0
  141. data/vendor/ring/crypto/rand/internal.h +32 -0
  142. data/vendor/ring/crypto/rand/rand.c +189 -0
  143. data/vendor/ring/crypto/rand/urandom.c +219 -0
  144. data/vendor/ring/crypto/rand/windows.c +56 -0
  145. data/vendor/ring/crypto/refcount_c11.c +66 -0
  146. data/vendor/ring/crypto/refcount_lock.c +53 -0
  147. data/vendor/ring/crypto/refcount_test.Windows.vcxproj +25 -0
  148. data/vendor/ring/crypto/refcount_test.c +58 -0
  149. data/vendor/ring/crypto/rsa/blinding.c +462 -0
  150. data/vendor/ring/crypto/rsa/internal.h +108 -0
  151. data/vendor/ring/crypto/rsa/padding.c +300 -0
  152. data/vendor/ring/crypto/rsa/rsa.c +450 -0
  153. data/vendor/ring/crypto/rsa/rsa_asn1.c +261 -0
  154. data/vendor/ring/crypto/rsa/rsa_impl.c +944 -0
  155. data/vendor/ring/crypto/rsa/rsa_test.Windows.vcxproj +25 -0
  156. data/vendor/ring/crypto/rsa/rsa_test.cc +437 -0
  157. data/vendor/ring/crypto/sha/asm/sha-armv8.pl +436 -0
  158. data/vendor/ring/crypto/sha/asm/sha-x86_64.pl +2390 -0
  159. data/vendor/ring/crypto/sha/asm/sha256-586.pl +1275 -0
  160. data/vendor/ring/crypto/sha/asm/sha256-armv4.pl +735 -0
  161. data/vendor/ring/crypto/sha/asm/sha256-armv8.pl +14 -0
  162. data/vendor/ring/crypto/sha/asm/sha256-x86_64.pl +14 -0
  163. data/vendor/ring/crypto/sha/asm/sha512-586.pl +911 -0
  164. data/vendor/ring/crypto/sha/asm/sha512-armv4.pl +666 -0
  165. data/vendor/ring/crypto/sha/asm/sha512-armv8.pl +14 -0
  166. data/vendor/ring/crypto/sha/asm/sha512-x86_64.pl +14 -0
  167. data/vendor/ring/crypto/sha/sha1.c +271 -0
  168. data/vendor/ring/crypto/sha/sha256.c +204 -0
  169. data/vendor/ring/crypto/sha/sha512.c +355 -0
  170. data/vendor/ring/crypto/test/file_test.cc +326 -0
  171. data/vendor/ring/crypto/test/file_test.h +181 -0
  172. data/vendor/ring/crypto/test/malloc.cc +150 -0
  173. data/vendor/ring/crypto/test/scoped_types.h +95 -0
  174. data/vendor/ring/crypto/test/test.Windows.vcxproj +35 -0
  175. data/vendor/ring/crypto/test/test_util.cc +46 -0
  176. data/vendor/ring/crypto/test/test_util.h +41 -0
  177. data/vendor/ring/crypto/thread_none.c +55 -0
  178. data/vendor/ring/crypto/thread_pthread.c +165 -0
  179. data/vendor/ring/crypto/thread_test.Windows.vcxproj +25 -0
  180. data/vendor/ring/crypto/thread_test.c +200 -0
  181. data/vendor/ring/crypto/thread_win.c +282 -0
  182. data/vendor/ring/examples/checkdigest.rs +103 -0
  183. data/vendor/ring/include/openssl/aes.h +121 -0
  184. data/vendor/ring/include/openssl/arm_arch.h +129 -0
  185. data/vendor/ring/include/openssl/base.h +156 -0
  186. data/vendor/ring/include/openssl/bn.h +794 -0
  187. data/vendor/ring/include/openssl/buffer.h +18 -0
  188. data/vendor/ring/include/openssl/bytestring.h +235 -0
  189. data/vendor/ring/include/openssl/chacha.h +37 -0
  190. data/vendor/ring/include/openssl/cmac.h +76 -0
  191. data/vendor/ring/include/openssl/cpu.h +184 -0
  192. data/vendor/ring/include/openssl/crypto.h +43 -0
  193. data/vendor/ring/include/openssl/curve25519.h +88 -0
  194. data/vendor/ring/include/openssl/ec.h +225 -0
  195. data/vendor/ring/include/openssl/ec_key.h +129 -0
  196. data/vendor/ring/include/openssl/ecdh.h +110 -0
  197. data/vendor/ring/include/openssl/ecdsa.h +156 -0
  198. data/vendor/ring/include/openssl/err.h +201 -0
  199. data/vendor/ring/include/openssl/mem.h +101 -0
  200. data/vendor/ring/include/openssl/obj_mac.h +71 -0
  201. data/vendor/ring/include/openssl/opensslfeatures.h +68 -0
  202. data/vendor/ring/include/openssl/opensslv.h +18 -0
  203. data/vendor/ring/include/openssl/ossl_typ.h +18 -0
  204. data/vendor/ring/include/openssl/poly1305.h +51 -0
  205. data/vendor/ring/include/openssl/rand.h +70 -0
  206. data/vendor/ring/include/openssl/rsa.h +399 -0
  207. data/vendor/ring/include/openssl/thread.h +133 -0
  208. data/vendor/ring/include/openssl/type_check.h +71 -0
  209. data/vendor/ring/mk/Common.props +63 -0
  210. data/vendor/ring/mk/Windows.props +42 -0
  211. data/vendor/ring/mk/WindowsTest.props +18 -0
  212. data/vendor/ring/mk/appveyor.bat +62 -0
  213. data/vendor/ring/mk/bottom_of_makefile.mk +54 -0
  214. data/vendor/ring/mk/ring.mk +266 -0
  215. data/vendor/ring/mk/top_of_makefile.mk +214 -0
  216. data/vendor/ring/mk/travis.sh +40 -0
  217. data/vendor/ring/mk/update-travis-yml.py +229 -0
  218. data/vendor/ring/ring.sln +153 -0
  219. data/vendor/ring/src/aead.rs +682 -0
  220. data/vendor/ring/src/agreement.rs +248 -0
  221. data/vendor/ring/src/c.rs +129 -0
  222. data/vendor/ring/src/constant_time.rs +37 -0
  223. data/vendor/ring/src/der.rs +96 -0
  224. data/vendor/ring/src/digest.rs +690 -0
  225. data/vendor/ring/src/digest_tests.txt +57 -0
  226. data/vendor/ring/src/ecc.rs +28 -0
  227. data/vendor/ring/src/ecc_build.rs +279 -0
  228. data/vendor/ring/src/ecc_curves.rs +117 -0
  229. data/vendor/ring/src/ed25519_tests.txt +2579 -0
  230. data/vendor/ring/src/exe_tests.rs +46 -0
  231. data/vendor/ring/src/ffi.rs +29 -0
  232. data/vendor/ring/src/file_test.rs +187 -0
  233. data/vendor/ring/src/hkdf.rs +153 -0
  234. data/vendor/ring/src/hkdf_tests.txt +59 -0
  235. data/vendor/ring/src/hmac.rs +414 -0
  236. data/vendor/ring/src/hmac_tests.txt +97 -0
  237. data/vendor/ring/src/input.rs +312 -0
  238. data/vendor/ring/src/lib.rs +41 -0
  239. data/vendor/ring/src/pbkdf2.rs +265 -0
  240. data/vendor/ring/src/pbkdf2_tests.txt +113 -0
  241. data/vendor/ring/src/polyfill.rs +57 -0
  242. data/vendor/ring/src/rand.rs +28 -0
  243. data/vendor/ring/src/signature.rs +314 -0
  244. data/vendor/ring/third-party/NIST/README.md +9 -0
  245. data/vendor/ring/third-party/NIST/SHAVS/SHA1LongMsg.rsp +263 -0
  246. data/vendor/ring/third-party/NIST/SHAVS/SHA1Monte.rsp +309 -0
  247. data/vendor/ring/third-party/NIST/SHAVS/SHA1ShortMsg.rsp +267 -0
  248. data/vendor/ring/third-party/NIST/SHAVS/SHA224LongMsg.rsp +263 -0
  249. data/vendor/ring/third-party/NIST/SHAVS/SHA224Monte.rsp +309 -0
  250. data/vendor/ring/third-party/NIST/SHAVS/SHA224ShortMsg.rsp +267 -0
  251. data/vendor/ring/third-party/NIST/SHAVS/SHA256LongMsg.rsp +263 -0
  252. data/vendor/ring/third-party/NIST/SHAVS/SHA256Monte.rsp +309 -0
  253. data/vendor/ring/third-party/NIST/SHAVS/SHA256ShortMsg.rsp +267 -0
  254. data/vendor/ring/third-party/NIST/SHAVS/SHA384LongMsg.rsp +519 -0
  255. data/vendor/ring/third-party/NIST/SHAVS/SHA384Monte.rsp +309 -0
  256. data/vendor/ring/third-party/NIST/SHAVS/SHA384ShortMsg.rsp +523 -0
  257. data/vendor/ring/third-party/NIST/SHAVS/SHA512LongMsg.rsp +519 -0
  258. data/vendor/ring/third-party/NIST/SHAVS/SHA512Monte.rsp +309 -0
  259. data/vendor/ring/third-party/NIST/SHAVS/SHA512ShortMsg.rsp +523 -0
  260. data/vendor/ring/third-party/NIST/sha256sums.txt +1 -0
  261. metadata +333 -0
data/vendor/ring/crypto/poly1305/poly1305_test.Windows.vcxproj
@@ -0,0 +1,25 @@
+ <?xml version="1.0" encoding="utf-8"?>
+ <Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <PropertyGroup Label="Globals">
+ <ProjectGuid>{CD0F021B-E347-4CCA-B5B7-CD1F757E15D6}</ProjectGuid>
+ <TargetName>poly1305_test</TargetName>
+ </PropertyGroup>
+ <ImportGroup Label="PropertySheets">
+ <Import Project="..\..\mk\WindowsTest.props" />
+ </ImportGroup>
+ <PropertyGroup Label="Configuration">
+ <OutDir>$(OutRootDir)test\ring\crypto\poly1305\</OutDir>
+ </PropertyGroup>
+ <ItemGroup>
+ <ClCompile Include="poly1305_test.cc" />
+ </ItemGroup>
+ <ItemGroup>
+ <ProjectReference Include="..\libring.Windows.vcxproj">
+ <Project>{f4c0a1b6-5e09-41c8-8242-3e1f6762fb18}</Project>
+ </ProjectReference>
+ <ProjectReference Include="..\test\test.Windows.vcxproj">
+ <Project>{1dace503-6498-492d-b1ff-f9ee18624443}</Project>
+ </ProjectReference>
+ </ItemGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+ </Project>
data/vendor/ring/crypto/poly1305/poly1305_test.cc
@@ -0,0 +1,80 @@
+ /* Copyright (c) 2015, Google Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
+
+ #include <stdio.h>
+ #include <string.h>
+
+ #include <vector>
+
+ #include <openssl/crypto.h>
+ #include <openssl/poly1305.h>
+
+ #include "../test/file_test.h"
+
+
+ // |CRYPTO_poly1305_finish| requires a 16-byte-aligned output.
+ #if defined(OPENSSL_WINDOWS)
+ // MSVC doesn't support C++11 |alignas|.
+ #define ALIGNED __declspec(align(16))
+ #else
+ #define ALIGNED alignas(16)
+ #endif
+
+ static bool TestPoly1305(FileTest *t, void *arg) {
+ std::vector<uint8_t> key, in, mac;
+ if (!t->GetBytes(&key, "Key") ||
+ !t->GetBytes(&in, "Input") ||
+ !t->GetBytes(&mac, "MAC")) {
+ return false;
+ }
+ if (key.size() != 32 || mac.size() != 16) {
+ t->PrintLine("Invalid test");
+ return false;
+ }
+
+ // Test single-shot operation.
+ poly1305_state state;
+ CRYPTO_poly1305_init(&state, key.data());
+ CRYPTO_poly1305_update(&state, in.data(), in.size());
+ ALIGNED uint8_t out[16];
+ CRYPTO_poly1305_finish(&state, out);
+ if (!t->ExpectBytesEqual(out, 16, mac.data(), mac.size())) {
+ t->PrintLine("Single-shot Poly1305 failed.");
+ return false;
+ }
+
+ // Test streaming byte-by-byte.
+ CRYPTO_poly1305_init(&state, key.data());
+ for (size_t i = 0; i < in.size(); i++) {
+ CRYPTO_poly1305_update(&state, &in[i], 1);
+ }
+ CRYPTO_poly1305_finish(&state, out);
+ if (!t->ExpectBytesEqual(out, 16, mac.data(), mac.size())) {
+ t->PrintLine("Streaming Poly1305 failed.");
+ return false;
+ }
+
+ return true;
+ }
+
+ int main(int argc, char **argv) {
+ CRYPTO_library_init();
+
+ if (argc != 2) {
+ fprintf(stderr, "%s <test file>\n", argv[0]);
+ return 1;
+ }
+
+ return FileTestMain(TestPoly1305, nullptr, argv[1]);
+ }
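The test above exercises the Poly1305 API through the shared FileTest harness; main() expects a single argument, the path to a vector file such as the poly1305_test.txt that follows. For orientation, here is a minimal, hypothetical standalone caller of the same three functions, using one of the simple vectors from that file (r = 2, s = 0, sixteen 0xff bytes, expected tag 03 00 ... 00). It is a sketch, assuming a C11 compiler with the vendored include/openssl headers on the include path and the built ring crypto library linked in, not a build step shipped with this gem.

#include <stdio.h>
#include <string.h>
#include <stdint.h>

#include <openssl/crypto.h>
#include <openssl/poly1305.h>

int main(void) {
  /* 32-byte key: the first half is clamped into r, the second half is the pad s. */
  uint8_t key[32];
  memset(key, 0, sizeof(key));
  key[0] = 2; /* r = 2, s = 0: the simple vector from poly1305_test.txt */

  const uint8_t msg[16] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
                           0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};

  CRYPTO_library_init();

  poly1305_state state;
  CRYPTO_poly1305_init(&state, key);
  CRYPTO_poly1305_update(&state, msg, sizeof(msg));

  /* CRYPTO_poly1305_finish wants a 16-byte-aligned output (hence the ALIGNED
   * macro in the test), so be explicit about it in C11. */
  _Alignas(16) uint8_t mac[16];
  CRYPTO_poly1305_finish(&state, mac);

  /* The expected tag for this vector is 03 00 ... 00. */
  for (size_t i = 0; i < 16; i++) {
    printf("%02x", mac[i]);
  }
  printf("\n");
  return 0;
}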
data/vendor/ring/crypto/poly1305/poly1305_test.txt
@@ -0,0 +1,52 @@
+ # RFC 7539, section 2.5.2.
+
+ Key = 85d6be7857556d337f4452fe42d506a80103808afb0db2fd4abff6af4149f51b
+ Input = "Cryptographic Forum Research Group"
+ MAC = a8061dc1305136c6c22b8baf0c0127a9
+
+
+ # RFC 7539, section A.3.
+
+ Key = 0000000000000000000000000000000000000000000000000000000000000000
+ Input = 00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
+ MAC = 00000000000000000000000000000000
+
+ Key = 0000000000000000000000000000000036e5f6b5c5e06070f0efca96227a863e
+ Input = 416e79207375626d697373696f6e20746f20746865204945544620696e74656e6465642062792074686520436f6e7472696275746f7220666f72207075626c69636174696f6e20617320616c6c206f722070617274206f6620616e204945544620496e7465726e65742d4472616674206f722052464320616e6420616e792073746174656d656e74206d6164652077697468696e2074686520636f6e74657874206f6620616e204945544620616374697669747920697320636f6e7369646572656420616e20224945544620436f6e747269627574696f6e222e20537563682073746174656d656e747320696e636c756465206f72616c2073746174656d656e747320696e20494554462073657373696f6e732c2061732077656c6c206173207772697474656e20616e6420656c656374726f6e696320636f6d6d756e69636174696f6e73206d61646520617420616e792074696d65206f7220706c6163652c207768696368206172652061646472657373656420746f
+ MAC = 36e5f6b5c5e06070f0efca96227a863e
+
+ Key = 36e5f6b5c5e06070f0efca96227a863e00000000000000000000000000000000
+ Input = 416e79207375626d697373696f6e20746f20746865204945544620696e74656e6465642062792074686520436f6e7472696275746f7220666f72207075626c69636174696f6e20617320616c6c206f722070617274206f6620616e204945544620496e7465726e65742d4472616674206f722052464320616e6420616e792073746174656d656e74206d6164652077697468696e2074686520636f6e74657874206f6620616e204945544620616374697669747920697320636f6e7369646572656420616e20224945544620436f6e747269627574696f6e222e20537563682073746174656d656e747320696e636c756465206f72616c2073746174656d656e747320696e20494554462073657373696f6e732c2061732077656c6c206173207772697474656e20616e6420656c656374726f6e696320636f6d6d756e69636174696f6e73206d61646520617420616e792074696d65206f7220706c6163652c207768696368206172652061646472657373656420746f
+ MAC = f3477e7cd95417af89a6b8794c310cf0
+
+ Key = 1c9240a5eb55d38af333888604f6b5f0473917c1402b80099dca5cbc207075c0
+ Input = 2754776173206272696c6c69672c20616e642074686520736c6974687920746f7665730a446964206779726520616e642067696d626c6520696e2074686520776162653a0a416c6c206d696d737920776572652074686520626f726f676f7665732c0a416e6420746865206d6f6d65207261746873206f757467726162652e
+ MAC = 4541669a7eaaee61e708dc7cbcc5eb62
+
+ Key = 0200000000000000000000000000000000000000000000000000000000000000
+ Input = ffffffffffffffffffffffffffffffff
+ MAC = 03000000000000000000000000000000
+
+ Key = 02000000000000000000000000000000ffffffffffffffffffffffffffffffff
+ Input = 02000000000000000000000000000000
+ MAC = 03000000000000000000000000000000
+
+ Key = 0100000000000000000000000000000000000000000000000000000000000000
+ Input = fffffffffffffffffffffffffffffffff0ffffffffffffffffffffffffffffff11000000000000000000000000000000
+ MAC = 05000000000000000000000000000000
+
+ Key = 0100000000000000000000000000000000000000000000000000000000000000
+ Input = fffffffffffffffffffffffffffffffffbfefefefefefefefefefefefefefefe01010101010101010101010101010101
+ MAC = 00000000000000000000000000000000
+
+ Key = 0200000000000000000000000000000000000000000000000000000000000000
+ Input = fdffffffffffffffffffffffffffffff
+ MAC = faffffffffffffffffffffffffffffff
+
+ Key = 0100000000000000040000000000000000000000000000000000000000000000
+ Input = e33594d7505e43b900000000000000003394d7505e4379cd01000000000000000000000000000000000000000000000001000000000000000000000000000000
+ MAC = 14000000000000005500000000000000
+
+ Key = 0100000000000000040000000000000000000000000000000000000000000000
+ Input = e33594d7505e43b900000000000000003394d7505e4379cd010000000000000000000000000000000000000000000000
+ MAC = 13000000000000000000000000000000
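Several of the RFC 7539 section A.3 vectors above are small enough to check by hand against the Poly1305 definition: each 16-byte block is read as a little-endian number, a 0x01 byte is appended as a high pad bit, the running accumulator is multiplied by the clamped key half r modulo 2^130 - 5, and at the end the other key half s is added and the low 128 bits are written out little-endian. For the vector with Key = 02 00 ... 00 (so r = 2 after clamping and s = 0) and Input = sixteen 0xff bytes, the arithmetic works out as:

  block value (little-endian)       = 2^128 - 1
  with the 0x01 pad byte appended   = 2^128 - 1 + 2^128 = 2^129 - 1
  multiplied by r = 2               = 2^130 - 2
  reduced mod 2^130 - 5             = 3
  plus s = 0, low 128 bits, LE      = 03 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00

which is exactly the MAC line recorded for that vector.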
data/vendor/ring/crypto/poly1305/poly1305_vec.c
@@ -0,0 +1,892 @@
1
+ /* Copyright (c) 2014, Google Inc.
2
+ *
3
+ * Permission to use, copy, modify, and/or distribute this software for any
4
+ * purpose with or without fee is hereby granted, provided that the above
5
+ * copyright notice and this permission notice appear in all copies.
6
+ *
7
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
10
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
12
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
13
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
14
+
15
+ /* This implementation of poly1305 is by Andrew Moon
16
+ * (https://github.com/floodyberry/poly1305-donna) and released as public
17
+ * domain. It implements SIMD vectorization based on the algorithm described in
18
+ * http://cr.yp.to/papers.html#neoncrypto. Unrolled to 2 powers, i.e. 64 byte
19
+ * block size */
20
+
21
+ #include <openssl/poly1305.h>
22
+
23
+
24
+ #if !defined(OPENSSL_WINDOWS) && defined(OPENSSL_X86_64)
25
+
26
+ #include <emmintrin.h>
27
+
28
+ #define ALIGN(x) __attribute__((aligned(x)))
29
+ /* inline is not a keyword in C89. */
30
+ #define INLINE
31
+ #define U8TO64_LE(m) (*(uint64_t *)(m))
32
+ #define U8TO32_LE(m) (*(uint32_t *)(m))
33
+ #define U64TO8_LE(m, v) (*(uint64_t *)(m)) = v
34
+
35
+ typedef __m128i xmmi;
36
+ typedef unsigned __int128 uint128_t;
37
+
38
+ static const uint32_t ALIGN(16) poly1305_x64_sse2_message_mask[4] = {
39
+ (1 << 26) - 1, 0, (1 << 26) - 1, 0};
40
+ static const uint32_t ALIGN(16) poly1305_x64_sse2_5[4] = {5, 0, 5, 0};
41
+ static const uint32_t ALIGN(16) poly1305_x64_sse2_1shl128[4] = {(1 << 24), 0,
42
+ (1 << 24), 0};
43
+
44
+ static uint128_t INLINE add128(uint128_t a, uint128_t b) { return a + b; }
45
+
46
+ static uint128_t INLINE add128_64(uint128_t a, uint64_t b) { return a + b; }
47
+
48
+ static uint128_t INLINE mul64x64_128(uint64_t a, uint64_t b) {
49
+ return (uint128_t)a * b;
50
+ }
51
+
52
+ static uint64_t INLINE lo128(uint128_t a) { return (uint64_t)a; }
53
+
54
+ static uint64_t INLINE shr128(uint128_t v, const int shift) {
55
+ return (uint64_t)(v >> shift);
56
+ }
57
+
58
+ static uint64_t INLINE shr128_pair(uint64_t hi, uint64_t lo, const int shift) {
59
+ return (uint64_t)((((uint128_t)hi << 64) | lo) >> shift);
60
+ }
61
+
62
+ typedef struct poly1305_power_t {
63
+ union {
64
+ xmmi v;
65
+ uint64_t u[2];
66
+ uint32_t d[4];
67
+ } R20, R21, R22, R23, R24, S21, S22, S23, S24;
68
+ } poly1305_power;
69
+
70
+ typedef struct poly1305_state_internal_t {
71
+ poly1305_power P[2]; /* 288 bytes, top 32 bit halves unused = 144
72
+ bytes of free storage */
73
+ union {
74
+ xmmi H[5]; /* 80 bytes */
75
+ uint64_t HH[10];
76
+ };
77
+ /* uint64_t r0,r1,r2; [24 bytes] */
78
+ /* uint64_t pad0,pad1; [16 bytes] */
79
+ uint64_t started; /* 8 bytes */
80
+ uint64_t leftover; /* 8 bytes */
81
+ uint8_t buffer[64]; /* 64 bytes */
82
+ } poly1305_state_internal; /* 448 bytes total + 63 bytes for
83
+ alignment = 511 bytes raw */
84
+
85
+ static poly1305_state_internal INLINE *poly1305_aligned_state(
86
+ poly1305_state *state) {
87
+ return (poly1305_state_internal *)(((uint64_t)state + 63) & ~63);
88
+ }
89
+
90
+ /* copy 0-63 bytes */
91
+ static void INLINE
92
+ poly1305_block_copy(uint8_t *dst, const uint8_t *src, size_t bytes) {
93
+ size_t offset = src - dst;
94
+ if (bytes & 32) {
95
+ _mm_storeu_si128((xmmi *)(dst + 0),
96
+ _mm_loadu_si128((xmmi *)(dst + offset + 0)));
97
+ _mm_storeu_si128((xmmi *)(dst + 16),
98
+ _mm_loadu_si128((xmmi *)(dst + offset + 16)));
99
+ dst += 32;
100
+ }
101
+ if (bytes & 16) {
102
+ _mm_storeu_si128((xmmi *)dst, _mm_loadu_si128((xmmi *)(dst + offset)));
103
+ dst += 16;
104
+ }
105
+ if (bytes & 8) {
106
+ *(uint64_t *)dst = *(uint64_t *)(dst + offset);
107
+ dst += 8;
108
+ }
109
+ if (bytes & 4) {
110
+ *(uint32_t *)dst = *(uint32_t *)(dst + offset);
111
+ dst += 4;
112
+ }
113
+ if (bytes & 2) {
114
+ *(uint16_t *)dst = *(uint16_t *)(dst + offset);
115
+ dst += 2;
116
+ }
117
+ if (bytes & 1) {
118
+ *(uint8_t *)dst = *(uint8_t *)(dst + offset);
119
+ }
120
+ }
121
+
122
+ /* zero 0-15 bytes */
123
+ static void INLINE poly1305_block_zero(uint8_t *dst, size_t bytes) {
124
+ if (bytes & 8) {
125
+ *(uint64_t *)dst = 0;
126
+ dst += 8;
127
+ }
128
+ if (bytes & 4) {
129
+ *(uint32_t *)dst = 0;
130
+ dst += 4;
131
+ }
132
+ if (bytes & 2) {
133
+ *(uint16_t *)dst = 0;
134
+ dst += 2;
135
+ }
136
+ if (bytes & 1) {
137
+ *(uint8_t *)dst = 0;
138
+ }
139
+ }
140
+
141
+ static size_t INLINE poly1305_min(size_t a, size_t b) {
142
+ return (a < b) ? a : b;
143
+ }
144
+
145
+ void CRYPTO_poly1305_init(poly1305_state *state, const uint8_t key[32]) {
146
+ poly1305_state_internal *st = poly1305_aligned_state(state);
147
+ poly1305_power *p;
148
+ uint64_t r0, r1, r2;
149
+ uint64_t t0, t1;
150
+
151
+ /* clamp key */
152
+ t0 = U8TO64_LE(key + 0);
153
+ t1 = U8TO64_LE(key + 8);
154
+ r0 = t0 & 0xffc0fffffff;
155
+ t0 >>= 44;
156
+ t0 |= t1 << 20;
157
+ r1 = t0 & 0xfffffc0ffff;
158
+ t1 >>= 24;
159
+ r2 = t1 & 0x00ffffffc0f;
160
+
161
+ /* store r in un-used space of st->P[1] */
162
+ p = &st->P[1];
163
+ p->R20.d[1] = (uint32_t)(r0);
164
+ p->R20.d[3] = (uint32_t)(r0 >> 32);
165
+ p->R21.d[1] = (uint32_t)(r1);
166
+ p->R21.d[3] = (uint32_t)(r1 >> 32);
167
+ p->R22.d[1] = (uint32_t)(r2);
168
+ p->R22.d[3] = (uint32_t)(r2 >> 32);
169
+
170
+ /* store pad */
171
+ p->R23.d[1] = U8TO32_LE(key + 16);
172
+ p->R23.d[3] = U8TO32_LE(key + 20);
173
+ p->R24.d[1] = U8TO32_LE(key + 24);
174
+ p->R24.d[3] = U8TO32_LE(key + 28);
175
+
176
+ /* H = 0 */
177
+ st->H[0] = _mm_setzero_si128();
178
+ st->H[1] = _mm_setzero_si128();
179
+ st->H[2] = _mm_setzero_si128();
180
+ st->H[3] = _mm_setzero_si128();
181
+ st->H[4] = _mm_setzero_si128();
182
+
183
+ st->started = 0;
184
+ st->leftover = 0;
185
+ }
186
+
187
+ static void poly1305_first_block(poly1305_state_internal *st,
188
+ const uint8_t *m) {
189
+ const xmmi MMASK = _mm_load_si128((xmmi *)poly1305_x64_sse2_message_mask);
190
+ const xmmi FIVE = _mm_load_si128((xmmi *)poly1305_x64_sse2_5);
191
+ const xmmi HIBIT = _mm_load_si128((xmmi *)poly1305_x64_sse2_1shl128);
192
+ xmmi T5, T6;
193
+ poly1305_power *p;
194
+ uint128_t d[3];
195
+ uint64_t r0, r1, r2;
196
+ uint64_t r20, r21, r22, s22;
197
+ uint64_t pad0, pad1;
198
+ uint64_t c;
199
+ uint64_t i;
200
+
201
+ /* pull out stored info */
202
+ p = &st->P[1];
203
+
204
+ r0 = ((uint64_t)p->R20.d[3] << 32) | (uint64_t)p->R20.d[1];
205
+ r1 = ((uint64_t)p->R21.d[3] << 32) | (uint64_t)p->R21.d[1];
206
+ r2 = ((uint64_t)p->R22.d[3] << 32) | (uint64_t)p->R22.d[1];
207
+ pad0 = ((uint64_t)p->R23.d[3] << 32) | (uint64_t)p->R23.d[1];
208
+ pad1 = ((uint64_t)p->R24.d[3] << 32) | (uint64_t)p->R24.d[1];
209
+
210
+ /* compute powers r^2,r^4 */
211
+ r20 = r0;
212
+ r21 = r1;
213
+ r22 = r2;
214
+ for (i = 0; i < 2; i++) {
215
+ s22 = r22 * (5 << 2);
216
+
217
+ d[0] = add128(mul64x64_128(r20, r20), mul64x64_128(r21 * 2, s22));
218
+ d[1] = add128(mul64x64_128(r22, s22), mul64x64_128(r20 * 2, r21));
219
+ d[2] = add128(mul64x64_128(r21, r21), mul64x64_128(r22 * 2, r20));
220
+
221
+ r20 = lo128(d[0]) & 0xfffffffffff;
222
+ c = shr128(d[0], 44);
223
+ d[1] = add128_64(d[1], c);
224
+ r21 = lo128(d[1]) & 0xfffffffffff;
225
+ c = shr128(d[1], 44);
226
+ d[2] = add128_64(d[2], c);
227
+ r22 = lo128(d[2]) & 0x3ffffffffff;
228
+ c = shr128(d[2], 42);
229
+ r20 += c * 5;
230
+ c = (r20 >> 44);
231
+ r20 = r20 & 0xfffffffffff;
232
+ r21 += c;
233
+
234
+ p->R20.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)(r20)&0x3ffffff),
235
+ _MM_SHUFFLE(1, 0, 1, 0));
236
+ p->R21.v = _mm_shuffle_epi32(
237
+ _mm_cvtsi32_si128((uint32_t)((r20 >> 26) | (r21 << 18)) & 0x3ffffff),
238
+ _MM_SHUFFLE(1, 0, 1, 0));
239
+ p->R22.v =
240
+ _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r21 >> 8)) & 0x3ffffff),
241
+ _MM_SHUFFLE(1, 0, 1, 0));
242
+ p->R23.v = _mm_shuffle_epi32(
243
+ _mm_cvtsi32_si128((uint32_t)((r21 >> 34) | (r22 << 10)) & 0x3ffffff),
244
+ _MM_SHUFFLE(1, 0, 1, 0));
245
+ p->R24.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r22 >> 16))),
246
+ _MM_SHUFFLE(1, 0, 1, 0));
247
+ p->S21.v = _mm_mul_epu32(p->R21.v, FIVE);
248
+ p->S22.v = _mm_mul_epu32(p->R22.v, FIVE);
249
+ p->S23.v = _mm_mul_epu32(p->R23.v, FIVE);
250
+ p->S24.v = _mm_mul_epu32(p->R24.v, FIVE);
251
+ p--;
252
+ }
253
+
254
+ /* put saved info back */
255
+ p = &st->P[1];
256
+ p->R20.d[1] = (uint32_t)(r0);
257
+ p->R20.d[3] = (uint32_t)(r0 >> 32);
258
+ p->R21.d[1] = (uint32_t)(r1);
259
+ p->R21.d[3] = (uint32_t)(r1 >> 32);
260
+ p->R22.d[1] = (uint32_t)(r2);
261
+ p->R22.d[3] = (uint32_t)(r2 >> 32);
262
+ p->R23.d[1] = (uint32_t)(pad0);
263
+ p->R23.d[3] = (uint32_t)(pad0 >> 32);
264
+ p->R24.d[1] = (uint32_t)(pad1);
265
+ p->R24.d[3] = (uint32_t)(pad1 >> 32);
266
+
267
+ /* H = [Mx,My] */
268
+ T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 0)),
269
+ _mm_loadl_epi64((xmmi *)(m + 16)));
270
+ T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 8)),
271
+ _mm_loadl_epi64((xmmi *)(m + 24)));
272
+ st->H[0] = _mm_and_si128(MMASK, T5);
273
+ st->H[1] = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
274
+ T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12));
275
+ st->H[2] = _mm_and_si128(MMASK, T5);
276
+ st->H[3] = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
277
+ st->H[4] = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT);
278
+ }
279
+
280
+ static void poly1305_blocks(poly1305_state_internal *st, const uint8_t *m,
281
+ size_t bytes) {
282
+ const xmmi MMASK = _mm_load_si128((xmmi *)poly1305_x64_sse2_message_mask);
283
+ const xmmi FIVE = _mm_load_si128((xmmi *)poly1305_x64_sse2_5);
284
+ const xmmi HIBIT = _mm_load_si128((xmmi *)poly1305_x64_sse2_1shl128);
285
+
286
+ poly1305_power *p;
287
+ xmmi H0, H1, H2, H3, H4;
288
+ xmmi T0, T1, T2, T3, T4, T5, T6;
289
+ xmmi M0, M1, M2, M3, M4;
290
+ xmmi C1, C2;
291
+
292
+ H0 = st->H[0];
293
+ H1 = st->H[1];
294
+ H2 = st->H[2];
295
+ H3 = st->H[3];
296
+ H4 = st->H[4];
297
+
298
+ while (bytes >= 64) {
299
+ /* H *= [r^4,r^4] */
300
+ p = &st->P[0];
301
+ T0 = _mm_mul_epu32(H0, p->R20.v);
302
+ T1 = _mm_mul_epu32(H0, p->R21.v);
303
+ T2 = _mm_mul_epu32(H0, p->R22.v);
304
+ T3 = _mm_mul_epu32(H0, p->R23.v);
305
+ T4 = _mm_mul_epu32(H0, p->R24.v);
306
+ T5 = _mm_mul_epu32(H1, p->S24.v);
307
+ T6 = _mm_mul_epu32(H1, p->R20.v);
308
+ T0 = _mm_add_epi64(T0, T5);
309
+ T1 = _mm_add_epi64(T1, T6);
310
+ T5 = _mm_mul_epu32(H2, p->S23.v);
311
+ T6 = _mm_mul_epu32(H2, p->S24.v);
312
+ T0 = _mm_add_epi64(T0, T5);
313
+ T1 = _mm_add_epi64(T1, T6);
314
+ T5 = _mm_mul_epu32(H3, p->S22.v);
315
+ T6 = _mm_mul_epu32(H3, p->S23.v);
316
+ T0 = _mm_add_epi64(T0, T5);
317
+ T1 = _mm_add_epi64(T1, T6);
318
+ T5 = _mm_mul_epu32(H4, p->S21.v);
319
+ T6 = _mm_mul_epu32(H4, p->S22.v);
320
+ T0 = _mm_add_epi64(T0, T5);
321
+ T1 = _mm_add_epi64(T1, T6);
322
+ T5 = _mm_mul_epu32(H1, p->R21.v);
323
+ T6 = _mm_mul_epu32(H1, p->R22.v);
324
+ T2 = _mm_add_epi64(T2, T5);
325
+ T3 = _mm_add_epi64(T3, T6);
326
+ T5 = _mm_mul_epu32(H2, p->R20.v);
327
+ T6 = _mm_mul_epu32(H2, p->R21.v);
328
+ T2 = _mm_add_epi64(T2, T5);
329
+ T3 = _mm_add_epi64(T3, T6);
330
+ T5 = _mm_mul_epu32(H3, p->S24.v);
331
+ T6 = _mm_mul_epu32(H3, p->R20.v);
332
+ T2 = _mm_add_epi64(T2, T5);
333
+ T3 = _mm_add_epi64(T3, T6);
334
+ T5 = _mm_mul_epu32(H4, p->S23.v);
335
+ T6 = _mm_mul_epu32(H4, p->S24.v);
336
+ T2 = _mm_add_epi64(T2, T5);
337
+ T3 = _mm_add_epi64(T3, T6);
338
+ T5 = _mm_mul_epu32(H1, p->R23.v);
339
+ T4 = _mm_add_epi64(T4, T5);
340
+ T5 = _mm_mul_epu32(H2, p->R22.v);
341
+ T4 = _mm_add_epi64(T4, T5);
342
+ T5 = _mm_mul_epu32(H3, p->R21.v);
343
+ T4 = _mm_add_epi64(T4, T5);
344
+ T5 = _mm_mul_epu32(H4, p->R20.v);
345
+ T4 = _mm_add_epi64(T4, T5);
346
+
347
+ /* H += [Mx,My]*[r^2,r^2] */
348
+ T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 0)),
349
+ _mm_loadl_epi64((xmmi *)(m + 16)));
350
+ T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 8)),
351
+ _mm_loadl_epi64((xmmi *)(m + 24)));
352
+ M0 = _mm_and_si128(MMASK, T5);
353
+ M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
354
+ T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12));
355
+ M2 = _mm_and_si128(MMASK, T5);
356
+ M3 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
357
+ M4 = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT);
358
+
359
+ p = &st->P[1];
360
+ T5 = _mm_mul_epu32(M0, p->R20.v);
361
+ T6 = _mm_mul_epu32(M0, p->R21.v);
362
+ T0 = _mm_add_epi64(T0, T5);
363
+ T1 = _mm_add_epi64(T1, T6);
364
+ T5 = _mm_mul_epu32(M1, p->S24.v);
365
+ T6 = _mm_mul_epu32(M1, p->R20.v);
366
+ T0 = _mm_add_epi64(T0, T5);
367
+ T1 = _mm_add_epi64(T1, T6);
368
+ T5 = _mm_mul_epu32(M2, p->S23.v);
369
+ T6 = _mm_mul_epu32(M2, p->S24.v);
370
+ T0 = _mm_add_epi64(T0, T5);
371
+ T1 = _mm_add_epi64(T1, T6);
372
+ T5 = _mm_mul_epu32(M3, p->S22.v);
373
+ T6 = _mm_mul_epu32(M3, p->S23.v);
374
+ T0 = _mm_add_epi64(T0, T5);
375
+ T1 = _mm_add_epi64(T1, T6);
376
+ T5 = _mm_mul_epu32(M4, p->S21.v);
377
+ T6 = _mm_mul_epu32(M4, p->S22.v);
378
+ T0 = _mm_add_epi64(T0, T5);
379
+ T1 = _mm_add_epi64(T1, T6);
380
+ T5 = _mm_mul_epu32(M0, p->R22.v);
381
+ T6 = _mm_mul_epu32(M0, p->R23.v);
382
+ T2 = _mm_add_epi64(T2, T5);
383
+ T3 = _mm_add_epi64(T3, T6);
384
+ T5 = _mm_mul_epu32(M1, p->R21.v);
385
+ T6 = _mm_mul_epu32(M1, p->R22.v);
386
+ T2 = _mm_add_epi64(T2, T5);
387
+ T3 = _mm_add_epi64(T3, T6);
388
+ T5 = _mm_mul_epu32(M2, p->R20.v);
389
+ T6 = _mm_mul_epu32(M2, p->R21.v);
390
+ T2 = _mm_add_epi64(T2, T5);
391
+ T3 = _mm_add_epi64(T3, T6);
392
+ T5 = _mm_mul_epu32(M3, p->S24.v);
393
+ T6 = _mm_mul_epu32(M3, p->R20.v);
394
+ T2 = _mm_add_epi64(T2, T5);
395
+ T3 = _mm_add_epi64(T3, T6);
396
+ T5 = _mm_mul_epu32(M4, p->S23.v);
397
+ T6 = _mm_mul_epu32(M4, p->S24.v);
398
+ T2 = _mm_add_epi64(T2, T5);
399
+ T3 = _mm_add_epi64(T3, T6);
400
+ T5 = _mm_mul_epu32(M0, p->R24.v);
401
+ T4 = _mm_add_epi64(T4, T5);
402
+ T5 = _mm_mul_epu32(M1, p->R23.v);
403
+ T4 = _mm_add_epi64(T4, T5);
404
+ T5 = _mm_mul_epu32(M2, p->R22.v);
405
+ T4 = _mm_add_epi64(T4, T5);
406
+ T5 = _mm_mul_epu32(M3, p->R21.v);
407
+ T4 = _mm_add_epi64(T4, T5);
408
+ T5 = _mm_mul_epu32(M4, p->R20.v);
409
+ T4 = _mm_add_epi64(T4, T5);
410
+
411
+ /* H += [Mx,My] */
412
+ T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 32)),
413
+ _mm_loadl_epi64((xmmi *)(m + 48)));
414
+ T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 40)),
415
+ _mm_loadl_epi64((xmmi *)(m + 56)));
416
+ M0 = _mm_and_si128(MMASK, T5);
417
+ M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
418
+ T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12));
419
+ M2 = _mm_and_si128(MMASK, T5);
420
+ M3 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
421
+ M4 = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT);
422
+
423
+ T0 = _mm_add_epi64(T0, M0);
424
+ T1 = _mm_add_epi64(T1, M1);
425
+ T2 = _mm_add_epi64(T2, M2);
426
+ T3 = _mm_add_epi64(T3, M3);
427
+ T4 = _mm_add_epi64(T4, M4);
428
+
429
+ /* reduce */
430
+ C1 = _mm_srli_epi64(T0, 26);
431
+ C2 = _mm_srli_epi64(T3, 26);
432
+ T0 = _mm_and_si128(T0, MMASK);
433
+ T3 = _mm_and_si128(T3, MMASK);
434
+ T1 = _mm_add_epi64(T1, C1);
435
+ T4 = _mm_add_epi64(T4, C2);
436
+ C1 = _mm_srli_epi64(T1, 26);
437
+ C2 = _mm_srli_epi64(T4, 26);
438
+ T1 = _mm_and_si128(T1, MMASK);
439
+ T4 = _mm_and_si128(T4, MMASK);
440
+ T2 = _mm_add_epi64(T2, C1);
441
+ T0 = _mm_add_epi64(T0, _mm_mul_epu32(C2, FIVE));
442
+ C1 = _mm_srli_epi64(T2, 26);
443
+ C2 = _mm_srli_epi64(T0, 26);
444
+ T2 = _mm_and_si128(T2, MMASK);
445
+ T0 = _mm_and_si128(T0, MMASK);
446
+ T3 = _mm_add_epi64(T3, C1);
447
+ T1 = _mm_add_epi64(T1, C2);
448
+ C1 = _mm_srli_epi64(T3, 26);
449
+ T3 = _mm_and_si128(T3, MMASK);
450
+ T4 = _mm_add_epi64(T4, C1);
451
+
452
+ /* H = (H*[r^4,r^4] + [Mx,My]*[r^2,r^2] + [Mx,My]) */
453
+ H0 = T0;
454
+ H1 = T1;
455
+ H2 = T2;
456
+ H3 = T3;
457
+ H4 = T4;
458
+
459
+ m += 64;
460
+ bytes -= 64;
461
+ }
462
+
463
+ st->H[0] = H0;
464
+ st->H[1] = H1;
465
+ st->H[2] = H2;
466
+ st->H[3] = H3;
467
+ st->H[4] = H4;
468
+ }
469
+
470
+ static size_t poly1305_combine(poly1305_state_internal *st, const uint8_t *m,
471
+ size_t bytes) {
472
+ const xmmi MMASK = _mm_load_si128((xmmi *)poly1305_x64_sse2_message_mask);
473
+ const xmmi HIBIT = _mm_load_si128((xmmi *)poly1305_x64_sse2_1shl128);
474
+ const xmmi FIVE = _mm_load_si128((xmmi *)poly1305_x64_sse2_5);
475
+
476
+ poly1305_power *p;
477
+ xmmi H0, H1, H2, H3, H4;
478
+ xmmi M0, M1, M2, M3, M4;
479
+ xmmi T0, T1, T2, T3, T4, T5, T6;
480
+ xmmi C1, C2;
481
+
482
+ uint64_t r0, r1, r2;
483
+ uint64_t t0, t1, t2, t3, t4;
484
+ uint64_t c;
485
+ size_t consumed = 0;
486
+
487
+ H0 = st->H[0];
488
+ H1 = st->H[1];
489
+ H2 = st->H[2];
490
+ H3 = st->H[3];
491
+ H4 = st->H[4];
492
+
493
+ /* p = [r^2,r^2] */
494
+ p = &st->P[1];
495
+
496
+ if (bytes >= 32) {
497
+ /* H *= [r^2,r^2] */
498
+ T0 = _mm_mul_epu32(H0, p->R20.v);
499
+ T1 = _mm_mul_epu32(H0, p->R21.v);
500
+ T2 = _mm_mul_epu32(H0, p->R22.v);
501
+ T3 = _mm_mul_epu32(H0, p->R23.v);
502
+ T4 = _mm_mul_epu32(H0, p->R24.v);
503
+ T5 = _mm_mul_epu32(H1, p->S24.v);
504
+ T6 = _mm_mul_epu32(H1, p->R20.v);
505
+ T0 = _mm_add_epi64(T0, T5);
506
+ T1 = _mm_add_epi64(T1, T6);
507
+ T5 = _mm_mul_epu32(H2, p->S23.v);
508
+ T6 = _mm_mul_epu32(H2, p->S24.v);
509
+ T0 = _mm_add_epi64(T0, T5);
510
+ T1 = _mm_add_epi64(T1, T6);
511
+ T5 = _mm_mul_epu32(H3, p->S22.v);
512
+ T6 = _mm_mul_epu32(H3, p->S23.v);
513
+ T0 = _mm_add_epi64(T0, T5);
514
+ T1 = _mm_add_epi64(T1, T6);
515
+ T5 = _mm_mul_epu32(H4, p->S21.v);
516
+ T6 = _mm_mul_epu32(H4, p->S22.v);
517
+ T0 = _mm_add_epi64(T0, T5);
518
+ T1 = _mm_add_epi64(T1, T6);
519
+ T5 = _mm_mul_epu32(H1, p->R21.v);
520
+ T6 = _mm_mul_epu32(H1, p->R22.v);
521
+ T2 = _mm_add_epi64(T2, T5);
522
+ T3 = _mm_add_epi64(T3, T6);
523
+ T5 = _mm_mul_epu32(H2, p->R20.v);
524
+ T6 = _mm_mul_epu32(H2, p->R21.v);
525
+ T2 = _mm_add_epi64(T2, T5);
526
+ T3 = _mm_add_epi64(T3, T6);
527
+ T5 = _mm_mul_epu32(H3, p->S24.v);
528
+ T6 = _mm_mul_epu32(H3, p->R20.v);
529
+ T2 = _mm_add_epi64(T2, T5);
530
+ T3 = _mm_add_epi64(T3, T6);
531
+ T5 = _mm_mul_epu32(H4, p->S23.v);
532
+ T6 = _mm_mul_epu32(H4, p->S24.v);
533
+ T2 = _mm_add_epi64(T2, T5);
534
+ T3 = _mm_add_epi64(T3, T6);
535
+ T5 = _mm_mul_epu32(H1, p->R23.v);
536
+ T4 = _mm_add_epi64(T4, T5);
537
+ T5 = _mm_mul_epu32(H2, p->R22.v);
538
+ T4 = _mm_add_epi64(T4, T5);
539
+ T5 = _mm_mul_epu32(H3, p->R21.v);
540
+ T4 = _mm_add_epi64(T4, T5);
541
+ T5 = _mm_mul_epu32(H4, p->R20.v);
542
+ T4 = _mm_add_epi64(T4, T5);
543
+
544
+ /* H += [Mx,My] */
545
+ T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 0)),
546
+ _mm_loadl_epi64((xmmi *)(m + 16)));
547
+ T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 8)),
548
+ _mm_loadl_epi64((xmmi *)(m + 24)));
549
+ M0 = _mm_and_si128(MMASK, T5);
550
+ M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
551
+ T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12));
552
+ M2 = _mm_and_si128(MMASK, T5);
553
+ M3 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
554
+ M4 = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT);
555
+
556
+ T0 = _mm_add_epi64(T0, M0);
557
+ T1 = _mm_add_epi64(T1, M1);
558
+ T2 = _mm_add_epi64(T2, M2);
559
+ T3 = _mm_add_epi64(T3, M3);
560
+ T4 = _mm_add_epi64(T4, M4);
561
+
562
+ /* reduce */
563
+ C1 = _mm_srli_epi64(T0, 26);
564
+ C2 = _mm_srli_epi64(T3, 26);
565
+ T0 = _mm_and_si128(T0, MMASK);
566
+ T3 = _mm_and_si128(T3, MMASK);
567
+ T1 = _mm_add_epi64(T1, C1);
568
+ T4 = _mm_add_epi64(T4, C2);
569
+ C1 = _mm_srli_epi64(T1, 26);
570
+ C2 = _mm_srli_epi64(T4, 26);
571
+ T1 = _mm_and_si128(T1, MMASK);
572
+ T4 = _mm_and_si128(T4, MMASK);
573
+ T2 = _mm_add_epi64(T2, C1);
574
+ T0 = _mm_add_epi64(T0, _mm_mul_epu32(C2, FIVE));
575
+ C1 = _mm_srli_epi64(T2, 26);
576
+ C2 = _mm_srli_epi64(T0, 26);
577
+ T2 = _mm_and_si128(T2, MMASK);
578
+ T0 = _mm_and_si128(T0, MMASK);
579
+ T3 = _mm_add_epi64(T3, C1);
580
+ T1 = _mm_add_epi64(T1, C2);
581
+ C1 = _mm_srli_epi64(T3, 26);
582
+ T3 = _mm_and_si128(T3, MMASK);
583
+ T4 = _mm_add_epi64(T4, C1);
584
+
585
+ /* H = (H*[r^2,r^2] + [Mx,My]) */
586
+ H0 = T0;
587
+ H1 = T1;
588
+ H2 = T2;
589
+ H3 = T3;
590
+ H4 = T4;
591
+
592
+ consumed = 32;
593
+ }
594
+
595
+ /* finalize, H *= [r^2,r] */
596
+ r0 = ((uint64_t)p->R20.d[3] << 32) | (uint64_t)p->R20.d[1];
597
+ r1 = ((uint64_t)p->R21.d[3] << 32) | (uint64_t)p->R21.d[1];
598
+ r2 = ((uint64_t)p->R22.d[3] << 32) | (uint64_t)p->R22.d[1];
599
+
600
+ p->R20.d[2] = (uint32_t)(r0)&0x3ffffff;
601
+ p->R21.d[2] = (uint32_t)((r0 >> 26) | (r1 << 18)) & 0x3ffffff;
602
+ p->R22.d[2] = (uint32_t)((r1 >> 8)) & 0x3ffffff;
603
+ p->R23.d[2] = (uint32_t)((r1 >> 34) | (r2 << 10)) & 0x3ffffff;
604
+ p->R24.d[2] = (uint32_t)((r2 >> 16));
605
+ p->S21.d[2] = p->R21.d[2] * 5;
606
+ p->S22.d[2] = p->R22.d[2] * 5;
607
+ p->S23.d[2] = p->R23.d[2] * 5;
608
+ p->S24.d[2] = p->R24.d[2] * 5;
609
+
610
+ /* H *= [r^2,r] */
611
+ T0 = _mm_mul_epu32(H0, p->R20.v);
612
+ T1 = _mm_mul_epu32(H0, p->R21.v);
613
+ T2 = _mm_mul_epu32(H0, p->R22.v);
614
+ T3 = _mm_mul_epu32(H0, p->R23.v);
615
+ T4 = _mm_mul_epu32(H0, p->R24.v);
616
+ T5 = _mm_mul_epu32(H1, p->S24.v);
617
+ T6 = _mm_mul_epu32(H1, p->R20.v);
618
+ T0 = _mm_add_epi64(T0, T5);
619
+ T1 = _mm_add_epi64(T1, T6);
620
+ T5 = _mm_mul_epu32(H2, p->S23.v);
621
+ T6 = _mm_mul_epu32(H2, p->S24.v);
622
+ T0 = _mm_add_epi64(T0, T5);
623
+ T1 = _mm_add_epi64(T1, T6);
624
+ T5 = _mm_mul_epu32(H3, p->S22.v);
625
+ T6 = _mm_mul_epu32(H3, p->S23.v);
626
+ T0 = _mm_add_epi64(T0, T5);
627
+ T1 = _mm_add_epi64(T1, T6);
628
+ T5 = _mm_mul_epu32(H4, p->S21.v);
629
+ T6 = _mm_mul_epu32(H4, p->S22.v);
630
+ T0 = _mm_add_epi64(T0, T5);
631
+ T1 = _mm_add_epi64(T1, T6);
632
+ T5 = _mm_mul_epu32(H1, p->R21.v);
633
+ T6 = _mm_mul_epu32(H1, p->R22.v);
634
+ T2 = _mm_add_epi64(T2, T5);
635
+ T3 = _mm_add_epi64(T3, T6);
636
+ T5 = _mm_mul_epu32(H2, p->R20.v);
637
+ T6 = _mm_mul_epu32(H2, p->R21.v);
638
+ T2 = _mm_add_epi64(T2, T5);
639
+ T3 = _mm_add_epi64(T3, T6);
640
+ T5 = _mm_mul_epu32(H3, p->S24.v);
641
+ T6 = _mm_mul_epu32(H3, p->R20.v);
642
+ T2 = _mm_add_epi64(T2, T5);
643
+ T3 = _mm_add_epi64(T3, T6);
644
+ T5 = _mm_mul_epu32(H4, p->S23.v);
645
+ T6 = _mm_mul_epu32(H4, p->S24.v);
646
+ T2 = _mm_add_epi64(T2, T5);
647
+ T3 = _mm_add_epi64(T3, T6);
648
+ T5 = _mm_mul_epu32(H1, p->R23.v);
649
+ T4 = _mm_add_epi64(T4, T5);
650
+ T5 = _mm_mul_epu32(H2, p->R22.v);
651
+ T4 = _mm_add_epi64(T4, T5);
652
+ T5 = _mm_mul_epu32(H3, p->R21.v);
653
+ T4 = _mm_add_epi64(T4, T5);
654
+ T5 = _mm_mul_epu32(H4, p->R20.v);
655
+ T4 = _mm_add_epi64(T4, T5);
656
+
657
+ C1 = _mm_srli_epi64(T0, 26);
658
+ C2 = _mm_srli_epi64(T3, 26);
659
+ T0 = _mm_and_si128(T0, MMASK);
660
+ T3 = _mm_and_si128(T3, MMASK);
661
+ T1 = _mm_add_epi64(T1, C1);
662
+ T4 = _mm_add_epi64(T4, C2);
663
+ C1 = _mm_srli_epi64(T1, 26);
664
+ C2 = _mm_srli_epi64(T4, 26);
665
+ T1 = _mm_and_si128(T1, MMASK);
666
+ T4 = _mm_and_si128(T4, MMASK);
667
+ T2 = _mm_add_epi64(T2, C1);
668
+ T0 = _mm_add_epi64(T0, _mm_mul_epu32(C2, FIVE));
669
+ C1 = _mm_srli_epi64(T2, 26);
670
+ C2 = _mm_srli_epi64(T0, 26);
671
+ T2 = _mm_and_si128(T2, MMASK);
672
+ T0 = _mm_and_si128(T0, MMASK);
673
+ T3 = _mm_add_epi64(T3, C1);
674
+ T1 = _mm_add_epi64(T1, C2);
675
+ C1 = _mm_srli_epi64(T3, 26);
676
+ T3 = _mm_and_si128(T3, MMASK);
677
+ T4 = _mm_add_epi64(T4, C1);
678
+
679
+ /* H = H[0]+H[1] */
680
+ H0 = _mm_add_epi64(T0, _mm_srli_si128(T0, 8));
681
+ H1 = _mm_add_epi64(T1, _mm_srli_si128(T1, 8));
682
+ H2 = _mm_add_epi64(T2, _mm_srli_si128(T2, 8));
683
+ H3 = _mm_add_epi64(T3, _mm_srli_si128(T3, 8));
684
+ H4 = _mm_add_epi64(T4, _mm_srli_si128(T4, 8));
685
+
686
+ t0 = _mm_cvtsi128_si32(H0);
687
+ c = (t0 >> 26);
688
+ t0 &= 0x3ffffff;
689
+ t1 = _mm_cvtsi128_si32(H1) + c;
690
+ c = (t1 >> 26);
691
+ t1 &= 0x3ffffff;
692
+ t2 = _mm_cvtsi128_si32(H2) + c;
693
+ c = (t2 >> 26);
694
+ t2 &= 0x3ffffff;
695
+ t3 = _mm_cvtsi128_si32(H3) + c;
696
+ c = (t3 >> 26);
697
+ t3 &= 0x3ffffff;
698
+ t4 = _mm_cvtsi128_si32(H4) + c;
699
+ c = (t4 >> 26);
700
+ t4 &= 0x3ffffff;
701
+ t0 = t0 + (c * 5);
702
+ c = (t0 >> 26);
703
+ t0 &= 0x3ffffff;
704
+ t1 = t1 + c;
705
+
706
+ st->HH[0] = ((t0) | (t1 << 26)) & 0xfffffffffffull;
707
+ st->HH[1] = ((t1 >> 18) | (t2 << 8) | (t3 << 34)) & 0xfffffffffffull;
708
+ st->HH[2] = ((t3 >> 10) | (t4 << 16)) & 0x3ffffffffffull;
709
+
710
+ return consumed;
711
+ }
712
+
713
+ void CRYPTO_poly1305_update(poly1305_state *state, const uint8_t *m,
714
+ size_t bytes) {
715
+ poly1305_state_internal *st = poly1305_aligned_state(state);
716
+ size_t want;
717
+
718
+ /* need at least 32 initial bytes to start the accelerated branch */
719
+ if (!st->started) {
720
+ if ((st->leftover == 0) && (bytes > 32)) {
721
+ poly1305_first_block(st, m);
722
+ m += 32;
723
+ bytes -= 32;
724
+ } else {
725
+ want = poly1305_min(32 - st->leftover, bytes);
726
+ poly1305_block_copy(st->buffer + st->leftover, m, want);
727
+ bytes -= want;
728
+ m += want;
729
+ st->leftover += want;
730
+ if ((st->leftover < 32) || (bytes == 0)) {
731
+ return;
732
+ }
733
+ poly1305_first_block(st, st->buffer);
734
+ st->leftover = 0;
735
+ }
736
+ st->started = 1;
737
+ }
738
+
739
+ /* handle leftover */
740
+ if (st->leftover) {
741
+ want = poly1305_min(64 - st->leftover, bytes);
742
+ poly1305_block_copy(st->buffer + st->leftover, m, want);
743
+ bytes -= want;
744
+ m += want;
745
+ st->leftover += want;
746
+ if (st->leftover < 64) {
747
+ return;
748
+ }
749
+ poly1305_blocks(st, st->buffer, 64);
750
+ st->leftover = 0;
751
+ }
752
+
753
+ /* process 64 byte blocks */
754
+ if (bytes >= 64) {
755
+ want = (bytes & ~63);
756
+ poly1305_blocks(st, m, want);
757
+ m += want;
758
+ bytes -= want;
759
+ }
760
+
761
+ if (bytes) {
762
+ poly1305_block_copy(st->buffer + st->leftover, m, bytes);
763
+ st->leftover += bytes;
764
+ }
765
+ }
766
+
767
+ void CRYPTO_poly1305_finish(poly1305_state *state, uint8_t mac[16]) {
768
+ poly1305_state_internal *st = poly1305_aligned_state(state);
769
+ size_t leftover = st->leftover;
770
+ uint8_t *m = st->buffer;
771
+ uint128_t d[3];
772
+ uint64_t h0, h1, h2;
773
+ uint64_t t0, t1;
774
+ uint64_t g0, g1, g2, c, nc;
775
+ uint64_t r0, r1, r2, s1, s2;
776
+ poly1305_power *p;
777
+
778
+ if (st->started) {
779
+ size_t consumed = poly1305_combine(st, m, leftover);
780
+ leftover -= consumed;
781
+ m += consumed;
782
+ }
783
+
784
+ /* st->HH will either be 0 or have the combined result */
785
+ h0 = st->HH[0];
786
+ h1 = st->HH[1];
787
+ h2 = st->HH[2];
788
+
789
+ p = &st->P[1];
790
+ r0 = ((uint64_t)p->R20.d[3] << 32) | (uint64_t)p->R20.d[1];
791
+ r1 = ((uint64_t)p->R21.d[3] << 32) | (uint64_t)p->R21.d[1];
792
+ r2 = ((uint64_t)p->R22.d[3] << 32) | (uint64_t)p->R22.d[1];
793
+ s1 = r1 * (5 << 2);
794
+ s2 = r2 * (5 << 2);
795
+
796
+ if (leftover < 16) {
797
+ goto poly1305_donna_atmost15bytes;
798
+ }
799
+
800
+ poly1305_donna_atleast16bytes:
801
+ t0 = U8TO64_LE(m + 0);
802
+ t1 = U8TO64_LE(m + 8);
803
+ h0 += t0 & 0xfffffffffff;
804
+ t0 = shr128_pair(t1, t0, 44);
805
+ h1 += t0 & 0xfffffffffff;
806
+ h2 += (t1 >> 24) | ((uint64_t)1 << 40);
807
+
808
+ poly1305_donna_mul:
809
+ d[0] = add128(add128(mul64x64_128(h0, r0), mul64x64_128(h1, s2)),
810
+ mul64x64_128(h2, s1));
811
+ d[1] = add128(add128(mul64x64_128(h0, r1), mul64x64_128(h1, r0)),
812
+ mul64x64_128(h2, s2));
813
+ d[2] = add128(add128(mul64x64_128(h0, r2), mul64x64_128(h1, r1)),
814
+ mul64x64_128(h2, r0));
815
+ h0 = lo128(d[0]) & 0xfffffffffff;
816
+ c = shr128(d[0], 44);
817
+ d[1] = add128_64(d[1], c);
818
+ h1 = lo128(d[1]) & 0xfffffffffff;
819
+ c = shr128(d[1], 44);
820
+ d[2] = add128_64(d[2], c);
821
+ h2 = lo128(d[2]) & 0x3ffffffffff;
822
+ c = shr128(d[2], 42);
823
+ h0 += c * 5;
824
+
825
+ m += 16;
826
+ leftover -= 16;
827
+ if (leftover >= 16) {
828
+ goto poly1305_donna_atleast16bytes;
829
+ }
830
+
831
+ /* final bytes */
832
+ poly1305_donna_atmost15bytes:
833
+ if (!leftover) {
834
+ goto poly1305_donna_finish;
835
+ }
836
+
837
+ m[leftover++] = 1;
838
+ poly1305_block_zero(m + leftover, 16 - leftover);
839
+ leftover = 16;
840
+
841
+ t0 = U8TO64_LE(m + 0);
842
+ t1 = U8TO64_LE(m + 8);
843
+ h0 += t0 & 0xfffffffffff;
844
+ t0 = shr128_pair(t1, t0, 44);
845
+ h1 += t0 & 0xfffffffffff;
846
+ h2 += (t1 >> 24);
847
+
848
+ goto poly1305_donna_mul;
849
+
850
+ poly1305_donna_finish:
851
+ c = (h0 >> 44);
852
+ h0 &= 0xfffffffffff;
853
+ h1 += c;
854
+ c = (h1 >> 44);
855
+ h1 &= 0xfffffffffff;
856
+ h2 += c;
857
+ c = (h2 >> 42);
858
+ h2 &= 0x3ffffffffff;
859
+ h0 += c * 5;
860
+
861
+ g0 = h0 + 5;
862
+ c = (g0 >> 44);
863
+ g0 &= 0xfffffffffff;
864
+ g1 = h1 + c;
865
+ c = (g1 >> 44);
866
+ g1 &= 0xfffffffffff;
867
+ g2 = h2 + c - ((uint64_t)1 << 42);
868
+
869
+ c = (g2 >> 63) - 1;
870
+ nc = ~c;
871
+ h0 = (h0 & nc) | (g0 & c);
872
+ h1 = (h1 & nc) | (g1 & c);
873
+ h2 = (h2 & nc) | (g2 & c);
874
+
875
+ /* pad */
876
+ t0 = ((uint64_t)p->R23.d[3] << 32) | (uint64_t)p->R23.d[1];
877
+ t1 = ((uint64_t)p->R24.d[3] << 32) | (uint64_t)p->R24.d[1];
878
+ h0 += (t0 & 0xfffffffffff);
879
+ c = (h0 >> 44);
880
+ h0 &= 0xfffffffffff;
881
+ t0 = shr128_pair(t1, t0, 44);
882
+ h1 += (t0 & 0xfffffffffff) + c;
883
+ c = (h1 >> 44);
884
+ h1 &= 0xfffffffffff;
885
+ t1 = (t1 >> 24);
886
+ h2 += (t1)+c;
887
+
888
+ U64TO8_LE(mac + 0, ((h0) | (h1 << 44)));
889
+ U64TO8_LE(mac + 8, ((h1 >> 20) | (h2 << 24)));
890
+ }
891
+
892
+ #endif /* !OPENSSL_WINDOWS && OPENSSL_X86_64 */
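One implementation detail in poly1305_vec.c worth calling out: the public poly1305_state is treated as opaque byte storage, and poly1305_aligned_state rounds the caller's pointer up to the next 64-byte boundary before using it as a poly1305_state_internal (hence the "448 bytes total + 63 bytes for alignment = 511 bytes raw" comment). The sketch below is a hypothetical standalone illustration of that rounding, with stand-in types sized from the comments above rather than the real definitions, showing why the opaque buffer has to reserve sizeof(internal) + 63 bytes.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins, sized as described in the comments above. */
typedef uint8_t poly1305_state[512];                   /* opaque storage handed to the API */
typedef struct { uint8_t body[448]; } state_internal;  /* what the implementation actually uses */

/* Same arithmetic as poly1305_aligned_state: round up to a 64-byte boundary. */
static state_internal *aligned_state(poly1305_state *state) {
  return (state_internal *)(((uintptr_t)state + 63) & ~(uintptr_t)63);
}

int main(void) {
  poly1305_state st;
  state_internal *in = aligned_state(&st);

  /* The aligned pointer moves forward by at most 63 bytes, so
   * 448 + 63 = 511 bytes of opaque storage are always enough. */
  size_t skew = (size_t)((uint8_t *)in - (uint8_t *)&st);
  printf("alignment skew = %zu bytes (always < 64)\n", skew);
  printf("fits inside poly1305_state: %s\n",
         (skew + sizeof(*in) <= sizeof(st)) ? "yes" : "no");
  return 0;
}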