react-native-quick-crypto 1.0.19 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (561) hide show
  1. package/QuickCrypto.podspec +12 -38
  2. package/README.md +2 -0
  3. package/android/CMakeLists.txt +3 -0
  4. package/android/build.gradle +5 -1
  5. package/cpp/argon2/HybridArgon2.cpp +10 -3
  6. package/cpp/blake3/HybridBlake3.cpp +5 -3
  7. package/cpp/cipher/CCMCipher.cpp +29 -16
  8. package/cpp/cipher/CCMCipher.hpp +2 -4
  9. package/cpp/cipher/ChaCha20Cipher.cpp +14 -18
  10. package/cpp/cipher/ChaCha20Cipher.hpp +2 -4
  11. package/cpp/cipher/ChaCha20Poly1305Cipher.cpp +34 -23
  12. package/cpp/cipher/ChaCha20Poly1305Cipher.hpp +2 -4
  13. package/cpp/cipher/GCMCipher.cpp +14 -15
  14. package/cpp/cipher/HybridCipher.cpp +39 -36
  15. package/cpp/cipher/HybridCipher.hpp +17 -1
  16. package/cpp/cipher/HybridRsaCipher.cpp +74 -29
  17. package/cpp/cipher/OCBCipher.cpp +4 -3
  18. package/cpp/cipher/XChaCha20Poly1305Cipher.cpp +14 -13
  19. package/cpp/cipher/XSalsa20Cipher.cpp +72 -6
  20. package/cpp/cipher/XSalsa20Cipher.hpp +25 -3
  21. package/cpp/cipher/XSalsa20Poly1305Cipher.cpp +21 -25
  22. package/cpp/dh/HybridDiffieHellman.cpp +29 -0
  23. package/cpp/ec/HybridEcKeyPair.cpp +35 -33
  24. package/cpp/ec/HybridEcKeyPair.hpp +3 -7
  25. package/cpp/ecdh/HybridECDH.cpp +23 -0
  26. package/cpp/ed25519/HybridEdKeyPair.cpp +73 -117
  27. package/cpp/ed25519/HybridEdKeyPair.hpp +5 -9
  28. package/cpp/hash/HybridHash.cpp +5 -7
  29. package/cpp/hkdf/HybridHkdf.cpp +6 -4
  30. package/cpp/hmac/HybridHmac.cpp +4 -6
  31. package/cpp/kmac/HybridKmac.cpp +4 -4
  32. package/cpp/mldsa/HybridMlDsaKeyPair.cpp +37 -49
  33. package/cpp/mlkem/HybridMlKemKeyPair.cpp +39 -43
  34. package/cpp/pbkdf2/HybridPbkdf2.cpp +7 -8
  35. package/cpp/rsa/HybridRsaKeyPair.cpp +5 -8
  36. package/cpp/rsa/HybridRsaKeyPair.hpp +4 -7
  37. package/cpp/scrypt/HybridScrypt.cpp +6 -4
  38. package/cpp/sign/HybridSignHandle.cpp +25 -68
  39. package/cpp/sign/HybridVerifyHandle.cpp +23 -60
  40. package/cpp/utils/HybridUtils.cpp +213 -111
  41. package/cpp/utils/HybridUtils.hpp +9 -2
  42. package/cpp/utils/QuickCryptoUtils.hpp +72 -0
  43. package/deps/simdutf/LICENSE-APACHE +201 -0
  44. package/deps/simdutf/LICENSE-MIT +18 -0
  45. package/deps/simdutf/README.md +2782 -0
  46. package/deps/simdutf/include/simdutf/avx512.h +79 -0
  47. package/deps/simdutf/include/simdutf/base64_implementation.h +158 -0
  48. package/deps/simdutf/include/simdutf/base64_tables.h +887 -0
  49. package/deps/simdutf/include/simdutf/common_defs.h +186 -0
  50. package/deps/simdutf/include/simdutf/compiler_check.h +50 -0
  51. package/deps/simdutf/include/simdutf/constexpr_ptr.h +138 -0
  52. package/deps/simdutf/include/simdutf/encoding_types.h +189 -0
  53. package/deps/simdutf/include/simdutf/error.h +126 -0
  54. package/deps/simdutf/include/simdutf/implementation.h +7081 -0
  55. package/deps/simdutf/include/simdutf/internal/isadetection.h +325 -0
  56. package/deps/simdutf/include/simdutf/portability.h +285 -0
  57. package/deps/simdutf/include/simdutf/scalar/ascii.h +86 -0
  58. package/deps/simdutf/include/simdutf/scalar/atomic_util.h +105 -0
  59. package/deps/simdutf/include/simdutf/scalar/base64.h +911 -0
  60. package/deps/simdutf/include/simdutf/scalar/latin1.h +26 -0
  61. package/deps/simdutf/include/simdutf/scalar/latin1_to_utf16/latin1_to_utf16.h +52 -0
  62. package/deps/simdutf/include/simdutf/scalar/latin1_to_utf32/latin1_to_utf32.h +27 -0
  63. package/deps/simdutf/include/simdutf/scalar/latin1_to_utf8/latin1_to_utf8.h +191 -0
  64. package/deps/simdutf/include/simdutf/scalar/swap_bytes.h +35 -0
  65. package/deps/simdutf/include/simdutf/scalar/utf16.h +226 -0
  66. package/deps/simdutf/include/simdutf/scalar/utf16_to_latin1/utf16_to_latin1.h +108 -0
  67. package/deps/simdutf/include/simdutf/scalar/utf16_to_latin1/valid_utf16_to_latin1.h +40 -0
  68. package/deps/simdutf/include/simdutf/scalar/utf16_to_utf32/utf16_to_utf32.h +86 -0
  69. package/deps/simdutf/include/simdutf/scalar/utf16_to_utf32/valid_utf16_to_utf32.h +44 -0
  70. package/deps/simdutf/include/simdutf/scalar/utf16_to_utf8/utf16_to_utf8.h +295 -0
  71. package/deps/simdutf/include/simdutf/scalar/utf16_to_utf8/valid_utf16_to_utf8.h +91 -0
  72. package/deps/simdutf/include/simdutf/scalar/utf32.h +82 -0
  73. package/deps/simdutf/include/simdutf/scalar/utf32_to_latin1/utf32_to_latin1.h +68 -0
  74. package/deps/simdutf/include/simdutf/scalar/utf32_to_latin1/valid_utf32_to_latin1.h +67 -0
  75. package/deps/simdutf/include/simdutf/scalar/utf32_to_utf16/utf32_to_utf16.h +84 -0
  76. package/deps/simdutf/include/simdutf/scalar/utf32_to_utf16/valid_utf32_to_utf16.h +44 -0
  77. package/deps/simdutf/include/simdutf/scalar/utf32_to_utf8/utf32_to_utf8.h +142 -0
  78. package/deps/simdutf/include/simdutf/scalar/utf32_to_utf8/valid_utf32_to_utf8.h +72 -0
  79. package/deps/simdutf/include/simdutf/scalar/utf8.h +326 -0
  80. package/deps/simdutf/include/simdutf/scalar/utf8_to_latin1/utf8_to_latin1.h +225 -0
  81. package/deps/simdutf/include/simdutf/scalar/utf8_to_latin1/valid_utf8_to_latin1.h +87 -0
  82. package/deps/simdutf/include/simdutf/scalar/utf8_to_utf16/utf8_to_utf16.h +342 -0
  83. package/deps/simdutf/include/simdutf/scalar/utf8_to_utf16/valid_utf8_to_utf16.h +106 -0
  84. package/deps/simdutf/include/simdutf/scalar/utf8_to_utf32/utf8_to_utf32.h +299 -0
  85. package/deps/simdutf/include/simdutf/scalar/utf8_to_utf32/valid_utf8_to_utf32.h +83 -0
  86. package/deps/simdutf/include/simdutf/simdutf_version.h +26 -0
  87. package/deps/simdutf/include/simdutf.h +26 -0
  88. package/deps/simdutf/include/simdutf_c.h +342 -0
  89. package/deps/simdutf/src/arm64/arm_base64.cpp +791 -0
  90. package/deps/simdutf/src/arm64/arm_convert_latin1_to_utf16.cpp +24 -0
  91. package/deps/simdutf/src/arm64/arm_convert_latin1_to_utf32.cpp +24 -0
  92. package/deps/simdutf/src/arm64/arm_convert_latin1_to_utf8.cpp +70 -0
  93. package/deps/simdutf/src/arm64/arm_convert_utf16_to_latin1.cpp +61 -0
  94. package/deps/simdutf/src/arm64/arm_convert_utf16_to_utf32.cpp +185 -0
  95. package/deps/simdutf/src/arm64/arm_convert_utf16_to_utf8.cpp +780 -0
  96. package/deps/simdutf/src/arm64/arm_convert_utf32_to_latin1.cpp +60 -0
  97. package/deps/simdutf/src/arm64/arm_convert_utf32_to_utf16.cpp +208 -0
  98. package/deps/simdutf/src/arm64/arm_convert_utf32_to_utf8.cpp +505 -0
  99. package/deps/simdutf/src/arm64/arm_convert_utf8_to_latin1.cpp +69 -0
  100. package/deps/simdutf/src/arm64/arm_convert_utf8_to_utf16.cpp +313 -0
  101. package/deps/simdutf/src/arm64/arm_convert_utf8_to_utf32.cpp +179 -0
  102. package/deps/simdutf/src/arm64/arm_find.cpp +199 -0
  103. package/deps/simdutf/src/arm64/arm_utf16fix.cpp +185 -0
  104. package/deps/simdutf/src/arm64/arm_validate_utf16.cpp +165 -0
  105. package/deps/simdutf/src/arm64/arm_validate_utf32le.cpp +65 -0
  106. package/deps/simdutf/src/arm64/implementation.cpp +1442 -0
  107. package/deps/simdutf/src/encoding_types.cpp +67 -0
  108. package/deps/simdutf/src/error.cpp +3 -0
  109. package/deps/simdutf/src/fallback/implementation.cpp +589 -0
  110. package/deps/simdutf/src/generic/ascii_validation.h +50 -0
  111. package/deps/simdutf/src/generic/base64.h +233 -0
  112. package/deps/simdutf/src/generic/base64lengths.h +63 -0
  113. package/deps/simdutf/src/generic/buf_block_reader.h +109 -0
  114. package/deps/simdutf/src/generic/find.h +75 -0
  115. package/deps/simdutf/src/generic/utf16/change_endianness.h +24 -0
  116. package/deps/simdutf/src/generic/utf16/count_code_points_bytemask.h +58 -0
  117. package/deps/simdutf/src/generic/utf16/to_well_formed.h +93 -0
  118. package/deps/simdutf/src/generic/utf16/utf32_length_from_utf16.h +15 -0
  119. package/deps/simdutf/src/generic/utf16/utf8_length_from_utf16.h +35 -0
  120. package/deps/simdutf/src/generic/utf16/utf8_length_from_utf16_bytemask.h +199 -0
  121. package/deps/simdutf/src/generic/utf16.h +73 -0
  122. package/deps/simdutf/src/generic/utf32.h +136 -0
  123. package/deps/simdutf/src/generic/utf8/utf16_length_from_utf8_bytemask.h +53 -0
  124. package/deps/simdutf/src/generic/utf8.h +92 -0
  125. package/deps/simdutf/src/generic/utf8_to_latin1/utf8_to_latin1.h +316 -0
  126. package/deps/simdutf/src/generic/utf8_to_latin1/valid_utf8_to_latin1.h +78 -0
  127. package/deps/simdutf/src/generic/utf8_to_utf16/utf8_to_utf16.h +332 -0
  128. package/deps/simdutf/src/generic/utf8_to_utf16/valid_utf8_to_utf16.h +74 -0
  129. package/deps/simdutf/src/generic/utf8_to_utf32/utf8_to_utf32.h +318 -0
  130. package/deps/simdutf/src/generic/utf8_to_utf32/valid_utf8_to_utf32.h +42 -0
  131. package/deps/simdutf/src/generic/utf8_validation/utf8_lookup4_algorithm.h +223 -0
  132. package/deps/simdutf/src/generic/utf8_validation/utf8_validator.h +84 -0
  133. package/deps/simdutf/src/generic/validate_utf16.h +164 -0
  134. package/deps/simdutf/src/generic/validate_utf32.h +99 -0
  135. package/deps/simdutf/src/haswell/avx2_base64.cpp +837 -0
  136. package/deps/simdutf/src/haswell/avx2_convert_latin1_to_utf16.cpp +28 -0
  137. package/deps/simdutf/src/haswell/avx2_convert_latin1_to_utf32.cpp +20 -0
  138. package/deps/simdutf/src/haswell/avx2_convert_latin1_to_utf8.cpp +83 -0
  139. package/deps/simdutf/src/haswell/avx2_convert_utf16_to_latin1.cpp +83 -0
  140. package/deps/simdutf/src/haswell/avx2_convert_utf16_to_utf32.cpp +210 -0
  141. package/deps/simdutf/src/haswell/avx2_convert_utf16_to_utf8.cpp +602 -0
  142. package/deps/simdutf/src/haswell/avx2_convert_utf32_to_latin1.cpp +116 -0
  143. package/deps/simdutf/src/haswell/avx2_convert_utf32_to_utf16.cpp +164 -0
  144. package/deps/simdutf/src/haswell/avx2_convert_utf32_to_utf8.cpp +569 -0
  145. package/deps/simdutf/src/haswell/avx2_convert_utf8_to_latin1.cpp +60 -0
  146. package/deps/simdutf/src/haswell/avx2_convert_utf8_to_utf16.cpp +195 -0
  147. package/deps/simdutf/src/haswell/avx2_convert_utf8_to_utf32.cpp +135 -0
  148. package/deps/simdutf/src/haswell/avx2_utf16fix.cpp +173 -0
  149. package/deps/simdutf/src/haswell/avx2_validate_utf16.cpp +17 -0
  150. package/deps/simdutf/src/haswell/implementation.cpp +1447 -0
  151. package/deps/simdutf/src/icelake/icelake_ascii_validation.inl.cpp +19 -0
  152. package/deps/simdutf/src/icelake/icelake_base64.inl.cpp +630 -0
  153. package/deps/simdutf/src/icelake/icelake_common.inl.cpp +37 -0
  154. package/deps/simdutf/src/icelake/icelake_convert_latin1_to_utf16.inl.cpp +36 -0
  155. package/deps/simdutf/src/icelake/icelake_convert_latin1_to_utf32.inl.cpp +23 -0
  156. package/deps/simdutf/src/icelake/icelake_convert_latin1_to_utf8.inl.cpp +107 -0
  157. package/deps/simdutf/src/icelake/icelake_convert_utf16_to_latin1.inl.cpp +103 -0
  158. package/deps/simdutf/src/icelake/icelake_convert_utf16_to_utf32.inl.cpp +136 -0
  159. package/deps/simdutf/src/icelake/icelake_convert_utf16_to_utf8.inl.cpp +206 -0
  160. package/deps/simdutf/src/icelake/icelake_convert_utf32_to_latin1.inl.cpp +74 -0
  161. package/deps/simdutf/src/icelake/icelake_convert_utf32_to_utf16.inl.cpp +338 -0
  162. package/deps/simdutf/src/icelake/icelake_convert_utf32_to_utf8.inl.cpp +574 -0
  163. package/deps/simdutf/src/icelake/icelake_convert_utf8_to_latin1.inl.cpp +104 -0
  164. package/deps/simdutf/src/icelake/icelake_convert_utf8_to_utf16.inl.cpp +75 -0
  165. package/deps/simdutf/src/icelake/icelake_convert_valid_utf8_to_latin1.inl.cpp +69 -0
  166. package/deps/simdutf/src/icelake/icelake_find.inl.cpp +146 -0
  167. package/deps/simdutf/src/icelake/icelake_from_utf8.inl.cpp +266 -0
  168. package/deps/simdutf/src/icelake/icelake_from_valid_utf8.inl.cpp +136 -0
  169. package/deps/simdutf/src/icelake/icelake_macros.inl.cpp +143 -0
  170. package/deps/simdutf/src/icelake/icelake_utf16fix.cpp +138 -0
  171. package/deps/simdutf/src/icelake/icelake_utf32_validation.inl.cpp +63 -0
  172. package/deps/simdutf/src/icelake/icelake_utf8_common.inl.cpp +753 -0
  173. package/deps/simdutf/src/icelake/icelake_utf8_length_from_utf16.inl.cpp +269 -0
  174. package/deps/simdutf/src/icelake/icelake_utf8_validation.inl.cpp +116 -0
  175. package/deps/simdutf/src/icelake/implementation.cpp +1903 -0
  176. package/deps/simdutf/src/implementation.cpp +2526 -0
  177. package/deps/simdutf/src/lasx/implementation.cpp +1531 -0
  178. package/deps/simdutf/src/lasx/lasx_base64.cpp +695 -0
  179. package/deps/simdutf/src/lasx/lasx_convert_latin1_to_utf16.cpp +76 -0
  180. package/deps/simdutf/src/lasx/lasx_convert_latin1_to_utf32.cpp +55 -0
  181. package/deps/simdutf/src/lasx/lasx_convert_latin1_to_utf8.cpp +65 -0
  182. package/deps/simdutf/src/lasx/lasx_convert_utf16_to_latin1.cpp +64 -0
  183. package/deps/simdutf/src/lasx/lasx_convert_utf16_to_utf32.cpp +183 -0
  184. package/deps/simdutf/src/lasx/lasx_convert_utf16_to_utf8.cpp +550 -0
  185. package/deps/simdutf/src/lasx/lasx_convert_utf32_to_latin1.cpp +73 -0
  186. package/deps/simdutf/src/lasx/lasx_convert_utf32_to_utf16.cpp +218 -0
  187. package/deps/simdutf/src/lasx/lasx_convert_utf32_to_utf8.cpp +589 -0
  188. package/deps/simdutf/src/lasx/lasx_convert_utf8_to_latin1.cpp +72 -0
  189. package/deps/simdutf/src/lasx/lasx_convert_utf8_to_utf16.cpp +296 -0
  190. package/deps/simdutf/src/lasx/lasx_convert_utf8_to_utf32.cpp +190 -0
  191. package/deps/simdutf/src/lasx/lasx_find.cpp +64 -0
  192. package/deps/simdutf/src/lasx/lasx_validate_utf16.cpp +13 -0
  193. package/deps/simdutf/src/lasx/lasx_validate_utf32le.cpp +84 -0
  194. package/deps/simdutf/src/lsx/implementation.cpp +1417 -0
  195. package/deps/simdutf/src/lsx/lsx_base64.cpp +675 -0
  196. package/deps/simdutf/src/lsx/lsx_convert_latin1_to_utf16.cpp +39 -0
  197. package/deps/simdutf/src/lsx/lsx_convert_latin1_to_utf32.cpp +27 -0
  198. package/deps/simdutf/src/lsx/lsx_convert_latin1_to_utf8.cpp +56 -0
  199. package/deps/simdutf/src/lsx/lsx_convert_utf16_to_latin1.cpp +64 -0
  200. package/deps/simdutf/src/lsx/lsx_convert_utf16_to_utf32.cpp +133 -0
  201. package/deps/simdutf/src/lsx/lsx_convert_utf16_to_utf8.cpp +518 -0
  202. package/deps/simdutf/src/lsx/lsx_convert_utf32_to_latin1.cpp +66 -0
  203. package/deps/simdutf/src/lsx/lsx_convert_utf32_to_utf16.cpp +155 -0
  204. package/deps/simdutf/src/lsx/lsx_convert_utf32_to_utf8.cpp +459 -0
  205. package/deps/simdutf/src/lsx/lsx_convert_utf8_to_latin1.cpp +75 -0
  206. package/deps/simdutf/src/lsx/lsx_convert_utf8_to_utf16.cpp +291 -0
  207. package/deps/simdutf/src/lsx/lsx_convert_utf8_to_utf32.cpp +179 -0
  208. package/deps/simdutf/src/lsx/lsx_find.cpp +60 -0
  209. package/deps/simdutf/src/lsx/lsx_validate_utf16.cpp +13 -0
  210. package/deps/simdutf/src/lsx/lsx_validate_utf32le.cpp +68 -0
  211. package/deps/simdutf/src/ppc64/implementation.cpp +992 -0
  212. package/deps/simdutf/src/ppc64/ppc64_base64.cpp +480 -0
  213. package/deps/simdutf/src/ppc64/ppc64_base64_internal_tests.cpp +401 -0
  214. package/deps/simdutf/src/ppc64/ppc64_convert_latin1_to_utf16.cpp +12 -0
  215. package/deps/simdutf/src/ppc64/ppc64_convert_latin1_to_utf32.cpp +12 -0
  216. package/deps/simdutf/src/ppc64/ppc64_convert_latin1_to_utf8.cpp +149 -0
  217. package/deps/simdutf/src/ppc64/ppc64_convert_utf16_to_latin1.cpp +67 -0
  218. package/deps/simdutf/src/ppc64/ppc64_convert_utf16_to_utf32.cpp +87 -0
  219. package/deps/simdutf/src/ppc64/ppc64_convert_utf16_to_utf8.cpp +296 -0
  220. package/deps/simdutf/src/ppc64/ppc64_convert_utf32_to_latin1.cpp +57 -0
  221. package/deps/simdutf/src/ppc64/ppc64_convert_utf32_to_utf16.cpp +117 -0
  222. package/deps/simdutf/src/ppc64/ppc64_convert_utf32_to_utf8.cpp +166 -0
  223. package/deps/simdutf/src/ppc64/ppc64_convert_utf8_to_latin1.cpp +69 -0
  224. package/deps/simdutf/src/ppc64/ppc64_convert_utf8_to_utf16.cpp +211 -0
  225. package/deps/simdutf/src/ppc64/ppc64_convert_utf8_to_utf32.cpp +153 -0
  226. package/deps/simdutf/src/ppc64/ppc64_utf16_to_utf8_tables.h +1011 -0
  227. package/deps/simdutf/src/ppc64/ppc64_utf8_length_from_latin1.cpp +37 -0
  228. package/deps/simdutf/src/ppc64/ppc64_validate_utf16.cpp +19 -0
  229. package/deps/simdutf/src/ppc64/templates.cpp +91 -0
  230. package/deps/simdutf/src/rvv/implementation.cpp +138 -0
  231. package/deps/simdutf/src/rvv/rvv_find.cpp +27 -0
  232. package/deps/simdutf/src/rvv/rvv_helpers.inl.cpp +23 -0
  233. package/deps/simdutf/src/rvv/rvv_latin1_to.inl.cpp +71 -0
  234. package/deps/simdutf/src/rvv/rvv_length_from.inl.cpp +164 -0
  235. package/deps/simdutf/src/rvv/rvv_utf16_to.inl.cpp +399 -0
  236. package/deps/simdutf/src/rvv/rvv_utf16fix.cpp +110 -0
  237. package/deps/simdutf/src/rvv/rvv_utf32_to.inl.cpp +307 -0
  238. package/deps/simdutf/src/rvv/rvv_utf8_to.inl.cpp +435 -0
  239. package/deps/simdutf/src/rvv/rvv_validate.inl.cpp +275 -0
  240. package/deps/simdutf/src/simdutf/arm64/begin.h +2 -0
  241. package/deps/simdutf/src/simdutf/arm64/bitmanipulation.h +34 -0
  242. package/deps/simdutf/src/simdutf/arm64/end.h +2 -0
  243. package/deps/simdutf/src/simdutf/arm64/implementation.h +307 -0
  244. package/deps/simdutf/src/simdutf/arm64/intrinsics.h +10 -0
  245. package/deps/simdutf/src/simdutf/arm64/simd.h +547 -0
  246. package/deps/simdutf/src/simdutf/arm64/simd16-inl.h +403 -0
  247. package/deps/simdutf/src/simdutf/arm64/simd32-inl.h +129 -0
  248. package/deps/simdutf/src/simdutf/arm64/simd64-inl.h +28 -0
  249. package/deps/simdutf/src/simdutf/arm64.h +43 -0
  250. package/deps/simdutf/src/simdutf/fallback/begin.h +1 -0
  251. package/deps/simdutf/src/simdutf/fallback/bitmanipulation.h +13 -0
  252. package/deps/simdutf/src/simdutf/fallback/end.h +1 -0
  253. package/deps/simdutf/src/simdutf/fallback/implementation.h +331 -0
  254. package/deps/simdutf/src/simdutf/fallback.h +42 -0
  255. package/deps/simdutf/src/simdutf/haswell/begin.h +15 -0
  256. package/deps/simdutf/src/simdutf/haswell/bitmanipulation.h +35 -0
  257. package/deps/simdutf/src/simdutf/haswell/end.h +13 -0
  258. package/deps/simdutf/src/simdutf/haswell/implementation.h +338 -0
  259. package/deps/simdutf/src/simdutf/haswell/intrinsics.h +67 -0
  260. package/deps/simdutf/src/simdutf/haswell/simd.h +363 -0
  261. package/deps/simdutf/src/simdutf/haswell/simd16-inl.h +261 -0
  262. package/deps/simdutf/src/simdutf/haswell/simd32-inl.h +111 -0
  263. package/deps/simdutf/src/simdutf/haswell/simd64-inl.h +34 -0
  264. package/deps/simdutf/src/simdutf/haswell.h +63 -0
  265. package/deps/simdutf/src/simdutf/icelake/begin.h +14 -0
  266. package/deps/simdutf/src/simdutf/icelake/bitmanipulation.h +44 -0
  267. package/deps/simdutf/src/simdutf/icelake/end.h +12 -0
  268. package/deps/simdutf/src/simdutf/icelake/implementation.h +346 -0
  269. package/deps/simdutf/src/simdutf/icelake/intrinsics.h +138 -0
  270. package/deps/simdutf/src/simdutf/icelake/simd.h +17 -0
  271. package/deps/simdutf/src/simdutf/icelake/simd16-inl.h +90 -0
  272. package/deps/simdutf/src/simdutf/icelake/simd32-inl.h +47 -0
  273. package/deps/simdutf/src/simdutf/icelake.h +81 -0
  274. package/deps/simdutf/src/simdutf/lasx/begin.h +8 -0
  275. package/deps/simdutf/src/simdutf/lasx/bitmanipulation.h +25 -0
  276. package/deps/simdutf/src/simdutf/lasx/end.h +8 -0
  277. package/deps/simdutf/src/simdutf/lasx/implementation.h +310 -0
  278. package/deps/simdutf/src/simdutf/lasx/intrinsics.h +319 -0
  279. package/deps/simdutf/src/simdutf/lasx/simd.h +551 -0
  280. package/deps/simdutf/src/simdutf/lasx/simd16-inl.h +234 -0
  281. package/deps/simdutf/src/simdutf/lasx/simd32-inl.h +74 -0
  282. package/deps/simdutf/src/simdutf/lasx/simd64-inl.h +52 -0
  283. package/deps/simdutf/src/simdutf/lasx.h +49 -0
  284. package/deps/simdutf/src/simdutf/lsx/begin.h +2 -0
  285. package/deps/simdutf/src/simdutf/lsx/bitmanipulation.h +25 -0
  286. package/deps/simdutf/src/simdutf/lsx/end.h +2 -0
  287. package/deps/simdutf/src/simdutf/lsx/implementation.h +309 -0
  288. package/deps/simdutf/src/simdutf/lsx/intrinsics.h +196 -0
  289. package/deps/simdutf/src/simdutf/lsx/simd.h +421 -0
  290. package/deps/simdutf/src/simdutf/lsx/simd16-inl.h +242 -0
  291. package/deps/simdutf/src/simdutf/lsx/simd32-inl.h +69 -0
  292. package/deps/simdutf/src/simdutf/lsx/simd64-inl.h +50 -0
  293. package/deps/simdutf/src/simdutf/lsx.h +52 -0
  294. package/deps/simdutf/src/simdutf/ppc64/begin.h +1 -0
  295. package/deps/simdutf/src/simdutf/ppc64/bitmanipulation.h +29 -0
  296. package/deps/simdutf/src/simdutf/ppc64/end.h +1 -0
  297. package/deps/simdutf/src/simdutf/ppc64/implementation.h +348 -0
  298. package/deps/simdutf/src/simdutf/ppc64/intrinsics.h +19 -0
  299. package/deps/simdutf/src/simdutf/ppc64/simd.h +177 -0
  300. package/deps/simdutf/src/simdutf/ppc64/simd16-inl.h +327 -0
  301. package/deps/simdutf/src/simdutf/ppc64/simd32-inl.h +247 -0
  302. package/deps/simdutf/src/simdutf/ppc64/simd8-inl.h +618 -0
  303. package/deps/simdutf/src/simdutf/ppc64.h +40 -0
  304. package/deps/simdutf/src/simdutf/rvv/begin.h +7 -0
  305. package/deps/simdutf/src/simdutf/rvv/end.h +7 -0
  306. package/deps/simdutf/src/simdutf/rvv/implementation.h +321 -0
  307. package/deps/simdutf/src/simdutf/rvv/intrinsics.h +131 -0
  308. package/deps/simdutf/src/simdutf/rvv.h +41 -0
  309. package/deps/simdutf/src/simdutf/westmere/begin.h +8 -0
  310. package/deps/simdutf/src/simdutf/westmere/bitmanipulation.h +37 -0
  311. package/deps/simdutf/src/simdutf/westmere/end.h +8 -0
  312. package/deps/simdutf/src/simdutf/westmere/implementation.h +338 -0
  313. package/deps/simdutf/src/simdutf/westmere/intrinsics.h +38 -0
  314. package/deps/simdutf/src/simdutf/westmere/simd.h +379 -0
  315. package/deps/simdutf/src/simdutf/westmere/simd16-inl.h +242 -0
  316. package/deps/simdutf/src/simdutf/westmere/simd32-inl.h +151 -0
  317. package/deps/simdutf/src/simdutf/westmere/simd64-inl.h +33 -0
  318. package/deps/simdutf/src/simdutf/westmere.h +59 -0
  319. package/deps/simdutf/src/simdutf.cpp +152 -0
  320. package/deps/simdutf/src/simdutf_c.cpp +525 -0
  321. package/deps/simdutf/src/tables/utf16_to_utf8_tables.h +768 -0
  322. package/deps/simdutf/src/tables/utf32_to_utf16_tables.h +53 -0
  323. package/deps/simdutf/src/tables/utf8_to_utf16_tables.h +826 -0
  324. package/deps/simdutf/src/westmere/implementation.cpp +1479 -0
  325. package/deps/simdutf/src/westmere/internal/loader.cpp +7 -0
  326. package/deps/simdutf/src/westmere/internal/write_v_u16_11bits_to_utf8.cpp +66 -0
  327. package/deps/simdutf/src/westmere/sse_base64.cpp +672 -0
  328. package/deps/simdutf/src/westmere/sse_convert_latin1_to_utf16.cpp +21 -0
  329. package/deps/simdutf/src/westmere/sse_convert_latin1_to_utf32.cpp +31 -0
  330. package/deps/simdutf/src/westmere/sse_convert_latin1_to_utf8.cpp +71 -0
  331. package/deps/simdutf/src/westmere/sse_convert_utf16_to_latin1.cpp +70 -0
  332. package/deps/simdutf/src/westmere/sse_convert_utf16_to_utf32.cpp +206 -0
  333. package/deps/simdutf/src/westmere/sse_convert_utf16_to_utf8.cpp +504 -0
  334. package/deps/simdutf/src/westmere/sse_convert_utf32_to_latin1.cpp +82 -0
  335. package/deps/simdutf/src/westmere/sse_convert_utf32_to_utf16.cpp +209 -0
  336. package/deps/simdutf/src/westmere/sse_convert_utf32_to_utf8.cpp +589 -0
  337. package/deps/simdutf/src/westmere/sse_convert_utf8_to_latin1.cpp +58 -0
  338. package/deps/simdutf/src/westmere/sse_convert_utf8_to_utf16.cpp +197 -0
  339. package/deps/simdutf/src/westmere/sse_convert_utf8_to_utf32.cpp +141 -0
  340. package/deps/simdutf/src/westmere/sse_utf16fix.cpp +82 -0
  341. package/deps/simdutf/src/westmere/sse_validate_utf16.cpp +17 -0
  342. package/lib/commonjs/argon2.js +51 -2
  343. package/lib/commonjs/argon2.js.map +1 -1
  344. package/lib/commonjs/cipher.js +109 -11
  345. package/lib/commonjs/cipher.js.map +1 -1
  346. package/lib/commonjs/dsa.js +8 -2
  347. package/lib/commonjs/dsa.js.map +1 -1
  348. package/lib/commonjs/hash.js +15 -5
  349. package/lib/commonjs/hash.js.map +1 -1
  350. package/lib/commonjs/hkdf.js +33 -6
  351. package/lib/commonjs/hkdf.js.map +1 -1
  352. package/lib/commonjs/hmac.js +15 -5
  353. package/lib/commonjs/hmac.js.map +1 -1
  354. package/lib/commonjs/keys/publicCipher.js +10 -4
  355. package/lib/commonjs/keys/publicCipher.js.map +1 -1
  356. package/lib/commonjs/random.js +11 -2
  357. package/lib/commonjs/random.js.map +1 -1
  358. package/lib/commonjs/rsa.js +12 -5
  359. package/lib/commonjs/rsa.js.map +1 -1
  360. package/lib/commonjs/scrypt.js +47 -6
  361. package/lib/commonjs/scrypt.js.map +1 -1
  362. package/lib/commonjs/subtle.js +76 -5
  363. package/lib/commonjs/subtle.js.map +1 -1
  364. package/lib/commonjs/utils/cipher.js +18 -7
  365. package/lib/commonjs/utils/cipher.js.map +1 -1
  366. package/lib/commonjs/utils/conversion.js +33 -9
  367. package/lib/commonjs/utils/conversion.js.map +1 -1
  368. package/lib/commonjs/utils/timingSafeEqual.js +7 -2
  369. package/lib/commonjs/utils/timingSafeEqual.js.map +1 -1
  370. package/lib/commonjs/x509certificate.js +6 -6
  371. package/lib/commonjs/x509certificate.js.map +1 -1
  372. package/lib/module/argon2.js +51 -2
  373. package/lib/module/argon2.js.map +1 -1
  374. package/lib/module/cipher.js +109 -11
  375. package/lib/module/cipher.js.map +1 -1
  376. package/lib/module/dsa.js +8 -2
  377. package/lib/module/dsa.js.map +1 -1
  378. package/lib/module/hash.js +15 -5
  379. package/lib/module/hash.js.map +1 -1
  380. package/lib/module/hkdf.js +33 -6
  381. package/lib/module/hkdf.js.map +1 -1
  382. package/lib/module/hmac.js +15 -5
  383. package/lib/module/hmac.js.map +1 -1
  384. package/lib/module/keys/publicCipher.js +10 -4
  385. package/lib/module/keys/publicCipher.js.map +1 -1
  386. package/lib/module/random.js +11 -2
  387. package/lib/module/random.js.map +1 -1
  388. package/lib/module/rsa.js +11 -4
  389. package/lib/module/rsa.js.map +1 -1
  390. package/lib/module/scrypt.js +47 -6
  391. package/lib/module/scrypt.js.map +1 -1
  392. package/lib/module/subtle.js +76 -5
  393. package/lib/module/subtle.js.map +1 -1
  394. package/lib/module/utils/cipher.js +18 -7
  395. package/lib/module/utils/cipher.js.map +1 -1
  396. package/lib/module/utils/conversion.js +33 -9
  397. package/lib/module/utils/conversion.js.map +1 -1
  398. package/lib/module/utils/timingSafeEqual.js +8 -3
  399. package/lib/module/utils/timingSafeEqual.js.map +1 -1
  400. package/lib/module/x509certificate.js +6 -6
  401. package/lib/module/x509certificate.js.map +1 -1
  402. package/lib/typescript/argon2.d.ts.map +1 -1
  403. package/lib/typescript/cipher.d.ts +2 -2
  404. package/lib/typescript/cipher.d.ts.map +1 -1
  405. package/lib/typescript/dsa.d.ts.map +1 -1
  406. package/lib/typescript/hash.d.ts +2 -2
  407. package/lib/typescript/hash.d.ts.map +1 -1
  408. package/lib/typescript/hkdf.d.ts.map +1 -1
  409. package/lib/typescript/hmac.d.ts +2 -2
  410. package/lib/typescript/hmac.d.ts.map +1 -1
  411. package/lib/typescript/index.d.ts +1 -1
  412. package/lib/typescript/index.d.ts.map +1 -1
  413. package/lib/typescript/keys/publicCipher.d.ts.map +1 -1
  414. package/lib/typescript/random.d.ts.map +1 -1
  415. package/lib/typescript/rsa.d.ts.map +1 -1
  416. package/lib/typescript/scrypt.d.ts.map +1 -1
  417. package/lib/typescript/specs/utils.nitro.d.ts +0 -2
  418. package/lib/typescript/specs/utils.nitro.d.ts.map +1 -1
  419. package/lib/typescript/subtle.d.ts.map +1 -1
  420. package/lib/typescript/utils/cipher.d.ts +13 -1
  421. package/lib/typescript/utils/cipher.d.ts.map +1 -1
  422. package/lib/typescript/utils/conversion.d.ts +9 -6
  423. package/lib/typescript/utils/conversion.d.ts.map +1 -1
  424. package/lib/typescript/utils/timingSafeEqual.d.ts.map +1 -1
  425. package/lib/typescript/x509certificate.d.ts.map +1 -1
  426. package/nitrogen/generated/shared/c++/HybridUtilsSpec.cpp +0 -2
  427. package/nitrogen/generated/shared/c++/HybridUtilsSpec.hpp +0 -3
  428. package/package.json +38 -6
  429. package/src/argon2.ts +80 -2
  430. package/src/cipher.ts +139 -15
  431. package/src/dsa.ts +11 -2
  432. package/src/hash.ts +17 -7
  433. package/src/hkdf.ts +44 -6
  434. package/src/hmac.ts +17 -7
  435. package/src/keys/publicCipher.ts +10 -4
  436. package/src/random.ts +11 -2
  437. package/src/rsa.ts +18 -4
  438. package/src/scrypt.ts +73 -6
  439. package/src/specs/utils.nitro.ts +0 -2
  440. package/src/subtle.ts +90 -8
  441. package/src/utils/cipher.ts +30 -8
  442. package/src/utils/conversion.ts +58 -20
  443. package/src/utils/timingSafeEqual.ts +8 -3
  444. package/src/x509certificate.ts +5 -6
  445. package/deps/blake3/.cargo/config.toml +0 -2
  446. package/deps/blake3/.git-blame-ignore-revs +0 -2
  447. package/deps/blake3/.github/workflows/build_b3sum.py +0 -38
  448. package/deps/blake3/.github/workflows/ci.yml +0 -491
  449. package/deps/blake3/.github/workflows/tag.yml +0 -43
  450. package/deps/blake3/.github/workflows/upload_github_release_asset.py +0 -73
  451. package/deps/blake3/CONTRIBUTING.md +0 -31
  452. package/deps/blake3/Cargo.toml +0 -135
  453. package/deps/blake3/b3sum/Cargo.lock +0 -513
  454. package/deps/blake3/b3sum/Cargo.toml +0 -26
  455. package/deps/blake3/b3sum/README.md +0 -72
  456. package/deps/blake3/b3sum/src/main.rs +0 -564
  457. package/deps/blake3/b3sum/src/unit_tests.rs +0 -235
  458. package/deps/blake3/b3sum/tests/cli_tests.rs +0 -680
  459. package/deps/blake3/b3sum/what_does_check_do.md +0 -176
  460. package/deps/blake3/benches/bench.rs +0 -623
  461. package/deps/blake3/build.rs +0 -389
  462. package/deps/blake3/c/CMakeLists.txt +0 -383
  463. package/deps/blake3/c/CMakePresets.json +0 -73
  464. package/deps/blake3/c/Makefile.testing +0 -82
  465. package/deps/blake3/c/blake3-config.cmake.in +0 -14
  466. package/deps/blake3/c/blake3_avx2.c +0 -326
  467. package/deps/blake3/c/blake3_avx2_x86-64_unix.S +0 -1815
  468. package/deps/blake3/c/blake3_avx2_x86-64_windows_gnu.S +0 -1817
  469. package/deps/blake3/c/blake3_avx2_x86-64_windows_msvc.asm +0 -1828
  470. package/deps/blake3/c/blake3_avx512.c +0 -1388
  471. package/deps/blake3/c/blake3_avx512_x86-64_unix.S +0 -4824
  472. package/deps/blake3/c/blake3_avx512_x86-64_windows_gnu.S +0 -2615
  473. package/deps/blake3/c/blake3_avx512_x86-64_windows_msvc.asm +0 -2634
  474. package/deps/blake3/c/blake3_c_rust_bindings/Cargo.toml +0 -32
  475. package/deps/blake3/c/blake3_c_rust_bindings/README.md +0 -4
  476. package/deps/blake3/c/blake3_c_rust_bindings/benches/bench.rs +0 -477
  477. package/deps/blake3/c/blake3_c_rust_bindings/build.rs +0 -253
  478. package/deps/blake3/c/blake3_c_rust_bindings/cross_test.sh +0 -31
  479. package/deps/blake3/c/blake3_c_rust_bindings/src/lib.rs +0 -333
  480. package/deps/blake3/c/blake3_c_rust_bindings/src/test.rs +0 -696
  481. package/deps/blake3/c/blake3_sse2.c +0 -566
  482. package/deps/blake3/c/blake3_sse2_x86-64_unix.S +0 -2291
  483. package/deps/blake3/c/blake3_sse2_x86-64_windows_gnu.S +0 -2332
  484. package/deps/blake3/c/blake3_sse2_x86-64_windows_msvc.asm +0 -2350
  485. package/deps/blake3/c/blake3_sse41.c +0 -560
  486. package/deps/blake3/c/blake3_sse41_x86-64_unix.S +0 -2028
  487. package/deps/blake3/c/blake3_sse41_x86-64_windows_gnu.S +0 -2069
  488. package/deps/blake3/c/blake3_sse41_x86-64_windows_msvc.asm +0 -2089
  489. package/deps/blake3/c/blake3_tbb.cpp +0 -37
  490. package/deps/blake3/c/dependencies/CMakeLists.txt +0 -3
  491. package/deps/blake3/c/dependencies/tbb/CMakeLists.txt +0 -28
  492. package/deps/blake3/c/example.c +0 -36
  493. package/deps/blake3/c/example_tbb.c +0 -57
  494. package/deps/blake3/c/libblake3.pc.in +0 -12
  495. package/deps/blake3/c/main.c +0 -166
  496. package/deps/blake3/c/test.py +0 -97
  497. package/deps/blake3/media/B3.svg +0 -70
  498. package/deps/blake3/media/BLAKE3.svg +0 -85
  499. package/deps/blake3/media/speed.svg +0 -1474
  500. package/deps/blake3/reference_impl/Cargo.toml +0 -8
  501. package/deps/blake3/reference_impl/README.md +0 -14
  502. package/deps/blake3/reference_impl/reference_impl.rs +0 -374
  503. package/deps/blake3/src/ffi_avx2.rs +0 -65
  504. package/deps/blake3/src/ffi_avx512.rs +0 -169
  505. package/deps/blake3/src/ffi_neon.rs +0 -82
  506. package/deps/blake3/src/ffi_sse2.rs +0 -126
  507. package/deps/blake3/src/ffi_sse41.rs +0 -126
  508. package/deps/blake3/src/guts.rs +0 -60
  509. package/deps/blake3/src/hazmat.rs +0 -704
  510. package/deps/blake3/src/io.rs +0 -64
  511. package/deps/blake3/src/join.rs +0 -92
  512. package/deps/blake3/src/lib.rs +0 -1835
  513. package/deps/blake3/src/platform.rs +0 -587
  514. package/deps/blake3/src/portable.rs +0 -198
  515. package/deps/blake3/src/rust_avx2.rs +0 -474
  516. package/deps/blake3/src/rust_sse2.rs +0 -775
  517. package/deps/blake3/src/rust_sse41.rs +0 -766
  518. package/deps/blake3/src/test.rs +0 -1049
  519. package/deps/blake3/src/traits.rs +0 -227
  520. package/deps/blake3/src/wasm32_simd.rs +0 -794
  521. package/deps/blake3/test_vectors/Cargo.toml +0 -19
  522. package/deps/blake3/test_vectors/cross_test.sh +0 -25
  523. package/deps/blake3/test_vectors/src/bin/generate.rs +0 -4
  524. package/deps/blake3/test_vectors/src/lib.rs +0 -350
  525. package/deps/blake3/test_vectors/test_vectors.json +0 -217
  526. package/deps/blake3/tools/compiler_version/Cargo.toml +0 -7
  527. package/deps/blake3/tools/compiler_version/build.rs +0 -6
  528. package/deps/blake3/tools/compiler_version/src/main.rs +0 -27
  529. package/deps/blake3/tools/instruction_set_support/Cargo.toml +0 -6
  530. package/deps/blake3/tools/instruction_set_support/src/main.rs +0 -10
  531. package/deps/blake3/tools/release.md +0 -16
  532. package/deps/ncrypto/.bazelignore +0 -4
  533. package/deps/ncrypto/.bazelrc +0 -1
  534. package/deps/ncrypto/.bazelversion +0 -1
  535. package/deps/ncrypto/.clang-format +0 -111
  536. package/deps/ncrypto/.github/workflows/bazel.yml +0 -58
  537. package/deps/ncrypto/.github/workflows/commitlint.yml +0 -16
  538. package/deps/ncrypto/.github/workflows/linter.yml +0 -38
  539. package/deps/ncrypto/.github/workflows/macos.yml +0 -43
  540. package/deps/ncrypto/.github/workflows/release-please.yml +0 -16
  541. package/deps/ncrypto/.github/workflows/ubuntu.yml +0 -128
  542. package/deps/ncrypto/.github/workflows/visual-studio.yml +0 -49
  543. package/deps/ncrypto/.python-version +0 -1
  544. package/deps/ncrypto/.release-please-manifest.json +0 -3
  545. package/deps/ncrypto/BUILD.bazel +0 -44
  546. package/deps/ncrypto/CHANGELOG.md +0 -37
  547. package/deps/ncrypto/CMakeLists.txt +0 -79
  548. package/deps/ncrypto/MODULE.bazel +0 -16
  549. package/deps/ncrypto/MODULE.bazel.lock +0 -461
  550. package/deps/ncrypto/cmake/CPM.cmake +0 -1225
  551. package/deps/ncrypto/cmake/ncrypto-flags.cmake +0 -17
  552. package/deps/ncrypto/ncrypto.pc.in +0 -10
  553. package/deps/ncrypto/patches/0001-Expose-libdecrepit-so-NodeJS-can-use-it-for-ncrypto.patch +0 -28
  554. package/deps/ncrypto/pyproject.toml +0 -38
  555. package/deps/ncrypto/release-please-config.json +0 -11
  556. package/deps/ncrypto/src/CMakeLists.txt +0 -40
  557. package/deps/ncrypto/tests/BUILD.bazel +0 -11
  558. package/deps/ncrypto/tests/CMakeLists.txt +0 -7
  559. package/deps/ncrypto/tests/basic.cpp +0 -856
  560. package/deps/ncrypto/tools/run-clang-format.sh +0 -42
  561. package/lib/tsconfig.tsbuildinfo +0 -1
@@ -1,560 +0,0 @@
1
- #include "blake3_impl.h"
2
-
3
- #include <immintrin.h>
4
-
5
- #define DEGREE 4
6
-
7
- #define _mm_shuffle_ps2(a, b, c) \
8
- (_mm_castps_si128( \
9
- _mm_shuffle_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), (c))))
10
-
11
- INLINE __m128i loadu(const uint8_t src[16]) {
12
- return _mm_loadu_si128((const __m128i *)src);
13
- }
14
-
15
- INLINE void storeu(__m128i src, uint8_t dest[16]) {
16
- _mm_storeu_si128((__m128i *)dest, src);
17
- }
18
-
19
- INLINE __m128i addv(__m128i a, __m128i b) { return _mm_add_epi32(a, b); }
20
-
21
- // Note that clang-format doesn't like the name "xor" for some reason.
22
- INLINE __m128i xorv(__m128i a, __m128i b) { return _mm_xor_si128(a, b); }
23
-
24
- INLINE __m128i set1(uint32_t x) { return _mm_set1_epi32((int32_t)x); }
25
-
26
- INLINE __m128i set4(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
27
- return _mm_setr_epi32((int32_t)a, (int32_t)b, (int32_t)c, (int32_t)d);
28
- }
29
-
30
- INLINE __m128i rot16(__m128i x) {
31
- return _mm_shuffle_epi8(
32
- x, _mm_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2));
33
- }
34
-
35
- INLINE __m128i rot12(__m128i x) {
36
- return xorv(_mm_srli_epi32(x, 12), _mm_slli_epi32(x, 32 - 12));
37
- }
38
-
39
- INLINE __m128i rot8(__m128i x) {
40
- return _mm_shuffle_epi8(
41
- x, _mm_set_epi8(12, 15, 14, 13, 8, 11, 10, 9, 4, 7, 6, 5, 0, 3, 2, 1));
42
- }
43
-
44
- INLINE __m128i rot7(__m128i x) {
45
- return xorv(_mm_srli_epi32(x, 7), _mm_slli_epi32(x, 32 - 7));
46
- }
47
-
48
- INLINE void g1(__m128i *row0, __m128i *row1, __m128i *row2, __m128i *row3,
49
- __m128i m) {
50
- *row0 = addv(addv(*row0, m), *row1);
51
- *row3 = xorv(*row3, *row0);
52
- *row3 = rot16(*row3);
53
- *row2 = addv(*row2, *row3);
54
- *row1 = xorv(*row1, *row2);
55
- *row1 = rot12(*row1);
56
- }
57
-
58
- INLINE void g2(__m128i *row0, __m128i *row1, __m128i *row2, __m128i *row3,
59
- __m128i m) {
60
- *row0 = addv(addv(*row0, m), *row1);
61
- *row3 = xorv(*row3, *row0);
62
- *row3 = rot8(*row3);
63
- *row2 = addv(*row2, *row3);
64
- *row1 = xorv(*row1, *row2);
65
- *row1 = rot7(*row1);
66
- }
67
-
68
- // Note the optimization here of leaving row1 as the unrotated row, rather than
69
- // row0. All the message loads below are adjusted to compensate for this. See
70
- // discussion at https://github.com/sneves/blake2-avx2/pull/4
71
- INLINE void diagonalize(__m128i *row0, __m128i *row2, __m128i *row3) {
72
- *row0 = _mm_shuffle_epi32(*row0, _MM_SHUFFLE(2, 1, 0, 3));
73
- *row3 = _mm_shuffle_epi32(*row3, _MM_SHUFFLE(1, 0, 3, 2));
74
- *row2 = _mm_shuffle_epi32(*row2, _MM_SHUFFLE(0, 3, 2, 1));
75
- }
76
-
77
- INLINE void undiagonalize(__m128i *row0, __m128i *row2, __m128i *row3) {
78
- *row0 = _mm_shuffle_epi32(*row0, _MM_SHUFFLE(0, 3, 2, 1));
79
- *row3 = _mm_shuffle_epi32(*row3, _MM_SHUFFLE(1, 0, 3, 2));
80
- *row2 = _mm_shuffle_epi32(*row2, _MM_SHUFFLE(2, 1, 0, 3));
81
- }
82
-
83
- INLINE void compress_pre(__m128i rows[4], const uint32_t cv[8],
84
- const uint8_t block[BLAKE3_BLOCK_LEN],
85
- uint8_t block_len, uint64_t counter, uint8_t flags) {
86
- rows[0] = loadu((uint8_t *)&cv[0]);
87
- rows[1] = loadu((uint8_t *)&cv[4]);
88
- rows[2] = set4(IV[0], IV[1], IV[2], IV[3]);
89
- rows[3] = set4(counter_low(counter), counter_high(counter),
90
- (uint32_t)block_len, (uint32_t)flags);
91
-
92
- __m128i m0 = loadu(&block[sizeof(__m128i) * 0]);
93
- __m128i m1 = loadu(&block[sizeof(__m128i) * 1]);
94
- __m128i m2 = loadu(&block[sizeof(__m128i) * 2]);
95
- __m128i m3 = loadu(&block[sizeof(__m128i) * 3]);
96
-
97
- __m128i t0, t1, t2, t3, tt;
98
-
99
- // Round 1. The first round permutes the message words from the original
100
- // input order, into the groups that get mixed in parallel.
101
- t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(2, 0, 2, 0)); // 6 4 2 0
102
- g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
103
- t1 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 3, 1)); // 7 5 3 1
104
- g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
105
- diagonalize(&rows[0], &rows[2], &rows[3]);
106
- t2 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(2, 0, 2, 0)); // 14 12 10 8
107
- t2 = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2, 1, 0, 3)); // 12 10 8 14
108
- g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
109
- t3 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 1, 3, 1)); // 15 13 11 9
110
- t3 = _mm_shuffle_epi32(t3, _MM_SHUFFLE(2, 1, 0, 3)); // 13 11 9 15
111
- g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
112
- undiagonalize(&rows[0], &rows[2], &rows[3]);
113
- m0 = t0;
114
- m1 = t1;
115
- m2 = t2;
116
- m3 = t3;
117
-
118
- // Round 2. This round and all following rounds apply a fixed permutation
119
- // to the message words from the round before.
120
- t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2));
121
- t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1));
122
- g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
123
- t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2));
124
- tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3));
125
- t1 = _mm_blend_epi16(tt, t1, 0xCC);
126
- g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
127
- diagonalize(&rows[0], &rows[2], &rows[3]);
128
- t2 = _mm_unpacklo_epi64(m3, m1);
129
- tt = _mm_blend_epi16(t2, m2, 0xC0);
130
- t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0));
131
- g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
132
- t3 = _mm_unpackhi_epi32(m1, m3);
133
- tt = _mm_unpacklo_epi32(m2, t3);
134
- t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2));
135
- g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
136
- undiagonalize(&rows[0], &rows[2], &rows[3]);
137
- m0 = t0;
138
- m1 = t1;
139
- m2 = t2;
140
- m3 = t3;
141
-
142
- // Round 3
143
- t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2));
144
- t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1));
145
- g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
146
- t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2));
147
- tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3));
148
- t1 = _mm_blend_epi16(tt, t1, 0xCC);
149
- g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
150
- diagonalize(&rows[0], &rows[2], &rows[3]);
151
- t2 = _mm_unpacklo_epi64(m3, m1);
152
- tt = _mm_blend_epi16(t2, m2, 0xC0);
153
- t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0));
154
- g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
155
- t3 = _mm_unpackhi_epi32(m1, m3);
156
- tt = _mm_unpacklo_epi32(m2, t3);
157
- t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2));
158
- g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
159
- undiagonalize(&rows[0], &rows[2], &rows[3]);
160
- m0 = t0;
161
- m1 = t1;
162
- m2 = t2;
163
- m3 = t3;
164
-
165
- // Round 4
166
- t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2));
167
- t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1));
168
- g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
169
- t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2));
170
- tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3));
171
- t1 = _mm_blend_epi16(tt, t1, 0xCC);
172
- g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
173
- diagonalize(&rows[0], &rows[2], &rows[3]);
174
- t2 = _mm_unpacklo_epi64(m3, m1);
175
- tt = _mm_blend_epi16(t2, m2, 0xC0);
176
- t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0));
177
- g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
178
- t3 = _mm_unpackhi_epi32(m1, m3);
179
- tt = _mm_unpacklo_epi32(m2, t3);
180
- t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2));
181
- g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
182
- undiagonalize(&rows[0], &rows[2], &rows[3]);
183
- m0 = t0;
184
- m1 = t1;
185
- m2 = t2;
186
- m3 = t3;
187
-
188
- // Round 5
189
- t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2));
190
- t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1));
191
- g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
192
- t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2));
193
- tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3));
194
- t1 = _mm_blend_epi16(tt, t1, 0xCC);
195
- g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
196
- diagonalize(&rows[0], &rows[2], &rows[3]);
197
- t2 = _mm_unpacklo_epi64(m3, m1);
198
- tt = _mm_blend_epi16(t2, m2, 0xC0);
199
- t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0));
200
- g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
201
- t3 = _mm_unpackhi_epi32(m1, m3);
202
- tt = _mm_unpacklo_epi32(m2, t3);
203
- t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2));
204
- g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
205
- undiagonalize(&rows[0], &rows[2], &rows[3]);
206
- m0 = t0;
207
- m1 = t1;
208
- m2 = t2;
209
- m3 = t3;
210
-
211
- // Round 6
212
- t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2));
213
- t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1));
214
- g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
215
- t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2));
216
- tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3));
217
- t1 = _mm_blend_epi16(tt, t1, 0xCC);
218
- g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
219
- diagonalize(&rows[0], &rows[2], &rows[3]);
220
- t2 = _mm_unpacklo_epi64(m3, m1);
221
- tt = _mm_blend_epi16(t2, m2, 0xC0);
222
- t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0));
223
- g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
224
- t3 = _mm_unpackhi_epi32(m1, m3);
225
- tt = _mm_unpacklo_epi32(m2, t3);
226
- t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2));
227
- g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
228
- undiagonalize(&rows[0], &rows[2], &rows[3]);
229
- m0 = t0;
230
- m1 = t1;
231
- m2 = t2;
232
- m3 = t3;
233
-
234
- // Round 7
235
- t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2));
236
- t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1));
237
- g1(&rows[0], &rows[1], &rows[2], &rows[3], t0);
238
- t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2));
239
- tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3));
240
- t1 = _mm_blend_epi16(tt, t1, 0xCC);
241
- g2(&rows[0], &rows[1], &rows[2], &rows[3], t1);
242
- diagonalize(&rows[0], &rows[2], &rows[3]);
243
- t2 = _mm_unpacklo_epi64(m3, m1);
244
- tt = _mm_blend_epi16(t2, m2, 0xC0);
245
- t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0));
246
- g1(&rows[0], &rows[1], &rows[2], &rows[3], t2);
247
- t3 = _mm_unpackhi_epi32(m1, m3);
248
- tt = _mm_unpacklo_epi32(m2, t3);
249
- t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2));
250
- g2(&rows[0], &rows[1], &rows[2], &rows[3], t3);
251
- undiagonalize(&rows[0], &rows[2], &rows[3]);
252
- }
253
-
254
- void blake3_compress_in_place_sse41(uint32_t cv[8],
255
- const uint8_t block[BLAKE3_BLOCK_LEN],
256
- uint8_t block_len, uint64_t counter,
257
- uint8_t flags) {
258
- __m128i rows[4];
259
- compress_pre(rows, cv, block, block_len, counter, flags);
260
- storeu(xorv(rows[0], rows[2]), (uint8_t *)&cv[0]);
261
- storeu(xorv(rows[1], rows[3]), (uint8_t *)&cv[4]);
262
- }
263
-
264
- void blake3_compress_xof_sse41(const uint32_t cv[8],
265
- const uint8_t block[BLAKE3_BLOCK_LEN],
266
- uint8_t block_len, uint64_t counter,
267
- uint8_t flags, uint8_t out[64]) {
268
- __m128i rows[4];
269
- compress_pre(rows, cv, block, block_len, counter, flags);
270
- storeu(xorv(rows[0], rows[2]), &out[0]);
271
- storeu(xorv(rows[1], rows[3]), &out[16]);
272
- storeu(xorv(rows[2], loadu((uint8_t *)&cv[0])), &out[32]);
273
- storeu(xorv(rows[3], loadu((uint8_t *)&cv[4])), &out[48]);
274
- }
275
-
276
- INLINE void round_fn(__m128i v[16], __m128i m[16], size_t r) {
277
- v[0] = addv(v[0], m[(size_t)MSG_SCHEDULE[r][0]]);
278
- v[1] = addv(v[1], m[(size_t)MSG_SCHEDULE[r][2]]);
279
- v[2] = addv(v[2], m[(size_t)MSG_SCHEDULE[r][4]]);
280
- v[3] = addv(v[3], m[(size_t)MSG_SCHEDULE[r][6]]);
281
- v[0] = addv(v[0], v[4]);
282
- v[1] = addv(v[1], v[5]);
283
- v[2] = addv(v[2], v[6]);
284
- v[3] = addv(v[3], v[7]);
285
- v[12] = xorv(v[12], v[0]);
286
- v[13] = xorv(v[13], v[1]);
287
- v[14] = xorv(v[14], v[2]);
288
- v[15] = xorv(v[15], v[3]);
289
- v[12] = rot16(v[12]);
290
- v[13] = rot16(v[13]);
291
- v[14] = rot16(v[14]);
292
- v[15] = rot16(v[15]);
293
- v[8] = addv(v[8], v[12]);
294
- v[9] = addv(v[9], v[13]);
295
- v[10] = addv(v[10], v[14]);
296
- v[11] = addv(v[11], v[15]);
297
- v[4] = xorv(v[4], v[8]);
298
- v[5] = xorv(v[5], v[9]);
299
- v[6] = xorv(v[6], v[10]);
300
- v[7] = xorv(v[7], v[11]);
301
- v[4] = rot12(v[4]);
302
- v[5] = rot12(v[5]);
303
- v[6] = rot12(v[6]);
304
- v[7] = rot12(v[7]);
305
- v[0] = addv(v[0], m[(size_t)MSG_SCHEDULE[r][1]]);
306
- v[1] = addv(v[1], m[(size_t)MSG_SCHEDULE[r][3]]);
307
- v[2] = addv(v[2], m[(size_t)MSG_SCHEDULE[r][5]]);
308
- v[3] = addv(v[3], m[(size_t)MSG_SCHEDULE[r][7]]);
309
- v[0] = addv(v[0], v[4]);
310
- v[1] = addv(v[1], v[5]);
311
- v[2] = addv(v[2], v[6]);
312
- v[3] = addv(v[3], v[7]);
313
- v[12] = xorv(v[12], v[0]);
314
- v[13] = xorv(v[13], v[1]);
315
- v[14] = xorv(v[14], v[2]);
316
- v[15] = xorv(v[15], v[3]);
317
- v[12] = rot8(v[12]);
318
- v[13] = rot8(v[13]);
319
- v[14] = rot8(v[14]);
320
- v[15] = rot8(v[15]);
321
- v[8] = addv(v[8], v[12]);
322
- v[9] = addv(v[9], v[13]);
323
- v[10] = addv(v[10], v[14]);
324
- v[11] = addv(v[11], v[15]);
325
- v[4] = xorv(v[4], v[8]);
326
- v[5] = xorv(v[5], v[9]);
327
- v[6] = xorv(v[6], v[10]);
328
- v[7] = xorv(v[7], v[11]);
329
- v[4] = rot7(v[4]);
330
- v[5] = rot7(v[5]);
331
- v[6] = rot7(v[6]);
332
- v[7] = rot7(v[7]);
333
-
334
- v[0] = addv(v[0], m[(size_t)MSG_SCHEDULE[r][8]]);
335
- v[1] = addv(v[1], m[(size_t)MSG_SCHEDULE[r][10]]);
336
- v[2] = addv(v[2], m[(size_t)MSG_SCHEDULE[r][12]]);
337
- v[3] = addv(v[3], m[(size_t)MSG_SCHEDULE[r][14]]);
338
- v[0] = addv(v[0], v[5]);
339
- v[1] = addv(v[1], v[6]);
340
- v[2] = addv(v[2], v[7]);
341
- v[3] = addv(v[3], v[4]);
342
- v[15] = xorv(v[15], v[0]);
343
- v[12] = xorv(v[12], v[1]);
344
- v[13] = xorv(v[13], v[2]);
345
- v[14] = xorv(v[14], v[3]);
346
- v[15] = rot16(v[15]);
347
- v[12] = rot16(v[12]);
348
- v[13] = rot16(v[13]);
349
- v[14] = rot16(v[14]);
350
- v[10] = addv(v[10], v[15]);
351
- v[11] = addv(v[11], v[12]);
352
- v[8] = addv(v[8], v[13]);
353
- v[9] = addv(v[9], v[14]);
354
- v[5] = xorv(v[5], v[10]);
355
- v[6] = xorv(v[6], v[11]);
356
- v[7] = xorv(v[7], v[8]);
357
- v[4] = xorv(v[4], v[9]);
358
- v[5] = rot12(v[5]);
359
- v[6] = rot12(v[6]);
360
- v[7] = rot12(v[7]);
361
- v[4] = rot12(v[4]);
362
- v[0] = addv(v[0], m[(size_t)MSG_SCHEDULE[r][9]]);
363
- v[1] = addv(v[1], m[(size_t)MSG_SCHEDULE[r][11]]);
364
- v[2] = addv(v[2], m[(size_t)MSG_SCHEDULE[r][13]]);
365
- v[3] = addv(v[3], m[(size_t)MSG_SCHEDULE[r][15]]);
366
- v[0] = addv(v[0], v[5]);
367
- v[1] = addv(v[1], v[6]);
368
- v[2] = addv(v[2], v[7]);
369
- v[3] = addv(v[3], v[4]);
370
- v[15] = xorv(v[15], v[0]);
371
- v[12] = xorv(v[12], v[1]);
372
- v[13] = xorv(v[13], v[2]);
373
- v[14] = xorv(v[14], v[3]);
374
- v[15] = rot8(v[15]);
375
- v[12] = rot8(v[12]);
376
- v[13] = rot8(v[13]);
377
- v[14] = rot8(v[14]);
378
- v[10] = addv(v[10], v[15]);
379
- v[11] = addv(v[11], v[12]);
380
- v[8] = addv(v[8], v[13]);
381
- v[9] = addv(v[9], v[14]);
382
- v[5] = xorv(v[5], v[10]);
383
- v[6] = xorv(v[6], v[11]);
384
- v[7] = xorv(v[7], v[8]);
385
- v[4] = xorv(v[4], v[9]);
386
- v[5] = rot7(v[5]);
387
- v[6] = rot7(v[6]);
388
- v[7] = rot7(v[7]);
389
- v[4] = rot7(v[4]);
390
- }
391
-
392
- INLINE void transpose_vecs(__m128i vecs[DEGREE]) {
393
- // Interleave 32-bit lanes. The low unpack is lanes 00/11 and the high is
394
- // 22/33. Note that this doesn't split the vector into two lanes, as the
395
- // AVX2 counterparts do.
396
- __m128i ab_01 = _mm_unpacklo_epi32(vecs[0], vecs[1]);
397
- __m128i ab_23 = _mm_unpackhi_epi32(vecs[0], vecs[1]);
398
- __m128i cd_01 = _mm_unpacklo_epi32(vecs[2], vecs[3]);
399
- __m128i cd_23 = _mm_unpackhi_epi32(vecs[2], vecs[3]);
400
-
401
- // Interleave 64-bit lanes.
402
- __m128i abcd_0 = _mm_unpacklo_epi64(ab_01, cd_01);
403
- __m128i abcd_1 = _mm_unpackhi_epi64(ab_01, cd_01);
404
- __m128i abcd_2 = _mm_unpacklo_epi64(ab_23, cd_23);
405
- __m128i abcd_3 = _mm_unpackhi_epi64(ab_23, cd_23);
406
-
407
- vecs[0] = abcd_0;
408
- vecs[1] = abcd_1;
409
- vecs[2] = abcd_2;
410
- vecs[3] = abcd_3;
411
- }
412
-
413
- INLINE void transpose_msg_vecs(const uint8_t *const *inputs,
414
- size_t block_offset, __m128i out[16]) {
415
- out[0] = loadu(&inputs[0][block_offset + 0 * sizeof(__m128i)]);
416
- out[1] = loadu(&inputs[1][block_offset + 0 * sizeof(__m128i)]);
417
- out[2] = loadu(&inputs[2][block_offset + 0 * sizeof(__m128i)]);
418
- out[3] = loadu(&inputs[3][block_offset + 0 * sizeof(__m128i)]);
419
- out[4] = loadu(&inputs[0][block_offset + 1 * sizeof(__m128i)]);
420
- out[5] = loadu(&inputs[1][block_offset + 1 * sizeof(__m128i)]);
421
- out[6] = loadu(&inputs[2][block_offset + 1 * sizeof(__m128i)]);
422
- out[7] = loadu(&inputs[3][block_offset + 1 * sizeof(__m128i)]);
423
- out[8] = loadu(&inputs[0][block_offset + 2 * sizeof(__m128i)]);
424
- out[9] = loadu(&inputs[1][block_offset + 2 * sizeof(__m128i)]);
425
- out[10] = loadu(&inputs[2][block_offset + 2 * sizeof(__m128i)]);
426
- out[11] = loadu(&inputs[3][block_offset + 2 * sizeof(__m128i)]);
427
- out[12] = loadu(&inputs[0][block_offset + 3 * sizeof(__m128i)]);
428
- out[13] = loadu(&inputs[1][block_offset + 3 * sizeof(__m128i)]);
429
- out[14] = loadu(&inputs[2][block_offset + 3 * sizeof(__m128i)]);
430
- out[15] = loadu(&inputs[3][block_offset + 3 * sizeof(__m128i)]);
431
- for (size_t i = 0; i < 4; ++i) {
432
- _mm_prefetch((const void *)&inputs[i][block_offset + 256], _MM_HINT_T0);
433
- }
434
- transpose_vecs(&out[0]);
435
- transpose_vecs(&out[4]);
436
- transpose_vecs(&out[8]);
437
- transpose_vecs(&out[12]);
438
- }
439
-
440
- INLINE void load_counters(uint64_t counter, bool increment_counter,
441
- __m128i *out_lo, __m128i *out_hi) {
442
- const __m128i mask = _mm_set1_epi32(-(int32_t)increment_counter);
443
- const __m128i add0 = _mm_set_epi32(3, 2, 1, 0);
444
- const __m128i add1 = _mm_and_si128(mask, add0);
445
- __m128i l = _mm_add_epi32(_mm_set1_epi32((int32_t)counter), add1);
446
- __m128i carry = _mm_cmpgt_epi32(_mm_xor_si128(add1, _mm_set1_epi32(0x80000000)),
447
- _mm_xor_si128( l, _mm_set1_epi32(0x80000000)));
448
- __m128i h = _mm_sub_epi32(_mm_set1_epi32((int32_t)(counter >> 32)), carry);
449
- *out_lo = l;
450
- *out_hi = h;
451
- }
452
-
453
- static
454
- void blake3_hash4_sse41(const uint8_t *const *inputs, size_t blocks,
455
- const uint32_t key[8], uint64_t counter,
456
- bool increment_counter, uint8_t flags,
457
- uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
458
- __m128i h_vecs[8] = {
459
- set1(key[0]), set1(key[1]), set1(key[2]), set1(key[3]),
460
- set1(key[4]), set1(key[5]), set1(key[6]), set1(key[7]),
461
- };
462
- __m128i counter_low_vec, counter_high_vec;
463
- load_counters(counter, increment_counter, &counter_low_vec,
464
- &counter_high_vec);
465
- uint8_t block_flags = flags | flags_start;
466
-
467
- for (size_t block = 0; block < blocks; block++) {
468
- if (block + 1 == blocks) {
469
- block_flags |= flags_end;
470
- }
471
- __m128i block_len_vec = set1(BLAKE3_BLOCK_LEN);
472
- __m128i block_flags_vec = set1(block_flags);
473
- __m128i msg_vecs[16];
474
- transpose_msg_vecs(inputs, block * BLAKE3_BLOCK_LEN, msg_vecs);
475
-
476
- __m128i v[16] = {
477
- h_vecs[0], h_vecs[1], h_vecs[2], h_vecs[3],
478
- h_vecs[4], h_vecs[5], h_vecs[6], h_vecs[7],
479
- set1(IV[0]), set1(IV[1]), set1(IV[2]), set1(IV[3]),
480
- counter_low_vec, counter_high_vec, block_len_vec, block_flags_vec,
481
- };
482
- round_fn(v, msg_vecs, 0);
483
- round_fn(v, msg_vecs, 1);
484
- round_fn(v, msg_vecs, 2);
485
- round_fn(v, msg_vecs, 3);
486
- round_fn(v, msg_vecs, 4);
487
- round_fn(v, msg_vecs, 5);
488
- round_fn(v, msg_vecs, 6);
489
- h_vecs[0] = xorv(v[0], v[8]);
490
- h_vecs[1] = xorv(v[1], v[9]);
491
- h_vecs[2] = xorv(v[2], v[10]);
492
- h_vecs[3] = xorv(v[3], v[11]);
493
- h_vecs[4] = xorv(v[4], v[12]);
494
- h_vecs[5] = xorv(v[5], v[13]);
495
- h_vecs[6] = xorv(v[6], v[14]);
496
- h_vecs[7] = xorv(v[7], v[15]);
497
-
498
- block_flags = flags;
499
- }
500
-
501
- transpose_vecs(&h_vecs[0]);
502
- transpose_vecs(&h_vecs[4]);
503
- // The first four vecs now contain the first half of each output, and the
504
- // second four vecs contain the second half of each output.
505
- storeu(h_vecs[0], &out[0 * sizeof(__m128i)]);
506
- storeu(h_vecs[4], &out[1 * sizeof(__m128i)]);
507
- storeu(h_vecs[1], &out[2 * sizeof(__m128i)]);
508
- storeu(h_vecs[5], &out[3 * sizeof(__m128i)]);
509
- storeu(h_vecs[2], &out[4 * sizeof(__m128i)]);
510
- storeu(h_vecs[6], &out[5 * sizeof(__m128i)]);
511
- storeu(h_vecs[3], &out[6 * sizeof(__m128i)]);
512
- storeu(h_vecs[7], &out[7 * sizeof(__m128i)]);
513
- }
514
-
515
- INLINE void hash_one_sse41(const uint8_t *input, size_t blocks,
516
- const uint32_t key[8], uint64_t counter,
517
- uint8_t flags, uint8_t flags_start,
518
- uint8_t flags_end, uint8_t out[BLAKE3_OUT_LEN]) {
519
- uint32_t cv[8];
520
- memcpy(cv, key, BLAKE3_KEY_LEN);
521
- uint8_t block_flags = flags | flags_start;
522
- while (blocks > 0) {
523
- if (blocks == 1) {
524
- block_flags |= flags_end;
525
- }
526
- blake3_compress_in_place_sse41(cv, input, BLAKE3_BLOCK_LEN, counter,
527
- block_flags);
528
- input = &input[BLAKE3_BLOCK_LEN];
529
- blocks -= 1;
530
- block_flags = flags;
531
- }
532
- memcpy(out, cv, BLAKE3_OUT_LEN);
533
- }
534
-
535
- void blake3_hash_many_sse41(const uint8_t *const *inputs, size_t num_inputs,
536
- size_t blocks, const uint32_t key[8],
537
- uint64_t counter, bool increment_counter,
538
- uint8_t flags, uint8_t flags_start,
539
- uint8_t flags_end, uint8_t *out) {
540
- while (num_inputs >= DEGREE) {
541
- blake3_hash4_sse41(inputs, blocks, key, counter, increment_counter, flags,
542
- flags_start, flags_end, out);
543
- if (increment_counter) {
544
- counter += DEGREE;
545
- }
546
- inputs += DEGREE;
547
- num_inputs -= DEGREE;
548
- out = &out[DEGREE * BLAKE3_OUT_LEN];
549
- }
550
- while (num_inputs > 0) {
551
- hash_one_sse41(inputs[0], blocks, key, counter, flags, flags_start,
552
- flags_end, out);
553
- if (increment_counter) {
554
- counter += 1;
555
- }
556
- inputs += 1;
557
- num_inputs -= 1;
558
- out = &out[BLAKE3_OUT_LEN];
559
- }
560
- }