react-native-quick-crypto 1.0.19 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (561)
  1. package/QuickCrypto.podspec +12 -38
  2. package/README.md +2 -0
  3. package/android/CMakeLists.txt +3 -0
  4. package/android/build.gradle +5 -1
  5. package/cpp/argon2/HybridArgon2.cpp +10 -3
  6. package/cpp/blake3/HybridBlake3.cpp +5 -3
  7. package/cpp/cipher/CCMCipher.cpp +29 -16
  8. package/cpp/cipher/CCMCipher.hpp +2 -4
  9. package/cpp/cipher/ChaCha20Cipher.cpp +14 -18
  10. package/cpp/cipher/ChaCha20Cipher.hpp +2 -4
  11. package/cpp/cipher/ChaCha20Poly1305Cipher.cpp +34 -23
  12. package/cpp/cipher/ChaCha20Poly1305Cipher.hpp +2 -4
  13. package/cpp/cipher/GCMCipher.cpp +14 -15
  14. package/cpp/cipher/HybridCipher.cpp +39 -36
  15. package/cpp/cipher/HybridCipher.hpp +17 -1
  16. package/cpp/cipher/HybridRsaCipher.cpp +74 -29
  17. package/cpp/cipher/OCBCipher.cpp +4 -3
  18. package/cpp/cipher/XChaCha20Poly1305Cipher.cpp +14 -13
  19. package/cpp/cipher/XSalsa20Cipher.cpp +72 -6
  20. package/cpp/cipher/XSalsa20Cipher.hpp +25 -3
  21. package/cpp/cipher/XSalsa20Poly1305Cipher.cpp +21 -25
  22. package/cpp/dh/HybridDiffieHellman.cpp +29 -0
  23. package/cpp/ec/HybridEcKeyPair.cpp +35 -33
  24. package/cpp/ec/HybridEcKeyPair.hpp +3 -7
  25. package/cpp/ecdh/HybridECDH.cpp +23 -0
  26. package/cpp/ed25519/HybridEdKeyPair.cpp +73 -117
  27. package/cpp/ed25519/HybridEdKeyPair.hpp +5 -9
  28. package/cpp/hash/HybridHash.cpp +5 -7
  29. package/cpp/hkdf/HybridHkdf.cpp +6 -4
  30. package/cpp/hmac/HybridHmac.cpp +4 -6
  31. package/cpp/kmac/HybridKmac.cpp +4 -4
  32. package/cpp/mldsa/HybridMlDsaKeyPair.cpp +37 -49
  33. package/cpp/mlkem/HybridMlKemKeyPair.cpp +39 -43
  34. package/cpp/pbkdf2/HybridPbkdf2.cpp +7 -8
  35. package/cpp/rsa/HybridRsaKeyPair.cpp +5 -8
  36. package/cpp/rsa/HybridRsaKeyPair.hpp +4 -7
  37. package/cpp/scrypt/HybridScrypt.cpp +6 -4
  38. package/cpp/sign/HybridSignHandle.cpp +25 -68
  39. package/cpp/sign/HybridVerifyHandle.cpp +23 -60
  40. package/cpp/utils/HybridUtils.cpp +213 -111
  41. package/cpp/utils/HybridUtils.hpp +9 -2
  42. package/cpp/utils/QuickCryptoUtils.hpp +72 -0
  43. package/deps/simdutf/LICENSE-APACHE +201 -0
  44. package/deps/simdutf/LICENSE-MIT +18 -0
  45. package/deps/simdutf/README.md +2782 -0
  46. package/deps/simdutf/include/simdutf/avx512.h +79 -0
  47. package/deps/simdutf/include/simdutf/base64_implementation.h +158 -0
  48. package/deps/simdutf/include/simdutf/base64_tables.h +887 -0
  49. package/deps/simdutf/include/simdutf/common_defs.h +186 -0
  50. package/deps/simdutf/include/simdutf/compiler_check.h +50 -0
  51. package/deps/simdutf/include/simdutf/constexpr_ptr.h +138 -0
  52. package/deps/simdutf/include/simdutf/encoding_types.h +189 -0
  53. package/deps/simdutf/include/simdutf/error.h +126 -0
  54. package/deps/simdutf/include/simdutf/implementation.h +7081 -0
  55. package/deps/simdutf/include/simdutf/internal/isadetection.h +325 -0
  56. package/deps/simdutf/include/simdutf/portability.h +285 -0
  57. package/deps/simdutf/include/simdutf/scalar/ascii.h +86 -0
  58. package/deps/simdutf/include/simdutf/scalar/atomic_util.h +105 -0
  59. package/deps/simdutf/include/simdutf/scalar/base64.h +911 -0
  60. package/deps/simdutf/include/simdutf/scalar/latin1.h +26 -0
  61. package/deps/simdutf/include/simdutf/scalar/latin1_to_utf16/latin1_to_utf16.h +52 -0
  62. package/deps/simdutf/include/simdutf/scalar/latin1_to_utf32/latin1_to_utf32.h +27 -0
  63. package/deps/simdutf/include/simdutf/scalar/latin1_to_utf8/latin1_to_utf8.h +191 -0
  64. package/deps/simdutf/include/simdutf/scalar/swap_bytes.h +35 -0
  65. package/deps/simdutf/include/simdutf/scalar/utf16.h +226 -0
  66. package/deps/simdutf/include/simdutf/scalar/utf16_to_latin1/utf16_to_latin1.h +108 -0
  67. package/deps/simdutf/include/simdutf/scalar/utf16_to_latin1/valid_utf16_to_latin1.h +40 -0
  68. package/deps/simdutf/include/simdutf/scalar/utf16_to_utf32/utf16_to_utf32.h +86 -0
  69. package/deps/simdutf/include/simdutf/scalar/utf16_to_utf32/valid_utf16_to_utf32.h +44 -0
  70. package/deps/simdutf/include/simdutf/scalar/utf16_to_utf8/utf16_to_utf8.h +295 -0
  71. package/deps/simdutf/include/simdutf/scalar/utf16_to_utf8/valid_utf16_to_utf8.h +91 -0
  72. package/deps/simdutf/include/simdutf/scalar/utf32.h +82 -0
  73. package/deps/simdutf/include/simdutf/scalar/utf32_to_latin1/utf32_to_latin1.h +68 -0
  74. package/deps/simdutf/include/simdutf/scalar/utf32_to_latin1/valid_utf32_to_latin1.h +67 -0
  75. package/deps/simdutf/include/simdutf/scalar/utf32_to_utf16/utf32_to_utf16.h +84 -0
  76. package/deps/simdutf/include/simdutf/scalar/utf32_to_utf16/valid_utf32_to_utf16.h +44 -0
  77. package/deps/simdutf/include/simdutf/scalar/utf32_to_utf8/utf32_to_utf8.h +142 -0
  78. package/deps/simdutf/include/simdutf/scalar/utf32_to_utf8/valid_utf32_to_utf8.h +72 -0
  79. package/deps/simdutf/include/simdutf/scalar/utf8.h +326 -0
  80. package/deps/simdutf/include/simdutf/scalar/utf8_to_latin1/utf8_to_latin1.h +225 -0
  81. package/deps/simdutf/include/simdutf/scalar/utf8_to_latin1/valid_utf8_to_latin1.h +87 -0
  82. package/deps/simdutf/include/simdutf/scalar/utf8_to_utf16/utf8_to_utf16.h +342 -0
  83. package/deps/simdutf/include/simdutf/scalar/utf8_to_utf16/valid_utf8_to_utf16.h +106 -0
  84. package/deps/simdutf/include/simdutf/scalar/utf8_to_utf32/utf8_to_utf32.h +299 -0
  85. package/deps/simdutf/include/simdutf/scalar/utf8_to_utf32/valid_utf8_to_utf32.h +83 -0
  86. package/deps/simdutf/include/simdutf/simdutf_version.h +26 -0
  87. package/deps/simdutf/include/simdutf.h +26 -0
  88. package/deps/simdutf/include/simdutf_c.h +342 -0
  89. package/deps/simdutf/src/arm64/arm_base64.cpp +791 -0
  90. package/deps/simdutf/src/arm64/arm_convert_latin1_to_utf16.cpp +24 -0
  91. package/deps/simdutf/src/arm64/arm_convert_latin1_to_utf32.cpp +24 -0
  92. package/deps/simdutf/src/arm64/arm_convert_latin1_to_utf8.cpp +70 -0
  93. package/deps/simdutf/src/arm64/arm_convert_utf16_to_latin1.cpp +61 -0
  94. package/deps/simdutf/src/arm64/arm_convert_utf16_to_utf32.cpp +185 -0
  95. package/deps/simdutf/src/arm64/arm_convert_utf16_to_utf8.cpp +780 -0
  96. package/deps/simdutf/src/arm64/arm_convert_utf32_to_latin1.cpp +60 -0
  97. package/deps/simdutf/src/arm64/arm_convert_utf32_to_utf16.cpp +208 -0
  98. package/deps/simdutf/src/arm64/arm_convert_utf32_to_utf8.cpp +505 -0
  99. package/deps/simdutf/src/arm64/arm_convert_utf8_to_latin1.cpp +69 -0
  100. package/deps/simdutf/src/arm64/arm_convert_utf8_to_utf16.cpp +313 -0
  101. package/deps/simdutf/src/arm64/arm_convert_utf8_to_utf32.cpp +179 -0
  102. package/deps/simdutf/src/arm64/arm_find.cpp +199 -0
  103. package/deps/simdutf/src/arm64/arm_utf16fix.cpp +185 -0
  104. package/deps/simdutf/src/arm64/arm_validate_utf16.cpp +165 -0
  105. package/deps/simdutf/src/arm64/arm_validate_utf32le.cpp +65 -0
  106. package/deps/simdutf/src/arm64/implementation.cpp +1442 -0
  107. package/deps/simdutf/src/encoding_types.cpp +67 -0
  108. package/deps/simdutf/src/error.cpp +3 -0
  109. package/deps/simdutf/src/fallback/implementation.cpp +589 -0
  110. package/deps/simdutf/src/generic/ascii_validation.h +50 -0
  111. package/deps/simdutf/src/generic/base64.h +233 -0
  112. package/deps/simdutf/src/generic/base64lengths.h +63 -0
  113. package/deps/simdutf/src/generic/buf_block_reader.h +109 -0
  114. package/deps/simdutf/src/generic/find.h +75 -0
  115. package/deps/simdutf/src/generic/utf16/change_endianness.h +24 -0
  116. package/deps/simdutf/src/generic/utf16/count_code_points_bytemask.h +58 -0
  117. package/deps/simdutf/src/generic/utf16/to_well_formed.h +93 -0
  118. package/deps/simdutf/src/generic/utf16/utf32_length_from_utf16.h +15 -0
  119. package/deps/simdutf/src/generic/utf16/utf8_length_from_utf16.h +35 -0
  120. package/deps/simdutf/src/generic/utf16/utf8_length_from_utf16_bytemask.h +199 -0
  121. package/deps/simdutf/src/generic/utf16.h +73 -0
  122. package/deps/simdutf/src/generic/utf32.h +136 -0
  123. package/deps/simdutf/src/generic/utf8/utf16_length_from_utf8_bytemask.h +53 -0
  124. package/deps/simdutf/src/generic/utf8.h +92 -0
  125. package/deps/simdutf/src/generic/utf8_to_latin1/utf8_to_latin1.h +316 -0
  126. package/deps/simdutf/src/generic/utf8_to_latin1/valid_utf8_to_latin1.h +78 -0
  127. package/deps/simdutf/src/generic/utf8_to_utf16/utf8_to_utf16.h +332 -0
  128. package/deps/simdutf/src/generic/utf8_to_utf16/valid_utf8_to_utf16.h +74 -0
  129. package/deps/simdutf/src/generic/utf8_to_utf32/utf8_to_utf32.h +318 -0
  130. package/deps/simdutf/src/generic/utf8_to_utf32/valid_utf8_to_utf32.h +42 -0
  131. package/deps/simdutf/src/generic/utf8_validation/utf8_lookup4_algorithm.h +223 -0
  132. package/deps/simdutf/src/generic/utf8_validation/utf8_validator.h +84 -0
  133. package/deps/simdutf/src/generic/validate_utf16.h +164 -0
  134. package/deps/simdutf/src/generic/validate_utf32.h +99 -0
  135. package/deps/simdutf/src/haswell/avx2_base64.cpp +837 -0
  136. package/deps/simdutf/src/haswell/avx2_convert_latin1_to_utf16.cpp +28 -0
  137. package/deps/simdutf/src/haswell/avx2_convert_latin1_to_utf32.cpp +20 -0
  138. package/deps/simdutf/src/haswell/avx2_convert_latin1_to_utf8.cpp +83 -0
  139. package/deps/simdutf/src/haswell/avx2_convert_utf16_to_latin1.cpp +83 -0
  140. package/deps/simdutf/src/haswell/avx2_convert_utf16_to_utf32.cpp +210 -0
  141. package/deps/simdutf/src/haswell/avx2_convert_utf16_to_utf8.cpp +602 -0
  142. package/deps/simdutf/src/haswell/avx2_convert_utf32_to_latin1.cpp +116 -0
  143. package/deps/simdutf/src/haswell/avx2_convert_utf32_to_utf16.cpp +164 -0
  144. package/deps/simdutf/src/haswell/avx2_convert_utf32_to_utf8.cpp +569 -0
  145. package/deps/simdutf/src/haswell/avx2_convert_utf8_to_latin1.cpp +60 -0
  146. package/deps/simdutf/src/haswell/avx2_convert_utf8_to_utf16.cpp +195 -0
  147. package/deps/simdutf/src/haswell/avx2_convert_utf8_to_utf32.cpp +135 -0
  148. package/deps/simdutf/src/haswell/avx2_utf16fix.cpp +173 -0
  149. package/deps/simdutf/src/haswell/avx2_validate_utf16.cpp +17 -0
  150. package/deps/simdutf/src/haswell/implementation.cpp +1447 -0
  151. package/deps/simdutf/src/icelake/icelake_ascii_validation.inl.cpp +19 -0
  152. package/deps/simdutf/src/icelake/icelake_base64.inl.cpp +630 -0
  153. package/deps/simdutf/src/icelake/icelake_common.inl.cpp +37 -0
  154. package/deps/simdutf/src/icelake/icelake_convert_latin1_to_utf16.inl.cpp +36 -0
  155. package/deps/simdutf/src/icelake/icelake_convert_latin1_to_utf32.inl.cpp +23 -0
  156. package/deps/simdutf/src/icelake/icelake_convert_latin1_to_utf8.inl.cpp +107 -0
  157. package/deps/simdutf/src/icelake/icelake_convert_utf16_to_latin1.inl.cpp +103 -0
  158. package/deps/simdutf/src/icelake/icelake_convert_utf16_to_utf32.inl.cpp +136 -0
  159. package/deps/simdutf/src/icelake/icelake_convert_utf16_to_utf8.inl.cpp +206 -0
  160. package/deps/simdutf/src/icelake/icelake_convert_utf32_to_latin1.inl.cpp +74 -0
  161. package/deps/simdutf/src/icelake/icelake_convert_utf32_to_utf16.inl.cpp +338 -0
  162. package/deps/simdutf/src/icelake/icelake_convert_utf32_to_utf8.inl.cpp +574 -0
  163. package/deps/simdutf/src/icelake/icelake_convert_utf8_to_latin1.inl.cpp +104 -0
  164. package/deps/simdutf/src/icelake/icelake_convert_utf8_to_utf16.inl.cpp +75 -0
  165. package/deps/simdutf/src/icelake/icelake_convert_valid_utf8_to_latin1.inl.cpp +69 -0
  166. package/deps/simdutf/src/icelake/icelake_find.inl.cpp +146 -0
  167. package/deps/simdutf/src/icelake/icelake_from_utf8.inl.cpp +266 -0
  168. package/deps/simdutf/src/icelake/icelake_from_valid_utf8.inl.cpp +136 -0
  169. package/deps/simdutf/src/icelake/icelake_macros.inl.cpp +143 -0
  170. package/deps/simdutf/src/icelake/icelake_utf16fix.cpp +138 -0
  171. package/deps/simdutf/src/icelake/icelake_utf32_validation.inl.cpp +63 -0
  172. package/deps/simdutf/src/icelake/icelake_utf8_common.inl.cpp +753 -0
  173. package/deps/simdutf/src/icelake/icelake_utf8_length_from_utf16.inl.cpp +269 -0
  174. package/deps/simdutf/src/icelake/icelake_utf8_validation.inl.cpp +116 -0
  175. package/deps/simdutf/src/icelake/implementation.cpp +1903 -0
  176. package/deps/simdutf/src/implementation.cpp +2526 -0
  177. package/deps/simdutf/src/lasx/implementation.cpp +1531 -0
  178. package/deps/simdutf/src/lasx/lasx_base64.cpp +695 -0
  179. package/deps/simdutf/src/lasx/lasx_convert_latin1_to_utf16.cpp +76 -0
  180. package/deps/simdutf/src/lasx/lasx_convert_latin1_to_utf32.cpp +55 -0
  181. package/deps/simdutf/src/lasx/lasx_convert_latin1_to_utf8.cpp +65 -0
  182. package/deps/simdutf/src/lasx/lasx_convert_utf16_to_latin1.cpp +64 -0
  183. package/deps/simdutf/src/lasx/lasx_convert_utf16_to_utf32.cpp +183 -0
  184. package/deps/simdutf/src/lasx/lasx_convert_utf16_to_utf8.cpp +550 -0
  185. package/deps/simdutf/src/lasx/lasx_convert_utf32_to_latin1.cpp +73 -0
  186. package/deps/simdutf/src/lasx/lasx_convert_utf32_to_utf16.cpp +218 -0
  187. package/deps/simdutf/src/lasx/lasx_convert_utf32_to_utf8.cpp +589 -0
  188. package/deps/simdutf/src/lasx/lasx_convert_utf8_to_latin1.cpp +72 -0
  189. package/deps/simdutf/src/lasx/lasx_convert_utf8_to_utf16.cpp +296 -0
  190. package/deps/simdutf/src/lasx/lasx_convert_utf8_to_utf32.cpp +190 -0
  191. package/deps/simdutf/src/lasx/lasx_find.cpp +64 -0
  192. package/deps/simdutf/src/lasx/lasx_validate_utf16.cpp +13 -0
  193. package/deps/simdutf/src/lasx/lasx_validate_utf32le.cpp +84 -0
  194. package/deps/simdutf/src/lsx/implementation.cpp +1417 -0
  195. package/deps/simdutf/src/lsx/lsx_base64.cpp +675 -0
  196. package/deps/simdutf/src/lsx/lsx_convert_latin1_to_utf16.cpp +39 -0
  197. package/deps/simdutf/src/lsx/lsx_convert_latin1_to_utf32.cpp +27 -0
  198. package/deps/simdutf/src/lsx/lsx_convert_latin1_to_utf8.cpp +56 -0
  199. package/deps/simdutf/src/lsx/lsx_convert_utf16_to_latin1.cpp +64 -0
  200. package/deps/simdutf/src/lsx/lsx_convert_utf16_to_utf32.cpp +133 -0
  201. package/deps/simdutf/src/lsx/lsx_convert_utf16_to_utf8.cpp +518 -0
  202. package/deps/simdutf/src/lsx/lsx_convert_utf32_to_latin1.cpp +66 -0
  203. package/deps/simdutf/src/lsx/lsx_convert_utf32_to_utf16.cpp +155 -0
  204. package/deps/simdutf/src/lsx/lsx_convert_utf32_to_utf8.cpp +459 -0
  205. package/deps/simdutf/src/lsx/lsx_convert_utf8_to_latin1.cpp +75 -0
  206. package/deps/simdutf/src/lsx/lsx_convert_utf8_to_utf16.cpp +291 -0
  207. package/deps/simdutf/src/lsx/lsx_convert_utf8_to_utf32.cpp +179 -0
  208. package/deps/simdutf/src/lsx/lsx_find.cpp +60 -0
  209. package/deps/simdutf/src/lsx/lsx_validate_utf16.cpp +13 -0
  210. package/deps/simdutf/src/lsx/lsx_validate_utf32le.cpp +68 -0
  211. package/deps/simdutf/src/ppc64/implementation.cpp +992 -0
  212. package/deps/simdutf/src/ppc64/ppc64_base64.cpp +480 -0
  213. package/deps/simdutf/src/ppc64/ppc64_base64_internal_tests.cpp +401 -0
  214. package/deps/simdutf/src/ppc64/ppc64_convert_latin1_to_utf16.cpp +12 -0
  215. package/deps/simdutf/src/ppc64/ppc64_convert_latin1_to_utf32.cpp +12 -0
  216. package/deps/simdutf/src/ppc64/ppc64_convert_latin1_to_utf8.cpp +149 -0
  217. package/deps/simdutf/src/ppc64/ppc64_convert_utf16_to_latin1.cpp +67 -0
  218. package/deps/simdutf/src/ppc64/ppc64_convert_utf16_to_utf32.cpp +87 -0
  219. package/deps/simdutf/src/ppc64/ppc64_convert_utf16_to_utf8.cpp +296 -0
  220. package/deps/simdutf/src/ppc64/ppc64_convert_utf32_to_latin1.cpp +57 -0
  221. package/deps/simdutf/src/ppc64/ppc64_convert_utf32_to_utf16.cpp +117 -0
  222. package/deps/simdutf/src/ppc64/ppc64_convert_utf32_to_utf8.cpp +166 -0
  223. package/deps/simdutf/src/ppc64/ppc64_convert_utf8_to_latin1.cpp +69 -0
  224. package/deps/simdutf/src/ppc64/ppc64_convert_utf8_to_utf16.cpp +211 -0
  225. package/deps/simdutf/src/ppc64/ppc64_convert_utf8_to_utf32.cpp +153 -0
  226. package/deps/simdutf/src/ppc64/ppc64_utf16_to_utf8_tables.h +1011 -0
  227. package/deps/simdutf/src/ppc64/ppc64_utf8_length_from_latin1.cpp +37 -0
  228. package/deps/simdutf/src/ppc64/ppc64_validate_utf16.cpp +19 -0
  229. package/deps/simdutf/src/ppc64/templates.cpp +91 -0
  230. package/deps/simdutf/src/rvv/implementation.cpp +138 -0
  231. package/deps/simdutf/src/rvv/rvv_find.cpp +27 -0
  232. package/deps/simdutf/src/rvv/rvv_helpers.inl.cpp +23 -0
  233. package/deps/simdutf/src/rvv/rvv_latin1_to.inl.cpp +71 -0
  234. package/deps/simdutf/src/rvv/rvv_length_from.inl.cpp +164 -0
  235. package/deps/simdutf/src/rvv/rvv_utf16_to.inl.cpp +399 -0
  236. package/deps/simdutf/src/rvv/rvv_utf16fix.cpp +110 -0
  237. package/deps/simdutf/src/rvv/rvv_utf32_to.inl.cpp +307 -0
  238. package/deps/simdutf/src/rvv/rvv_utf8_to.inl.cpp +435 -0
  239. package/deps/simdutf/src/rvv/rvv_validate.inl.cpp +275 -0
  240. package/deps/simdutf/src/simdutf/arm64/begin.h +2 -0
  241. package/deps/simdutf/src/simdutf/arm64/bitmanipulation.h +34 -0
  242. package/deps/simdutf/src/simdutf/arm64/end.h +2 -0
  243. package/deps/simdutf/src/simdutf/arm64/implementation.h +307 -0
  244. package/deps/simdutf/src/simdutf/arm64/intrinsics.h +10 -0
  245. package/deps/simdutf/src/simdutf/arm64/simd.h +547 -0
  246. package/deps/simdutf/src/simdutf/arm64/simd16-inl.h +403 -0
  247. package/deps/simdutf/src/simdutf/arm64/simd32-inl.h +129 -0
  248. package/deps/simdutf/src/simdutf/arm64/simd64-inl.h +28 -0
  249. package/deps/simdutf/src/simdutf/arm64.h +43 -0
  250. package/deps/simdutf/src/simdutf/fallback/begin.h +1 -0
  251. package/deps/simdutf/src/simdutf/fallback/bitmanipulation.h +13 -0
  252. package/deps/simdutf/src/simdutf/fallback/end.h +1 -0
  253. package/deps/simdutf/src/simdutf/fallback/implementation.h +331 -0
  254. package/deps/simdutf/src/simdutf/fallback.h +42 -0
  255. package/deps/simdutf/src/simdutf/haswell/begin.h +15 -0
  256. package/deps/simdutf/src/simdutf/haswell/bitmanipulation.h +35 -0
  257. package/deps/simdutf/src/simdutf/haswell/end.h +13 -0
  258. package/deps/simdutf/src/simdutf/haswell/implementation.h +338 -0
  259. package/deps/simdutf/src/simdutf/haswell/intrinsics.h +67 -0
  260. package/deps/simdutf/src/simdutf/haswell/simd.h +363 -0
  261. package/deps/simdutf/src/simdutf/haswell/simd16-inl.h +261 -0
  262. package/deps/simdutf/src/simdutf/haswell/simd32-inl.h +111 -0
  263. package/deps/simdutf/src/simdutf/haswell/simd64-inl.h +34 -0
  264. package/deps/simdutf/src/simdutf/haswell.h +63 -0
  265. package/deps/simdutf/src/simdutf/icelake/begin.h +14 -0
  266. package/deps/simdutf/src/simdutf/icelake/bitmanipulation.h +44 -0
  267. package/deps/simdutf/src/simdutf/icelake/end.h +12 -0
  268. package/deps/simdutf/src/simdutf/icelake/implementation.h +346 -0
  269. package/deps/simdutf/src/simdutf/icelake/intrinsics.h +138 -0
  270. package/deps/simdutf/src/simdutf/icelake/simd.h +17 -0
  271. package/deps/simdutf/src/simdutf/icelake/simd16-inl.h +90 -0
  272. package/deps/simdutf/src/simdutf/icelake/simd32-inl.h +47 -0
  273. package/deps/simdutf/src/simdutf/icelake.h +81 -0
  274. package/deps/simdutf/src/simdutf/lasx/begin.h +8 -0
  275. package/deps/simdutf/src/simdutf/lasx/bitmanipulation.h +25 -0
  276. package/deps/simdutf/src/simdutf/lasx/end.h +8 -0
  277. package/deps/simdutf/src/simdutf/lasx/implementation.h +310 -0
  278. package/deps/simdutf/src/simdutf/lasx/intrinsics.h +319 -0
  279. package/deps/simdutf/src/simdutf/lasx/simd.h +551 -0
  280. package/deps/simdutf/src/simdutf/lasx/simd16-inl.h +234 -0
  281. package/deps/simdutf/src/simdutf/lasx/simd32-inl.h +74 -0
  282. package/deps/simdutf/src/simdutf/lasx/simd64-inl.h +52 -0
  283. package/deps/simdutf/src/simdutf/lasx.h +49 -0
  284. package/deps/simdutf/src/simdutf/lsx/begin.h +2 -0
  285. package/deps/simdutf/src/simdutf/lsx/bitmanipulation.h +25 -0
  286. package/deps/simdutf/src/simdutf/lsx/end.h +2 -0
  287. package/deps/simdutf/src/simdutf/lsx/implementation.h +309 -0
  288. package/deps/simdutf/src/simdutf/lsx/intrinsics.h +196 -0
  289. package/deps/simdutf/src/simdutf/lsx/simd.h +421 -0
  290. package/deps/simdutf/src/simdutf/lsx/simd16-inl.h +242 -0
  291. package/deps/simdutf/src/simdutf/lsx/simd32-inl.h +69 -0
  292. package/deps/simdutf/src/simdutf/lsx/simd64-inl.h +50 -0
  293. package/deps/simdutf/src/simdutf/lsx.h +52 -0
  294. package/deps/simdutf/src/simdutf/ppc64/begin.h +1 -0
  295. package/deps/simdutf/src/simdutf/ppc64/bitmanipulation.h +29 -0
  296. package/deps/simdutf/src/simdutf/ppc64/end.h +1 -0
  297. package/deps/simdutf/src/simdutf/ppc64/implementation.h +348 -0
  298. package/deps/simdutf/src/simdutf/ppc64/intrinsics.h +19 -0
  299. package/deps/simdutf/src/simdutf/ppc64/simd.h +177 -0
  300. package/deps/simdutf/src/simdutf/ppc64/simd16-inl.h +327 -0
  301. package/deps/simdutf/src/simdutf/ppc64/simd32-inl.h +247 -0
  302. package/deps/simdutf/src/simdutf/ppc64/simd8-inl.h +618 -0
  303. package/deps/simdutf/src/simdutf/ppc64.h +40 -0
  304. package/deps/simdutf/src/simdutf/rvv/begin.h +7 -0
  305. package/deps/simdutf/src/simdutf/rvv/end.h +7 -0
  306. package/deps/simdutf/src/simdutf/rvv/implementation.h +321 -0
  307. package/deps/simdutf/src/simdutf/rvv/intrinsics.h +131 -0
  308. package/deps/simdutf/src/simdutf/rvv.h +41 -0
  309. package/deps/simdutf/src/simdutf/westmere/begin.h +8 -0
  310. package/deps/simdutf/src/simdutf/westmere/bitmanipulation.h +37 -0
  311. package/deps/simdutf/src/simdutf/westmere/end.h +8 -0
  312. package/deps/simdutf/src/simdutf/westmere/implementation.h +338 -0
  313. package/deps/simdutf/src/simdutf/westmere/intrinsics.h +38 -0
  314. package/deps/simdutf/src/simdutf/westmere/simd.h +379 -0
  315. package/deps/simdutf/src/simdutf/westmere/simd16-inl.h +242 -0
  316. package/deps/simdutf/src/simdutf/westmere/simd32-inl.h +151 -0
  317. package/deps/simdutf/src/simdutf/westmere/simd64-inl.h +33 -0
  318. package/deps/simdutf/src/simdutf/westmere.h +59 -0
  319. package/deps/simdutf/src/simdutf.cpp +152 -0
  320. package/deps/simdutf/src/simdutf_c.cpp +525 -0
  321. package/deps/simdutf/src/tables/utf16_to_utf8_tables.h +768 -0
  322. package/deps/simdutf/src/tables/utf32_to_utf16_tables.h +53 -0
  323. package/deps/simdutf/src/tables/utf8_to_utf16_tables.h +826 -0
  324. package/deps/simdutf/src/westmere/implementation.cpp +1479 -0
  325. package/deps/simdutf/src/westmere/internal/loader.cpp +7 -0
  326. package/deps/simdutf/src/westmere/internal/write_v_u16_11bits_to_utf8.cpp +66 -0
  327. package/deps/simdutf/src/westmere/sse_base64.cpp +672 -0
  328. package/deps/simdutf/src/westmere/sse_convert_latin1_to_utf16.cpp +21 -0
  329. package/deps/simdutf/src/westmere/sse_convert_latin1_to_utf32.cpp +31 -0
  330. package/deps/simdutf/src/westmere/sse_convert_latin1_to_utf8.cpp +71 -0
  331. package/deps/simdutf/src/westmere/sse_convert_utf16_to_latin1.cpp +70 -0
  332. package/deps/simdutf/src/westmere/sse_convert_utf16_to_utf32.cpp +206 -0
  333. package/deps/simdutf/src/westmere/sse_convert_utf16_to_utf8.cpp +504 -0
  334. package/deps/simdutf/src/westmere/sse_convert_utf32_to_latin1.cpp +82 -0
  335. package/deps/simdutf/src/westmere/sse_convert_utf32_to_utf16.cpp +209 -0
  336. package/deps/simdutf/src/westmere/sse_convert_utf32_to_utf8.cpp +589 -0
  337. package/deps/simdutf/src/westmere/sse_convert_utf8_to_latin1.cpp +58 -0
  338. package/deps/simdutf/src/westmere/sse_convert_utf8_to_utf16.cpp +197 -0
  339. package/deps/simdutf/src/westmere/sse_convert_utf8_to_utf32.cpp +141 -0
  340. package/deps/simdutf/src/westmere/sse_utf16fix.cpp +82 -0
  341. package/deps/simdutf/src/westmere/sse_validate_utf16.cpp +17 -0
  342. package/lib/commonjs/argon2.js +51 -2
  343. package/lib/commonjs/argon2.js.map +1 -1
  344. package/lib/commonjs/cipher.js +109 -11
  345. package/lib/commonjs/cipher.js.map +1 -1
  346. package/lib/commonjs/dsa.js +8 -2
  347. package/lib/commonjs/dsa.js.map +1 -1
  348. package/lib/commonjs/hash.js +15 -5
  349. package/lib/commonjs/hash.js.map +1 -1
  350. package/lib/commonjs/hkdf.js +33 -6
  351. package/lib/commonjs/hkdf.js.map +1 -1
  352. package/lib/commonjs/hmac.js +15 -5
  353. package/lib/commonjs/hmac.js.map +1 -1
  354. package/lib/commonjs/keys/publicCipher.js +10 -4
  355. package/lib/commonjs/keys/publicCipher.js.map +1 -1
  356. package/lib/commonjs/random.js +11 -2
  357. package/lib/commonjs/random.js.map +1 -1
  358. package/lib/commonjs/rsa.js +12 -5
  359. package/lib/commonjs/rsa.js.map +1 -1
  360. package/lib/commonjs/scrypt.js +47 -6
  361. package/lib/commonjs/scrypt.js.map +1 -1
  362. package/lib/commonjs/subtle.js +76 -5
  363. package/lib/commonjs/subtle.js.map +1 -1
  364. package/lib/commonjs/utils/cipher.js +18 -7
  365. package/lib/commonjs/utils/cipher.js.map +1 -1
  366. package/lib/commonjs/utils/conversion.js +33 -9
  367. package/lib/commonjs/utils/conversion.js.map +1 -1
  368. package/lib/commonjs/utils/timingSafeEqual.js +7 -2
  369. package/lib/commonjs/utils/timingSafeEqual.js.map +1 -1
  370. package/lib/commonjs/x509certificate.js +6 -6
  371. package/lib/commonjs/x509certificate.js.map +1 -1
  372. package/lib/module/argon2.js +51 -2
  373. package/lib/module/argon2.js.map +1 -1
  374. package/lib/module/cipher.js +109 -11
  375. package/lib/module/cipher.js.map +1 -1
  376. package/lib/module/dsa.js +8 -2
  377. package/lib/module/dsa.js.map +1 -1
  378. package/lib/module/hash.js +15 -5
  379. package/lib/module/hash.js.map +1 -1
  380. package/lib/module/hkdf.js +33 -6
  381. package/lib/module/hkdf.js.map +1 -1
  382. package/lib/module/hmac.js +15 -5
  383. package/lib/module/hmac.js.map +1 -1
  384. package/lib/module/keys/publicCipher.js +10 -4
  385. package/lib/module/keys/publicCipher.js.map +1 -1
  386. package/lib/module/random.js +11 -2
  387. package/lib/module/random.js.map +1 -1
  388. package/lib/module/rsa.js +11 -4
  389. package/lib/module/rsa.js.map +1 -1
  390. package/lib/module/scrypt.js +47 -6
  391. package/lib/module/scrypt.js.map +1 -1
  392. package/lib/module/subtle.js +76 -5
  393. package/lib/module/subtle.js.map +1 -1
  394. package/lib/module/utils/cipher.js +18 -7
  395. package/lib/module/utils/cipher.js.map +1 -1
  396. package/lib/module/utils/conversion.js +33 -9
  397. package/lib/module/utils/conversion.js.map +1 -1
  398. package/lib/module/utils/timingSafeEqual.js +8 -3
  399. package/lib/module/utils/timingSafeEqual.js.map +1 -1
  400. package/lib/module/x509certificate.js +6 -6
  401. package/lib/module/x509certificate.js.map +1 -1
  402. package/lib/typescript/argon2.d.ts.map +1 -1
  403. package/lib/typescript/cipher.d.ts +2 -2
  404. package/lib/typescript/cipher.d.ts.map +1 -1
  405. package/lib/typescript/dsa.d.ts.map +1 -1
  406. package/lib/typescript/hash.d.ts +2 -2
  407. package/lib/typescript/hash.d.ts.map +1 -1
  408. package/lib/typescript/hkdf.d.ts.map +1 -1
  409. package/lib/typescript/hmac.d.ts +2 -2
  410. package/lib/typescript/hmac.d.ts.map +1 -1
  411. package/lib/typescript/index.d.ts +1 -1
  412. package/lib/typescript/index.d.ts.map +1 -1
  413. package/lib/typescript/keys/publicCipher.d.ts.map +1 -1
  414. package/lib/typescript/random.d.ts.map +1 -1
  415. package/lib/typescript/rsa.d.ts.map +1 -1
  416. package/lib/typescript/scrypt.d.ts.map +1 -1
  417. package/lib/typescript/specs/utils.nitro.d.ts +0 -2
  418. package/lib/typescript/specs/utils.nitro.d.ts.map +1 -1
  419. package/lib/typescript/subtle.d.ts.map +1 -1
  420. package/lib/typescript/utils/cipher.d.ts +13 -1
  421. package/lib/typescript/utils/cipher.d.ts.map +1 -1
  422. package/lib/typescript/utils/conversion.d.ts +9 -6
  423. package/lib/typescript/utils/conversion.d.ts.map +1 -1
  424. package/lib/typescript/utils/timingSafeEqual.d.ts.map +1 -1
  425. package/lib/typescript/x509certificate.d.ts.map +1 -1
  426. package/nitrogen/generated/shared/c++/HybridUtilsSpec.cpp +0 -2
  427. package/nitrogen/generated/shared/c++/HybridUtilsSpec.hpp +0 -3
  428. package/package.json +38 -6
  429. package/src/argon2.ts +80 -2
  430. package/src/cipher.ts +139 -15
  431. package/src/dsa.ts +11 -2
  432. package/src/hash.ts +17 -7
  433. package/src/hkdf.ts +44 -6
  434. package/src/hmac.ts +17 -7
  435. package/src/keys/publicCipher.ts +10 -4
  436. package/src/random.ts +11 -2
  437. package/src/rsa.ts +18 -4
  438. package/src/scrypt.ts +73 -6
  439. package/src/specs/utils.nitro.ts +0 -2
  440. package/src/subtle.ts +90 -8
  441. package/src/utils/cipher.ts +30 -8
  442. package/src/utils/conversion.ts +58 -20
  443. package/src/utils/timingSafeEqual.ts +8 -3
  444. package/src/x509certificate.ts +5 -6
  445. package/deps/blake3/.cargo/config.toml +0 -2
  446. package/deps/blake3/.git-blame-ignore-revs +0 -2
  447. package/deps/blake3/.github/workflows/build_b3sum.py +0 -38
  448. package/deps/blake3/.github/workflows/ci.yml +0 -491
  449. package/deps/blake3/.github/workflows/tag.yml +0 -43
  450. package/deps/blake3/.github/workflows/upload_github_release_asset.py +0 -73
  451. package/deps/blake3/CONTRIBUTING.md +0 -31
  452. package/deps/blake3/Cargo.toml +0 -135
  453. package/deps/blake3/b3sum/Cargo.lock +0 -513
  454. package/deps/blake3/b3sum/Cargo.toml +0 -26
  455. package/deps/blake3/b3sum/README.md +0 -72
  456. package/deps/blake3/b3sum/src/main.rs +0 -564
  457. package/deps/blake3/b3sum/src/unit_tests.rs +0 -235
  458. package/deps/blake3/b3sum/tests/cli_tests.rs +0 -680
  459. package/deps/blake3/b3sum/what_does_check_do.md +0 -176
  460. package/deps/blake3/benches/bench.rs +0 -623
  461. package/deps/blake3/build.rs +0 -389
  462. package/deps/blake3/c/CMakeLists.txt +0 -383
  463. package/deps/blake3/c/CMakePresets.json +0 -73
  464. package/deps/blake3/c/Makefile.testing +0 -82
  465. package/deps/blake3/c/blake3-config.cmake.in +0 -14
  466. package/deps/blake3/c/blake3_avx2.c +0 -326
  467. package/deps/blake3/c/blake3_avx2_x86-64_unix.S +0 -1815
  468. package/deps/blake3/c/blake3_avx2_x86-64_windows_gnu.S +0 -1817
  469. package/deps/blake3/c/blake3_avx2_x86-64_windows_msvc.asm +0 -1828
  470. package/deps/blake3/c/blake3_avx512.c +0 -1388
  471. package/deps/blake3/c/blake3_avx512_x86-64_unix.S +0 -4824
  472. package/deps/blake3/c/blake3_avx512_x86-64_windows_gnu.S +0 -2615
  473. package/deps/blake3/c/blake3_avx512_x86-64_windows_msvc.asm +0 -2634
  474. package/deps/blake3/c/blake3_c_rust_bindings/Cargo.toml +0 -32
  475. package/deps/blake3/c/blake3_c_rust_bindings/README.md +0 -4
  476. package/deps/blake3/c/blake3_c_rust_bindings/benches/bench.rs +0 -477
  477. package/deps/blake3/c/blake3_c_rust_bindings/build.rs +0 -253
  478. package/deps/blake3/c/blake3_c_rust_bindings/cross_test.sh +0 -31
  479. package/deps/blake3/c/blake3_c_rust_bindings/src/lib.rs +0 -333
  480. package/deps/blake3/c/blake3_c_rust_bindings/src/test.rs +0 -696
  481. package/deps/blake3/c/blake3_sse2.c +0 -566
  482. package/deps/blake3/c/blake3_sse2_x86-64_unix.S +0 -2291
  483. package/deps/blake3/c/blake3_sse2_x86-64_windows_gnu.S +0 -2332
  484. package/deps/blake3/c/blake3_sse2_x86-64_windows_msvc.asm +0 -2350
  485. package/deps/blake3/c/blake3_sse41.c +0 -560
  486. package/deps/blake3/c/blake3_sse41_x86-64_unix.S +0 -2028
  487. package/deps/blake3/c/blake3_sse41_x86-64_windows_gnu.S +0 -2069
  488. package/deps/blake3/c/blake3_sse41_x86-64_windows_msvc.asm +0 -2089
  489. package/deps/blake3/c/blake3_tbb.cpp +0 -37
  490. package/deps/blake3/c/dependencies/CMakeLists.txt +0 -3
  491. package/deps/blake3/c/dependencies/tbb/CMakeLists.txt +0 -28
  492. package/deps/blake3/c/example.c +0 -36
  493. package/deps/blake3/c/example_tbb.c +0 -57
  494. package/deps/blake3/c/libblake3.pc.in +0 -12
  495. package/deps/blake3/c/main.c +0 -166
  496. package/deps/blake3/c/test.py +0 -97
  497. package/deps/blake3/media/B3.svg +0 -70
  498. package/deps/blake3/media/BLAKE3.svg +0 -85
  499. package/deps/blake3/media/speed.svg +0 -1474
  500. package/deps/blake3/reference_impl/Cargo.toml +0 -8
  501. package/deps/blake3/reference_impl/README.md +0 -14
  502. package/deps/blake3/reference_impl/reference_impl.rs +0 -374
  503. package/deps/blake3/src/ffi_avx2.rs +0 -65
  504. package/deps/blake3/src/ffi_avx512.rs +0 -169
  505. package/deps/blake3/src/ffi_neon.rs +0 -82
  506. package/deps/blake3/src/ffi_sse2.rs +0 -126
  507. package/deps/blake3/src/ffi_sse41.rs +0 -126
  508. package/deps/blake3/src/guts.rs +0 -60
  509. package/deps/blake3/src/hazmat.rs +0 -704
  510. package/deps/blake3/src/io.rs +0 -64
  511. package/deps/blake3/src/join.rs +0 -92
  512. package/deps/blake3/src/lib.rs +0 -1835
  513. package/deps/blake3/src/platform.rs +0 -587
  514. package/deps/blake3/src/portable.rs +0 -198
  515. package/deps/blake3/src/rust_avx2.rs +0 -474
  516. package/deps/blake3/src/rust_sse2.rs +0 -775
  517. package/deps/blake3/src/rust_sse41.rs +0 -766
  518. package/deps/blake3/src/test.rs +0 -1049
  519. package/deps/blake3/src/traits.rs +0 -227
  520. package/deps/blake3/src/wasm32_simd.rs +0 -794
  521. package/deps/blake3/test_vectors/Cargo.toml +0 -19
  522. package/deps/blake3/test_vectors/cross_test.sh +0 -25
  523. package/deps/blake3/test_vectors/src/bin/generate.rs +0 -4
  524. package/deps/blake3/test_vectors/src/lib.rs +0 -350
  525. package/deps/blake3/test_vectors/test_vectors.json +0 -217
  526. package/deps/blake3/tools/compiler_version/Cargo.toml +0 -7
  527. package/deps/blake3/tools/compiler_version/build.rs +0 -6
  528. package/deps/blake3/tools/compiler_version/src/main.rs +0 -27
  529. package/deps/blake3/tools/instruction_set_support/Cargo.toml +0 -6
  530. package/deps/blake3/tools/instruction_set_support/src/main.rs +0 -10
  531. package/deps/blake3/tools/release.md +0 -16
  532. package/deps/ncrypto/.bazelignore +0 -4
  533. package/deps/ncrypto/.bazelrc +0 -1
  534. package/deps/ncrypto/.bazelversion +0 -1
  535. package/deps/ncrypto/.clang-format +0 -111
  536. package/deps/ncrypto/.github/workflows/bazel.yml +0 -58
  537. package/deps/ncrypto/.github/workflows/commitlint.yml +0 -16
  538. package/deps/ncrypto/.github/workflows/linter.yml +0 -38
  539. package/deps/ncrypto/.github/workflows/macos.yml +0 -43
  540. package/deps/ncrypto/.github/workflows/release-please.yml +0 -16
  541. package/deps/ncrypto/.github/workflows/ubuntu.yml +0 -128
  542. package/deps/ncrypto/.github/workflows/visual-studio.yml +0 -49
  543. package/deps/ncrypto/.python-version +0 -1
  544. package/deps/ncrypto/.release-please-manifest.json +0 -3
  545. package/deps/ncrypto/BUILD.bazel +0 -44
  546. package/deps/ncrypto/CHANGELOG.md +0 -37
  547. package/deps/ncrypto/CMakeLists.txt +0 -79
  548. package/deps/ncrypto/MODULE.bazel +0 -16
  549. package/deps/ncrypto/MODULE.bazel.lock +0 -461
  550. package/deps/ncrypto/cmake/CPM.cmake +0 -1225
  551. package/deps/ncrypto/cmake/ncrypto-flags.cmake +0 -17
  552. package/deps/ncrypto/ncrypto.pc.in +0 -10
  553. package/deps/ncrypto/patches/0001-Expose-libdecrepit-so-NodeJS-can-use-it-for-ncrypto.patch +0 -28
  554. package/deps/ncrypto/pyproject.toml +0 -38
  555. package/deps/ncrypto/release-please-config.json +0 -11
  556. package/deps/ncrypto/src/CMakeLists.txt +0 -40
  557. package/deps/ncrypto/tests/BUILD.bazel +0 -11
  558. package/deps/ncrypto/tests/CMakeLists.txt +0 -7
  559. package/deps/ncrypto/tests/basic.cpp +0 -856
  560. package/deps/ncrypto/tools/run-clang-format.sh +0 -42
  561. package/lib/tsconfig.tsbuildinfo +0 -1
@@ -0,0 +1,837 @@
1
+ /**
2
+ * References and further reading:
3
+ *
4
+ * Wojciech Muła, Daniel Lemire, Base64 encoding and decoding at almost the
5
+ * speed of a memory copy, Software: Practice and Experience 50 (2), 2020.
6
+ * https://arxiv.org/abs/1910.05109
7
+ *
8
+ * Wojciech Muła, Daniel Lemire, Faster Base64 Encoding and Decoding using AVX2
9
+ * Instructions, ACM Transactions on the Web 12 (3), 2018.
10
+ * https://arxiv.org/abs/1704.00605
11
+ *
12
+ * Simon Josefsson. 2006. The Base16, Base32, and Base64 Data Encodings.
13
+ * https://tools.ietf.org/html/rfc4648. (2006). Internet Engineering Task Force,
14
+ * Request for Comments: 4648.
15
+ *
16
+ * Alfred Klomp. 2014a. Fast Base64 encoding/decoding with SSE vectorization.
17
+ * http://www.alfredklomp.com/programming/sse-base64/. (2014).
18
+ *
19
+ * Alfred Klomp. 2014b. Fast Base64 stream encoder/decoder in C99, with SIMD
20
+ * acceleration. https://github.com/aklomp/base64. (2014).
21
+ *
22
+ * Hanson Char. 2014. A Fast and Correct Base 64 Codec. (2014).
23
+ * https://aws.amazon.com/blogs/developer/a-fast-and-correct-base-64-codec/
24
+ *
25
+ * Nick Kopp. 2013. Base64 Encoding on a GPU.
26
+ * https://www.codeproject.com/Articles/276993/Base-Encoding-on-a-GPU. (2013).
27
+ */
28
+
29
+ template <bool base64_url>
30
+ simdutf_really_inline __m256i lookup_pshufb_improved(const __m256i input) {
31
+ // Precomputed shuffle masks for K = 1 to 16
32
+ // credit: Wojciech Muła
33
+ __m256i result = _mm256_subs_epu8(input, _mm256_set1_epi8(51));
34
+ const __m256i less = _mm256_cmpgt_epi8(_mm256_set1_epi8(26), input);
35
+ result =
36
+ _mm256_or_si256(result, _mm256_and_si256(less, _mm256_set1_epi8(13)));
37
+ __m256i shift_LUT;
38
+ if (base64_url) {
39
+ shift_LUT = _mm256_setr_epi8(
40
+ 'a' - 26, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52,
41
+ '0' - 52, '0' - 52, '0' - 52, '0' - 52, '-' - 62, '_' - 63, 'A', 0, 0,
42
+
43
+ 'a' - 26, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52,
44
+ '0' - 52, '0' - 52, '0' - 52, '0' - 52, '-' - 62, '_' - 63, 'A', 0, 0);
45
+ } else {
46
+ shift_LUT = _mm256_setr_epi8(
47
+ 'a' - 26, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52,
48
+ '0' - 52, '0' - 52, '0' - 52, '0' - 52, '+' - 62, '/' - 63, 'A', 0, 0,
49
+
50
+ 'a' - 26, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52, '0' - 52,
51
+ '0' - 52, '0' - 52, '0' - 52, '0' - 52, '+' - 62, '/' - 63, 'A', 0, 0);
52
+ }
53
+
54
+ result = _mm256_shuffle_epi8(shift_LUT, result);
55
+ return _mm256_add_epi8(result, input);
56
+ }
57
+
58
+ simdutf_really_inline __m256i insert_line_feed32(__m256i input, int K) {
59
+
60
+ static const uint8_t low_table[16][32] = {
61
+ {0x80, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
62
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
63
+ {0, 0x80, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
64
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
65
+ {0, 1, 0x80, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
66
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
67
+ {0, 1, 2, 0x80, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
68
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
69
+ {0, 1, 2, 3, 0x80, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
70
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
71
+ {0, 1, 2, 3, 4, 0x80, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
72
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
73
+ {0, 1, 2, 3, 4, 5, 0x80, 6, 7, 8, 9, 10, 11, 12, 13, 14,
74
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
75
+ {0, 1, 2, 3, 4, 5, 6, 0x80, 7, 8, 9, 10, 11, 12, 13, 14,
76
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
77
+ {0, 1, 2, 3, 4, 5, 6, 7, 0x80, 8, 9, 10, 11, 12, 13, 14,
78
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
79
+ {0, 1, 2, 3, 4, 5, 6, 7, 8, 0x80, 9, 10, 11, 12, 13, 14,
80
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
81
+ {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0x80, 10, 11, 12, 13, 14,
82
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
83
+ {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0x80, 11, 12, 13, 14,
84
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
85
+ {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0x80, 12, 13, 14,
86
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
87
+ {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 0x80, 13, 14,
88
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
89
+ {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 0x80, 14,
90
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
91
+ {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0x80,
92
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}};
93
+ static const uint8_t high_table[16][32] = {
94
+ {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
95
+ 0x80, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14},
96
+ {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
97
+ 0, 0x80, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14},
98
+ {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
99
+ 0, 1, 0x80, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14},
100
+ {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
101
+ 0, 1, 2, 0x80, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14},
102
+ {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
103
+ 0, 1, 2, 3, 0x80, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14},
104
+ {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
105
+ 0, 1, 2, 3, 4, 0x80, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14},
106
+ {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
107
+ 0, 1, 2, 3, 4, 5, 0x80, 6, 7, 8, 9, 10, 11, 12, 13, 14},
108
+ {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
109
+ 0, 1, 2, 3, 4, 5, 6, 0x80, 7, 8, 9, 10, 11, 12, 13, 14},
110
+ {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
111
+ 0, 1, 2, 3, 4, 5, 6, 7, 0x80, 8, 9, 10, 11, 12, 13, 14},
112
+ {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
113
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 0x80, 9, 10, 11, 12, 13, 14},
114
+ {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
115
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0x80, 10, 11, 12, 13, 14},
116
+ {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
117
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0x80, 11, 12, 13, 14},
118
+ {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
119
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0x80, 12, 13, 14},
120
+ {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
121
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 0x80, 13, 14},
122
+ {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
123
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 0x80, 14},
124
+ {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
125
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0x80}};
126
+
127
+ __m256i line_feed_vector = _mm256_set1_epi8('\n');
128
+ if (K >= 16) {
129
+ __m256i mask = _mm256_loadu_si256((const __m256i *)high_table[K - 16]);
130
+ __m256i lf_pos =
131
+ _mm256_cmpeq_epi8(mask, _mm256_set1_epi8(static_cast<char>(0x80)));
132
+ __m256i shuffled = _mm256_shuffle_epi8(input, mask);
133
+ __m256i result = _mm256_blendv_epi8(shuffled, line_feed_vector, lf_pos);
134
+ return result;
135
+ }
136
+ // Shift input right by 1 byte
137
+ __m256i shift = _mm256_alignr_epi8(
138
+ input, _mm256_permute2x128_si256(input, input, 0x21), 15);
139
+
140
+ input = _mm256_blend_epi32(input, shift, 0xF0);
141
+
142
+ __m256i mask = _mm256_loadu_si256((const __m256i *)low_table[K]);
143
+
144
+ __m256i lf_pos =
145
+ _mm256_cmpeq_epi8(mask, _mm256_set1_epi8(static_cast<char>(0x80)));
146
+ __m256i shuffled = _mm256_shuffle_epi8(input, mask);
147
+
148
+ __m256i result = _mm256_blendv_epi8(shuffled, line_feed_vector, lf_pos);
149
+ return result;
150
+ }
151
+
152
+ template <bool isbase64url, bool use_lines>
153
+ size_t
154
+ avx2_encode_base64_impl(char *dst, const char *src, size_t srclen,
155
+ base64_options options,
156
+ size_t line_length = simdutf::default_line_length) {
157
+ size_t offset = 0;
158
+
159
+ if (line_length < 4) {
160
+ line_length = 4; // We do not support line_length less than 4
161
+ }
162
+ // credit: Wojciech Muła
163
+ const uint8_t *input = (const uint8_t *)src;
164
+
165
+ uint8_t *out = (uint8_t *)dst;
166
+ const __m256i shuf =
167
+ _mm256_set_epi8(10, 11, 9, 10, 7, 8, 6, 7, 4, 5, 3, 4, 1, 2, 0, 1,
168
+
169
+ 10, 11, 9, 10, 7, 8, 6, 7, 4, 5, 3, 4, 1, 2, 0, 1);
170
+ size_t i = 0;
171
+ for (; i + 100 <= srclen; i += 96) {
172
+ const __m128i lo0 = _mm_loadu_si128(
173
+ reinterpret_cast<const __m128i *>(input + i + 4 * 3 * 0));
174
+ const __m128i hi0 = _mm_loadu_si128(
175
+ reinterpret_cast<const __m128i *>(input + i + 4 * 3 * 1));
176
+ const __m128i lo1 = _mm_loadu_si128(
177
+ reinterpret_cast<const __m128i *>(input + i + 4 * 3 * 2));
178
+ const __m128i hi1 = _mm_loadu_si128(
179
+ reinterpret_cast<const __m128i *>(input + i + 4 * 3 * 3));
180
+ const __m128i lo2 = _mm_loadu_si128(
181
+ reinterpret_cast<const __m128i *>(input + i + 4 * 3 * 4));
182
+ const __m128i hi2 = _mm_loadu_si128(
183
+ reinterpret_cast<const __m128i *>(input + i + 4 * 3 * 5));
184
+ const __m128i lo3 = _mm_loadu_si128(
185
+ reinterpret_cast<const __m128i *>(input + i + 4 * 3 * 6));
186
+ const __m128i hi3 = _mm_loadu_si128(
187
+ reinterpret_cast<const __m128i *>(input + i + 4 * 3 * 7));
188
+
189
+ __m256i in0 = _mm256_shuffle_epi8(_mm256_set_m128i(hi0, lo0), shuf);
190
+ __m256i in1 = _mm256_shuffle_epi8(_mm256_set_m128i(hi1, lo1), shuf);
191
+ __m256i in2 = _mm256_shuffle_epi8(_mm256_set_m128i(hi2, lo2), shuf);
192
+ __m256i in3 = _mm256_shuffle_epi8(_mm256_set_m128i(hi3, lo3), shuf);
193
+
194
+ const __m256i t0_0 = _mm256_and_si256(in0, _mm256_set1_epi32(0x0fc0fc00));
195
+ const __m256i t0_1 = _mm256_and_si256(in1, _mm256_set1_epi32(0x0fc0fc00));
196
+ const __m256i t0_2 = _mm256_and_si256(in2, _mm256_set1_epi32(0x0fc0fc00));
197
+ const __m256i t0_3 = _mm256_and_si256(in3, _mm256_set1_epi32(0x0fc0fc00));
198
+
199
+ const __m256i t1_0 =
200
+ _mm256_mulhi_epu16(t0_0, _mm256_set1_epi32(0x04000040));
201
+ const __m256i t1_1 =
202
+ _mm256_mulhi_epu16(t0_1, _mm256_set1_epi32(0x04000040));
203
+ const __m256i t1_2 =
204
+ _mm256_mulhi_epu16(t0_2, _mm256_set1_epi32(0x04000040));
205
+ const __m256i t1_3 =
206
+ _mm256_mulhi_epu16(t0_3, _mm256_set1_epi32(0x04000040));
207
+
208
+ const __m256i t2_0 = _mm256_and_si256(in0, _mm256_set1_epi32(0x003f03f0));
209
+ const __m256i t2_1 = _mm256_and_si256(in1, _mm256_set1_epi32(0x003f03f0));
210
+ const __m256i t2_2 = _mm256_and_si256(in2, _mm256_set1_epi32(0x003f03f0));
211
+ const __m256i t2_3 = _mm256_and_si256(in3, _mm256_set1_epi32(0x003f03f0));
212
+
213
+ const __m256i t3_0 =
214
+ _mm256_mullo_epi16(t2_0, _mm256_set1_epi32(0x01000010));
215
+ const __m256i t3_1 =
216
+ _mm256_mullo_epi16(t2_1, _mm256_set1_epi32(0x01000010));
217
+ const __m256i t3_2 =
218
+ _mm256_mullo_epi16(t2_2, _mm256_set1_epi32(0x01000010));
219
+ const __m256i t3_3 =
220
+ _mm256_mullo_epi16(t2_3, _mm256_set1_epi32(0x01000010));
221
+
222
+ const __m256i input0 = _mm256_or_si256(t1_0, t3_0);
223
+ const __m256i input1 = _mm256_or_si256(t1_1, t3_1);
224
+ const __m256i input2 = _mm256_or_si256(t1_2, t3_2);
225
+ const __m256i input3 = _mm256_or_si256(t1_3, t3_3);
226
+
227
+ if (use_lines) {
228
+ if (line_length >= 32) { // fast path
229
+ __m256i result;
230
+ result = lookup_pshufb_improved<isbase64url>(input0);
231
+ if (offset + 32 > line_length) {
232
+ size_t location_end = line_length - offset;
233
+ size_t to_move = 32 - location_end;
234
+ // We could do this, or extract instead.
235
+ _mm256_storeu_si256(reinterpret_cast<__m256i *>(out + 1), result);
236
+ _mm256_storeu_si256(
237
+ reinterpret_cast<__m256i *>(out),
238
+ insert_line_feed32(result, static_cast<int>(location_end)));
239
+ offset = to_move;
240
+ out += 32 + 1;
241
+ } else {
242
+ _mm256_storeu_si256(reinterpret_cast<__m256i *>(out), result);
243
+ offset += 32;
244
+ out += 32;
245
+ }
246
+ result = lookup_pshufb_improved<isbase64url>(input1);
247
+
248
+ if (offset + 32 > line_length) {
249
+ size_t location_end = line_length - offset;
250
+ size_t to_move = 32 - location_end;
251
+
252
+ // We could do this, or extract instead.
253
+ _mm256_storeu_si256(reinterpret_cast<__m256i *>(out + 1), result);
254
+ _mm256_storeu_si256(
255
+ reinterpret_cast<__m256i *>(out),
256
+ insert_line_feed32(result, static_cast<int>(location_end)));
257
+ // see above.
258
+ // out[32] = static_cast<uint8_t>(_mm256_extract_epi8(result, 31));
259
+ offset = to_move;
260
+ out += 32 + 1;
261
+ } else {
262
+
263
+ _mm256_storeu_si256(reinterpret_cast<__m256i *>(out), result);
264
+
265
+ offset += 32;
266
+ out += 32;
267
+ }
268
+ result = lookup_pshufb_improved<isbase64url>(input2);
269
+
270
+ if (offset + 32 > line_length) {
271
+ size_t location_end = line_length - offset;
272
+ size_t to_move = 32 - location_end;
273
+
274
+ // We could do this, or extract instead.
275
+ _mm256_storeu_si256(reinterpret_cast<__m256i *>(out + 1), result);
276
+ _mm256_storeu_si256(
277
+ reinterpret_cast<__m256i *>(out),
278
+ insert_line_feed32(result, static_cast<int>(location_end)));
279
+ // see above.
280
+ // out[32] = static_cast<uint8_t>(_mm256_extract_epi8(result, 31));
281
+ offset = to_move;
282
+ out += 32 + 1;
283
+ } else {
284
+ _mm256_storeu_si256(reinterpret_cast<__m256i *>(out), result);
285
+ offset += 32;
286
+ out += 32;
287
+ }
288
+ result = lookup_pshufb_improved<isbase64url>(input3);
289
+
290
+ if (offset + 32 > line_length) {
291
+ size_t location_end = line_length - offset;
292
+ size_t to_move = 32 - location_end;
293
+
294
+ // We could do this, or extract instead.
295
+ _mm256_storeu_si256(reinterpret_cast<__m256i *>(out + 1), result);
296
+ _mm256_storeu_si256(
297
+ reinterpret_cast<__m256i *>(out),
298
+ insert_line_feed32(result, static_cast<int>(location_end)));
299
+ // see above.
300
+ // out[32] = static_cast<uint8_t>(_mm256_extract_epi8(result, 31));
301
+ offset = to_move;
302
+ out += 32 + 1;
303
+ } else {
304
+ _mm256_storeu_si256(reinterpret_cast<__m256i *>(out), result);
305
+ offset += 32;
306
+ out += 32;
307
+ }
308
+ } else { // slow path
309
+ // could be optimized
310
+ uint8_t buffer[128];
311
+ _mm256_storeu_si256(reinterpret_cast<__m256i *>(buffer),
312
+ lookup_pshufb_improved<isbase64url>(input0));
313
+ _mm256_storeu_si256(reinterpret_cast<__m256i *>(buffer + 32),
314
+ lookup_pshufb_improved<isbase64url>(input1));
315
+ _mm256_storeu_si256(reinterpret_cast<__m256i *>(buffer + 64),
316
+ lookup_pshufb_improved<isbase64url>(input2));
317
+ _mm256_storeu_si256(reinterpret_cast<__m256i *>(buffer + 96),
318
+ lookup_pshufb_improved<isbase64url>(input3));
319
+ size_t out_pos = 0;
320
+ size_t local_offset = offset;
321
+ for (size_t j = 0; j < 128;) {
322
+ if (local_offset == line_length) {
323
+ out[out_pos++] = '\n';
324
+ local_offset = 0;
325
+ }
326
+ out[out_pos++] = buffer[j++];
327
+ local_offset++;
328
+ }
329
+ offset = local_offset;
330
+ out += out_pos;
331
+ }
332
+ } else {
333
+ _mm256_storeu_si256(reinterpret_cast<__m256i *>(out),
334
+ lookup_pshufb_improved<isbase64url>(input0));
335
+ _mm256_storeu_si256(reinterpret_cast<__m256i *>(out + 32),
336
+ lookup_pshufb_improved<isbase64url>(input1));
337
+ _mm256_storeu_si256(reinterpret_cast<__m256i *>(out + 64),
338
+ lookup_pshufb_improved<isbase64url>(input2));
339
+ _mm256_storeu_si256(reinterpret_cast<__m256i *>(out + 96),
340
+ lookup_pshufb_improved<isbase64url>(input3));
341
+
342
+ out += 128;
343
+ }
344
+ }
345
+ for (; i + 28 <= srclen; i += 24) {
346
+ // lo = [xxxx|DDDC|CCBB|BAAA]
347
+ // hi = [xxxx|HHHG|GGFF|FEEE]
348
+ const __m128i lo =
349
+ _mm_loadu_si128(reinterpret_cast<const __m128i *>(input + i));
350
+ const __m128i hi =
351
+ _mm_loadu_si128(reinterpret_cast<const __m128i *>(input + i + 4 * 3));
352
+
353
+ // bytes from groups A, B and C are needed in separate 32-bit lanes
354
+ // in = [0HHH|0GGG|0FFF|0EEE[0DDD|0CCC|0BBB|0AAA]
355
+ __m256i in = _mm256_shuffle_epi8(_mm256_set_m128i(hi, lo), shuf);
356
+
357
+ // this part is well commented in encode.sse.cpp
358
+
359
+ const __m256i t0 = _mm256_and_si256(in, _mm256_set1_epi32(0x0fc0fc00));
360
+ const __m256i t1 = _mm256_mulhi_epu16(t0, _mm256_set1_epi32(0x04000040));
361
+ const __m256i t2 = _mm256_and_si256(in, _mm256_set1_epi32(0x003f03f0));
362
+ const __m256i t3 = _mm256_mullo_epi16(t2, _mm256_set1_epi32(0x01000010));
363
+ const __m256i indices = _mm256_or_si256(t1, t3);
364
+
365
+ if (use_lines) {
366
+ if (line_length >= 32) { // fast path
367
+ _mm256_storeu_si256(reinterpret_cast<__m256i *>(out),
368
+ lookup_pshufb_improved<isbase64url>(indices));
369
+
370
+ if (offset + 32 > line_length) {
371
+ size_t location_end = line_length - offset;
372
+ size_t to_move = 32 - location_end;
373
+ std::memmove(out + location_end + 1, out + location_end, to_move);
374
+ out[location_end] = '\n';
375
+ offset = to_move;
376
+ out += 32 + 1;
377
+ } else {
378
+ offset += 32;
379
+ out += 32;
380
+ }
381
+ } else { // slow path
382
+ // could be optimized
383
+ alignas(32) uint8_t buffer[32];
384
+ _mm256_storeu_si256(reinterpret_cast<__m256i *>(buffer),
385
+ lookup_pshufb_improved<isbase64url>(indices));
386
+ std::memcpy(out, buffer, 32);
387
+ size_t out_pos = 0;
388
+ size_t local_offset = offset;
389
+ for (size_t j = 0; j < 32;) {
390
+ if (local_offset == line_length) {
391
+ out[out_pos++] = '\n';
392
+ local_offset = 0;
393
+ }
394
+ out[out_pos++] = buffer[j++];
395
+ local_offset++;
396
+ }
397
+ offset = local_offset;
398
+ out += out_pos;
399
+ }
400
+ } else {
401
+ _mm256_storeu_si256(reinterpret_cast<__m256i *>(out),
402
+ lookup_pshufb_improved<isbase64url>(indices));
403
+
404
+ out += 32;
405
+ }
406
+ }
407
+ return ((char *)out - (char *)dst) +
408
+ scalar::base64::tail_encode_base64_impl<use_lines>(
409
+ (char *)out, src + i, srclen - i, options, line_length, offset);
410
+ }
411
+
412
+ template <bool isbase64url>
413
+ size_t encode_base64(char *dst, const char *src, size_t srclen,
414
+ base64_options options) {
415
+ return avx2_encode_base64_impl<isbase64url, false>(dst, src, srclen, options);
416
+ }
417
+
418
+ static inline void compress(__m128i data, uint16_t mask, char *output) {
419
+ if (mask == 0) {
420
+ _mm_storeu_si128(reinterpret_cast<__m128i *>(output), data);
421
+ return;
422
+ }
423
+ // this particular implementation was inspired by work done by @animetosho
424
+ // we do it in two steps, first 8 bytes and then second 8 bytes
425
+ uint8_t mask1 = uint8_t(mask); // least significant 8 bits
426
+ uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits
427
+ // next line just loads the 64-bit values thintable_epi8[mask1] and
428
+ // thintable_epi8[mask2] into a 128-bit register, using only
429
+ // two instructions on most compilers.
430
+
431
+ __m128i shufmask = _mm_set_epi64x(tables::base64::thintable_epi8[mask2],
432
+ tables::base64::thintable_epi8[mask1]);
433
+ // we increment by 0x08 the second half of the mask
434
+ shufmask =
435
+ _mm_add_epi8(shufmask, _mm_set_epi32(0x08080808, 0x08080808, 0, 0));
436
+ // this is the version "nearly pruned"
437
+ __m128i pruned = _mm_shuffle_epi8(data, shufmask);
438
+ // we still need to put the two halves together.
439
+ // we compute the popcount of the first half:
440
+ int pop1 = tables::base64::BitsSetTable256mul2[mask1];
441
+ // then load the corresponding mask, what it does is to write
442
+ // only the first pop1 bytes from the first 8 bytes, and then
443
+ // it fills in with the bytes from the second 8 bytes + some filling
444
+ // at the end.
445
+ __m128i compactmask = _mm_loadu_si128(reinterpret_cast<const __m128i *>(
446
+ tables::base64::pshufb_combine_table + pop1 * 8));
447
+ __m128i answer = _mm_shuffle_epi8(pruned, compactmask);
448
+
449
+ _mm_storeu_si128(reinterpret_cast<__m128i *>(output), answer);
450
+ }
451
+
452
+ // --- decoding -----------------------------------------------
453
+
454
+ template <typename = void>
455
+ simdutf_really_inline void compress(__m256i data, uint32_t mask, char *output) {
456
+ if (mask == 0) {
457
+ _mm256_storeu_si256(reinterpret_cast<__m256i *>(output), data);
458
+ return;
459
+ }
460
+ compress(_mm256_castsi256_si128(data), uint16_t(mask), output);
461
+ compress(_mm256_extracti128_si256(data, 1), uint16_t(mask >> 16),
462
+ output + count_ones(~mask & 0xFFFF));
463
+ }
464
+
465
+ template <typename = void>
466
+ simdutf_really_inline void base64_decode(char *out, __m256i str) {
467
+ // credit: aqrit
468
+ const __m256i pack_shuffle =
469
+ _mm256_setr_epi8(2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1,
470
+ 2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1);
471
+ const __m256i t0 = _mm256_maddubs_epi16(str, _mm256_set1_epi32(0x01400140));
472
+ const __m256i t1 = _mm256_madd_epi16(t0, _mm256_set1_epi32(0x00011000));
473
+ const __m256i t2 = _mm256_shuffle_epi8(t1, pack_shuffle);
474
+
475
+ // Store the output:
476
+ _mm_storeu_si128((__m128i *)out, _mm256_castsi256_si128(t2));
477
+ _mm_storeu_si128((__m128i *)(out + 12), _mm256_extracti128_si256(t2, 1));
478
+ }
479
+
480
+ template <typename = void>
481
+ simdutf_really_inline void base64_decode_block(char *out, const char *src) {
482
+ base64_decode(out,
483
+ _mm256_loadu_si256(reinterpret_cast<const __m256i *>(src)));
484
+ base64_decode(out + 24, _mm256_loadu_si256(
485
+ reinterpret_cast<const __m256i *>(src + 32)));
486
+ }
487
+
488
+ template <typename = void>
489
+ simdutf_really_inline void base64_decode_block_safe(char *out,
490
+ const char *src) {
491
+ base64_decode(out,
492
+ _mm256_loadu_si256(reinterpret_cast<const __m256i *>(src)));
493
+ alignas(32) char buffer[32]; // We enforce safety with a buffer.
494
+ base64_decode(
495
+ buffer, _mm256_loadu_si256(reinterpret_cast<const __m256i *>(src + 32)));
496
+ std::memcpy(out + 24, buffer, 24);
497
+ }
498
+
499
+ // --- decoding - base64 class --------------------------------
500
+
501
+ class block64 {
502
+ __m256i chunks[2];
503
+
504
+ public:
505
+ // The caller of this function is responsible to ensure that there are 64
506
+ // bytes available from reading at src.
507
+ simdutf_really_inline block64(const char *src) {
508
+ chunks[0] = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(src));
509
+ chunks[1] = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(src + 32));
510
+ }
511
+
512
+ // The caller of this function is responsible to ensure that there are 128
513
+ // bytes available from reading at src.
514
+ simdutf_really_inline block64(const char16_t *src) {
515
+ const auto m1 = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(src));
516
+ const auto m2 =
517
+ _mm256_loadu_si256(reinterpret_cast<const __m256i *>(src + 16));
518
+ const auto m3 =
519
+ _mm256_loadu_si256(reinterpret_cast<const __m256i *>(src + 32));
520
+ const auto m4 =
521
+ _mm256_loadu_si256(reinterpret_cast<const __m256i *>(src + 48));
522
+
523
+ const auto m1p = _mm256_permute2x128_si256(m1, m2, 0x20);
524
+ const auto m2p = _mm256_permute2x128_si256(m1, m2, 0x31);
525
+ const auto m3p = _mm256_permute2x128_si256(m3, m4, 0x20);
526
+ const auto m4p = _mm256_permute2x128_si256(m3, m4, 0x31);
527
+
528
+ chunks[0] = _mm256_packus_epi16(m1p, m2p);
529
+ chunks[1] = _mm256_packus_epi16(m3p, m4p);
530
+ }
531
+
532
+ simdutf_really_inline void copy_block(char *output) {
533
+ _mm256_storeu_si256(reinterpret_cast<__m256i *>(output), chunks[0]);
534
+ _mm256_storeu_si256(reinterpret_cast<__m256i *>(output + 32), chunks[1]);
535
+ }
536
+
537
+ // decode 64 bytes and output 48 bytes
538
+ simdutf_really_inline void base64_decode_block(char *out) {
539
+ base64_decode(out, chunks[0]);
540
+ base64_decode(out + 24, chunks[1]);
541
+ }
542
+
543
+ simdutf_really_inline void base64_decode_block_safe(char *out) {
544
+ base64_decode(out, chunks[0]);
545
+ alignas(32) char buffer[32]; // We enforce safety with a buffer.
546
+ base64_decode(buffer, chunks[1]);
547
+ std::memcpy(out + 24, buffer, 24);
548
+ }
549
+
550
+ template <bool base64_url, bool ignore_garbage, bool default_or_url>
551
+ simdutf_really_inline uint64_t to_base64_mask(uint64_t *error) {
552
+ uint32_t err0 = 0;
553
+ uint32_t err1 = 0;
554
+ uint64_t m0 = to_base64_mask<base64_url, ignore_garbage, default_or_url>(
555
+ &chunks[0], &err0);
556
+ uint64_t m1 = to_base64_mask<base64_url, ignore_garbage, default_or_url>(
557
+ &chunks[1], &err1);
558
+ if (!ignore_garbage) {
559
+ *error = err0 | ((uint64_t)err1 << 32);
560
+ }
561
+ return m0 | (m1 << 32);
562
+ }
563
+
564
+ template <bool base64_url, bool ignore_garbage, bool default_or_url>
565
+ simdutf_really_inline uint32_t to_base64_mask(__m256i *src, uint32_t *error) {
566
+ const __m256i ascii_space_tbl =
567
+ _mm256_setr_epi8(0x20, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x9, 0xa,
568
+ 0x0, 0xc, 0xd, 0x0, 0x0, 0x20, 0x0, 0x0, 0x0, 0x0, 0x0,
569
+ 0x0, 0x0, 0x0, 0x9, 0xa, 0x0, 0xc, 0xd, 0x0, 0x0);
570
+ // credit: aqrit
571
+ __m256i delta_asso;
572
+ if (default_or_url) {
573
+ delta_asso = _mm256_setr_epi8(
574
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00,
575
+ 0x00, 0x00, 0x11, 0x00, 0x16, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
576
+ 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x16);
577
+ } else if (base64_url) {
578
+ delta_asso = _mm256_setr_epi8(0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x0,
579
+ 0x0, 0x0, 0x0, 0x0, 0xF, 0x0, 0xF, 0x1, 0x1,
580
+ 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x0, 0x0, 0x0,
581
+ 0x0, 0x0, 0xF, 0x0, 0xF);
582
+ } else {
583
+ delta_asso = _mm256_setr_epi8(
584
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00,
585
+ 0x00, 0x00, 0x0F, 0x00, 0x0F, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
586
+ 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x0F);
587
+ }
588
+
589
+ __m256i delta_values;
590
+ if (default_or_url) {
591
+ delta_values = _mm256_setr_epi8(
592
+ uint8_t(0xBF), uint8_t(0xE0), uint8_t(0xB9), uint8_t(0x13),
593
+ uint8_t(0x04), uint8_t(0xBF), uint8_t(0xBF), uint8_t(0xB9),
594
+ uint8_t(0xB9), uint8_t(0x00), uint8_t(0xFF), uint8_t(0x11),
595
+ uint8_t(0xFF), uint8_t(0xBF), uint8_t(0x10), uint8_t(0xB9),
596
+ uint8_t(0xBF), uint8_t(0xE0), uint8_t(0xB9), uint8_t(0x13),
597
+ uint8_t(0x04), uint8_t(0xBF), uint8_t(0xBF), uint8_t(0xB9),
598
+ uint8_t(0xB9), uint8_t(0x00), uint8_t(0xFF), uint8_t(0x11),
599
+ uint8_t(0xFF), uint8_t(0xBF), uint8_t(0x10), uint8_t(0xB9));
600
+ } else if (base64_url) {
601
+ delta_values = _mm256_setr_epi8(
602
+ 0x0, 0x0, 0x0, 0x13, 0x4, uint8_t(0xBF), uint8_t(0xBF), uint8_t(0xB9),
603
+ uint8_t(0xB9), 0x0, 0x11, uint8_t(0xC3), uint8_t(0xBF), uint8_t(0xE0),
604
+ uint8_t(0xB9), uint8_t(0xB9), 0x0, 0x0, 0x0, 0x13, 0x4, uint8_t(0xBF),
605
+ uint8_t(0xBF), uint8_t(0xB9), uint8_t(0xB9), 0x0, 0x11, uint8_t(0xC3),
606
+ uint8_t(0xBF), uint8_t(0xE0), uint8_t(0xB9), uint8_t(0xB9));
607
+ } else {
608
+ delta_values = _mm256_setr_epi8(
609
+ int8_t(0x00), int8_t(0x00), int8_t(0x00), int8_t(0x13), int8_t(0x04),
610
+ int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), int8_t(0xB9), int8_t(0x00),
611
+ int8_t(0x10), int8_t(0xC3), int8_t(0xBF), int8_t(0xBF), int8_t(0xB9),
612
+ int8_t(0xB9), int8_t(0x00), int8_t(0x00), int8_t(0x00), int8_t(0x13),
613
+ int8_t(0x04), int8_t(0xBF), int8_t(0xBF), int8_t(0xB9), int8_t(0xB9),
614
+ int8_t(0x00), int8_t(0x10), int8_t(0xC3), int8_t(0xBF), int8_t(0xBF),
615
+ int8_t(0xB9), int8_t(0xB9));
616
+ }
617
+
618
+ __m256i check_asso;
619
+ if (default_or_url) {
620
+ check_asso = _mm256_setr_epi8(
621
+ 0x0D, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x03,
622
+ 0x07, 0x0B, 0x0E, 0x0B, 0x06, 0x0D, 0x01, 0x01, 0x01, 0x01, 0x01,
623
+ 0x01, 0x01, 0x01, 0x01, 0x03, 0x07, 0x0B, 0x0E, 0x0B, 0x06);
624
+ } else if (base64_url) {
625
+ check_asso = _mm256_setr_epi8(0xD, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1,
626
+ 0x1, 0x3, 0x7, 0xB, 0xE, 0xB, 0x6, 0xD, 0x1,
627
+ 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x3,
628
+ 0x7, 0xB, 0xE, 0xB, 0x6);
629
+ } else {
630
+ check_asso = _mm256_setr_epi8(
631
+ 0x0D, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x03,
632
+ 0x07, 0x0B, 0x0B, 0x0B, 0x0F, 0x0D, 0x01, 0x01, 0x01, 0x01, 0x01,
633
+ 0x01, 0x01, 0x01, 0x01, 0x03, 0x07, 0x0B, 0x0B, 0x0B, 0x0F);
634
+ }
635
+ __m256i check_values;
636
+ if (default_or_url) {
637
+ check_values = _mm256_setr_epi8(
638
+ uint8_t(0x80), uint8_t(0x80), uint8_t(0x80), uint8_t(0x80),
639
+ uint8_t(0xCF), uint8_t(0xBF), uint8_t(0xD5), uint8_t(0xA6),
640
+ uint8_t(0xB5), uint8_t(0xA1), uint8_t(0x00), uint8_t(0x80),
641
+ uint8_t(0x00), uint8_t(0x80), uint8_t(0x00), uint8_t(0x80),
642
+ uint8_t(0x80), uint8_t(0x80), uint8_t(0x80), uint8_t(0x80),
643
+ uint8_t(0xCF), uint8_t(0xBF), uint8_t(0xD5), uint8_t(0xA6),
644
+ uint8_t(0xB5), uint8_t(0xA1), uint8_t(0x00), uint8_t(0x80),
645
+ uint8_t(0x00), uint8_t(0x80), uint8_t(0x00), uint8_t(0x80));
646
+ } else if (base64_url) {
647
+ check_values = _mm256_setr_epi8(
648
+ uint8_t(0x80), uint8_t(0x80), uint8_t(0x80), uint8_t(0x80),
649
+ uint8_t(0xCF), uint8_t(0xBF), uint8_t(0xB6), uint8_t(0xA6),
650
+ uint8_t(0xB5), uint8_t(0xA1), 0x0, uint8_t(0x80), 0x0, uint8_t(0x80),
651
+ 0x0, uint8_t(0x80), uint8_t(0x80), uint8_t(0x80), uint8_t(0x80),
652
+ uint8_t(0x80), uint8_t(0xCF), uint8_t(0xBF), uint8_t(0xB6),
653
+ uint8_t(0xA6), uint8_t(0xB5), uint8_t(0xA1), 0x0, uint8_t(0x80), 0x0,
654
+ uint8_t(0x80), 0x0, uint8_t(0x80));
655
+ } else {
656
+ check_values = _mm256_setr_epi8(
657
+ int8_t(0x80), int8_t(0x80), int8_t(0x80), int8_t(0x80), int8_t(0xCF),
658
+ int8_t(0xBF), int8_t(0xD5), int8_t(0xA6), int8_t(0xB5), int8_t(0x86),
659
+ int8_t(0xD1), int8_t(0x80), int8_t(0xB1), int8_t(0x80), int8_t(0x91),
660
+ int8_t(0x80), int8_t(0x80), int8_t(0x80), int8_t(0x80), int8_t(0x80),
661
+ int8_t(0xCF), int8_t(0xBF), int8_t(0xD5), int8_t(0xA6), int8_t(0xB5),
662
+ int8_t(0x86), int8_t(0xD1), int8_t(0x80), int8_t(0xB1), int8_t(0x80),
663
+ int8_t(0x91), int8_t(0x80));
664
+ }
665
+ const __m256i shifted = _mm256_srli_epi32(*src, 3);
666
+ __m256i delta_hash =
667
+ _mm256_avg_epu8(_mm256_shuffle_epi8(delta_asso, *src), shifted);
668
+ if (default_or_url) {
669
+ delta_hash = _mm256_and_si256(delta_hash, _mm256_set1_epi8(0xf));
670
+ }
671
+ const __m256i check_hash =
672
+ _mm256_avg_epu8(_mm256_shuffle_epi8(check_asso, *src), shifted);
673
+ const __m256i out =
674
+ _mm256_adds_epi8(_mm256_shuffle_epi8(delta_values, delta_hash), *src);
675
+ const __m256i chk =
676
+ _mm256_adds_epi8(_mm256_shuffle_epi8(check_values, check_hash), *src);
677
+ const int mask = _mm256_movemask_epi8(chk);
678
+ if (!ignore_garbage && mask) {
679
+ __m256i ascii_space =
680
+ _mm256_cmpeq_epi8(_mm256_shuffle_epi8(ascii_space_tbl, *src), *src);
681
+ *error = (mask ^ _mm256_movemask_epi8(ascii_space));
682
+ }
683
+ *src = out;
684
+ return (uint32_t)mask;
685
+ }
686
+
687
+ simdutf_really_inline uint64_t compress_block(uint64_t mask, char *output) {
688
+ if (is_power_of_two(mask)) {
689
+ return compress_block_single(mask, output);
690
+ }
691
+
692
+ uint64_t nmask = ~mask;
693
+ compress(chunks[0], uint32_t(mask), output);
694
+ compress(chunks[1], uint32_t(mask >> 32),
695
+ output + count_ones(nmask & 0xFFFFFFFF));
696
+ return count_ones(nmask);
697
+ }
698
+
699
+ simdutf_really_inline size_t compress_block_single(uint64_t mask,
700
+ char *output) { // Copies the 64-byte block to output with one byte left out; always returns 63. Writes reach output[63] in the last case.
701
+ const size_t pos64 = trailing_zeroes(mask); // presumably the index of the lowest set bit of mask, i.e. the byte to drop - confirm helper semantics
702
+ const int8_t pos = pos64 & 0xf; // offset of that byte inside its 16-byte lane
703
+ switch (pos64 >> 4) { // selects which of the four 16-byte lanes holds the byte
704
+ case 0b00: { // byte to drop is in the low half of chunks[0]
705
+ const __m128i lane0 = _mm256_extracti128_si256(chunks[0], 0);
706
+ const __m128i lane1 = _mm256_extracti128_si256(chunks[0], 1);
707
+ // Build shuffle indices i for i < pos and i + 1 for i >= pos, so the byte at pos is skipped.
708
+ const __m128i v0 = _mm_set1_epi8(char(pos - 1)); // pos - 1 is -1 when pos == 0, which makes every lane compare greater below
709
+ const __m128i v1 =
710
+ _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
711
+ const __m128i v2 = _mm_cmpgt_epi8(v1, v0); // 0xFF (-1) in lanes whose index is >= pos
712
+ const __m128i sh = _mm_sub_epi8(v1, v2); // subtracting -1 adds one to those indices
713
+ const __m128i compressed = _mm_shuffle_epi8(lane0, sh); // last output byte is filler (index 16 selects byte 0)
714
+ // Each following store starts one byte early so it overwrites the filler byte; the store order matters.
715
+ _mm_storeu_si128((__m128i *)(output + 0 * 16), compressed);
716
+ _mm_storeu_si128((__m128i *)(output + 1 * 16 - 1), lane1);
717
+ _mm256_storeu_si256((__m256i *)(output + 2 * 16 - 1), chunks[1]);
718
+ } break;
719
+ case 0b01: { // byte to drop is in the high half of chunks[0]
720
+ const __m128i lane0 = _mm256_extracti128_si256(chunks[0], 0);
721
+ const __m128i lane1 = _mm256_extracti128_si256(chunks[0], 1);
722
+ _mm_storeu_si128((__m128i *)(output + 0 * 16), lane0); // lane before the dropped byte is copied unchanged
723
+ // Same index construction as in case 0b00, applied to lane1.
724
+ const __m128i v0 = _mm_set1_epi8(char(pos - 1));
725
+ const __m128i v1 =
726
+ _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
727
+ const __m128i v2 = _mm_cmpgt_epi8(v1, v0);
728
+ const __m128i sh = _mm_sub_epi8(v1, v2);
729
+ const __m128i compressed = _mm_shuffle_epi8(lane1, sh);
730
+ // chunks[1] is stored at offset 31 so it overwrites the filler byte at the end of compressed.
731
+ _mm_storeu_si128((__m128i *)(output + 1 * 16), compressed);
732
+ _mm256_storeu_si256((__m256i *)(output + 2 * 16 - 1), chunks[1]);
733
+ } break;
734
+ case 0b10: { // byte to drop is in the low half of chunks[1]
735
+ const __m128i lane2 = _mm256_extracti128_si256(chunks[1], 0);
736
+ const __m128i lane3 = _mm256_extracti128_si256(chunks[1], 1);
737
+ // chunks[0] precedes the dropped byte and is copied unchanged.
738
+ _mm256_storeu_si256((__m256i *)(output + 0 * 16), chunks[0]);
739
+ // Same index construction as in case 0b00, applied to lane2.
740
+ const __m128i v0 = _mm_set1_epi8(char(pos - 1));
741
+ const __m128i v1 =
742
+ _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
743
+ const __m128i v2 = _mm_cmpgt_epi8(v1, v0);
744
+ const __m128i sh = _mm_sub_epi8(v1, v2);
745
+ const __m128i compressed = _mm_shuffle_epi8(lane2, sh);
746
+ // lane3 is stored at offset 47 so it overwrites the filler byte at the end of compressed.
747
+ _mm_storeu_si128((__m128i *)(output + 2 * 16), compressed);
748
+ _mm_storeu_si128((__m128i *)(output + 3 * 16 - 1), lane3);
749
+ } break;
750
+ case 0b11: { // byte to drop is in the high half of chunks[1]
751
+ const __m128i lane2 = _mm256_extracti128_si256(chunks[1], 0);
752
+ const __m128i lane3 = _mm256_extracti128_si256(chunks[1], 1);
753
+ // The first three lanes precede the dropped byte and are copied unchanged.
754
+ _mm256_storeu_si256((__m256i *)(output + 0 * 16), chunks[0]);
755
+ _mm_storeu_si128((__m128i *)(output + 2 * 16), lane2);
756
+ // Same index construction as in case 0b00, applied to lane3.
757
+ const __m128i v0 = _mm_set1_epi8(char(pos - 1));
758
+ const __m128i v1 =
759
+ _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
760
+ const __m128i v2 = _mm_cmpgt_epi8(v1, v0);
761
+ const __m128i sh = _mm_sub_epi8(v1, v2);
762
+ const __m128i compressed = _mm_shuffle_epi8(lane3, sh);
763
+ // Nothing follows this store, so output[63] keeps the filler byte; it lies beyond the 63 bytes reported.
764
+ _mm_storeu_si128((__m128i *)(output + 3 * 16), compressed);
765
+ } break;
766
+ }
767
+ // Exactly one byte was left out of the 64 in every case.
768
+ return 63;
769
+ }
770
+ };
771
+
772
+ simdutf_warn_unused size_t avx2_binary_length_from_base64(const char *input,
773
+ size_t length) { // Returns ((count - padding) * 3) / 4: count = bytes above 0x20, padding = up to two trailing '='.
774
+ size_t count = 0; // bytes greater than 0x20 seen so far
775
+ const char *ptr = input;
776
+ const char *end = input + length;
777
+ // Vector path, 32 bytes per step. The bound is a distance: forming ptr + 32 beyond the end of the buffer is undefined.
778
+ __m256i spaces = _mm256_set1_epi8(0x20);
779
+ while (end - ptr >= 32) {
780
+ __m256i data = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(ptr));
781
+ __m256i gt_space = _mm256_cmpgt_epi8(data, spaces); // signed per-byte compare, so bytes 0x80..0xFF are not counted
782
+ uint32_t mask = static_cast<uint32_t>(_mm256_movemask_epi8(gt_space)); // one bit per byte
783
+ count += count_ones(mask);
784
+ ptr += 32;
785
+ }
786
+ // Scalar tail for the remaining (fewer than 32) bytes.
787
+ while (ptr < end) {
788
+ count += (*ptr > 0x20) ? 1 : 0; // NOTE(review): agrees with the vector path only where plain char is signed - confirm
789
+ ptr++;
790
+ }
791
+ // Walk back from the end, stepping over bytes <= ' ', and count at most two '=' characters.
792
+ size_t padding = 0;
793
+ size_t pos = length;
794
+ while (pos > 0 && padding < 2) {
795
+ char c = input[--pos];
796
+ if (c == '=') {
797
+ padding++;
798
+ } else if (c > ' ') {
799
+ break; // first other byte above ' ' ends the scan
800
+ }
801
+ }
802
+ return ((count - padding) * 3) / 4; // each '=' found above is also in count (0x3D > 0x20), so this cannot wrap
803
+ }
804
+
805
+ simdutf_warn_unused size_t avx2_binary_length_from_base64(const char16_t *input,
806
+ size_t length) { // Returns ((count - padding) * 3) / 4: count = code units above 0x20, padding = up to two trailing '='.
807
+ size_t count = 0; // holds set mask bits during the vector loop, code units afterwards
808
+ const char16_t *ptr = input;
809
+ const char16_t *end = input + length;
810
+ // Vector path, 16 code units per step. The bound is a distance: forming ptr + 16 beyond the end of the buffer is undefined.
811
+ __m256i spaces = _mm256_set1_epi16(0x20);
812
+ while (end - ptr >= 16) {
813
+ __m256i data = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(ptr));
814
+ __m256i gt_space = _mm256_cmpgt_epi16(data, spaces); // signed 16-bit compare, so units 0x8000..0xFFFF are not counted
815
+ uint32_t mask = static_cast<uint32_t>(_mm256_movemask_epi8(gt_space)); // byte mask: two bits per 16-bit unit
816
+ count += count_ones(mask);
817
+ ptr += 16;
818
+ }
819
+ count /= 2; // convert the two-bits-per-unit total into a number of code units
820
+ // Scalar tail for the remaining (fewer than 16) code units.
821
+ while (ptr < end) {
822
+ count += (*ptr > 0x20) ? 1 : 0; // NOTE(review): unsigned compare here, so units >= 0x8000 count only in this tail - confirm intent
823
+ ptr++;
824
+ }
825
+ // Walk back from the end, stepping over units <= ' ', and count at most two '=' characters.
826
+ size_t padding = 0;
827
+ size_t pos = length;
828
+ while (pos > 0 && padding < 2) {
829
+ char16_t c = input[--pos];
830
+ if (c == '=') {
831
+ padding++;
832
+ } else if (c > ' ') {
833
+ break; // first other unit above ' ' ends the scan
834
+ }
835
+ }
836
+ return ((count - padding) * 3) / 4; // each '=' found above is also in count (0x3D > 0x20), so this cannot wrap
837
+ }