react-native-quick-crypto 1.0.19 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (561) hide show
  1. package/QuickCrypto.podspec +12 -38
  2. package/README.md +2 -0
  3. package/android/CMakeLists.txt +3 -0
  4. package/android/build.gradle +5 -1
  5. package/cpp/argon2/HybridArgon2.cpp +10 -3
  6. package/cpp/blake3/HybridBlake3.cpp +5 -3
  7. package/cpp/cipher/CCMCipher.cpp +29 -16
  8. package/cpp/cipher/CCMCipher.hpp +2 -4
  9. package/cpp/cipher/ChaCha20Cipher.cpp +14 -18
  10. package/cpp/cipher/ChaCha20Cipher.hpp +2 -4
  11. package/cpp/cipher/ChaCha20Poly1305Cipher.cpp +34 -23
  12. package/cpp/cipher/ChaCha20Poly1305Cipher.hpp +2 -4
  13. package/cpp/cipher/GCMCipher.cpp +14 -15
  14. package/cpp/cipher/HybridCipher.cpp +39 -36
  15. package/cpp/cipher/HybridCipher.hpp +17 -1
  16. package/cpp/cipher/HybridRsaCipher.cpp +74 -29
  17. package/cpp/cipher/OCBCipher.cpp +4 -3
  18. package/cpp/cipher/XChaCha20Poly1305Cipher.cpp +14 -13
  19. package/cpp/cipher/XSalsa20Cipher.cpp +72 -6
  20. package/cpp/cipher/XSalsa20Cipher.hpp +25 -3
  21. package/cpp/cipher/XSalsa20Poly1305Cipher.cpp +21 -25
  22. package/cpp/dh/HybridDiffieHellman.cpp +29 -0
  23. package/cpp/ec/HybridEcKeyPair.cpp +35 -33
  24. package/cpp/ec/HybridEcKeyPair.hpp +3 -7
  25. package/cpp/ecdh/HybridECDH.cpp +23 -0
  26. package/cpp/ed25519/HybridEdKeyPair.cpp +73 -117
  27. package/cpp/ed25519/HybridEdKeyPair.hpp +5 -9
  28. package/cpp/hash/HybridHash.cpp +5 -7
  29. package/cpp/hkdf/HybridHkdf.cpp +6 -4
  30. package/cpp/hmac/HybridHmac.cpp +4 -6
  31. package/cpp/kmac/HybridKmac.cpp +4 -4
  32. package/cpp/mldsa/HybridMlDsaKeyPair.cpp +37 -49
  33. package/cpp/mlkem/HybridMlKemKeyPair.cpp +39 -43
  34. package/cpp/pbkdf2/HybridPbkdf2.cpp +7 -8
  35. package/cpp/rsa/HybridRsaKeyPair.cpp +5 -8
  36. package/cpp/rsa/HybridRsaKeyPair.hpp +4 -7
  37. package/cpp/scrypt/HybridScrypt.cpp +6 -4
  38. package/cpp/sign/HybridSignHandle.cpp +25 -68
  39. package/cpp/sign/HybridVerifyHandle.cpp +23 -60
  40. package/cpp/utils/HybridUtils.cpp +213 -111
  41. package/cpp/utils/HybridUtils.hpp +9 -2
  42. package/cpp/utils/QuickCryptoUtils.hpp +72 -0
  43. package/deps/simdutf/LICENSE-APACHE +201 -0
  44. package/deps/simdutf/LICENSE-MIT +18 -0
  45. package/deps/simdutf/README.md +2782 -0
  46. package/deps/simdutf/include/simdutf/avx512.h +79 -0
  47. package/deps/simdutf/include/simdutf/base64_implementation.h +158 -0
  48. package/deps/simdutf/include/simdutf/base64_tables.h +887 -0
  49. package/deps/simdutf/include/simdutf/common_defs.h +186 -0
  50. package/deps/simdutf/include/simdutf/compiler_check.h +50 -0
  51. package/deps/simdutf/include/simdutf/constexpr_ptr.h +138 -0
  52. package/deps/simdutf/include/simdutf/encoding_types.h +189 -0
  53. package/deps/simdutf/include/simdutf/error.h +126 -0
  54. package/deps/simdutf/include/simdutf/implementation.h +7081 -0
  55. package/deps/simdutf/include/simdutf/internal/isadetection.h +325 -0
  56. package/deps/simdutf/include/simdutf/portability.h +285 -0
  57. package/deps/simdutf/include/simdutf/scalar/ascii.h +86 -0
  58. package/deps/simdutf/include/simdutf/scalar/atomic_util.h +105 -0
  59. package/deps/simdutf/include/simdutf/scalar/base64.h +911 -0
  60. package/deps/simdutf/include/simdutf/scalar/latin1.h +26 -0
  61. package/deps/simdutf/include/simdutf/scalar/latin1_to_utf16/latin1_to_utf16.h +52 -0
  62. package/deps/simdutf/include/simdutf/scalar/latin1_to_utf32/latin1_to_utf32.h +27 -0
  63. package/deps/simdutf/include/simdutf/scalar/latin1_to_utf8/latin1_to_utf8.h +191 -0
  64. package/deps/simdutf/include/simdutf/scalar/swap_bytes.h +35 -0
  65. package/deps/simdutf/include/simdutf/scalar/utf16.h +226 -0
  66. package/deps/simdutf/include/simdutf/scalar/utf16_to_latin1/utf16_to_latin1.h +108 -0
  67. package/deps/simdutf/include/simdutf/scalar/utf16_to_latin1/valid_utf16_to_latin1.h +40 -0
  68. package/deps/simdutf/include/simdutf/scalar/utf16_to_utf32/utf16_to_utf32.h +86 -0
  69. package/deps/simdutf/include/simdutf/scalar/utf16_to_utf32/valid_utf16_to_utf32.h +44 -0
  70. package/deps/simdutf/include/simdutf/scalar/utf16_to_utf8/utf16_to_utf8.h +295 -0
  71. package/deps/simdutf/include/simdutf/scalar/utf16_to_utf8/valid_utf16_to_utf8.h +91 -0
  72. package/deps/simdutf/include/simdutf/scalar/utf32.h +82 -0
  73. package/deps/simdutf/include/simdutf/scalar/utf32_to_latin1/utf32_to_latin1.h +68 -0
  74. package/deps/simdutf/include/simdutf/scalar/utf32_to_latin1/valid_utf32_to_latin1.h +67 -0
  75. package/deps/simdutf/include/simdutf/scalar/utf32_to_utf16/utf32_to_utf16.h +84 -0
  76. package/deps/simdutf/include/simdutf/scalar/utf32_to_utf16/valid_utf32_to_utf16.h +44 -0
  77. package/deps/simdutf/include/simdutf/scalar/utf32_to_utf8/utf32_to_utf8.h +142 -0
  78. package/deps/simdutf/include/simdutf/scalar/utf32_to_utf8/valid_utf32_to_utf8.h +72 -0
  79. package/deps/simdutf/include/simdutf/scalar/utf8.h +326 -0
  80. package/deps/simdutf/include/simdutf/scalar/utf8_to_latin1/utf8_to_latin1.h +225 -0
  81. package/deps/simdutf/include/simdutf/scalar/utf8_to_latin1/valid_utf8_to_latin1.h +87 -0
  82. package/deps/simdutf/include/simdutf/scalar/utf8_to_utf16/utf8_to_utf16.h +342 -0
  83. package/deps/simdutf/include/simdutf/scalar/utf8_to_utf16/valid_utf8_to_utf16.h +106 -0
  84. package/deps/simdutf/include/simdutf/scalar/utf8_to_utf32/utf8_to_utf32.h +299 -0
  85. package/deps/simdutf/include/simdutf/scalar/utf8_to_utf32/valid_utf8_to_utf32.h +83 -0
  86. package/deps/simdutf/include/simdutf/simdutf_version.h +26 -0
  87. package/deps/simdutf/include/simdutf.h +26 -0
  88. package/deps/simdutf/include/simdutf_c.h +342 -0
  89. package/deps/simdutf/src/arm64/arm_base64.cpp +791 -0
  90. package/deps/simdutf/src/arm64/arm_convert_latin1_to_utf16.cpp +24 -0
  91. package/deps/simdutf/src/arm64/arm_convert_latin1_to_utf32.cpp +24 -0
  92. package/deps/simdutf/src/arm64/arm_convert_latin1_to_utf8.cpp +70 -0
  93. package/deps/simdutf/src/arm64/arm_convert_utf16_to_latin1.cpp +61 -0
  94. package/deps/simdutf/src/arm64/arm_convert_utf16_to_utf32.cpp +185 -0
  95. package/deps/simdutf/src/arm64/arm_convert_utf16_to_utf8.cpp +780 -0
  96. package/deps/simdutf/src/arm64/arm_convert_utf32_to_latin1.cpp +60 -0
  97. package/deps/simdutf/src/arm64/arm_convert_utf32_to_utf16.cpp +208 -0
  98. package/deps/simdutf/src/arm64/arm_convert_utf32_to_utf8.cpp +505 -0
  99. package/deps/simdutf/src/arm64/arm_convert_utf8_to_latin1.cpp +69 -0
  100. package/deps/simdutf/src/arm64/arm_convert_utf8_to_utf16.cpp +313 -0
  101. package/deps/simdutf/src/arm64/arm_convert_utf8_to_utf32.cpp +179 -0
  102. package/deps/simdutf/src/arm64/arm_find.cpp +199 -0
  103. package/deps/simdutf/src/arm64/arm_utf16fix.cpp +185 -0
  104. package/deps/simdutf/src/arm64/arm_validate_utf16.cpp +165 -0
  105. package/deps/simdutf/src/arm64/arm_validate_utf32le.cpp +65 -0
  106. package/deps/simdutf/src/arm64/implementation.cpp +1442 -0
  107. package/deps/simdutf/src/encoding_types.cpp +67 -0
  108. package/deps/simdutf/src/error.cpp +3 -0
  109. package/deps/simdutf/src/fallback/implementation.cpp +589 -0
  110. package/deps/simdutf/src/generic/ascii_validation.h +50 -0
  111. package/deps/simdutf/src/generic/base64.h +233 -0
  112. package/deps/simdutf/src/generic/base64lengths.h +63 -0
  113. package/deps/simdutf/src/generic/buf_block_reader.h +109 -0
  114. package/deps/simdutf/src/generic/find.h +75 -0
  115. package/deps/simdutf/src/generic/utf16/change_endianness.h +24 -0
  116. package/deps/simdutf/src/generic/utf16/count_code_points_bytemask.h +58 -0
  117. package/deps/simdutf/src/generic/utf16/to_well_formed.h +93 -0
  118. package/deps/simdutf/src/generic/utf16/utf32_length_from_utf16.h +15 -0
  119. package/deps/simdutf/src/generic/utf16/utf8_length_from_utf16.h +35 -0
  120. package/deps/simdutf/src/generic/utf16/utf8_length_from_utf16_bytemask.h +199 -0
  121. package/deps/simdutf/src/generic/utf16.h +73 -0
  122. package/deps/simdutf/src/generic/utf32.h +136 -0
  123. package/deps/simdutf/src/generic/utf8/utf16_length_from_utf8_bytemask.h +53 -0
  124. package/deps/simdutf/src/generic/utf8.h +92 -0
  125. package/deps/simdutf/src/generic/utf8_to_latin1/utf8_to_latin1.h +316 -0
  126. package/deps/simdutf/src/generic/utf8_to_latin1/valid_utf8_to_latin1.h +78 -0
  127. package/deps/simdutf/src/generic/utf8_to_utf16/utf8_to_utf16.h +332 -0
  128. package/deps/simdutf/src/generic/utf8_to_utf16/valid_utf8_to_utf16.h +74 -0
  129. package/deps/simdutf/src/generic/utf8_to_utf32/utf8_to_utf32.h +318 -0
  130. package/deps/simdutf/src/generic/utf8_to_utf32/valid_utf8_to_utf32.h +42 -0
  131. package/deps/simdutf/src/generic/utf8_validation/utf8_lookup4_algorithm.h +223 -0
  132. package/deps/simdutf/src/generic/utf8_validation/utf8_validator.h +84 -0
  133. package/deps/simdutf/src/generic/validate_utf16.h +164 -0
  134. package/deps/simdutf/src/generic/validate_utf32.h +99 -0
  135. package/deps/simdutf/src/haswell/avx2_base64.cpp +837 -0
  136. package/deps/simdutf/src/haswell/avx2_convert_latin1_to_utf16.cpp +28 -0
  137. package/deps/simdutf/src/haswell/avx2_convert_latin1_to_utf32.cpp +20 -0
  138. package/deps/simdutf/src/haswell/avx2_convert_latin1_to_utf8.cpp +83 -0
  139. package/deps/simdutf/src/haswell/avx2_convert_utf16_to_latin1.cpp +83 -0
  140. package/deps/simdutf/src/haswell/avx2_convert_utf16_to_utf32.cpp +210 -0
  141. package/deps/simdutf/src/haswell/avx2_convert_utf16_to_utf8.cpp +602 -0
  142. package/deps/simdutf/src/haswell/avx2_convert_utf32_to_latin1.cpp +116 -0
  143. package/deps/simdutf/src/haswell/avx2_convert_utf32_to_utf16.cpp +164 -0
  144. package/deps/simdutf/src/haswell/avx2_convert_utf32_to_utf8.cpp +569 -0
  145. package/deps/simdutf/src/haswell/avx2_convert_utf8_to_latin1.cpp +60 -0
  146. package/deps/simdutf/src/haswell/avx2_convert_utf8_to_utf16.cpp +195 -0
  147. package/deps/simdutf/src/haswell/avx2_convert_utf8_to_utf32.cpp +135 -0
  148. package/deps/simdutf/src/haswell/avx2_utf16fix.cpp +173 -0
  149. package/deps/simdutf/src/haswell/avx2_validate_utf16.cpp +17 -0
  150. package/deps/simdutf/src/haswell/implementation.cpp +1447 -0
  151. package/deps/simdutf/src/icelake/icelake_ascii_validation.inl.cpp +19 -0
  152. package/deps/simdutf/src/icelake/icelake_base64.inl.cpp +630 -0
  153. package/deps/simdutf/src/icelake/icelake_common.inl.cpp +37 -0
  154. package/deps/simdutf/src/icelake/icelake_convert_latin1_to_utf16.inl.cpp +36 -0
  155. package/deps/simdutf/src/icelake/icelake_convert_latin1_to_utf32.inl.cpp +23 -0
  156. package/deps/simdutf/src/icelake/icelake_convert_latin1_to_utf8.inl.cpp +107 -0
  157. package/deps/simdutf/src/icelake/icelake_convert_utf16_to_latin1.inl.cpp +103 -0
  158. package/deps/simdutf/src/icelake/icelake_convert_utf16_to_utf32.inl.cpp +136 -0
  159. package/deps/simdutf/src/icelake/icelake_convert_utf16_to_utf8.inl.cpp +206 -0
  160. package/deps/simdutf/src/icelake/icelake_convert_utf32_to_latin1.inl.cpp +74 -0
  161. package/deps/simdutf/src/icelake/icelake_convert_utf32_to_utf16.inl.cpp +338 -0
  162. package/deps/simdutf/src/icelake/icelake_convert_utf32_to_utf8.inl.cpp +574 -0
  163. package/deps/simdutf/src/icelake/icelake_convert_utf8_to_latin1.inl.cpp +104 -0
  164. package/deps/simdutf/src/icelake/icelake_convert_utf8_to_utf16.inl.cpp +75 -0
  165. package/deps/simdutf/src/icelake/icelake_convert_valid_utf8_to_latin1.inl.cpp +69 -0
  166. package/deps/simdutf/src/icelake/icelake_find.inl.cpp +146 -0
  167. package/deps/simdutf/src/icelake/icelake_from_utf8.inl.cpp +266 -0
  168. package/deps/simdutf/src/icelake/icelake_from_valid_utf8.inl.cpp +136 -0
  169. package/deps/simdutf/src/icelake/icelake_macros.inl.cpp +143 -0
  170. package/deps/simdutf/src/icelake/icelake_utf16fix.cpp +138 -0
  171. package/deps/simdutf/src/icelake/icelake_utf32_validation.inl.cpp +63 -0
  172. package/deps/simdutf/src/icelake/icelake_utf8_common.inl.cpp +753 -0
  173. package/deps/simdutf/src/icelake/icelake_utf8_length_from_utf16.inl.cpp +269 -0
  174. package/deps/simdutf/src/icelake/icelake_utf8_validation.inl.cpp +116 -0
  175. package/deps/simdutf/src/icelake/implementation.cpp +1903 -0
  176. package/deps/simdutf/src/implementation.cpp +2526 -0
  177. package/deps/simdutf/src/lasx/implementation.cpp +1531 -0
  178. package/deps/simdutf/src/lasx/lasx_base64.cpp +695 -0
  179. package/deps/simdutf/src/lasx/lasx_convert_latin1_to_utf16.cpp +76 -0
  180. package/deps/simdutf/src/lasx/lasx_convert_latin1_to_utf32.cpp +55 -0
  181. package/deps/simdutf/src/lasx/lasx_convert_latin1_to_utf8.cpp +65 -0
  182. package/deps/simdutf/src/lasx/lasx_convert_utf16_to_latin1.cpp +64 -0
  183. package/deps/simdutf/src/lasx/lasx_convert_utf16_to_utf32.cpp +183 -0
  184. package/deps/simdutf/src/lasx/lasx_convert_utf16_to_utf8.cpp +550 -0
  185. package/deps/simdutf/src/lasx/lasx_convert_utf32_to_latin1.cpp +73 -0
  186. package/deps/simdutf/src/lasx/lasx_convert_utf32_to_utf16.cpp +218 -0
  187. package/deps/simdutf/src/lasx/lasx_convert_utf32_to_utf8.cpp +589 -0
  188. package/deps/simdutf/src/lasx/lasx_convert_utf8_to_latin1.cpp +72 -0
  189. package/deps/simdutf/src/lasx/lasx_convert_utf8_to_utf16.cpp +296 -0
  190. package/deps/simdutf/src/lasx/lasx_convert_utf8_to_utf32.cpp +190 -0
  191. package/deps/simdutf/src/lasx/lasx_find.cpp +64 -0
  192. package/deps/simdutf/src/lasx/lasx_validate_utf16.cpp +13 -0
  193. package/deps/simdutf/src/lasx/lasx_validate_utf32le.cpp +84 -0
  194. package/deps/simdutf/src/lsx/implementation.cpp +1417 -0
  195. package/deps/simdutf/src/lsx/lsx_base64.cpp +675 -0
  196. package/deps/simdutf/src/lsx/lsx_convert_latin1_to_utf16.cpp +39 -0
  197. package/deps/simdutf/src/lsx/lsx_convert_latin1_to_utf32.cpp +27 -0
  198. package/deps/simdutf/src/lsx/lsx_convert_latin1_to_utf8.cpp +56 -0
  199. package/deps/simdutf/src/lsx/lsx_convert_utf16_to_latin1.cpp +64 -0
  200. package/deps/simdutf/src/lsx/lsx_convert_utf16_to_utf32.cpp +133 -0
  201. package/deps/simdutf/src/lsx/lsx_convert_utf16_to_utf8.cpp +518 -0
  202. package/deps/simdutf/src/lsx/lsx_convert_utf32_to_latin1.cpp +66 -0
  203. package/deps/simdutf/src/lsx/lsx_convert_utf32_to_utf16.cpp +155 -0
  204. package/deps/simdutf/src/lsx/lsx_convert_utf32_to_utf8.cpp +459 -0
  205. package/deps/simdutf/src/lsx/lsx_convert_utf8_to_latin1.cpp +75 -0
  206. package/deps/simdutf/src/lsx/lsx_convert_utf8_to_utf16.cpp +291 -0
  207. package/deps/simdutf/src/lsx/lsx_convert_utf8_to_utf32.cpp +179 -0
  208. package/deps/simdutf/src/lsx/lsx_find.cpp +60 -0
  209. package/deps/simdutf/src/lsx/lsx_validate_utf16.cpp +13 -0
  210. package/deps/simdutf/src/lsx/lsx_validate_utf32le.cpp +68 -0
  211. package/deps/simdutf/src/ppc64/implementation.cpp +992 -0
  212. package/deps/simdutf/src/ppc64/ppc64_base64.cpp +480 -0
  213. package/deps/simdutf/src/ppc64/ppc64_base64_internal_tests.cpp +401 -0
  214. package/deps/simdutf/src/ppc64/ppc64_convert_latin1_to_utf16.cpp +12 -0
  215. package/deps/simdutf/src/ppc64/ppc64_convert_latin1_to_utf32.cpp +12 -0
  216. package/deps/simdutf/src/ppc64/ppc64_convert_latin1_to_utf8.cpp +149 -0
  217. package/deps/simdutf/src/ppc64/ppc64_convert_utf16_to_latin1.cpp +67 -0
  218. package/deps/simdutf/src/ppc64/ppc64_convert_utf16_to_utf32.cpp +87 -0
  219. package/deps/simdutf/src/ppc64/ppc64_convert_utf16_to_utf8.cpp +296 -0
  220. package/deps/simdutf/src/ppc64/ppc64_convert_utf32_to_latin1.cpp +57 -0
  221. package/deps/simdutf/src/ppc64/ppc64_convert_utf32_to_utf16.cpp +117 -0
  222. package/deps/simdutf/src/ppc64/ppc64_convert_utf32_to_utf8.cpp +166 -0
  223. package/deps/simdutf/src/ppc64/ppc64_convert_utf8_to_latin1.cpp +69 -0
  224. package/deps/simdutf/src/ppc64/ppc64_convert_utf8_to_utf16.cpp +211 -0
  225. package/deps/simdutf/src/ppc64/ppc64_convert_utf8_to_utf32.cpp +153 -0
  226. package/deps/simdutf/src/ppc64/ppc64_utf16_to_utf8_tables.h +1011 -0
  227. package/deps/simdutf/src/ppc64/ppc64_utf8_length_from_latin1.cpp +37 -0
  228. package/deps/simdutf/src/ppc64/ppc64_validate_utf16.cpp +19 -0
  229. package/deps/simdutf/src/ppc64/templates.cpp +91 -0
  230. package/deps/simdutf/src/rvv/implementation.cpp +138 -0
  231. package/deps/simdutf/src/rvv/rvv_find.cpp +27 -0
  232. package/deps/simdutf/src/rvv/rvv_helpers.inl.cpp +23 -0
  233. package/deps/simdutf/src/rvv/rvv_latin1_to.inl.cpp +71 -0
  234. package/deps/simdutf/src/rvv/rvv_length_from.inl.cpp +164 -0
  235. package/deps/simdutf/src/rvv/rvv_utf16_to.inl.cpp +399 -0
  236. package/deps/simdutf/src/rvv/rvv_utf16fix.cpp +110 -0
  237. package/deps/simdutf/src/rvv/rvv_utf32_to.inl.cpp +307 -0
  238. package/deps/simdutf/src/rvv/rvv_utf8_to.inl.cpp +435 -0
  239. package/deps/simdutf/src/rvv/rvv_validate.inl.cpp +275 -0
  240. package/deps/simdutf/src/simdutf/arm64/begin.h +2 -0
  241. package/deps/simdutf/src/simdutf/arm64/bitmanipulation.h +34 -0
  242. package/deps/simdutf/src/simdutf/arm64/end.h +2 -0
  243. package/deps/simdutf/src/simdutf/arm64/implementation.h +307 -0
  244. package/deps/simdutf/src/simdutf/arm64/intrinsics.h +10 -0
  245. package/deps/simdutf/src/simdutf/arm64/simd.h +547 -0
  246. package/deps/simdutf/src/simdutf/arm64/simd16-inl.h +403 -0
  247. package/deps/simdutf/src/simdutf/arm64/simd32-inl.h +129 -0
  248. package/deps/simdutf/src/simdutf/arm64/simd64-inl.h +28 -0
  249. package/deps/simdutf/src/simdutf/arm64.h +43 -0
  250. package/deps/simdutf/src/simdutf/fallback/begin.h +1 -0
  251. package/deps/simdutf/src/simdutf/fallback/bitmanipulation.h +13 -0
  252. package/deps/simdutf/src/simdutf/fallback/end.h +1 -0
  253. package/deps/simdutf/src/simdutf/fallback/implementation.h +331 -0
  254. package/deps/simdutf/src/simdutf/fallback.h +42 -0
  255. package/deps/simdutf/src/simdutf/haswell/begin.h +15 -0
  256. package/deps/simdutf/src/simdutf/haswell/bitmanipulation.h +35 -0
  257. package/deps/simdutf/src/simdutf/haswell/end.h +13 -0
  258. package/deps/simdutf/src/simdutf/haswell/implementation.h +338 -0
  259. package/deps/simdutf/src/simdutf/haswell/intrinsics.h +67 -0
  260. package/deps/simdutf/src/simdutf/haswell/simd.h +363 -0
  261. package/deps/simdutf/src/simdutf/haswell/simd16-inl.h +261 -0
  262. package/deps/simdutf/src/simdutf/haswell/simd32-inl.h +111 -0
  263. package/deps/simdutf/src/simdutf/haswell/simd64-inl.h +34 -0
  264. package/deps/simdutf/src/simdutf/haswell.h +63 -0
  265. package/deps/simdutf/src/simdutf/icelake/begin.h +14 -0
  266. package/deps/simdutf/src/simdutf/icelake/bitmanipulation.h +44 -0
  267. package/deps/simdutf/src/simdutf/icelake/end.h +12 -0
  268. package/deps/simdutf/src/simdutf/icelake/implementation.h +346 -0
  269. package/deps/simdutf/src/simdutf/icelake/intrinsics.h +138 -0
  270. package/deps/simdutf/src/simdutf/icelake/simd.h +17 -0
  271. package/deps/simdutf/src/simdutf/icelake/simd16-inl.h +90 -0
  272. package/deps/simdutf/src/simdutf/icelake/simd32-inl.h +47 -0
  273. package/deps/simdutf/src/simdutf/icelake.h +81 -0
  274. package/deps/simdutf/src/simdutf/lasx/begin.h +8 -0
  275. package/deps/simdutf/src/simdutf/lasx/bitmanipulation.h +25 -0
  276. package/deps/simdutf/src/simdutf/lasx/end.h +8 -0
  277. package/deps/simdutf/src/simdutf/lasx/implementation.h +310 -0
  278. package/deps/simdutf/src/simdutf/lasx/intrinsics.h +319 -0
  279. package/deps/simdutf/src/simdutf/lasx/simd.h +551 -0
  280. package/deps/simdutf/src/simdutf/lasx/simd16-inl.h +234 -0
  281. package/deps/simdutf/src/simdutf/lasx/simd32-inl.h +74 -0
  282. package/deps/simdutf/src/simdutf/lasx/simd64-inl.h +52 -0
  283. package/deps/simdutf/src/simdutf/lasx.h +49 -0
  284. package/deps/simdutf/src/simdutf/lsx/begin.h +2 -0
  285. package/deps/simdutf/src/simdutf/lsx/bitmanipulation.h +25 -0
  286. package/deps/simdutf/src/simdutf/lsx/end.h +2 -0
  287. package/deps/simdutf/src/simdutf/lsx/implementation.h +309 -0
  288. package/deps/simdutf/src/simdutf/lsx/intrinsics.h +196 -0
  289. package/deps/simdutf/src/simdutf/lsx/simd.h +421 -0
  290. package/deps/simdutf/src/simdutf/lsx/simd16-inl.h +242 -0
  291. package/deps/simdutf/src/simdutf/lsx/simd32-inl.h +69 -0
  292. package/deps/simdutf/src/simdutf/lsx/simd64-inl.h +50 -0
  293. package/deps/simdutf/src/simdutf/lsx.h +52 -0
  294. package/deps/simdutf/src/simdutf/ppc64/begin.h +1 -0
  295. package/deps/simdutf/src/simdutf/ppc64/bitmanipulation.h +29 -0
  296. package/deps/simdutf/src/simdutf/ppc64/end.h +1 -0
  297. package/deps/simdutf/src/simdutf/ppc64/implementation.h +348 -0
  298. package/deps/simdutf/src/simdutf/ppc64/intrinsics.h +19 -0
  299. package/deps/simdutf/src/simdutf/ppc64/simd.h +177 -0
  300. package/deps/simdutf/src/simdutf/ppc64/simd16-inl.h +327 -0
  301. package/deps/simdutf/src/simdutf/ppc64/simd32-inl.h +247 -0
  302. package/deps/simdutf/src/simdutf/ppc64/simd8-inl.h +618 -0
  303. package/deps/simdutf/src/simdutf/ppc64.h +40 -0
  304. package/deps/simdutf/src/simdutf/rvv/begin.h +7 -0
  305. package/deps/simdutf/src/simdutf/rvv/end.h +7 -0
  306. package/deps/simdutf/src/simdutf/rvv/implementation.h +321 -0
  307. package/deps/simdutf/src/simdutf/rvv/intrinsics.h +131 -0
  308. package/deps/simdutf/src/simdutf/rvv.h +41 -0
  309. package/deps/simdutf/src/simdutf/westmere/begin.h +8 -0
  310. package/deps/simdutf/src/simdutf/westmere/bitmanipulation.h +37 -0
  311. package/deps/simdutf/src/simdutf/westmere/end.h +8 -0
  312. package/deps/simdutf/src/simdutf/westmere/implementation.h +338 -0
  313. package/deps/simdutf/src/simdutf/westmere/intrinsics.h +38 -0
  314. package/deps/simdutf/src/simdutf/westmere/simd.h +379 -0
  315. package/deps/simdutf/src/simdutf/westmere/simd16-inl.h +242 -0
  316. package/deps/simdutf/src/simdutf/westmere/simd32-inl.h +151 -0
  317. package/deps/simdutf/src/simdutf/westmere/simd64-inl.h +33 -0
  318. package/deps/simdutf/src/simdutf/westmere.h +59 -0
  319. package/deps/simdutf/src/simdutf.cpp +152 -0
  320. package/deps/simdutf/src/simdutf_c.cpp +525 -0
  321. package/deps/simdutf/src/tables/utf16_to_utf8_tables.h +768 -0
  322. package/deps/simdutf/src/tables/utf32_to_utf16_tables.h +53 -0
  323. package/deps/simdutf/src/tables/utf8_to_utf16_tables.h +826 -0
  324. package/deps/simdutf/src/westmere/implementation.cpp +1479 -0
  325. package/deps/simdutf/src/westmere/internal/loader.cpp +7 -0
  326. package/deps/simdutf/src/westmere/internal/write_v_u16_11bits_to_utf8.cpp +66 -0
  327. package/deps/simdutf/src/westmere/sse_base64.cpp +672 -0
  328. package/deps/simdutf/src/westmere/sse_convert_latin1_to_utf16.cpp +21 -0
  329. package/deps/simdutf/src/westmere/sse_convert_latin1_to_utf32.cpp +31 -0
  330. package/deps/simdutf/src/westmere/sse_convert_latin1_to_utf8.cpp +71 -0
  331. package/deps/simdutf/src/westmere/sse_convert_utf16_to_latin1.cpp +70 -0
  332. package/deps/simdutf/src/westmere/sse_convert_utf16_to_utf32.cpp +206 -0
  333. package/deps/simdutf/src/westmere/sse_convert_utf16_to_utf8.cpp +504 -0
  334. package/deps/simdutf/src/westmere/sse_convert_utf32_to_latin1.cpp +82 -0
  335. package/deps/simdutf/src/westmere/sse_convert_utf32_to_utf16.cpp +209 -0
  336. package/deps/simdutf/src/westmere/sse_convert_utf32_to_utf8.cpp +589 -0
  337. package/deps/simdutf/src/westmere/sse_convert_utf8_to_latin1.cpp +58 -0
  338. package/deps/simdutf/src/westmere/sse_convert_utf8_to_utf16.cpp +197 -0
  339. package/deps/simdutf/src/westmere/sse_convert_utf8_to_utf32.cpp +141 -0
  340. package/deps/simdutf/src/westmere/sse_utf16fix.cpp +82 -0
  341. package/deps/simdutf/src/westmere/sse_validate_utf16.cpp +17 -0
  342. package/lib/commonjs/argon2.js +51 -2
  343. package/lib/commonjs/argon2.js.map +1 -1
  344. package/lib/commonjs/cipher.js +109 -11
  345. package/lib/commonjs/cipher.js.map +1 -1
  346. package/lib/commonjs/dsa.js +8 -2
  347. package/lib/commonjs/dsa.js.map +1 -1
  348. package/lib/commonjs/hash.js +15 -5
  349. package/lib/commonjs/hash.js.map +1 -1
  350. package/lib/commonjs/hkdf.js +33 -6
  351. package/lib/commonjs/hkdf.js.map +1 -1
  352. package/lib/commonjs/hmac.js +15 -5
  353. package/lib/commonjs/hmac.js.map +1 -1
  354. package/lib/commonjs/keys/publicCipher.js +10 -4
  355. package/lib/commonjs/keys/publicCipher.js.map +1 -1
  356. package/lib/commonjs/random.js +11 -2
  357. package/lib/commonjs/random.js.map +1 -1
  358. package/lib/commonjs/rsa.js +12 -5
  359. package/lib/commonjs/rsa.js.map +1 -1
  360. package/lib/commonjs/scrypt.js +47 -6
  361. package/lib/commonjs/scrypt.js.map +1 -1
  362. package/lib/commonjs/subtle.js +76 -5
  363. package/lib/commonjs/subtle.js.map +1 -1
  364. package/lib/commonjs/utils/cipher.js +18 -7
  365. package/lib/commonjs/utils/cipher.js.map +1 -1
  366. package/lib/commonjs/utils/conversion.js +33 -9
  367. package/lib/commonjs/utils/conversion.js.map +1 -1
  368. package/lib/commonjs/utils/timingSafeEqual.js +7 -2
  369. package/lib/commonjs/utils/timingSafeEqual.js.map +1 -1
  370. package/lib/commonjs/x509certificate.js +6 -6
  371. package/lib/commonjs/x509certificate.js.map +1 -1
  372. package/lib/module/argon2.js +51 -2
  373. package/lib/module/argon2.js.map +1 -1
  374. package/lib/module/cipher.js +109 -11
  375. package/lib/module/cipher.js.map +1 -1
  376. package/lib/module/dsa.js +8 -2
  377. package/lib/module/dsa.js.map +1 -1
  378. package/lib/module/hash.js +15 -5
  379. package/lib/module/hash.js.map +1 -1
  380. package/lib/module/hkdf.js +33 -6
  381. package/lib/module/hkdf.js.map +1 -1
  382. package/lib/module/hmac.js +15 -5
  383. package/lib/module/hmac.js.map +1 -1
  384. package/lib/module/keys/publicCipher.js +10 -4
  385. package/lib/module/keys/publicCipher.js.map +1 -1
  386. package/lib/module/random.js +11 -2
  387. package/lib/module/random.js.map +1 -1
  388. package/lib/module/rsa.js +11 -4
  389. package/lib/module/rsa.js.map +1 -1
  390. package/lib/module/scrypt.js +47 -6
  391. package/lib/module/scrypt.js.map +1 -1
  392. package/lib/module/subtle.js +76 -5
  393. package/lib/module/subtle.js.map +1 -1
  394. package/lib/module/utils/cipher.js +18 -7
  395. package/lib/module/utils/cipher.js.map +1 -1
  396. package/lib/module/utils/conversion.js +33 -9
  397. package/lib/module/utils/conversion.js.map +1 -1
  398. package/lib/module/utils/timingSafeEqual.js +8 -3
  399. package/lib/module/utils/timingSafeEqual.js.map +1 -1
  400. package/lib/module/x509certificate.js +6 -6
  401. package/lib/module/x509certificate.js.map +1 -1
  402. package/lib/typescript/argon2.d.ts.map +1 -1
  403. package/lib/typescript/cipher.d.ts +2 -2
  404. package/lib/typescript/cipher.d.ts.map +1 -1
  405. package/lib/typescript/dsa.d.ts.map +1 -1
  406. package/lib/typescript/hash.d.ts +2 -2
  407. package/lib/typescript/hash.d.ts.map +1 -1
  408. package/lib/typescript/hkdf.d.ts.map +1 -1
  409. package/lib/typescript/hmac.d.ts +2 -2
  410. package/lib/typescript/hmac.d.ts.map +1 -1
  411. package/lib/typescript/index.d.ts +1 -1
  412. package/lib/typescript/index.d.ts.map +1 -1
  413. package/lib/typescript/keys/publicCipher.d.ts.map +1 -1
  414. package/lib/typescript/random.d.ts.map +1 -1
  415. package/lib/typescript/rsa.d.ts.map +1 -1
  416. package/lib/typescript/scrypt.d.ts.map +1 -1
  417. package/lib/typescript/specs/utils.nitro.d.ts +0 -2
  418. package/lib/typescript/specs/utils.nitro.d.ts.map +1 -1
  419. package/lib/typescript/subtle.d.ts.map +1 -1
  420. package/lib/typescript/utils/cipher.d.ts +13 -1
  421. package/lib/typescript/utils/cipher.d.ts.map +1 -1
  422. package/lib/typescript/utils/conversion.d.ts +9 -6
  423. package/lib/typescript/utils/conversion.d.ts.map +1 -1
  424. package/lib/typescript/utils/timingSafeEqual.d.ts.map +1 -1
  425. package/lib/typescript/x509certificate.d.ts.map +1 -1
  426. package/nitrogen/generated/shared/c++/HybridUtilsSpec.cpp +0 -2
  427. package/nitrogen/generated/shared/c++/HybridUtilsSpec.hpp +0 -3
  428. package/package.json +38 -6
  429. package/src/argon2.ts +80 -2
  430. package/src/cipher.ts +139 -15
  431. package/src/dsa.ts +11 -2
  432. package/src/hash.ts +17 -7
  433. package/src/hkdf.ts +44 -6
  434. package/src/hmac.ts +17 -7
  435. package/src/keys/publicCipher.ts +10 -4
  436. package/src/random.ts +11 -2
  437. package/src/rsa.ts +18 -4
  438. package/src/scrypt.ts +73 -6
  439. package/src/specs/utils.nitro.ts +0 -2
  440. package/src/subtle.ts +90 -8
  441. package/src/utils/cipher.ts +30 -8
  442. package/src/utils/conversion.ts +58 -20
  443. package/src/utils/timingSafeEqual.ts +8 -3
  444. package/src/x509certificate.ts +5 -6
  445. package/deps/blake3/.cargo/config.toml +0 -2
  446. package/deps/blake3/.git-blame-ignore-revs +0 -2
  447. package/deps/blake3/.github/workflows/build_b3sum.py +0 -38
  448. package/deps/blake3/.github/workflows/ci.yml +0 -491
  449. package/deps/blake3/.github/workflows/tag.yml +0 -43
  450. package/deps/blake3/.github/workflows/upload_github_release_asset.py +0 -73
  451. package/deps/blake3/CONTRIBUTING.md +0 -31
  452. package/deps/blake3/Cargo.toml +0 -135
  453. package/deps/blake3/b3sum/Cargo.lock +0 -513
  454. package/deps/blake3/b3sum/Cargo.toml +0 -26
  455. package/deps/blake3/b3sum/README.md +0 -72
  456. package/deps/blake3/b3sum/src/main.rs +0 -564
  457. package/deps/blake3/b3sum/src/unit_tests.rs +0 -235
  458. package/deps/blake3/b3sum/tests/cli_tests.rs +0 -680
  459. package/deps/blake3/b3sum/what_does_check_do.md +0 -176
  460. package/deps/blake3/benches/bench.rs +0 -623
  461. package/deps/blake3/build.rs +0 -389
  462. package/deps/blake3/c/CMakeLists.txt +0 -383
  463. package/deps/blake3/c/CMakePresets.json +0 -73
  464. package/deps/blake3/c/Makefile.testing +0 -82
  465. package/deps/blake3/c/blake3-config.cmake.in +0 -14
  466. package/deps/blake3/c/blake3_avx2.c +0 -326
  467. package/deps/blake3/c/blake3_avx2_x86-64_unix.S +0 -1815
  468. package/deps/blake3/c/blake3_avx2_x86-64_windows_gnu.S +0 -1817
  469. package/deps/blake3/c/blake3_avx2_x86-64_windows_msvc.asm +0 -1828
  470. package/deps/blake3/c/blake3_avx512.c +0 -1388
  471. package/deps/blake3/c/blake3_avx512_x86-64_unix.S +0 -4824
  472. package/deps/blake3/c/blake3_avx512_x86-64_windows_gnu.S +0 -2615
  473. package/deps/blake3/c/blake3_avx512_x86-64_windows_msvc.asm +0 -2634
  474. package/deps/blake3/c/blake3_c_rust_bindings/Cargo.toml +0 -32
  475. package/deps/blake3/c/blake3_c_rust_bindings/README.md +0 -4
  476. package/deps/blake3/c/blake3_c_rust_bindings/benches/bench.rs +0 -477
  477. package/deps/blake3/c/blake3_c_rust_bindings/build.rs +0 -253
  478. package/deps/blake3/c/blake3_c_rust_bindings/cross_test.sh +0 -31
  479. package/deps/blake3/c/blake3_c_rust_bindings/src/lib.rs +0 -333
  480. package/deps/blake3/c/blake3_c_rust_bindings/src/test.rs +0 -696
  481. package/deps/blake3/c/blake3_sse2.c +0 -566
  482. package/deps/blake3/c/blake3_sse2_x86-64_unix.S +0 -2291
  483. package/deps/blake3/c/blake3_sse2_x86-64_windows_gnu.S +0 -2332
  484. package/deps/blake3/c/blake3_sse2_x86-64_windows_msvc.asm +0 -2350
  485. package/deps/blake3/c/blake3_sse41.c +0 -560
  486. package/deps/blake3/c/blake3_sse41_x86-64_unix.S +0 -2028
  487. package/deps/blake3/c/blake3_sse41_x86-64_windows_gnu.S +0 -2069
  488. package/deps/blake3/c/blake3_sse41_x86-64_windows_msvc.asm +0 -2089
  489. package/deps/blake3/c/blake3_tbb.cpp +0 -37
  490. package/deps/blake3/c/dependencies/CMakeLists.txt +0 -3
  491. package/deps/blake3/c/dependencies/tbb/CMakeLists.txt +0 -28
  492. package/deps/blake3/c/example.c +0 -36
  493. package/deps/blake3/c/example_tbb.c +0 -57
  494. package/deps/blake3/c/libblake3.pc.in +0 -12
  495. package/deps/blake3/c/main.c +0 -166
  496. package/deps/blake3/c/test.py +0 -97
  497. package/deps/blake3/media/B3.svg +0 -70
  498. package/deps/blake3/media/BLAKE3.svg +0 -85
  499. package/deps/blake3/media/speed.svg +0 -1474
  500. package/deps/blake3/reference_impl/Cargo.toml +0 -8
  501. package/deps/blake3/reference_impl/README.md +0 -14
  502. package/deps/blake3/reference_impl/reference_impl.rs +0 -374
  503. package/deps/blake3/src/ffi_avx2.rs +0 -65
  504. package/deps/blake3/src/ffi_avx512.rs +0 -169
  505. package/deps/blake3/src/ffi_neon.rs +0 -82
  506. package/deps/blake3/src/ffi_sse2.rs +0 -126
  507. package/deps/blake3/src/ffi_sse41.rs +0 -126
  508. package/deps/blake3/src/guts.rs +0 -60
  509. package/deps/blake3/src/hazmat.rs +0 -704
  510. package/deps/blake3/src/io.rs +0 -64
  511. package/deps/blake3/src/join.rs +0 -92
  512. package/deps/blake3/src/lib.rs +0 -1835
  513. package/deps/blake3/src/platform.rs +0 -587
  514. package/deps/blake3/src/portable.rs +0 -198
  515. package/deps/blake3/src/rust_avx2.rs +0 -474
  516. package/deps/blake3/src/rust_sse2.rs +0 -775
  517. package/deps/blake3/src/rust_sse41.rs +0 -766
  518. package/deps/blake3/src/test.rs +0 -1049
  519. package/deps/blake3/src/traits.rs +0 -227
  520. package/deps/blake3/src/wasm32_simd.rs +0 -794
  521. package/deps/blake3/test_vectors/Cargo.toml +0 -19
  522. package/deps/blake3/test_vectors/cross_test.sh +0 -25
  523. package/deps/blake3/test_vectors/src/bin/generate.rs +0 -4
  524. package/deps/blake3/test_vectors/src/lib.rs +0 -350
  525. package/deps/blake3/test_vectors/test_vectors.json +0 -217
  526. package/deps/blake3/tools/compiler_version/Cargo.toml +0 -7
  527. package/deps/blake3/tools/compiler_version/build.rs +0 -6
  528. package/deps/blake3/tools/compiler_version/src/main.rs +0 -27
  529. package/deps/blake3/tools/instruction_set_support/Cargo.toml +0 -6
  530. package/deps/blake3/tools/instruction_set_support/src/main.rs +0 -10
  531. package/deps/blake3/tools/release.md +0 -16
  532. package/deps/ncrypto/.bazelignore +0 -4
  533. package/deps/ncrypto/.bazelrc +0 -1
  534. package/deps/ncrypto/.bazelversion +0 -1
  535. package/deps/ncrypto/.clang-format +0 -111
  536. package/deps/ncrypto/.github/workflows/bazel.yml +0 -58
  537. package/deps/ncrypto/.github/workflows/commitlint.yml +0 -16
  538. package/deps/ncrypto/.github/workflows/linter.yml +0 -38
  539. package/deps/ncrypto/.github/workflows/macos.yml +0 -43
  540. package/deps/ncrypto/.github/workflows/release-please.yml +0 -16
  541. package/deps/ncrypto/.github/workflows/ubuntu.yml +0 -128
  542. package/deps/ncrypto/.github/workflows/visual-studio.yml +0 -49
  543. package/deps/ncrypto/.python-version +0 -1
  544. package/deps/ncrypto/.release-please-manifest.json +0 -3
  545. package/deps/ncrypto/BUILD.bazel +0 -44
  546. package/deps/ncrypto/CHANGELOG.md +0 -37
  547. package/deps/ncrypto/CMakeLists.txt +0 -79
  548. package/deps/ncrypto/MODULE.bazel +0 -16
  549. package/deps/ncrypto/MODULE.bazel.lock +0 -461
  550. package/deps/ncrypto/cmake/CPM.cmake +0 -1225
  551. package/deps/ncrypto/cmake/ncrypto-flags.cmake +0 -17
  552. package/deps/ncrypto/ncrypto.pc.in +0 -10
  553. package/deps/ncrypto/patches/0001-Expose-libdecrepit-so-NodeJS-can-use-it-for-ncrypto.patch +0 -28
  554. package/deps/ncrypto/pyproject.toml +0 -38
  555. package/deps/ncrypto/release-please-config.json +0 -11
  556. package/deps/ncrypto/src/CMakeLists.txt +0 -40
  557. package/deps/ncrypto/tests/BUILD.bazel +0 -11
  558. package/deps/ncrypto/tests/CMakeLists.txt +0 -7
  559. package/deps/ncrypto/tests/basic.cpp +0 -856
  560. package/deps/ncrypto/tools/run-clang-format.sh +0 -42
  561. package/lib/tsconfig.tsbuildinfo +0 -1
@@ -0,0 +1,36 @@
1
+ // file included directly
2
+ template <endianness big_endian>
3
+ size_t icelake_convert_latin1_to_utf16(const char *latin1_input, size_t len,
4
+ char16_t *utf16_output) {
5
+ size_t rounded_len = len & ~0x1F; // Round down to nearest multiple of 32
6
+
7
+ __m512i byteflip = _mm512_setr_epi64(0x0607040502030001, 0x0e0f0c0d0a0b0809,
8
+ 0x0607040502030001, 0x0e0f0c0d0a0b0809,
9
+ 0x0607040502030001, 0x0e0f0c0d0a0b0809,
10
+ 0x0607040502030001, 0x0e0f0c0d0a0b0809);
11
+ for (size_t i = 0; i < rounded_len; i += 32) {
12
+ // Load 32 Latin1 characters into a 256-bit register
13
+ __m256i in = _mm256_loadu_si256((__m256i *)&latin1_input[i]);
14
+ // Zero extend each set of 8 Latin1 characters to 32 16-bit integers
15
+ __m512i out = _mm512_cvtepu8_epi16(in);
16
+ if (big_endian) {
17
+ out = _mm512_shuffle_epi8(out, byteflip);
18
+ }
19
+ // Store the results back to memory
20
+ _mm512_storeu_si512((__m512i *)&utf16_output[i], out);
21
+ }
22
+ if (rounded_len != len) {
23
+ uint32_t mask = uint32_t(1 << (len - rounded_len)) - 1;
24
+ __m256i in = _mm256_maskz_loadu_epi8(mask, latin1_input + rounded_len);
25
+
26
+ // Zero extend each set of 8 Latin1 characters to 32 16-bit integers
27
+ __m512i out = _mm512_cvtepu8_epi16(in);
28
+ if (big_endian) {
29
+ out = _mm512_shuffle_epi8(out, byteflip);
30
+ }
31
+ // Store the results back to memory
32
+ _mm512_mask_storeu_epi16(utf16_output + rounded_len, mask, out);
33
+ }
34
+
35
+ return len;
36
+ }
@@ -0,0 +1,23 @@
1
+ void avx512_convert_latin1_to_utf32(const char *buf, size_t len,
2
+ char32_t *utf32_output) {
3
+ while (len >= 16) {
4
+ // Load 16 Latin1 characters into a 128-bit register
5
+ __m128i in = _mm_loadu_si128((__m128i *)buf);
6
+
7
+ // Zero extend each set of 8 Latin1 characters to 16 32-bit integers using
8
+ // vpmovzxbd
9
+ __m512i out = _mm512_cvtepu8_epi32(in);
10
+
11
+ // Store the results back to memory
12
+ _mm512_storeu_si512((__m512i *)utf32_output, out);
13
+
14
+ len -= 16;
15
+ buf += 16;
16
+ utf32_output += 16;
17
+ }
18
+
19
+ __mmask16 mask = __mmask16((1 << len) - 1);
20
+ __m128i in = _mm_maskz_loadu_epi8(mask, buf);
21
+ __m512i out = _mm512_cvtepu8_epi32(in);
22
+ _mm512_mask_storeu_epi32((__m512i *)utf32_output, mask, out);
23
+ }
@@ -0,0 +1,107 @@
1
+ // file included directly
2
+
3
+ static inline size_t latin1_to_utf8_avx512_vec(__m512i input, size_t input_len,
4
+ char *utf8_output,
5
+ int mask_output) {
6
+ __mmask64 nonascii = _mm512_movepi8_mask(input);
7
+ size_t output_size = input_len + (size_t)count_ones(nonascii);
8
+
9
+ // Mask to denote whether the byte is a leading byte that is not ascii
10
+ __mmask64 sixth = _mm512_cmpge_epu8_mask(
11
+ input, _mm512_set1_epi8(-64)); // binary representation of -64: 1100 0000
12
+
13
+ const uint64_t alternate_bits = UINT64_C(0x5555555555555555);
14
+ uint64_t ascii = ~nonascii;
15
+ // the bits in ascii are inverted and zeros are interspersed in between them
16
+ uint64_t maskA = ~_pdep_u64(ascii, alternate_bits);
17
+ uint64_t maskB = ~_pdep_u64(ascii >> 32, alternate_bits);
18
+
19
+ // interleave bytes from top and bottom halves (abcd...ABCD -> aAbBcCdD)
20
+ __m512i input_interleaved = _mm512_permutexvar_epi8(
21
+ _mm512_set_epi32(0x3f1f3e1e, 0x3d1d3c1c, 0x3b1b3a1a, 0x39193818,
22
+ 0x37173616, 0x35153414, 0x33133212, 0x31113010,
23
+ 0x2f0f2e0e, 0x2d0d2c0c, 0x2b0b2a0a, 0x29092808,
24
+ 0x27072606, 0x25052404, 0x23032202, 0x21012000),
25
+ input);
26
+
27
+ // double size of each byte, and insert the leading byte 1100 0010
28
+
29
+ /*
30
+ upscale the bytes to 16-bit value, adding the 0b11000000 leading byte in the
31
+ process. We adjust for the bytes that have their two most significant bits.
32
+ This takes care of the first 32 bytes, assuming we interleaved the bytes. */
33
+ __m512i outputA =
34
+ _mm512_shldi_epi16(input_interleaved, _mm512_set1_epi8(-62), 8);
35
+ outputA = _mm512_mask_add_epi16(
36
+ outputA, (__mmask32)sixth, outputA,
37
+ _mm512_set1_epi16(1 - 0x4000)); // 1- 0x4000 = 1100 0000 0000 0001????
38
+
39
+ // in the second 32-bit half, set first or second option based on whether
40
+ // original input is leading byte (second case) or not (first case)
41
+ __m512i leadingB =
42
+ _mm512_mask_blend_epi16((__mmask32)(sixth >> 32),
43
+ _mm512_set1_epi16(0x00c2), // 0000 0000 1101 0010
44
+ _mm512_set1_epi16(0x40c3)); // 0100 0000 1100 0011
45
+ __m512i outputB = _mm512_ternarylogic_epi32(
46
+ input_interleaved, leadingB, _mm512_set1_epi16((short)0xff00),
47
+ (240 & 170) ^ 204); // (input_interleaved & 0xff00) ^ leadingB
48
+
49
+ // prune redundant bytes
50
+ outputA = _mm512_maskz_compress_epi8(maskA, outputA);
51
+ outputB = _mm512_maskz_compress_epi8(maskB, outputB);
52
+
53
+ size_t output_sizeA = (size_t)count_ones((uint32_t)nonascii) + 32;
54
+
55
+ if (mask_output) {
56
+ if (input_len > 32) { // is the second half of the input vector used?
57
+ __mmask64 write_mask = _bzhi_u64(~0ULL, (unsigned int)output_sizeA);
58
+ _mm512_mask_storeu_epi8(utf8_output, write_mask, outputA);
59
+ utf8_output += output_sizeA;
60
+ write_mask = _bzhi_u64(~0ULL, (unsigned int)(output_size - output_sizeA));
61
+ _mm512_mask_storeu_epi8(utf8_output, write_mask, outputB);
62
+ } else {
63
+ __mmask64 write_mask = _bzhi_u64(~0ULL, (unsigned int)output_size);
64
+ _mm512_mask_storeu_epi8(utf8_output, write_mask, outputA);
65
+ }
66
+ } else {
67
+ _mm512_storeu_si512(utf8_output, outputA);
68
+ utf8_output += output_sizeA;
69
+ _mm512_storeu_si512(utf8_output, outputB);
70
+ }
71
+ return output_size;
72
+ }
73
+
74
+ static inline size_t latin1_to_utf8_avx512_branch(__m512i input,
75
+ char *utf8_output) {
76
+ __mmask64 nonascii = _mm512_movepi8_mask(input);
77
+ if (nonascii) {
78
+ return latin1_to_utf8_avx512_vec(input, 64, utf8_output, 0);
79
+ } else {
80
+ _mm512_storeu_si512(utf8_output, input);
81
+ return 64;
82
+ }
83
+ }
84
+
85
+ size_t latin1_to_utf8_avx512_start(const char *buf, size_t len,
86
+ char *utf8_output) {
87
+ char *start = utf8_output;
88
+ size_t pos = 0;
89
+ // if there's at least 128 bytes remaining, we don't need to mask the output
90
+ for (; pos + 128 <= len; pos += 64) {
91
+ __m512i input = _mm512_loadu_si512((__m512i *)(buf + pos));
92
+ utf8_output += latin1_to_utf8_avx512_branch(input, utf8_output);
93
+ }
94
+ // in the last 128 bytes, the first 64 may require masking the output
95
+ if (pos + 64 <= len) {
96
+ __m512i input = _mm512_loadu_si512((__m512i *)(buf + pos));
97
+ utf8_output += latin1_to_utf8_avx512_vec(input, 64, utf8_output, 1);
98
+ pos += 64;
99
+ }
100
+ // with the last 64 bytes, the input also needs to be masked
101
+ if (pos < len) {
102
+ __mmask64 load_mask = _bzhi_u64(~0ULL, (unsigned int)(len - pos));
103
+ __m512i input = _mm512_maskz_loadu_epi8(load_mask, (__m512i *)(buf + pos));
104
+ utf8_output += latin1_to_utf8_avx512_vec(input, len - pos, utf8_output, 1);
105
+ }
106
+ return (size_t)(utf8_output - start);
107
+ }
@@ -0,0 +1,103 @@
1
+ // file included directly
2
+ template <endianness big_endian>
3
+ size_t icelake_convert_utf16_to_latin1(const char16_t *buf, size_t len,
4
+ char *latin1_output) {
5
+ const char16_t *end = buf + len;
6
+ __m512i v_0xFF = _mm512_set1_epi16(0xff);
7
+ __m512i byteflip = _mm512_setr_epi64(0x0607040502030001, 0x0e0f0c0d0a0b0809,
8
+ 0x0607040502030001, 0x0e0f0c0d0a0b0809,
9
+ 0x0607040502030001, 0x0e0f0c0d0a0b0809,
10
+ 0x0607040502030001, 0x0e0f0c0d0a0b0809);
11
+ __m512i shufmask = _mm512_set_epi8(
12
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13
+ 0, 0, 0, 0, 0, 0, 0, 62, 60, 58, 56, 54, 52, 50, 48, 46, 44, 42, 40, 38,
14
+ 36, 34, 32, 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0);
15
+ while (end - buf >= 32) {
16
+ __m512i in = _mm512_loadu_si512((__m512i *)buf);
17
+ if (big_endian) {
18
+ in = _mm512_shuffle_epi8(in, byteflip);
19
+ }
20
+ if (_mm512_cmpgt_epu16_mask(in, v_0xFF)) {
21
+ return 0;
22
+ }
23
+ _mm256_storeu_si256(
24
+ (__m256i *)latin1_output,
25
+ _mm512_castsi512_si256(_mm512_permutexvar_epi8(shufmask, in)));
26
+ latin1_output += 32;
27
+ buf += 32;
28
+ }
29
+ if (buf < end) {
30
+ uint32_t mask(uint32_t(1 << (end - buf)) - 1);
31
+ __m512i in = _mm512_maskz_loadu_epi16(mask, buf);
32
+ if (big_endian) {
33
+ in = _mm512_shuffle_epi8(in, byteflip);
34
+ }
35
+ if (_mm512_cmpgt_epu16_mask(in, v_0xFF)) {
36
+ return 0;
37
+ }
38
+ _mm256_mask_storeu_epi8(
39
+ latin1_output, mask,
40
+ _mm512_castsi512_si256(_mm512_permutexvar_epi8(shufmask, in)));
41
+ }
42
+ return len;
43
+ }
44
+
45
+ template <endianness big_endian>
46
+ std::pair<result, char *>
47
+ icelake_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len,
48
+ char *latin1_output) {
49
+ const char16_t *end = buf + len;
50
+ const char16_t *start = buf;
51
+ __m512i byteflip = _mm512_setr_epi64(0x0607040502030001, 0x0e0f0c0d0a0b0809,
52
+ 0x0607040502030001, 0x0e0f0c0d0a0b0809,
53
+ 0x0607040502030001, 0x0e0f0c0d0a0b0809,
54
+ 0x0607040502030001, 0x0e0f0c0d0a0b0809);
55
+ __m512i v_0xFF = _mm512_set1_epi16(0xff);
56
+ __m512i shufmask = _mm512_set_epi8(
57
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
58
+ 0, 0, 0, 0, 0, 0, 0, 62, 60, 58, 56, 54, 52, 50, 48, 46, 44, 42, 40, 38,
59
+ 36, 34, 32, 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0);
60
+ while (end - buf >= 32) {
61
+ __m512i in = _mm512_loadu_si512((__m512i *)buf);
62
+ if (big_endian) {
63
+ in = _mm512_shuffle_epi8(in, byteflip);
64
+ }
65
+ if (_mm512_cmpgt_epu16_mask(in, v_0xFF)) {
66
+ uint16_t word;
67
+ while ((word = scalar::utf16::swap_if_needed<big_endian>(
68
+ uint16_t(*buf))) <= 0xff) {
69
+ *latin1_output++ = uint8_t(word);
70
+ buf++;
71
+ }
72
+ return std::make_pair(result(error_code::TOO_LARGE, buf - start),
73
+ latin1_output);
74
+ }
75
+ _mm256_storeu_si256(
76
+ (__m256i *)latin1_output,
77
+ _mm512_castsi512_si256(_mm512_permutexvar_epi8(shufmask, in)));
78
+ latin1_output += 32;
79
+ buf += 32;
80
+ }
81
+ if (buf < end) {
82
+ uint32_t mask(uint32_t(1 << (end - buf)) - 1);
83
+ __m512i in = _mm512_maskz_loadu_epi16(mask, buf);
84
+ if (big_endian) {
85
+ in = _mm512_shuffle_epi8(in, byteflip);
86
+ }
87
+ if (_mm512_cmpgt_epu16_mask(in, v_0xFF)) {
88
+
89
+ uint16_t word;
90
+ while ((word = scalar::utf16::swap_if_needed<big_endian>(
91
+ uint16_t(*buf))) <= 0xff) {
92
+ *latin1_output++ = uint8_t(word);
93
+ buf++;
94
+ }
95
+ return std::make_pair(result(error_code::TOO_LARGE, buf - start),
96
+ latin1_output);
97
+ }
98
+ _mm256_mask_storeu_epi8(
99
+ latin1_output, mask,
100
+ _mm512_castsi512_si256(_mm512_permutexvar_epi8(shufmask, in)));
101
+ }
102
+ return std::make_pair(result(error_code::SUCCESS, len), latin1_output);
103
+ }
@@ -0,0 +1,136 @@
1
+ // file included directly
2
+
3
+ /*
4
+ Returns a pair: the first unprocessed byte from buf and utf32_output
5
+ A scalar routing should carry on the conversion of the tail.
6
+ */
7
+ template <endianness big_endian>
8
+ std::tuple<const char16_t *, char32_t *, bool>
9
+ convert_utf16_to_utf32(const char16_t *buf, size_t len,
10
+ char32_t *utf32_output) {
11
+ const char16_t *end = buf + len;
12
+ const __m512i v_fc00 = _mm512_set1_epi16((uint16_t)0xfc00);
13
+ const __m512i v_d800 = _mm512_set1_epi16((uint16_t)0xd800);
14
+ const __m512i v_dc00 = _mm512_set1_epi16((uint16_t)0xdc00);
15
+ __mmask32 carry{0};
16
+ const __m512i byteflip = _mm512_setr_epi64(
17
+ 0x0607040502030001, 0x0e0f0c0d0a0b0809, 0x0607040502030001,
18
+ 0x0e0f0c0d0a0b0809, 0x0607040502030001, 0x0e0f0c0d0a0b0809,
19
+ 0x0607040502030001, 0x0e0f0c0d0a0b0809);
20
+ while (std::distance(buf, end) >= 32) {
21
+ // Always safe because buf + 32 <= end so that end - buf >= 32 bytes:
22
+ __m512i in = _mm512_loadu_si512((__m512i *)buf);
23
+ if (big_endian) {
24
+ in = _mm512_shuffle_epi8(in, byteflip);
25
+ }
26
+
27
+ // H - bitmask for high surrogates
28
+ const __mmask32 H =
29
+ _mm512_cmpeq_epi16_mask(_mm512_and_si512(in, v_fc00), v_d800);
30
+ // H - bitmask for low surrogates
31
+ const __mmask32 L =
32
+ _mm512_cmpeq_epi16_mask(_mm512_and_si512(in, v_fc00), v_dc00);
33
+
34
+ if ((H | L)) {
35
+ // surrogate pair(s) in a register
36
+ const __mmask32 V =
37
+ (L ^
38
+ (carry | (H << 1))); // A high surrogate must be followed by low one
39
+ // and a low one must be preceded by a high one.
40
+ // If valid, V should be equal to 0
41
+
42
+ if (V == 0) {
43
+ // valid case
44
+ /*
45
+ Input surrogate pair:
46
+ |1101.11aa.aaaa.aaaa|1101.10bb.bbbb.bbbb|
47
+ low surrogate high surrogate
48
+ */
49
+ /* 1. Expand all code units to 32-bit code units
50
+ in
51
+ |0000.0000.0000.0000.1101.11aa.aaaa.aaaa|0000.0000.0000.0000.1101.10bb.bbbb.bbbb|
52
+ */
53
+ const __m512i first = _mm512_cvtepu16_epi32(_mm512_castsi512_si256(in));
54
+ const __m512i second =
55
+ _mm512_cvtepu16_epi32(_mm512_extracti32x8_epi32(in, 1));
56
+
57
+ /* 2. Shift by one 16-bit word to align low surrogates with high
58
+ surrogates in
59
+ |0000.0000.0000.0000.1101.11aa.aaaa.aaaa|0000.0000.0000.0000.1101.10bb.bbbb.bbbb|
60
+ shifted
61
+ |????.????.????.????.????.????.????.????|0000.0000.0000.0000.1101.11aa.aaaa.aaaa|
62
+ */
63
+ const __m512i shifted_first = _mm512_alignr_epi32(second, first, 1);
64
+ const __m512i shifted_second =
65
+ _mm512_alignr_epi32(_mm512_setzero_si512(), second, 1);
66
+
67
+ /* 3. Align all high surrogates in first and second by shifting to the
68
+ left by 10 bits
69
+ |0000.0000.0000.0000.1101.11aa.aaaa.aaaa|0000.0011.0110.bbbb.bbbb.bb00.0000.0000|
70
+ */
71
+ const __m512i aligned_first =
72
+ _mm512_mask_slli_epi32(first, (__mmask16)H, first, 10);
73
+ const __m512i aligned_second =
74
+ _mm512_mask_slli_epi32(second, (__mmask16)(H >> 16), second, 10);
75
+
76
+ /* 4. Remove surrogate prefixes and add offset 0x10000 by adding in,
77
+ shifted and constant in
78
+ |0000.0000.0000.0000.1101.11aa.aaaa.aaaa|0000.0011.0110.bbbb.bbbb.bb00.0000.0000|
79
+ shifted
80
+ |????.????.????.????.????.????.????.????|0000.0000.0000.0000.1101.11aa.aaaa.aaaa|
81
+ constant|1111.1100.1010.0000.0010.0100.0000.0000|1111.1100.1010.0000.0010.0100.0000.0000|
82
+ */
83
+ const __m512i constant = _mm512_set1_epi32((uint32_t)0xfca02400);
84
+ const __m512i added_first = _mm512_mask_add_epi32(
85
+ aligned_first, (__mmask16)H, aligned_first, shifted_first);
86
+ const __m512i utf32_first = _mm512_mask_add_epi32(
87
+ added_first, (__mmask16)H, added_first, constant);
88
+
89
+ const __m512i added_second =
90
+ _mm512_mask_add_epi32(aligned_second, (__mmask16)(H >> 16),
91
+ aligned_second, shifted_second);
92
+ const __m512i utf32_second = _mm512_mask_add_epi32(
93
+ added_second, (__mmask16)(H >> 16), added_second, constant);
94
+
95
+ // 5. Store all valid UTF-32 code units (low surrogate positions and
96
+ // 32nd word are invalid)
97
+ const __mmask32 valid = ~L & 0x7fffffff;
98
+ // We deliberately do a _mm512_maskz_compress_epi32 followed by
99
+ // storeu_epi32 to ease performance portability to Zen 4.
100
+ const __m512i compressed_first =
101
+ _mm512_maskz_compress_epi32((__mmask16)(valid), utf32_first);
102
+ const size_t howmany1 = count_ones((uint16_t)(valid));
103
+ _mm512_storeu_si512((__m512i *)utf32_output, compressed_first);
104
+ utf32_output += howmany1;
105
+ const __m512i compressed_second =
106
+ _mm512_maskz_compress_epi32((__mmask16)(valid >> 16), utf32_second);
107
+ const size_t howmany2 = count_ones((uint16_t)(valid >> 16));
108
+ // The following could be unsafe in some cases?
109
+ //_mm512_storeu_epi32((__m512i *) utf32_output, compressed_second);
110
+ _mm512_mask_storeu_epi32((__m512i *)utf32_output,
111
+ __mmask16((1 << howmany2) - 1),
112
+ compressed_second);
113
+ utf32_output += howmany2;
114
+ // Only process 31 code units, but keep track if the 31st word is a high
115
+ // surrogate as a carry
116
+ buf += 31;
117
+ carry = (H >> 30) & 0x1;
118
+ } else {
119
+ // invalid case
120
+ return std::make_tuple(buf + carry, utf32_output, false);
121
+ }
122
+ } else {
123
+ // no surrogates
124
+ // extend all thirty-two 16-bit code units to thirty-two 32-bit code units
125
+ _mm512_storeu_si512((__m512i *)(utf32_output),
126
+ _mm512_cvtepu16_epi32(_mm512_castsi512_si256(in)));
127
+ _mm512_storeu_si512(
128
+ (__m512i *)(utf32_output) + 1,
129
+ _mm512_cvtepu16_epi32(_mm512_extracti32x8_epi32(in, 1)));
130
+ utf32_output += 32;
131
+ buf += 32;
132
+ carry = 0;
133
+ }
134
+ } // while
135
+ return std::make_tuple(buf + carry, utf32_output, true);
136
+ }
@@ -0,0 +1,206 @@
1
+ // file included directly
2
+
3
+ /**
4
+ * This function converts the input (inbuf, inlen), assumed to be valid
5
+ * UTF16 (little endian) into UTF-8 (to outbuf). The number of code units
6
+ * written is written to 'outlen' and the function reports the number of input
7
+ * word consumed.
8
+ */
9
+ template <endianness big_endian>
10
+ size_t utf16_to_utf8_avx512i(const char16_t *inbuf, size_t inlen,
11
+ unsigned char *outbuf, size_t *outlen) {
12
+ __m512i in;
13
+ __mmask32 inmask = _cvtu32_mask32(0x7fffffff);
14
+ __m512i byteflip = _mm512_setr_epi64(0x0607040502030001, 0x0e0f0c0d0a0b0809,
15
+ 0x0607040502030001, 0x0e0f0c0d0a0b0809,
16
+ 0x0607040502030001, 0x0e0f0c0d0a0b0809,
17
+ 0x0607040502030001, 0x0e0f0c0d0a0b0809);
18
+ const char16_t *const inbuf_orig = inbuf;
19
+ const unsigned char *const outbuf_orig = outbuf;
20
+ int adjust = 0;
21
+ int carry = 0;
22
+
23
+ while (inlen >= 32) {
24
+ in = _mm512_loadu_si512(inbuf);
25
+ if (big_endian) {
26
+ in = _mm512_shuffle_epi8(in, byteflip);
27
+ }
28
+ inlen -= 31;
29
+ lastiteration:
30
+ inbuf += 31;
31
+
32
+ failiteration:
33
+ const __mmask32 is234byte = _mm512_mask_cmp_epu16_mask(
34
+ inmask, in, _mm512_set1_epi16(0x0080), _MM_CMPINT_NLT);
35
+
36
+ if (_ktestz_mask32_u8(inmask, is234byte)) {
37
+ // fast path for ASCII only
38
+ _mm512_mask_cvtepi16_storeu_epi8(outbuf, inmask, in);
39
+ outbuf += 31;
40
+ carry = 0;
41
+
42
+ if (inlen < 32) {
43
+ goto tail;
44
+ } else {
45
+ continue;
46
+ }
47
+ }
48
+
49
+ const __mmask32 is12byte =
50
+ _mm512_cmp_epu16_mask(in, _mm512_set1_epi16(0x0800), _MM_CMPINT_LT);
51
+
52
+ if (_ktestc_mask32_u8(is12byte, inmask)) {
53
+ // fast path for 1 and 2 byte only
54
+
55
+ const __m512i twobytes = _mm512_ternarylogic_epi32(
56
+ _mm512_slli_epi16(in, 8), _mm512_srli_epi16(in, 6),
57
+ _mm512_set1_epi16(0x3f3f), 0xa8); // (A|B)&C
58
+ in = _mm512_mask_add_epi16(in, is234byte, twobytes,
59
+ _mm512_set1_epi16(int16_t(0x80c0)));
60
+ const __m512i cmpmask =
61
+ _mm512_mask_blend_epi16(inmask, _mm512_set1_epi16(int16_t(0xffff)),
62
+ _mm512_set1_epi16(0x0800));
63
+ const __mmask64 smoosh =
64
+ _mm512_cmp_epu8_mask(in, cmpmask, _MM_CMPINT_NLT);
65
+ const __m512i out = _mm512_maskz_compress_epi8(smoosh, in);
66
+ _mm512_mask_storeu_epi8(outbuf,
67
+ _cvtu64_mask64(_pext_u64(_cvtmask64_u64(smoosh),
68
+ _cvtmask64_u64(smoosh))),
69
+ out);
70
+ outbuf += 31 + _mm_popcnt_u32(_cvtmask32_u32(is234byte));
71
+ carry = 0;
72
+
73
+ if (inlen < 32) {
74
+ goto tail;
75
+ } else {
76
+ continue;
77
+ }
78
+ }
79
+ __m512i lo = _mm512_cvtepu16_epi32(_mm512_castsi512_si256(in));
80
+ __m512i hi = _mm512_cvtepu16_epi32(_mm512_extracti32x8_epi32(in, 1));
81
+
82
+ __m512i taglo = _mm512_set1_epi32(0x8080e000);
83
+ __m512i taghi = taglo;
84
+
85
+ const __m512i fc00masked =
86
+ _mm512_and_epi32(in, _mm512_set1_epi16(int16_t(0xfc00)));
87
+ const __mmask32 hisurr = _mm512_mask_cmp_epu16_mask(
88
+ inmask, fc00masked, _mm512_set1_epi16(int16_t(0xd800)), _MM_CMPINT_EQ);
89
+ const __mmask32 losurr = _mm512_cmp_epu16_mask(
90
+ fc00masked, _mm512_set1_epi16(int16_t(0xdc00)), _MM_CMPINT_EQ);
91
+
92
+ int carryout = 0;
93
+ if (!_kortestz_mask32_u8(hisurr, losurr)) {
94
+ // handle surrogates
95
+
96
+ __m512i los = _mm512_alignr_epi32(hi, lo, 1);
97
+ __m512i his = _mm512_alignr_epi32(lo, hi, 1);
98
+
99
+ const __mmask32 hisurrhi = _kshiftri_mask32(hisurr, 16);
100
+ taglo = _mm512_mask_mov_epi32(taglo, __mmask16(hisurr),
101
+ _mm512_set1_epi32(0x808080f0));
102
+ taghi = _mm512_mask_mov_epi32(taghi, __mmask16(hisurrhi),
103
+ _mm512_set1_epi32(0x808080f0));
104
+
105
+ lo = _mm512_mask_slli_epi32(lo, __mmask16(hisurr), lo, 10);
106
+ hi = _mm512_mask_slli_epi32(hi, __mmask16(hisurrhi), hi, 10);
107
+ los = _mm512_add_epi32(los, _mm512_set1_epi32(0xfca02400));
108
+ his = _mm512_add_epi32(his, _mm512_set1_epi32(0xfca02400));
109
+ lo = _mm512_mask_add_epi32(lo, __mmask16(hisurr), lo, los);
110
+ hi = _mm512_mask_add_epi32(hi, __mmask16(hisurrhi), hi, his);
111
+
112
+ carryout = _cvtu32_mask32(_kshiftri_mask32(hisurr, 30));
113
+
114
+ const uint32_t h = _cvtmask32_u32(hisurr);
115
+ const uint32_t l = _cvtmask32_u32(losurr);
116
+ // check for mismatched surrogates
117
+ if ((h + h + carry) ^ l) {
118
+ const uint32_t lonohi = l & ~(h + h + carry);
119
+ const uint32_t hinolo = h & ~(l >> 1);
120
+ inlen = _tzcnt_u32(hinolo | lonohi);
121
+ inmask = __mmask32(0x7fffffff & ((1U << inlen) - 1));
122
+ in = _mm512_maskz_mov_epi16(inmask, in);
123
+ adjust = (int)inlen - 31;
124
+ inlen = 0;
125
+ goto failiteration;
126
+ }
127
+ }
128
+
129
+ hi = _mm512_maskz_mov_epi32(_cvtu32_mask16(0x7fff), hi);
130
+ carry = carryout;
131
+
132
+ __m512i mslo =
133
+ _mm512_multishift_epi64_epi8(_mm512_set1_epi64(0x20262c3200060c12), lo);
134
+
135
+ __m512i mshi =
136
+ _mm512_multishift_epi64_epi8(_mm512_set1_epi64(0x20262c3200060c12), hi);
137
+
138
+ const __mmask32 outmask = __mmask32(_kandn_mask64(losurr, inmask));
139
+ const __mmask64 outmhi = _kshiftri_mask64(outmask, 16);
140
+
141
+ const __mmask32 is1byte = __mmask32(_knot_mask64(is234byte));
142
+ const __mmask64 is1bhi = _kshiftri_mask64(is1byte, 16);
143
+ const __mmask64 is12bhi = _kshiftri_mask64(is12byte, 16);
144
+
145
+ taglo = _mm512_mask_mov_epi32(taglo, __mmask16(is12byte),
146
+ _mm512_set1_epi32(0x80c00000));
147
+ taghi = _mm512_mask_mov_epi32(taghi, __mmask16(is12bhi),
148
+ _mm512_set1_epi32(0x80c00000));
149
+ __m512i magiclo = _mm512_mask_blend_epi32(__mmask16(outmask),
150
+ _mm512_set1_epi32(0xffffffff),
151
+ _mm512_set1_epi32(0x00010101));
152
+ __m512i magichi = _mm512_mask_blend_epi32(__mmask16(outmhi),
153
+ _mm512_set1_epi32(0xffffffff),
154
+ _mm512_set1_epi32(0x00010101));
155
+
156
+ magiclo = _mm512_mask_blend_epi32(__mmask16(outmask),
157
+ _mm512_set1_epi32(0xffffffff),
158
+ _mm512_set1_epi32(0x00010101));
159
+ magichi = _mm512_mask_blend_epi32(__mmask16(outmhi),
160
+ _mm512_set1_epi32(0xffffffff),
161
+ _mm512_set1_epi32(0x00010101));
162
+
163
+ mslo = _mm512_ternarylogic_epi32(mslo, _mm512_set1_epi32(0x3f3f3f3f), taglo,
164
+ 0xea); // A&B|C
165
+ mshi = _mm512_ternarylogic_epi32(mshi, _mm512_set1_epi32(0x3f3f3f3f), taghi,
166
+ 0xea);
167
+ mslo = _mm512_mask_slli_epi32(mslo, __mmask16(is1byte), lo, 24);
168
+
169
+ mshi = _mm512_mask_slli_epi32(mshi, __mmask16(is1bhi), hi, 24);
170
+
171
+ const __mmask64 wantlo =
172
+ _mm512_cmp_epu8_mask(mslo, magiclo, _MM_CMPINT_NLT);
173
+ const __mmask64 wanthi =
174
+ _mm512_cmp_epu8_mask(mshi, magichi, _MM_CMPINT_NLT);
175
+ const __m512i outlo = _mm512_maskz_compress_epi8(wantlo, mslo);
176
+ const __m512i outhi = _mm512_maskz_compress_epi8(wanthi, mshi);
177
+ const uint64_t wantlo_uint64 = _cvtmask64_u64(wantlo);
178
+ const uint64_t wanthi_uint64 = _cvtmask64_u64(wanthi);
179
+
180
+ uint64_t advlo = _mm_popcnt_u64(wantlo_uint64);
181
+ uint64_t advhi = _mm_popcnt_u64(wanthi_uint64);
182
+
183
+ _mm512_mask_storeu_epi8(
184
+ outbuf, _cvtu64_mask64(_pext_u64(wantlo_uint64, wantlo_uint64)), outlo);
185
+ _mm512_mask_storeu_epi8(
186
+ outbuf + advlo, _cvtu64_mask64(_pext_u64(wanthi_uint64, wanthi_uint64)),
187
+ outhi);
188
+ outbuf += advlo + advhi;
189
+ }
190
+ outbuf += -adjust;
191
+
192
+ tail:
193
+ if (inlen != 0) {
194
+ // We must have inlen < 31.
195
+ inmask = _cvtu32_mask32((1U << inlen) - 1);
196
+ in = _mm512_maskz_loadu_epi16(inmask, inbuf);
197
+ if (big_endian) {
198
+ in = _mm512_shuffle_epi8(in, byteflip);
199
+ }
200
+ adjust = (int)inlen - 31;
201
+ inlen = 0;
202
+ goto lastiteration;
203
+ }
204
+ *outlen = (outbuf - outbuf_orig) + adjust;
205
+ return ((inbuf - inbuf_orig) + adjust);
206
+ }
@@ -0,0 +1,74 @@
1
+ // file included directly
2
+ size_t icelake_convert_utf32_to_latin1(const char32_t *buf, size_t len,
3
+ char *latin1_output) {
4
+ const char32_t *end = buf + len;
5
+ __m512i v_0xFF = _mm512_set1_epi32(0xff);
6
+ __m512i shufmask = _mm512_set_epi8(
7
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 60,
9
+ 56, 52, 48, 44, 40, 36, 32, 28, 24, 20, 16, 12, 8, 4, 0);
10
+ while (end - buf >= 16) {
11
+ __m512i in = _mm512_loadu_si512((__m512i *)buf);
12
+ if (_mm512_cmpgt_epu32_mask(in, v_0xFF)) {
13
+ return 0;
14
+ }
15
+ _mm_storeu_si128(
16
+ (__m128i *)latin1_output,
17
+ _mm512_castsi512_si128(_mm512_permutexvar_epi8(shufmask, in)));
18
+ latin1_output += 16;
19
+ buf += 16;
20
+ }
21
+ if (buf < end) {
22
+ uint16_t mask = uint16_t((1 << (end - buf)) - 1);
23
+ __m512i in = _mm512_maskz_loadu_epi32(mask, buf);
24
+ if (_mm512_cmpgt_epu32_mask(in, v_0xFF)) {
25
+ return 0;
26
+ }
27
+ _mm_mask_storeu_epi8(
28
+ latin1_output, mask,
29
+ _mm512_castsi512_si128(_mm512_permutexvar_epi8(shufmask, in)));
30
+ }
31
+ return len;
32
+ }
33
+
34
+ std::pair<result, char *>
35
+ icelake_convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len,
36
+ char *latin1_output) {
37
+ const char32_t *end = buf + len;
38
+ const char32_t *start = buf;
39
+ __m512i v_0xFF = _mm512_set1_epi32(0xff);
40
+ __m512i shufmask = _mm512_set_epi8(
41
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
42
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 60,
43
+ 56, 52, 48, 44, 40, 36, 32, 28, 24, 20, 16, 12, 8, 4, 0);
44
+ while (end - buf >= 16) {
45
+ __m512i in = _mm512_loadu_si512((__m512i *)buf);
46
+ if (_mm512_cmpgt_epu32_mask(in, v_0xFF)) {
47
+ while (uint32_t(*buf) <= 0xff) {
48
+ *latin1_output++ = uint8_t(*buf++);
49
+ }
50
+ return std::make_pair(result(error_code::TOO_LARGE, buf - start),
51
+ latin1_output);
52
+ }
53
+ _mm_storeu_si128(
54
+ (__m128i *)latin1_output,
55
+ _mm512_castsi512_si128(_mm512_permutexvar_epi8(shufmask, in)));
56
+ latin1_output += 16;
57
+ buf += 16;
58
+ }
59
+ if (buf < end) {
60
+ uint16_t mask = uint16_t((1 << (end - buf)) - 1);
61
+ __m512i in = _mm512_maskz_loadu_epi32(mask, buf);
62
+ if (_mm512_cmpgt_epu32_mask(in, v_0xFF)) {
63
+ while (uint32_t(*buf) <= 0xff) {
64
+ *latin1_output++ = uint8_t(*buf++);
65
+ }
66
+ return std::make_pair(result(error_code::TOO_LARGE, buf - start),
67
+ latin1_output);
68
+ }
69
+ _mm_mask_storeu_epi8(
70
+ latin1_output, mask,
71
+ _mm512_castsi512_si128(_mm512_permutexvar_epi8(shufmask, in)));
72
+ }
73
+ return std::make_pair(result(error_code::SUCCESS, len), latin1_output);
74
+ }