react-native-quick-crypto 1.0.19 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (561) hide show
  1. package/QuickCrypto.podspec +12 -38
  2. package/README.md +2 -0
  3. package/android/CMakeLists.txt +3 -0
  4. package/android/build.gradle +5 -1
  5. package/cpp/argon2/HybridArgon2.cpp +10 -3
  6. package/cpp/blake3/HybridBlake3.cpp +5 -3
  7. package/cpp/cipher/CCMCipher.cpp +29 -16
  8. package/cpp/cipher/CCMCipher.hpp +2 -4
  9. package/cpp/cipher/ChaCha20Cipher.cpp +14 -18
  10. package/cpp/cipher/ChaCha20Cipher.hpp +2 -4
  11. package/cpp/cipher/ChaCha20Poly1305Cipher.cpp +34 -23
  12. package/cpp/cipher/ChaCha20Poly1305Cipher.hpp +2 -4
  13. package/cpp/cipher/GCMCipher.cpp +14 -15
  14. package/cpp/cipher/HybridCipher.cpp +39 -36
  15. package/cpp/cipher/HybridCipher.hpp +17 -1
  16. package/cpp/cipher/HybridRsaCipher.cpp +74 -29
  17. package/cpp/cipher/OCBCipher.cpp +4 -3
  18. package/cpp/cipher/XChaCha20Poly1305Cipher.cpp +14 -13
  19. package/cpp/cipher/XSalsa20Cipher.cpp +72 -6
  20. package/cpp/cipher/XSalsa20Cipher.hpp +25 -3
  21. package/cpp/cipher/XSalsa20Poly1305Cipher.cpp +21 -25
  22. package/cpp/dh/HybridDiffieHellman.cpp +29 -0
  23. package/cpp/ec/HybridEcKeyPair.cpp +35 -33
  24. package/cpp/ec/HybridEcKeyPair.hpp +3 -7
  25. package/cpp/ecdh/HybridECDH.cpp +23 -0
  26. package/cpp/ed25519/HybridEdKeyPair.cpp +73 -117
  27. package/cpp/ed25519/HybridEdKeyPair.hpp +5 -9
  28. package/cpp/hash/HybridHash.cpp +5 -7
  29. package/cpp/hkdf/HybridHkdf.cpp +6 -4
  30. package/cpp/hmac/HybridHmac.cpp +4 -6
  31. package/cpp/kmac/HybridKmac.cpp +4 -4
  32. package/cpp/mldsa/HybridMlDsaKeyPair.cpp +37 -49
  33. package/cpp/mlkem/HybridMlKemKeyPair.cpp +39 -43
  34. package/cpp/pbkdf2/HybridPbkdf2.cpp +7 -8
  35. package/cpp/rsa/HybridRsaKeyPair.cpp +5 -8
  36. package/cpp/rsa/HybridRsaKeyPair.hpp +4 -7
  37. package/cpp/scrypt/HybridScrypt.cpp +6 -4
  38. package/cpp/sign/HybridSignHandle.cpp +25 -68
  39. package/cpp/sign/HybridVerifyHandle.cpp +23 -60
  40. package/cpp/utils/HybridUtils.cpp +213 -111
  41. package/cpp/utils/HybridUtils.hpp +9 -2
  42. package/cpp/utils/QuickCryptoUtils.hpp +72 -0
  43. package/deps/simdutf/LICENSE-APACHE +201 -0
  44. package/deps/simdutf/LICENSE-MIT +18 -0
  45. package/deps/simdutf/README.md +2782 -0
  46. package/deps/simdutf/include/simdutf/avx512.h +79 -0
  47. package/deps/simdutf/include/simdutf/base64_implementation.h +158 -0
  48. package/deps/simdutf/include/simdutf/base64_tables.h +887 -0
  49. package/deps/simdutf/include/simdutf/common_defs.h +186 -0
  50. package/deps/simdutf/include/simdutf/compiler_check.h +50 -0
  51. package/deps/simdutf/include/simdutf/constexpr_ptr.h +138 -0
  52. package/deps/simdutf/include/simdutf/encoding_types.h +189 -0
  53. package/deps/simdutf/include/simdutf/error.h +126 -0
  54. package/deps/simdutf/include/simdutf/implementation.h +7081 -0
  55. package/deps/simdutf/include/simdutf/internal/isadetection.h +325 -0
  56. package/deps/simdutf/include/simdutf/portability.h +285 -0
  57. package/deps/simdutf/include/simdutf/scalar/ascii.h +86 -0
  58. package/deps/simdutf/include/simdutf/scalar/atomic_util.h +105 -0
  59. package/deps/simdutf/include/simdutf/scalar/base64.h +911 -0
  60. package/deps/simdutf/include/simdutf/scalar/latin1.h +26 -0
  61. package/deps/simdutf/include/simdutf/scalar/latin1_to_utf16/latin1_to_utf16.h +52 -0
  62. package/deps/simdutf/include/simdutf/scalar/latin1_to_utf32/latin1_to_utf32.h +27 -0
  63. package/deps/simdutf/include/simdutf/scalar/latin1_to_utf8/latin1_to_utf8.h +191 -0
  64. package/deps/simdutf/include/simdutf/scalar/swap_bytes.h +35 -0
  65. package/deps/simdutf/include/simdutf/scalar/utf16.h +226 -0
  66. package/deps/simdutf/include/simdutf/scalar/utf16_to_latin1/utf16_to_latin1.h +108 -0
  67. package/deps/simdutf/include/simdutf/scalar/utf16_to_latin1/valid_utf16_to_latin1.h +40 -0
  68. package/deps/simdutf/include/simdutf/scalar/utf16_to_utf32/utf16_to_utf32.h +86 -0
  69. package/deps/simdutf/include/simdutf/scalar/utf16_to_utf32/valid_utf16_to_utf32.h +44 -0
  70. package/deps/simdutf/include/simdutf/scalar/utf16_to_utf8/utf16_to_utf8.h +295 -0
  71. package/deps/simdutf/include/simdutf/scalar/utf16_to_utf8/valid_utf16_to_utf8.h +91 -0
  72. package/deps/simdutf/include/simdutf/scalar/utf32.h +82 -0
  73. package/deps/simdutf/include/simdutf/scalar/utf32_to_latin1/utf32_to_latin1.h +68 -0
  74. package/deps/simdutf/include/simdutf/scalar/utf32_to_latin1/valid_utf32_to_latin1.h +67 -0
  75. package/deps/simdutf/include/simdutf/scalar/utf32_to_utf16/utf32_to_utf16.h +84 -0
  76. package/deps/simdutf/include/simdutf/scalar/utf32_to_utf16/valid_utf32_to_utf16.h +44 -0
  77. package/deps/simdutf/include/simdutf/scalar/utf32_to_utf8/utf32_to_utf8.h +142 -0
  78. package/deps/simdutf/include/simdutf/scalar/utf32_to_utf8/valid_utf32_to_utf8.h +72 -0
  79. package/deps/simdutf/include/simdutf/scalar/utf8.h +326 -0
  80. package/deps/simdutf/include/simdutf/scalar/utf8_to_latin1/utf8_to_latin1.h +225 -0
  81. package/deps/simdutf/include/simdutf/scalar/utf8_to_latin1/valid_utf8_to_latin1.h +87 -0
  82. package/deps/simdutf/include/simdutf/scalar/utf8_to_utf16/utf8_to_utf16.h +342 -0
  83. package/deps/simdutf/include/simdutf/scalar/utf8_to_utf16/valid_utf8_to_utf16.h +106 -0
  84. package/deps/simdutf/include/simdutf/scalar/utf8_to_utf32/utf8_to_utf32.h +299 -0
  85. package/deps/simdutf/include/simdutf/scalar/utf8_to_utf32/valid_utf8_to_utf32.h +83 -0
  86. package/deps/simdutf/include/simdutf/simdutf_version.h +26 -0
  87. package/deps/simdutf/include/simdutf.h +26 -0
  88. package/deps/simdutf/include/simdutf_c.h +342 -0
  89. package/deps/simdutf/src/arm64/arm_base64.cpp +791 -0
  90. package/deps/simdutf/src/arm64/arm_convert_latin1_to_utf16.cpp +24 -0
  91. package/deps/simdutf/src/arm64/arm_convert_latin1_to_utf32.cpp +24 -0
  92. package/deps/simdutf/src/arm64/arm_convert_latin1_to_utf8.cpp +70 -0
  93. package/deps/simdutf/src/arm64/arm_convert_utf16_to_latin1.cpp +61 -0
  94. package/deps/simdutf/src/arm64/arm_convert_utf16_to_utf32.cpp +185 -0
  95. package/deps/simdutf/src/arm64/arm_convert_utf16_to_utf8.cpp +780 -0
  96. package/deps/simdutf/src/arm64/arm_convert_utf32_to_latin1.cpp +60 -0
  97. package/deps/simdutf/src/arm64/arm_convert_utf32_to_utf16.cpp +208 -0
  98. package/deps/simdutf/src/arm64/arm_convert_utf32_to_utf8.cpp +505 -0
  99. package/deps/simdutf/src/arm64/arm_convert_utf8_to_latin1.cpp +69 -0
  100. package/deps/simdutf/src/arm64/arm_convert_utf8_to_utf16.cpp +313 -0
  101. package/deps/simdutf/src/arm64/arm_convert_utf8_to_utf32.cpp +179 -0
  102. package/deps/simdutf/src/arm64/arm_find.cpp +199 -0
  103. package/deps/simdutf/src/arm64/arm_utf16fix.cpp +185 -0
  104. package/deps/simdutf/src/arm64/arm_validate_utf16.cpp +165 -0
  105. package/deps/simdutf/src/arm64/arm_validate_utf32le.cpp +65 -0
  106. package/deps/simdutf/src/arm64/implementation.cpp +1442 -0
  107. package/deps/simdutf/src/encoding_types.cpp +67 -0
  108. package/deps/simdutf/src/error.cpp +3 -0
  109. package/deps/simdutf/src/fallback/implementation.cpp +589 -0
  110. package/deps/simdutf/src/generic/ascii_validation.h +50 -0
  111. package/deps/simdutf/src/generic/base64.h +233 -0
  112. package/deps/simdutf/src/generic/base64lengths.h +63 -0
  113. package/deps/simdutf/src/generic/buf_block_reader.h +109 -0
  114. package/deps/simdutf/src/generic/find.h +75 -0
  115. package/deps/simdutf/src/generic/utf16/change_endianness.h +24 -0
  116. package/deps/simdutf/src/generic/utf16/count_code_points_bytemask.h +58 -0
  117. package/deps/simdutf/src/generic/utf16/to_well_formed.h +93 -0
  118. package/deps/simdutf/src/generic/utf16/utf32_length_from_utf16.h +15 -0
  119. package/deps/simdutf/src/generic/utf16/utf8_length_from_utf16.h +35 -0
  120. package/deps/simdutf/src/generic/utf16/utf8_length_from_utf16_bytemask.h +199 -0
  121. package/deps/simdutf/src/generic/utf16.h +73 -0
  122. package/deps/simdutf/src/generic/utf32.h +136 -0
  123. package/deps/simdutf/src/generic/utf8/utf16_length_from_utf8_bytemask.h +53 -0
  124. package/deps/simdutf/src/generic/utf8.h +92 -0
  125. package/deps/simdutf/src/generic/utf8_to_latin1/utf8_to_latin1.h +316 -0
  126. package/deps/simdutf/src/generic/utf8_to_latin1/valid_utf8_to_latin1.h +78 -0
  127. package/deps/simdutf/src/generic/utf8_to_utf16/utf8_to_utf16.h +332 -0
  128. package/deps/simdutf/src/generic/utf8_to_utf16/valid_utf8_to_utf16.h +74 -0
  129. package/deps/simdutf/src/generic/utf8_to_utf32/utf8_to_utf32.h +318 -0
  130. package/deps/simdutf/src/generic/utf8_to_utf32/valid_utf8_to_utf32.h +42 -0
  131. package/deps/simdutf/src/generic/utf8_validation/utf8_lookup4_algorithm.h +223 -0
  132. package/deps/simdutf/src/generic/utf8_validation/utf8_validator.h +84 -0
  133. package/deps/simdutf/src/generic/validate_utf16.h +164 -0
  134. package/deps/simdutf/src/generic/validate_utf32.h +99 -0
  135. package/deps/simdutf/src/haswell/avx2_base64.cpp +837 -0
  136. package/deps/simdutf/src/haswell/avx2_convert_latin1_to_utf16.cpp +28 -0
  137. package/deps/simdutf/src/haswell/avx2_convert_latin1_to_utf32.cpp +20 -0
  138. package/deps/simdutf/src/haswell/avx2_convert_latin1_to_utf8.cpp +83 -0
  139. package/deps/simdutf/src/haswell/avx2_convert_utf16_to_latin1.cpp +83 -0
  140. package/deps/simdutf/src/haswell/avx2_convert_utf16_to_utf32.cpp +210 -0
  141. package/deps/simdutf/src/haswell/avx2_convert_utf16_to_utf8.cpp +602 -0
  142. package/deps/simdutf/src/haswell/avx2_convert_utf32_to_latin1.cpp +116 -0
  143. package/deps/simdutf/src/haswell/avx2_convert_utf32_to_utf16.cpp +164 -0
  144. package/deps/simdutf/src/haswell/avx2_convert_utf32_to_utf8.cpp +569 -0
  145. package/deps/simdutf/src/haswell/avx2_convert_utf8_to_latin1.cpp +60 -0
  146. package/deps/simdutf/src/haswell/avx2_convert_utf8_to_utf16.cpp +195 -0
  147. package/deps/simdutf/src/haswell/avx2_convert_utf8_to_utf32.cpp +135 -0
  148. package/deps/simdutf/src/haswell/avx2_utf16fix.cpp +173 -0
  149. package/deps/simdutf/src/haswell/avx2_validate_utf16.cpp +17 -0
  150. package/deps/simdutf/src/haswell/implementation.cpp +1447 -0
  151. package/deps/simdutf/src/icelake/icelake_ascii_validation.inl.cpp +19 -0
  152. package/deps/simdutf/src/icelake/icelake_base64.inl.cpp +630 -0
  153. package/deps/simdutf/src/icelake/icelake_common.inl.cpp +37 -0
  154. package/deps/simdutf/src/icelake/icelake_convert_latin1_to_utf16.inl.cpp +36 -0
  155. package/deps/simdutf/src/icelake/icelake_convert_latin1_to_utf32.inl.cpp +23 -0
  156. package/deps/simdutf/src/icelake/icelake_convert_latin1_to_utf8.inl.cpp +107 -0
  157. package/deps/simdutf/src/icelake/icelake_convert_utf16_to_latin1.inl.cpp +103 -0
  158. package/deps/simdutf/src/icelake/icelake_convert_utf16_to_utf32.inl.cpp +136 -0
  159. package/deps/simdutf/src/icelake/icelake_convert_utf16_to_utf8.inl.cpp +206 -0
  160. package/deps/simdutf/src/icelake/icelake_convert_utf32_to_latin1.inl.cpp +74 -0
  161. package/deps/simdutf/src/icelake/icelake_convert_utf32_to_utf16.inl.cpp +338 -0
  162. package/deps/simdutf/src/icelake/icelake_convert_utf32_to_utf8.inl.cpp +574 -0
  163. package/deps/simdutf/src/icelake/icelake_convert_utf8_to_latin1.inl.cpp +104 -0
  164. package/deps/simdutf/src/icelake/icelake_convert_utf8_to_utf16.inl.cpp +75 -0
  165. package/deps/simdutf/src/icelake/icelake_convert_valid_utf8_to_latin1.inl.cpp +69 -0
  166. package/deps/simdutf/src/icelake/icelake_find.inl.cpp +146 -0
  167. package/deps/simdutf/src/icelake/icelake_from_utf8.inl.cpp +266 -0
  168. package/deps/simdutf/src/icelake/icelake_from_valid_utf8.inl.cpp +136 -0
  169. package/deps/simdutf/src/icelake/icelake_macros.inl.cpp +143 -0
  170. package/deps/simdutf/src/icelake/icelake_utf16fix.cpp +138 -0
  171. package/deps/simdutf/src/icelake/icelake_utf32_validation.inl.cpp +63 -0
  172. package/deps/simdutf/src/icelake/icelake_utf8_common.inl.cpp +753 -0
  173. package/deps/simdutf/src/icelake/icelake_utf8_length_from_utf16.inl.cpp +269 -0
  174. package/deps/simdutf/src/icelake/icelake_utf8_validation.inl.cpp +116 -0
  175. package/deps/simdutf/src/icelake/implementation.cpp +1903 -0
  176. package/deps/simdutf/src/implementation.cpp +2526 -0
  177. package/deps/simdutf/src/lasx/implementation.cpp +1531 -0
  178. package/deps/simdutf/src/lasx/lasx_base64.cpp +695 -0
  179. package/deps/simdutf/src/lasx/lasx_convert_latin1_to_utf16.cpp +76 -0
  180. package/deps/simdutf/src/lasx/lasx_convert_latin1_to_utf32.cpp +55 -0
  181. package/deps/simdutf/src/lasx/lasx_convert_latin1_to_utf8.cpp +65 -0
  182. package/deps/simdutf/src/lasx/lasx_convert_utf16_to_latin1.cpp +64 -0
  183. package/deps/simdutf/src/lasx/lasx_convert_utf16_to_utf32.cpp +183 -0
  184. package/deps/simdutf/src/lasx/lasx_convert_utf16_to_utf8.cpp +550 -0
  185. package/deps/simdutf/src/lasx/lasx_convert_utf32_to_latin1.cpp +73 -0
  186. package/deps/simdutf/src/lasx/lasx_convert_utf32_to_utf16.cpp +218 -0
  187. package/deps/simdutf/src/lasx/lasx_convert_utf32_to_utf8.cpp +589 -0
  188. package/deps/simdutf/src/lasx/lasx_convert_utf8_to_latin1.cpp +72 -0
  189. package/deps/simdutf/src/lasx/lasx_convert_utf8_to_utf16.cpp +296 -0
  190. package/deps/simdutf/src/lasx/lasx_convert_utf8_to_utf32.cpp +190 -0
  191. package/deps/simdutf/src/lasx/lasx_find.cpp +64 -0
  192. package/deps/simdutf/src/lasx/lasx_validate_utf16.cpp +13 -0
  193. package/deps/simdutf/src/lasx/lasx_validate_utf32le.cpp +84 -0
  194. package/deps/simdutf/src/lsx/implementation.cpp +1417 -0
  195. package/deps/simdutf/src/lsx/lsx_base64.cpp +675 -0
  196. package/deps/simdutf/src/lsx/lsx_convert_latin1_to_utf16.cpp +39 -0
  197. package/deps/simdutf/src/lsx/lsx_convert_latin1_to_utf32.cpp +27 -0
  198. package/deps/simdutf/src/lsx/lsx_convert_latin1_to_utf8.cpp +56 -0
  199. package/deps/simdutf/src/lsx/lsx_convert_utf16_to_latin1.cpp +64 -0
  200. package/deps/simdutf/src/lsx/lsx_convert_utf16_to_utf32.cpp +133 -0
  201. package/deps/simdutf/src/lsx/lsx_convert_utf16_to_utf8.cpp +518 -0
  202. package/deps/simdutf/src/lsx/lsx_convert_utf32_to_latin1.cpp +66 -0
  203. package/deps/simdutf/src/lsx/lsx_convert_utf32_to_utf16.cpp +155 -0
  204. package/deps/simdutf/src/lsx/lsx_convert_utf32_to_utf8.cpp +459 -0
  205. package/deps/simdutf/src/lsx/lsx_convert_utf8_to_latin1.cpp +75 -0
  206. package/deps/simdutf/src/lsx/lsx_convert_utf8_to_utf16.cpp +291 -0
  207. package/deps/simdutf/src/lsx/lsx_convert_utf8_to_utf32.cpp +179 -0
  208. package/deps/simdutf/src/lsx/lsx_find.cpp +60 -0
  209. package/deps/simdutf/src/lsx/lsx_validate_utf16.cpp +13 -0
  210. package/deps/simdutf/src/lsx/lsx_validate_utf32le.cpp +68 -0
  211. package/deps/simdutf/src/ppc64/implementation.cpp +992 -0
  212. package/deps/simdutf/src/ppc64/ppc64_base64.cpp +480 -0
  213. package/deps/simdutf/src/ppc64/ppc64_base64_internal_tests.cpp +401 -0
  214. package/deps/simdutf/src/ppc64/ppc64_convert_latin1_to_utf16.cpp +12 -0
  215. package/deps/simdutf/src/ppc64/ppc64_convert_latin1_to_utf32.cpp +12 -0
  216. package/deps/simdutf/src/ppc64/ppc64_convert_latin1_to_utf8.cpp +149 -0
  217. package/deps/simdutf/src/ppc64/ppc64_convert_utf16_to_latin1.cpp +67 -0
  218. package/deps/simdutf/src/ppc64/ppc64_convert_utf16_to_utf32.cpp +87 -0
  219. package/deps/simdutf/src/ppc64/ppc64_convert_utf16_to_utf8.cpp +296 -0
  220. package/deps/simdutf/src/ppc64/ppc64_convert_utf32_to_latin1.cpp +57 -0
  221. package/deps/simdutf/src/ppc64/ppc64_convert_utf32_to_utf16.cpp +117 -0
  222. package/deps/simdutf/src/ppc64/ppc64_convert_utf32_to_utf8.cpp +166 -0
  223. package/deps/simdutf/src/ppc64/ppc64_convert_utf8_to_latin1.cpp +69 -0
  224. package/deps/simdutf/src/ppc64/ppc64_convert_utf8_to_utf16.cpp +211 -0
  225. package/deps/simdutf/src/ppc64/ppc64_convert_utf8_to_utf32.cpp +153 -0
  226. package/deps/simdutf/src/ppc64/ppc64_utf16_to_utf8_tables.h +1011 -0
  227. package/deps/simdutf/src/ppc64/ppc64_utf8_length_from_latin1.cpp +37 -0
  228. package/deps/simdutf/src/ppc64/ppc64_validate_utf16.cpp +19 -0
  229. package/deps/simdutf/src/ppc64/templates.cpp +91 -0
  230. package/deps/simdutf/src/rvv/implementation.cpp +138 -0
  231. package/deps/simdutf/src/rvv/rvv_find.cpp +27 -0
  232. package/deps/simdutf/src/rvv/rvv_helpers.inl.cpp +23 -0
  233. package/deps/simdutf/src/rvv/rvv_latin1_to.inl.cpp +71 -0
  234. package/deps/simdutf/src/rvv/rvv_length_from.inl.cpp +164 -0
  235. package/deps/simdutf/src/rvv/rvv_utf16_to.inl.cpp +399 -0
  236. package/deps/simdutf/src/rvv/rvv_utf16fix.cpp +110 -0
  237. package/deps/simdutf/src/rvv/rvv_utf32_to.inl.cpp +307 -0
  238. package/deps/simdutf/src/rvv/rvv_utf8_to.inl.cpp +435 -0
  239. package/deps/simdutf/src/rvv/rvv_validate.inl.cpp +275 -0
  240. package/deps/simdutf/src/simdutf/arm64/begin.h +2 -0
  241. package/deps/simdutf/src/simdutf/arm64/bitmanipulation.h +34 -0
  242. package/deps/simdutf/src/simdutf/arm64/end.h +2 -0
  243. package/deps/simdutf/src/simdutf/arm64/implementation.h +307 -0
  244. package/deps/simdutf/src/simdutf/arm64/intrinsics.h +10 -0
  245. package/deps/simdutf/src/simdutf/arm64/simd.h +547 -0
  246. package/deps/simdutf/src/simdutf/arm64/simd16-inl.h +403 -0
  247. package/deps/simdutf/src/simdutf/arm64/simd32-inl.h +129 -0
  248. package/deps/simdutf/src/simdutf/arm64/simd64-inl.h +28 -0
  249. package/deps/simdutf/src/simdutf/arm64.h +43 -0
  250. package/deps/simdutf/src/simdutf/fallback/begin.h +1 -0
  251. package/deps/simdutf/src/simdutf/fallback/bitmanipulation.h +13 -0
  252. package/deps/simdutf/src/simdutf/fallback/end.h +1 -0
  253. package/deps/simdutf/src/simdutf/fallback/implementation.h +331 -0
  254. package/deps/simdutf/src/simdutf/fallback.h +42 -0
  255. package/deps/simdutf/src/simdutf/haswell/begin.h +15 -0
  256. package/deps/simdutf/src/simdutf/haswell/bitmanipulation.h +35 -0
  257. package/deps/simdutf/src/simdutf/haswell/end.h +13 -0
  258. package/deps/simdutf/src/simdutf/haswell/implementation.h +338 -0
  259. package/deps/simdutf/src/simdutf/haswell/intrinsics.h +67 -0
  260. package/deps/simdutf/src/simdutf/haswell/simd.h +363 -0
  261. package/deps/simdutf/src/simdutf/haswell/simd16-inl.h +261 -0
  262. package/deps/simdutf/src/simdutf/haswell/simd32-inl.h +111 -0
  263. package/deps/simdutf/src/simdutf/haswell/simd64-inl.h +34 -0
  264. package/deps/simdutf/src/simdutf/haswell.h +63 -0
  265. package/deps/simdutf/src/simdutf/icelake/begin.h +14 -0
  266. package/deps/simdutf/src/simdutf/icelake/bitmanipulation.h +44 -0
  267. package/deps/simdutf/src/simdutf/icelake/end.h +12 -0
  268. package/deps/simdutf/src/simdutf/icelake/implementation.h +346 -0
  269. package/deps/simdutf/src/simdutf/icelake/intrinsics.h +138 -0
  270. package/deps/simdutf/src/simdutf/icelake/simd.h +17 -0
  271. package/deps/simdutf/src/simdutf/icelake/simd16-inl.h +90 -0
  272. package/deps/simdutf/src/simdutf/icelake/simd32-inl.h +47 -0
  273. package/deps/simdutf/src/simdutf/icelake.h +81 -0
  274. package/deps/simdutf/src/simdutf/lasx/begin.h +8 -0
  275. package/deps/simdutf/src/simdutf/lasx/bitmanipulation.h +25 -0
  276. package/deps/simdutf/src/simdutf/lasx/end.h +8 -0
  277. package/deps/simdutf/src/simdutf/lasx/implementation.h +310 -0
  278. package/deps/simdutf/src/simdutf/lasx/intrinsics.h +319 -0
  279. package/deps/simdutf/src/simdutf/lasx/simd.h +551 -0
  280. package/deps/simdutf/src/simdutf/lasx/simd16-inl.h +234 -0
  281. package/deps/simdutf/src/simdutf/lasx/simd32-inl.h +74 -0
  282. package/deps/simdutf/src/simdutf/lasx/simd64-inl.h +52 -0
  283. package/deps/simdutf/src/simdutf/lasx.h +49 -0
  284. package/deps/simdutf/src/simdutf/lsx/begin.h +2 -0
  285. package/deps/simdutf/src/simdutf/lsx/bitmanipulation.h +25 -0
  286. package/deps/simdutf/src/simdutf/lsx/end.h +2 -0
  287. package/deps/simdutf/src/simdutf/lsx/implementation.h +309 -0
  288. package/deps/simdutf/src/simdutf/lsx/intrinsics.h +196 -0
  289. package/deps/simdutf/src/simdutf/lsx/simd.h +421 -0
  290. package/deps/simdutf/src/simdutf/lsx/simd16-inl.h +242 -0
  291. package/deps/simdutf/src/simdutf/lsx/simd32-inl.h +69 -0
  292. package/deps/simdutf/src/simdutf/lsx/simd64-inl.h +50 -0
  293. package/deps/simdutf/src/simdutf/lsx.h +52 -0
  294. package/deps/simdutf/src/simdutf/ppc64/begin.h +1 -0
  295. package/deps/simdutf/src/simdutf/ppc64/bitmanipulation.h +29 -0
  296. package/deps/simdutf/src/simdutf/ppc64/end.h +1 -0
  297. package/deps/simdutf/src/simdutf/ppc64/implementation.h +348 -0
  298. package/deps/simdutf/src/simdutf/ppc64/intrinsics.h +19 -0
  299. package/deps/simdutf/src/simdutf/ppc64/simd.h +177 -0
  300. package/deps/simdutf/src/simdutf/ppc64/simd16-inl.h +327 -0
  301. package/deps/simdutf/src/simdutf/ppc64/simd32-inl.h +247 -0
  302. package/deps/simdutf/src/simdutf/ppc64/simd8-inl.h +618 -0
  303. package/deps/simdutf/src/simdutf/ppc64.h +40 -0
  304. package/deps/simdutf/src/simdutf/rvv/begin.h +7 -0
  305. package/deps/simdutf/src/simdutf/rvv/end.h +7 -0
  306. package/deps/simdutf/src/simdutf/rvv/implementation.h +321 -0
  307. package/deps/simdutf/src/simdutf/rvv/intrinsics.h +131 -0
  308. package/deps/simdutf/src/simdutf/rvv.h +41 -0
  309. package/deps/simdutf/src/simdutf/westmere/begin.h +8 -0
  310. package/deps/simdutf/src/simdutf/westmere/bitmanipulation.h +37 -0
  311. package/deps/simdutf/src/simdutf/westmere/end.h +8 -0
  312. package/deps/simdutf/src/simdutf/westmere/implementation.h +338 -0
  313. package/deps/simdutf/src/simdutf/westmere/intrinsics.h +38 -0
  314. package/deps/simdutf/src/simdutf/westmere/simd.h +379 -0
  315. package/deps/simdutf/src/simdutf/westmere/simd16-inl.h +242 -0
  316. package/deps/simdutf/src/simdutf/westmere/simd32-inl.h +151 -0
  317. package/deps/simdutf/src/simdutf/westmere/simd64-inl.h +33 -0
  318. package/deps/simdutf/src/simdutf/westmere.h +59 -0
  319. package/deps/simdutf/src/simdutf.cpp +152 -0
  320. package/deps/simdutf/src/simdutf_c.cpp +525 -0
  321. package/deps/simdutf/src/tables/utf16_to_utf8_tables.h +768 -0
  322. package/deps/simdutf/src/tables/utf32_to_utf16_tables.h +53 -0
  323. package/deps/simdutf/src/tables/utf8_to_utf16_tables.h +826 -0
  324. package/deps/simdutf/src/westmere/implementation.cpp +1479 -0
  325. package/deps/simdutf/src/westmere/internal/loader.cpp +7 -0
  326. package/deps/simdutf/src/westmere/internal/write_v_u16_11bits_to_utf8.cpp +66 -0
  327. package/deps/simdutf/src/westmere/sse_base64.cpp +672 -0
  328. package/deps/simdutf/src/westmere/sse_convert_latin1_to_utf16.cpp +21 -0
  329. package/deps/simdutf/src/westmere/sse_convert_latin1_to_utf32.cpp +31 -0
  330. package/deps/simdutf/src/westmere/sse_convert_latin1_to_utf8.cpp +71 -0
  331. package/deps/simdutf/src/westmere/sse_convert_utf16_to_latin1.cpp +70 -0
  332. package/deps/simdutf/src/westmere/sse_convert_utf16_to_utf32.cpp +206 -0
  333. package/deps/simdutf/src/westmere/sse_convert_utf16_to_utf8.cpp +504 -0
  334. package/deps/simdutf/src/westmere/sse_convert_utf32_to_latin1.cpp +82 -0
  335. package/deps/simdutf/src/westmere/sse_convert_utf32_to_utf16.cpp +209 -0
  336. package/deps/simdutf/src/westmere/sse_convert_utf32_to_utf8.cpp +589 -0
  337. package/deps/simdutf/src/westmere/sse_convert_utf8_to_latin1.cpp +58 -0
  338. package/deps/simdutf/src/westmere/sse_convert_utf8_to_utf16.cpp +197 -0
  339. package/deps/simdutf/src/westmere/sse_convert_utf8_to_utf32.cpp +141 -0
  340. package/deps/simdutf/src/westmere/sse_utf16fix.cpp +82 -0
  341. package/deps/simdutf/src/westmere/sse_validate_utf16.cpp +17 -0
  342. package/lib/commonjs/argon2.js +51 -2
  343. package/lib/commonjs/argon2.js.map +1 -1
  344. package/lib/commonjs/cipher.js +109 -11
  345. package/lib/commonjs/cipher.js.map +1 -1
  346. package/lib/commonjs/dsa.js +8 -2
  347. package/lib/commonjs/dsa.js.map +1 -1
  348. package/lib/commonjs/hash.js +15 -5
  349. package/lib/commonjs/hash.js.map +1 -1
  350. package/lib/commonjs/hkdf.js +33 -6
  351. package/lib/commonjs/hkdf.js.map +1 -1
  352. package/lib/commonjs/hmac.js +15 -5
  353. package/lib/commonjs/hmac.js.map +1 -1
  354. package/lib/commonjs/keys/publicCipher.js +10 -4
  355. package/lib/commonjs/keys/publicCipher.js.map +1 -1
  356. package/lib/commonjs/random.js +11 -2
  357. package/lib/commonjs/random.js.map +1 -1
  358. package/lib/commonjs/rsa.js +12 -5
  359. package/lib/commonjs/rsa.js.map +1 -1
  360. package/lib/commonjs/scrypt.js +47 -6
  361. package/lib/commonjs/scrypt.js.map +1 -1
  362. package/lib/commonjs/subtle.js +76 -5
  363. package/lib/commonjs/subtle.js.map +1 -1
  364. package/lib/commonjs/utils/cipher.js +18 -7
  365. package/lib/commonjs/utils/cipher.js.map +1 -1
  366. package/lib/commonjs/utils/conversion.js +33 -9
  367. package/lib/commonjs/utils/conversion.js.map +1 -1
  368. package/lib/commonjs/utils/timingSafeEqual.js +7 -2
  369. package/lib/commonjs/utils/timingSafeEqual.js.map +1 -1
  370. package/lib/commonjs/x509certificate.js +6 -6
  371. package/lib/commonjs/x509certificate.js.map +1 -1
  372. package/lib/module/argon2.js +51 -2
  373. package/lib/module/argon2.js.map +1 -1
  374. package/lib/module/cipher.js +109 -11
  375. package/lib/module/cipher.js.map +1 -1
  376. package/lib/module/dsa.js +8 -2
  377. package/lib/module/dsa.js.map +1 -1
  378. package/lib/module/hash.js +15 -5
  379. package/lib/module/hash.js.map +1 -1
  380. package/lib/module/hkdf.js +33 -6
  381. package/lib/module/hkdf.js.map +1 -1
  382. package/lib/module/hmac.js +15 -5
  383. package/lib/module/hmac.js.map +1 -1
  384. package/lib/module/keys/publicCipher.js +10 -4
  385. package/lib/module/keys/publicCipher.js.map +1 -1
  386. package/lib/module/random.js +11 -2
  387. package/lib/module/random.js.map +1 -1
  388. package/lib/module/rsa.js +11 -4
  389. package/lib/module/rsa.js.map +1 -1
  390. package/lib/module/scrypt.js +47 -6
  391. package/lib/module/scrypt.js.map +1 -1
  392. package/lib/module/subtle.js +76 -5
  393. package/lib/module/subtle.js.map +1 -1
  394. package/lib/module/utils/cipher.js +18 -7
  395. package/lib/module/utils/cipher.js.map +1 -1
  396. package/lib/module/utils/conversion.js +33 -9
  397. package/lib/module/utils/conversion.js.map +1 -1
  398. package/lib/module/utils/timingSafeEqual.js +8 -3
  399. package/lib/module/utils/timingSafeEqual.js.map +1 -1
  400. package/lib/module/x509certificate.js +6 -6
  401. package/lib/module/x509certificate.js.map +1 -1
  402. package/lib/typescript/argon2.d.ts.map +1 -1
  403. package/lib/typescript/cipher.d.ts +2 -2
  404. package/lib/typescript/cipher.d.ts.map +1 -1
  405. package/lib/typescript/dsa.d.ts.map +1 -1
  406. package/lib/typescript/hash.d.ts +2 -2
  407. package/lib/typescript/hash.d.ts.map +1 -1
  408. package/lib/typescript/hkdf.d.ts.map +1 -1
  409. package/lib/typescript/hmac.d.ts +2 -2
  410. package/lib/typescript/hmac.d.ts.map +1 -1
  411. package/lib/typescript/index.d.ts +1 -1
  412. package/lib/typescript/index.d.ts.map +1 -1
  413. package/lib/typescript/keys/publicCipher.d.ts.map +1 -1
  414. package/lib/typescript/random.d.ts.map +1 -1
  415. package/lib/typescript/rsa.d.ts.map +1 -1
  416. package/lib/typescript/scrypt.d.ts.map +1 -1
  417. package/lib/typescript/specs/utils.nitro.d.ts +0 -2
  418. package/lib/typescript/specs/utils.nitro.d.ts.map +1 -1
  419. package/lib/typescript/subtle.d.ts.map +1 -1
  420. package/lib/typescript/utils/cipher.d.ts +13 -1
  421. package/lib/typescript/utils/cipher.d.ts.map +1 -1
  422. package/lib/typescript/utils/conversion.d.ts +9 -6
  423. package/lib/typescript/utils/conversion.d.ts.map +1 -1
  424. package/lib/typescript/utils/timingSafeEqual.d.ts.map +1 -1
  425. package/lib/typescript/x509certificate.d.ts.map +1 -1
  426. package/nitrogen/generated/shared/c++/HybridUtilsSpec.cpp +0 -2
  427. package/nitrogen/generated/shared/c++/HybridUtilsSpec.hpp +0 -3
  428. package/package.json +38 -6
  429. package/src/argon2.ts +80 -2
  430. package/src/cipher.ts +139 -15
  431. package/src/dsa.ts +11 -2
  432. package/src/hash.ts +17 -7
  433. package/src/hkdf.ts +44 -6
  434. package/src/hmac.ts +17 -7
  435. package/src/keys/publicCipher.ts +10 -4
  436. package/src/random.ts +11 -2
  437. package/src/rsa.ts +18 -4
  438. package/src/scrypt.ts +73 -6
  439. package/src/specs/utils.nitro.ts +0 -2
  440. package/src/subtle.ts +90 -8
  441. package/src/utils/cipher.ts +30 -8
  442. package/src/utils/conversion.ts +58 -20
  443. package/src/utils/timingSafeEqual.ts +8 -3
  444. package/src/x509certificate.ts +5 -6
  445. package/deps/blake3/.cargo/config.toml +0 -2
  446. package/deps/blake3/.git-blame-ignore-revs +0 -2
  447. package/deps/blake3/.github/workflows/build_b3sum.py +0 -38
  448. package/deps/blake3/.github/workflows/ci.yml +0 -491
  449. package/deps/blake3/.github/workflows/tag.yml +0 -43
  450. package/deps/blake3/.github/workflows/upload_github_release_asset.py +0 -73
  451. package/deps/blake3/CONTRIBUTING.md +0 -31
  452. package/deps/blake3/Cargo.toml +0 -135
  453. package/deps/blake3/b3sum/Cargo.lock +0 -513
  454. package/deps/blake3/b3sum/Cargo.toml +0 -26
  455. package/deps/blake3/b3sum/README.md +0 -72
  456. package/deps/blake3/b3sum/src/main.rs +0 -564
  457. package/deps/blake3/b3sum/src/unit_tests.rs +0 -235
  458. package/deps/blake3/b3sum/tests/cli_tests.rs +0 -680
  459. package/deps/blake3/b3sum/what_does_check_do.md +0 -176
  460. package/deps/blake3/benches/bench.rs +0 -623
  461. package/deps/blake3/build.rs +0 -389
  462. package/deps/blake3/c/CMakeLists.txt +0 -383
  463. package/deps/blake3/c/CMakePresets.json +0 -73
  464. package/deps/blake3/c/Makefile.testing +0 -82
  465. package/deps/blake3/c/blake3-config.cmake.in +0 -14
  466. package/deps/blake3/c/blake3_avx2.c +0 -326
  467. package/deps/blake3/c/blake3_avx2_x86-64_unix.S +0 -1815
  468. package/deps/blake3/c/blake3_avx2_x86-64_windows_gnu.S +0 -1817
  469. package/deps/blake3/c/blake3_avx2_x86-64_windows_msvc.asm +0 -1828
  470. package/deps/blake3/c/blake3_avx512.c +0 -1388
  471. package/deps/blake3/c/blake3_avx512_x86-64_unix.S +0 -4824
  472. package/deps/blake3/c/blake3_avx512_x86-64_windows_gnu.S +0 -2615
  473. package/deps/blake3/c/blake3_avx512_x86-64_windows_msvc.asm +0 -2634
  474. package/deps/blake3/c/blake3_c_rust_bindings/Cargo.toml +0 -32
  475. package/deps/blake3/c/blake3_c_rust_bindings/README.md +0 -4
  476. package/deps/blake3/c/blake3_c_rust_bindings/benches/bench.rs +0 -477
  477. package/deps/blake3/c/blake3_c_rust_bindings/build.rs +0 -253
  478. package/deps/blake3/c/blake3_c_rust_bindings/cross_test.sh +0 -31
  479. package/deps/blake3/c/blake3_c_rust_bindings/src/lib.rs +0 -333
  480. package/deps/blake3/c/blake3_c_rust_bindings/src/test.rs +0 -696
  481. package/deps/blake3/c/blake3_sse2.c +0 -566
  482. package/deps/blake3/c/blake3_sse2_x86-64_unix.S +0 -2291
  483. package/deps/blake3/c/blake3_sse2_x86-64_windows_gnu.S +0 -2332
  484. package/deps/blake3/c/blake3_sse2_x86-64_windows_msvc.asm +0 -2350
  485. package/deps/blake3/c/blake3_sse41.c +0 -560
  486. package/deps/blake3/c/blake3_sse41_x86-64_unix.S +0 -2028
  487. package/deps/blake3/c/blake3_sse41_x86-64_windows_gnu.S +0 -2069
  488. package/deps/blake3/c/blake3_sse41_x86-64_windows_msvc.asm +0 -2089
  489. package/deps/blake3/c/blake3_tbb.cpp +0 -37
  490. package/deps/blake3/c/dependencies/CMakeLists.txt +0 -3
  491. package/deps/blake3/c/dependencies/tbb/CMakeLists.txt +0 -28
  492. package/deps/blake3/c/example.c +0 -36
  493. package/deps/blake3/c/example_tbb.c +0 -57
  494. package/deps/blake3/c/libblake3.pc.in +0 -12
  495. package/deps/blake3/c/main.c +0 -166
  496. package/deps/blake3/c/test.py +0 -97
  497. package/deps/blake3/media/B3.svg +0 -70
  498. package/deps/blake3/media/BLAKE3.svg +0 -85
  499. package/deps/blake3/media/speed.svg +0 -1474
  500. package/deps/blake3/reference_impl/Cargo.toml +0 -8
  501. package/deps/blake3/reference_impl/README.md +0 -14
  502. package/deps/blake3/reference_impl/reference_impl.rs +0 -374
  503. package/deps/blake3/src/ffi_avx2.rs +0 -65
  504. package/deps/blake3/src/ffi_avx512.rs +0 -169
  505. package/deps/blake3/src/ffi_neon.rs +0 -82
  506. package/deps/blake3/src/ffi_sse2.rs +0 -126
  507. package/deps/blake3/src/ffi_sse41.rs +0 -126
  508. package/deps/blake3/src/guts.rs +0 -60
  509. package/deps/blake3/src/hazmat.rs +0 -704
  510. package/deps/blake3/src/io.rs +0 -64
  511. package/deps/blake3/src/join.rs +0 -92
  512. package/deps/blake3/src/lib.rs +0 -1835
  513. package/deps/blake3/src/platform.rs +0 -587
  514. package/deps/blake3/src/portable.rs +0 -198
  515. package/deps/blake3/src/rust_avx2.rs +0 -474
  516. package/deps/blake3/src/rust_sse2.rs +0 -775
  517. package/deps/blake3/src/rust_sse41.rs +0 -766
  518. package/deps/blake3/src/test.rs +0 -1049
  519. package/deps/blake3/src/traits.rs +0 -227
  520. package/deps/blake3/src/wasm32_simd.rs +0 -794
  521. package/deps/blake3/test_vectors/Cargo.toml +0 -19
  522. package/deps/blake3/test_vectors/cross_test.sh +0 -25
  523. package/deps/blake3/test_vectors/src/bin/generate.rs +0 -4
  524. package/deps/blake3/test_vectors/src/lib.rs +0 -350
  525. package/deps/blake3/test_vectors/test_vectors.json +0 -217
  526. package/deps/blake3/tools/compiler_version/Cargo.toml +0 -7
  527. package/deps/blake3/tools/compiler_version/build.rs +0 -6
  528. package/deps/blake3/tools/compiler_version/src/main.rs +0 -27
  529. package/deps/blake3/tools/instruction_set_support/Cargo.toml +0 -6
  530. package/deps/blake3/tools/instruction_set_support/src/main.rs +0 -10
  531. package/deps/blake3/tools/release.md +0 -16
  532. package/deps/ncrypto/.bazelignore +0 -4
  533. package/deps/ncrypto/.bazelrc +0 -1
  534. package/deps/ncrypto/.bazelversion +0 -1
  535. package/deps/ncrypto/.clang-format +0 -111
  536. package/deps/ncrypto/.github/workflows/bazel.yml +0 -58
  537. package/deps/ncrypto/.github/workflows/commitlint.yml +0 -16
  538. package/deps/ncrypto/.github/workflows/linter.yml +0 -38
  539. package/deps/ncrypto/.github/workflows/macos.yml +0 -43
  540. package/deps/ncrypto/.github/workflows/release-please.yml +0 -16
  541. package/deps/ncrypto/.github/workflows/ubuntu.yml +0 -128
  542. package/deps/ncrypto/.github/workflows/visual-studio.yml +0 -49
  543. package/deps/ncrypto/.python-version +0 -1
  544. package/deps/ncrypto/.release-please-manifest.json +0 -3
  545. package/deps/ncrypto/BUILD.bazel +0 -44
  546. package/deps/ncrypto/CHANGELOG.md +0 -37
  547. package/deps/ncrypto/CMakeLists.txt +0 -79
  548. package/deps/ncrypto/MODULE.bazel +0 -16
  549. package/deps/ncrypto/MODULE.bazel.lock +0 -461
  550. package/deps/ncrypto/cmake/CPM.cmake +0 -1225
  551. package/deps/ncrypto/cmake/ncrypto-flags.cmake +0 -17
  552. package/deps/ncrypto/ncrypto.pc.in +0 -10
  553. package/deps/ncrypto/patches/0001-Expose-libdecrepit-so-NodeJS-can-use-it-for-ncrypto.patch +0 -28
  554. package/deps/ncrypto/pyproject.toml +0 -38
  555. package/deps/ncrypto/release-please-config.json +0 -11
  556. package/deps/ncrypto/src/CMakeLists.txt +0 -40
  557. package/deps/ncrypto/tests/BUILD.bazel +0 -11
  558. package/deps/ncrypto/tests/CMakeLists.txt +0 -7
  559. package/deps/ncrypto/tests/basic.cpp +0 -856
  560. package/deps/ncrypto/tools/run-clang-format.sh +0 -42
  561. package/lib/tsconfig.tsbuildinfo +0 -1
@@ -0,0 +1,589 @@
1
+ std::pair<const char32_t *, char *>
2
+ sse_convert_utf32_to_utf8(const char32_t *buf, size_t len, char *utf8_output) {
3
+ const char32_t *end = buf + len;
4
+ // Converts UTF-32 to UTF-8, 8 code points per step, while at least 16 + safety_margin inputs remain. Returns {first unconverted input, output cursor}, or {nullptr, output cursor} on a surrogate or a value above 0x10ffff; the remaining tail is left unconverted.
5
+ const __m128i v_0000 = _mm_setzero_si128(); // __m128i = 128 bits
6
+ const __m128i v_f800 = _mm_set1_epi16((uint16_t)0xf800); // 1111 1000 0000
7
+ // 0000
8
+ const __m128i v_c080 = _mm_set1_epi16((uint16_t)0xc080); // 1100 0000 1000
9
+ // 0000
10
+ const __m128i v_ff80 = _mm_set1_epi16((uint16_t)0xff80); // 1111 1111 1000
11
+ // 0000
12
+ const __m128i v_ffff0000 = _mm_set1_epi32(
13
+ (uint32_t)0xffff0000); // 1111 1111 1111 1111 0000 0000 0000 0000
14
+ const __m128i v_7fffffff = _mm_set1_epi32(
15
+ (uint32_t)0x7fffffff); // 0111 1111 1111 1111 1111 1111 1111 1111
16
+ __m128i running_max = _mm_setzero_si128(); // lane-wise max of every vector loaded; checked against 0x10ffff after the loop
17
+ __m128i forbidden_bytemask = _mm_setzero_si128(); // accumulates lanes in 0xd800..0xdfff seen by the 3-byte path; checked after the loop
18
+ const size_t safety_margin =
19
+ 12; // to avoid overruns, see issue
20
+ // https://github.com/simdutf/simdutf/issues/92
21
+
22
+ while (end - buf >=
23
+ std::ptrdiff_t(
24
+ 16 + safety_margin)) { // buf is a char32_t pointer, each char32_t
25
+ // has 4 bytes or 32 bits, thus buf + 16 *
26
+ // char32_t = 512 bits = 64 bytes
27
+ // We load two 16 bytes registers for a total of 32 bytes or 8 characters.
28
+ __m128i in = _mm_loadu_si128((__m128i *)buf);
29
+ __m128i nextin = _mm_loadu_si128(
30
+ (__m128i *)buf + 1); // These two values can hold only 8 UTF32 chars
31
+ running_max = _mm_max_epu32(
32
+ _mm_max_epu32(in, running_max), // take element-wise max char32_t from
33
+ // in and running_max vector
34
+ nextin); // and take element-wise max element from nextin and
35
+ // running_max vector
36
+
37
+ // Pack 32-bit UTF-32 code units to 16-bit UTF-16 code units with unsigned
38
+ // saturation
39
+ __m128i in_16 = _mm_packus_epi32(
40
+ _mm_and_si128(in, v_7fffffff),
41
+ _mm_and_si128(
42
+ nextin,
43
+ v_7fffffff)); // pack the two __m128i into a single vector of eight 16-bit lanes
44
+ // By clearing the highest bit (& v_7fffffff), we make sure all values are
45
+ // seen as non-negative by the pack above (a leading bit of 0 means a
46
+ // non-negative number in two's complement). Values that had the top bit set
47
+ // are still caught through running_max, which is computed from the unmasked
48
+ // inputs. Valid Unicode is well beneath the range where this matters, so
49
+ // that's OK.
50
+
51
+ // Try to apply UTF-16 => UTF-8 from ./sse_convert_utf16_to_utf8.cpp
52
+
53
+ // Check for ASCII fast path
54
+
55
+ // ASCII fast path!!!!
56
+ // We eagerly load another 32 bytes, hoping that they will be ASCII too.
57
+ // The intuition is that we try to collect 16 ASCII characters which
58
+ // requires a total of 64 bytes of input. If we fail, we just pass thirdin
59
+ // and fourthin as our new inputs.
60
+ if (_mm_testz_si128(in_16, v_ff80)) { // if the first two blocks are ASCII
61
+ __m128i thirdin = _mm_loadu_si128((__m128i *)buf + 2);
62
+ __m128i fourthin = _mm_loadu_si128((__m128i *)buf + 3);
63
+ running_max = _mm_max_epu32(
64
+ _mm_max_epu32(thirdin, running_max),
65
+ fourthin); // take the running max of all 4 vectors thus far
66
+ __m128i nextin_16 = _mm_packus_epi32(
67
+ _mm_and_si128(thirdin, v_7fffffff),
68
+ _mm_and_si128(fourthin,
69
+ v_7fffffff)); // pack into 1 vector, now you have two
70
+ if (!_mm_testz_si128(
71
+ nextin_16,
72
+ v_ff80)) { // checks if the second packed vector is ASCII, if not:
73
+ // 1. pack the bytes
74
+ // obviously suboptimal.
75
+ const __m128i utf8_packed = _mm_packus_epi16(
76
+ in_16, in_16); // narrows the 8 lanes to bytes; both 8-byte halves hold the same data
77
+ // 2. store (16 bytes)
78
+ _mm_storeu_si128((__m128i *)utf8_output,
79
+ utf8_packed); // put them into the output
80
+ // 3. adjust pointers
81
+ buf += 8; // the char32_t buffer pointer goes up 8 char32_t chars* 32
82
+ // bits = 256 bits
83
+ utf8_output +=
84
+ 8; // same with output, e.g. lift the first two blocks alone.
85
+ // Proceed with next input
86
+ in_16 = nextin_16;
87
+ // We need to update in and nextin because they are used later.
88
+ in = thirdin;
89
+ nextin = fourthin;
90
+ } else {
91
+ // 1. pack the bytes
92
+ const __m128i utf8_packed = _mm_packus_epi16(in_16, nextin_16);
93
+ // 2. store (16 bytes)
94
+ _mm_storeu_si128((__m128i *)utf8_output, utf8_packed);
95
+ // 3. adjust pointers
96
+ buf += 16;
97
+ utf8_output += 16;
98
+ continue; // we are done for this round!
99
+ }
100
+ }
101
+
102
+ // no bits set above 7th bit -- find out all the ASCII characters
103
+ const __m128i one_byte_bytemask =
104
+ _mm_cmpeq_epi16( // compares each 16-bit lane:
105
+ _mm_and_si128(in_16, v_ff80), // the vector that keeps only the top
106
+ // 9 bits of each 16-bit/2-byte unit
107
+ v_0000 //
108
+ ); // they should be all zero if they are ASCII. E.g. an ASCII lane is
109
+ // of format 0000 0000 0XXX XXXX
110
+ // _mm_cmpeq_epi16 should now return a 1111 1111 1111 1111 for equals, and
111
+ // 0000 0000 0000 0000 if not for each 16-bit/2-byte units
112
+ const uint16_t one_byte_bitmask = static_cast<uint16_t>(_mm_movemask_epi8(
113
+ one_byte_bytemask)); // collect the MSB from previous vector and put
114
+ // them into uint16_t mask
115
+
116
+ // no bits set above 11th bit
117
+ const __m128i one_or_two_bytes_bytemask =
118
+ _mm_cmpeq_epi16(_mm_and_si128(in_16, v_f800), v_0000);
119
+ const uint16_t one_or_two_bytes_bitmask =
120
+ static_cast<uint16_t>(_mm_movemask_epi8(one_or_two_bytes_bytemask));
121
+
122
+ if (one_or_two_bytes_bitmask == 0xffff) {
123
+ // case: all code units either produce 1 or 2 UTF-8 bytes (at least one
124
+ // produces 2 bytes)
125
+ // 1. prepare 2-byte values
126
+ // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8
127
+ // expected output : [110a|aaaa|10bb|bbbb] x 8
128
+ const __m128i v_1f00 =
129
+ _mm_set1_epi16((int16_t)0x1f00); // 0001 1111 0000 0000
130
+ const __m128i v_003f =
131
+ _mm_set1_epi16((int16_t)0x003f); // 0000 0000 0011 1111
132
+
133
+ // t0 = [000a|aaaa|bbbb|bb00]
134
+ const __m128i t0 = _mm_slli_epi16(in_16, 2); // shift packed vector by two
135
+ // t1 = [000a|aaaa|0000|0000]
136
+ const __m128i t1 = _mm_and_si128(t0, v_1f00); // potential first utf8 byte
137
+ // t2 = [0000|0000|00bb|bbbb]
138
+ const __m128i t2 =
139
+ _mm_and_si128(in_16, v_003f); // potential second utf8 byte
140
+ // t3 = [000a|aaaa|00bb|bbbb]
141
+ const __m128i t3 =
142
+ _mm_or_si128(t1, t2); // first and second potential utf8 byte together
143
+ // t4 = [110a|aaaa|10bb|bbbb]
144
+ const __m128i t4 = _mm_or_si128(
145
+ t3,
146
+ v_c080); // t3 | 1100 0000 1000 0000 = full potential 2-byte utf8 unit
147
+
148
+ // 2. merge ASCII and 2-byte codewords
149
+ const __m128i utf8_unpacked =
150
+ _mm_blendv_epi8(t4, in_16, one_byte_bytemask);
151
+
152
+ // 3. prepare bitmask for 8-bit lookup
153
+ // one_byte_bitmask = hhggffeeddccbbaa -- the bits are doubled (h -
154
+ // MSB, a - LSB)
155
+ const uint16_t m0 = one_byte_bitmask & 0x5555; // m0 = 0h0g0f0e0d0c0b0a
156
+ const uint16_t m1 =
157
+ static_cast<uint16_t>(m0 >> 7); // m1 = 00000000h0g0f0e0
158
+ const uint8_t m2 =
159
+ static_cast<uint8_t>((m0 | m1) & 0xff); // m2 = hdgcfbea
160
+ // 4. pack the bytes
161
+ const uint8_t *row =
162
+ &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0];
163
+ const __m128i shuffle = _mm_loadu_si128((__m128i *)(row + 1));
164
+ const __m128i utf8_packed = _mm_shuffle_epi8(utf8_unpacked, shuffle);
165
+
166
+ // 5. store bytes
167
+ _mm_storeu_si128((__m128i *)utf8_output, utf8_packed);
168
+
169
+ // 6. adjust pointers
170
+ buf += 8;
171
+ utf8_output += row[0];
172
+ continue;
173
+ }
174
+
175
+ // Check for overflow in packing
176
+
177
+ const __m128i saturation_bytemask = _mm_cmpeq_epi32(
178
+ _mm_and_si128(_mm_or_si128(in, nextin), v_ffff0000), v_0000);
179
+ const uint32_t saturation_bitmask =
180
+ static_cast<uint32_t>(_mm_movemask_epi8(saturation_bytemask));
181
+ if (saturation_bitmask == 0xffff) {
182
+ // case: code units from register produce either 1, 2 or 3 UTF-8 bytes
183
+ const __m128i v_d800 = _mm_set1_epi16((uint16_t)0xd800);
184
+ forbidden_bytemask =
185
+ _mm_or_si128(forbidden_bytemask,
186
+ _mm_cmpeq_epi16(_mm_and_si128(in_16, v_f800), v_d800));
187
+
188
+ const __m128i dup_even = _mm_setr_epi16(0x0000, 0x0202, 0x0404, 0x0606,
189
+ 0x0808, 0x0a0a, 0x0c0c, 0x0e0e);
190
+
191
+ /* In this branch we handle three cases:
192
+ 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] -
193
+ single UTF-8 byte
194
+ 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] -
195
+ two UTF-8 bytes
196
+ 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] -
197
+ three UTF-8 bytes
198
+
199
+ We expand the input word (16-bit) into two code units (32-bit), thus
200
+ we have room for four bytes. However, we need five distinct bit
201
+ layouts. Note that the last byte in cases #2 and #3 is the same.
202
+
203
+ We precompute byte 1 for case #1 and the common byte for cases #2 & #3
204
+ in register t2.
205
+
206
+ We precompute byte 1 for case #3 and -- **conditionally** -- precompute
207
+ either byte 1 for case #2 or byte 2 for case #3. Note that they
208
+ differ by exactly one bit.
209
+
210
+ Finally from these two code units we build proper UTF-8 sequence, taking
211
+ into account the case (i.e, the number of bytes to write).
212
+ */
213
+ /**
214
+ * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce:
215
+ * t2 => [0ccc|cccc] [10cc|cccc]
216
+ * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb])
217
+ */
218
+ #define simdutf_vec(x) _mm_set1_epi16(static_cast<uint16_t>(x))
219
+ // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc]
220
+ const __m128i t0 = _mm_shuffle_epi8(in_16, dup_even);
221
+ // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc]
222
+ const __m128i t1 = _mm_and_si128(t0, simdutf_vec(0b0011111101111111));
223
+ // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc]
224
+ const __m128i t2 = _mm_or_si128(t1, simdutf_vec(0b1000000000000000));
225
+
226
+ // [aaaa|bbbb|bbcc|cccc] => [0000|aaaa|bbbb|bbcc]
227
+ const __m128i s0 = _mm_srli_epi16(in_16, 4);
228
+ // [0000|aaaa|bbbb|bbcc] => [0000|aaaa|bbbb|bb00]
229
+ const __m128i s1 = _mm_and_si128(s0, simdutf_vec(0b0000111111111100));
230
+ // [0000|aaaa|bbbb|bb00] => [00bb|bbbb|0000|aaaa]
231
+ const __m128i s2 = _mm_maddubs_epi16(s1, simdutf_vec(0x0140));
232
+ // [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa]
233
+ const __m128i s3 = _mm_or_si128(s2, simdutf_vec(0b1100000011100000));
234
+ const __m128i m0 = _mm_andnot_si128(one_or_two_bytes_bytemask,
235
+ simdutf_vec(0b0100000000000000));
236
+ const __m128i s4 = _mm_xor_si128(s3, m0);
237
+ #undef simdutf_vec
238
+
239
+ // 4. expand code units 16-bit => 32-bit
240
+ const __m128i out0 = _mm_unpacklo_epi16(t2, s4);
241
+ const __m128i out1 = _mm_unpackhi_epi16(t2, s4);
242
+
243
+ // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle
244
+ const uint16_t mask =
245
+ (one_byte_bitmask & 0x5555) | (one_or_two_bytes_bitmask & 0xaaaa);
246
+ if (mask == 0) {
247
+ // We only have three-byte code units. Use fast path.
248
+ const __m128i shuffle = _mm_setr_epi8(2, 3, 1, 6, 7, 5, 10, 11, 9, 14,
249
+ 15, 13, -1, -1, -1, -1);
250
+ const __m128i utf8_0 = _mm_shuffle_epi8(out0, shuffle);
251
+ const __m128i utf8_1 = _mm_shuffle_epi8(out1, shuffle);
252
+ _mm_storeu_si128((__m128i *)utf8_output, utf8_0);
253
+ utf8_output += 12;
254
+ _mm_storeu_si128((__m128i *)utf8_output, utf8_1);
255
+ utf8_output += 12;
256
+ buf += 8;
257
+ continue;
258
+ }
259
+ const uint8_t mask0 = uint8_t(mask);
260
+
261
+ const uint8_t *row0 =
262
+ &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0];
263
+ const __m128i shuffle0 = _mm_loadu_si128((__m128i *)(row0 + 1));
264
+ const __m128i utf8_0 = _mm_shuffle_epi8(out0, shuffle0);
265
+
266
+ const uint8_t mask1 = static_cast<uint8_t>(mask >> 8);
267
+
268
+ const uint8_t *row1 =
269
+ &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0];
270
+ const __m128i shuffle1 = _mm_loadu_si128((__m128i *)(row1 + 1));
271
+ const __m128i utf8_1 = _mm_shuffle_epi8(out1, shuffle1);
272
+
273
+ _mm_storeu_si128((__m128i *)utf8_output, utf8_0);
274
+ utf8_output += row0[0];
275
+ _mm_storeu_si128((__m128i *)utf8_output, utf8_1);
276
+ utf8_output += row1[0];
277
+
278
+ buf += 8;
279
+ } else {
280
+ // case: at least one 32-bit word produces a surrogate pair in UTF-16 <=>
281
+ // will produce four UTF-8 bytes. Let us do a scalar fallback. It may seem
282
+ // wasteful to use scalar code, but being efficient with SIMD in the
283
+ // presence of surrogate pairs may require non-trivial tables.
284
+ size_t forward = 15; // convert up to 15 code points one at a time, validating each
285
+ size_t k = 0;
286
+ if (size_t(end - buf) < forward + 1) {
287
+ forward = size_t(end - buf - 1);
288
+ }
289
+ for (; k < forward; k++) {
290
+ uint32_t word = buf[k];
291
+ if ((word & 0xFFFFFF80) == 0) {
292
+ *utf8_output++ = char(word);
293
+ } else if ((word & 0xFFFFF800) == 0) {
294
+ *utf8_output++ = char((word >> 6) | 0b11000000);
295
+ *utf8_output++ = char((word & 0b111111) | 0b10000000);
296
+ } else if ((word & 0xFFFF0000) == 0) {
297
+ if (word >= 0xD800 && word <= 0xDFFF) {
298
+ return std::make_pair(nullptr, utf8_output);
299
+ }
300
+ *utf8_output++ = char((word >> 12) | 0b11100000);
301
+ *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000);
302
+ *utf8_output++ = char((word & 0b111111) | 0b10000000);
303
+ } else {
304
+ if (word > 0x10FFFF) {
305
+ return std::make_pair(nullptr, utf8_output);
306
+ }
307
+ *utf8_output++ = char((word >> 18) | 0b11110000);
308
+ *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000);
309
+ *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000);
310
+ *utf8_output++ = char((word & 0b111111) | 0b10000000);
311
+ }
312
+ }
313
+ buf += k;
314
+ }
315
+ } // while
316
+
317
+ // check for invalid input: any loaded value above 0x10ffff fails here
318
+ const __m128i v_10ffff = _mm_set1_epi32((uint32_t)0x10ffff);
319
+ if (static_cast<uint16_t>(_mm_movemask_epi8(_mm_cmpeq_epi32(
320
+ _mm_max_epu32(running_max, v_10ffff), v_10ffff))) != 0xffff) {
321
+ return std::make_pair(nullptr, utf8_output);
322
+ }
323
+ // any surrogate recorded by the 3-byte path also makes the input invalid
324
+ if (static_cast<uint32_t>(_mm_movemask_epi8(forbidden_bytemask)) != 0) {
325
+ return std::make_pair(nullptr, utf8_output);
326
+ }
327
+
328
+ return std::make_pair(buf, utf8_output);
329
+ }
330
+
331
+ std::pair<result, char *>
332
+ sse_convert_utf32_to_utf8_with_errors(const char32_t *buf, size_t len,
333
+ char *utf8_output) {
334
+ const char32_t *end = buf + len;
335
+ const char32_t *start = buf;
336
+ // Converts UTF-32 to UTF-8, 8 code points per step, while at least 16 + safety_margin inputs remain, validating as it goes. Returns {result(SUCCESS, code points consumed), output cursor}, or {result(TOO_LARGE | SURROGATE, input index), output cursor} on the first error found.
337
+ const __m128i v_0000 = _mm_setzero_si128();
338
+ const __m128i v_f800 = _mm_set1_epi16((uint16_t)0xf800);
339
+ const __m128i v_c080 = _mm_set1_epi16((uint16_t)0xc080);
340
+ const __m128i v_ff80 = _mm_set1_epi16((uint16_t)0xff80);
341
+ const __m128i v_ffff0000 = _mm_set1_epi32((uint32_t)0xffff0000);
342
+ const __m128i v_7fffffff = _mm_set1_epi32((uint32_t)0x7fffffff);
343
+ const __m128i v_10ffff = _mm_set1_epi32((uint32_t)0x10ffff);
344
+
345
+ const size_t safety_margin =
346
+ 12; // to avoid overruns, see issue
347
+ // https://github.com/simdutf/simdutf/issues/92
348
+
349
+ while (end - buf >= std::ptrdiff_t(16 + safety_margin)) {
350
+ // We load two 16 bytes registers for a total of 32 bytes or 8 characters.
351
+ __m128i in = _mm_loadu_si128((__m128i *)buf);
352
+ __m128i nextin = _mm_loadu_si128((__m128i *)buf + 1);
353
+ // Check for too large input (any of the 8 values above 0x10ffff)
354
+ __m128i max_input = _mm_max_epu32(_mm_max_epu32(in, nextin), v_10ffff);
355
+ if (static_cast<uint16_t>(_mm_movemask_epi8(
356
+ _mm_cmpeq_epi32(max_input, v_10ffff))) != 0xffff) {
357
+ return std::make_pair(result(error_code::TOO_LARGE, buf - start),
358
+ utf8_output); // index reported is the first code point of this 8-element block
359
+ }
360
+
361
+ // Pack 32-bit UTF-32 code units to 16-bit UTF-16 code units with unsigned
362
+ // saturation
363
+ __m128i in_16 = _mm_packus_epi32(_mm_and_si128(in, v_7fffffff),
364
+ _mm_and_si128(nextin, v_7fffffff));
365
+
366
+ // Try to apply UTF-16 => UTF-8 from ./sse_convert_utf16_to_utf8.cpp
367
+
368
+ // Check for ASCII fast path
369
+ if (_mm_testz_si128(in_16, v_ff80)) { // ASCII fast path!!!!
370
+ // 1. pack the bytes
371
+ // obviously suboptimal.
372
+ const __m128i utf8_packed = _mm_packus_epi16(in_16, in_16);
373
+ // 2. store (16 bytes)
374
+ _mm_storeu_si128((__m128i *)utf8_output, utf8_packed);
375
+ // 3. adjust pointers
376
+ buf += 8;
377
+ utf8_output += 8;
378
+ continue;
379
+ }
380
+
381
+ // no bits set above 7th bit
382
+ const __m128i one_byte_bytemask =
383
+ _mm_cmpeq_epi16(_mm_and_si128(in_16, v_ff80), v_0000);
384
+ const uint16_t one_byte_bitmask =
385
+ static_cast<uint16_t>(_mm_movemask_epi8(one_byte_bytemask));
386
+
387
+ // no bits set above 11th bit
388
+ const __m128i one_or_two_bytes_bytemask =
389
+ _mm_cmpeq_epi16(_mm_and_si128(in_16, v_f800), v_0000);
390
+ const uint16_t one_or_two_bytes_bitmask =
391
+ static_cast<uint16_t>(_mm_movemask_epi8(one_or_two_bytes_bytemask));
392
+
393
+ if (one_or_two_bytes_bitmask == 0xffff) {
394
+ // case: all code units either produce 1 or 2 UTF-8 bytes (at least one
395
+ // produces 2 bytes)
396
+ // 1. prepare 2-byte values
397
+ // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8
398
+ // expected output : [110a|aaaa|10bb|bbbb] x 8
399
+ const __m128i v_1f00 = _mm_set1_epi16((int16_t)0x1f00);
400
+ const __m128i v_003f = _mm_set1_epi16((int16_t)0x003f);
401
+
402
+ // t0 = [000a|aaaa|bbbb|bb00]
403
+ const __m128i t0 = _mm_slli_epi16(in_16, 2);
404
+ // t1 = [000a|aaaa|0000|0000]
405
+ const __m128i t1 = _mm_and_si128(t0, v_1f00);
406
+ // t2 = [0000|0000|00bb|bbbb]
407
+ const __m128i t2 = _mm_and_si128(in_16, v_003f);
408
+ // t3 = [000a|aaaa|00bb|bbbb]
409
+ const __m128i t3 = _mm_or_si128(t1, t2);
410
+ // t4 = [110a|aaaa|10bb|bbbb]
411
+ const __m128i t4 = _mm_or_si128(t3, v_c080);
412
+
413
+ // 2. merge ASCII and 2-byte codewords
414
+ const __m128i utf8_unpacked =
415
+ _mm_blendv_epi8(t4, in_16, one_byte_bytemask);
416
+
417
+ // 3. prepare bitmask for 8-bit lookup
418
+ // one_byte_bitmask = hhggffeeddccbbaa -- the bits are doubled (h -
419
+ // MSB, a - LSB)
420
+ const uint16_t m0 = one_byte_bitmask & 0x5555; // m0 = 0h0g0f0e0d0c0b0a
421
+ const uint16_t m1 =
422
+ static_cast<uint16_t>(m0 >> 7); // m1 = 00000000h0g0f0e0
423
+ const uint8_t m2 =
424
+ static_cast<uint8_t>((m0 | m1) & 0xff); // m2 = hdgcfbea
425
+ // 4. pack the bytes
426
+ const uint8_t *row =
427
+ &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0];
428
+ const __m128i shuffle = _mm_loadu_si128((__m128i *)(row + 1));
429
+ const __m128i utf8_packed = _mm_shuffle_epi8(utf8_unpacked, shuffle);
430
+
431
+ // 5. store bytes
432
+ _mm_storeu_si128((__m128i *)utf8_output, utf8_packed);
433
+
434
+ // 6. adjust pointers
435
+ buf += 8;
436
+ utf8_output += row[0];
437
+ continue;
438
+ }
439
+
440
+ // Check for overflow in packing
441
+ const __m128i saturation_bytemask = _mm_cmpeq_epi32(
442
+ _mm_and_si128(_mm_or_si128(in, nextin), v_ffff0000), v_0000);
443
+ const uint32_t saturation_bitmask =
444
+ static_cast<uint32_t>(_mm_movemask_epi8(saturation_bytemask));
445
+
446
+ if (saturation_bitmask == 0xffff) {
447
+ // case: code units from register produce either 1, 2 or 3 UTF-8 bytes
448
+
449
+ // Check for illegal surrogate code units
450
+ const __m128i v_d800 = _mm_set1_epi16((uint16_t)0xd800);
451
+ const __m128i forbidden_bytemask =
452
+ _mm_cmpeq_epi16(_mm_and_si128(in_16, v_f800), v_d800);
453
+ if (static_cast<uint32_t>(_mm_movemask_epi8(forbidden_bytemask)) != 0) {
454
+ return std::make_pair(result(error_code::SURROGATE, buf - start),
455
+ utf8_output); // index reported is the first code point of this 8-element block
456
+ }
457
+
458
+ const __m128i dup_even = _mm_setr_epi16(0x0000, 0x0202, 0x0404, 0x0606,
459
+ 0x0808, 0x0a0a, 0x0c0c, 0x0e0e);
460
+
461
+ /* In this branch we handle three cases:
462
+ 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] -
463
+ single UTF-8 byte
464
+ 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] -
465
+ two UTF-8 bytes
466
+ 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] -
467
+ three UTF-8 bytes
468
+
469
+ We expand the input word (16-bit) into two code units (32-bit), thus
470
+ we have room for four bytes. However, we need five distinct bit
471
+ layouts. Note that the last byte in cases #2 and #3 is the same.
472
+
473
+ We precompute byte 1 for case #1 and the common byte for cases #2 & #3
474
+ in register t2.
475
+
476
+ We precompute byte 1 for case #3 and -- **conditionally** -- precompute
477
+ either byte 1 for case #2 or byte 2 for case #3. Note that they
478
+ differ by exactly one bit.
479
+
480
+ Finally from these two code units we build proper UTF-8 sequence, taking
481
+ into account the case (i.e, the number of bytes to write).
482
+ */
483
+ /**
484
+ * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce:
485
+ * t2 => [0ccc|cccc] [10cc|cccc]
486
+ * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb])
487
+ */
488
+ #define simdutf_vec(x) _mm_set1_epi16(static_cast<uint16_t>(x))
489
+ // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc]
490
+ const __m128i t0 = _mm_shuffle_epi8(in_16, dup_even);
491
+ // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc]
492
+ const __m128i t1 = _mm_and_si128(t0, simdutf_vec(0b0011111101111111));
493
+ // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc]
494
+ const __m128i t2 = _mm_or_si128(t1, simdutf_vec(0b1000000000000000));
495
+
496
+ // [aaaa|bbbb|bbcc|cccc] => [0000|aaaa|bbbb|bbcc]
497
+ const __m128i s0 = _mm_srli_epi16(in_16, 4);
498
+ // [0000|aaaa|bbbb|bbcc] => [0000|aaaa|bbbb|bb00]
499
+ const __m128i s1 = _mm_and_si128(s0, simdutf_vec(0b0000111111111100));
500
+ // [0000|aaaa|bbbb|bb00] => [00bb|bbbb|0000|aaaa]
501
+ const __m128i s2 = _mm_maddubs_epi16(s1, simdutf_vec(0x0140));
502
+ // [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa]
503
+ const __m128i s3 = _mm_or_si128(s2, simdutf_vec(0b1100000011100000));
504
+ const __m128i m0 = _mm_andnot_si128(one_or_two_bytes_bytemask,
505
+ simdutf_vec(0b0100000000000000));
506
+ const __m128i s4 = _mm_xor_si128(s3, m0);
507
+ #undef simdutf_vec
508
+
509
+ // 4. expand code units 16-bit => 32-bit
510
+ const __m128i out0 = _mm_unpacklo_epi16(t2, s4);
511
+ const __m128i out1 = _mm_unpackhi_epi16(t2, s4);
512
+
513
+ // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle
514
+ const uint16_t mask =
515
+ (one_byte_bitmask & 0x5555) | (one_or_two_bytes_bitmask & 0xaaaa);
516
+ if (mask == 0) {
517
+ // We only have three-byte code units. Use fast path.
518
+ const __m128i shuffle = _mm_setr_epi8(2, 3, 1, 6, 7, 5, 10, 11, 9, 14,
519
+ 15, 13, -1, -1, -1, -1);
520
+ const __m128i utf8_0 = _mm_shuffle_epi8(out0, shuffle);
521
+ const __m128i utf8_1 = _mm_shuffle_epi8(out1, shuffle);
522
+ _mm_storeu_si128((__m128i *)utf8_output, utf8_0);
523
+ utf8_output += 12;
524
+ _mm_storeu_si128((__m128i *)utf8_output, utf8_1);
525
+ utf8_output += 12;
526
+ buf += 8;
527
+ continue;
528
+ }
529
+ const uint8_t mask0 = uint8_t(mask);
530
+
531
+ const uint8_t *row0 =
532
+ &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0];
533
+ const __m128i shuffle0 = _mm_loadu_si128((__m128i *)(row0 + 1));
534
+ const __m128i utf8_0 = _mm_shuffle_epi8(out0, shuffle0);
535
+
536
+ const uint8_t mask1 = static_cast<uint8_t>(mask >> 8);
537
+
538
+ const uint8_t *row1 =
539
+ &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0];
540
+ const __m128i shuffle1 = _mm_loadu_si128((__m128i *)(row1 + 1));
541
+ const __m128i utf8_1 = _mm_shuffle_epi8(out1, shuffle1);
542
+
543
+ _mm_storeu_si128((__m128i *)utf8_output, utf8_0);
544
+ utf8_output += row0[0];
545
+ _mm_storeu_si128((__m128i *)utf8_output, utf8_1);
546
+ utf8_output += row1[0];
547
+
548
+ buf += 8;
549
+ } else {
550
+ // case: at least one 32-bit word produces a surrogate pair in UTF-16 <=>
551
+ // will produce four UTF-8 bytes. Let us do a scalar fallback. It may seem
552
+ // wasteful to use scalar code, but being efficient with SIMD in the
553
+ // presence of surrogate pairs may require non-trivial tables.
554
+ size_t forward = 15; // convert up to 15 code points one at a time; errors here report the exact index
555
+ size_t k = 0;
556
+ if (size_t(end - buf) < forward + 1) {
557
+ forward = size_t(end - buf - 1);
558
+ }
559
+ for (; k < forward; k++) {
560
+ uint32_t word = buf[k];
561
+ if ((word & 0xFFFFFF80) == 0) {
562
+ *utf8_output++ = char(word);
563
+ } else if ((word & 0xFFFFF800) == 0) {
564
+ *utf8_output++ = char((word >> 6) | 0b11000000);
565
+ *utf8_output++ = char((word & 0b111111) | 0b10000000);
566
+ } else if ((word & 0xFFFF0000) == 0) {
567
+ if (word >= 0xD800 && word <= 0xDFFF) {
568
+ return std::make_pair(
569
+ result(error_code::SURROGATE, buf - start + k), utf8_output);
570
+ }
571
+ *utf8_output++ = char((word >> 12) | 0b11100000);
572
+ *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000);
573
+ *utf8_output++ = char((word & 0b111111) | 0b10000000);
574
+ } else {
575
+ if (word > 0x10FFFF) {
576
+ return std::make_pair(
577
+ result(error_code::TOO_LARGE, buf - start + k), utf8_output);
578
+ }
579
+ *utf8_output++ = char((word >> 18) | 0b11110000);
580
+ *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000);
581
+ *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000);
582
+ *utf8_output++ = char((word & 0b111111) | 0b10000000);
583
+ }
584
+ }
585
+ buf += k;
586
+ }
587
+ } // while
588
+ return std::make_pair(result(error_code::SUCCESS, buf - start), utf8_output);
589
+ }
@@ -0,0 +1,58 @@
1
+ // depends on "tables/utf8_to_utf16_tables.h"
2
+
3
+ // Convert up to 12 bytes from utf8 to latin1 using a mask indicating the
4
+ // end of the code points. The table lookup uses only the least significant
5
+ // 12 bits of the mask. latin1_output is advanced by 12 (fast path) or 6 (table path).
6
+ // It returns how many bytes were consumed (up to 12).
7
+ size_t convert_masked_utf8_to_latin1(const char *input,
8
+ uint64_t utf8_end_of_code_point_mask,
9
+ char *&latin1_output) {
10
+ // we use an approach where we try to process up to 12 input bytes.
11
+ // Why 12 input bytes and not 16? Because we are concerned with the size of
12
+ // the lookup tables. Also 12 is nicely divisible by two and three.
13
+ //
14
+ //
15
+ // Optimization note: our main path below is load-latency dependent. Thus it
16
+ // is maybe beneficial to have fast paths that depend on branch prediction but
17
+ // have less latency. This results in more instructions but, potentially, also
18
+ // higher speeds.
19
+ //
20
+ const __m128i in = _mm_loadu_si128((__m128i *)input);
21
+ const uint16_t input_utf8_end_of_code_point_mask =
22
+ utf8_end_of_code_point_mask &
23
+ 0xfff; // we are only processing 12 bytes in case it is not all ASCII
24
+ if (utf8_end_of_code_point_mask == 0xfff) { // NOTE(review): compares the full 64-bit mask, not the 12-bit value above, so this hits only when no higher bit is set -- confirm this is intended
25
+ // Every one of the 12 bytes ends a code point: store the 16 loaded bytes unchanged and keep 12.
26
+ _mm_storeu_si128(reinterpret_cast<__m128i *>(latin1_output), in);
27
+ latin1_output += 12; // We wrote 12 characters.
28
+ return 12; // We consumed 12 bytes.
29
+ }
30
+ /// We do not have a fast path available, so we fallback.
31
+ const uint8_t idx =
32
+ tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][0];
33
+ const uint8_t consumed =
34
+ tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][1];
35
+ // this indicates an invalid input: nothing is written, only `consumed` is returned
36
+ if (idx >= 64) {
37
+ return consumed;
38
+ }
39
+ // Here we should have (idx < 64), if not, there is a bug in the validation or
40
+ // elsewhere. This is a relatively easy scenario: we process SIX (6) input
41
+ // code points. The max length in bytes of six code points spanning between
42
+ // 1 and 2 bytes each is 12 bytes. On
43
+ // processors where pdep/pext is fast, we might be able to use a small lookup
44
+ // table.
45
+ const __m128i sh =
46
+ _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]);
47
+ const __m128i perm = _mm_shuffle_epi8(in, sh); // presumably places each code point in its own 16-bit lane -- depends on the shufutf8 table, verify there
48
+ const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi16(0x7f)); // low 7 bits of each 16-bit lane
49
+ const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi16(0x1f00)); // low 5 bits of each lane's high byte
50
+ __m128i composed = _mm_or_si128(ascii, _mm_srli_epi16(highbyte, 2)); // shift the high-byte bits down by 2 and merge with the low bits
51
+ const __m128i latin1_packed = _mm_packus_epi16(composed, composed);
52
+ // writing 8 bytes even though we only care about the first 6 bytes.
53
+ // performance note: it would be faster to use _mm_storeu_si128, we should
54
+ // investigate.
55
+ _mm_storel_epi64((__m128i *)latin1_output, latin1_packed);
56
+ latin1_output += 6; // We wrote 6 bytes.
57
+ return consumed;
58
+ }