react-native-quick-crypto 1.0.19 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (732)
  1. package/QuickCrypto.podspec +12 -38
  2. package/README.md +2 -0
  3. package/android/CMakeLists.txt +3 -0
  4. package/cpp/utils/HybridUtils.cpp +39 -77
  5. package/deps/simdutf/.clang-format +4 -0
  6. package/deps/simdutf/.github/ISSUE_TEMPLATE/bug_report.md +62 -0
  7. package/deps/simdutf/.github/ISSUE_TEMPLATE/config.yml +1 -0
  8. package/deps/simdutf/.github/ISSUE_TEMPLATE/feature_request.md +35 -0
  9. package/deps/simdutf/.github/ISSUE_TEMPLATE/standard-issue-template.md +29 -0
  10. package/deps/simdutf/.github/pull_request_template.md +51 -0
  11. package/deps/simdutf/.github/workflows/aarch64.yml +39 -0
  12. package/deps/simdutf/.github/workflows/alpine.yml +27 -0
  13. package/deps/simdutf/.github/workflows/amalgamation_demos.yml +34 -0
  14. package/deps/simdutf/.github/workflows/armv7.yml +32 -0
  15. package/deps/simdutf/.github/workflows/atomic_fuzz.yml +25 -0
  16. package/deps/simdutf/.github/workflows/cifuzz.yml +37 -0
  17. package/deps/simdutf/.github/workflows/clangformat.yml +36 -0
  18. package/deps/simdutf/.github/workflows/debian-latestcxxstandards.yml +40 -0
  19. package/deps/simdutf/.github/workflows/debian.yml +33 -0
  20. package/deps/simdutf/.github/workflows/documentation.yml +36 -0
  21. package/deps/simdutf/.github/workflows/emscripten.yml +19 -0
  22. package/deps/simdutf/.github/workflows/loongarch64-gcc-14.2.yml +39 -0
  23. package/deps/simdutf/.github/workflows/macos-latest.yml +29 -0
  24. package/deps/simdutf/.github/workflows/msys2-clang.yml +48 -0
  25. package/deps/simdutf/.github/workflows/msys2.yml +50 -0
  26. package/deps/simdutf/.github/workflows/ppc64le.yml +29 -0
  27. package/deps/simdutf/.github/workflows/rvv-1024-clang-18.yml +35 -0
  28. package/deps/simdutf/.github/workflows/rvv-128-clang-17.yml +35 -0
  29. package/deps/simdutf/.github/workflows/rvv-256-gcc-14.yml +31 -0
  30. package/deps/simdutf/.github/workflows/s390x.yml +29 -0
  31. package/deps/simdutf/.github/workflows/selective-amalgamation.yml +29 -0
  32. package/deps/simdutf/.github/workflows/typos.yml +19 -0
  33. package/deps/simdutf/.github/workflows/ubuntu22-cxx20.yml +30 -0
  34. package/deps/simdutf/.github/workflows/ubuntu22.yml +32 -0
  35. package/deps/simdutf/.github/workflows/ubuntu22_gcc12.yml +27 -0
  36. package/deps/simdutf/.github/workflows/ubuntu22sani.yml +29 -0
  37. package/deps/simdutf/.github/workflows/ubuntu24-cxxstandards.yml +34 -0
  38. package/deps/simdutf/.github/workflows/ubuntu24-unsignedchar.yml +34 -0
  39. package/deps/simdutf/.github/workflows/ubuntu24.yml +32 -0
  40. package/deps/simdutf/.github/workflows/ubuntu24sani.yml +36 -0
  41. package/deps/simdutf/.github/workflows/ubuntu24sani_clang.yml +29 -0
  42. package/deps/simdutf/.github/workflows/vs17-arm-ci.yml +21 -0
  43. package/deps/simdutf/.github/workflows/vs17-ci-cxx20.yml +41 -0
  44. package/deps/simdutf/.github/workflows/vs17-ci.yml +41 -0
  45. package/deps/simdutf/.github/workflows/vs17-clang-ci.yml +41 -0
  46. package/deps/simdutf/.github/workflows/vs17-cxxstandards.yml +36 -0
  47. package/deps/simdutf/AI_USAGE_POLICY.md +56 -0
  48. package/deps/simdutf/AUTHORS +6 -0
  49. package/deps/simdutf/CMakeLists.txt +231 -0
  50. package/deps/simdutf/CONTRIBUTING.md +214 -0
  51. package/deps/simdutf/CONTRIBUTORS +1 -0
  52. package/deps/simdutf/Doxyfile +2584 -0
  53. package/deps/simdutf/LICENSE-APACHE +201 -0
  54. package/deps/simdutf/LICENSE-MIT +18 -0
  55. package/deps/simdutf/Makefile.crosscompile +54 -0
  56. package/deps/simdutf/README-RVV.md +16 -0
  57. package/deps/simdutf/README.md +2782 -0
  58. package/deps/simdutf/SECURITY.md +8 -0
  59. package/deps/simdutf/benchmarks/CMakeLists.txt +101 -0
  60. package/deps/simdutf/benchmarks/alignment.cpp +150 -0
  61. package/deps/simdutf/benchmarks/base64/CMakeLists.txt +30 -0
  62. package/deps/simdutf/benchmarks/base64/benchmark_base64.cpp +875 -0
  63. package/deps/simdutf/benchmarks/base64/libbase64_spaces.h +49 -0
  64. package/deps/simdutf/benchmarks/base64/node_base64.h +227 -0
  65. package/deps/simdutf/benchmarks/base64/openssl3_base64.h +334 -0
  66. package/deps/simdutf/benchmarks/benchmark.cpp +65 -0
  67. package/deps/simdutf/benchmarks/benchmark_to_well_formed_utf16.cpp +347 -0
  68. package/deps/simdutf/benchmarks/competition/.clang-format-ignore +5 -0
  69. package/deps/simdutf/benchmarks/competition/CppCon2018/utf_utils.cpp +1276 -0
  70. package/deps/simdutf/benchmarks/competition/CppCon2018/utf_utils.h +595 -0
  71. package/deps/simdutf/benchmarks/competition/README.md +7 -0
  72. package/deps/simdutf/benchmarks/competition/hoehrmann/hoehrmann.h +91 -0
  73. package/deps/simdutf/benchmarks/competition/inoue2008/inoue_utf8_to_utf16.h +444 -0
  74. package/deps/simdutf/benchmarks/competition/inoue2008/inoue_utf8_to_utf16_tables.h +13183 -0
  75. package/deps/simdutf/benchmarks/competition/inoue2008/script.py +73 -0
  76. package/deps/simdutf/benchmarks/competition/llvm/ConvertUTF.cpp +738 -0
  77. package/deps/simdutf/benchmarks/competition/llvm/ConvertUTF.h +293 -0
  78. package/deps/simdutf/benchmarks/competition/u8u16/COPYRIGHT +8 -0
  79. package/deps/simdutf/benchmarks/competition/u8u16/Makefile +44 -0
  80. package/deps/simdutf/benchmarks/competition/u8u16/OSL3.0.txt +169 -0
  81. package/deps/simdutf/benchmarks/competition/u8u16/Profiling/BOM_Profiler.h +148 -0
  82. package/deps/simdutf/benchmarks/competition/u8u16/Profiling/i386_timer.h +45 -0
  83. package/deps/simdutf/benchmarks/competition/u8u16/Profiling/ppc_timer.c +34 -0
  84. package/deps/simdutf/benchmarks/competition/u8u16/README +56 -0
  85. package/deps/simdutf/benchmarks/competition/u8u16/config/config_defs.h +43 -0
  86. package/deps/simdutf/benchmarks/competition/u8u16/config/g4_config.h +27 -0
  87. package/deps/simdutf/benchmarks/competition/u8u16/config/mmx_config.h +16 -0
  88. package/deps/simdutf/benchmarks/competition/u8u16/config/p4_config.h +18 -0
  89. package/deps/simdutf/benchmarks/competition/u8u16/config/p4_ideal_config.h +16 -0
  90. package/deps/simdutf/benchmarks/competition/u8u16/config/spu_config.h +28 -0
  91. package/deps/simdutf/benchmarks/competition/u8u16/config/ssse3_config.h +20 -0
  92. package/deps/simdutf/benchmarks/competition/u8u16/iconv_u8u16.c +2 -0
  93. package/deps/simdutf/benchmarks/competition/u8u16/lib/altivec_simd.h +440 -0
  94. package/deps/simdutf/benchmarks/competition/u8u16/lib/libgen/make_basic_ops.py +121 -0
  95. package/deps/simdutf/benchmarks/competition/u8u16/lib/libgen/make_half_operand_versions.py +158 -0
  96. package/deps/simdutf/benchmarks/competition/u8u16/lib/libgen/make_test.py +270 -0
  97. package/deps/simdutf/benchmarks/competition/u8u16/lib/mmx_simd.h +141 -0
  98. package/deps/simdutf/benchmarks/competition/u8u16/lib/mmx_simd_basic.h +216 -0
  99. package/deps/simdutf/benchmarks/competition/u8u16/lib/mmx_simd_built_in.h +119 -0
  100. package/deps/simdutf/benchmarks/competition/u8u16/lib/mmx_simd_modified.h +2430 -0
  101. package/deps/simdutf/benchmarks/competition/u8u16/lib/outline.txt +39 -0
  102. package/deps/simdutf/benchmarks/competition/u8u16/lib/spu_simd.h +421 -0
  103. package/deps/simdutf/benchmarks/competition/u8u16/lib/sse_simd.h +836 -0
  104. package/deps/simdutf/benchmarks/competition/u8u16/lib/stdint.h +222 -0
  105. package/deps/simdutf/benchmarks/competition/u8u16/libu8u16_BE.c +4 -0
  106. package/deps/simdutf/benchmarks/competition/u8u16/libu8u16_LE.c +5 -0
  107. package/deps/simdutf/benchmarks/competition/u8u16/proto/u8u16.py +390 -0
  108. package/deps/simdutf/benchmarks/competition/u8u16/src/Makefile +18 -0
  109. package/deps/simdutf/benchmarks/competition/u8u16/src/bytelex.h +448 -0
  110. package/deps/simdutf/benchmarks/competition/u8u16/src/charsets/ASCII_EBCDIC.h +284 -0
  111. package/deps/simdutf/benchmarks/competition/u8u16/src/libu8u16.c +1975 -0
  112. package/deps/simdutf/benchmarks/competition/u8u16/src/libu8u16.pdf +0 -0
  113. package/deps/simdutf/benchmarks/competition/u8u16/src/libu8u16.w +2263 -0
  114. package/deps/simdutf/benchmarks/competition/u8u16/src/multiliteral.h +239 -0
  115. package/deps/simdutf/benchmarks/competition/u8u16/src/u8u16.c +232 -0
  116. package/deps/simdutf/benchmarks/competition/u8u16/src/x8x16.c +194 -0
  117. package/deps/simdutf/benchmarks/competition/u8u16/src/xml_error.c +193 -0
  118. package/deps/simdutf/benchmarks/competition/u8u16/src/xml_error.h +167 -0
  119. package/deps/simdutf/benchmarks/competition/u8u16/src/xmldecl.c +288 -0
  120. package/deps/simdutf/benchmarks/competition/u8u16/src/xmldecl.h +117 -0
  121. package/deps/simdutf/benchmarks/competition/u8u16/u8u16_g4.c +2 -0
  122. package/deps/simdutf/benchmarks/competition/u8u16/u8u16_mmx.c +2 -0
  123. package/deps/simdutf/benchmarks/competition/u8u16/u8u16_p4.c +3 -0
  124. package/deps/simdutf/benchmarks/competition/u8u16/u8u16_p4_ideal.c +2 -0
  125. package/deps/simdutf/benchmarks/competition/u8u16/u8u16_spu.c +2 -0
  126. package/deps/simdutf/benchmarks/competition/u8u16/u8u16_ssse3.c +3 -0
  127. package/deps/simdutf/benchmarks/competition/u8u16/x8x16_p4.c +2 -0
  128. package/deps/simdutf/benchmarks/competition/utf8lut/LICENSE +23 -0
  129. package/deps/simdutf/benchmarks/competition/utf8lut/data/test_minimal.txt +44 -0
  130. package/deps/simdutf/benchmarks/competition/utf8lut/readme.md +106 -0
  131. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_clang_corr_tests.cmd +11 -0
  132. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_clang_corr_tests.sh +13 -0
  133. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_gcc_corr_tests.sh +13 -0
  134. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_gcc_example.sh +13 -0
  135. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_gcc_file_conv.sh +14 -0
  136. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_gcc_iconv_lib.sh +11 -0
  137. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_gcc_iconv_sample.sh +8 -0
  138. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_mingw_corr_tests.cmd +12 -0
  139. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_mingw_example.cmd +13 -0
  140. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_mingw_file_conv.cmd +14 -0
  141. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_mingw_iconv_lib.cmd +11 -0
  142. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_mingw_iconv_sample.cmd +8 -0
  143. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_msvc_corr_tests.cmd +11 -0
  144. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_msvc_example.cmd +12 -0
  145. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_msvc_file_conv.cmd +13 -0
  146. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_msvc_iconv_lib.cmd +10 -0
  147. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_msvc_iconv_sample.cmd +9 -0
  148. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/html_table.py +25 -0
  149. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/measure.py +94 -0
  150. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/resize.py +20 -0
  151. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/wipe_all.cmd +2 -0
  152. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/wipe_interm.cmd +1 -0
  153. package/deps/simdutf/benchmarks/competition/utf8lut/src/base/CustomMemcpy.h +75 -0
  154. package/deps/simdutf/benchmarks/competition/utf8lut/src/base/PerfDefs.h +47 -0
  155. package/deps/simdutf/benchmarks/competition/utf8lut/src/base/Timing.cpp +17 -0
  156. package/deps/simdutf/benchmarks/competition/utf8lut/src/base/Timing.h +76 -0
  157. package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/AllProcessors.cpp +35 -0
  158. package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/BaseBufferProcessor.cpp +117 -0
  159. package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/BaseBufferProcessor.h +210 -0
  160. package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/BufferDecoder.h +158 -0
  161. package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/BufferEncoder.h +104 -0
  162. package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/ProcessorPlugins.h +334 -0
  163. package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/ProcessorSelector.h +186 -0
  164. package/deps/simdutf/benchmarks/competition/utf8lut/src/core/DecoderLut.cpp +140 -0
  165. package/deps/simdutf/benchmarks/competition/utf8lut/src/core/DecoderLut.h +42 -0
  166. package/deps/simdutf/benchmarks/competition/utf8lut/src/core/DecoderProcess.h +100 -0
  167. package/deps/simdutf/benchmarks/competition/utf8lut/src/core/Dfa.h +57 -0
  168. package/deps/simdutf/benchmarks/competition/utf8lut/src/core/EncoderLut.cpp +85 -0
  169. package/deps/simdutf/benchmarks/competition/utf8lut/src/core/EncoderLut.h +27 -0
  170. package/deps/simdutf/benchmarks/competition/utf8lut/src/core/EncoderProcess.h +126 -0
  171. package/deps/simdutf/benchmarks/competition/utf8lut/src/core/ProcessTrivial.h +108 -0
  172. package/deps/simdutf/benchmarks/competition/utf8lut/src/iconv/iconv.cpp +139 -0
  173. package/deps/simdutf/benchmarks/competition/utf8lut/src/iconv/iconv.h +74 -0
  174. package/deps/simdutf/benchmarks/competition/utf8lut/src/message/MessageConverter.cpp +65 -0
  175. package/deps/simdutf/benchmarks/competition/utf8lut/src/message/MessageConverter.h +91 -0
  176. package/deps/simdutf/benchmarks/competition/utf8lut/src/tests/CorrectnessTests.cpp +772 -0
  177. package/deps/simdutf/benchmarks/competition/utf8lut/src/tests/Example.cpp +12 -0
  178. package/deps/simdutf/benchmarks/competition/utf8lut/src/tests/FileConverter.cpp +486 -0
  179. package/deps/simdutf/benchmarks/competition/utf8lut/src/tests/iconv_sample.c +162 -0
  180. package/deps/simdutf/benchmarks/competition/utf8lut/src/utf8lut.h +15 -0
  181. package/deps/simdutf/benchmarks/competition/utf8sse4/fromutf8-sse.cpp +292 -0
  182. package/deps/simdutf/benchmarks/competition/utfcpp/LICENSE +23 -0
  183. package/deps/simdutf/benchmarks/competition/utfcpp/README.md +1503 -0
  184. package/deps/simdutf/benchmarks/competition/utfcpp/source/utf8/checked.h +335 -0
  185. package/deps/simdutf/benchmarks/competition/utfcpp/source/utf8/core.h +338 -0
  186. package/deps/simdutf/benchmarks/competition/utfcpp/source/utf8/cpp11.h +103 -0
  187. package/deps/simdutf/benchmarks/competition/utfcpp/source/utf8/cpp17.h +103 -0
  188. package/deps/simdutf/benchmarks/competition/utfcpp/source/utf8/unchecked.h +274 -0
  189. package/deps/simdutf/benchmarks/competition/utfcpp/source/utf8.h +34 -0
  190. package/deps/simdutf/benchmarks/dataset/README.md +155 -0
  191. package/deps/simdutf/benchmarks/dataset/emoji.txt +204 -0
  192. package/deps/simdutf/benchmarks/dataset/scripts/utf8type.py +40 -0
  193. package/deps/simdutf/benchmarks/dataset/wikipedia_mars/Makefile +80 -0
  194. package/deps/simdutf/benchmarks/dataset/wikipedia_mars/convert_to_utf6.py +20 -0
  195. package/deps/simdutf/benchmarks/find/CMakeLists.txt +6 -0
  196. package/deps/simdutf/benchmarks/find/findbenchmark.cpp +63 -0
  197. package/deps/simdutf/benchmarks/find/findbenchmarker.h +46 -0
  198. package/deps/simdutf/benchmarks/shortbench.cpp +555 -0
  199. package/deps/simdutf/benchmarks/src/CMakeLists.txt +52 -0
  200. package/deps/simdutf/benchmarks/src/apple_arm_events.h +1104 -0
  201. package/deps/simdutf/benchmarks/src/benchmark.cpp +3899 -0
  202. package/deps/simdutf/benchmarks/src/benchmark.h +317 -0
  203. package/deps/simdutf/benchmarks/src/benchmark_base.cpp +144 -0
  204. package/deps/simdutf/benchmarks/src/benchmark_base.h +98 -0
  205. package/deps/simdutf/benchmarks/src/cmdline.cpp +176 -0
  206. package/deps/simdutf/benchmarks/src/cmdline.h +35 -0
  207. package/deps/simdutf/benchmarks/src/event_counter.h +162 -0
  208. package/deps/simdutf/benchmarks/src/linux-perf-events.h +104 -0
  209. package/deps/simdutf/benchmarks/stream.cpp +209 -0
  210. package/deps/simdutf/benchmarks/threaded.cpp +123 -0
  211. package/deps/simdutf/cmake/CPM.cmake +1363 -0
  212. package/deps/simdutf/cmake/JoinPaths.cmake +23 -0
  213. package/deps/simdutf/cmake/add_cpp_test.cmake +68 -0
  214. package/deps/simdutf/cmake/simdutf-config.cmake.in +2 -0
  215. package/deps/simdutf/cmake/simdutf-flags.cmake +26 -0
  216. package/deps/simdutf/cmake/toolchains-ci/riscv64-linux-gnu.cmake +4 -0
  217. package/deps/simdutf/cmake/toolchains-dev/README.md +32 -0
  218. package/deps/simdutf/cmake/toolchains-dev/aarch64.cmake +14 -0
  219. package/deps/simdutf/cmake/toolchains-dev/loongarch64.cmake +22 -0
  220. package/deps/simdutf/cmake/toolchains-dev/powerpc64.cmake +16 -0
  221. package/deps/simdutf/cmake/toolchains-dev/powerpc64le.cmake +16 -0
  222. package/deps/simdutf/cmake/toolchains-dev/riscv64.cmake +16 -0
  223. package/deps/simdutf/cmake/toolchains-dev/rvv-spike.cmake +38 -0
  224. package/deps/simdutf/doc/avx512.png +0 -0
  225. package/deps/simdutf/doc/logo.png +0 -0
  226. package/deps/simdutf/doc/logo.svg +165 -0
  227. package/deps/simdutf/doc/node2023.png +0 -0
  228. package/deps/simdutf/doc/shortinput.md +78 -0
  229. package/deps/simdutf/doc/utf16utf8.png +0 -0
  230. package/deps/simdutf/doc/utf8utf16.png +0 -0
  231. package/deps/simdutf/doc/widelogo.png +0 -0
  232. package/deps/simdutf/doxygen.py +50 -0
  233. package/deps/simdutf/fuzz/.clang-format +9 -0
  234. package/deps/simdutf/fuzz/CMakeLists.txt +45 -0
  235. package/deps/simdutf/fuzz/README.md +168 -0
  236. package/deps/simdutf/fuzz/atomic_base64.cpp +448 -0
  237. package/deps/simdutf/fuzz/base64.cpp +278 -0
  238. package/deps/simdutf/fuzz/build.sh +83 -0
  239. package/deps/simdutf/fuzz/conversion.cpp +669 -0
  240. package/deps/simdutf/fuzz/helpers/.clang-format-ignore +1 -0
  241. package/deps/simdutf/fuzz/helpers/common.h +135 -0
  242. package/deps/simdutf/fuzz/helpers/nameof.hpp +1258 -0
  243. package/deps/simdutf/fuzz/main.cpp +72 -0
  244. package/deps/simdutf/fuzz/minimize_and_cleanse.sh +87 -0
  245. package/deps/simdutf/fuzz/misc.cpp +216 -0
  246. package/deps/simdutf/fuzz/random_fuzz.sh +154 -0
  247. package/deps/simdutf/fuzz/roundtrip.cpp +588 -0
  248. package/deps/simdutf/fuzz/safe_conversion.cpp +104 -0
  249. package/deps/simdutf/include/simdutf/avx512.h +79 -0
  250. package/deps/simdutf/include/simdutf/base64_implementation.h +158 -0
  251. package/deps/simdutf/include/simdutf/base64_tables.h +887 -0
  252. package/deps/simdutf/include/simdutf/common_defs.h +186 -0
  253. package/deps/simdutf/include/simdutf/compiler_check.h +50 -0
  254. package/deps/simdutf/include/simdutf/constexpr_ptr.h +138 -0
  255. package/deps/simdutf/include/simdutf/encoding_types.h +189 -0
  256. package/deps/simdutf/include/simdutf/error.h +126 -0
  257. package/deps/simdutf/include/simdutf/implementation.h +7081 -0
  258. package/deps/simdutf/include/simdutf/internal/isadetection.h +325 -0
  259. package/deps/simdutf/include/simdutf/portability.h +285 -0
  260. package/deps/simdutf/include/simdutf/scalar/ascii.h +86 -0
  261. package/deps/simdutf/include/simdutf/scalar/atomic_util.h +105 -0
  262. package/deps/simdutf/include/simdutf/scalar/base64.h +911 -0
  263. package/deps/simdutf/include/simdutf/scalar/latin1.h +26 -0
  264. package/deps/simdutf/include/simdutf/scalar/latin1_to_utf16/latin1_to_utf16.h +52 -0
  265. package/deps/simdutf/include/simdutf/scalar/latin1_to_utf32/latin1_to_utf32.h +27 -0
  266. package/deps/simdutf/include/simdutf/scalar/latin1_to_utf8/latin1_to_utf8.h +191 -0
  267. package/deps/simdutf/include/simdutf/scalar/swap_bytes.h +35 -0
  268. package/deps/simdutf/include/simdutf/scalar/utf16.h +226 -0
  269. package/deps/simdutf/include/simdutf/scalar/utf16_to_latin1/utf16_to_latin1.h +108 -0
  270. package/deps/simdutf/include/simdutf/scalar/utf16_to_latin1/valid_utf16_to_latin1.h +40 -0
  271. package/deps/simdutf/include/simdutf/scalar/utf16_to_utf32/utf16_to_utf32.h +86 -0
  272. package/deps/simdutf/include/simdutf/scalar/utf16_to_utf32/valid_utf16_to_utf32.h +44 -0
  273. package/deps/simdutf/include/simdutf/scalar/utf16_to_utf8/utf16_to_utf8.h +295 -0
  274. package/deps/simdutf/include/simdutf/scalar/utf16_to_utf8/valid_utf16_to_utf8.h +91 -0
  275. package/deps/simdutf/include/simdutf/scalar/utf32.h +82 -0
  276. package/deps/simdutf/include/simdutf/scalar/utf32_to_latin1/utf32_to_latin1.h +68 -0
  277. package/deps/simdutf/include/simdutf/scalar/utf32_to_latin1/valid_utf32_to_latin1.h +67 -0
  278. package/deps/simdutf/include/simdutf/scalar/utf32_to_utf16/utf32_to_utf16.h +84 -0
  279. package/deps/simdutf/include/simdutf/scalar/utf32_to_utf16/valid_utf32_to_utf16.h +44 -0
  280. package/deps/simdutf/include/simdutf/scalar/utf32_to_utf8/utf32_to_utf8.h +142 -0
  281. package/deps/simdutf/include/simdutf/scalar/utf32_to_utf8/valid_utf32_to_utf8.h +72 -0
  282. package/deps/simdutf/include/simdutf/scalar/utf8.h +326 -0
  283. package/deps/simdutf/include/simdutf/scalar/utf8_to_latin1/utf8_to_latin1.h +225 -0
  284. package/deps/simdutf/include/simdutf/scalar/utf8_to_latin1/valid_utf8_to_latin1.h +87 -0
  285. package/deps/simdutf/include/simdutf/scalar/utf8_to_utf16/utf8_to_utf16.h +342 -0
  286. package/deps/simdutf/include/simdutf/scalar/utf8_to_utf16/valid_utf8_to_utf16.h +106 -0
  287. package/deps/simdutf/include/simdutf/scalar/utf8_to_utf32/utf8_to_utf32.h +299 -0
  288. package/deps/simdutf/include/simdutf/scalar/utf8_to_utf32/valid_utf8_to_utf32.h +83 -0
  289. package/deps/simdutf/include/simdutf/simdutf_version.h +26 -0
  290. package/deps/simdutf/include/simdutf.h +26 -0
  291. package/deps/simdutf/include/simdutf_c.h +342 -0
  292. package/deps/simdutf/riscv/Dockerfile +16 -0
  293. package/deps/simdutf/riscv/README.md +24 -0
  294. package/deps/simdutf/riscv/remove-docker-station +8 -0
  295. package/deps/simdutf/riscv/run-docker-station +31 -0
  296. package/deps/simdutf/scripts/.flake8 +2 -0
  297. package/deps/simdutf/scripts/Makefile +2 -0
  298. package/deps/simdutf/scripts/README_ADD_FUNCTION.md +49 -0
  299. package/deps/simdutf/scripts/add_function.py +330 -0
  300. package/deps/simdutf/scripts/amalgamation_tests.py +156 -0
  301. package/deps/simdutf/scripts/base64/Makefile +2 -0
  302. package/deps/simdutf/scripts/base64/README.md +2 -0
  303. package/deps/simdutf/scripts/base64/avx512.py +76 -0
  304. package/deps/simdutf/scripts/base64/neon_decode.py +143 -0
  305. package/deps/simdutf/scripts/base64/neon_generate_lut.py +101 -0
  306. package/deps/simdutf/scripts/base64/sse.py +252 -0
  307. package/deps/simdutf/scripts/base64/sseregular.py +160 -0
  308. package/deps/simdutf/scripts/base64/sseurl.py +283 -0
  309. package/deps/simdutf/scripts/base64/table.py +59 -0
  310. package/deps/simdutf/scripts/base64bench_print.py +145 -0
  311. package/deps/simdutf/scripts/benchmark-all.py +119 -0
  312. package/deps/simdutf/scripts/benchmark_print.py +324 -0
  313. package/deps/simdutf/scripts/check_feature_macros.py +156 -0
  314. package/deps/simdutf/scripts/check_typos.sh +13 -0
  315. package/deps/simdutf/scripts/clang_format.sh +35 -0
  316. package/deps/simdutf/scripts/clang_format_docker.sh +38 -0
  317. package/deps/simdutf/scripts/common.py +24 -0
  318. package/deps/simdutf/scripts/compilation_benchmark.py +55 -0
  319. package/deps/simdutf/scripts/compile_many_variations.sh +64 -0
  320. package/deps/simdutf/scripts/create_latex_table.py +62 -0
  321. package/deps/simdutf/scripts/docker/Dockerfile +14 -0
  322. package/deps/simdutf/scripts/docker/Makefile +9 -0
  323. package/deps/simdutf/scripts/docker/README.md +30 -0
  324. package/deps/simdutf/scripts/docker/llvm.gpg +0 -0
  325. package/deps/simdutf/scripts/ppc64_convert_utf16_to_utf8.py +155 -0
  326. package/deps/simdutf/scripts/prepare_doxygen.sh +21 -0
  327. package/deps/simdutf/scripts/release.py +197 -0
  328. package/deps/simdutf/scripts/shortinputplots.py +97 -0
  329. package/deps/simdutf/scripts/sse_convert_utf16_to_utf8.py +422 -0
  330. package/deps/simdutf/scripts/sse_convert_utf32_to_utf16.py +105 -0
  331. package/deps/simdutf/scripts/sse_utf8_utf16_decode.py +186 -0
  332. package/deps/simdutf/scripts/sse_validate_utf16le_proof.py +137 -0
  333. package/deps/simdutf/scripts/sse_validate_utf16le_testcases.py +129 -0
  334. package/deps/simdutf/scripts/table.py +207 -0
  335. package/deps/simdutf/scripts/tests/new.txt +33 -0
  336. package/deps/simdutf/scripts/tests/old.txt +33 -0
  337. package/deps/simdutf/scripts/tests/results.txt +272 -0
  338. package/deps/simdutf/simdutf.pc.in +11 -0
  339. package/deps/simdutf/singleheader/.flake8 +2 -0
  340. package/deps/simdutf/singleheader/CMakeLists.txt +64 -0
  341. package/deps/simdutf/singleheader/README-dev.md +81 -0
  342. package/deps/simdutf/singleheader/README.md +19 -0
  343. package/deps/simdutf/singleheader/amalgamate.py +513 -0
  344. package/deps/simdutf/singleheader/amalgamation_demo.c +59 -0
  345. package/deps/simdutf/singleheader/amalgamation_demo.cpp +54 -0
  346. package/deps/simdutf/singleheader/test-features.py +262 -0
  347. package/deps/simdutf/src/CMakeLists.txt +78 -0
  348. package/deps/simdutf/src/arm64/arm_base64.cpp +791 -0
  349. package/deps/simdutf/src/arm64/arm_convert_latin1_to_utf16.cpp +24 -0
  350. package/deps/simdutf/src/arm64/arm_convert_latin1_to_utf32.cpp +24 -0
  351. package/deps/simdutf/src/arm64/arm_convert_latin1_to_utf8.cpp +70 -0
  352. package/deps/simdutf/src/arm64/arm_convert_utf16_to_latin1.cpp +61 -0
  353. package/deps/simdutf/src/arm64/arm_convert_utf16_to_utf32.cpp +185 -0
  354. package/deps/simdutf/src/arm64/arm_convert_utf16_to_utf8.cpp +780 -0
  355. package/deps/simdutf/src/arm64/arm_convert_utf32_to_latin1.cpp +60 -0
  356. package/deps/simdutf/src/arm64/arm_convert_utf32_to_utf16.cpp +208 -0
  357. package/deps/simdutf/src/arm64/arm_convert_utf32_to_utf8.cpp +505 -0
  358. package/deps/simdutf/src/arm64/arm_convert_utf8_to_latin1.cpp +69 -0
  359. package/deps/simdutf/src/arm64/arm_convert_utf8_to_utf16.cpp +313 -0
  360. package/deps/simdutf/src/arm64/arm_convert_utf8_to_utf32.cpp +179 -0
  361. package/deps/simdutf/src/arm64/arm_find.cpp +199 -0
  362. package/deps/simdutf/src/arm64/arm_utf16fix.cpp +185 -0
  363. package/deps/simdutf/src/arm64/arm_validate_utf16.cpp +165 -0
  364. package/deps/simdutf/src/arm64/arm_validate_utf32le.cpp +65 -0
  365. package/deps/simdutf/src/arm64/implementation.cpp +1442 -0
  366. package/deps/simdutf/src/encoding_types.cpp +67 -0
  367. package/deps/simdutf/src/error.cpp +3 -0
  368. package/deps/simdutf/src/fallback/implementation.cpp +589 -0
  369. package/deps/simdutf/src/generic/ascii_validation.h +50 -0
  370. package/deps/simdutf/src/generic/base64.h +233 -0
  371. package/deps/simdutf/src/generic/base64lengths.h +63 -0
  372. package/deps/simdutf/src/generic/buf_block_reader.h +109 -0
  373. package/deps/simdutf/src/generic/find.h +75 -0
  374. package/deps/simdutf/src/generic/utf16/change_endianness.h +24 -0
  375. package/deps/simdutf/src/generic/utf16/count_code_points_bytemask.h +58 -0
  376. package/deps/simdutf/src/generic/utf16/to_well_formed.h +93 -0
  377. package/deps/simdutf/src/generic/utf16/utf32_length_from_utf16.h +15 -0
  378. package/deps/simdutf/src/generic/utf16/utf8_length_from_utf16.h +35 -0
  379. package/deps/simdutf/src/generic/utf16/utf8_length_from_utf16_bytemask.h +199 -0
  380. package/deps/simdutf/src/generic/utf16.h +73 -0
  381. package/deps/simdutf/src/generic/utf32.h +136 -0
  382. package/deps/simdutf/src/generic/utf8/utf16_length_from_utf8_bytemask.h +53 -0
  383. package/deps/simdutf/src/generic/utf8.h +92 -0
  384. package/deps/simdutf/src/generic/utf8_to_latin1/utf8_to_latin1.h +316 -0
  385. package/deps/simdutf/src/generic/utf8_to_latin1/valid_utf8_to_latin1.h +78 -0
  386. package/deps/simdutf/src/generic/utf8_to_utf16/utf8_to_utf16.h +332 -0
  387. package/deps/simdutf/src/generic/utf8_to_utf16/valid_utf8_to_utf16.h +74 -0
  388. package/deps/simdutf/src/generic/utf8_to_utf32/utf8_to_utf32.h +318 -0
  389. package/deps/simdutf/src/generic/utf8_to_utf32/valid_utf8_to_utf32.h +42 -0
  390. package/deps/simdutf/src/generic/utf8_validation/utf8_lookup4_algorithm.h +223 -0
  391. package/deps/simdutf/src/generic/utf8_validation/utf8_validator.h +84 -0
  392. package/deps/simdutf/src/generic/validate_utf16.h +164 -0
  393. package/deps/simdutf/src/generic/validate_utf32.h +99 -0
  394. package/deps/simdutf/src/haswell/avx2_base64.cpp +837 -0
  395. package/deps/simdutf/src/haswell/avx2_convert_latin1_to_utf16.cpp +28 -0
  396. package/deps/simdutf/src/haswell/avx2_convert_latin1_to_utf32.cpp +20 -0
  397. package/deps/simdutf/src/haswell/avx2_convert_latin1_to_utf8.cpp +83 -0
  398. package/deps/simdutf/src/haswell/avx2_convert_utf16_to_latin1.cpp +83 -0
  399. package/deps/simdutf/src/haswell/avx2_convert_utf16_to_utf32.cpp +210 -0
  400. package/deps/simdutf/src/haswell/avx2_convert_utf16_to_utf8.cpp +602 -0
  401. package/deps/simdutf/src/haswell/avx2_convert_utf32_to_latin1.cpp +116 -0
  402. package/deps/simdutf/src/haswell/avx2_convert_utf32_to_utf16.cpp +164 -0
  403. package/deps/simdutf/src/haswell/avx2_convert_utf32_to_utf8.cpp +569 -0
  404. package/deps/simdutf/src/haswell/avx2_convert_utf8_to_latin1.cpp +60 -0
  405. package/deps/simdutf/src/haswell/avx2_convert_utf8_to_utf16.cpp +195 -0
  406. package/deps/simdutf/src/haswell/avx2_convert_utf8_to_utf32.cpp +135 -0
  407. package/deps/simdutf/src/haswell/avx2_utf16fix.cpp +173 -0
  408. package/deps/simdutf/src/haswell/avx2_validate_utf16.cpp +17 -0
  409. package/deps/simdutf/src/haswell/implementation.cpp +1447 -0
  410. package/deps/simdutf/src/icelake/icelake_ascii_validation.inl.cpp +19 -0
  411. package/deps/simdutf/src/icelake/icelake_base64.inl.cpp +630 -0
  412. package/deps/simdutf/src/icelake/icelake_common.inl.cpp +37 -0
  413. package/deps/simdutf/src/icelake/icelake_convert_latin1_to_utf16.inl.cpp +36 -0
  414. package/deps/simdutf/src/icelake/icelake_convert_latin1_to_utf32.inl.cpp +23 -0
  415. package/deps/simdutf/src/icelake/icelake_convert_latin1_to_utf8.inl.cpp +107 -0
  416. package/deps/simdutf/src/icelake/icelake_convert_utf16_to_latin1.inl.cpp +103 -0
  417. package/deps/simdutf/src/icelake/icelake_convert_utf16_to_utf32.inl.cpp +136 -0
  418. package/deps/simdutf/src/icelake/icelake_convert_utf16_to_utf8.inl.cpp +206 -0
  419. package/deps/simdutf/src/icelake/icelake_convert_utf32_to_latin1.inl.cpp +74 -0
  420. package/deps/simdutf/src/icelake/icelake_convert_utf32_to_utf16.inl.cpp +338 -0
  421. package/deps/simdutf/src/icelake/icelake_convert_utf32_to_utf8.inl.cpp +574 -0
  422. package/deps/simdutf/src/icelake/icelake_convert_utf8_to_latin1.inl.cpp +104 -0
  423. package/deps/simdutf/src/icelake/icelake_convert_utf8_to_utf16.inl.cpp +75 -0
  424. package/deps/simdutf/src/icelake/icelake_convert_valid_utf8_to_latin1.inl.cpp +69 -0
  425. package/deps/simdutf/src/icelake/icelake_find.inl.cpp +146 -0
  426. package/deps/simdutf/src/icelake/icelake_from_utf8.inl.cpp +266 -0
  427. package/deps/simdutf/src/icelake/icelake_from_valid_utf8.inl.cpp +136 -0
  428. package/deps/simdutf/src/icelake/icelake_macros.inl.cpp +143 -0
  429. package/deps/simdutf/src/icelake/icelake_utf16fix.cpp +138 -0
  430. package/deps/simdutf/src/icelake/icelake_utf32_validation.inl.cpp +63 -0
  431. package/deps/simdutf/src/icelake/icelake_utf8_common.inl.cpp +753 -0
  432. package/deps/simdutf/src/icelake/icelake_utf8_length_from_utf16.inl.cpp +269 -0
  433. package/deps/simdutf/src/icelake/icelake_utf8_validation.inl.cpp +116 -0
  434. package/deps/simdutf/src/icelake/implementation.cpp +1903 -0
  435. package/deps/simdutf/src/implementation.cpp +2526 -0
  436. package/deps/simdutf/src/lasx/implementation.cpp +1531 -0
  437. package/deps/simdutf/src/lasx/lasx_base64.cpp +695 -0
  438. package/deps/simdutf/src/lasx/lasx_convert_latin1_to_utf16.cpp +76 -0
  439. package/deps/simdutf/src/lasx/lasx_convert_latin1_to_utf32.cpp +55 -0
  440. package/deps/simdutf/src/lasx/lasx_convert_latin1_to_utf8.cpp +65 -0
  441. package/deps/simdutf/src/lasx/lasx_convert_utf16_to_latin1.cpp +64 -0
  442. package/deps/simdutf/src/lasx/lasx_convert_utf16_to_utf32.cpp +183 -0
  443. package/deps/simdutf/src/lasx/lasx_convert_utf16_to_utf8.cpp +550 -0
  444. package/deps/simdutf/src/lasx/lasx_convert_utf32_to_latin1.cpp +73 -0
  445. package/deps/simdutf/src/lasx/lasx_convert_utf32_to_utf16.cpp +218 -0
  446. package/deps/simdutf/src/lasx/lasx_convert_utf32_to_utf8.cpp +589 -0
  447. package/deps/simdutf/src/lasx/lasx_convert_utf8_to_latin1.cpp +72 -0
  448. package/deps/simdutf/src/lasx/lasx_convert_utf8_to_utf16.cpp +296 -0
  449. package/deps/simdutf/src/lasx/lasx_convert_utf8_to_utf32.cpp +190 -0
  450. package/deps/simdutf/src/lasx/lasx_find.cpp +64 -0
  451. package/deps/simdutf/src/lasx/lasx_validate_utf16.cpp +13 -0
  452. package/deps/simdutf/src/lasx/lasx_validate_utf32le.cpp +84 -0
  453. package/deps/simdutf/src/lsx/implementation.cpp +1417 -0
  454. package/deps/simdutf/src/lsx/lsx_base64.cpp +675 -0
  455. package/deps/simdutf/src/lsx/lsx_convert_latin1_to_utf16.cpp +39 -0
  456. package/deps/simdutf/src/lsx/lsx_convert_latin1_to_utf32.cpp +27 -0
  457. package/deps/simdutf/src/lsx/lsx_convert_latin1_to_utf8.cpp +56 -0
  458. package/deps/simdutf/src/lsx/lsx_convert_utf16_to_latin1.cpp +64 -0
  459. package/deps/simdutf/src/lsx/lsx_convert_utf16_to_utf32.cpp +133 -0
  460. package/deps/simdutf/src/lsx/lsx_convert_utf16_to_utf8.cpp +518 -0
  461. package/deps/simdutf/src/lsx/lsx_convert_utf32_to_latin1.cpp +66 -0
  462. package/deps/simdutf/src/lsx/lsx_convert_utf32_to_utf16.cpp +155 -0
  463. package/deps/simdutf/src/lsx/lsx_convert_utf32_to_utf8.cpp +459 -0
  464. package/deps/simdutf/src/lsx/lsx_convert_utf8_to_latin1.cpp +75 -0
  465. package/deps/simdutf/src/lsx/lsx_convert_utf8_to_utf16.cpp +291 -0
  466. package/deps/simdutf/src/lsx/lsx_convert_utf8_to_utf32.cpp +179 -0
  467. package/deps/simdutf/src/lsx/lsx_find.cpp +60 -0
  468. package/deps/simdutf/src/lsx/lsx_validate_utf16.cpp +13 -0
  469. package/deps/simdutf/src/lsx/lsx_validate_utf32le.cpp +68 -0
  470. package/deps/simdutf/src/ppc64/implementation.cpp +992 -0
  471. package/deps/simdutf/src/ppc64/ppc64_base64.cpp +480 -0
  472. package/deps/simdutf/src/ppc64/ppc64_base64_internal_tests.cpp +401 -0
  473. package/deps/simdutf/src/ppc64/ppc64_convert_latin1_to_utf16.cpp +12 -0
  474. package/deps/simdutf/src/ppc64/ppc64_convert_latin1_to_utf32.cpp +12 -0
  475. package/deps/simdutf/src/ppc64/ppc64_convert_latin1_to_utf8.cpp +149 -0
  476. package/deps/simdutf/src/ppc64/ppc64_convert_utf16_to_latin1.cpp +67 -0
  477. package/deps/simdutf/src/ppc64/ppc64_convert_utf16_to_utf32.cpp +87 -0
  478. package/deps/simdutf/src/ppc64/ppc64_convert_utf16_to_utf8.cpp +296 -0
  479. package/deps/simdutf/src/ppc64/ppc64_convert_utf32_to_latin1.cpp +57 -0
  480. package/deps/simdutf/src/ppc64/ppc64_convert_utf32_to_utf16.cpp +117 -0
  481. package/deps/simdutf/src/ppc64/ppc64_convert_utf32_to_utf8.cpp +166 -0
  482. package/deps/simdutf/src/ppc64/ppc64_convert_utf8_to_latin1.cpp +69 -0
  483. package/deps/simdutf/src/ppc64/ppc64_convert_utf8_to_utf16.cpp +211 -0
  484. package/deps/simdutf/src/ppc64/ppc64_convert_utf8_to_utf32.cpp +153 -0
  485. package/deps/simdutf/src/ppc64/ppc64_utf16_to_utf8_tables.h +1011 -0
  486. package/deps/simdutf/src/ppc64/ppc64_utf8_length_from_latin1.cpp +37 -0
  487. package/deps/simdutf/src/ppc64/ppc64_validate_utf16.cpp +19 -0
  488. package/deps/simdutf/src/ppc64/templates.cpp +91 -0
  489. package/deps/simdutf/src/rvv/implementation.cpp +138 -0
  490. package/deps/simdutf/src/rvv/rvv_find.cpp +27 -0
  491. package/deps/simdutf/src/rvv/rvv_helpers.inl.cpp +23 -0
  492. package/deps/simdutf/src/rvv/rvv_latin1_to.inl.cpp +71 -0
  493. package/deps/simdutf/src/rvv/rvv_length_from.inl.cpp +164 -0
  494. package/deps/simdutf/src/rvv/rvv_utf16_to.inl.cpp +399 -0
  495. package/deps/simdutf/src/rvv/rvv_utf16fix.cpp +110 -0
  496. package/deps/simdutf/src/rvv/rvv_utf32_to.inl.cpp +307 -0
  497. package/deps/simdutf/src/rvv/rvv_utf8_to.inl.cpp +435 -0
  498. package/deps/simdutf/src/rvv/rvv_validate.inl.cpp +275 -0
  499. package/deps/simdutf/src/simdutf/arm64/begin.h +2 -0
  500. package/deps/simdutf/src/simdutf/arm64/bitmanipulation.h +34 -0
  501. package/deps/simdutf/src/simdutf/arm64/end.h +2 -0
  502. package/deps/simdutf/src/simdutf/arm64/implementation.h +307 -0
  503. package/deps/simdutf/src/simdutf/arm64/intrinsics.h +10 -0
  504. package/deps/simdutf/src/simdutf/arm64/simd.h +547 -0
  505. package/deps/simdutf/src/simdutf/arm64/simd16-inl.h +403 -0
  506. package/deps/simdutf/src/simdutf/arm64/simd32-inl.h +129 -0
  507. package/deps/simdutf/src/simdutf/arm64/simd64-inl.h +28 -0
  508. package/deps/simdutf/src/simdutf/arm64.h +43 -0
  509. package/deps/simdutf/src/simdutf/fallback/begin.h +1 -0
  510. package/deps/simdutf/src/simdutf/fallback/bitmanipulation.h +13 -0
  511. package/deps/simdutf/src/simdutf/fallback/end.h +1 -0
  512. package/deps/simdutf/src/simdutf/fallback/implementation.h +331 -0
  513. package/deps/simdutf/src/simdutf/fallback.h +42 -0
  514. package/deps/simdutf/src/simdutf/haswell/begin.h +15 -0
  515. package/deps/simdutf/src/simdutf/haswell/bitmanipulation.h +35 -0
  516. package/deps/simdutf/src/simdutf/haswell/end.h +13 -0
  517. package/deps/simdutf/src/simdutf/haswell/implementation.h +338 -0
  518. package/deps/simdutf/src/simdutf/haswell/intrinsics.h +67 -0
  519. package/deps/simdutf/src/simdutf/haswell/simd.h +363 -0
  520. package/deps/simdutf/src/simdutf/haswell/simd16-inl.h +261 -0
  521. package/deps/simdutf/src/simdutf/haswell/simd32-inl.h +111 -0
  522. package/deps/simdutf/src/simdutf/haswell/simd64-inl.h +34 -0
  523. package/deps/simdutf/src/simdutf/haswell.h +63 -0
  524. package/deps/simdutf/src/simdutf/icelake/begin.h +14 -0
  525. package/deps/simdutf/src/simdutf/icelake/bitmanipulation.h +44 -0
  526. package/deps/simdutf/src/simdutf/icelake/end.h +12 -0
  527. package/deps/simdutf/src/simdutf/icelake/implementation.h +346 -0
  528. package/deps/simdutf/src/simdutf/icelake/intrinsics.h +138 -0
  529. package/deps/simdutf/src/simdutf/icelake/simd.h +17 -0
  530. package/deps/simdutf/src/simdutf/icelake/simd16-inl.h +90 -0
  531. package/deps/simdutf/src/simdutf/icelake/simd32-inl.h +47 -0
  532. package/deps/simdutf/src/simdutf/icelake.h +81 -0
  533. package/deps/simdutf/src/simdutf/lasx/begin.h +8 -0
  534. package/deps/simdutf/src/simdutf/lasx/bitmanipulation.h +25 -0
  535. package/deps/simdutf/src/simdutf/lasx/end.h +8 -0
  536. package/deps/simdutf/src/simdutf/lasx/implementation.h +310 -0
  537. package/deps/simdutf/src/simdutf/lasx/intrinsics.h +319 -0
  538. package/deps/simdutf/src/simdutf/lasx/simd.h +551 -0
  539. package/deps/simdutf/src/simdutf/lasx/simd16-inl.h +234 -0
  540. package/deps/simdutf/src/simdutf/lasx/simd32-inl.h +74 -0
  541. package/deps/simdutf/src/simdutf/lasx/simd64-inl.h +52 -0
  542. package/deps/simdutf/src/simdutf/lasx.h +49 -0
  543. package/deps/simdutf/src/simdutf/lsx/begin.h +2 -0
  544. package/deps/simdutf/src/simdutf/lsx/bitmanipulation.h +25 -0
  545. package/deps/simdutf/src/simdutf/lsx/end.h +2 -0
  546. package/deps/simdutf/src/simdutf/lsx/implementation.h +309 -0
  547. package/deps/simdutf/src/simdutf/lsx/intrinsics.h +196 -0
  548. package/deps/simdutf/src/simdutf/lsx/simd.h +421 -0
  549. package/deps/simdutf/src/simdutf/lsx/simd16-inl.h +242 -0
  550. package/deps/simdutf/src/simdutf/lsx/simd32-inl.h +69 -0
  551. package/deps/simdutf/src/simdutf/lsx/simd64-inl.h +50 -0
  552. package/deps/simdutf/src/simdutf/lsx.h +52 -0
  553. package/deps/simdutf/src/simdutf/ppc64/begin.h +1 -0
  554. package/deps/simdutf/src/simdutf/ppc64/bitmanipulation.h +29 -0
  555. package/deps/simdutf/src/simdutf/ppc64/end.h +1 -0
  556. package/deps/simdutf/src/simdutf/ppc64/implementation.h +348 -0
  557. package/deps/simdutf/src/simdutf/ppc64/intrinsics.h +19 -0
  558. package/deps/simdutf/src/simdutf/ppc64/simd.h +177 -0
  559. package/deps/simdutf/src/simdutf/ppc64/simd16-inl.h +327 -0
  560. package/deps/simdutf/src/simdutf/ppc64/simd32-inl.h +247 -0
  561. package/deps/simdutf/src/simdutf/ppc64/simd8-inl.h +618 -0
  562. package/deps/simdutf/src/simdutf/ppc64.h +40 -0
  563. package/deps/simdutf/src/simdutf/rvv/begin.h +7 -0
  564. package/deps/simdutf/src/simdutf/rvv/end.h +7 -0
  565. package/deps/simdutf/src/simdutf/rvv/implementation.h +321 -0
  566. package/deps/simdutf/src/simdutf/rvv/intrinsics.h +131 -0
  567. package/deps/simdutf/src/simdutf/rvv.h +41 -0
  568. package/deps/simdutf/src/simdutf/westmere/begin.h +8 -0
  569. package/deps/simdutf/src/simdutf/westmere/bitmanipulation.h +37 -0
  570. package/deps/simdutf/src/simdutf/westmere/end.h +8 -0
  571. package/deps/simdutf/src/simdutf/westmere/implementation.h +338 -0
  572. package/deps/simdutf/src/simdutf/westmere/intrinsics.h +38 -0
  573. package/deps/simdutf/src/simdutf/westmere/simd.h +379 -0
  574. package/deps/simdutf/src/simdutf/westmere/simd16-inl.h +242 -0
  575. package/deps/simdutf/src/simdutf/westmere/simd32-inl.h +151 -0
  576. package/deps/simdutf/src/simdutf/westmere/simd64-inl.h +33 -0
  577. package/deps/simdutf/src/simdutf/westmere.h +59 -0
  578. package/deps/simdutf/src/simdutf.cpp +152 -0
  579. package/deps/simdutf/src/simdutf_c.cpp +525 -0
  580. package/deps/simdutf/src/tables/utf16_to_utf8_tables.h +768 -0
  581. package/deps/simdutf/src/tables/utf32_to_utf16_tables.h +53 -0
  582. package/deps/simdutf/src/tables/utf8_to_utf16_tables.h +826 -0
  583. package/deps/simdutf/src/westmere/implementation.cpp +1479 -0
  584. package/deps/simdutf/src/westmere/internal/loader.cpp +7 -0
  585. package/deps/simdutf/src/westmere/internal/write_v_u16_11bits_to_utf8.cpp +66 -0
  586. package/deps/simdutf/src/westmere/sse_base64.cpp +672 -0
  587. package/deps/simdutf/src/westmere/sse_convert_latin1_to_utf16.cpp +21 -0
  588. package/deps/simdutf/src/westmere/sse_convert_latin1_to_utf32.cpp +31 -0
  589. package/deps/simdutf/src/westmere/sse_convert_latin1_to_utf8.cpp +71 -0
  590. package/deps/simdutf/src/westmere/sse_convert_utf16_to_latin1.cpp +70 -0
  591. package/deps/simdutf/src/westmere/sse_convert_utf16_to_utf32.cpp +206 -0
  592. package/deps/simdutf/src/westmere/sse_convert_utf16_to_utf8.cpp +504 -0
  593. package/deps/simdutf/src/westmere/sse_convert_utf32_to_latin1.cpp +82 -0
  594. package/deps/simdutf/src/westmere/sse_convert_utf32_to_utf16.cpp +209 -0
  595. package/deps/simdutf/src/westmere/sse_convert_utf32_to_utf8.cpp +589 -0
  596. package/deps/simdutf/src/westmere/sse_convert_utf8_to_latin1.cpp +58 -0
  597. package/deps/simdutf/src/westmere/sse_convert_utf8_to_utf16.cpp +197 -0
  598. package/deps/simdutf/src/westmere/sse_convert_utf8_to_utf32.cpp +141 -0
  599. package/deps/simdutf/src/westmere/sse_utf16fix.cpp +82 -0
  600. package/deps/simdutf/src/westmere/sse_validate_utf16.cpp +17 -0
  601. package/deps/simdutf/tests/CMakeLists.txt +483 -0
  602. package/deps/simdutf/tests/atomic_base64_tests.cpp +2845 -0
  603. package/deps/simdutf/tests/base64_tests.cpp +3617 -0
  604. package/deps/simdutf/tests/basic_fuzzer.cpp +805 -0
  605. package/deps/simdutf/tests/bele_tests.cpp +182 -0
  606. package/deps/simdutf/tests/constexpr_base64_tests.cpp +387 -0
  607. package/deps/simdutf/tests/convert_latin1_to_utf16be_tests.cpp +52 -0
  608. package/deps/simdutf/tests/convert_latin1_to_utf16le_tests.cpp +80 -0
  609. package/deps/simdutf/tests/convert_latin1_to_utf32_tests.cpp +66 -0
  610. package/deps/simdutf/tests/convert_latin1_to_utf8_tests.cpp +120 -0
  611. package/deps/simdutf/tests/convert_utf16_to_utf8_safe_tests.cpp +203 -0
  612. package/deps/simdutf/tests/convert_utf16_to_utf8_with_replacement_tests.cpp +276 -0
  613. package/deps/simdutf/tests/convert_utf16be_to_latin1_tests.cpp +109 -0
  614. package/deps/simdutf/tests/convert_utf16be_to_latin1_tests_with_errors.cpp +136 -0
  615. package/deps/simdutf/tests/convert_utf16be_to_utf32_tests.cpp +193 -0
  616. package/deps/simdutf/tests/convert_utf16be_to_utf32_with_errors_tests.cpp +381 -0
  617. package/deps/simdutf/tests/convert_utf16be_to_utf8_tests.cpp +259 -0
  618. package/deps/simdutf/tests/convert_utf16be_to_utf8_with_errors_tests.cpp +266 -0
  619. package/deps/simdutf/tests/convert_utf16le_to_latin1_tests.cpp +148 -0
  620. package/deps/simdutf/tests/convert_utf16le_to_latin1_tests_with_errors.cpp +176 -0
  621. package/deps/simdutf/tests/convert_utf16le_to_utf32_tests.cpp +213 -0
  622. package/deps/simdutf/tests/convert_utf16le_to_utf32_with_errors_tests.cpp +318 -0
  623. package/deps/simdutf/tests/convert_utf16le_to_utf8_tests.cpp +343 -0
  624. package/deps/simdutf/tests/convert_utf16le_to_utf8_with_errors_tests.cpp +271 -0
  625. package/deps/simdutf/tests/convert_utf32_to_latin1_tests.cpp +111 -0
  626. package/deps/simdutf/tests/convert_utf32_to_latin1_with_errors_tests.cpp +96 -0
  627. package/deps/simdutf/tests/convert_utf32_to_utf16be_tests.cpp +148 -0
  628. package/deps/simdutf/tests/convert_utf32_to_utf16be_with_errors_tests.cpp +192 -0
  629. package/deps/simdutf/tests/convert_utf32_to_utf16le_tests.cpp +166 -0
  630. package/deps/simdutf/tests/convert_utf32_to_utf16le_with_errors_tests.cpp +215 -0
  631. package/deps/simdutf/tests/convert_utf32_to_utf8_tests.cpp +181 -0
  632. package/deps/simdutf/tests/convert_utf32_to_utf8_with_errors_tests.cpp +261 -0
  633. package/deps/simdutf/tests/convert_utf8_to_latin1_tests.cpp +516 -0
  634. package/deps/simdutf/tests/convert_utf8_to_latin1_with_errors_tests.cpp +579 -0
  635. package/deps/simdutf/tests/convert_utf8_to_utf16be_tests.cpp +412 -0
  636. package/deps/simdutf/tests/convert_utf8_to_utf16be_with_errors_tests.cpp +480 -0
  637. package/deps/simdutf/tests/convert_utf8_to_utf16le_tests.cpp +671 -0
  638. package/deps/simdutf/tests/convert_utf8_to_utf16le_with_errors_tests.cpp +455 -0
  639. package/deps/simdutf/tests/convert_utf8_to_utf32_tests.cpp +1204 -0
  640. package/deps/simdutf/tests/convert_utf8_to_utf32_with_errors_tests.cpp +337 -0
  641. package/deps/simdutf/tests/convert_valid_utf16be_to_latin1_tests.cpp +37 -0
  642. package/deps/simdutf/tests/convert_valid_utf16be_to_utf32_tests.cpp +97 -0
  643. package/deps/simdutf/tests/convert_valid_utf16be_to_utf8_tests.cpp +126 -0
  644. package/deps/simdutf/tests/convert_valid_utf16le_to_latin1_tests.cpp +71 -0
  645. package/deps/simdutf/tests/convert_valid_utf16le_to_utf32_tests.cpp +122 -0
  646. package/deps/simdutf/tests/convert_valid_utf16le_to_utf8_tests.cpp +244 -0
  647. package/deps/simdutf/tests/convert_valid_utf32_to_latin1_tests.cpp +49 -0
  648. package/deps/simdutf/tests/convert_valid_utf32_to_utf16be_tests.cpp +92 -0
  649. package/deps/simdutf/tests/convert_valid_utf32_to_utf16le_tests.cpp +114 -0
  650. package/deps/simdutf/tests/convert_valid_utf32_to_utf8_tests.cpp +109 -0
  651. package/deps/simdutf/tests/convert_valid_utf8_to_latin1_tests.cpp +84 -0
  652. package/deps/simdutf/tests/convert_valid_utf8_to_utf16be_tests.cpp +124 -0
  653. package/deps/simdutf/tests/convert_valid_utf8_to_utf16le_tests.cpp +221 -0
  654. package/deps/simdutf/tests/convert_valid_utf8_to_utf32_tests.cpp +155 -0
  655. package/deps/simdutf/tests/count_utf16be.cpp +64 -0
  656. package/deps/simdutf/tests/count_utf16le.cpp +61 -0
  657. package/deps/simdutf/tests/count_utf8.cpp +87 -0
  658. package/deps/simdutf/tests/detect_encodings_tests.cpp +312 -0
  659. package/deps/simdutf/tests/embed/valid_utf8.txt +1 -0
  660. package/deps/simdutf/tests/embed_tests.cpp +22 -0
  661. package/deps/simdutf/tests/find_tests.cpp +77 -0
  662. package/deps/simdutf/tests/fixed_string_tests.cpp +153 -0
  663. package/deps/simdutf/tests/helpers/CMakeLists.txt +25 -0
  664. package/deps/simdutf/tests/helpers/compiletime_conversions.h +222 -0
  665. package/deps/simdutf/tests/helpers/fixed_string.h +267 -0
  666. package/deps/simdutf/tests/helpers/random_int.cpp +30 -0
  667. package/deps/simdutf/tests/helpers/random_int.h +39 -0
  668. package/deps/simdutf/tests/helpers/random_utf16.cpp +123 -0
  669. package/deps/simdutf/tests/helpers/random_utf16.h +52 -0
  670. package/deps/simdutf/tests/helpers/random_utf32.cpp +41 -0
  671. package/deps/simdutf/tests/helpers/random_utf32.h +40 -0
  672. package/deps/simdutf/tests/helpers/random_utf8.cpp +93 -0
  673. package/deps/simdutf/tests/helpers/random_utf8.h +36 -0
  674. package/deps/simdutf/tests/helpers/test.cpp +231 -0
  675. package/deps/simdutf/tests/helpers/test.h +193 -0
  676. package/deps/simdutf/tests/helpers/transcode_test_base.cpp +1257 -0
  677. package/deps/simdutf/tests/helpers/transcode_test_base.h +683 -0
  678. package/deps/simdutf/tests/helpers/utf16.h +27 -0
  679. package/deps/simdutf/tests/installation_tests/find/CMakeLists.txt +43 -0
  680. package/deps/simdutf/tests/installation_tests/from_fetch/CMakeLists.txt +47 -0
  681. package/deps/simdutf/tests/internal_tests.cpp +27 -0
  682. package/deps/simdutf/tests/null_safety_tests.cpp +94 -0
  683. package/deps/simdutf/tests/random_fuzzer.cpp +779 -0
  684. package/deps/simdutf/tests/readme_tests.cpp +274 -0
  685. package/deps/simdutf/tests/reference/CMakeLists.txt +23 -0
  686. package/deps/simdutf/tests/reference/decode_utf16.h +81 -0
  687. package/deps/simdutf/tests/reference/decode_utf32.h +47 -0
  688. package/deps/simdutf/tests/reference/encode_latin1.cpp +1 -0
  689. package/deps/simdutf/tests/reference/encode_latin1.h +32 -0
  690. package/deps/simdutf/tests/reference/encode_utf16.cpp +49 -0
  691. package/deps/simdutf/tests/reference/encode_utf16.h +20 -0
  692. package/deps/simdutf/tests/reference/encode_utf32.cpp +1 -0
  693. package/deps/simdutf/tests/reference/encode_utf32.h +36 -0
  694. package/deps/simdutf/tests/reference/encode_utf8.cpp +1 -0
  695. package/deps/simdutf/tests/reference/encode_utf8.h +40 -0
  696. package/deps/simdutf/tests/reference/validate_utf16.cpp +60 -0
  697. package/deps/simdutf/tests/reference/validate_utf16.h +14 -0
  698. package/deps/simdutf/tests/reference/validate_utf16_to_latin1.cpp +35 -0
  699. package/deps/simdutf/tests/reference/validate_utf16_to_latin1.h +13 -0
  700. package/deps/simdutf/tests/reference/validate_utf32.cpp +27 -0
  701. package/deps/simdutf/tests/reference/validate_utf32.h +12 -0
  702. package/deps/simdutf/tests/reference/validate_utf32_to_latin1.cpp +27 -0
  703. package/deps/simdutf/tests/reference/validate_utf32_to_latin1.h +12 -0
  704. package/deps/simdutf/tests/reference/validate_utf8.cpp +82 -0
  705. package/deps/simdutf/tests/reference/validate_utf8.h +11 -0
  706. package/deps/simdutf/tests/reference/validate_utf8_to_latin1.cpp +43 -0
  707. package/deps/simdutf/tests/reference/validate_utf8_to_latin1.h +12 -0
  708. package/deps/simdutf/tests/select_implementation.cpp +43 -0
  709. package/deps/simdutf/tests/simdutf_c_tests.cpp +244 -0
  710. package/deps/simdutf/tests/span_tests.cpp +401 -0
  711. package/deps/simdutf/tests/special_tests.cpp +559 -0
  712. package/deps/simdutf/tests/straight_c_test.c +187 -0
  713. package/deps/simdutf/tests/text_encoding_tests.cpp +77 -0
  714. package/deps/simdutf/tests/to_well_formed_utf16_tests.cpp +377 -0
  715. package/deps/simdutf/tests/utf8_length_from_utf16_tests.cpp +202 -0
  716. package/deps/simdutf/tests/validate_ascii_basic_tests.cpp +165 -0
  717. package/deps/simdutf/tests/validate_ascii_with_errors_tests.cpp +77 -0
  718. package/deps/simdutf/tests/validate_utf16be_basic_tests.cpp +175 -0
  719. package/deps/simdutf/tests/validate_utf16be_with_errors_tests.cpp +188 -0
  720. package/deps/simdutf/tests/validate_utf16le_basic_tests.cpp +268 -0
  721. package/deps/simdutf/tests/validate_utf16le_with_errors_tests.cpp +274 -0
  722. package/deps/simdutf/tests/validate_utf32_basic_tests.cpp +92 -0
  723. package/deps/simdutf/tests/validate_utf32_with_errors_tests.cpp +114 -0
  724. package/deps/simdutf/tests/validate_utf8_basic_tests.cpp +178 -0
  725. package/deps/simdutf/tests/validate_utf8_brute_force_tests.cpp +88 -0
  726. package/deps/simdutf/tests/validate_utf8_puzzler_tests.cpp +33 -0
  727. package/deps/simdutf/tests/validate_utf8_with_errors_tests.cpp +228 -0
  728. package/deps/simdutf/tools/CMakeLists.txt +85 -0
  729. package/deps/simdutf/tools/fastbase64.cpp +250 -0
  730. package/deps/simdutf/tools/sutf.cpp +556 -0
  731. package/deps/simdutf/tools/sutf.h +40 -0
  732. package/package.json +2 -2
@@ -0,0 +1,574 @@
1
+ // file included directly
2
+
3
+ // Converts UTF-32 code units from buf (len code units) to UTF-8 at utf8_output using 256-bit vectors, consuming up to 16 code units per loop iteration and stopping once fewer than 16 + safety_margin code units remain (that tail is not converted here). Returns {next unread input position, end of written output}, or {nullptr, output position} when a surrogate or a value above 0x10FFFF is detected. Todo: currently, this is just the haswell code, optimize for icelake kernel.
4
+ std::pair<const char32_t *, char *>
5
+ avx512_convert_utf32_to_utf8(const char32_t *buf, size_t len,
6
+ char *utf8_output) {
7
+ const char32_t *end = buf + len;
8
+ const __m256i v_0000 = _mm256_setzero_si256();
9
+ const __m256i v_ffff0000 = _mm256_set1_epi32((uint32_t)0xffff0000);
10
+ const __m256i v_ff80 = _mm256_set1_epi16((uint16_t)0xff80);
11
+ const __m256i v_f800 = _mm256_set1_epi16((uint16_t)0xf800);
12
+ const __m256i v_c080 = _mm256_set1_epi16((uint16_t)0xc080);
13
+ const __m256i v_7fffffff = _mm256_set1_epi32((uint32_t)0x7fffffff);
14
+ __m256i running_max = _mm256_setzero_si256(); // per-lane maximum of every code unit loaded by the loop; compared with 0x10FFFF after the loop
15
+ __m256i forbidden_bytemask = _mm256_setzero_si256(); // accumulates lanes holding a surrogate (0xD800..0xDFFF) seen in the 1/2/3-byte path; checked after the loop
16
+
17
+ const size_t safety_margin =
18
+ 12; // to avoid overruns, see issue
19
+ // https://github.com/simdutf/simdutf/issues/92
20
+
21
+ while (end - buf >= std::ptrdiff_t(16 + safety_margin)) {
22
+ __m256i in = _mm256_loadu_si256((__m256i *)buf);
23
+ __m256i nextin = _mm256_loadu_si256((__m256i *)buf + 1);
24
+ running_max = _mm256_max_epu32(_mm256_max_epu32(in, running_max), nextin);
25
+
26
+ // Pack 32-bit UTF-32 code units to 16-bit UTF-16 code units with unsigned
27
+ // saturation (bit 31 is masked off first; running_max still sees the unmasked values)
28
+ __m256i in_16 = _mm256_packus_epi32(_mm256_and_si256(in, v_7fffffff),
29
+ _mm256_and_si256(nextin, v_7fffffff));
30
+ in_16 = _mm256_permute4x64_epi64(in_16, 0b11011000); // the pack works per 128-bit lane: reorder the 64-bit quarters (0,2,1,3) back into input order
31
+
32
+ // Try to apply UTF-16 => UTF-8 routine on 256 bits
33
+ // (haswell/avx2_convert_utf16_to_utf8.cpp)
34
+
35
+ if (_mm256_testz_si256(in_16, v_ff80)) { // ASCII fast path!!!!
36
+ // 1. pack the bytes
37
+ const __m128i utf8_packed = _mm_packus_epi16(
38
+ _mm256_castsi256_si128(in_16), _mm256_extractf128_si256(in_16, 1));
39
+ // 2. store (16 bytes)
40
+ _mm_storeu_si128((__m128i *)utf8_output, utf8_packed);
41
+ // 3. adjust pointers
42
+ buf += 16;
43
+ utf8_output += 16;
44
+ continue; // we are done for this round!
45
+ }
46
+ // no bits set above 7th bit
47
+ const __m256i one_byte_bytemask =
48
+ _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_ff80), v_0000);
49
+ const uint32_t one_byte_bitmask =
50
+ static_cast<uint32_t>(_mm256_movemask_epi8(one_byte_bytemask));
51
+
52
+ // no bits set above 11th bit
53
+ const __m256i one_or_two_bytes_bytemask =
54
+ _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_f800), v_0000);
55
+ const uint32_t one_or_two_bytes_bitmask =
56
+ static_cast<uint32_t>(_mm256_movemask_epi8(one_or_two_bytes_bytemask));
57
+ if (one_or_two_bytes_bitmask == 0xffffffff) {
58
+ // 1. prepare 2-byte values
59
+ // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8
60
+ // expected output : [110a|aaaa|10bb|bbbb] x 8
61
+ const __m256i v_1f00 = _mm256_set1_epi16((int16_t)0x1f00);
62
+ const __m256i v_003f = _mm256_set1_epi16((int16_t)0x003f);
63
+
64
+ // t0 = [000a|aaaa|bbbb|bb00]
65
+ const __m256i t0 = _mm256_slli_epi16(in_16, 2);
66
+ // t1 = [000a|aaaa|0000|0000]
67
+ const __m256i t1 = _mm256_and_si256(t0, v_1f00);
68
+ // t2 = [0000|0000|00bb|bbbb]
69
+ const __m256i t2 = _mm256_and_si256(in_16, v_003f);
70
+ // t3 = [000a|aaaa|00bb|bbbb]
71
+ const __m256i t3 = _mm256_or_si256(t1, t2);
72
+ // t4 = [110a|aaaa|10bb|bbbb]
73
+ const __m256i t4 = _mm256_or_si256(t3, v_c080);
74
+
75
+ // 2. merge ASCII and 2-byte codewords
76
+ const __m256i utf8_unpacked =
77
+ _mm256_blendv_epi8(t4, in_16, one_byte_bytemask);
78
+
79
+ // 3. prepare bitmask for 8-bit lookup
80
+ const uint32_t M0 = one_byte_bitmask & 0x55555555;
81
+ const uint32_t M1 = M0 >> 7;
82
+ const uint32_t M2 = (M1 | M0) & 0x00ff00ff;
83
+ // 4. pack the bytes
84
+
85
+ const uint8_t *row =
86
+ &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2)][0];
87
+ const uint8_t *row_2 =
88
+ &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2 >>
89
+ 16)][0];
90
+
91
+ const __m128i shuffle = _mm_loadu_si128((__m128i *)(row + 1));
92
+ const __m128i shuffle_2 = _mm_loadu_si128((__m128i *)(row_2 + 1));
93
+
94
+ const __m256i utf8_packed = _mm256_shuffle_epi8(
95
+ utf8_unpacked, _mm256_setr_m128i(shuffle, shuffle_2));
96
+ // 5. store bytes
97
+ _mm_storeu_si128((__m128i *)utf8_output,
98
+ _mm256_castsi256_si128(utf8_packed));
99
+ utf8_output += row[0];
100
+ _mm_storeu_si128((__m128i *)utf8_output,
101
+ _mm256_extractf128_si256(utf8_packed, 1));
102
+ utf8_output += row_2[0];
103
+
104
+ // 6. adjust pointers
105
+ buf += 16;
106
+ continue;
107
+ }
108
+ // Must check for overflow in packing: a lane is all-ones when neither of its two code units (one from in, one from nextin) has bits set above 0xFFFF
109
+ const __m256i saturation_bytemask = _mm256_cmpeq_epi32(
110
+ _mm256_and_si256(_mm256_or_si256(in, nextin), v_ffff0000), v_0000);
111
+ const uint32_t saturation_bitmask =
112
+ static_cast<uint32_t>(_mm256_movemask_epi8(saturation_bytemask));
113
+ if (saturation_bitmask == 0xffffffff) {
114
+ // case: code units from register produce either 1, 2 or 3 UTF-8 bytes
115
+ const __m256i v_d800 = _mm256_set1_epi16((uint16_t)0xd800);
116
+ forbidden_bytemask = _mm256_or_si256(
117
+ forbidden_bytemask,
118
+ _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_f800), v_d800));
119
+
120
+ const __m256i dup_even = _mm256_setr_epi16(
121
+ 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e,
122
+ 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e);
123
+
124
+ /* In this branch we handle three cases:
125
+ 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] -
126
+ single UTF-8 byte
127
+ 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - two
128
+ UTF-8 bytes
129
+ 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] -
130
+ three UTF-8 bytes
131
+
132
+ We expand the input word (16-bit) into two code units (32-bit), thus
133
+ we have room for four bytes. However, we need five distinct bit
134
+ layouts. Note that the last byte in cases #2 and #3 is the same.
135
+
136
+ We precompute byte 1 for case #1 and the common byte for cases #2 & #3
137
+ in register t2.
138
+
139
+ We precompute byte 1 for case #3 and -- **conditionally** -- precompute
140
+ either byte 1 for case #2 or byte 2 for case #3. Note that they
141
+ differ by exactly one bit.
142
+
143
+ Finally from these two code units we build proper UTF-8 sequence, taking
144
+ into account the case (i.e., the number of bytes to write).
145
+ */
146
+ /**
147
+ * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce:
148
+ * t2 => [0ccc|cccc] [10cc|cccc]
149
+ * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb])
150
+ */
151
+ #define simdutf_vec(x) _mm256_set1_epi16(static_cast<uint16_t>(x))
152
+ // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc]
153
+ const __m256i t0 = _mm256_shuffle_epi8(in_16, dup_even);
154
+ // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc]
155
+ const __m256i t1 = _mm256_and_si256(t0, simdutf_vec(0b0011111101111111));
156
+ // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc]
157
+ const __m256i t2 = _mm256_or_si256(t1, simdutf_vec(0b1000000000000000));
158
+
159
+ // [aaaa|bbbb|bbcc|cccc] => [0000|aaaa|bbbb|bbcc]
160
+ const __m256i s0 = _mm256_srli_epi16(in_16, 4);
161
+ // [0000|aaaa|bbbb|bbcc] => [0000|aaaa|bbbb|bb00]
162
+ const __m256i s1 = _mm256_and_si256(s0, simdutf_vec(0b0000111111111100));
163
+ // [0000|aaaa|bbbb|bb00] => [00bb|bbbb|0000|aaaa]
164
+ const __m256i s2 = _mm256_maddubs_epi16(s1, simdutf_vec(0x0140));
165
+ // [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa]
166
+ const __m256i s3 = _mm256_or_si256(s2, simdutf_vec(0b1100000011100000));
167
+ const __m256i m0 = _mm256_andnot_si256(one_or_two_bytes_bytemask,
168
+ simdutf_vec(0b0100000000000000));
169
+ const __m256i s4 = _mm256_xor_si256(s3, m0);
170
+ #undef simdutf_vec
171
+
172
+ // 4. expand code units 16-bit => 32-bit
173
+ const __m256i out0 = _mm256_unpacklo_epi16(t2, s4);
174
+ const __m256i out1 = _mm256_unpackhi_epi16(t2, s4);
175
+
176
+ // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle
177
+ const uint32_t mask = (one_byte_bitmask & 0x55555555) |
178
+ (one_or_two_bytes_bitmask & 0xaaaaaaaa);
179
+ // Due to the wider registers, the following path is less likely to be
180
+ // useful.
181
+ /*if(mask == 0) {
182
+ // We only have three-byte code units. Use fast path.
183
+ const __m256i shuffle =
184
+ _mm256_setr_epi8(2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1,
185
+ 2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1); const __m256i utf8_0 =
186
+ _mm256_shuffle_epi8(out0, shuffle); const __m256i utf8_1 =
187
+ _mm256_shuffle_epi8(out1, shuffle);
188
+ _mm_storeu_si128((__m128i*)utf8_output, _mm256_castsi256_si128(utf8_0));
189
+ utf8_output += 12;
190
+ _mm_storeu_si128((__m128i*)utf8_output, _mm256_castsi256_si128(utf8_1));
191
+ utf8_output += 12;
192
+ _mm_storeu_si128((__m128i*)utf8_output,
193
+ _mm256_extractf128_si256(utf8_0,1)); utf8_output += 12;
194
+ _mm_storeu_si128((__m128i*)utf8_output,
195
+ _mm256_extractf128_si256(utf8_1,1)); utf8_output += 12; buf += 16;
196
+ continue;
197
+ }*/
198
+ const uint8_t mask0 = uint8_t(mask);
199
+ const uint8_t *row0 =
200
+ &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0];
201
+ const __m128i shuffle0 = _mm_loadu_si128((__m128i *)(row0 + 1));
202
+ const __m128i utf8_0 =
203
+ _mm_shuffle_epi8(_mm256_castsi256_si128(out0), shuffle0);
204
+
205
+ const uint8_t mask1 = static_cast<uint8_t>(mask >> 8);
206
+ const uint8_t *row1 =
207
+ &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0];
208
+ const __m128i shuffle1 = _mm_loadu_si128((__m128i *)(row1 + 1));
209
+ const __m128i utf8_1 =
210
+ _mm_shuffle_epi8(_mm256_castsi256_si128(out1), shuffle1);
211
+
212
+ const uint8_t mask2 = static_cast<uint8_t>(mask >> 16);
213
+ const uint8_t *row2 =
214
+ &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask2][0];
215
+ const __m128i shuffle2 = _mm_loadu_si128((__m128i *)(row2 + 1));
216
+ const __m128i utf8_2 =
217
+ _mm_shuffle_epi8(_mm256_extractf128_si256(out0, 1), shuffle2);
218
+
219
+ const uint8_t mask3 = static_cast<uint8_t>(mask >> 24);
220
+ const uint8_t *row3 =
221
+ &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask3][0];
222
+ const __m128i shuffle3 = _mm_loadu_si128((__m128i *)(row3 + 1));
223
+ const __m128i utf8_3 =
224
+ _mm_shuffle_epi8(_mm256_extractf128_si256(out1, 1), shuffle3);
225
+
226
+ _mm_storeu_si128((__m128i *)utf8_output, utf8_0);
227
+ utf8_output += row0[0];
228
+ _mm_storeu_si128((__m128i *)utf8_output, utf8_1);
229
+ utf8_output += row1[0];
230
+ _mm_storeu_si128((__m128i *)utf8_output, utf8_2);
231
+ utf8_output += row2[0];
232
+ _mm_storeu_si128((__m128i *)utf8_output, utf8_3);
233
+ utf8_output += row3[0];
234
+ buf += 16;
235
+ } else {
236
+ // case: at least one 32-bit word is larger than 0xFFFF <=> it will
237
+ // produce four UTF-8 bytes. Let us do a scalar fallback. It may seem
238
+ // wasteful to use scalar code, but being efficient with SIMD may require
239
+ // large, non-trivial tables?
240
+ size_t forward = 15; // number of code units this scalar pass converts
241
+ size_t k = 0;
242
+ if (size_t(end - buf) < forward + 1) { // defensive clamp: the loop condition already guarantees more input than this
243
+ forward = size_t(end - buf - 1);
244
+ }
245
+ for (; k < forward; k++) {
246
+ uint32_t word = buf[k];
247
+ if ((word & 0xFFFFFF80) == 0) { // 1-byte (ASCII)
248
+ *utf8_output++ = char(word);
249
+ } else if ((word & 0xFFFFF800) == 0) { // 2-byte
250
+ *utf8_output++ = char((word >> 6) | 0b11000000);
251
+ *utf8_output++ = char((word & 0b111111) | 0b10000000);
252
+ } else if ((word & 0xFFFF0000) == 0) { // 3-byte
253
+ if (word >= 0xD800 && word <= 0xDFFF) {
254
+ return std::make_pair(nullptr, utf8_output);
255
+ }
256
+ *utf8_output++ = char((word >> 12) | 0b11100000);
257
+ *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000);
258
+ *utf8_output++ = char((word & 0b111111) | 0b10000000);
259
+ } else { // 4-byte
260
+ if (word > 0x10FFFF) {
261
+ return std::make_pair(nullptr, utf8_output);
262
+ }
263
+ *utf8_output++ = char((word >> 18) | 0b11110000);
264
+ *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000);
265
+ *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000);
266
+ *utf8_output++ = char((word & 0b111111) | 0b10000000);
267
+ }
268
+ }
269
+ buf += k;
270
+ }
271
+ } // while
272
+
273
+ // check for invalid input: fail if any code unit loaded by the loop above exceeds 0x10FFFF
274
+ const __m256i v_10ffff = _mm256_set1_epi32((uint32_t)0x10ffff);
275
+ if (static_cast<uint32_t>(_mm256_movemask_epi8(_mm256_cmpeq_epi32(
276
+ _mm256_max_epu32(running_max, v_10ffff), v_10ffff))) != 0xffffffff) {
277
+ return std::make_pair(nullptr, utf8_output);
278
+ }
279
+
280
+ if (static_cast<uint32_t>(_mm256_movemask_epi8(forbidden_bytemask)) != 0) { // a surrogate (0xD800..0xDFFF) was seen in the 1/2/3-byte vector path
281
+ return std::make_pair(nullptr, utf8_output);
282
+ }
283
+
284
+ return std::make_pair(buf, utf8_output);
285
+ }
286
+
287
+ // Todo: currently, this is just the haswell code, optimize for icelake kernel. Converts UTF-32 input `buf` (`len` code units) to UTF-8 at `utf8_output`, 16 code units per iteration. Returns a `result` (error code plus the input offset `buf - start` that was reached) paired with the advanced output pointer. The loop stops once fewer than 16 + safety_margin code units remain, so trailing input is left unconverted here (presumably finished by the caller -- confirm).
288
+ std::pair<result, char *>
289
+ avx512_convert_utf32_to_utf8_with_errors(const char32_t *buf, size_t len,
290
+ char *utf8_output) {
291
+ const char32_t *end = buf + len; // one past the last input code unit
292
+ const char32_t *start = buf; // kept so offsets can be reported as buf - start
293
+
294
+ const __m256i v_0000 = _mm256_setzero_si256();
295
+ const __m256i v_ffff0000 = _mm256_set1_epi32((uint32_t)0xffff0000);
296
+ const __m256i v_ff80 = _mm256_set1_epi16((uint16_t)0xff80);
297
+ const __m256i v_f800 = _mm256_set1_epi16((uint16_t)0xf800);
298
+ const __m256i v_c080 = _mm256_set1_epi16((uint16_t)0xc080);
299
+ const __m256i v_7fffffff = _mm256_set1_epi32((uint32_t)0x7fffffff);
300
+ const __m256i v_10ffff = _mm256_set1_epi32((uint32_t)0x10ffff);
301
+
302
+ const size_t safety_margin =
303
+ 12; // to avoid overruns, see issue
304
+ // https://github.com/simdutf/simdutf/issues/92
305
+
306
+ while (end - buf >= std::ptrdiff_t(16 + safety_margin)) { // 16 code units for the two loads, plus the margin
307
+ __m256i in = _mm256_loadu_si256((__m256i *)buf);
308
+ __m256i nextin = _mm256_loadu_si256((__m256i *)buf + 1); // next 8 code units (the +1 is in units of __m256i)
309
+ // Check for too large input
310
+ const __m256i max_input =
311
+ _mm256_max_epu32(_mm256_max_epu32(in, nextin), v_10ffff);
312
+ if (static_cast<uint32_t>(_mm256_movemask_epi8(
313
+ _mm256_cmpeq_epi32(max_input, v_10ffff))) != 0xffffffff) {
314
+ return std::make_pair(result(error_code::TOO_LARGE, buf - start), // offset of the start of this 16-unit block, not of the offending unit
315
+ utf8_output);
316
+ }
317
+
318
+ // Pack 32-bit UTF-32 code units to 16-bit UTF-16 code units with unsigned
319
+ // saturation
320
+ __m256i in_16 = _mm256_packus_epi32(_mm256_and_si256(in, v_7fffffff),
321
+ _mm256_and_si256(nextin, v_7fffffff));
322
+ in_16 = _mm256_permute4x64_epi64(in_16, 0b11011000); // the pack works per 128-bit lane; this restores input order
323
+
324
+ // Try to apply UTF-16 => UTF-8 routine on 256 bits
325
+ // (haswell/avx2_convert_utf16_to_utf8.cpp)
326
+
327
+ if (_mm256_testz_si256(in_16, v_ff80)) { // ASCII fast path!!!!
328
+ // 1. pack the bytes
329
+ const __m128i utf8_packed = _mm_packus_epi16(
330
+ _mm256_castsi256_si128(in_16), _mm256_extractf128_si256(in_16, 1));
331
+ // 2. store (16 bytes)
332
+ _mm_storeu_si128((__m128i *)utf8_output, utf8_packed);
333
+ // 3. adjust pointers
334
+ buf += 16;
335
+ utf8_output += 16;
336
+ continue; // we are done for this round!
337
+ }
338
+ // no bits set above 7th bit
339
+ const __m256i one_byte_bytemask =
340
+ _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_ff80), v_0000);
341
+ const uint32_t one_byte_bitmask =
342
+ static_cast<uint32_t>(_mm256_movemask_epi8(one_byte_bytemask));
343
+
344
+ // no bits set above 11th bit
345
+ const __m256i one_or_two_bytes_bytemask =
346
+ _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_f800), v_0000);
347
+ const uint32_t one_or_two_bytes_bitmask =
348
+ static_cast<uint32_t>(_mm256_movemask_epi8(one_or_two_bytes_bytemask));
349
+ if (one_or_two_bytes_bitmask == 0xffffffff) { // every packed unit fits in 11 bits: only 1- or 2-byte UTF-8
350
+ // 1. prepare 2-byte values
351
+ // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8
352
+ // expected output : [110a|aaaa|10bb|bbbb] x 8
353
+ const __m256i v_1f00 = _mm256_set1_epi16((int16_t)0x1f00);
354
+ const __m256i v_003f = _mm256_set1_epi16((int16_t)0x003f);
355
+
356
+ // t0 = [000a|aaaa|bbbb|bb00]
357
+ const __m256i t0 = _mm256_slli_epi16(in_16, 2);
358
+ // t1 = [000a|aaaa|0000|0000]
359
+ const __m256i t1 = _mm256_and_si256(t0, v_1f00);
360
+ // t2 = [0000|0000|00bb|bbbb]
361
+ const __m256i t2 = _mm256_and_si256(in_16, v_003f);
362
+ // t3 = [000a|aaaa|00bb|bbbb]
363
+ const __m256i t3 = _mm256_or_si256(t1, t2);
364
+ // t4 = [110a|aaaa|10bb|bbbb]
365
+ const __m256i t4 = _mm256_or_si256(t3, v_c080);
366
+
367
+ // 2. merge ASCII and 2-byte codewords
368
+ const __m256i utf8_unpacked =
369
+ _mm256_blendv_epi8(t4, in_16, one_byte_bytemask);
370
+
371
+ // 3. prepare bitmask for 8-bit lookup
372
+ const uint32_t M0 = one_byte_bitmask & 0x55555555;
373
+ const uint32_t M1 = M0 >> 7;
374
+ const uint32_t M2 = (M1 | M0) & 0x00ff00ff;
375
+ // 4. pack the bytes
376
+
377
+ const uint8_t *row =
378
+ &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2)][0];
379
+ const uint8_t *row_2 =
380
+ &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2 >>
381
+ 16)][0];
382
+
383
+ const __m128i shuffle = _mm_loadu_si128((__m128i *)(row + 1)); // the shuffle mask starts at byte 1 of the row
384
+ const __m128i shuffle_2 = _mm_loadu_si128((__m128i *)(row_2 + 1));
385
+
386
+ const __m256i utf8_packed = _mm256_shuffle_epi8(
387
+ utf8_unpacked, _mm256_setr_m128i(shuffle, shuffle_2));
388
+ // 5. store bytes
389
+ _mm_storeu_si128((__m128i *)utf8_output,
390
+ _mm256_castsi256_si128(utf8_packed));
391
+ utf8_output += row[0]; // byte 0 of the row is used as the output length
392
+ _mm_storeu_si128((__m128i *)utf8_output,
393
+ _mm256_extractf128_si256(utf8_packed, 1));
394
+ utf8_output += row_2[0];
395
+
396
+ // 6. adjust pointers
397
+ buf += 16;
398
+ continue;
399
+ }
400
+ // Must check for overflow in packing
401
+ const __m256i saturation_bytemask = _mm256_cmpeq_epi32(
402
+ _mm256_and_si256(_mm256_or_si256(in, nextin), v_ffff0000), v_0000);
403
+ const uint32_t saturation_bitmask =
404
+ static_cast<uint32_t>(_mm256_movemask_epi8(saturation_bytemask));
405
+ if (saturation_bitmask == 0xffffffff) { // no input unit has bits above 0xFFFF, so the 16-bit packing lost nothing
406
+ // case: code units from register produce either 1, 2 or 3 UTF-8 bytes
407
+
408
+ // Check for illegal surrogate code units
409
+ const __m256i v_d800 = _mm256_set1_epi16((uint16_t)0xd800);
410
+ const __m256i forbidden_bytemask =
411
+ _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_f800), v_d800);
412
+ if (static_cast<uint32_t>(_mm256_movemask_epi8(forbidden_bytemask)) !=
413
+ 0x0) {
414
+ return std::make_pair(result(error_code::SURROGATE, buf - start), // offset of the start of this block, not of the surrogate itself
415
+ utf8_output);
416
+ }
417
+
418
+ const __m256i dup_even = _mm256_setr_epi16(
419
+ 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e,
420
+ 0x0000, 0x0202, 0x0404, 0x0606, 0x0808, 0x0a0a, 0x0c0c, 0x0e0e);
421
+
422
+ /* In this branch we handle three cases:
423
+ 1. [0000|0000|0ccc|cccc] => [0ccc|cccc] -
424
+ single UTF-8 byte
425
+ 2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc] - two
426
+ UTF-8 bytes
427
+ 3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] -
428
+ three UTF-8 bytes
429
+
430
+ We expand the input word (16-bit) into two code units (32-bit), thus
431
+ we have room for four bytes. However, we need five distinct bit
432
+ layouts. Note that the last byte in cases #2 and #3 is the same.
433
+
434
+ We precompute byte 1 for case #1 and the common byte for cases #2 & #3
435
+ in register t2.
436
+
437
+ We precompute byte 1 for case #3 and -- **conditionally** -- precompute
438
+ either byte 1 for case #2 or byte 2 for case #3. Note that they
439
+ differ by exactly one bit.
440
+
441
+ Finally from these two code units we build proper UTF-8 sequence, taking
442
+ into account the case (i.e, the number of bytes to write).
443
+ */
444
+ /**
445
+ * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce:
446
+ * t2 => [0ccc|cccc] [10cc|cccc]
447
+ * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb])
448
+ */
449
+ #define simdutf_vec(x) _mm256_set1_epi16(static_cast<uint16_t>(x))
450
+ // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc]
451
+ const __m256i t0 = _mm256_shuffle_epi8(in_16, dup_even);
452
+ // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc]
453
+ const __m256i t1 = _mm256_and_si256(t0, simdutf_vec(0b0011111101111111));
454
+ // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc]
455
+ const __m256i t2 = _mm256_or_si256(t1, simdutf_vec(0b1000000000000000));
456
+
457
+ // [aaaa|bbbb|bbcc|cccc] => [0000|aaaa|bbbb|bbcc]
458
+ const __m256i s0 = _mm256_srli_epi16(in_16, 4);
459
+ // [0000|aaaa|bbbb|bbcc] => [0000|aaaa|bbbb|bb00]
460
+ const __m256i s1 = _mm256_and_si256(s0, simdutf_vec(0b0000111111111100));
461
+ // [0000|aaaa|bbbb|bb00] => [00bb|bbbb|0000|aaaa]
462
+ const __m256i s2 = _mm256_maddubs_epi16(s1, simdutf_vec(0x0140));
463
+ // [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa]
464
+ const __m256i s3 = _mm256_or_si256(s2, simdutf_vec(0b1100000011100000));
465
+ const __m256i m0 = _mm256_andnot_si256(one_or_two_bytes_bytemask,
466
+ simdutf_vec(0b0100000000000000));
467
+ const __m256i s4 = _mm256_xor_si256(s3, m0); // for 3-byte units only: flips [11bb|bbbb] into [10bb|bbbb]
468
+ #undef simdutf_vec
469
+
470
+ // 4. expand code units 16-bit => 32-bit
471
+ const __m256i out0 = _mm256_unpacklo_epi16(t2, s4);
472
+ const __m256i out1 = _mm256_unpackhi_epi16(t2, s4);
473
+
474
+ // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle
475
+ const uint32_t mask = (one_byte_bitmask & 0x55555555) |
476
+ (one_or_two_bytes_bitmask & 0xaaaaaaaa);
477
+ // Due to the wider registers, the following path is less likely to be
478
+ // useful.
479
+ /*if(mask == 0) {
480
+ // We only have three-byte code units. Use fast path.
481
+ const __m256i shuffle =
482
+ _mm256_setr_epi8(2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1,
483
+ 2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1); const __m256i utf8_0 =
484
+ _mm256_shuffle_epi8(out0, shuffle); const __m256i utf8_1 =
485
+ _mm256_shuffle_epi8(out1, shuffle);
486
+ _mm_storeu_si128((__m128i*)utf8_output, _mm256_castsi256_si128(utf8_0));
487
+ utf8_output += 12;
488
+ _mm_storeu_si128((__m128i*)utf8_output, _mm256_castsi256_si128(utf8_1));
489
+ utf8_output += 12;
490
+ _mm_storeu_si128((__m128i*)utf8_output,
491
+ _mm256_extractf128_si256(utf8_0,1)); utf8_output += 12;
492
+ _mm_storeu_si128((__m128i*)utf8_output,
493
+ _mm256_extractf128_si256(utf8_1,1)); utf8_output += 12; buf += 16;
494
+ continue;
495
+ }*/
496
+ const uint8_t mask0 = uint8_t(mask); // each 8-bit slice of mask selects the table row for one group of 4 code units
497
+ const uint8_t *row0 =
498
+ &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0];
499
+ const __m128i shuffle0 = _mm_loadu_si128((__m128i *)(row0 + 1));
500
+ const __m128i utf8_0 =
501
+ _mm_shuffle_epi8(_mm256_castsi256_si128(out0), shuffle0);
502
+
503
+ const uint8_t mask1 = static_cast<uint8_t>(mask >> 8);
504
+ const uint8_t *row1 =
505
+ &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0];
506
+ const __m128i shuffle1 = _mm_loadu_si128((__m128i *)(row1 + 1));
507
+ const __m128i utf8_1 =
508
+ _mm_shuffle_epi8(_mm256_castsi256_si128(out1), shuffle1);
509
+
510
+ const uint8_t mask2 = static_cast<uint8_t>(mask >> 16);
511
+ const uint8_t *row2 =
512
+ &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask2][0];
513
+ const __m128i shuffle2 = _mm_loadu_si128((__m128i *)(row2 + 1));
514
+ const __m128i utf8_2 =
515
+ _mm_shuffle_epi8(_mm256_extractf128_si256(out0, 1), shuffle2);
516
+
517
+ const uint8_t mask3 = static_cast<uint8_t>(mask >> 24);
518
+ const uint8_t *row3 =
519
+ &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask3][0];
520
+ const __m128i shuffle3 = _mm_loadu_si128((__m128i *)(row3 + 1));
521
+ const __m128i utf8_3 =
522
+ _mm_shuffle_epi8(_mm256_extractf128_si256(out1, 1), shuffle3);
523
+
524
+ _mm_storeu_si128((__m128i *)utf8_output, utf8_0); // always a full 16-byte store; the pointer then advances by row[0] only
525
+ utf8_output += row0[0];
526
+ _mm_storeu_si128((__m128i *)utf8_output, utf8_1);
527
+ utf8_output += row1[0];
528
+ _mm_storeu_si128((__m128i *)utf8_output, utf8_2);
529
+ utf8_output += row2[0];
530
+ _mm_storeu_si128((__m128i *)utf8_output, utf8_3);
531
+ utf8_output += row3[0];
532
+ buf += 16;
533
+ } else {
534
+ // case: at least one 32-bit word is larger than 0xFFFF <=> it will
535
+ // produce four UTF-8 bytes. Let us do a scalar fallback. It may seem
536
+ // wasteful to use scalar code, but being efficient with SIMD may require
537
+ // large, non-trivial tables?
538
+ size_t forward = 15; // the scalar path converts at most 15 code units, then control returns to the SIMD loop
539
+ size_t k = 0;
540
+ if (size_t(end - buf) < forward + 1) { // NOTE(review): looks unreachable here, the loop condition guarantees end - buf >= 16 + safety_margin
541
+ forward = size_t(end - buf - 1);
542
+ }
543
+ for (; k < forward; k++) {
544
+ uint32_t word = buf[k];
545
+ if ((word & 0xFFFFFF80) == 0) { // 1-byte (ASCII)
546
+ *utf8_output++ = char(word);
547
+ } else if ((word & 0xFFFFF800) == 0) { // 2-byte
548
+ *utf8_output++ = char((word >> 6) | 0b11000000);
549
+ *utf8_output++ = char((word & 0b111111) | 0b10000000);
550
+ } else if ((word & 0xFFFF0000) == 0) { // 3-byte
551
+ if (word >= 0xD800 && word <= 0xDFFF) {
552
+ return std::make_pair(
553
+ result(error_code::SURROGATE, buf - start + k), utf8_output); // exact offset of the offending code unit
554
+ }
555
+ *utf8_output++ = char((word >> 12) | 0b11100000);
556
+ *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000);
557
+ *utf8_output++ = char((word & 0b111111) | 0b10000000);
558
+ } else { // 4-byte
559
+ if (word > 0x10FFFF) {
560
+ return std::make_pair(
561
+ result(error_code::TOO_LARGE, buf - start + k), utf8_output); // exact offset of the offending code unit
562
+ }
563
+ *utf8_output++ = char((word >> 18) | 0b11110000);
564
+ *utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000);
565
+ *utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000);
566
+ *utf8_output++ = char((word & 0b111111) | 0b10000000);
567
+ }
568
+ }
569
+ buf += k;
570
+ }
571
+ } // while
572
+
573
+ return std::make_pair(result(error_code::SUCCESS, buf - start), utf8_output); // the count is input code units consumed, not bytes written
574
+ }
@@ -0,0 +1,104 @@
1
+ // file included directly
2
+
3
+ // File contains conversion procedure from possibly invalid UTF-8 strings.
4
+
5
+ template <bool is_remaining> // Converts one block of at most 64 UTF-8 bytes to Latin-1. With is_remaining == true only the first `len` bytes are loaded/stored (masked). Returns the number of Latin-1 bytes written, or 0 on error.
6
+ simdutf_really_inline size_t process_block_from_utf8_to_latin1(
7
+ const char *buf, size_t len, char *latin_output, __m512i minus64,
8
+ __m512i one, __mmask64 *next_leading_ptr, __mmask64 *next_bit6_ptr) { // the two pointers carry state from the previous block and are updated for the next one
9
+ __mmask64 load_mask =
10
+ is_remaining ? _bzhi_u64(~0ULL, (unsigned int)len) : ~0ULL; // low `len` bits set for a partial block, all 64 bits otherwise
11
+ __m512i input = _mm512_maskz_loadu_epi8(load_mask, (__m512i *)buf); // bytes outside the mask are read as zero
12
+ __mmask64 nonascii = _mm512_movepi8_mask(input); // one bit per byte whose most significant bit is set
13
+ if (nonascii == 0) {
14
+ if (*next_leading_ptr) { // If we ended with a leading byte, it is an error.
15
+ return 0; // Indicates error
16
+ }
17
+ is_remaining
18
+ ? _mm512_mask_storeu_epi8((__m512i *)latin_output, load_mask, input)
19
+ : _mm512_storeu_si512((__m512i *)latin_output, input);
20
+ return len; // pure ASCII: the bytes are copied unchanged
21
+ }
22
+
23
+ const __mmask64 leading = _mm512_cmpge_epu8_mask(input, minus64); // unsigned compare; the caller in this file passes -64 (0xC0), so these are bytes >= 0xC0
24
+
25
+ __m512i highbits = _mm512_xor_si512(input, _mm512_set1_epi8(-62)); // XOR with 0xC2: leading byte 0xC2 becomes 0, 0xC3 becomes 1
26
+ __mmask64 invalid_leading_bytes =
27
+ _mm512_mask_cmpgt_epu8_mask(leading, highbits, one); // with `one` == 1 (as passed by the caller in this file): any leading byte other than 0xC2/0xC3
28
+
29
+ if (invalid_leading_bytes) {
30
+ return 0; // Indicates error
31
+ }
32
+
33
+ __mmask64 leading_shift = (leading << 1) | *next_leading_ptr; // positions right after a leading byte, including the carry from the previous block
34
+
35
+ if ((nonascii ^ leading) != leading_shift) { // non-ASCII, non-leading bytes must sit exactly at those positions
36
+ return 0; // Indicates error
37
+ }
38
+
39
+ const __mmask64 bit6 = _mm512_cmpeq_epi8_mask(highbits, one); // bytes equal to 0xC3 (given `one` == 1)
40
+ input =
41
+ _mm512_mask_sub_epi8(input, (bit6 << 1) | *next_bit6_ptr, input, minus64); // bytes following 0xC3 get minus64 subtracted, i.e. +0x40 when minus64 is -64
42
+
43
+ __mmask64 retain = ~leading & load_mask; // drop leading bytes and anything beyond the loaded range
44
+ __m512i output = _mm512_maskz_compress_epi8(retain, input);
45
+ int64_t written_out = count_ones(retain);
46
+ if (written_out == 0) { // also keeps the shift below at 63 or less
47
+ return 0; // Indicates error
48
+ }
49
+ *next_bit6_ptr = bit6 >> 63; // state of the byte at position 63, carried into the next block
50
+ *next_leading_ptr = leading >> 63;
51
+
52
+ __mmask64 store_mask = ~UINT64_C(0) >> (64 - written_out); // low written_out bits set
53
+
54
+ _mm512_mask_storeu_epi8((__m512i *)latin_output, store_mask, output);
55
+
56
+ return written_out;
57
+ }
58
+
59
+ size_t utf8_to_latin1_avx512(const char *&inbuf, size_t len, // Converts `len` UTF-8 bytes at inbuf to Latin-1. Returns the number of bytes written, or 0 on error (0 is also returned for len == 0). Both reference arguments are updated before returning.
60
+ char *&inlatin_output) {
61
+ const char *buf = inbuf;
62
+ char *latin_output = inlatin_output;
63
+ char *start = latin_output; // used to compute the total number of bytes written
64
+ size_t pos = 0;
65
+ __m512i minus64 = _mm512_set1_epi8(-64); // 11111111111 ... 1100 0000
66
+ __m512i one = _mm512_set1_epi8(1);
67
+ __mmask64 next_leading = 0; // carry between blocks: 1 when the previous block ended with a leading byte
68
+ __mmask64 next_bit6 = 0; // carry between blocks, updated by the block routine
69
+
70
+ while (pos + 64 <= len) { // full 64-byte blocks
71
+ size_t written = process_block_from_utf8_to_latin1<false>(
72
+ buf + pos, 64, latin_output, minus64, one, &next_leading, &next_bit6);
73
+ if (written == 0) {
74
+ inlatin_output = latin_output;
75
+ inbuf = buf + pos - next_leading; // back up one byte when the previous block ended with a leading byte
76
+ return 0; // Indicates error at pos or after, or just before pos (too
77
+ // short error)
78
+ }
79
+ latin_output += written;
80
+ pos += 64;
81
+ }
82
+
83
+ if (pos < len) { // final partial block, processed with masked load/store
84
+ size_t remaining = len - pos;
85
+ size_t written = process_block_from_utf8_to_latin1<true>(
86
+ buf + pos, remaining, latin_output, minus64, one, &next_leading,
87
+ &next_bit6);
88
+ if (written == 0) {
89
+ inbuf = buf + pos - next_leading;
90
+ inlatin_output = latin_output;
91
+ return 0; // Indicates error at pos or after, or just before pos (too
92
+ // short error)
93
+ }
94
+ latin_output += written;
95
+ }
96
+ if (next_leading) { // a leading byte carry is still pending after the last block
97
+ inbuf = buf + len - next_leading;
98
+ inlatin_output = latin_output;
99
+ return 0; // Indicates error at end of buffer
100
+ }
101
+ inlatin_output = latin_output;
102
+ inbuf += len; // success: all input consumed
103
+ return size_t(latin_output - start);
104
+ }