react-native-quick-crypto 1.0.19 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (732)
  1. package/QuickCrypto.podspec +12 -38
  2. package/README.md +2 -0
  3. package/android/CMakeLists.txt +3 -0
  4. package/cpp/utils/HybridUtils.cpp +39 -77
  5. package/deps/simdutf/.clang-format +4 -0
  6. package/deps/simdutf/.github/ISSUE_TEMPLATE/bug_report.md +62 -0
  7. package/deps/simdutf/.github/ISSUE_TEMPLATE/config.yml +1 -0
  8. package/deps/simdutf/.github/ISSUE_TEMPLATE/feature_request.md +35 -0
  9. package/deps/simdutf/.github/ISSUE_TEMPLATE/standard-issue-template.md +29 -0
  10. package/deps/simdutf/.github/pull_request_template.md +51 -0
  11. package/deps/simdutf/.github/workflows/aarch64.yml +39 -0
  12. package/deps/simdutf/.github/workflows/alpine.yml +27 -0
  13. package/deps/simdutf/.github/workflows/amalgamation_demos.yml +34 -0
  14. package/deps/simdutf/.github/workflows/armv7.yml +32 -0
  15. package/deps/simdutf/.github/workflows/atomic_fuzz.yml +25 -0
  16. package/deps/simdutf/.github/workflows/cifuzz.yml +37 -0
  17. package/deps/simdutf/.github/workflows/clangformat.yml +36 -0
  18. package/deps/simdutf/.github/workflows/debian-latestcxxstandards.yml +40 -0
  19. package/deps/simdutf/.github/workflows/debian.yml +33 -0
  20. package/deps/simdutf/.github/workflows/documentation.yml +36 -0
  21. package/deps/simdutf/.github/workflows/emscripten.yml +19 -0
  22. package/deps/simdutf/.github/workflows/loongarch64-gcc-14.2.yml +39 -0
  23. package/deps/simdutf/.github/workflows/macos-latest.yml +29 -0
  24. package/deps/simdutf/.github/workflows/msys2-clang.yml +48 -0
  25. package/deps/simdutf/.github/workflows/msys2.yml +50 -0
  26. package/deps/simdutf/.github/workflows/ppc64le.yml +29 -0
  27. package/deps/simdutf/.github/workflows/rvv-1024-clang-18.yml +35 -0
  28. package/deps/simdutf/.github/workflows/rvv-128-clang-17.yml +35 -0
  29. package/deps/simdutf/.github/workflows/rvv-256-gcc-14.yml +31 -0
  30. package/deps/simdutf/.github/workflows/s390x.yml +29 -0
  31. package/deps/simdutf/.github/workflows/selective-amalgamation.yml +29 -0
  32. package/deps/simdutf/.github/workflows/typos.yml +19 -0
  33. package/deps/simdutf/.github/workflows/ubuntu22-cxx20.yml +30 -0
  34. package/deps/simdutf/.github/workflows/ubuntu22.yml +32 -0
  35. package/deps/simdutf/.github/workflows/ubuntu22_gcc12.yml +27 -0
  36. package/deps/simdutf/.github/workflows/ubuntu22sani.yml +29 -0
  37. package/deps/simdutf/.github/workflows/ubuntu24-cxxstandards.yml +34 -0
  38. package/deps/simdutf/.github/workflows/ubuntu24-unsignedchar.yml +34 -0
  39. package/deps/simdutf/.github/workflows/ubuntu24.yml +32 -0
  40. package/deps/simdutf/.github/workflows/ubuntu24sani.yml +36 -0
  41. package/deps/simdutf/.github/workflows/ubuntu24sani_clang.yml +29 -0
  42. package/deps/simdutf/.github/workflows/vs17-arm-ci.yml +21 -0
  43. package/deps/simdutf/.github/workflows/vs17-ci-cxx20.yml +41 -0
  44. package/deps/simdutf/.github/workflows/vs17-ci.yml +41 -0
  45. package/deps/simdutf/.github/workflows/vs17-clang-ci.yml +41 -0
  46. package/deps/simdutf/.github/workflows/vs17-cxxstandards.yml +36 -0
  47. package/deps/simdutf/AI_USAGE_POLICY.md +56 -0
  48. package/deps/simdutf/AUTHORS +6 -0
  49. package/deps/simdutf/CMakeLists.txt +231 -0
  50. package/deps/simdutf/CONTRIBUTING.md +214 -0
  51. package/deps/simdutf/CONTRIBUTORS +1 -0
  52. package/deps/simdutf/Doxyfile +2584 -0
  53. package/deps/simdutf/LICENSE-APACHE +201 -0
  54. package/deps/simdutf/LICENSE-MIT +18 -0
  55. package/deps/simdutf/Makefile.crosscompile +54 -0
  56. package/deps/simdutf/README-RVV.md +16 -0
  57. package/deps/simdutf/README.md +2782 -0
  58. package/deps/simdutf/SECURITY.md +8 -0
  59. package/deps/simdutf/benchmarks/CMakeLists.txt +101 -0
  60. package/deps/simdutf/benchmarks/alignment.cpp +150 -0
  61. package/deps/simdutf/benchmarks/base64/CMakeLists.txt +30 -0
  62. package/deps/simdutf/benchmarks/base64/benchmark_base64.cpp +875 -0
  63. package/deps/simdutf/benchmarks/base64/libbase64_spaces.h +49 -0
  64. package/deps/simdutf/benchmarks/base64/node_base64.h +227 -0
  65. package/deps/simdutf/benchmarks/base64/openssl3_base64.h +334 -0
  66. package/deps/simdutf/benchmarks/benchmark.cpp +65 -0
  67. package/deps/simdutf/benchmarks/benchmark_to_well_formed_utf16.cpp +347 -0
  68. package/deps/simdutf/benchmarks/competition/.clang-format-ignore +5 -0
  69. package/deps/simdutf/benchmarks/competition/CppCon2018/utf_utils.cpp +1276 -0
  70. package/deps/simdutf/benchmarks/competition/CppCon2018/utf_utils.h +595 -0
  71. package/deps/simdutf/benchmarks/competition/README.md +7 -0
  72. package/deps/simdutf/benchmarks/competition/hoehrmann/hoehrmann.h +91 -0
  73. package/deps/simdutf/benchmarks/competition/inoue2008/inoue_utf8_to_utf16.h +444 -0
  74. package/deps/simdutf/benchmarks/competition/inoue2008/inoue_utf8_to_utf16_tables.h +13183 -0
  75. package/deps/simdutf/benchmarks/competition/inoue2008/script.py +73 -0
  76. package/deps/simdutf/benchmarks/competition/llvm/ConvertUTF.cpp +738 -0
  77. package/deps/simdutf/benchmarks/competition/llvm/ConvertUTF.h +293 -0
  78. package/deps/simdutf/benchmarks/competition/u8u16/COPYRIGHT +8 -0
  79. package/deps/simdutf/benchmarks/competition/u8u16/Makefile +44 -0
  80. package/deps/simdutf/benchmarks/competition/u8u16/OSL3.0.txt +169 -0
  81. package/deps/simdutf/benchmarks/competition/u8u16/Profiling/BOM_Profiler.h +148 -0
  82. package/deps/simdutf/benchmarks/competition/u8u16/Profiling/i386_timer.h +45 -0
  83. package/deps/simdutf/benchmarks/competition/u8u16/Profiling/ppc_timer.c +34 -0
  84. package/deps/simdutf/benchmarks/competition/u8u16/README +56 -0
  85. package/deps/simdutf/benchmarks/competition/u8u16/config/config_defs.h +43 -0
  86. package/deps/simdutf/benchmarks/competition/u8u16/config/g4_config.h +27 -0
  87. package/deps/simdutf/benchmarks/competition/u8u16/config/mmx_config.h +16 -0
  88. package/deps/simdutf/benchmarks/competition/u8u16/config/p4_config.h +18 -0
  89. package/deps/simdutf/benchmarks/competition/u8u16/config/p4_ideal_config.h +16 -0
  90. package/deps/simdutf/benchmarks/competition/u8u16/config/spu_config.h +28 -0
  91. package/deps/simdutf/benchmarks/competition/u8u16/config/ssse3_config.h +20 -0
  92. package/deps/simdutf/benchmarks/competition/u8u16/iconv_u8u16.c +2 -0
  93. package/deps/simdutf/benchmarks/competition/u8u16/lib/altivec_simd.h +440 -0
  94. package/deps/simdutf/benchmarks/competition/u8u16/lib/libgen/make_basic_ops.py +121 -0
  95. package/deps/simdutf/benchmarks/competition/u8u16/lib/libgen/make_half_operand_versions.py +158 -0
  96. package/deps/simdutf/benchmarks/competition/u8u16/lib/libgen/make_test.py +270 -0
  97. package/deps/simdutf/benchmarks/competition/u8u16/lib/mmx_simd.h +141 -0
  98. package/deps/simdutf/benchmarks/competition/u8u16/lib/mmx_simd_basic.h +216 -0
  99. package/deps/simdutf/benchmarks/competition/u8u16/lib/mmx_simd_built_in.h +119 -0
  100. package/deps/simdutf/benchmarks/competition/u8u16/lib/mmx_simd_modified.h +2430 -0
  101. package/deps/simdutf/benchmarks/competition/u8u16/lib/outline.txt +39 -0
  102. package/deps/simdutf/benchmarks/competition/u8u16/lib/spu_simd.h +421 -0
  103. package/deps/simdutf/benchmarks/competition/u8u16/lib/sse_simd.h +836 -0
  104. package/deps/simdutf/benchmarks/competition/u8u16/lib/stdint.h +222 -0
  105. package/deps/simdutf/benchmarks/competition/u8u16/libu8u16_BE.c +4 -0
  106. package/deps/simdutf/benchmarks/competition/u8u16/libu8u16_LE.c +5 -0
  107. package/deps/simdutf/benchmarks/competition/u8u16/proto/u8u16.py +390 -0
  108. package/deps/simdutf/benchmarks/competition/u8u16/src/Makefile +18 -0
  109. package/deps/simdutf/benchmarks/competition/u8u16/src/bytelex.h +448 -0
  110. package/deps/simdutf/benchmarks/competition/u8u16/src/charsets/ASCII_EBCDIC.h +284 -0
  111. package/deps/simdutf/benchmarks/competition/u8u16/src/libu8u16.c +1975 -0
  112. package/deps/simdutf/benchmarks/competition/u8u16/src/libu8u16.pdf +0 -0
  113. package/deps/simdutf/benchmarks/competition/u8u16/src/libu8u16.w +2263 -0
  114. package/deps/simdutf/benchmarks/competition/u8u16/src/multiliteral.h +239 -0
  115. package/deps/simdutf/benchmarks/competition/u8u16/src/u8u16.c +232 -0
  116. package/deps/simdutf/benchmarks/competition/u8u16/src/x8x16.c +194 -0
  117. package/deps/simdutf/benchmarks/competition/u8u16/src/xml_error.c +193 -0
  118. package/deps/simdutf/benchmarks/competition/u8u16/src/xml_error.h +167 -0
  119. package/deps/simdutf/benchmarks/competition/u8u16/src/xmldecl.c +288 -0
  120. package/deps/simdutf/benchmarks/competition/u8u16/src/xmldecl.h +117 -0
  121. package/deps/simdutf/benchmarks/competition/u8u16/u8u16_g4.c +2 -0
  122. package/deps/simdutf/benchmarks/competition/u8u16/u8u16_mmx.c +2 -0
  123. package/deps/simdutf/benchmarks/competition/u8u16/u8u16_p4.c +3 -0
  124. package/deps/simdutf/benchmarks/competition/u8u16/u8u16_p4_ideal.c +2 -0
  125. package/deps/simdutf/benchmarks/competition/u8u16/u8u16_spu.c +2 -0
  126. package/deps/simdutf/benchmarks/competition/u8u16/u8u16_ssse3.c +3 -0
  127. package/deps/simdutf/benchmarks/competition/u8u16/x8x16_p4.c +2 -0
  128. package/deps/simdutf/benchmarks/competition/utf8lut/LICENSE +23 -0
  129. package/deps/simdutf/benchmarks/competition/utf8lut/data/test_minimal.txt +44 -0
  130. package/deps/simdutf/benchmarks/competition/utf8lut/readme.md +106 -0
  131. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_clang_corr_tests.cmd +11 -0
  132. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_clang_corr_tests.sh +13 -0
  133. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_gcc_corr_tests.sh +13 -0
  134. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_gcc_example.sh +13 -0
  135. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_gcc_file_conv.sh +14 -0
  136. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_gcc_iconv_lib.sh +11 -0
  137. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_gcc_iconv_sample.sh +8 -0
  138. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_mingw_corr_tests.cmd +12 -0
  139. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_mingw_example.cmd +13 -0
  140. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_mingw_file_conv.cmd +14 -0
  141. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_mingw_iconv_lib.cmd +11 -0
  142. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_mingw_iconv_sample.cmd +8 -0
  143. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_msvc_corr_tests.cmd +11 -0
  144. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_msvc_example.cmd +12 -0
  145. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_msvc_file_conv.cmd +13 -0
  146. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_msvc_iconv_lib.cmd +10 -0
  147. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_msvc_iconv_sample.cmd +9 -0
  148. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/html_table.py +25 -0
  149. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/measure.py +94 -0
  150. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/resize.py +20 -0
  151. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/wipe_all.cmd +2 -0
  152. package/deps/simdutf/benchmarks/competition/utf8lut/scripts/wipe_interm.cmd +1 -0
  153. package/deps/simdutf/benchmarks/competition/utf8lut/src/base/CustomMemcpy.h +75 -0
  154. package/deps/simdutf/benchmarks/competition/utf8lut/src/base/PerfDefs.h +47 -0
  155. package/deps/simdutf/benchmarks/competition/utf8lut/src/base/Timing.cpp +17 -0
  156. package/deps/simdutf/benchmarks/competition/utf8lut/src/base/Timing.h +76 -0
  157. package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/AllProcessors.cpp +35 -0
  158. package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/BaseBufferProcessor.cpp +117 -0
  159. package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/BaseBufferProcessor.h +210 -0
  160. package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/BufferDecoder.h +158 -0
  161. package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/BufferEncoder.h +104 -0
  162. package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/ProcessorPlugins.h +334 -0
  163. package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/ProcessorSelector.h +186 -0
  164. package/deps/simdutf/benchmarks/competition/utf8lut/src/core/DecoderLut.cpp +140 -0
  165. package/deps/simdutf/benchmarks/competition/utf8lut/src/core/DecoderLut.h +42 -0
  166. package/deps/simdutf/benchmarks/competition/utf8lut/src/core/DecoderProcess.h +100 -0
  167. package/deps/simdutf/benchmarks/competition/utf8lut/src/core/Dfa.h +57 -0
  168. package/deps/simdutf/benchmarks/competition/utf8lut/src/core/EncoderLut.cpp +85 -0
  169. package/deps/simdutf/benchmarks/competition/utf8lut/src/core/EncoderLut.h +27 -0
  170. package/deps/simdutf/benchmarks/competition/utf8lut/src/core/EncoderProcess.h +126 -0
  171. package/deps/simdutf/benchmarks/competition/utf8lut/src/core/ProcessTrivial.h +108 -0
  172. package/deps/simdutf/benchmarks/competition/utf8lut/src/iconv/iconv.cpp +139 -0
  173. package/deps/simdutf/benchmarks/competition/utf8lut/src/iconv/iconv.h +74 -0
  174. package/deps/simdutf/benchmarks/competition/utf8lut/src/message/MessageConverter.cpp +65 -0
  175. package/deps/simdutf/benchmarks/competition/utf8lut/src/message/MessageConverter.h +91 -0
  176. package/deps/simdutf/benchmarks/competition/utf8lut/src/tests/CorrectnessTests.cpp +772 -0
  177. package/deps/simdutf/benchmarks/competition/utf8lut/src/tests/Example.cpp +12 -0
  178. package/deps/simdutf/benchmarks/competition/utf8lut/src/tests/FileConverter.cpp +486 -0
  179. package/deps/simdutf/benchmarks/competition/utf8lut/src/tests/iconv_sample.c +162 -0
  180. package/deps/simdutf/benchmarks/competition/utf8lut/src/utf8lut.h +15 -0
  181. package/deps/simdutf/benchmarks/competition/utf8sse4/fromutf8-sse.cpp +292 -0
  182. package/deps/simdutf/benchmarks/competition/utfcpp/LICENSE +23 -0
  183. package/deps/simdutf/benchmarks/competition/utfcpp/README.md +1503 -0
  184. package/deps/simdutf/benchmarks/competition/utfcpp/source/utf8/checked.h +335 -0
  185. package/deps/simdutf/benchmarks/competition/utfcpp/source/utf8/core.h +338 -0
  186. package/deps/simdutf/benchmarks/competition/utfcpp/source/utf8/cpp11.h +103 -0
  187. package/deps/simdutf/benchmarks/competition/utfcpp/source/utf8/cpp17.h +103 -0
  188. package/deps/simdutf/benchmarks/competition/utfcpp/source/utf8/unchecked.h +274 -0
  189. package/deps/simdutf/benchmarks/competition/utfcpp/source/utf8.h +34 -0
  190. package/deps/simdutf/benchmarks/dataset/README.md +155 -0
  191. package/deps/simdutf/benchmarks/dataset/emoji.txt +204 -0
  192. package/deps/simdutf/benchmarks/dataset/scripts/utf8type.py +40 -0
  193. package/deps/simdutf/benchmarks/dataset/wikipedia_mars/Makefile +80 -0
  194. package/deps/simdutf/benchmarks/dataset/wikipedia_mars/convert_to_utf6.py +20 -0
  195. package/deps/simdutf/benchmarks/find/CMakeLists.txt +6 -0
  196. package/deps/simdutf/benchmarks/find/findbenchmark.cpp +63 -0
  197. package/deps/simdutf/benchmarks/find/findbenchmarker.h +46 -0
  198. package/deps/simdutf/benchmarks/shortbench.cpp +555 -0
  199. package/deps/simdutf/benchmarks/src/CMakeLists.txt +52 -0
  200. package/deps/simdutf/benchmarks/src/apple_arm_events.h +1104 -0
  201. package/deps/simdutf/benchmarks/src/benchmark.cpp +3899 -0
  202. package/deps/simdutf/benchmarks/src/benchmark.h +317 -0
  203. package/deps/simdutf/benchmarks/src/benchmark_base.cpp +144 -0
  204. package/deps/simdutf/benchmarks/src/benchmark_base.h +98 -0
  205. package/deps/simdutf/benchmarks/src/cmdline.cpp +176 -0
  206. package/deps/simdutf/benchmarks/src/cmdline.h +35 -0
  207. package/deps/simdutf/benchmarks/src/event_counter.h +162 -0
  208. package/deps/simdutf/benchmarks/src/linux-perf-events.h +104 -0
  209. package/deps/simdutf/benchmarks/stream.cpp +209 -0
  210. package/deps/simdutf/benchmarks/threaded.cpp +123 -0
  211. package/deps/simdutf/cmake/CPM.cmake +1363 -0
  212. package/deps/simdutf/cmake/JoinPaths.cmake +23 -0
  213. package/deps/simdutf/cmake/add_cpp_test.cmake +68 -0
  214. package/deps/simdutf/cmake/simdutf-config.cmake.in +2 -0
  215. package/deps/simdutf/cmake/simdutf-flags.cmake +26 -0
  216. package/deps/simdutf/cmake/toolchains-ci/riscv64-linux-gnu.cmake +4 -0
  217. package/deps/simdutf/cmake/toolchains-dev/README.md +32 -0
  218. package/deps/simdutf/cmake/toolchains-dev/aarch64.cmake +14 -0
  219. package/deps/simdutf/cmake/toolchains-dev/loongarch64.cmake +22 -0
  220. package/deps/simdutf/cmake/toolchains-dev/powerpc64.cmake +16 -0
  221. package/deps/simdutf/cmake/toolchains-dev/powerpc64le.cmake +16 -0
  222. package/deps/simdutf/cmake/toolchains-dev/riscv64.cmake +16 -0
  223. package/deps/simdutf/cmake/toolchains-dev/rvv-spike.cmake +38 -0
  224. package/deps/simdutf/doc/avx512.png +0 -0
  225. package/deps/simdutf/doc/logo.png +0 -0
  226. package/deps/simdutf/doc/logo.svg +165 -0
  227. package/deps/simdutf/doc/node2023.png +0 -0
  228. package/deps/simdutf/doc/shortinput.md +78 -0
  229. package/deps/simdutf/doc/utf16utf8.png +0 -0
  230. package/deps/simdutf/doc/utf8utf16.png +0 -0
  231. package/deps/simdutf/doc/widelogo.png +0 -0
  232. package/deps/simdutf/doxygen.py +50 -0
  233. package/deps/simdutf/fuzz/.clang-format +9 -0
  234. package/deps/simdutf/fuzz/CMakeLists.txt +45 -0
  235. package/deps/simdutf/fuzz/README.md +168 -0
  236. package/deps/simdutf/fuzz/atomic_base64.cpp +448 -0
  237. package/deps/simdutf/fuzz/base64.cpp +278 -0
  238. package/deps/simdutf/fuzz/build.sh +83 -0
  239. package/deps/simdutf/fuzz/conversion.cpp +669 -0
  240. package/deps/simdutf/fuzz/helpers/.clang-format-ignore +1 -0
  241. package/deps/simdutf/fuzz/helpers/common.h +135 -0
  242. package/deps/simdutf/fuzz/helpers/nameof.hpp +1258 -0
  243. package/deps/simdutf/fuzz/main.cpp +72 -0
  244. package/deps/simdutf/fuzz/minimize_and_cleanse.sh +87 -0
  245. package/deps/simdutf/fuzz/misc.cpp +216 -0
  246. package/deps/simdutf/fuzz/random_fuzz.sh +154 -0
  247. package/deps/simdutf/fuzz/roundtrip.cpp +588 -0
  248. package/deps/simdutf/fuzz/safe_conversion.cpp +104 -0
  249. package/deps/simdutf/include/simdutf/avx512.h +79 -0
  250. package/deps/simdutf/include/simdutf/base64_implementation.h +158 -0
  251. package/deps/simdutf/include/simdutf/base64_tables.h +887 -0
  252. package/deps/simdutf/include/simdutf/common_defs.h +186 -0
  253. package/deps/simdutf/include/simdutf/compiler_check.h +50 -0
  254. package/deps/simdutf/include/simdutf/constexpr_ptr.h +138 -0
  255. package/deps/simdutf/include/simdutf/encoding_types.h +189 -0
  256. package/deps/simdutf/include/simdutf/error.h +126 -0
  257. package/deps/simdutf/include/simdutf/implementation.h +7081 -0
  258. package/deps/simdutf/include/simdutf/internal/isadetection.h +325 -0
  259. package/deps/simdutf/include/simdutf/portability.h +285 -0
  260. package/deps/simdutf/include/simdutf/scalar/ascii.h +86 -0
  261. package/deps/simdutf/include/simdutf/scalar/atomic_util.h +105 -0
  262. package/deps/simdutf/include/simdutf/scalar/base64.h +911 -0
  263. package/deps/simdutf/include/simdutf/scalar/latin1.h +26 -0
  264. package/deps/simdutf/include/simdutf/scalar/latin1_to_utf16/latin1_to_utf16.h +52 -0
  265. package/deps/simdutf/include/simdutf/scalar/latin1_to_utf32/latin1_to_utf32.h +27 -0
  266. package/deps/simdutf/include/simdutf/scalar/latin1_to_utf8/latin1_to_utf8.h +191 -0
  267. package/deps/simdutf/include/simdutf/scalar/swap_bytes.h +35 -0
  268. package/deps/simdutf/include/simdutf/scalar/utf16.h +226 -0
  269. package/deps/simdutf/include/simdutf/scalar/utf16_to_latin1/utf16_to_latin1.h +108 -0
  270. package/deps/simdutf/include/simdutf/scalar/utf16_to_latin1/valid_utf16_to_latin1.h +40 -0
  271. package/deps/simdutf/include/simdutf/scalar/utf16_to_utf32/utf16_to_utf32.h +86 -0
  272. package/deps/simdutf/include/simdutf/scalar/utf16_to_utf32/valid_utf16_to_utf32.h +44 -0
  273. package/deps/simdutf/include/simdutf/scalar/utf16_to_utf8/utf16_to_utf8.h +295 -0
  274. package/deps/simdutf/include/simdutf/scalar/utf16_to_utf8/valid_utf16_to_utf8.h +91 -0
  275. package/deps/simdutf/include/simdutf/scalar/utf32.h +82 -0
  276. package/deps/simdutf/include/simdutf/scalar/utf32_to_latin1/utf32_to_latin1.h +68 -0
  277. package/deps/simdutf/include/simdutf/scalar/utf32_to_latin1/valid_utf32_to_latin1.h +67 -0
  278. package/deps/simdutf/include/simdutf/scalar/utf32_to_utf16/utf32_to_utf16.h +84 -0
  279. package/deps/simdutf/include/simdutf/scalar/utf32_to_utf16/valid_utf32_to_utf16.h +44 -0
  280. package/deps/simdutf/include/simdutf/scalar/utf32_to_utf8/utf32_to_utf8.h +142 -0
  281. package/deps/simdutf/include/simdutf/scalar/utf32_to_utf8/valid_utf32_to_utf8.h +72 -0
  282. package/deps/simdutf/include/simdutf/scalar/utf8.h +326 -0
  283. package/deps/simdutf/include/simdutf/scalar/utf8_to_latin1/utf8_to_latin1.h +225 -0
  284. package/deps/simdutf/include/simdutf/scalar/utf8_to_latin1/valid_utf8_to_latin1.h +87 -0
  285. package/deps/simdutf/include/simdutf/scalar/utf8_to_utf16/utf8_to_utf16.h +342 -0
  286. package/deps/simdutf/include/simdutf/scalar/utf8_to_utf16/valid_utf8_to_utf16.h +106 -0
  287. package/deps/simdutf/include/simdutf/scalar/utf8_to_utf32/utf8_to_utf32.h +299 -0
  288. package/deps/simdutf/include/simdutf/scalar/utf8_to_utf32/valid_utf8_to_utf32.h +83 -0
  289. package/deps/simdutf/include/simdutf/simdutf_version.h +26 -0
  290. package/deps/simdutf/include/simdutf.h +26 -0
  291. package/deps/simdutf/include/simdutf_c.h +342 -0
  292. package/deps/simdutf/riscv/Dockerfile +16 -0
  293. package/deps/simdutf/riscv/README.md +24 -0
  294. package/deps/simdutf/riscv/remove-docker-station +8 -0
  295. package/deps/simdutf/riscv/run-docker-station +31 -0
  296. package/deps/simdutf/scripts/.flake8 +2 -0
  297. package/deps/simdutf/scripts/Makefile +2 -0
  298. package/deps/simdutf/scripts/README_ADD_FUNCTION.md +49 -0
  299. package/deps/simdutf/scripts/add_function.py +330 -0
  300. package/deps/simdutf/scripts/amalgamation_tests.py +156 -0
  301. package/deps/simdutf/scripts/base64/Makefile +2 -0
  302. package/deps/simdutf/scripts/base64/README.md +2 -0
  303. package/deps/simdutf/scripts/base64/avx512.py +76 -0
  304. package/deps/simdutf/scripts/base64/neon_decode.py +143 -0
  305. package/deps/simdutf/scripts/base64/neon_generate_lut.py +101 -0
  306. package/deps/simdutf/scripts/base64/sse.py +252 -0
  307. package/deps/simdutf/scripts/base64/sseregular.py +160 -0
  308. package/deps/simdutf/scripts/base64/sseurl.py +283 -0
  309. package/deps/simdutf/scripts/base64/table.py +59 -0
  310. package/deps/simdutf/scripts/base64bench_print.py +145 -0
  311. package/deps/simdutf/scripts/benchmark-all.py +119 -0
  312. package/deps/simdutf/scripts/benchmark_print.py +324 -0
  313. package/deps/simdutf/scripts/check_feature_macros.py +156 -0
  314. package/deps/simdutf/scripts/check_typos.sh +13 -0
  315. package/deps/simdutf/scripts/clang_format.sh +35 -0
  316. package/deps/simdutf/scripts/clang_format_docker.sh +38 -0
  317. package/deps/simdutf/scripts/common.py +24 -0
  318. package/deps/simdutf/scripts/compilation_benchmark.py +55 -0
  319. package/deps/simdutf/scripts/compile_many_variations.sh +64 -0
  320. package/deps/simdutf/scripts/create_latex_table.py +62 -0
  321. package/deps/simdutf/scripts/docker/Dockerfile +14 -0
  322. package/deps/simdutf/scripts/docker/Makefile +9 -0
  323. package/deps/simdutf/scripts/docker/README.md +30 -0
  324. package/deps/simdutf/scripts/docker/llvm.gpg +0 -0
  325. package/deps/simdutf/scripts/ppc64_convert_utf16_to_utf8.py +155 -0
  326. package/deps/simdutf/scripts/prepare_doxygen.sh +21 -0
  327. package/deps/simdutf/scripts/release.py +197 -0
  328. package/deps/simdutf/scripts/shortinputplots.py +97 -0
  329. package/deps/simdutf/scripts/sse_convert_utf16_to_utf8.py +422 -0
  330. package/deps/simdutf/scripts/sse_convert_utf32_to_utf16.py +105 -0
  331. package/deps/simdutf/scripts/sse_utf8_utf16_decode.py +186 -0
  332. package/deps/simdutf/scripts/sse_validate_utf16le_proof.py +137 -0
  333. package/deps/simdutf/scripts/sse_validate_utf16le_testcases.py +129 -0
  334. package/deps/simdutf/scripts/table.py +207 -0
  335. package/deps/simdutf/scripts/tests/new.txt +33 -0
  336. package/deps/simdutf/scripts/tests/old.txt +33 -0
  337. package/deps/simdutf/scripts/tests/results.txt +272 -0
  338. package/deps/simdutf/simdutf.pc.in +11 -0
  339. package/deps/simdutf/singleheader/.flake8 +2 -0
  340. package/deps/simdutf/singleheader/CMakeLists.txt +64 -0
  341. package/deps/simdutf/singleheader/README-dev.md +81 -0
  342. package/deps/simdutf/singleheader/README.md +19 -0
  343. package/deps/simdutf/singleheader/amalgamate.py +513 -0
  344. package/deps/simdutf/singleheader/amalgamation_demo.c +59 -0
  345. package/deps/simdutf/singleheader/amalgamation_demo.cpp +54 -0
  346. package/deps/simdutf/singleheader/test-features.py +262 -0
  347. package/deps/simdutf/src/CMakeLists.txt +78 -0
  348. package/deps/simdutf/src/arm64/arm_base64.cpp +791 -0
  349. package/deps/simdutf/src/arm64/arm_convert_latin1_to_utf16.cpp +24 -0
  350. package/deps/simdutf/src/arm64/arm_convert_latin1_to_utf32.cpp +24 -0
  351. package/deps/simdutf/src/arm64/arm_convert_latin1_to_utf8.cpp +70 -0
  352. package/deps/simdutf/src/arm64/arm_convert_utf16_to_latin1.cpp +61 -0
  353. package/deps/simdutf/src/arm64/arm_convert_utf16_to_utf32.cpp +185 -0
  354. package/deps/simdutf/src/arm64/arm_convert_utf16_to_utf8.cpp +780 -0
  355. package/deps/simdutf/src/arm64/arm_convert_utf32_to_latin1.cpp +60 -0
  356. package/deps/simdutf/src/arm64/arm_convert_utf32_to_utf16.cpp +208 -0
  357. package/deps/simdutf/src/arm64/arm_convert_utf32_to_utf8.cpp +505 -0
  358. package/deps/simdutf/src/arm64/arm_convert_utf8_to_latin1.cpp +69 -0
  359. package/deps/simdutf/src/arm64/arm_convert_utf8_to_utf16.cpp +313 -0
  360. package/deps/simdutf/src/arm64/arm_convert_utf8_to_utf32.cpp +179 -0
  361. package/deps/simdutf/src/arm64/arm_find.cpp +199 -0
  362. package/deps/simdutf/src/arm64/arm_utf16fix.cpp +185 -0
  363. package/deps/simdutf/src/arm64/arm_validate_utf16.cpp +165 -0
  364. package/deps/simdutf/src/arm64/arm_validate_utf32le.cpp +65 -0
  365. package/deps/simdutf/src/arm64/implementation.cpp +1442 -0
  366. package/deps/simdutf/src/encoding_types.cpp +67 -0
  367. package/deps/simdutf/src/error.cpp +3 -0
  368. package/deps/simdutf/src/fallback/implementation.cpp +589 -0
  369. package/deps/simdutf/src/generic/ascii_validation.h +50 -0
  370. package/deps/simdutf/src/generic/base64.h +233 -0
  371. package/deps/simdutf/src/generic/base64lengths.h +63 -0
  372. package/deps/simdutf/src/generic/buf_block_reader.h +109 -0
  373. package/deps/simdutf/src/generic/find.h +75 -0
  374. package/deps/simdutf/src/generic/utf16/change_endianness.h +24 -0
  375. package/deps/simdutf/src/generic/utf16/count_code_points_bytemask.h +58 -0
  376. package/deps/simdutf/src/generic/utf16/to_well_formed.h +93 -0
  377. package/deps/simdutf/src/generic/utf16/utf32_length_from_utf16.h +15 -0
  378. package/deps/simdutf/src/generic/utf16/utf8_length_from_utf16.h +35 -0
  379. package/deps/simdutf/src/generic/utf16/utf8_length_from_utf16_bytemask.h +199 -0
  380. package/deps/simdutf/src/generic/utf16.h +73 -0
  381. package/deps/simdutf/src/generic/utf32.h +136 -0
  382. package/deps/simdutf/src/generic/utf8/utf16_length_from_utf8_bytemask.h +53 -0
  383. package/deps/simdutf/src/generic/utf8.h +92 -0
  384. package/deps/simdutf/src/generic/utf8_to_latin1/utf8_to_latin1.h +316 -0
  385. package/deps/simdutf/src/generic/utf8_to_latin1/valid_utf8_to_latin1.h +78 -0
  386. package/deps/simdutf/src/generic/utf8_to_utf16/utf8_to_utf16.h +332 -0
  387. package/deps/simdutf/src/generic/utf8_to_utf16/valid_utf8_to_utf16.h +74 -0
  388. package/deps/simdutf/src/generic/utf8_to_utf32/utf8_to_utf32.h +318 -0
  389. package/deps/simdutf/src/generic/utf8_to_utf32/valid_utf8_to_utf32.h +42 -0
  390. package/deps/simdutf/src/generic/utf8_validation/utf8_lookup4_algorithm.h +223 -0
  391. package/deps/simdutf/src/generic/utf8_validation/utf8_validator.h +84 -0
  392. package/deps/simdutf/src/generic/validate_utf16.h +164 -0
  393. package/deps/simdutf/src/generic/validate_utf32.h +99 -0
  394. package/deps/simdutf/src/haswell/avx2_base64.cpp +837 -0
  395. package/deps/simdutf/src/haswell/avx2_convert_latin1_to_utf16.cpp +28 -0
  396. package/deps/simdutf/src/haswell/avx2_convert_latin1_to_utf32.cpp +20 -0
  397. package/deps/simdutf/src/haswell/avx2_convert_latin1_to_utf8.cpp +83 -0
  398. package/deps/simdutf/src/haswell/avx2_convert_utf16_to_latin1.cpp +83 -0
  399. package/deps/simdutf/src/haswell/avx2_convert_utf16_to_utf32.cpp +210 -0
  400. package/deps/simdutf/src/haswell/avx2_convert_utf16_to_utf8.cpp +602 -0
  401. package/deps/simdutf/src/haswell/avx2_convert_utf32_to_latin1.cpp +116 -0
  402. package/deps/simdutf/src/haswell/avx2_convert_utf32_to_utf16.cpp +164 -0
  403. package/deps/simdutf/src/haswell/avx2_convert_utf32_to_utf8.cpp +569 -0
  404. package/deps/simdutf/src/haswell/avx2_convert_utf8_to_latin1.cpp +60 -0
  405. package/deps/simdutf/src/haswell/avx2_convert_utf8_to_utf16.cpp +195 -0
  406. package/deps/simdutf/src/haswell/avx2_convert_utf8_to_utf32.cpp +135 -0
  407. package/deps/simdutf/src/haswell/avx2_utf16fix.cpp +173 -0
  408. package/deps/simdutf/src/haswell/avx2_validate_utf16.cpp +17 -0
  409. package/deps/simdutf/src/haswell/implementation.cpp +1447 -0
  410. package/deps/simdutf/src/icelake/icelake_ascii_validation.inl.cpp +19 -0
  411. package/deps/simdutf/src/icelake/icelake_base64.inl.cpp +630 -0
  412. package/deps/simdutf/src/icelake/icelake_common.inl.cpp +37 -0
  413. package/deps/simdutf/src/icelake/icelake_convert_latin1_to_utf16.inl.cpp +36 -0
  414. package/deps/simdutf/src/icelake/icelake_convert_latin1_to_utf32.inl.cpp +23 -0
  415. package/deps/simdutf/src/icelake/icelake_convert_latin1_to_utf8.inl.cpp +107 -0
  416. package/deps/simdutf/src/icelake/icelake_convert_utf16_to_latin1.inl.cpp +103 -0
  417. package/deps/simdutf/src/icelake/icelake_convert_utf16_to_utf32.inl.cpp +136 -0
  418. package/deps/simdutf/src/icelake/icelake_convert_utf16_to_utf8.inl.cpp +206 -0
  419. package/deps/simdutf/src/icelake/icelake_convert_utf32_to_latin1.inl.cpp +74 -0
  420. package/deps/simdutf/src/icelake/icelake_convert_utf32_to_utf16.inl.cpp +338 -0
  421. package/deps/simdutf/src/icelake/icelake_convert_utf32_to_utf8.inl.cpp +574 -0
  422. package/deps/simdutf/src/icelake/icelake_convert_utf8_to_latin1.inl.cpp +104 -0
  423. package/deps/simdutf/src/icelake/icelake_convert_utf8_to_utf16.inl.cpp +75 -0
  424. package/deps/simdutf/src/icelake/icelake_convert_valid_utf8_to_latin1.inl.cpp +69 -0
  425. package/deps/simdutf/src/icelake/icelake_find.inl.cpp +146 -0
  426. package/deps/simdutf/src/icelake/icelake_from_utf8.inl.cpp +266 -0
  427. package/deps/simdutf/src/icelake/icelake_from_valid_utf8.inl.cpp +136 -0
  428. package/deps/simdutf/src/icelake/icelake_macros.inl.cpp +143 -0
  429. package/deps/simdutf/src/icelake/icelake_utf16fix.cpp +138 -0
  430. package/deps/simdutf/src/icelake/icelake_utf32_validation.inl.cpp +63 -0
  431. package/deps/simdutf/src/icelake/icelake_utf8_common.inl.cpp +753 -0
  432. package/deps/simdutf/src/icelake/icelake_utf8_length_from_utf16.inl.cpp +269 -0
  433. package/deps/simdutf/src/icelake/icelake_utf8_validation.inl.cpp +116 -0
  434. package/deps/simdutf/src/icelake/implementation.cpp +1903 -0
  435. package/deps/simdutf/src/implementation.cpp +2526 -0
  436. package/deps/simdutf/src/lasx/implementation.cpp +1531 -0
  437. package/deps/simdutf/src/lasx/lasx_base64.cpp +695 -0
  438. package/deps/simdutf/src/lasx/lasx_convert_latin1_to_utf16.cpp +76 -0
  439. package/deps/simdutf/src/lasx/lasx_convert_latin1_to_utf32.cpp +55 -0
  440. package/deps/simdutf/src/lasx/lasx_convert_latin1_to_utf8.cpp +65 -0
  441. package/deps/simdutf/src/lasx/lasx_convert_utf16_to_latin1.cpp +64 -0
  442. package/deps/simdutf/src/lasx/lasx_convert_utf16_to_utf32.cpp +183 -0
  443. package/deps/simdutf/src/lasx/lasx_convert_utf16_to_utf8.cpp +550 -0
  444. package/deps/simdutf/src/lasx/lasx_convert_utf32_to_latin1.cpp +73 -0
  445. package/deps/simdutf/src/lasx/lasx_convert_utf32_to_utf16.cpp +218 -0
  446. package/deps/simdutf/src/lasx/lasx_convert_utf32_to_utf8.cpp +589 -0
  447. package/deps/simdutf/src/lasx/lasx_convert_utf8_to_latin1.cpp +72 -0
  448. package/deps/simdutf/src/lasx/lasx_convert_utf8_to_utf16.cpp +296 -0
  449. package/deps/simdutf/src/lasx/lasx_convert_utf8_to_utf32.cpp +190 -0
  450. package/deps/simdutf/src/lasx/lasx_find.cpp +64 -0
  451. package/deps/simdutf/src/lasx/lasx_validate_utf16.cpp +13 -0
  452. package/deps/simdutf/src/lasx/lasx_validate_utf32le.cpp +84 -0
  453. package/deps/simdutf/src/lsx/implementation.cpp +1417 -0
  454. package/deps/simdutf/src/lsx/lsx_base64.cpp +675 -0
  455. package/deps/simdutf/src/lsx/lsx_convert_latin1_to_utf16.cpp +39 -0
  456. package/deps/simdutf/src/lsx/lsx_convert_latin1_to_utf32.cpp +27 -0
  457. package/deps/simdutf/src/lsx/lsx_convert_latin1_to_utf8.cpp +56 -0
  458. package/deps/simdutf/src/lsx/lsx_convert_utf16_to_latin1.cpp +64 -0
  459. package/deps/simdutf/src/lsx/lsx_convert_utf16_to_utf32.cpp +133 -0
  460. package/deps/simdutf/src/lsx/lsx_convert_utf16_to_utf8.cpp +518 -0
  461. package/deps/simdutf/src/lsx/lsx_convert_utf32_to_latin1.cpp +66 -0
  462. package/deps/simdutf/src/lsx/lsx_convert_utf32_to_utf16.cpp +155 -0
  463. package/deps/simdutf/src/lsx/lsx_convert_utf32_to_utf8.cpp +459 -0
  464. package/deps/simdutf/src/lsx/lsx_convert_utf8_to_latin1.cpp +75 -0
  465. package/deps/simdutf/src/lsx/lsx_convert_utf8_to_utf16.cpp +291 -0
  466. package/deps/simdutf/src/lsx/lsx_convert_utf8_to_utf32.cpp +179 -0
  467. package/deps/simdutf/src/lsx/lsx_find.cpp +60 -0
  468. package/deps/simdutf/src/lsx/lsx_validate_utf16.cpp +13 -0
  469. package/deps/simdutf/src/lsx/lsx_validate_utf32le.cpp +68 -0
  470. package/deps/simdutf/src/ppc64/implementation.cpp +992 -0
  471. package/deps/simdutf/src/ppc64/ppc64_base64.cpp +480 -0
  472. package/deps/simdutf/src/ppc64/ppc64_base64_internal_tests.cpp +401 -0
  473. package/deps/simdutf/src/ppc64/ppc64_convert_latin1_to_utf16.cpp +12 -0
  474. package/deps/simdutf/src/ppc64/ppc64_convert_latin1_to_utf32.cpp +12 -0
  475. package/deps/simdutf/src/ppc64/ppc64_convert_latin1_to_utf8.cpp +149 -0
  476. package/deps/simdutf/src/ppc64/ppc64_convert_utf16_to_latin1.cpp +67 -0
  477. package/deps/simdutf/src/ppc64/ppc64_convert_utf16_to_utf32.cpp +87 -0
  478. package/deps/simdutf/src/ppc64/ppc64_convert_utf16_to_utf8.cpp +296 -0
  479. package/deps/simdutf/src/ppc64/ppc64_convert_utf32_to_latin1.cpp +57 -0
  480. package/deps/simdutf/src/ppc64/ppc64_convert_utf32_to_utf16.cpp +117 -0
  481. package/deps/simdutf/src/ppc64/ppc64_convert_utf32_to_utf8.cpp +166 -0
  482. package/deps/simdutf/src/ppc64/ppc64_convert_utf8_to_latin1.cpp +69 -0
  483. package/deps/simdutf/src/ppc64/ppc64_convert_utf8_to_utf16.cpp +211 -0
  484. package/deps/simdutf/src/ppc64/ppc64_convert_utf8_to_utf32.cpp +153 -0
  485. package/deps/simdutf/src/ppc64/ppc64_utf16_to_utf8_tables.h +1011 -0
  486. package/deps/simdutf/src/ppc64/ppc64_utf8_length_from_latin1.cpp +37 -0
  487. package/deps/simdutf/src/ppc64/ppc64_validate_utf16.cpp +19 -0
  488. package/deps/simdutf/src/ppc64/templates.cpp +91 -0
  489. package/deps/simdutf/src/rvv/implementation.cpp +138 -0
  490. package/deps/simdutf/src/rvv/rvv_find.cpp +27 -0
  491. package/deps/simdutf/src/rvv/rvv_helpers.inl.cpp +23 -0
  492. package/deps/simdutf/src/rvv/rvv_latin1_to.inl.cpp +71 -0
  493. package/deps/simdutf/src/rvv/rvv_length_from.inl.cpp +164 -0
  494. package/deps/simdutf/src/rvv/rvv_utf16_to.inl.cpp +399 -0
  495. package/deps/simdutf/src/rvv/rvv_utf16fix.cpp +110 -0
  496. package/deps/simdutf/src/rvv/rvv_utf32_to.inl.cpp +307 -0
  497. package/deps/simdutf/src/rvv/rvv_utf8_to.inl.cpp +435 -0
  498. package/deps/simdutf/src/rvv/rvv_validate.inl.cpp +275 -0
  499. package/deps/simdutf/src/simdutf/arm64/begin.h +2 -0
  500. package/deps/simdutf/src/simdutf/arm64/bitmanipulation.h +34 -0
  501. package/deps/simdutf/src/simdutf/arm64/end.h +2 -0
  502. package/deps/simdutf/src/simdutf/arm64/implementation.h +307 -0
  503. package/deps/simdutf/src/simdutf/arm64/intrinsics.h +10 -0
  504. package/deps/simdutf/src/simdutf/arm64/simd.h +547 -0
  505. package/deps/simdutf/src/simdutf/arm64/simd16-inl.h +403 -0
  506. package/deps/simdutf/src/simdutf/arm64/simd32-inl.h +129 -0
  507. package/deps/simdutf/src/simdutf/arm64/simd64-inl.h +28 -0
  508. package/deps/simdutf/src/simdutf/arm64.h +43 -0
  509. package/deps/simdutf/src/simdutf/fallback/begin.h +1 -0
  510. package/deps/simdutf/src/simdutf/fallback/bitmanipulation.h +13 -0
  511. package/deps/simdutf/src/simdutf/fallback/end.h +1 -0
  512. package/deps/simdutf/src/simdutf/fallback/implementation.h +331 -0
  513. package/deps/simdutf/src/simdutf/fallback.h +42 -0
  514. package/deps/simdutf/src/simdutf/haswell/begin.h +15 -0
  515. package/deps/simdutf/src/simdutf/haswell/bitmanipulation.h +35 -0
  516. package/deps/simdutf/src/simdutf/haswell/end.h +13 -0
  517. package/deps/simdutf/src/simdutf/haswell/implementation.h +338 -0
  518. package/deps/simdutf/src/simdutf/haswell/intrinsics.h +67 -0
  519. package/deps/simdutf/src/simdutf/haswell/simd.h +363 -0
  520. package/deps/simdutf/src/simdutf/haswell/simd16-inl.h +261 -0
  521. package/deps/simdutf/src/simdutf/haswell/simd32-inl.h +111 -0
  522. package/deps/simdutf/src/simdutf/haswell/simd64-inl.h +34 -0
  523. package/deps/simdutf/src/simdutf/haswell.h +63 -0
  524. package/deps/simdutf/src/simdutf/icelake/begin.h +14 -0
  525. package/deps/simdutf/src/simdutf/icelake/bitmanipulation.h +44 -0
  526. package/deps/simdutf/src/simdutf/icelake/end.h +12 -0
  527. package/deps/simdutf/src/simdutf/icelake/implementation.h +346 -0
  528. package/deps/simdutf/src/simdutf/icelake/intrinsics.h +138 -0
  529. package/deps/simdutf/src/simdutf/icelake/simd.h +17 -0
  530. package/deps/simdutf/src/simdutf/icelake/simd16-inl.h +90 -0
  531. package/deps/simdutf/src/simdutf/icelake/simd32-inl.h +47 -0
  532. package/deps/simdutf/src/simdutf/icelake.h +81 -0
  533. package/deps/simdutf/src/simdutf/lasx/begin.h +8 -0
  534. package/deps/simdutf/src/simdutf/lasx/bitmanipulation.h +25 -0
  535. package/deps/simdutf/src/simdutf/lasx/end.h +8 -0
  536. package/deps/simdutf/src/simdutf/lasx/implementation.h +310 -0
  537. package/deps/simdutf/src/simdutf/lasx/intrinsics.h +319 -0
  538. package/deps/simdutf/src/simdutf/lasx/simd.h +551 -0
  539. package/deps/simdutf/src/simdutf/lasx/simd16-inl.h +234 -0
  540. package/deps/simdutf/src/simdutf/lasx/simd32-inl.h +74 -0
  541. package/deps/simdutf/src/simdutf/lasx/simd64-inl.h +52 -0
  542. package/deps/simdutf/src/simdutf/lasx.h +49 -0
  543. package/deps/simdutf/src/simdutf/lsx/begin.h +2 -0
  544. package/deps/simdutf/src/simdutf/lsx/bitmanipulation.h +25 -0
  545. package/deps/simdutf/src/simdutf/lsx/end.h +2 -0
  546. package/deps/simdutf/src/simdutf/lsx/implementation.h +309 -0
  547. package/deps/simdutf/src/simdutf/lsx/intrinsics.h +196 -0
  548. package/deps/simdutf/src/simdutf/lsx/simd.h +421 -0
  549. package/deps/simdutf/src/simdutf/lsx/simd16-inl.h +242 -0
  550. package/deps/simdutf/src/simdutf/lsx/simd32-inl.h +69 -0
  551. package/deps/simdutf/src/simdutf/lsx/simd64-inl.h +50 -0
  552. package/deps/simdutf/src/simdutf/lsx.h +52 -0
  553. package/deps/simdutf/src/simdutf/ppc64/begin.h +1 -0
  554. package/deps/simdutf/src/simdutf/ppc64/bitmanipulation.h +29 -0
  555. package/deps/simdutf/src/simdutf/ppc64/end.h +1 -0
  556. package/deps/simdutf/src/simdutf/ppc64/implementation.h +348 -0
  557. package/deps/simdutf/src/simdutf/ppc64/intrinsics.h +19 -0
  558. package/deps/simdutf/src/simdutf/ppc64/simd.h +177 -0
  559. package/deps/simdutf/src/simdutf/ppc64/simd16-inl.h +327 -0
  560. package/deps/simdutf/src/simdutf/ppc64/simd32-inl.h +247 -0
  561. package/deps/simdutf/src/simdutf/ppc64/simd8-inl.h +618 -0
  562. package/deps/simdutf/src/simdutf/ppc64.h +40 -0
  563. package/deps/simdutf/src/simdutf/rvv/begin.h +7 -0
  564. package/deps/simdutf/src/simdutf/rvv/end.h +7 -0
  565. package/deps/simdutf/src/simdutf/rvv/implementation.h +321 -0
  566. package/deps/simdutf/src/simdutf/rvv/intrinsics.h +131 -0
  567. package/deps/simdutf/src/simdutf/rvv.h +41 -0
  568. package/deps/simdutf/src/simdutf/westmere/begin.h +8 -0
  569. package/deps/simdutf/src/simdutf/westmere/bitmanipulation.h +37 -0
  570. package/deps/simdutf/src/simdutf/westmere/end.h +8 -0
  571. package/deps/simdutf/src/simdutf/westmere/implementation.h +338 -0
  572. package/deps/simdutf/src/simdutf/westmere/intrinsics.h +38 -0
  573. package/deps/simdutf/src/simdutf/westmere/simd.h +379 -0
  574. package/deps/simdutf/src/simdutf/westmere/simd16-inl.h +242 -0
  575. package/deps/simdutf/src/simdutf/westmere/simd32-inl.h +151 -0
  576. package/deps/simdutf/src/simdutf/westmere/simd64-inl.h +33 -0
  577. package/deps/simdutf/src/simdutf/westmere.h +59 -0
  578. package/deps/simdutf/src/simdutf.cpp +152 -0
  579. package/deps/simdutf/src/simdutf_c.cpp +525 -0
  580. package/deps/simdutf/src/tables/utf16_to_utf8_tables.h +768 -0
  581. package/deps/simdutf/src/tables/utf32_to_utf16_tables.h +53 -0
  582. package/deps/simdutf/src/tables/utf8_to_utf16_tables.h +826 -0
  583. package/deps/simdutf/src/westmere/implementation.cpp +1479 -0
  584. package/deps/simdutf/src/westmere/internal/loader.cpp +7 -0
  585. package/deps/simdutf/src/westmere/internal/write_v_u16_11bits_to_utf8.cpp +66 -0
  586. package/deps/simdutf/src/westmere/sse_base64.cpp +672 -0
  587. package/deps/simdutf/src/westmere/sse_convert_latin1_to_utf16.cpp +21 -0
  588. package/deps/simdutf/src/westmere/sse_convert_latin1_to_utf32.cpp +31 -0
  589. package/deps/simdutf/src/westmere/sse_convert_latin1_to_utf8.cpp +71 -0
  590. package/deps/simdutf/src/westmere/sse_convert_utf16_to_latin1.cpp +70 -0
  591. package/deps/simdutf/src/westmere/sse_convert_utf16_to_utf32.cpp +206 -0
  592. package/deps/simdutf/src/westmere/sse_convert_utf16_to_utf8.cpp +504 -0
  593. package/deps/simdutf/src/westmere/sse_convert_utf32_to_latin1.cpp +82 -0
  594. package/deps/simdutf/src/westmere/sse_convert_utf32_to_utf16.cpp +209 -0
  595. package/deps/simdutf/src/westmere/sse_convert_utf32_to_utf8.cpp +589 -0
  596. package/deps/simdutf/src/westmere/sse_convert_utf8_to_latin1.cpp +58 -0
  597. package/deps/simdutf/src/westmere/sse_convert_utf8_to_utf16.cpp +197 -0
  598. package/deps/simdutf/src/westmere/sse_convert_utf8_to_utf32.cpp +141 -0
  599. package/deps/simdutf/src/westmere/sse_utf16fix.cpp +82 -0
  600. package/deps/simdutf/src/westmere/sse_validate_utf16.cpp +17 -0
  601. package/deps/simdutf/tests/CMakeLists.txt +483 -0
  602. package/deps/simdutf/tests/atomic_base64_tests.cpp +2845 -0
  603. package/deps/simdutf/tests/base64_tests.cpp +3617 -0
  604. package/deps/simdutf/tests/basic_fuzzer.cpp +805 -0
  605. package/deps/simdutf/tests/bele_tests.cpp +182 -0
  606. package/deps/simdutf/tests/constexpr_base64_tests.cpp +387 -0
  607. package/deps/simdutf/tests/convert_latin1_to_utf16be_tests.cpp +52 -0
  608. package/deps/simdutf/tests/convert_latin1_to_utf16le_tests.cpp +80 -0
  609. package/deps/simdutf/tests/convert_latin1_to_utf32_tests.cpp +66 -0
  610. package/deps/simdutf/tests/convert_latin1_to_utf8_tests.cpp +120 -0
  611. package/deps/simdutf/tests/convert_utf16_to_utf8_safe_tests.cpp +203 -0
  612. package/deps/simdutf/tests/convert_utf16_to_utf8_with_replacement_tests.cpp +276 -0
  613. package/deps/simdutf/tests/convert_utf16be_to_latin1_tests.cpp +109 -0
  614. package/deps/simdutf/tests/convert_utf16be_to_latin1_tests_with_errors.cpp +136 -0
  615. package/deps/simdutf/tests/convert_utf16be_to_utf32_tests.cpp +193 -0
  616. package/deps/simdutf/tests/convert_utf16be_to_utf32_with_errors_tests.cpp +381 -0
  617. package/deps/simdutf/tests/convert_utf16be_to_utf8_tests.cpp +259 -0
  618. package/deps/simdutf/tests/convert_utf16be_to_utf8_with_errors_tests.cpp +266 -0
  619. package/deps/simdutf/tests/convert_utf16le_to_latin1_tests.cpp +148 -0
  620. package/deps/simdutf/tests/convert_utf16le_to_latin1_tests_with_errors.cpp +176 -0
  621. package/deps/simdutf/tests/convert_utf16le_to_utf32_tests.cpp +213 -0
  622. package/deps/simdutf/tests/convert_utf16le_to_utf32_with_errors_tests.cpp +318 -0
  623. package/deps/simdutf/tests/convert_utf16le_to_utf8_tests.cpp +343 -0
  624. package/deps/simdutf/tests/convert_utf16le_to_utf8_with_errors_tests.cpp +271 -0
  625. package/deps/simdutf/tests/convert_utf32_to_latin1_tests.cpp +111 -0
  626. package/deps/simdutf/tests/convert_utf32_to_latin1_with_errors_tests.cpp +96 -0
  627. package/deps/simdutf/tests/convert_utf32_to_utf16be_tests.cpp +148 -0
  628. package/deps/simdutf/tests/convert_utf32_to_utf16be_with_errors_tests.cpp +192 -0
  629. package/deps/simdutf/tests/convert_utf32_to_utf16le_tests.cpp +166 -0
  630. package/deps/simdutf/tests/convert_utf32_to_utf16le_with_errors_tests.cpp +215 -0
  631. package/deps/simdutf/tests/convert_utf32_to_utf8_tests.cpp +181 -0
  632. package/deps/simdutf/tests/convert_utf32_to_utf8_with_errors_tests.cpp +261 -0
  633. package/deps/simdutf/tests/convert_utf8_to_latin1_tests.cpp +516 -0
  634. package/deps/simdutf/tests/convert_utf8_to_latin1_with_errors_tests.cpp +579 -0
  635. package/deps/simdutf/tests/convert_utf8_to_utf16be_tests.cpp +412 -0
  636. package/deps/simdutf/tests/convert_utf8_to_utf16be_with_errors_tests.cpp +480 -0
  637. package/deps/simdutf/tests/convert_utf8_to_utf16le_tests.cpp +671 -0
  638. package/deps/simdutf/tests/convert_utf8_to_utf16le_with_errors_tests.cpp +455 -0
  639. package/deps/simdutf/tests/convert_utf8_to_utf32_tests.cpp +1204 -0
  640. package/deps/simdutf/tests/convert_utf8_to_utf32_with_errors_tests.cpp +337 -0
  641. package/deps/simdutf/tests/convert_valid_utf16be_to_latin1_tests.cpp +37 -0
  642. package/deps/simdutf/tests/convert_valid_utf16be_to_utf32_tests.cpp +97 -0
  643. package/deps/simdutf/tests/convert_valid_utf16be_to_utf8_tests.cpp +126 -0
  644. package/deps/simdutf/tests/convert_valid_utf16le_to_latin1_tests.cpp +71 -0
  645. package/deps/simdutf/tests/convert_valid_utf16le_to_utf32_tests.cpp +122 -0
  646. package/deps/simdutf/tests/convert_valid_utf16le_to_utf8_tests.cpp +244 -0
  647. package/deps/simdutf/tests/convert_valid_utf32_to_latin1_tests.cpp +49 -0
  648. package/deps/simdutf/tests/convert_valid_utf32_to_utf16be_tests.cpp +92 -0
  649. package/deps/simdutf/tests/convert_valid_utf32_to_utf16le_tests.cpp +114 -0
  650. package/deps/simdutf/tests/convert_valid_utf32_to_utf8_tests.cpp +109 -0
  651. package/deps/simdutf/tests/convert_valid_utf8_to_latin1_tests.cpp +84 -0
  652. package/deps/simdutf/tests/convert_valid_utf8_to_utf16be_tests.cpp +124 -0
  653. package/deps/simdutf/tests/convert_valid_utf8_to_utf16le_tests.cpp +221 -0
  654. package/deps/simdutf/tests/convert_valid_utf8_to_utf32_tests.cpp +155 -0
  655. package/deps/simdutf/tests/count_utf16be.cpp +64 -0
  656. package/deps/simdutf/tests/count_utf16le.cpp +61 -0
  657. package/deps/simdutf/tests/count_utf8.cpp +87 -0
  658. package/deps/simdutf/tests/detect_encodings_tests.cpp +312 -0
  659. package/deps/simdutf/tests/embed/valid_utf8.txt +1 -0
  660. package/deps/simdutf/tests/embed_tests.cpp +22 -0
  661. package/deps/simdutf/tests/find_tests.cpp +77 -0
  662. package/deps/simdutf/tests/fixed_string_tests.cpp +153 -0
  663. package/deps/simdutf/tests/helpers/CMakeLists.txt +25 -0
  664. package/deps/simdutf/tests/helpers/compiletime_conversions.h +222 -0
  665. package/deps/simdutf/tests/helpers/fixed_string.h +267 -0
  666. package/deps/simdutf/tests/helpers/random_int.cpp +30 -0
  667. package/deps/simdutf/tests/helpers/random_int.h +39 -0
  668. package/deps/simdutf/tests/helpers/random_utf16.cpp +123 -0
  669. package/deps/simdutf/tests/helpers/random_utf16.h +52 -0
  670. package/deps/simdutf/tests/helpers/random_utf32.cpp +41 -0
  671. package/deps/simdutf/tests/helpers/random_utf32.h +40 -0
  672. package/deps/simdutf/tests/helpers/random_utf8.cpp +93 -0
  673. package/deps/simdutf/tests/helpers/random_utf8.h +36 -0
  674. package/deps/simdutf/tests/helpers/test.cpp +231 -0
  675. package/deps/simdutf/tests/helpers/test.h +193 -0
  676. package/deps/simdutf/tests/helpers/transcode_test_base.cpp +1257 -0
  677. package/deps/simdutf/tests/helpers/transcode_test_base.h +683 -0
  678. package/deps/simdutf/tests/helpers/utf16.h +27 -0
  679. package/deps/simdutf/tests/installation_tests/find/CMakeLists.txt +43 -0
  680. package/deps/simdutf/tests/installation_tests/from_fetch/CMakeLists.txt +47 -0
  681. package/deps/simdutf/tests/internal_tests.cpp +27 -0
  682. package/deps/simdutf/tests/null_safety_tests.cpp +94 -0
  683. package/deps/simdutf/tests/random_fuzzer.cpp +779 -0
  684. package/deps/simdutf/tests/readme_tests.cpp +274 -0
  685. package/deps/simdutf/tests/reference/CMakeLists.txt +23 -0
  686. package/deps/simdutf/tests/reference/decode_utf16.h +81 -0
  687. package/deps/simdutf/tests/reference/decode_utf32.h +47 -0
  688. package/deps/simdutf/tests/reference/encode_latin1.cpp +1 -0
  689. package/deps/simdutf/tests/reference/encode_latin1.h +32 -0
  690. package/deps/simdutf/tests/reference/encode_utf16.cpp +49 -0
  691. package/deps/simdutf/tests/reference/encode_utf16.h +20 -0
  692. package/deps/simdutf/tests/reference/encode_utf32.cpp +1 -0
  693. package/deps/simdutf/tests/reference/encode_utf32.h +36 -0
  694. package/deps/simdutf/tests/reference/encode_utf8.cpp +1 -0
  695. package/deps/simdutf/tests/reference/encode_utf8.h +40 -0
  696. package/deps/simdutf/tests/reference/validate_utf16.cpp +60 -0
  697. package/deps/simdutf/tests/reference/validate_utf16.h +14 -0
  698. package/deps/simdutf/tests/reference/validate_utf16_to_latin1.cpp +35 -0
  699. package/deps/simdutf/tests/reference/validate_utf16_to_latin1.h +13 -0
  700. package/deps/simdutf/tests/reference/validate_utf32.cpp +27 -0
  701. package/deps/simdutf/tests/reference/validate_utf32.h +12 -0
  702. package/deps/simdutf/tests/reference/validate_utf32_to_latin1.cpp +27 -0
  703. package/deps/simdutf/tests/reference/validate_utf32_to_latin1.h +12 -0
  704. package/deps/simdutf/tests/reference/validate_utf8.cpp +82 -0
  705. package/deps/simdutf/tests/reference/validate_utf8.h +11 -0
  706. package/deps/simdutf/tests/reference/validate_utf8_to_latin1.cpp +43 -0
  707. package/deps/simdutf/tests/reference/validate_utf8_to_latin1.h +12 -0
  708. package/deps/simdutf/tests/select_implementation.cpp +43 -0
  709. package/deps/simdutf/tests/simdutf_c_tests.cpp +244 -0
  710. package/deps/simdutf/tests/span_tests.cpp +401 -0
  711. package/deps/simdutf/tests/special_tests.cpp +559 -0
  712. package/deps/simdutf/tests/straight_c_test.c +187 -0
  713. package/deps/simdutf/tests/text_encoding_tests.cpp +77 -0
  714. package/deps/simdutf/tests/to_well_formed_utf16_tests.cpp +377 -0
  715. package/deps/simdutf/tests/utf8_length_from_utf16_tests.cpp +202 -0
  716. package/deps/simdutf/tests/validate_ascii_basic_tests.cpp +165 -0
  717. package/deps/simdutf/tests/validate_ascii_with_errors_tests.cpp +77 -0
  718. package/deps/simdutf/tests/validate_utf16be_basic_tests.cpp +175 -0
  719. package/deps/simdutf/tests/validate_utf16be_with_errors_tests.cpp +188 -0
  720. package/deps/simdutf/tests/validate_utf16le_basic_tests.cpp +268 -0
  721. package/deps/simdutf/tests/validate_utf16le_with_errors_tests.cpp +274 -0
  722. package/deps/simdutf/tests/validate_utf32_basic_tests.cpp +92 -0
  723. package/deps/simdutf/tests/validate_utf32_with_errors_tests.cpp +114 -0
  724. package/deps/simdutf/tests/validate_utf8_basic_tests.cpp +178 -0
  725. package/deps/simdutf/tests/validate_utf8_brute_force_tests.cpp +88 -0
  726. package/deps/simdutf/tests/validate_utf8_puzzler_tests.cpp +33 -0
  727. package/deps/simdutf/tests/validate_utf8_with_errors_tests.cpp +228 -0
  728. package/deps/simdutf/tools/CMakeLists.txt +85 -0
  729. package/deps/simdutf/tools/fastbase64.cpp +250 -0
  730. package/deps/simdutf/tools/sutf.cpp +556 -0
  731. package/deps/simdutf/tools/sutf.h +40 -0
  732. package/package.json +2 -2
@@ -0,0 +1,1903 @@
1
+ #include <tuple>
2
+ #include <utility>
3
+ #include "simdutf/icelake/intrinsics.h"
4
+
5
+ #include "simdutf/icelake/begin.h"
6
+ namespace simdutf {
7
+ namespace SIMDUTF_IMPLEMENTATION {
8
+ namespace {
9
+ #ifndef SIMDUTF_ICELAKE_H
10
+ #error "icelake.h must be included"
11
+ #endif
12
+ using namespace simd;
13
+
14
+ #include "icelake/icelake_macros.inl.cpp"
15
+ #include "icelake/icelake_common.inl.cpp"
16
+ #if SIMDUTF_FEATURE_UTF8
17
+ #include "icelake/icelake_utf8_common.inl.cpp"
18
+ #endif // SIMDUTF_FEATURE_UTF8
19
+
20
+ #if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING
21
+ #include "icelake/icelake_utf8_validation.inl.cpp"
22
+ #endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING
23
+
24
+ #if SIMDUTF_FEATURE_UTF8 && \
25
+ (SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_LATIN1)
26
+ #include "icelake/icelake_from_valid_utf8.inl.cpp"
27
+ #include "icelake/icelake_from_utf8.inl.cpp"
28
+ #endif // SIMDUTF_FEATURE_UTF8 && (SIMDUTF_FEATURE_UTF16 ||
29
+ // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_LATIN1)
30
+
31
+ #if SIMDUTF_FEATURE_UTF16
32
+ #include "icelake/icelake_utf16fix.cpp"
33
+ #endif // SIMDUTF_FEATURE_UTF16
34
+ #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
35
+ #include "icelake/icelake_convert_utf8_to_latin1.inl.cpp"
36
+ #include "icelake/icelake_convert_valid_utf8_to_latin1.inl.cpp"
37
+ #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
38
+
39
+ #if SIMDUTF_FEATURE_UTF16
40
+ #include "icelake/icelake_convert_utf16_to_latin1.inl.cpp"
41
+ #endif // SIMDUTF_FEATURE_UTF16
42
+
43
+ #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
44
+ #include "icelake/icelake_convert_utf16_to_utf8.inl.cpp"
45
+ #include "icelake/icelake_convert_utf8_to_utf16.inl.cpp"
46
+ #include "icelake/icelake_utf8_length_from_utf16.inl.cpp"
47
+ #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
48
+
49
+ #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
50
+ #include "icelake/icelake_convert_utf16_to_utf32.inl.cpp"
51
+ #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
52
+
53
+ #if SIMDUTF_FEATURE_UTF32
54
+ #include "icelake/icelake_convert_utf32_to_latin1.inl.cpp"
55
+ #endif // SIMDUTF_FEATURE_UTF32
56
+
57
+ #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
58
+ #include "icelake/icelake_convert_utf32_to_utf8.inl.cpp"
59
+ #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
60
+
61
+ #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
62
+ #include "icelake/icelake_convert_utf32_to_utf16.inl.cpp"
63
+ #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
64
+
65
+ #if SIMDUTF_FEATURE_ASCII
66
+ #include "icelake/icelake_ascii_validation.inl.cpp"
67
+ #endif // SIMDUTF_FEATURE_ASCII
68
+ #if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING
69
+ #include "icelake/icelake_utf32_validation.inl.cpp"
70
+ #endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING
71
+ #if SIMDUTF_FEATURE_UTF8
72
+ #include "icelake/icelake_convert_latin1_to_utf8.inl.cpp"
73
+ #endif // SIMDUTF_FEATURE_UTF8
74
+ #if SIMDUTF_FEATURE_UTF16
75
+ #include "icelake/icelake_convert_latin1_to_utf16.inl.cpp"
76
+ #endif // SIMDUTF_FEATURE_UTF16
77
+ #if SIMDUTF_FEATURE_UTF32
78
+ #include "icelake/icelake_convert_latin1_to_utf32.inl.cpp"
79
+ #endif // SIMDUTF_FEATURE_UTF32
80
+ #if SIMDUTF_FEATURE_BASE64
81
+ #include "icelake/icelake_base64.inl.cpp"
82
+ #include "icelake/icelake_find.inl.cpp"
83
+ #endif // SIMDUTF_FEATURE_BASE64
84
+
85
+ #include <cstdint>
86
+
87
+ } // namespace
88
+ } // namespace SIMDUTF_IMPLEMENTATION
89
+ } // namespace simdutf
90
+
91
+ #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
92
+ #include "generic/utf32.h"
93
+ #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
94
+
95
+ namespace simdutf {
96
+ namespace SIMDUTF_IMPLEMENTATION {
97
+
98
+ #if SIMDUTF_FEATURE_DETECT_ENCODING
99
+ simdutf_warn_unused int // result: the BOM encoding if one is found, else OR-ed encoding_type flags (UTF8 / UTF16_LE / UTF32_LE)
100
+ implementation::detect_encodings(const char *input,
101
+ size_t length) const noexcept {
102
+ // If there is a BOM, then we trust it.
103
+ auto bom_encoding = simdutf::BOM::check_bom(input, length);
104
+ if (bom_encoding != encoding_type::unspecified) {
105
+ return bom_encoding; // early exit: none of the validation below runs when a BOM is recognised
106
+ }
107
+
108
+ int out = 0; // accumulates one flag per candidate encoding that passes its checks
109
+ uint32_t utf16_err = (length % 2); // starts nonzero when the byte length is not a whole number of 16-bit units
110
+ uint32_t utf32_err = (length % 4); // starts nonzero when the byte length is not a whole number of 32-bit units
111
+ uint32_t ends_with_high = 0; // carry bit: 1 when the previous block's last 16-bit lane was a high surrogate
112
+ avx512_utf8_checker checker{};
113
+ const __m512i offset = _mm512_set1_epi32((uint32_t)0xffff2000); // adding this maps 0xD800..0xDFFF onto 0xFFFFF800..0xFFFFFFFF (32-bit wraparound)
114
+ __m512i currentmax = _mm512_setzero_si512(); // running per-lane maximum of the raw 32-bit values
115
+ __m512i currentoffsetmax = _mm512_setzero_si512(); // running per-lane maximum of the offset-shifted 32-bit values
116
+ const char *ptr = input;
117
+ const char *end = ptr + length;
118
+ for (; end - ptr >= 64; ptr += 64) { // full 64-byte blocks; all three encodings are checked on the same load
119
+ // utf8 checks
120
+ const __m512i data = _mm512_loadu_si512((const __m512i *)ptr);
121
+ checker.check_next_input(data);
122
+
123
+ // utf16le_checks
124
+ __m512i diff = _mm512_sub_epi16(data, _mm512_set1_epi16(uint16_t(0xD800))); // rebase so the surrogate range starts at 0
125
+ __mmask32 surrogates =
126
+ _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800))); // lanes in 0xD800..0xDFFF
127
+ __mmask32 highsurrogates =
128
+ _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0400))); // lanes in 0xD800..0xDBFF
129
+ __mmask32 lowsurrogates = surrogates ^ highsurrogates; // remaining surrogate lanes: 0xDC00..0xDFFF
130
+ utf16_err |= (((highsurrogates << 1) | ends_with_high) != lowsurrogates); // low surrogates must sit exactly one lane after a high surrogate
131
+ ends_with_high = ((highsurrogates & 0x80000000) != 0); // remember a high surrogate in lane 31 for the next block
132
+
133
+ // utf32le checks
134
+ currentoffsetmax =
135
+ _mm512_max_epu32(_mm512_add_epi32(data, offset), currentoffsetmax);
136
+ currentmax = _mm512_max_epu32(data, currentmax); // only maxima are tracked here; the comparison happens once after the loop
137
+ }
138
+
139
+ // last block with 0 <= len < 64
140
+ __mmask64 read_mask = (__mmask64(1) << (end - ptr)) - 1; // low (end - ptr) bits set; an empty tail yields mask 0
141
+ const __m512i data = _mm512_maskz_loadu_epi8(read_mask, (const __m512i *)ptr); // bytes outside the mask are zero-filled
142
+ checker.check_next_input(data);
143
+
144
+ __m512i diff = _mm512_sub_epi16(data, _mm512_set1_epi16(uint16_t(0xD800))); // same surrogate test as in the loop, applied to the tail
145
+ __mmask32 surrogates =
146
+ _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800)));
147
+ __mmask32 highsurrogates =
148
+ _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0400)));
149
+ __mmask32 lowsurrogates = surrogates ^ highsurrogates;
150
+ utf16_err |= (((highsurrogates << 1) | ends_with_high) != lowsurrogates); // also rejects a pending high surrogate carried in from the last full block
151
+
152
+ currentoffsetmax =
153
+ _mm512_max_epu32(_mm512_add_epi32(data, offset), currentoffsetmax);
154
+ currentmax = _mm512_max_epu32(data, currentmax);
155
+
156
+ const __m512i standardmax = _mm512_set1_epi32((uint32_t)0x10ffff); // upper bound applied to every raw 32-bit lane
157
+ const __m512i standardoffsetmax = _mm512_set1_epi32((uint32_t)0xfffff7ff); // upper bound for shifted lanes; shifted surrogates exceed it
158
+ __m512i is_zero =
159
+ _mm512_xor_si512(_mm512_max_epu32(currentmax, standardmax), standardmax); // all-zero only if no lane exceeded standardmax
160
+ utf32_err |= (_mm512_test_epi8_mask(is_zero, is_zero) != 0); // any nonzero byte means some lane was above 0x10ffff
161
+ is_zero = _mm512_xor_si512(
162
+ _mm512_max_epu32(currentoffsetmax, standardoffsetmax), standardoffsetmax); // all-zero only if no shifted lane exceeded standardoffsetmax
163
+ utf32_err |= (_mm512_test_epi8_mask(is_zero, is_zero) != 0); // any nonzero byte means some lane was in 0xD800..0xDFFF
164
+ checker.check_eof(); // NOTE(review): presumably flags a multi-byte sequence cut off at end of input - confirm in avx512_utf8_checker
165
+ bool is_valid_utf8 = !checker.errors();
166
+ if (is_valid_utf8) {
167
+ out |= encoding_type::UTF8;
168
+ }
169
+ if (utf16_err == 0) {
170
+ out |= encoding_type::UTF16_LE;
171
+ }
172
+ if (utf32_err == 0) {
173
+ out |= encoding_type::UTF32_LE;
174
+ }
175
+ return out; // 0 when no candidate encoding validated
176
+ }
177
+ #endif // SIMDUTF_FEATURE_DETECT_ENCODING
178
+
179
+ #if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING
180
+ simdutf_warn_unused bool // true when the checker reports no errors over buf[0..len)
181
+ implementation::validate_utf8(const char *buf, size_t len) const noexcept {
182
+ if (simdutf_unlikely(len == 0)) {
183
+ return true; // empty input is accepted without touching the checker
184
+ }
185
+ avx512_utf8_checker checker{};
186
+ const char *ptr = buf;
187
+ const char *end = ptr + len;
188
+ for (; end - ptr >= 64; ptr += 64) { // full 64-byte blocks
189
+ const __m512i utf8 = _mm512_loadu_si512((const __m512i *)ptr);
190
+ checker.check_next_input(utf8);
191
+ }
192
+ if (end != ptr) { // 1..63 bytes remain; the guard keeps the shift count below in 1..63
193
+ const __m512i utf8 = _mm512_maskz_loadu_epi8(
194
+ ~UINT64_C(0) >> (64 - (end - ptr)), (const __m512i *)ptr); // mask has the low (end - ptr) bits set; other bytes load as zero
195
+ checker.check_next_input(utf8);
196
+ }
197
+ checker.check_eof(); // NOTE(review): presumably flags a sequence left incomplete at end of input - confirm in avx512_utf8_checker
198
+ return !checker.errors();
199
+ }
200
+ #endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING
201
+
202
+ #if SIMDUTF_FEATURE_UTF8
203
+ simdutf_warn_unused result implementation::validate_utf8_with_errors( // like validate_utf8, but on failure defers to a scalar routine to build the result
204
+ const char *buf, size_t len) const noexcept {
205
+ if (simdutf_unlikely(len == 0)) {
206
+ return result(error_code::SUCCESS, len); // empty input: SUCCESS with count 0
207
+ }
208
+ avx512_utf8_checker checker{};
209
+ const char *ptr = buf;
210
+ const char *end = ptr + len;
211
+ size_t count{0}; // number of bytes in full blocks that were checked without the checker reporting an error
212
+ for (; end - ptr >= 64; ptr += 64) {
213
+ const __m512i utf8 = _mm512_loadu_si512((const __m512i *)ptr);
214
+ checker.check_next_input(utf8);
215
+ if (checker.errors()) { // the error is in this block or at the boundary with the previous one
216
+ if (count != 0) {
217
+ count--;
218
+ } // Sometimes the error is only detected in the next chunk
219
+ result res = scalar::utf8::rewind_and_validate_with_errors(
220
+ reinterpret_cast<const char *>(buf),
221
+ reinterpret_cast<const char *>(buf + count), len - count); // re-check from buf + count to the end with the scalar routine
222
+ res.count += count; // the scalar call started at buf + count, so add that offset back
223
+ return res;
224
+ }
225
+ count += 64;
226
+ }
227
+ if (end != ptr) { // 1..63 trailing bytes; the guard keeps the shift count below in 1..63
228
+ const __m512i utf8 = _mm512_maskz_loadu_epi8(
229
+ ~UINT64_C(0) >> (64 - (end - ptr)), (const __m512i *)ptr); // mask has the low (end - ptr) bits set; other bytes load as zero
230
+ checker.check_next_input(utf8);
231
+ }
232
+ checker.check_eof(); // NOTE(review): presumably flags a sequence left incomplete at end of input - confirm in avx512_utf8_checker
233
+ if (checker.errors()) { // error found in the tail or at end-of-input; same scalar fallback as inside the loop
234
+ if (count != 0) {
235
+ count--;
236
+ } // Sometimes the error is only detected in the next chunk
237
+ result res = scalar::utf8::rewind_and_validate_with_errors(
238
+ reinterpret_cast<const char *>(buf),
239
+ reinterpret_cast<const char *>(buf + count), len - count);
240
+ res.count += count; // add back the offset at which the scalar re-check started
241
+ return res;
242
+ }
243
+ return result(error_code::SUCCESS, len); // no error anywhere: SUCCESS with count == len
244
+ }
245
+ #endif // SIMDUTF_FEATURE_UTF8
246
+
247
+ #if SIMDUTF_FEATURE_ASCII
248
+ simdutf_warn_unused bool // member-function entry point for ASCII validation
249
+ implementation::validate_ascii(const char *buf, size_t len) const noexcept {
250
+ return icelake::validate_ascii(buf, len); // forwards unchanged to icelake::validate_ascii and returns its result
251
+ }
252
+
253
+ simdutf_warn_unused result implementation::validate_ascii_with_errors( // returns TOO_LARGE with the offset of the first byte >= 0x80, else SUCCESS with len
254
+ const char *buf, size_t len) const noexcept {
255
+ const char *buf_orig = buf; // kept so error offsets can be expressed relative to the start of the input
256
+ const char *end = buf + len;
257
+ const __m512i ascii = _mm512_set1_epi8((uint8_t)0x80); // threshold: the first byte value that is rejected
258
+ for (; end - buf >= 64; buf += 64) { // full 64-byte blocks
259
+ const __m512i input = _mm512_loadu_si512((const __m512i *)buf);
260
+ __mmask64 notascii = _mm512_cmp_epu8_mask(input, ascii, _MM_CMPINT_NLT); // NLT = not-less-than: one bit per byte that is >= 0x80 (unsigned)
261
+ if (notascii) {
262
+ return result(error_code::TOO_LARGE,
263
+ buf - buf_orig + _tzcnt_u64(notascii)); // block offset plus index of the lowest set bit = first offending byte
264
+ }
265
+ }
266
+ if (end != buf) { // 1..63 trailing bytes; the guard keeps the shift count below in 1..63
267
+ const __m512i input = _mm512_maskz_loadu_epi8(
268
+ ~UINT64_C(0) >> (64 - (end - buf)), (const __m512i *)buf); // masked-off bytes load as zero, which is below the threshold
269
+ __mmask64 notascii = _mm512_cmp_epu8_mask(input, ascii, _MM_CMPINT_NLT);
270
+ if (notascii) {
271
+ return result(error_code::TOO_LARGE,
272
+ buf - buf_orig + _tzcnt_u64(notascii)); // same offset computation as in the loop
273
+ }
274
+ }
275
+ return result(error_code::SUCCESS, len); // nothing rejected (also the result for len == 0)
276
+ }
277
+ #endif // SIMDUTF_FEATURE_ASCII
278
+ #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII
279
+ simdutf_warn_unused bool // true when every 16-bit code unit in buf[0..len) is <= 0x007F
280
+ implementation::validate_utf16le_as_ascii(const char16_t *buf,
281
+ size_t len) const noexcept {
282
+ const char16_t *end = buf + len; // len counts 16-bit code units, not bytes
283
+ __m512i limit = _mm512_set1_epi16(uint16_t(0x007F)); // largest accepted code unit value
284
+ for (; end - buf >= 32;) { // 32 code units (one 512-bit vector) per iteration
285
+ __m512i in = _mm512_loadu_si512((__m512i *)buf);
286
+ auto mask = _mm512_cmpgt_epu16_mask(in, limit); // one bit per lane whose unsigned value exceeds the limit
287
+ if (mask) {
288
+ return false;
289
+ }
290
+ buf += 32;
291
+ }
292
+ if (buf < end) { // 1..31 code units remain, so the shift below stays under 32 bits
293
+ __m512i in =
294
+ _mm512_maskz_loadu_epi16((1U << (end - buf)) - 1, (__m512i *)buf); // low (end - buf) lanes loaded; the rest are zero and pass the test
295
+ auto mask = _mm512_cmpgt_epu16_mask(in, limit);
296
+ if (mask) {
297
+ return false;
298
+ }
299
+ }
300
+ return true; // also reached for len == 0
301
+ }
302
+
303
+ simdutf_warn_unused bool
304
+ implementation::validate_utf16be_as_ascii(const char16_t *buf,
305
+ size_t len) const noexcept {
306
+ const char16_t *end = buf + len;
307
+ const __m512i byteflip = _mm512_setr_epi64(
308
+ 0x0607040502030001, 0x0e0f0c0d0a0b0809, 0x0607040502030001,
309
+ 0x0e0f0c0d0a0b0809, 0x0607040502030001, 0x0e0f0c0d0a0b0809,
310
+ 0x0607040502030001, 0x0e0f0c0d0a0b0809);
311
+ __m512i limit = _mm512_set1_epi16(uint16_t(0x007F));
312
+ for (; end - buf >= 32;) {
313
+ __m512i in = _mm512_loadu_si512((__m512i *)buf);
314
+ in = _mm512_shuffle_epi8(in, byteflip);
315
+ auto mask = _mm512_cmpgt_epu16_mask(in, limit);
316
+ if (mask) {
317
+ return false;
318
+ }
319
+ buf += 32;
320
+ }
321
+ if (buf < end) {
322
+ __m512i in =
323
+ _mm512_maskz_loadu_epi16((1U << (end - buf)) - 1, (__m512i *)buf);
324
+ in = _mm512_shuffle_epi8(in, byteflip);
325
+ auto mask = _mm512_cmpgt_epu16_mask(in, limit);
326
+ if (mask) {
327
+ return false;
328
+ }
329
+ }
330
+ return true;
331
+ }
332
+ #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII
333
+ #if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING
334
+ simdutf_warn_unused bool
335
+ implementation::validate_utf16le(const char16_t *buf,
336
+ size_t len) const noexcept {
337
+ const char16_t *end = buf + len;
338
+
339
+ // Optimized: Process 64 code units (2x 512-bit) per iteration
340
+ const __m512i surr_base = _mm512_set1_epi16(uint16_t(0xD800));
341
+ const __m512i surr_range = _mm512_set1_epi16(uint16_t(0x0800));
342
+ const __m512i high_range = _mm512_set1_epi16(uint16_t(0x0400));
343
+
344
+ for (; end - buf >= 64;) {
345
+ __m512i in_1 = _mm512_loadu_si512((__m512i *)buf);
346
+ __m512i in_2 = _mm512_loadu_si512((__m512i *)(buf + 32));
347
+
348
+ __m512i diff_1 = _mm512_sub_epi16(in_1, surr_base);
349
+ __m512i diff_2 = _mm512_sub_epi16(in_2, surr_base);
350
+
351
+ __mmask32 surrogates_1 = _mm512_cmplt_epu16_mask(diff_1, surr_range);
352
+ __mmask32 surrogates_2 = _mm512_cmplt_epu16_mask(diff_2, surr_range);
353
+
354
+ if (surrogates_1 | surrogates_2) {
355
+ __mmask32 highsurrogates_1 = _mm512_cmplt_epu16_mask(diff_1, high_range);
356
+ __mmask32 lowsurrogates_1 = surrogates_1 ^ highsurrogates_1;
357
+
358
+ __mmask32 highsurrogates_2 = _mm512_cmplt_epu16_mask(diff_2, high_range);
359
+ __mmask32 lowsurrogates_2 = surrogates_2 ^ highsurrogates_2;
360
+
361
+ // Validate first block: high must be followed by low
362
+ if ((highsurrogates_1 << 1) != lowsurrogates_1) {
363
+ return false;
364
+ }
365
+
366
+ // Check boundary between blocks: if first block ends with high, second
367
+ // must start with low
368
+ bool ends_with_high_1 = ((highsurrogates_1 & 0x80000000) != 0);
369
+ bool starts_with_low_2 = ((lowsurrogates_2 & 0x1) != 0);
370
+ if (ends_with_high_1 && !starts_with_low_2) {
371
+ return false;
372
+ }
373
+
374
+ // Validate second block (shift by 1 if first ended with high)
375
+ __mmask32 expected_low_2 = ends_with_high_1
376
+ ? (highsurrogates_2 << 1) | 0x1
377
+ : (highsurrogates_2 << 1);
378
+ if (expected_low_2 != lowsurrogates_2) {
379
+ return false;
380
+ }
381
+
382
+ bool ends_with_high_2 = ((highsurrogates_2 & 0x80000000) != 0);
383
+ if (ends_with_high_2) {
384
+ buf += 63; // advance by 63 to start with high surrogate next round
385
+ } else {
386
+ buf += 64;
387
+ }
388
+ } else {
389
+ buf += 64;
390
+ }
391
+ }
392
+
393
+ // Handle remaining 32-63 code units
394
+ for (; end - buf >= 32;) {
395
+ __m512i in = _mm512_loadu_si512((__m512i *)buf);
396
+ __m512i diff = _mm512_sub_epi16(in, surr_base);
397
+ __mmask32 surrogates = _mm512_cmplt_epu16_mask(diff, surr_range);
398
+ if (surrogates) {
399
+ __mmask32 highsurrogates = _mm512_cmplt_epu16_mask(diff, high_range);
400
+ __mmask32 lowsurrogates = surrogates ^ highsurrogates;
401
+ // high must be followed by low
402
+ if ((highsurrogates << 1) != lowsurrogates) {
403
+ return false;
404
+ }
405
+ bool ends_with_high = ((highsurrogates & 0x80000000) != 0);
406
+ if (ends_with_high) {
407
+ buf += 31; // advance only by 31 code units so that we start with the
408
+ // high surrogate on the next round.
409
+ } else {
410
+ buf += 32;
411
+ }
412
+ } else {
413
+ buf += 32;
414
+ }
415
+ }
416
+ if (buf < end) {
417
+ __m512i in =
418
+ _mm512_maskz_loadu_epi16((1U << (end - buf)) - 1, (__m512i *)buf);
419
+ __m512i diff = _mm512_sub_epi16(in, _mm512_set1_epi16(uint16_t(0xD800)));
420
+ __mmask32 surrogates =
421
+ _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800)));
422
+ if (surrogates) {
423
+ __mmask32 highsurrogates =
424
+ _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0400)));
425
+ __mmask32 lowsurrogates = surrogates ^ highsurrogates;
426
+ // high must be followed by low
427
+ if ((highsurrogates << 1) != lowsurrogates) {
428
+ return false;
429
+ }
430
+ }
431
+ }
432
+ return true;
433
+ }
434
+ #endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING
435
+
436
+ #if SIMDUTF_FEATURE_UTF16
437
+ simdutf_warn_unused bool
438
+ implementation::validate_utf16be(const char16_t *buf,
439
+ size_t len) const noexcept {
440
+ const char16_t *end = buf + len;
441
+
442
+ for (; end - buf >= 32;) {
443
+ __m512i in = _mm512_slli_epi32(_mm512_loadu_si512((__m512i *)buf), 8);
444
+ __m512i diff = _mm512_sub_epi16(in, _mm512_set1_epi16(uint16_t(0xD800)));
445
+ __mmask32 surrogates =
446
+ _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800)));
447
+ if (surrogates) {
448
+ __mmask32 highsurrogates =
449
+ _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0400)));
450
+ __mmask32 lowsurrogates = surrogates ^ highsurrogates;
451
+ // high must be followed by low
452
+ if ((highsurrogates << 1) != lowsurrogates) {
453
+ return false;
454
+ }
455
+ bool ends_with_high = ((highsurrogates & 0x80000000) != 0);
456
+ if (ends_with_high) {
457
+ buf += 31; // advance only by 31 code units so that we start with the
458
+ // high surrogate on the next round.
459
+ } else {
460
+ buf += 32;
461
+ }
462
+ } else {
463
+ buf += 32;
464
+ }
465
+ }
466
+ if (buf < end) {
467
+ __m512i in = _mm512_slli_epi16(
468
+ _mm512_maskz_loadu_epi16((1U << (end - buf)) - 1, (__m512i *)buf), 8);
469
+ __m512i diff = _mm512_sub_epi16(in, _mm512_set1_epi16(uint16_t(0xD800)));
470
+ __mmask32 surrogates =
471
+ _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800)));
472
+ if (surrogates) {
473
+ __mmask32 highsurrogates =
474
+ _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0400)));
475
+ __mmask32 lowsurrogates = surrogates ^ highsurrogates;
476
+ // high must be followed by low
477
+ if ((highsurrogates << 1) != lowsurrogates) {
478
+ return false;
479
+ }
480
+ }
481
+ }
482
+ return true;
483
+ }
484
+
485
+ simdutf_warn_unused result implementation::validate_utf16le_with_errors(
486
+ const char16_t *buf, size_t len) const noexcept {
487
+ const char16_t *start_buf = buf;
488
+ const char16_t *end = buf + len;
489
+ for (; end - buf >= 32;) {
490
+ __m512i in = _mm512_loadu_si512((__m512i *)buf);
491
+ __m512i diff = _mm512_sub_epi16(in, _mm512_set1_epi16(uint16_t(0xD800)));
492
+ __mmask32 surrogates =
493
+ _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800)));
494
+ if (surrogates) {
495
+ __mmask32 highsurrogates =
496
+ _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0400)));
497
+ __mmask32 lowsurrogates = surrogates ^ highsurrogates;
498
+ // high must be followed by low
499
+ if ((highsurrogates << 1) != lowsurrogates) {
500
+ uint32_t extra_low = _tzcnt_u32(lowsurrogates & ~(highsurrogates << 1));
501
+ uint32_t extra_high =
502
+ _tzcnt_u32(highsurrogates & ~(lowsurrogates >> 1));
503
+ return result(error_code::SURROGATE,
504
+ (buf - start_buf) +
505
+ (extra_low < extra_high ? extra_low : extra_high));
506
+ }
507
+ bool ends_with_high = ((highsurrogates & 0x80000000) != 0);
508
+ if (ends_with_high) {
509
+ buf += 31; // advance only by 31 code units so that we start with the
510
+ // high surrogate on the next round.
511
+ } else {
512
+ buf += 32;
513
+ }
514
+ } else {
515
+ buf += 32;
516
+ }
517
+ }
518
+ if (buf < end) {
519
+ __m512i in =
520
+ _mm512_maskz_loadu_epi16((1U << (end - buf)) - 1, (__m512i *)buf);
521
+ __m512i diff = _mm512_sub_epi16(in, _mm512_set1_epi16(uint16_t(0xD800)));
522
+ __mmask32 surrogates =
523
+ _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800)));
524
+ if (surrogates) {
525
+ __mmask32 highsurrogates =
526
+ _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0400)));
527
+ __mmask32 lowsurrogates = surrogates ^ highsurrogates;
528
+ // high must be followed by low
529
+ if ((highsurrogates << 1) != lowsurrogates) {
530
+ uint32_t extra_low = _tzcnt_u32(lowsurrogates & ~(highsurrogates << 1));
531
+ uint32_t extra_high =
532
+ _tzcnt_u32(highsurrogates & ~(lowsurrogates >> 1));
533
+ return result(error_code::SURROGATE,
534
+ (buf - start_buf) +
535
+ (extra_low < extra_high ? extra_low : extra_high));
536
+ }
537
+ }
538
+ }
539
+ return result(error_code::SUCCESS, len);
540
+ }
541
+
542
+ simdutf_warn_unused result implementation::validate_utf16be_with_errors(
543
+ const char16_t *buf, size_t len) const noexcept {
544
+ const char16_t *start_buf = buf;
545
+ const char16_t *end = buf + len;
546
+
547
+ for (; end - buf >= 32;) {
548
+ __m512i in = _mm512_slli_epi16(_mm512_loadu_si512((__m512i *)buf), 8);
549
+ __m512i diff = _mm512_sub_epi16(in, _mm512_set1_epi16(uint16_t(0xD800)));
550
+ __mmask32 surrogates =
551
+ _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800)));
552
+ if (surrogates) {
553
+ __mmask32 highsurrogates =
554
+ _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0400)));
555
+ __mmask32 lowsurrogates = surrogates ^ highsurrogates;
556
+ // high must be followed by low
557
+ if ((highsurrogates << 1) != lowsurrogates) {
558
+ uint32_t extra_low = _tzcnt_u32(lowsurrogates & ~(highsurrogates << 1));
559
+ uint32_t extra_high =
560
+ _tzcnt_u32(highsurrogates & ~(lowsurrogates >> 1));
561
+ return result(error_code::SURROGATE,
562
+ (buf - start_buf) +
563
+ (extra_low < extra_high ? extra_low : extra_high));
564
+ }
565
+ bool ends_with_high = ((highsurrogates & 0x80000000) != 0);
566
+ if (ends_with_high) {
567
+ buf += 31; // advance only by 31 code units so that we start with the
568
+ // high surrogate on the next round.
569
+ } else {
570
+ buf += 32;
571
+ }
572
+ } else {
573
+ buf += 32;
574
+ }
575
+ }
576
+ if (buf < end) {
577
+ __m512i in = _mm512_slli_epi16(
578
+ _mm512_maskz_loadu_epi16((1U << (end - buf)) - 1, (__m512i *)buf), 8);
579
+ __m512i diff = _mm512_sub_epi16(in, _mm512_set1_epi16(uint16_t(0xD800)));
580
+ __mmask32 surrogates =
581
+ _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0800)));
582
+ if (surrogates) {
583
+ __mmask32 highsurrogates =
584
+ _mm512_cmplt_epu16_mask(diff, _mm512_set1_epi16(uint16_t(0x0400)));
585
+ __mmask32 lowsurrogates = surrogates ^ highsurrogates;
586
+ // high must be followed by low
587
+ if ((highsurrogates << 1) != lowsurrogates) {
588
+ uint32_t extra_low = _tzcnt_u32(lowsurrogates & ~(highsurrogates << 1));
589
+ uint32_t extra_high =
590
+ _tzcnt_u32(highsurrogates & ~(lowsurrogates >> 1));
591
+ return result(error_code::SURROGATE,
592
+ (buf - start_buf) +
593
+ (extra_low < extra_high ? extra_low : extra_high));
594
+ }
595
+ }
596
+ }
597
+ return result(error_code::SUCCESS, len);
598
+ }
599
+
600
+ void implementation::to_well_formed_utf16le(const char16_t *input, size_t len,
601
+ char16_t *output) const noexcept {
602
+ return utf16fix_avx512<endianness::LITTLE>(input, len, output);
603
+ }
604
+
605
+ void implementation::to_well_formed_utf16be(const char16_t *input, size_t len,
606
+ char16_t *output) const noexcept {
607
+ return utf16fix_avx512<endianness::BIG>(input, len, output);
608
+ }
609
+ #endif // SIMDUTF_FEATURE_UTF16
610
+
611
+ #if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING
612
+ simdutf_warn_unused bool
613
+ implementation::validate_utf32(const char32_t *buf, size_t len) const noexcept {
614
+ return icelake::validate_utf32(buf, len);
615
+ }
616
+ #endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING
617
+
618
+ #if SIMDUTF_FEATURE_UTF32
619
+ simdutf_warn_unused result implementation::validate_utf32_with_errors(
620
+ const char32_t *buf, size_t len) const noexcept {
621
+ const char32_t *buf_orig = buf;
622
+ if (len >= 16) {
623
+ const char32_t *end = buf + len - 16;
624
+ while (buf <= end) {
625
+ __m512i utf32 = _mm512_loadu_si512((const __m512i *)buf);
626
+ __mmask16 outside_range = _mm512_cmp_epu32_mask(
627
+ utf32, _mm512_set1_epi32(0x10ffff), _MM_CMPINT_GT);
628
+
629
+ __m512i utf32_off =
630
+ _mm512_add_epi32(utf32, _mm512_set1_epi32(0xffff2000));
631
+
632
+ __mmask16 surrogate_range = _mm512_cmp_epu32_mask(
633
+ utf32_off, _mm512_set1_epi32(0xfffff7ff), _MM_CMPINT_GT);
634
+ if ((outside_range | surrogate_range)) {
635
+ auto outside_idx = _tzcnt_u32(outside_range);
636
+ auto surrogate_idx = _tzcnt_u32(surrogate_range);
637
+
638
+ if (outside_idx < surrogate_idx) {
639
+ return result(error_code::TOO_LARGE, buf - buf_orig + outside_idx);
640
+ }
641
+
642
+ return result(error_code::SURROGATE, buf - buf_orig + surrogate_idx);
643
+ }
644
+
645
+ buf += 16;
646
+ }
647
+ }
648
+ if (len > 0) {
649
+ __m512i utf32 = _mm512_maskz_loadu_epi32(
650
+ __mmask16((1U << (buf_orig + len - buf)) - 1), (const __m512i *)buf);
651
+ __mmask16 outside_range = _mm512_cmp_epu32_mask(
652
+ utf32, _mm512_set1_epi32(0x10ffff), _MM_CMPINT_GT);
653
+ __m512i utf32_off = _mm512_add_epi32(utf32, _mm512_set1_epi32(0xffff2000));
654
+
655
+ __mmask16 surrogate_range = _mm512_cmp_epu32_mask(
656
+ utf32_off, _mm512_set1_epi32(0xfffff7ff), _MM_CMPINT_GT);
657
+ if ((outside_range | surrogate_range)) {
658
+ auto outside_idx = _tzcnt_u32(outside_range);
659
+ auto surrogate_idx = _tzcnt_u32(surrogate_range);
660
+
661
+ if (outside_idx < surrogate_idx) {
662
+ return result(error_code::TOO_LARGE, buf - buf_orig + outside_idx);
663
+ }
664
+
665
+ return result(error_code::SURROGATE, buf - buf_orig + surrogate_idx);
666
+ }
667
+ }
668
+
669
+ return result(error_code::SUCCESS, len);
670
+ }
671
+ #endif // SIMDUTF_FEATURE_UTF32
672
+
673
+ #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
674
+ simdutf_warn_unused size_t implementation::convert_latin1_to_utf8(
675
+ const char *buf, size_t len, char *utf8_output) const noexcept {
676
+ return icelake::latin1_to_utf8_avx512_start(buf, len, utf8_output);
677
+ }
678
+ #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
679
+
680
+ #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
681
+ simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le(
682
+ const char *buf, size_t len, char16_t *utf16_output) const noexcept {
683
+ return icelake_convert_latin1_to_utf16<endianness::LITTLE>(buf, len,
684
+ utf16_output);
685
+ }
686
+
687
+ simdutf_warn_unused size_t implementation::convert_latin1_to_utf16be(
688
+ const char *buf, size_t len, char16_t *utf16_output) const noexcept {
689
+ return icelake_convert_latin1_to_utf16<endianness::BIG>(buf, len,
690
+ utf16_output);
691
+ }
692
+ #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
693
+
694
+ #if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
695
+ simdutf_warn_unused size_t implementation::convert_latin1_to_utf32(
696
+ const char *buf, size_t len, char32_t *utf32_output) const noexcept {
697
+ avx512_convert_latin1_to_utf32(buf, len, utf32_output);
698
+ return len;
699
+ }
700
+ #endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
701
+
702
+ #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
703
+ simdutf_warn_unused size_t implementation::convert_utf8_to_latin1(
704
+ const char *buf, size_t len, char *latin1_output) const noexcept {
705
+ return icelake::utf8_to_latin1_avx512(buf, len, latin1_output);
706
+ }
707
+
708
+ simdutf_warn_unused result implementation::convert_utf8_to_latin1_with_errors(
709
+ const char *buf, size_t len, char *latin1_output) const noexcept {
710
+ // First, try to convert as much as possible using the SIMD implementation.
711
+ const char *obuf = buf;
712
+ char *olatin1_output = latin1_output;
713
+ size_t written = icelake::utf8_to_latin1_avx512(obuf, len, olatin1_output);
714
+
715
+ // If we have completely converted the string
716
+ if (obuf == buf + len) {
717
+ return {simdutf::SUCCESS, written};
718
+ }
719
+ size_t pos = obuf - buf;
720
+ result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors(
721
+ pos, buf + pos, len - pos, latin1_output);
722
+ res.count += pos;
723
+ return res;
724
+ }
725
+
726
+ simdutf_warn_unused size_t implementation::convert_valid_utf8_to_latin1(
727
+ const char *buf, size_t len, char *latin1_output) const noexcept {
728
+ return icelake::valid_utf8_to_latin1_avx512(buf, len, latin1_output);
729
+ }
730
+ #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
731
+
732
+ #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
733
+ simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le(
734
+ const char *buf, size_t len, char16_t *utf16_output) const noexcept {
735
+ utf8_to_utf16_result ret =
736
+ fast_avx512_convert_utf8_to_utf16<endianness::LITTLE>(buf, len,
737
+ utf16_output);
738
+ if (ret.second == nullptr) {
739
+ return 0;
740
+ }
741
+ return ret.second - utf16_output;
742
+ }
743
+
744
+ simdutf_warn_unused size_t implementation::convert_utf8_to_utf16be(
745
+ const char *buf, size_t len, char16_t *utf16_output) const noexcept {
746
+ utf8_to_utf16_result ret = fast_avx512_convert_utf8_to_utf16<endianness::BIG>(
747
+ buf, len, utf16_output);
748
+ if (ret.second == nullptr) {
749
+ return 0;
750
+ }
751
+ return ret.second - utf16_output;
752
+ }
753
+
754
+ simdutf_warn_unused result implementation::convert_utf8_to_utf16le_with_errors(
755
+ const char *buf, size_t len, char16_t *utf16_output) const noexcept {
756
+ return fast_avx512_convert_utf8_to_utf16_with_errors<endianness::LITTLE>(
757
+ buf, len, utf16_output);
758
+ }
759
+
760
+ simdutf_warn_unused result implementation::convert_utf8_to_utf16be_with_errors(
761
+ const char *buf, size_t len, char16_t *utf16_output) const noexcept {
762
+ return fast_avx512_convert_utf8_to_utf16_with_errors<endianness::BIG>(
763
+ buf, len, utf16_output);
764
+ }
765
+
766
+ simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16le(
767
+ const char *buf, size_t len, char16_t *utf16_output) const noexcept {
768
+ utf8_to_utf16_result ret =
769
+ icelake::valid_utf8_to_fixed_length<endianness::LITTLE, char16_t>(
770
+ buf, len, utf16_output);
771
+ size_t saved_bytes = ret.second - utf16_output;
772
+ const char *end = buf + len;
773
+ if (ret.first == end) {
774
+ return saved_bytes;
775
+ }
776
+
777
+ // Note: AVX512 procedure looks up 4 bytes forward, and
778
+ // correctly converts multi-byte chars even if their
779
+ // continuation bytes lie outsiede 16-byte window.
780
+ // It meas, we have to skip continuation bytes from
781
+ // the beginning ret.first, as they were already consumed.
782
+ while (ret.first != end && ((uint8_t(*ret.first) & 0xc0) == 0x80)) {
783
+ ret.first += 1;
784
+ }
785
+
786
+ if (ret.first != end) {
787
+ const size_t scalar_saved_bytes =
788
+ scalar::utf8_to_utf16::convert_valid<endianness::LITTLE>(
789
+ ret.first, len - (ret.first - buf), ret.second);
790
+ if (scalar_saved_bytes == 0) {
791
+ return 0;
792
+ }
793
+ saved_bytes += scalar_saved_bytes;
794
+ }
795
+
796
+ return saved_bytes;
797
+ }
798
+
799
+ simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16be(
800
+ const char *buf, size_t len, char16_t *utf16_output) const noexcept {
801
+ utf8_to_utf16_result ret =
802
+ icelake::valid_utf8_to_fixed_length<endianness::BIG, char16_t>(
803
+ buf, len, utf16_output);
804
+ size_t saved_bytes = ret.second - utf16_output;
805
+ const char *end = buf + len;
806
+ if (ret.first == end) {
807
+ return saved_bytes;
808
+ }
809
+
810
+ // Note: AVX512 procedure looks up 4 bytes forward, and
811
+ // correctly converts multi-byte chars even if their
812
+ // continuation bytes lie outsiede 16-byte window.
813
+ // It meas, we have to skip continuation bytes from
814
+ // the beginning ret.first, as they were already consumed.
815
+ while (ret.first != end && ((uint8_t(*ret.first) & 0xc0) == 0x80)) {
816
+ ret.first += 1;
817
+ }
818
+
819
+ if (ret.first != end) {
820
+ const size_t scalar_saved_bytes =
821
+ scalar::utf8_to_utf16::convert_valid<endianness::BIG>(
822
+ ret.first, len - (ret.first - buf), ret.second);
823
+ if (scalar_saved_bytes == 0) {
824
+ return 0;
825
+ }
826
+ saved_bytes += scalar_saved_bytes;
827
+ }
828
+
829
+ return saved_bytes;
830
+ }
831
+ #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
832
+
833
+ #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
834
+ simdutf_warn_unused size_t implementation::convert_utf8_to_utf32(
835
+ const char *buf, size_t len, char32_t *utf32_out) const noexcept {
836
+ uint32_t *utf32_output = reinterpret_cast<uint32_t *>(utf32_out);
837
+ utf8_to_utf32_result ret =
838
+ icelake::validating_utf8_to_fixed_length<endianness::LITTLE, uint32_t>(
839
+ buf, len, utf32_output);
840
+ if (ret.second == nullptr)
841
+ return 0;
842
+
843
+ size_t saved_bytes = ret.second - utf32_output;
844
+ const char *end = buf + len;
845
+ if (ret.first == end) {
846
+ return saved_bytes;
847
+ }
848
+
849
+ // Note: the AVX512 procedure looks up 4 bytes forward, and
850
+ // correctly converts multi-byte chars even if their
851
+ // continuation bytes lie outside 16-byte window.
852
+ // It means, we have to skip continuation bytes from
853
+ // the beginning ret.first, as they were already consumed.
854
+ while (ret.first != end && ((uint8_t(*ret.first) & 0xc0) == 0x80)) {
855
+ ret.first += 1;
856
+ }
857
+ if (ret.first != end) {
858
+ const size_t scalar_saved_bytes = scalar::utf8_to_utf32::convert(
859
+ ret.first, len - (ret.first - buf), utf32_out + saved_bytes);
860
+ if (scalar_saved_bytes == 0) {
861
+ return 0;
862
+ }
863
+ saved_bytes += scalar_saved_bytes;
864
+ }
865
+
866
+ return saved_bytes;
867
+ }
868
+
869
+ simdutf_warn_unused result implementation::convert_utf8_to_utf32_with_errors(
870
+ const char *buf, size_t len, char32_t *utf32) const noexcept {
871
+ if (simdutf_unlikely(len == 0)) {
872
+ return {error_code::SUCCESS, 0};
873
+ }
874
+ uint32_t *utf32_output = reinterpret_cast<uint32_t *>(utf32);
875
+ auto ret = icelake::validating_utf8_to_fixed_length_with_constant_checks<
876
+ endianness::LITTLE, uint32_t>(buf, len, utf32_output);
877
+
878
+ if (!std::get<2>(ret)) {
879
+ size_t pos = std::get<0>(ret) - buf;
880
+ // We might have an error that occurs right before pos.
881
+ // This is only a concern if buf[pos] is not a continuation byte.
882
+ if ((buf[pos] & 0xc0) != 0x80 && pos >= 64) {
883
+ pos -= 1;
884
+ } else if ((buf[pos] & 0xc0) == 0x80 && pos >= 64) {
885
+ // We must check whether we are the fourth continuation byte
886
+ bool c1 = (buf[pos - 1] & 0xc0) == 0x80;
887
+ bool c2 = (buf[pos - 2] & 0xc0) == 0x80;
888
+ bool c3 = (buf[pos - 3] & 0xc0) == 0x80;
889
+ if (c1 && c2 && c3) {
890
+ return {simdutf::TOO_LONG, pos};
891
+ }
892
+ }
893
+ // todo: we reset the output to utf32 instead of using std::get<2.(ret) as
894
+ // you'd expect. that is because
895
+ // validating_utf8_to_fixed_length_with_constant_checks may have processed
896
+ // data beyond the error.
897
+ result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(
898
+ pos, buf + pos, len - pos, utf32);
899
+ res.count += pos;
900
+ return res;
901
+ }
902
+ size_t saved_bytes = std::get<1>(ret) - utf32_output;
903
+ const char *end = buf + len;
904
+ if (std::get<0>(ret) == end) {
905
+ return {simdutf::SUCCESS, saved_bytes};
906
+ }
907
+
908
+ // Note: the AVX512 procedure looks up 4 bytes forward, and
909
+ // correctly converts multi-byte chars even if their
910
+ // continuation bytes lie outside 16-byte window.
911
+ // It means, we have to skip continuation bytes from
912
+ // the beginning ret.first, as they were already consumed.
913
+ while (std::get<0>(ret) != end and
914
+ ((uint8_t(*std::get<0>(ret)) & 0xc0) == 0x80)) {
915
+ std::get<0>(ret) += 1;
916
+ }
917
+
918
+ if (std::get<0>(ret) != end) {
919
+ auto scalar_result = scalar::utf8_to_utf32::convert_with_errors(
920
+ std::get<0>(ret), len - (std::get<0>(ret) - buf),
921
+ reinterpret_cast<char32_t *>(utf32_output) + saved_bytes);
922
+ if (scalar_result.error != simdutf::SUCCESS) {
923
+ scalar_result.count += (std::get<0>(ret) - buf);
924
+ } else {
925
+ scalar_result.count += saved_bytes;
926
+ }
927
+ return scalar_result;
928
+ }
929
+
930
+ return {simdutf::SUCCESS, size_t(std::get<1>(ret) - utf32_output)};
931
+ }
932
+
933
+ simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32(
934
+ const char *buf, size_t len, char32_t *utf32_out) const noexcept {
935
+ uint32_t *utf32_output = reinterpret_cast<uint32_t *>(utf32_out);
936
+ utf8_to_utf32_result ret =
937
+ icelake::valid_utf8_to_fixed_length<endianness::LITTLE, uint32_t>(
938
+ buf, len, utf32_output);
939
+ size_t saved_bytes = ret.second - utf32_output;
940
+ const char *end = buf + len;
941
+ if (ret.first == end) {
942
+ return saved_bytes;
943
+ }
944
+
945
+ // Note: AVX512 procedure looks up 4 bytes forward, and
946
+ // correctly converts multi-byte chars even if their
947
+ // continuation bytes lie outsiede 16-byte window.
948
+ // It meas, we have to skip continuation bytes from
949
+ // the beginning ret.first, as they were already consumed.
950
+ while (ret.first != end && ((uint8_t(*ret.first) & 0xc0) == 0x80)) {
951
+ ret.first += 1;
952
+ }
953
+
954
+ if (ret.first != end) {
955
+ const size_t scalar_saved_bytes = scalar::utf8_to_utf32::convert_valid(
956
+ ret.first, len - (ret.first - buf), utf32_out + saved_bytes);
957
+ if (scalar_saved_bytes == 0) {
958
+ return 0;
959
+ }
960
+ saved_bytes += scalar_saved_bytes;
961
+ }
962
+
963
+ return saved_bytes;
964
+ }
965
+ #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
966
+
967
+ #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
968
+ simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1(
969
+ const char16_t *buf, size_t len, char *latin1_output) const noexcept {
970
+ return icelake_convert_utf16_to_latin1<endianness::LITTLE>(buf, len,
971
+ latin1_output);
972
+ }
973
+
974
+ simdutf_warn_unused size_t implementation::convert_utf16be_to_latin1(
975
+ const char16_t *buf, size_t len, char *latin1_output) const noexcept {
976
+ return icelake_convert_utf16_to_latin1<endianness::BIG>(buf, len,
977
+ latin1_output);
978
+ }
979
+
980
+ simdutf_warn_unused result
981
+ implementation::convert_utf16le_to_latin1_with_errors(
982
+ const char16_t *buf, size_t len, char *latin1_output) const noexcept {
983
+ return icelake_convert_utf16_to_latin1_with_errors<endianness::LITTLE>(
984
+ buf, len, latin1_output)
985
+ .first;
986
+ }
987
+
988
+ simdutf_warn_unused result
989
+ implementation::convert_utf16be_to_latin1_with_errors(
990
+ const char16_t *buf, size_t len, char *latin1_output) const noexcept {
991
+ return icelake_convert_utf16_to_latin1_with_errors<endianness::BIG>(
992
+ buf, len, latin1_output)
993
+ .first;
994
+ }
995
+
996
+ simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_latin1(
997
+ const char16_t *buf, size_t len, char *latin1_output) const noexcept {
998
+ // optimization opportunity: implement custom function
999
+ return convert_utf16be_to_latin1(buf, len, latin1_output);
1000
+ }
1001
+
1002
+ simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_latin1(
1003
+ const char16_t *buf, size_t len, char *latin1_output) const noexcept {
1004
+ // optimization opportunity: implement custom function
1005
+ return convert_utf16le_to_latin1(buf, len, latin1_output);
1006
+ }
1007
+ #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
1008
+
1009
+ #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1010
+ simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8(
1011
+ const char16_t *buf, size_t len, char *utf8_output) const noexcept {
1012
+ size_t outlen;
1013
+ size_t inlen = utf16_to_utf8_avx512i<endianness::LITTLE>(
1014
+ buf, len, (unsigned char *)utf8_output, &outlen);
1015
+ if (inlen != len) {
1016
+ return 0;
1017
+ }
1018
+ return outlen;
1019
+ }
1020
+
1021
+ simdutf_warn_unused size_t implementation::convert_utf16be_to_utf8(
1022
+ const char16_t *buf, size_t len, char *utf8_output) const noexcept {
1023
+ size_t outlen;
1024
+ size_t inlen = utf16_to_utf8_avx512i<endianness::BIG>(
1025
+ buf, len, (unsigned char *)utf8_output, &outlen);
1026
+ if (inlen != len) {
1027
+ return 0;
1028
+ }
1029
+ return outlen;
1030
+ }
1031
+
1032
+ simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors(
1033
+ const char16_t *buf, size_t len, char *utf8_output) const noexcept {
1034
+ size_t outlen;
1035
+ size_t inlen = utf16_to_utf8_avx512i<endianness::LITTLE>(
1036
+ buf, len, (unsigned char *)utf8_output, &outlen);
1037
+ if (inlen != len) {
1038
+ result res = scalar::utf16_to_utf8::convert_with_errors<endianness::LITTLE>(
1039
+ buf + inlen, len - inlen, utf8_output + outlen);
1040
+ res.count += inlen;
1041
+ return res;
1042
+ }
1043
+ return {simdutf::SUCCESS, outlen};
1044
+ }
1045
+
1046
+ simdutf_warn_unused result implementation::convert_utf16be_to_utf8_with_errors(
1047
+ const char16_t *buf, size_t len, char *utf8_output) const noexcept {
1048
+ size_t outlen;
1049
+ size_t inlen = utf16_to_utf8_avx512i<endianness::BIG>(
1050
+ buf, len, (unsigned char *)utf8_output, &outlen);
1051
+ if (inlen != len) {
1052
+ result res = scalar::utf16_to_utf8::convert_with_errors<endianness::BIG>(
1053
+ buf + inlen, len - inlen, utf8_output + outlen);
1054
+ res.count += inlen;
1055
+ return res;
1056
+ }
1057
+ return {simdutf::SUCCESS, outlen};
1058
+ }
1059
+
1060
+ simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf8(
1061
+ const char16_t *buf, size_t len, char *utf8_output) const noexcept {
1062
+ return convert_utf16le_to_utf8(buf, len, utf8_output);
1063
+ }
1064
+
1065
+ simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8(
1066
+ const char16_t *buf, size_t len, char *utf8_output) const noexcept {
1067
+ return convert_utf16be_to_utf8(buf, len, utf8_output);
1068
+ }
1069
+ #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1070
+
1071
+ #if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
1072
+ simdutf_warn_unused size_t implementation::convert_utf32_to_latin1(
1073
+ const char32_t *buf, size_t len, char *latin1_output) const noexcept {
1074
+ return icelake_convert_utf32_to_latin1(buf, len, latin1_output);
1075
+ }
1076
+
1077
+ simdutf_warn_unused result implementation::convert_utf32_to_latin1_with_errors(
1078
+ const char32_t *buf, size_t len, char *latin1_output) const noexcept {
1079
+ return icelake_convert_utf32_to_latin1_with_errors(buf, len, latin1_output)
1080
+ .first;
1081
+ }
1082
+
1083
+ simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1(
1084
+ const char32_t *buf, size_t len, char *latin1_output) const noexcept {
1085
+ return icelake_convert_utf32_to_latin1(buf, len, latin1_output);
1086
+ }
1087
+ #endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
1088
+
1089
+ #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
1090
+ simdutf_warn_unused size_t implementation::convert_utf32_to_utf8(
1091
+ const char32_t *buf, size_t len, char *utf8_output) const noexcept {
1092
+ std::pair<const char32_t *, char *> ret =
1093
+ avx512_convert_utf32_to_utf8(buf, len, utf8_output);
1094
+ if (ret.first == nullptr) {
1095
+ return 0;
1096
+ }
1097
+ size_t saved_bytes = ret.second - utf8_output;
1098
+ if (ret.first != buf + len) {
1099
+ const size_t scalar_saved_bytes = scalar::utf32_to_utf8::convert(
1100
+ ret.first, len - (ret.first - buf), ret.second);
1101
+ if (scalar_saved_bytes == 0) {
1102
+ return 0;
1103
+ }
1104
+ saved_bytes += scalar_saved_bytes;
1105
+ }
1106
+ return saved_bytes;
1107
+ }
1108
+
1109
+ simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors(
1110
+ const char32_t *buf, size_t len, char *utf8_output) const noexcept {
1111
+ // ret.first.count is always the position in the buffer, not the number of
1112
+ // code units written even if finished
1113
+ std::pair<result, char *> ret =
1114
+ icelake::avx512_convert_utf32_to_utf8_with_errors(buf, len, utf8_output);
1115
+ if (ret.first.count != len) {
1116
+ result scalar_res = scalar::utf32_to_utf8::convert_with_errors(
1117
+ buf + ret.first.count, len - ret.first.count, ret.second);
1118
+ if (scalar_res.error) {
1119
+ scalar_res.count += ret.first.count;
1120
+ return scalar_res;
1121
+ } else {
1122
+ ret.second += scalar_res.count;
1123
+ }
1124
+ }
1125
+ ret.first.count =
1126
+ ret.second -
1127
+ utf8_output; // Set count to the number of 8-bit code units written
1128
+ return ret.first;
1129
+ }
1130
+
1131
+ simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8(
1132
+ const char32_t *buf, size_t len, char *utf8_output) const noexcept {
1133
+ return convert_utf32_to_utf8(buf, len, utf8_output);
1134
+ }
1135
+ #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
1136
+
1137
+ #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
1138
+ simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le(
1139
+ const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept {
1140
+ std::pair<const char32_t *, char16_t *> ret =
1141
+ avx512_convert_utf32_to_utf16<endianness::LITTLE>(buf, len, utf16_output);
1142
+ if (ret.first == nullptr) {
1143
+ return 0;
1144
+ }
1145
+ size_t saved_bytes = ret.second - utf16_output;
1146
+ return saved_bytes;
1147
+ }
1148
+
1149
+ simdutf_warn_unused size_t implementation::convert_utf32_to_utf16be(
1150
+ const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept {
1151
+ std::pair<const char32_t *, char16_t *> ret =
1152
+ avx512_convert_utf32_to_utf16<endianness::BIG>(buf, len, utf16_output);
1153
+ if (ret.first == nullptr) {
1154
+ return 0;
1155
+ }
1156
+ size_t saved_bytes = ret.second - utf16_output;
1157
+ return saved_bytes;
1158
+ }
1159
+
1160
+ simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors(
1161
+ const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept {
1162
+ // ret.first.count is always the position in the buffer, not the number of
1163
+ // code units written even if finished
1164
+ std::pair<result, char16_t *> ret =
1165
+ avx512_convert_utf32_to_utf16_with_errors<endianness::LITTLE>(
1166
+ buf, len, utf16_output);
1167
+ if (ret.first.error) {
1168
+ return ret.first;
1169
+ }
1170
+ ret.first.count =
1171
+ ret.second -
1172
+ utf16_output; // Set count to the number of 8-bit code units written
1173
+ return ret.first;
1174
+ }
1175
+
1176
+ simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors(
1177
+ const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept {
1178
+ // ret.first.count is always the position in the buffer, not the number of
1179
+ // code units written even if finished
1180
+ std::pair<result, char16_t *> ret =
1181
+ avx512_convert_utf32_to_utf16_with_errors<endianness::BIG>(buf, len,
1182
+ utf16_output);
1183
+ if (ret.first.error) {
1184
+ return ret.first;
1185
+ }
1186
+ ret.first.count =
1187
+ ret.second -
1188
+ utf16_output; // Set count to the number of 8-bit code units written
1189
+ return ret.first;
1190
+ }
1191
+
1192
+ simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16le(
1193
+ const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept {
1194
+ return convert_utf32_to_utf16le(buf, len, utf16_output);
1195
+ }
1196
+
1197
+ simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16be(
1198
+ const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept {
1199
+ return convert_utf32_to_utf16be(buf, len, utf16_output);
1200
+ }
1201
+
1202
+ simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32(
1203
+ const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept {
1204
+ std::tuple<const char16_t *, char32_t *, bool> ret =
1205
+ icelake::convert_utf16_to_utf32<endianness::LITTLE>(buf, len,
1206
+ utf32_output);
1207
+ if (!std::get<2>(ret)) {
1208
+ return 0;
1209
+ }
1210
+ size_t saved_bytes = std::get<1>(ret) - utf32_output;
1211
+ if (std::get<0>(ret) != buf + len) {
1212
+ const size_t scalar_saved_bytes =
1213
+ scalar::utf16_to_utf32::convert<endianness::LITTLE>(
1214
+ std::get<0>(ret), len - (std::get<0>(ret) - buf), std::get<1>(ret));
1215
+ if (scalar_saved_bytes == 0) {
1216
+ return 0;
1217
+ }
1218
+ saved_bytes += scalar_saved_bytes;
1219
+ }
1220
+ return saved_bytes;
1221
+ }
1222
+
1223
+ simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32(
1224
+ const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept {
1225
+ std::tuple<const char16_t *, char32_t *, bool> ret =
1226
+ icelake::convert_utf16_to_utf32<endianness::BIG>(buf, len, utf32_output);
1227
+ if (!std::get<2>(ret)) {
1228
+ return 0;
1229
+ }
1230
+ size_t saved_bytes = std::get<1>(ret) - utf32_output;
1231
+ if (std::get<0>(ret) != buf + len) {
1232
+ const size_t scalar_saved_bytes =
1233
+ scalar::utf16_to_utf32::convert<endianness::BIG>(
1234
+ std::get<0>(ret), len - (std::get<0>(ret) - buf), std::get<1>(ret));
1235
+ if (scalar_saved_bytes == 0) {
1236
+ return 0;
1237
+ }
1238
+ saved_bytes += scalar_saved_bytes;
1239
+ }
1240
+ return saved_bytes;
1241
+ }
1242
+
1243
+ simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors(
1244
+ const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept {
1245
+ std::tuple<const char16_t *, char32_t *, bool> ret =
1246
+ icelake::convert_utf16_to_utf32<endianness::LITTLE>(buf, len,
1247
+ utf32_output);
1248
+ if (!std::get<2>(ret)) {
1249
+ result scalar_res =
1250
+ scalar::utf16_to_utf32::convert_with_errors<endianness::LITTLE>(
1251
+ std::get<0>(ret), len - (std::get<0>(ret) - buf), std::get<1>(ret));
1252
+ scalar_res.count += (std::get<0>(ret) - buf);
1253
+ return scalar_res;
1254
+ }
1255
+ size_t saved_bytes = std::get<1>(ret) - utf32_output;
1256
+ if (std::get<0>(ret) != buf + len) {
1257
+ result scalar_res =
1258
+ scalar::utf16_to_utf32::convert_with_errors<endianness::LITTLE>(
1259
+ std::get<0>(ret), len - (std::get<0>(ret) - buf), std::get<1>(ret));
1260
+ if (scalar_res.error) {
1261
+ scalar_res.count += (std::get<0>(ret) - buf);
1262
+ return scalar_res;
1263
+ } else {
1264
+ scalar_res.count += saved_bytes;
1265
+ return scalar_res;
1266
+ }
1267
+ }
1268
+ return simdutf::result(simdutf::SUCCESS, saved_bytes);
1269
+ }
1270
+
1271
+ simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors(
1272
+ const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept {
1273
+ std::tuple<const char16_t *, char32_t *, bool> ret =
1274
+ icelake::convert_utf16_to_utf32<endianness::BIG>(buf, len, utf32_output);
1275
+ if (!std::get<2>(ret)) {
1276
+ result scalar_res =
1277
+ scalar::utf16_to_utf32::convert_with_errors<endianness::BIG>(
1278
+ std::get<0>(ret), len - (std::get<0>(ret) - buf), std::get<1>(ret));
1279
+ scalar_res.count += (std::get<0>(ret) - buf);
1280
+ return scalar_res;
1281
+ }
1282
+ size_t saved_bytes = std::get<1>(ret) - utf32_output;
1283
+ if (std::get<0>(ret) != buf + len) {
1284
+ result scalar_res =
1285
+ scalar::utf16_to_utf32::convert_with_errors<endianness::BIG>(
1286
+ std::get<0>(ret), len - (std::get<0>(ret) - buf), std::get<1>(ret));
1287
+ if (scalar_res.error) {
1288
+ scalar_res.count += (std::get<0>(ret) - buf);
1289
+ return scalar_res;
1290
+ } else {
1291
+ scalar_res.count += saved_bytes;
1292
+ return scalar_res;
1293
+ }
1294
+ }
1295
+ return simdutf::result(simdutf::SUCCESS, saved_bytes);
1296
+ }
1297
+
1298
+ simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf32(
1299
+ const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept {
1300
+ std::tuple<const char16_t *, char32_t *, bool> ret =
1301
+ icelake::convert_utf16_to_utf32<endianness::LITTLE>(buf, len,
1302
+ utf32_output);
1303
+ if (!std::get<2>(ret)) {
1304
+ return 0;
1305
+ }
1306
+ size_t saved_bytes = std::get<1>(ret) - utf32_output;
1307
+ if (std::get<0>(ret) != buf + len) {
1308
+ const size_t scalar_saved_bytes =
1309
+ scalar::utf16_to_utf32::convert<endianness::LITTLE>(
1310
+ std::get<0>(ret), len - (std::get<0>(ret) - buf), std::get<1>(ret));
1311
+ if (scalar_saved_bytes == 0) {
1312
+ return 0;
1313
+ }
1314
+ saved_bytes += scalar_saved_bytes;
1315
+ }
1316
+ return saved_bytes;
1317
+ }
1318
+
1319
+ simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32(
1320
+ const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept {
1321
+ std::tuple<const char16_t *, char32_t *, bool> ret =
1322
+ icelake::convert_utf16_to_utf32<endianness::BIG>(buf, len, utf32_output);
1323
+ if (!std::get<2>(ret)) {
1324
+ return 0;
1325
+ }
1326
+ size_t saved_bytes = std::get<1>(ret) - utf32_output;
1327
+ if (std::get<0>(ret) != buf + len) {
1328
+ const size_t scalar_saved_bytes =
1329
+ scalar::utf16_to_utf32::convert<endianness::BIG>(
1330
+ std::get<0>(ret), len - (std::get<0>(ret) - buf), std::get<1>(ret));
1331
+ if (scalar_saved_bytes == 0) {
1332
+ return 0;
1333
+ }
1334
+ saved_bytes += scalar_saved_bytes;
1335
+ }
1336
+ return saved_bytes;
1337
+ }
1338
+ #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
1339
+
1340
+ #if SIMDUTF_FEATURE_UTF16
1341
+ void implementation::change_endianness_utf16(const char16_t *input,
1342
+ size_t length,
1343
+ char16_t *output) const noexcept {
1344
+ size_t pos = 0;
1345
+ const __m512i byteflip = _mm512_setr_epi64(
1346
+ 0x0607040502030001, 0x0e0f0c0d0a0b0809, 0x0607040502030001,
1347
+ 0x0e0f0c0d0a0b0809, 0x0607040502030001, 0x0e0f0c0d0a0b0809,
1348
+ 0x0607040502030001, 0x0e0f0c0d0a0b0809);
1349
+ while (pos + 32 <= length) {
1350
+ __m512i utf16 = _mm512_loadu_si512((const __m512i *)(input + pos));
1351
+ utf16 = _mm512_shuffle_epi8(utf16, byteflip);
1352
+ _mm512_storeu_si512(output + pos, utf16);
1353
+ pos += 32;
1354
+ }
1355
+ if (pos < length) {
1356
+ __mmask32 m((1U << (length - pos)) - 1);
1357
+ __m512i utf16 = _mm512_maskz_loadu_epi16(m, (const __m512i *)(input + pos));
1358
+ utf16 = _mm512_shuffle_epi8(utf16, byteflip);
1359
+ _mm512_mask_storeu_epi16(output + pos, m, utf16);
1360
+ }
1361
+ }
1362
+
1363
+ simdutf_warn_unused size_t implementation::count_utf16le(
1364
+ const char16_t *input, size_t length) const noexcept {
1365
+ const char16_t *ptr = input;
1366
+ size_t count{0};
1367
+
1368
+ if (length >= 32) {
1369
+ const char16_t *end = input + length - 32;
1370
+
1371
+ const __m512i low = _mm512_set1_epi16((uint16_t)0xdc00);
1372
+ const __m512i high = _mm512_set1_epi16((uint16_t)0xdfff);
1373
+
1374
+ while (ptr <= end) {
1375
+ __m512i utf16 = _mm512_loadu_si512((const __m512i *)ptr);
1376
+ ptr += 32;
1377
+ uint64_t not_high_surrogate =
1378
+ static_cast<uint64_t>(_mm512_cmpgt_epu16_mask(utf16, high) |
1379
+ _mm512_cmplt_epu16_mask(utf16, low));
1380
+ count += count_ones(not_high_surrogate);
1381
+ }
1382
+ }
1383
+
1384
+ return count + scalar::utf16::count_code_points<endianness::LITTLE>(
1385
+ ptr, length - (ptr - input));
1386
+ }
1387
+
1388
+ simdutf_warn_unused size_t implementation::count_utf16be(
1389
+ const char16_t *input, size_t length) const noexcept {
1390
+ const char16_t *ptr = input;
1391
+ size_t count{0};
1392
+ if (length >= 32) {
1393
+
1394
+ const char16_t *end = input + length - 32;
1395
+
1396
+ const __m512i low = _mm512_set1_epi16((uint16_t)0xdc00);
1397
+ const __m512i high = _mm512_set1_epi16((uint16_t)0xdfff);
1398
+
1399
+ const __m512i byteflip = _mm512_setr_epi64(
1400
+ 0x0607040502030001, 0x0e0f0c0d0a0b0809, 0x0607040502030001,
1401
+ 0x0e0f0c0d0a0b0809, 0x0607040502030001, 0x0e0f0c0d0a0b0809,
1402
+ 0x0607040502030001, 0x0e0f0c0d0a0b0809);
1403
+ while (ptr <= end) {
1404
+ __m512i utf16 =
1405
+ _mm512_shuffle_epi8(_mm512_loadu_si512((__m512i *)ptr), byteflip);
1406
+ ptr += 32;
1407
+ uint64_t not_high_surrogate =
1408
+ static_cast<uint64_t>(_mm512_cmpgt_epu16_mask(utf16, high) |
1409
+ _mm512_cmplt_epu16_mask(utf16, low));
1410
+ count += count_ones(not_high_surrogate);
1411
+ }
1412
+ }
1413
+
1414
+ return count + scalar::utf16::count_code_points<endianness::BIG>(
1415
+ ptr, length - (ptr - input));
1416
+ }
1417
+ #endif // SIMDUTF_FEATURE_UTF16
1418
+
1419
+ #if SIMDUTF_FEATURE_UTF8
1420
+ simdutf_warn_unused size_t
1421
+ implementation::count_utf8(const char *input, size_t length) const noexcept {
1422
+ const uint8_t *str = reinterpret_cast<const uint8_t *>(input);
1423
+ size_t answer =
1424
+ length / sizeof(__m512i) *
1425
+ sizeof(__m512i); // Number of 512-bit chunks that fits into the length.
1426
+ size_t i = 0;
1427
+ __m512i unrolled_popcount{0};
1428
+
1429
+ const __m512i continuation = _mm512_set1_epi8(char(0b10111111));
1430
+
1431
+ while (i + sizeof(__m512i) <= length) {
1432
+ size_t iterations = (length - i) / sizeof(__m512i);
1433
+
1434
+ size_t max_i = i + iterations * sizeof(__m512i) - sizeof(__m512i);
1435
+ for (; i + 8 * sizeof(__m512i) <= max_i; i += 8 * sizeof(__m512i)) {
1436
+ __m512i input1 = _mm512_loadu_si512((const __m512i *)(str + i));
1437
+ __m512i input2 =
1438
+ _mm512_loadu_si512((const __m512i *)(str + i + sizeof(__m512i)));
1439
+ __m512i input3 =
1440
+ _mm512_loadu_si512((const __m512i *)(str + i + 2 * sizeof(__m512i)));
1441
+ __m512i input4 =
1442
+ _mm512_loadu_si512((const __m512i *)(str + i + 3 * sizeof(__m512i)));
1443
+ __m512i input5 =
1444
+ _mm512_loadu_si512((const __m512i *)(str + i + 4 * sizeof(__m512i)));
1445
+ __m512i input6 =
1446
+ _mm512_loadu_si512((const __m512i *)(str + i + 5 * sizeof(__m512i)));
1447
+ __m512i input7 =
1448
+ _mm512_loadu_si512((const __m512i *)(str + i + 6 * sizeof(__m512i)));
1449
+ __m512i input8 =
1450
+ _mm512_loadu_si512((const __m512i *)(str + i + 7 * sizeof(__m512i)));
1451
+
1452
+ __mmask64 mask1 = _mm512_cmple_epi8_mask(input1, continuation);
1453
+ __mmask64 mask2 = _mm512_cmple_epi8_mask(input2, continuation);
1454
+ __mmask64 mask3 = _mm512_cmple_epi8_mask(input3, continuation);
1455
+ __mmask64 mask4 = _mm512_cmple_epi8_mask(input4, continuation);
1456
+ __mmask64 mask5 = _mm512_cmple_epi8_mask(input5, continuation);
1457
+ __mmask64 mask6 = _mm512_cmple_epi8_mask(input6, continuation);
1458
+ __mmask64 mask7 = _mm512_cmple_epi8_mask(input7, continuation);
1459
+ __mmask64 mask8 = _mm512_cmple_epi8_mask(input8, continuation);
1460
+
1461
+ __m512i mask_register = _mm512_set_epi64(mask8, mask7, mask6, mask5,
1462
+ mask4, mask3, mask2, mask1);
1463
+
1464
+ unrolled_popcount = _mm512_add_epi64(unrolled_popcount,
1465
+ _mm512_popcnt_epi64(mask_register));
1466
+ }
1467
+
1468
+ for (; i <= max_i; i += sizeof(__m512i)) {
1469
+ __m512i more_input = _mm512_loadu_si512((const __m512i *)(str + i));
1470
+ uint64_t continuation_bitmask = static_cast<uint64_t>(
1471
+ _mm512_cmple_epi8_mask(more_input, continuation));
1472
+ answer -= count_ones(continuation_bitmask);
1473
+ }
1474
+ }
1475
+
1476
+ answer -= _mm512_reduce_add_epi64(unrolled_popcount);
1477
+
1478
+ return answer + scalar::utf8::count_code_points(
1479
+ reinterpret_cast<const char *>(str + i), length - i);
1480
+ }
1481
+ #endif // SIMDUTF_FEATURE_UTF8
1482
+
1483
+ #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
1484
+ simdutf_warn_unused size_t implementation::latin1_length_from_utf8(
1485
+ const char *buf, size_t len) const noexcept {
1486
+ return count_utf8(buf, len);
1487
+ }
1488
+ #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
1489
+
1490
+ #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1491
+ simdutf_warn_unused size_t implementation::utf8_length_from_utf16le(
1492
+ const char16_t *input, size_t length) const noexcept {
1493
+ return icelake_utf8_length_from_utf16<endianness::LITTLE>(input, length);
1494
+ }
1495
+
1496
+ simdutf_warn_unused size_t implementation::utf8_length_from_utf16be(
1497
+ const char16_t *input, size_t length) const noexcept {
1498
+ return icelake_utf8_length_from_utf16<endianness::BIG>(input, length);
1499
+ }
1500
+ #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1501
+
1502
+ #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
1503
+ simdutf_warn_unused size_t implementation::utf32_length_from_utf16le(
1504
+ const char16_t *input, size_t length) const noexcept {
1505
+ return implementation::count_utf16le(input, length);
1506
+ }
1507
+
1508
+ simdutf_warn_unused size_t implementation::utf32_length_from_utf16be(
1509
+ const char16_t *input, size_t length) const noexcept {
1510
+ return implementation::count_utf16be(input, length);
1511
+ }
1512
+ #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
1513
+
1514
+ #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
1515
+ simdutf_warn_unused size_t implementation::utf8_length_from_latin1(
1516
+ const char *input, size_t length) const noexcept {
1517
+ const uint8_t *str = reinterpret_cast<const uint8_t *>(input);
1518
+ size_t answer = length / sizeof(__m512i) * sizeof(__m512i);
1519
+ size_t i = 0;
1520
+ if (answer >= 2048) // long strings optimization
1521
+ {
1522
+ unsigned char v_0xFF = 0xff;
1523
+ __m512i eight_64bits = _mm512_setzero_si512();
1524
+ while (i + sizeof(__m512i) <= length) {
1525
+ __m512i runner = _mm512_setzero_si512();
1526
+ size_t iterations = (length - i) / sizeof(__m512i);
1527
+ if (iterations > 255) {
1528
+ iterations = 255;
1529
+ }
1530
+ size_t max_i = i + iterations * sizeof(__m512i) - sizeof(__m512i);
1531
+ for (; i + 4 * sizeof(__m512i) <= max_i; i += 4 * sizeof(__m512i)) {
1532
+ // Load four __m512i vectors
1533
+ __m512i input1 = _mm512_loadu_si512((const __m512i *)(str + i));
1534
+ __m512i input2 =
1535
+ _mm512_loadu_si512((const __m512i *)(str + i + sizeof(__m512i)));
1536
+ __m512i input3 = _mm512_loadu_si512(
1537
+ (const __m512i *)(str + i + 2 * sizeof(__m512i)));
1538
+ __m512i input4 = _mm512_loadu_si512(
1539
+ (const __m512i *)(str + i + 3 * sizeof(__m512i)));
1540
+
1541
+ // Generate four masks
1542
+ __mmask64 mask1 =
1543
+ _mm512_cmpgt_epi8_mask(_mm512_setzero_si512(), input1);
1544
+ __mmask64 mask2 =
1545
+ _mm512_cmpgt_epi8_mask(_mm512_setzero_si512(), input2);
1546
+ __mmask64 mask3 =
1547
+ _mm512_cmpgt_epi8_mask(_mm512_setzero_si512(), input3);
1548
+ __mmask64 mask4 =
1549
+ _mm512_cmpgt_epi8_mask(_mm512_setzero_si512(), input4);
1550
+ // Apply the masks and subtract from the runner
1551
+ __m512i not_ascii1 =
1552
+ _mm512_mask_set1_epi8(_mm512_setzero_si512(), mask1, v_0xFF);
1553
+ __m512i not_ascii2 =
1554
+ _mm512_mask_set1_epi8(_mm512_setzero_si512(), mask2, v_0xFF);
1555
+ __m512i not_ascii3 =
1556
+ _mm512_mask_set1_epi8(_mm512_setzero_si512(), mask3, v_0xFF);
1557
+ __m512i not_ascii4 =
1558
+ _mm512_mask_set1_epi8(_mm512_setzero_si512(), mask4, v_0xFF);
1559
+
1560
+ runner = _mm512_sub_epi8(runner, not_ascii1);
1561
+ runner = _mm512_sub_epi8(runner, not_ascii2);
1562
+ runner = _mm512_sub_epi8(runner, not_ascii3);
1563
+ runner = _mm512_sub_epi8(runner, not_ascii4);
1564
+ }
1565
+
1566
+ for (; i <= max_i; i += sizeof(__m512i)) {
1567
+ __m512i more_input = _mm512_loadu_si512((const __m512i *)(str + i));
1568
+
1569
+ __mmask64 mask =
1570
+ _mm512_cmpgt_epi8_mask(_mm512_setzero_si512(), more_input);
1571
+ __m512i not_ascii =
1572
+ _mm512_mask_set1_epi8(_mm512_setzero_si512(), mask, v_0xFF);
1573
+ runner = _mm512_sub_epi8(runner, not_ascii);
1574
+ }
1575
+
1576
+ eight_64bits = _mm512_add_epi64(
1577
+ eight_64bits, _mm512_sad_epu8(runner, _mm512_setzero_si512()));
1578
+ }
1579
+
1580
+ answer += _mm512_reduce_add_epi64(eight_64bits);
1581
+ } else if (answer > 0) {
1582
+ for (; i + sizeof(__m512i) <= length; i += sizeof(__m512i)) {
1583
+ __m512i latin = _mm512_loadu_si512((const __m512i *)(str + i));
1584
+ uint64_t non_ascii = _mm512_movepi8_mask(latin);
1585
+ answer += count_ones(non_ascii);
1586
+ }
1587
+ }
1588
+ return answer + scalar::latin1::utf8_length_from_latin1(
1589
+ reinterpret_cast<const char *>(str + i), length - i);
1590
+ }
1591
+ #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
1592
+
1593
+ #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1594
+ simdutf_warn_unused size_t implementation::utf16_length_from_utf8(
1595
+ const char *input, size_t length) const noexcept {
1596
+ size_t pos = 0;
1597
+
1598
+ // UTF-16 char length based on the four most significant bits of UTF-8 bytes
1599
+ const __m128i utf8_length_128 = _mm_setr_epi8(
1600
+ // ASCII chars
1601
+ /* 0000 */ 1,
1602
+ /* 0001 */ 1,
1603
+ /* 0010 */ 1,
1604
+ /* 0011 */ 1,
1605
+ /* 0100 */ 1,
1606
+ /* 0101 */ 1,
1607
+ /* 0110 */ 1,
1608
+ /* 0111 */ 1,
1609
+
1610
+ // continuation bytes
1611
+ /* 1000 */ 0,
1612
+ /* 1001 */ 0,
1613
+ /* 1010 */ 0,
1614
+ /* 1011 */ 0,
1615
+
1616
+ // leading bytes
1617
+ /* 1100 */ 1, // 2-byte UTF-8 char => 1 UTF-16 word
1618
+ /* 1101 */ 1, // 2-byte UTF-8 char => 1 UTF-16 word
1619
+ /* 1110 */ 1, // 3-byte UTF-8 char => 1 UTF-16 word
1620
+ /* 1111 */ 2 // 4-byte UTF-8 char => 2 UTF-16 words (surrogate pair)
1621
+ );
1622
+
1623
+ const __m512i char_length = broadcast_128bit_lane(utf8_length_128);
1624
+
1625
+ constexpr size_t max_iterations = 255 / 2;
1626
+
1627
+ size_t iterations = 0;
1628
+ const auto zero = _mm512_setzero_si512();
1629
+ __m512i local = _mm512_setzero_si512(); // byte-wise counters
1630
+ __m512i counters = _mm512_setzero_si512(); // 64-bit counters
1631
+ for (; pos + 64 <= length; pos += 64) {
1632
+ __m512i utf8 = _mm512_loadu_si512((const __m512i *)(input + pos));
1633
+ const auto t0 = _mm512_srli_epi32(utf8, 4);
1634
+ const auto t1 = _mm512_and_si512(t0, _mm512_set1_epi8(0xf));
1635
+ const auto t2 = _mm512_shuffle_epi8(char_length, t1);
1636
+ local = _mm512_add_epi8(local, t2);
1637
+
1638
+ iterations += 1;
1639
+ if (iterations == max_iterations) {
1640
+ counters = _mm512_add_epi64(counters, _mm512_sad_epu8(local, zero));
1641
+ local = zero;
1642
+ iterations = 0;
1643
+ }
1644
+ }
1645
+
1646
+ size_t count = 0;
1647
+
1648
+ if (pos > 0) {
1649
+ // don't waste time for short strings
1650
+ if (iterations > 0) {
1651
+ counters = _mm512_add_epi64(counters, _mm512_sad_epu8(local, zero));
1652
+ }
1653
+
1654
+ const auto l0 = _mm512_extracti32x4_epi32(counters, 0);
1655
+ const auto l1 = _mm512_extracti32x4_epi32(counters, 1);
1656
+ const auto l2 = _mm512_extracti32x4_epi32(counters, 2);
1657
+ const auto l3 = _mm512_extracti32x4_epi32(counters, 3);
1658
+
1659
+ const auto sum =
1660
+ _mm_add_epi64(_mm_add_epi64(l0, l1), _mm_add_epi64(l2, l3));
1661
+
1662
+ count = uint64_t(_mm_extract_epi64(sum, 0)) +
1663
+ uint64_t(_mm_extract_epi64(sum, 1));
1664
+ }
1665
+
1666
+ return count +
1667
+ scalar::utf8::utf16_length_from_utf8(input + pos, length - pos);
1668
+ }
1669
+ simdutf_warn_unused result
1670
+ implementation::utf8_length_from_utf16le_with_replacement(
1671
+ const char16_t *input, size_t length) const noexcept {
1672
+ return icelake_utf8_length_from_utf16_with_replacement<endianness::LITTLE>(
1673
+ input, length);
1674
+ }
1675
+
1676
+ simdutf_warn_unused result
1677
+ implementation::utf8_length_from_utf16be_with_replacement(
1678
+ const char16_t *input, size_t length) const noexcept {
1679
+ return icelake_utf8_length_from_utf16_with_replacement<endianness::BIG>(
1680
+ input, length);
1681
+ }
1682
+
1683
+ simdutf_warn_unused size_t
1684
+ implementation::convert_utf16le_to_utf8_with_replacement(
1685
+ const char16_t *input, size_t length, char *utf8_buffer) const noexcept {
1686
+ return scalar::utf16_to_utf8::convert_with_replacement<endianness::LITTLE>(
1687
+ input, length, utf8_buffer);
1688
+ }
1689
+
1690
+ simdutf_warn_unused size_t
1691
+ implementation::convert_utf16be_to_utf8_with_replacement(
1692
+ const char16_t *input, size_t length, char *utf8_buffer) const noexcept {
1693
+ return scalar::utf16_to_utf8::convert_with_replacement<endianness::BIG>(
1694
+ input, length, utf8_buffer);
1695
+ }
1696
+
1697
+ #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
1698
+ #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
1699
+ simdutf_warn_unused size_t implementation::utf8_length_from_utf32(
1700
+ const char32_t *input, size_t length) const noexcept {
1701
+ return utf32::utf8_length_from_utf32(input, length);
1702
+ }
1703
+ #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
1704
+ #if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
1705
+ simdutf_warn_unused size_t implementation::utf16_length_from_utf32(
1706
+ const char32_t *input, size_t length) const noexcept {
1707
+ const char32_t *ptr = input;
1708
+ size_t count{0};
1709
+
1710
+ if (length >= 16) {
1711
+ const char32_t *end = input + length - 16;
1712
+
1713
+ const __m512i v_0000_ffff = _mm512_set1_epi32((uint32_t)0x0000ffff);
1714
+
1715
+ while (ptr <= end) {
1716
+ __m512i utf32 = _mm512_loadu_si512((const __m512i *)ptr);
1717
+ ptr += 16;
1718
+ __mmask16 surrogates_bitmask =
1719
+ _mm512_cmpgt_epu32_mask(utf32, v_0000_ffff);
1720
+
1721
+ count += 16 + count_ones(surrogates_bitmask);
1722
+ }
1723
+ }
1724
+
1725
+ return count +
1726
+ scalar::utf32::utf16_length_from_utf32(ptr, length - (ptr - input));
1727
+ }
1728
+ #endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
1729
+
1730
+ #if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
1731
+ simdutf_warn_unused size_t implementation::utf32_length_from_utf8(
1732
+ const char *input, size_t length) const noexcept {
1733
+ return implementation::count_utf8(input, length);
1734
+ }
1735
+ #endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
1736
+
1737
+ #if SIMDUTF_FEATURE_BASE64
1738
+ simdutf_warn_unused result implementation::base64_to_binary(
1739
+ const char *input, size_t length, char *output, base64_options options,
1740
+ last_chunk_handling_options last_chunk_options) const noexcept {
1741
+ if (options & base64_default_or_url) {
1742
+ if (options == base64_options::base64_default_or_url_accept_garbage) {
1743
+ return compress_decode_base64<false, true, true>(
1744
+ output, input, length, options, last_chunk_options);
1745
+ } else {
1746
+ return compress_decode_base64<false, false, true>(
1747
+ output, input, length, options, last_chunk_options);
1748
+ }
1749
+ } else if (options & base64_url) {
1750
+ if (options == base64_options::base64_url_accept_garbage) {
1751
+ return compress_decode_base64<true, true, false>(
1752
+ output, input, length, options, last_chunk_options);
1753
+ } else {
1754
+ return compress_decode_base64<true, false, false>(
1755
+ output, input, length, options, last_chunk_options);
1756
+ }
1757
+ } else {
1758
+ if (options == base64_options::base64_default_accept_garbage) {
1759
+ return compress_decode_base64<false, true, false>(
1760
+ output, input, length, options, last_chunk_options);
1761
+ } else {
1762
+ return compress_decode_base64<false, false, false>(
1763
+ output, input, length, options, last_chunk_options);
1764
+ }
1765
+ }
1766
+ }
1767
+
1768
+ simdutf_warn_unused full_result implementation::base64_to_binary_details(
1769
+ const char *input, size_t length, char *output, base64_options options,
1770
+ last_chunk_handling_options last_chunk_options) const noexcept {
1771
+ if (options & base64_default_or_url) {
1772
+ if (options == base64_options::base64_default_or_url_accept_garbage) {
1773
+ return compress_decode_base64<false, true, true>(
1774
+ output, input, length, options, last_chunk_options);
1775
+ } else {
1776
+ return compress_decode_base64<false, false, true>(
1777
+ output, input, length, options, last_chunk_options);
1778
+ }
1779
+ } else if (options & base64_url) {
1780
+ if (options == base64_options::base64_url_accept_garbage) {
1781
+ return compress_decode_base64<true, true, false>(
1782
+ output, input, length, options, last_chunk_options);
1783
+ } else {
1784
+ return compress_decode_base64<true, false, false>(
1785
+ output, input, length, options, last_chunk_options);
1786
+ }
1787
+ } else {
1788
+ if (options == base64_options::base64_default_accept_garbage) {
1789
+ return compress_decode_base64<false, true, false>(
1790
+ output, input, length, options, last_chunk_options);
1791
+ } else {
1792
+ return compress_decode_base64<false, false, false>(
1793
+ output, input, length, options, last_chunk_options);
1794
+ }
1795
+ }
1796
+ }
1797
+
1798
+ simdutf_warn_unused result implementation::base64_to_binary(
1799
+ const char16_t *input, size_t length, char *output, base64_options options,
1800
+ last_chunk_handling_options last_chunk_options) const noexcept {
1801
+ if (options & base64_default_or_url) {
1802
+ if (options == base64_options::base64_default_or_url_accept_garbage) {
1803
+ return compress_decode_base64<false, true, true>(
1804
+ output, input, length, options, last_chunk_options);
1805
+ } else {
1806
+ return compress_decode_base64<false, false, true>(
1807
+ output, input, length, options, last_chunk_options);
1808
+ }
1809
+ } else if (options & base64_url) {
1810
+ if (options == base64_options::base64_url_accept_garbage) {
1811
+ return compress_decode_base64<true, true, false>(
1812
+ output, input, length, options, last_chunk_options);
1813
+ } else {
1814
+ return compress_decode_base64<true, false, false>(
1815
+ output, input, length, options, last_chunk_options);
1816
+ }
1817
+ } else {
1818
+ if (options == base64_options::base64_default_accept_garbage) {
1819
+ return compress_decode_base64<false, true, false>(
1820
+ output, input, length, options, last_chunk_options);
1821
+ } else {
1822
+ return compress_decode_base64<false, false, false>(
1823
+ output, input, length, options, last_chunk_options);
1824
+ }
1825
+ }
1826
+ }
1827
+
1828
+ simdutf_warn_unused full_result implementation::base64_to_binary_details(
1829
+ const char16_t *input, size_t length, char *output, base64_options options,
1830
+ last_chunk_handling_options last_chunk_options) const noexcept {
1831
+ if (options & base64_default_or_url) {
1832
+ if (options == base64_options::base64_default_or_url_accept_garbage) {
1833
+ return compress_decode_base64<false, true, true>(
1834
+ output, input, length, options, last_chunk_options);
1835
+ } else {
1836
+ return compress_decode_base64<false, false, true>(
1837
+ output, input, length, options, last_chunk_options);
1838
+ }
1839
+ } else if (options & base64_url) {
1840
+ if (options == base64_options::base64_url_accept_garbage) {
1841
+ return compress_decode_base64<true, true, false>(
1842
+ output, input, length, options, last_chunk_options);
1843
+ } else {
1844
+ return compress_decode_base64<true, false, false>(
1845
+ output, input, length, options, last_chunk_options);
1846
+ }
1847
+ } else {
1848
+ if (options == base64_options::base64_default_accept_garbage) {
1849
+ return compress_decode_base64<false, true, false>(
1850
+ output, input, length, options, last_chunk_options);
1851
+ } else {
1852
+ return compress_decode_base64<false, false, false>(
1853
+ output, input, length, options, last_chunk_options);
1854
+ }
1855
+ }
1856
+ }
1857
+
1858
+ size_t implementation::binary_to_base64(const char *input, size_t length,
1859
+ char *output,
1860
+ base64_options options) const noexcept {
1861
+ if (options & base64_url) {
1862
+ return encode_base64<true>(output, input, length, options);
1863
+ } else {
1864
+ return encode_base64<false>(output, input, length, options);
1865
+ }
1866
+ }
1867
+
1868
+ size_t implementation::binary_to_base64_with_lines(
1869
+ const char *input, size_t length, char *output, size_t line_length,
1870
+ base64_options options) const noexcept {
1871
+ if (options & base64_url) {
1872
+ return encode_base64_impl<true, true>(output, input, length, options,
1873
+ line_length);
1874
+ } else {
1875
+ return encode_base64_impl<false, true>(output, input, length, options,
1876
+ line_length);
1877
+ }
1878
+ }
1879
+
1880
+ const char *implementation::find(const char *start, const char *end,
1881
+ char character) const noexcept {
1882
+ return util_find(start, end, character);
1883
+ }
1884
+ const char16_t *implementation::find(const char16_t *start, const char16_t *end,
1885
+ char16_t character) const noexcept {
1886
+ return util_find(start, end, character);
1887
+ }
1888
+
1889
+ simdutf_warn_unused size_t implementation::binary_length_from_base64(
1890
+ const char *input, size_t length) const noexcept {
1891
+ return icelake_binary_length_from_base64(input, length);
1892
+ }
1893
+
1894
+ simdutf_warn_unused size_t implementation::binary_length_from_base64(
1895
+ const char16_t *input, size_t length) const noexcept {
1896
+ return icelake_binary_length_from_base64(input, length);
1897
+ }
1898
+ #endif // SIMDUTF_FEATURE_BASE64
1899
+
1900
+ } // namespace SIMDUTF_IMPLEMENTATION
1901
+ } // namespace simdutf
1902
+
1903
+ #include "simdutf/icelake/end.h"