react-native-quick-crypto 1.0.18 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/QuickCrypto.podspec +12 -38
- package/README.md +2 -0
- package/android/CMakeLists.txt +3 -0
- package/cpp/utils/HybridUtils.cpp +39 -77
- package/deps/simdutf/.clang-format +4 -0
- package/deps/simdutf/.github/ISSUE_TEMPLATE/bug_report.md +62 -0
- package/deps/simdutf/.github/ISSUE_TEMPLATE/config.yml +1 -0
- package/deps/simdutf/.github/ISSUE_TEMPLATE/feature_request.md +35 -0
- package/deps/simdutf/.github/ISSUE_TEMPLATE/standard-issue-template.md +29 -0
- package/deps/simdutf/.github/pull_request_template.md +51 -0
- package/deps/simdutf/.github/workflows/aarch64.yml +39 -0
- package/deps/simdutf/.github/workflows/alpine.yml +27 -0
- package/deps/simdutf/.github/workflows/amalgamation_demos.yml +34 -0
- package/deps/simdutf/.github/workflows/armv7.yml +32 -0
- package/deps/simdutf/.github/workflows/atomic_fuzz.yml +25 -0
- package/deps/simdutf/.github/workflows/cifuzz.yml +37 -0
- package/deps/simdutf/.github/workflows/clangformat.yml +36 -0
- package/deps/simdutf/.github/workflows/debian-latestcxxstandards.yml +40 -0
- package/deps/simdutf/.github/workflows/debian.yml +33 -0
- package/deps/simdutf/.github/workflows/documentation.yml +36 -0
- package/deps/simdutf/.github/workflows/emscripten.yml +19 -0
- package/deps/simdutf/.github/workflows/loongarch64-gcc-14.2.yml +39 -0
- package/deps/simdutf/.github/workflows/macos-latest.yml +29 -0
- package/deps/simdutf/.github/workflows/msys2-clang.yml +48 -0
- package/deps/simdutf/.github/workflows/msys2.yml +50 -0
- package/deps/simdutf/.github/workflows/ppc64le.yml +29 -0
- package/deps/simdutf/.github/workflows/rvv-1024-clang-18.yml +35 -0
- package/deps/simdutf/.github/workflows/rvv-128-clang-17.yml +35 -0
- package/deps/simdutf/.github/workflows/rvv-256-gcc-14.yml +31 -0
- package/deps/simdutf/.github/workflows/s390x.yml +29 -0
- package/deps/simdutf/.github/workflows/selective-amalgamation.yml +29 -0
- package/deps/simdutf/.github/workflows/typos.yml +19 -0
- package/deps/simdutf/.github/workflows/ubuntu22-cxx20.yml +30 -0
- package/deps/simdutf/.github/workflows/ubuntu22.yml +32 -0
- package/deps/simdutf/.github/workflows/ubuntu22_gcc12.yml +27 -0
- package/deps/simdutf/.github/workflows/ubuntu22sani.yml +29 -0
- package/deps/simdutf/.github/workflows/ubuntu24-cxxstandards.yml +34 -0
- package/deps/simdutf/.github/workflows/ubuntu24-unsignedchar.yml +34 -0
- package/deps/simdutf/.github/workflows/ubuntu24.yml +32 -0
- package/deps/simdutf/.github/workflows/ubuntu24sani.yml +36 -0
- package/deps/simdutf/.github/workflows/ubuntu24sani_clang.yml +29 -0
- package/deps/simdutf/.github/workflows/vs17-arm-ci.yml +21 -0
- package/deps/simdutf/.github/workflows/vs17-ci-cxx20.yml +41 -0
- package/deps/simdutf/.github/workflows/vs17-ci.yml +41 -0
- package/deps/simdutf/.github/workflows/vs17-clang-ci.yml +41 -0
- package/deps/simdutf/.github/workflows/vs17-cxxstandards.yml +36 -0
- package/deps/simdutf/AI_USAGE_POLICY.md +56 -0
- package/deps/simdutf/AUTHORS +6 -0
- package/deps/simdutf/CMakeLists.txt +231 -0
- package/deps/simdutf/CONTRIBUTING.md +214 -0
- package/deps/simdutf/CONTRIBUTORS +1 -0
- package/deps/simdutf/Doxyfile +2584 -0
- package/deps/simdutf/LICENSE-APACHE +201 -0
- package/deps/simdutf/LICENSE-MIT +18 -0
- package/deps/simdutf/Makefile.crosscompile +54 -0
- package/deps/simdutf/README-RVV.md +16 -0
- package/deps/simdutf/README.md +2782 -0
- package/deps/simdutf/SECURITY.md +8 -0
- package/deps/simdutf/benchmarks/CMakeLists.txt +101 -0
- package/deps/simdutf/benchmarks/alignment.cpp +150 -0
- package/deps/simdutf/benchmarks/base64/CMakeLists.txt +30 -0
- package/deps/simdutf/benchmarks/base64/benchmark_base64.cpp +875 -0
- package/deps/simdutf/benchmarks/base64/libbase64_spaces.h +49 -0
- package/deps/simdutf/benchmarks/base64/node_base64.h +227 -0
- package/deps/simdutf/benchmarks/base64/openssl3_base64.h +334 -0
- package/deps/simdutf/benchmarks/benchmark.cpp +65 -0
- package/deps/simdutf/benchmarks/benchmark_to_well_formed_utf16.cpp +347 -0
- package/deps/simdutf/benchmarks/competition/.clang-format-ignore +5 -0
- package/deps/simdutf/benchmarks/competition/CppCon2018/utf_utils.cpp +1276 -0
- package/deps/simdutf/benchmarks/competition/CppCon2018/utf_utils.h +595 -0
- package/deps/simdutf/benchmarks/competition/README.md +7 -0
- package/deps/simdutf/benchmarks/competition/hoehrmann/hoehrmann.h +91 -0
- package/deps/simdutf/benchmarks/competition/inoue2008/inoue_utf8_to_utf16.h +444 -0
- package/deps/simdutf/benchmarks/competition/inoue2008/inoue_utf8_to_utf16_tables.h +13183 -0
- package/deps/simdutf/benchmarks/competition/inoue2008/script.py +73 -0
- package/deps/simdutf/benchmarks/competition/llvm/ConvertUTF.cpp +738 -0
- package/deps/simdutf/benchmarks/competition/llvm/ConvertUTF.h +293 -0
- package/deps/simdutf/benchmarks/competition/u8u16/COPYRIGHT +8 -0
- package/deps/simdutf/benchmarks/competition/u8u16/Makefile +44 -0
- package/deps/simdutf/benchmarks/competition/u8u16/OSL3.0.txt +169 -0
- package/deps/simdutf/benchmarks/competition/u8u16/Profiling/BOM_Profiler.h +148 -0
- package/deps/simdutf/benchmarks/competition/u8u16/Profiling/i386_timer.h +45 -0
- package/deps/simdutf/benchmarks/competition/u8u16/Profiling/ppc_timer.c +34 -0
- package/deps/simdutf/benchmarks/competition/u8u16/README +56 -0
- package/deps/simdutf/benchmarks/competition/u8u16/config/config_defs.h +43 -0
- package/deps/simdutf/benchmarks/competition/u8u16/config/g4_config.h +27 -0
- package/deps/simdutf/benchmarks/competition/u8u16/config/mmx_config.h +16 -0
- package/deps/simdutf/benchmarks/competition/u8u16/config/p4_config.h +18 -0
- package/deps/simdutf/benchmarks/competition/u8u16/config/p4_ideal_config.h +16 -0
- package/deps/simdutf/benchmarks/competition/u8u16/config/spu_config.h +28 -0
- package/deps/simdutf/benchmarks/competition/u8u16/config/ssse3_config.h +20 -0
- package/deps/simdutf/benchmarks/competition/u8u16/iconv_u8u16.c +2 -0
- package/deps/simdutf/benchmarks/competition/u8u16/lib/altivec_simd.h +440 -0
- package/deps/simdutf/benchmarks/competition/u8u16/lib/libgen/make_basic_ops.py +121 -0
- package/deps/simdutf/benchmarks/competition/u8u16/lib/libgen/make_half_operand_versions.py +158 -0
- package/deps/simdutf/benchmarks/competition/u8u16/lib/libgen/make_test.py +270 -0
- package/deps/simdutf/benchmarks/competition/u8u16/lib/mmx_simd.h +141 -0
- package/deps/simdutf/benchmarks/competition/u8u16/lib/mmx_simd_basic.h +216 -0
- package/deps/simdutf/benchmarks/competition/u8u16/lib/mmx_simd_built_in.h +119 -0
- package/deps/simdutf/benchmarks/competition/u8u16/lib/mmx_simd_modified.h +2430 -0
- package/deps/simdutf/benchmarks/competition/u8u16/lib/outline.txt +39 -0
- package/deps/simdutf/benchmarks/competition/u8u16/lib/spu_simd.h +421 -0
- package/deps/simdutf/benchmarks/competition/u8u16/lib/sse_simd.h +836 -0
- package/deps/simdutf/benchmarks/competition/u8u16/lib/stdint.h +222 -0
- package/deps/simdutf/benchmarks/competition/u8u16/libu8u16_BE.c +4 -0
- package/deps/simdutf/benchmarks/competition/u8u16/libu8u16_LE.c +5 -0
- package/deps/simdutf/benchmarks/competition/u8u16/proto/u8u16.py +390 -0
- package/deps/simdutf/benchmarks/competition/u8u16/src/Makefile +18 -0
- package/deps/simdutf/benchmarks/competition/u8u16/src/bytelex.h +448 -0
- package/deps/simdutf/benchmarks/competition/u8u16/src/charsets/ASCII_EBCDIC.h +284 -0
- package/deps/simdutf/benchmarks/competition/u8u16/src/libu8u16.c +1975 -0
- package/deps/simdutf/benchmarks/competition/u8u16/src/libu8u16.pdf +0 -0
- package/deps/simdutf/benchmarks/competition/u8u16/src/libu8u16.w +2263 -0
- package/deps/simdutf/benchmarks/competition/u8u16/src/multiliteral.h +239 -0
- package/deps/simdutf/benchmarks/competition/u8u16/src/u8u16.c +232 -0
- package/deps/simdutf/benchmarks/competition/u8u16/src/x8x16.c +194 -0
- package/deps/simdutf/benchmarks/competition/u8u16/src/xml_error.c +193 -0
- package/deps/simdutf/benchmarks/competition/u8u16/src/xml_error.h +167 -0
- package/deps/simdutf/benchmarks/competition/u8u16/src/xmldecl.c +288 -0
- package/deps/simdutf/benchmarks/competition/u8u16/src/xmldecl.h +117 -0
- package/deps/simdutf/benchmarks/competition/u8u16/u8u16_g4.c +2 -0
- package/deps/simdutf/benchmarks/competition/u8u16/u8u16_mmx.c +2 -0
- package/deps/simdutf/benchmarks/competition/u8u16/u8u16_p4.c +3 -0
- package/deps/simdutf/benchmarks/competition/u8u16/u8u16_p4_ideal.c +2 -0
- package/deps/simdutf/benchmarks/competition/u8u16/u8u16_spu.c +2 -0
- package/deps/simdutf/benchmarks/competition/u8u16/u8u16_ssse3.c +3 -0
- package/deps/simdutf/benchmarks/competition/u8u16/x8x16_p4.c +2 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/LICENSE +23 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/data/test_minimal.txt +44 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/readme.md +106 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_clang_corr_tests.cmd +11 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_clang_corr_tests.sh +13 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_gcc_corr_tests.sh +13 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_gcc_example.sh +13 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_gcc_file_conv.sh +14 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_gcc_iconv_lib.sh +11 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_gcc_iconv_sample.sh +8 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_mingw_corr_tests.cmd +12 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_mingw_example.cmd +13 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_mingw_file_conv.cmd +14 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_mingw_iconv_lib.cmd +11 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_mingw_iconv_sample.cmd +8 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_msvc_corr_tests.cmd +11 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_msvc_example.cmd +12 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_msvc_file_conv.cmd +13 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_msvc_iconv_lib.cmd +10 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_msvc_iconv_sample.cmd +9 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/html_table.py +25 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/measure.py +94 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/resize.py +20 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/wipe_all.cmd +2 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/wipe_interm.cmd +1 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/base/CustomMemcpy.h +75 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/base/PerfDefs.h +47 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/base/Timing.cpp +17 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/base/Timing.h +76 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/AllProcessors.cpp +35 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/BaseBufferProcessor.cpp +117 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/BaseBufferProcessor.h +210 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/BufferDecoder.h +158 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/BufferEncoder.h +104 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/ProcessorPlugins.h +334 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/ProcessorSelector.h +186 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/core/DecoderLut.cpp +140 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/core/DecoderLut.h +42 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/core/DecoderProcess.h +100 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/core/Dfa.h +57 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/core/EncoderLut.cpp +85 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/core/EncoderLut.h +27 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/core/EncoderProcess.h +126 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/core/ProcessTrivial.h +108 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/iconv/iconv.cpp +139 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/iconv/iconv.h +74 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/message/MessageConverter.cpp +65 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/message/MessageConverter.h +91 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/tests/CorrectnessTests.cpp +772 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/tests/Example.cpp +12 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/tests/FileConverter.cpp +486 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/tests/iconv_sample.c +162 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/utf8lut.h +15 -0
- package/deps/simdutf/benchmarks/competition/utf8sse4/fromutf8-sse.cpp +292 -0
- package/deps/simdutf/benchmarks/competition/utfcpp/LICENSE +23 -0
- package/deps/simdutf/benchmarks/competition/utfcpp/README.md +1503 -0
- package/deps/simdutf/benchmarks/competition/utfcpp/source/utf8/checked.h +335 -0
- package/deps/simdutf/benchmarks/competition/utfcpp/source/utf8/core.h +338 -0
- package/deps/simdutf/benchmarks/competition/utfcpp/source/utf8/cpp11.h +103 -0
- package/deps/simdutf/benchmarks/competition/utfcpp/source/utf8/cpp17.h +103 -0
- package/deps/simdutf/benchmarks/competition/utfcpp/source/utf8/unchecked.h +274 -0
- package/deps/simdutf/benchmarks/competition/utfcpp/source/utf8.h +34 -0
- package/deps/simdutf/benchmarks/dataset/README.md +155 -0
- package/deps/simdutf/benchmarks/dataset/emoji.txt +204 -0
- package/deps/simdutf/benchmarks/dataset/scripts/utf8type.py +40 -0
- package/deps/simdutf/benchmarks/dataset/wikipedia_mars/Makefile +80 -0
- package/deps/simdutf/benchmarks/dataset/wikipedia_mars/convert_to_utf6.py +20 -0
- package/deps/simdutf/benchmarks/find/CMakeLists.txt +6 -0
- package/deps/simdutf/benchmarks/find/findbenchmark.cpp +63 -0
- package/deps/simdutf/benchmarks/find/findbenchmarker.h +46 -0
- package/deps/simdutf/benchmarks/shortbench.cpp +555 -0
- package/deps/simdutf/benchmarks/src/CMakeLists.txt +52 -0
- package/deps/simdutf/benchmarks/src/apple_arm_events.h +1104 -0
- package/deps/simdutf/benchmarks/src/benchmark.cpp +3899 -0
- package/deps/simdutf/benchmarks/src/benchmark.h +317 -0
- package/deps/simdutf/benchmarks/src/benchmark_base.cpp +144 -0
- package/deps/simdutf/benchmarks/src/benchmark_base.h +98 -0
- package/deps/simdutf/benchmarks/src/cmdline.cpp +176 -0
- package/deps/simdutf/benchmarks/src/cmdline.h +35 -0
- package/deps/simdutf/benchmarks/src/event_counter.h +162 -0
- package/deps/simdutf/benchmarks/src/linux-perf-events.h +104 -0
- package/deps/simdutf/benchmarks/stream.cpp +209 -0
- package/deps/simdutf/benchmarks/threaded.cpp +123 -0
- package/deps/simdutf/cmake/CPM.cmake +1363 -0
- package/deps/simdutf/cmake/JoinPaths.cmake +23 -0
- package/deps/simdutf/cmake/add_cpp_test.cmake +68 -0
- package/deps/simdutf/cmake/simdutf-config.cmake.in +2 -0
- package/deps/simdutf/cmake/simdutf-flags.cmake +26 -0
- package/deps/simdutf/cmake/toolchains-ci/riscv64-linux-gnu.cmake +4 -0
- package/deps/simdutf/cmake/toolchains-dev/README.md +32 -0
- package/deps/simdutf/cmake/toolchains-dev/aarch64.cmake +14 -0
- package/deps/simdutf/cmake/toolchains-dev/loongarch64.cmake +22 -0
- package/deps/simdutf/cmake/toolchains-dev/powerpc64.cmake +16 -0
- package/deps/simdutf/cmake/toolchains-dev/powerpc64le.cmake +16 -0
- package/deps/simdutf/cmake/toolchains-dev/riscv64.cmake +16 -0
- package/deps/simdutf/cmake/toolchains-dev/rvv-spike.cmake +38 -0
- package/deps/simdutf/doc/avx512.png +0 -0
- package/deps/simdutf/doc/logo.png +0 -0
- package/deps/simdutf/doc/logo.svg +165 -0
- package/deps/simdutf/doc/node2023.png +0 -0
- package/deps/simdutf/doc/shortinput.md +78 -0
- package/deps/simdutf/doc/utf16utf8.png +0 -0
- package/deps/simdutf/doc/utf8utf16.png +0 -0
- package/deps/simdutf/doc/widelogo.png +0 -0
- package/deps/simdutf/doxygen.py +50 -0
- package/deps/simdutf/fuzz/.clang-format +9 -0
- package/deps/simdutf/fuzz/CMakeLists.txt +45 -0
- package/deps/simdutf/fuzz/README.md +168 -0
- package/deps/simdutf/fuzz/atomic_base64.cpp +448 -0
- package/deps/simdutf/fuzz/base64.cpp +278 -0
- package/deps/simdutf/fuzz/build.sh +83 -0
- package/deps/simdutf/fuzz/conversion.cpp +669 -0
- package/deps/simdutf/fuzz/helpers/.clang-format-ignore +1 -0
- package/deps/simdutf/fuzz/helpers/common.h +135 -0
- package/deps/simdutf/fuzz/helpers/nameof.hpp +1258 -0
- package/deps/simdutf/fuzz/main.cpp +72 -0
- package/deps/simdutf/fuzz/minimize_and_cleanse.sh +87 -0
- package/deps/simdutf/fuzz/misc.cpp +216 -0
- package/deps/simdutf/fuzz/random_fuzz.sh +154 -0
- package/deps/simdutf/fuzz/roundtrip.cpp +588 -0
- package/deps/simdutf/fuzz/safe_conversion.cpp +104 -0
- package/deps/simdutf/include/simdutf/avx512.h +79 -0
- package/deps/simdutf/include/simdutf/base64_implementation.h +158 -0
- package/deps/simdutf/include/simdutf/base64_tables.h +887 -0
- package/deps/simdutf/include/simdutf/common_defs.h +186 -0
- package/deps/simdutf/include/simdutf/compiler_check.h +50 -0
- package/deps/simdutf/include/simdutf/constexpr_ptr.h +138 -0
- package/deps/simdutf/include/simdutf/encoding_types.h +189 -0
- package/deps/simdutf/include/simdutf/error.h +126 -0
- package/deps/simdutf/include/simdutf/implementation.h +7081 -0
- package/deps/simdutf/include/simdutf/internal/isadetection.h +325 -0
- package/deps/simdutf/include/simdutf/portability.h +285 -0
- package/deps/simdutf/include/simdutf/scalar/ascii.h +86 -0
- package/deps/simdutf/include/simdutf/scalar/atomic_util.h +105 -0
- package/deps/simdutf/include/simdutf/scalar/base64.h +911 -0
- package/deps/simdutf/include/simdutf/scalar/latin1.h +26 -0
- package/deps/simdutf/include/simdutf/scalar/latin1_to_utf16/latin1_to_utf16.h +52 -0
- package/deps/simdutf/include/simdutf/scalar/latin1_to_utf32/latin1_to_utf32.h +27 -0
- package/deps/simdutf/include/simdutf/scalar/latin1_to_utf8/latin1_to_utf8.h +191 -0
- package/deps/simdutf/include/simdutf/scalar/swap_bytes.h +35 -0
- package/deps/simdutf/include/simdutf/scalar/utf16.h +226 -0
- package/deps/simdutf/include/simdutf/scalar/utf16_to_latin1/utf16_to_latin1.h +108 -0
- package/deps/simdutf/include/simdutf/scalar/utf16_to_latin1/valid_utf16_to_latin1.h +40 -0
- package/deps/simdutf/include/simdutf/scalar/utf16_to_utf32/utf16_to_utf32.h +86 -0
- package/deps/simdutf/include/simdutf/scalar/utf16_to_utf32/valid_utf16_to_utf32.h +44 -0
- package/deps/simdutf/include/simdutf/scalar/utf16_to_utf8/utf16_to_utf8.h +295 -0
- package/deps/simdutf/include/simdutf/scalar/utf16_to_utf8/valid_utf16_to_utf8.h +91 -0
- package/deps/simdutf/include/simdutf/scalar/utf32.h +82 -0
- package/deps/simdutf/include/simdutf/scalar/utf32_to_latin1/utf32_to_latin1.h +68 -0
- package/deps/simdutf/include/simdutf/scalar/utf32_to_latin1/valid_utf32_to_latin1.h +67 -0
- package/deps/simdutf/include/simdutf/scalar/utf32_to_utf16/utf32_to_utf16.h +84 -0
- package/deps/simdutf/include/simdutf/scalar/utf32_to_utf16/valid_utf32_to_utf16.h +44 -0
- package/deps/simdutf/include/simdutf/scalar/utf32_to_utf8/utf32_to_utf8.h +142 -0
- package/deps/simdutf/include/simdutf/scalar/utf32_to_utf8/valid_utf32_to_utf8.h +72 -0
- package/deps/simdutf/include/simdutf/scalar/utf8.h +326 -0
- package/deps/simdutf/include/simdutf/scalar/utf8_to_latin1/utf8_to_latin1.h +225 -0
- package/deps/simdutf/include/simdutf/scalar/utf8_to_latin1/valid_utf8_to_latin1.h +87 -0
- package/deps/simdutf/include/simdutf/scalar/utf8_to_utf16/utf8_to_utf16.h +342 -0
- package/deps/simdutf/include/simdutf/scalar/utf8_to_utf16/valid_utf8_to_utf16.h +106 -0
- package/deps/simdutf/include/simdutf/scalar/utf8_to_utf32/utf8_to_utf32.h +299 -0
- package/deps/simdutf/include/simdutf/scalar/utf8_to_utf32/valid_utf8_to_utf32.h +83 -0
- package/deps/simdutf/include/simdutf/simdutf_version.h +26 -0
- package/deps/simdutf/include/simdutf.h +26 -0
- package/deps/simdutf/include/simdutf_c.h +342 -0
- package/deps/simdutf/riscv/Dockerfile +16 -0
- package/deps/simdutf/riscv/README.md +24 -0
- package/deps/simdutf/riscv/remove-docker-station +8 -0
- package/deps/simdutf/riscv/run-docker-station +31 -0
- package/deps/simdutf/scripts/.flake8 +2 -0
- package/deps/simdutf/scripts/Makefile +2 -0
- package/deps/simdutf/scripts/README_ADD_FUNCTION.md +49 -0
- package/deps/simdutf/scripts/add_function.py +330 -0
- package/deps/simdutf/scripts/amalgamation_tests.py +156 -0
- package/deps/simdutf/scripts/base64/Makefile +2 -0
- package/deps/simdutf/scripts/base64/README.md +2 -0
- package/deps/simdutf/scripts/base64/avx512.py +76 -0
- package/deps/simdutf/scripts/base64/neon_decode.py +143 -0
- package/deps/simdutf/scripts/base64/neon_generate_lut.py +101 -0
- package/deps/simdutf/scripts/base64/sse.py +252 -0
- package/deps/simdutf/scripts/base64/sseregular.py +160 -0
- package/deps/simdutf/scripts/base64/sseurl.py +283 -0
- package/deps/simdutf/scripts/base64/table.py +59 -0
- package/deps/simdutf/scripts/base64bench_print.py +145 -0
- package/deps/simdutf/scripts/benchmark-all.py +119 -0
- package/deps/simdutf/scripts/benchmark_print.py +324 -0
- package/deps/simdutf/scripts/check_feature_macros.py +156 -0
- package/deps/simdutf/scripts/check_typos.sh +13 -0
- package/deps/simdutf/scripts/clang_format.sh +35 -0
- package/deps/simdutf/scripts/clang_format_docker.sh +38 -0
- package/deps/simdutf/scripts/common.py +24 -0
- package/deps/simdutf/scripts/compilation_benchmark.py +55 -0
- package/deps/simdutf/scripts/compile_many_variations.sh +64 -0
- package/deps/simdutf/scripts/create_latex_table.py +62 -0
- package/deps/simdutf/scripts/docker/Dockerfile +14 -0
- package/deps/simdutf/scripts/docker/Makefile +9 -0
- package/deps/simdutf/scripts/docker/README.md +30 -0
- package/deps/simdutf/scripts/docker/llvm.gpg +0 -0
- package/deps/simdutf/scripts/ppc64_convert_utf16_to_utf8.py +155 -0
- package/deps/simdutf/scripts/prepare_doxygen.sh +21 -0
- package/deps/simdutf/scripts/release.py +197 -0
- package/deps/simdutf/scripts/shortinputplots.py +97 -0
- package/deps/simdutf/scripts/sse_convert_utf16_to_utf8.py +422 -0
- package/deps/simdutf/scripts/sse_convert_utf32_to_utf16.py +105 -0
- package/deps/simdutf/scripts/sse_utf8_utf16_decode.py +186 -0
- package/deps/simdutf/scripts/sse_validate_utf16le_proof.py +137 -0
- package/deps/simdutf/scripts/sse_validate_utf16le_testcases.py +129 -0
- package/deps/simdutf/scripts/table.py +207 -0
- package/deps/simdutf/scripts/tests/new.txt +33 -0
- package/deps/simdutf/scripts/tests/old.txt +33 -0
- package/deps/simdutf/scripts/tests/results.txt +272 -0
- package/deps/simdutf/simdutf.pc.in +11 -0
- package/deps/simdutf/singleheader/.flake8 +2 -0
- package/deps/simdutf/singleheader/CMakeLists.txt +64 -0
- package/deps/simdutf/singleheader/README-dev.md +81 -0
- package/deps/simdutf/singleheader/README.md +19 -0
- package/deps/simdutf/singleheader/amalgamate.py +513 -0
- package/deps/simdutf/singleheader/amalgamation_demo.c +59 -0
- package/deps/simdutf/singleheader/amalgamation_demo.cpp +54 -0
- package/deps/simdutf/singleheader/test-features.py +262 -0
- package/deps/simdutf/src/CMakeLists.txt +78 -0
- package/deps/simdutf/src/arm64/arm_base64.cpp +791 -0
- package/deps/simdutf/src/arm64/arm_convert_latin1_to_utf16.cpp +24 -0
- package/deps/simdutf/src/arm64/arm_convert_latin1_to_utf32.cpp +24 -0
- package/deps/simdutf/src/arm64/arm_convert_latin1_to_utf8.cpp +70 -0
- package/deps/simdutf/src/arm64/arm_convert_utf16_to_latin1.cpp +61 -0
- package/deps/simdutf/src/arm64/arm_convert_utf16_to_utf32.cpp +185 -0
- package/deps/simdutf/src/arm64/arm_convert_utf16_to_utf8.cpp +780 -0
- package/deps/simdutf/src/arm64/arm_convert_utf32_to_latin1.cpp +60 -0
- package/deps/simdutf/src/arm64/arm_convert_utf32_to_utf16.cpp +208 -0
- package/deps/simdutf/src/arm64/arm_convert_utf32_to_utf8.cpp +505 -0
- package/deps/simdutf/src/arm64/arm_convert_utf8_to_latin1.cpp +69 -0
- package/deps/simdutf/src/arm64/arm_convert_utf8_to_utf16.cpp +313 -0
- package/deps/simdutf/src/arm64/arm_convert_utf8_to_utf32.cpp +179 -0
- package/deps/simdutf/src/arm64/arm_find.cpp +199 -0
- package/deps/simdutf/src/arm64/arm_utf16fix.cpp +185 -0
- package/deps/simdutf/src/arm64/arm_validate_utf16.cpp +165 -0
- package/deps/simdutf/src/arm64/arm_validate_utf32le.cpp +65 -0
- package/deps/simdutf/src/arm64/implementation.cpp +1442 -0
- package/deps/simdutf/src/encoding_types.cpp +67 -0
- package/deps/simdutf/src/error.cpp +3 -0
- package/deps/simdutf/src/fallback/implementation.cpp +589 -0
- package/deps/simdutf/src/generic/ascii_validation.h +50 -0
- package/deps/simdutf/src/generic/base64.h +233 -0
- package/deps/simdutf/src/generic/base64lengths.h +63 -0
- package/deps/simdutf/src/generic/buf_block_reader.h +109 -0
- package/deps/simdutf/src/generic/find.h +75 -0
- package/deps/simdutf/src/generic/utf16/change_endianness.h +24 -0
- package/deps/simdutf/src/generic/utf16/count_code_points_bytemask.h +58 -0
- package/deps/simdutf/src/generic/utf16/to_well_formed.h +93 -0
- package/deps/simdutf/src/generic/utf16/utf32_length_from_utf16.h +15 -0
- package/deps/simdutf/src/generic/utf16/utf8_length_from_utf16.h +35 -0
- package/deps/simdutf/src/generic/utf16/utf8_length_from_utf16_bytemask.h +199 -0
- package/deps/simdutf/src/generic/utf16.h +73 -0
- package/deps/simdutf/src/generic/utf32.h +136 -0
- package/deps/simdutf/src/generic/utf8/utf16_length_from_utf8_bytemask.h +53 -0
- package/deps/simdutf/src/generic/utf8.h +92 -0
- package/deps/simdutf/src/generic/utf8_to_latin1/utf8_to_latin1.h +316 -0
- package/deps/simdutf/src/generic/utf8_to_latin1/valid_utf8_to_latin1.h +78 -0
- package/deps/simdutf/src/generic/utf8_to_utf16/utf8_to_utf16.h +332 -0
- package/deps/simdutf/src/generic/utf8_to_utf16/valid_utf8_to_utf16.h +74 -0
- package/deps/simdutf/src/generic/utf8_to_utf32/utf8_to_utf32.h +318 -0
- package/deps/simdutf/src/generic/utf8_to_utf32/valid_utf8_to_utf32.h +42 -0
- package/deps/simdutf/src/generic/utf8_validation/utf8_lookup4_algorithm.h +223 -0
- package/deps/simdutf/src/generic/utf8_validation/utf8_validator.h +84 -0
- package/deps/simdutf/src/generic/validate_utf16.h +164 -0
- package/deps/simdutf/src/generic/validate_utf32.h +99 -0
- package/deps/simdutf/src/haswell/avx2_base64.cpp +837 -0
- package/deps/simdutf/src/haswell/avx2_convert_latin1_to_utf16.cpp +28 -0
- package/deps/simdutf/src/haswell/avx2_convert_latin1_to_utf32.cpp +20 -0
- package/deps/simdutf/src/haswell/avx2_convert_latin1_to_utf8.cpp +83 -0
- package/deps/simdutf/src/haswell/avx2_convert_utf16_to_latin1.cpp +83 -0
- package/deps/simdutf/src/haswell/avx2_convert_utf16_to_utf32.cpp +210 -0
- package/deps/simdutf/src/haswell/avx2_convert_utf16_to_utf8.cpp +602 -0
- package/deps/simdutf/src/haswell/avx2_convert_utf32_to_latin1.cpp +116 -0
- package/deps/simdutf/src/haswell/avx2_convert_utf32_to_utf16.cpp +164 -0
- package/deps/simdutf/src/haswell/avx2_convert_utf32_to_utf8.cpp +569 -0
- package/deps/simdutf/src/haswell/avx2_convert_utf8_to_latin1.cpp +60 -0
- package/deps/simdutf/src/haswell/avx2_convert_utf8_to_utf16.cpp +195 -0
- package/deps/simdutf/src/haswell/avx2_convert_utf8_to_utf32.cpp +135 -0
- package/deps/simdutf/src/haswell/avx2_utf16fix.cpp +173 -0
- package/deps/simdutf/src/haswell/avx2_validate_utf16.cpp +17 -0
- package/deps/simdutf/src/haswell/implementation.cpp +1447 -0
- package/deps/simdutf/src/icelake/icelake_ascii_validation.inl.cpp +19 -0
- package/deps/simdutf/src/icelake/icelake_base64.inl.cpp +630 -0
- package/deps/simdutf/src/icelake/icelake_common.inl.cpp +37 -0
- package/deps/simdutf/src/icelake/icelake_convert_latin1_to_utf16.inl.cpp +36 -0
- package/deps/simdutf/src/icelake/icelake_convert_latin1_to_utf32.inl.cpp +23 -0
- package/deps/simdutf/src/icelake/icelake_convert_latin1_to_utf8.inl.cpp +107 -0
- package/deps/simdutf/src/icelake/icelake_convert_utf16_to_latin1.inl.cpp +103 -0
- package/deps/simdutf/src/icelake/icelake_convert_utf16_to_utf32.inl.cpp +136 -0
- package/deps/simdutf/src/icelake/icelake_convert_utf16_to_utf8.inl.cpp +206 -0
- package/deps/simdutf/src/icelake/icelake_convert_utf32_to_latin1.inl.cpp +74 -0
- package/deps/simdutf/src/icelake/icelake_convert_utf32_to_utf16.inl.cpp +338 -0
- package/deps/simdutf/src/icelake/icelake_convert_utf32_to_utf8.inl.cpp +574 -0
- package/deps/simdutf/src/icelake/icelake_convert_utf8_to_latin1.inl.cpp +104 -0
- package/deps/simdutf/src/icelake/icelake_convert_utf8_to_utf16.inl.cpp +75 -0
- package/deps/simdutf/src/icelake/icelake_convert_valid_utf8_to_latin1.inl.cpp +69 -0
- package/deps/simdutf/src/icelake/icelake_find.inl.cpp +146 -0
- package/deps/simdutf/src/icelake/icelake_from_utf8.inl.cpp +266 -0
- package/deps/simdutf/src/icelake/icelake_from_valid_utf8.inl.cpp +136 -0
- package/deps/simdutf/src/icelake/icelake_macros.inl.cpp +143 -0
- package/deps/simdutf/src/icelake/icelake_utf16fix.cpp +138 -0
- package/deps/simdutf/src/icelake/icelake_utf32_validation.inl.cpp +63 -0
- package/deps/simdutf/src/icelake/icelake_utf8_common.inl.cpp +753 -0
- package/deps/simdutf/src/icelake/icelake_utf8_length_from_utf16.inl.cpp +269 -0
- package/deps/simdutf/src/icelake/icelake_utf8_validation.inl.cpp +116 -0
- package/deps/simdutf/src/icelake/implementation.cpp +1903 -0
- package/deps/simdutf/src/implementation.cpp +2526 -0
- package/deps/simdutf/src/lasx/implementation.cpp +1531 -0
- package/deps/simdutf/src/lasx/lasx_base64.cpp +695 -0
- package/deps/simdutf/src/lasx/lasx_convert_latin1_to_utf16.cpp +76 -0
- package/deps/simdutf/src/lasx/lasx_convert_latin1_to_utf32.cpp +55 -0
- package/deps/simdutf/src/lasx/lasx_convert_latin1_to_utf8.cpp +65 -0
- package/deps/simdutf/src/lasx/lasx_convert_utf16_to_latin1.cpp +64 -0
- package/deps/simdutf/src/lasx/lasx_convert_utf16_to_utf32.cpp +183 -0
- package/deps/simdutf/src/lasx/lasx_convert_utf16_to_utf8.cpp +550 -0
- package/deps/simdutf/src/lasx/lasx_convert_utf32_to_latin1.cpp +73 -0
- package/deps/simdutf/src/lasx/lasx_convert_utf32_to_utf16.cpp +218 -0
- package/deps/simdutf/src/lasx/lasx_convert_utf32_to_utf8.cpp +589 -0
- package/deps/simdutf/src/lasx/lasx_convert_utf8_to_latin1.cpp +72 -0
- package/deps/simdutf/src/lasx/lasx_convert_utf8_to_utf16.cpp +296 -0
- package/deps/simdutf/src/lasx/lasx_convert_utf8_to_utf32.cpp +190 -0
- package/deps/simdutf/src/lasx/lasx_find.cpp +64 -0
- package/deps/simdutf/src/lasx/lasx_validate_utf16.cpp +13 -0
- package/deps/simdutf/src/lasx/lasx_validate_utf32le.cpp +84 -0
- package/deps/simdutf/src/lsx/implementation.cpp +1417 -0
- package/deps/simdutf/src/lsx/lsx_base64.cpp +675 -0
- package/deps/simdutf/src/lsx/lsx_convert_latin1_to_utf16.cpp +39 -0
- package/deps/simdutf/src/lsx/lsx_convert_latin1_to_utf32.cpp +27 -0
- package/deps/simdutf/src/lsx/lsx_convert_latin1_to_utf8.cpp +56 -0
- package/deps/simdutf/src/lsx/lsx_convert_utf16_to_latin1.cpp +64 -0
- package/deps/simdutf/src/lsx/lsx_convert_utf16_to_utf32.cpp +133 -0
- package/deps/simdutf/src/lsx/lsx_convert_utf16_to_utf8.cpp +518 -0
- package/deps/simdutf/src/lsx/lsx_convert_utf32_to_latin1.cpp +66 -0
- package/deps/simdutf/src/lsx/lsx_convert_utf32_to_utf16.cpp +155 -0
- package/deps/simdutf/src/lsx/lsx_convert_utf32_to_utf8.cpp +459 -0
- package/deps/simdutf/src/lsx/lsx_convert_utf8_to_latin1.cpp +75 -0
- package/deps/simdutf/src/lsx/lsx_convert_utf8_to_utf16.cpp +291 -0
- package/deps/simdutf/src/lsx/lsx_convert_utf8_to_utf32.cpp +179 -0
- package/deps/simdutf/src/lsx/lsx_find.cpp +60 -0
- package/deps/simdutf/src/lsx/lsx_validate_utf16.cpp +13 -0
- package/deps/simdutf/src/lsx/lsx_validate_utf32le.cpp +68 -0
- package/deps/simdutf/src/ppc64/implementation.cpp +992 -0
- package/deps/simdutf/src/ppc64/ppc64_base64.cpp +480 -0
- package/deps/simdutf/src/ppc64/ppc64_base64_internal_tests.cpp +401 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_latin1_to_utf16.cpp +12 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_latin1_to_utf32.cpp +12 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_latin1_to_utf8.cpp +149 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_utf16_to_latin1.cpp +67 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_utf16_to_utf32.cpp +87 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_utf16_to_utf8.cpp +296 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_utf32_to_latin1.cpp +57 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_utf32_to_utf16.cpp +117 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_utf32_to_utf8.cpp +166 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_utf8_to_latin1.cpp +69 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_utf8_to_utf16.cpp +211 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_utf8_to_utf32.cpp +153 -0
- package/deps/simdutf/src/ppc64/ppc64_utf16_to_utf8_tables.h +1011 -0
- package/deps/simdutf/src/ppc64/ppc64_utf8_length_from_latin1.cpp +37 -0
- package/deps/simdutf/src/ppc64/ppc64_validate_utf16.cpp +19 -0
- package/deps/simdutf/src/ppc64/templates.cpp +91 -0
- package/deps/simdutf/src/rvv/implementation.cpp +138 -0
- package/deps/simdutf/src/rvv/rvv_find.cpp +27 -0
- package/deps/simdutf/src/rvv/rvv_helpers.inl.cpp +23 -0
- package/deps/simdutf/src/rvv/rvv_latin1_to.inl.cpp +71 -0
- package/deps/simdutf/src/rvv/rvv_length_from.inl.cpp +164 -0
- package/deps/simdutf/src/rvv/rvv_utf16_to.inl.cpp +399 -0
- package/deps/simdutf/src/rvv/rvv_utf16fix.cpp +110 -0
- package/deps/simdutf/src/rvv/rvv_utf32_to.inl.cpp +307 -0
- package/deps/simdutf/src/rvv/rvv_utf8_to.inl.cpp +435 -0
- package/deps/simdutf/src/rvv/rvv_validate.inl.cpp +275 -0
- package/deps/simdutf/src/simdutf/arm64/begin.h +2 -0
- package/deps/simdutf/src/simdutf/arm64/bitmanipulation.h +34 -0
- package/deps/simdutf/src/simdutf/arm64/end.h +2 -0
- package/deps/simdutf/src/simdutf/arm64/implementation.h +307 -0
- package/deps/simdutf/src/simdutf/arm64/intrinsics.h +10 -0
- package/deps/simdutf/src/simdutf/arm64/simd.h +547 -0
- package/deps/simdutf/src/simdutf/arm64/simd16-inl.h +403 -0
- package/deps/simdutf/src/simdutf/arm64/simd32-inl.h +129 -0
- package/deps/simdutf/src/simdutf/arm64/simd64-inl.h +28 -0
- package/deps/simdutf/src/simdutf/arm64.h +43 -0
- package/deps/simdutf/src/simdutf/fallback/begin.h +1 -0
- package/deps/simdutf/src/simdutf/fallback/bitmanipulation.h +13 -0
- package/deps/simdutf/src/simdutf/fallback/end.h +1 -0
- package/deps/simdutf/src/simdutf/fallback/implementation.h +331 -0
- package/deps/simdutf/src/simdutf/fallback.h +42 -0
- package/deps/simdutf/src/simdutf/haswell/begin.h +15 -0
- package/deps/simdutf/src/simdutf/haswell/bitmanipulation.h +35 -0
- package/deps/simdutf/src/simdutf/haswell/end.h +13 -0
- package/deps/simdutf/src/simdutf/haswell/implementation.h +338 -0
- package/deps/simdutf/src/simdutf/haswell/intrinsics.h +67 -0
- package/deps/simdutf/src/simdutf/haswell/simd.h +363 -0
- package/deps/simdutf/src/simdutf/haswell/simd16-inl.h +261 -0
- package/deps/simdutf/src/simdutf/haswell/simd32-inl.h +111 -0
- package/deps/simdutf/src/simdutf/haswell/simd64-inl.h +34 -0
- package/deps/simdutf/src/simdutf/haswell.h +63 -0
- package/deps/simdutf/src/simdutf/icelake/begin.h +14 -0
- package/deps/simdutf/src/simdutf/icelake/bitmanipulation.h +44 -0
- package/deps/simdutf/src/simdutf/icelake/end.h +12 -0
- package/deps/simdutf/src/simdutf/icelake/implementation.h +346 -0
- package/deps/simdutf/src/simdutf/icelake/intrinsics.h +138 -0
- package/deps/simdutf/src/simdutf/icelake/simd.h +17 -0
- package/deps/simdutf/src/simdutf/icelake/simd16-inl.h +90 -0
- package/deps/simdutf/src/simdutf/icelake/simd32-inl.h +47 -0
- package/deps/simdutf/src/simdutf/icelake.h +81 -0
- package/deps/simdutf/src/simdutf/lasx/begin.h +8 -0
- package/deps/simdutf/src/simdutf/lasx/bitmanipulation.h +25 -0
- package/deps/simdutf/src/simdutf/lasx/end.h +8 -0
- package/deps/simdutf/src/simdutf/lasx/implementation.h +310 -0
- package/deps/simdutf/src/simdutf/lasx/intrinsics.h +319 -0
- package/deps/simdutf/src/simdutf/lasx/simd.h +551 -0
- package/deps/simdutf/src/simdutf/lasx/simd16-inl.h +234 -0
- package/deps/simdutf/src/simdutf/lasx/simd32-inl.h +74 -0
- package/deps/simdutf/src/simdutf/lasx/simd64-inl.h +52 -0
- package/deps/simdutf/src/simdutf/lasx.h +49 -0
- package/deps/simdutf/src/simdutf/lsx/begin.h +2 -0
- package/deps/simdutf/src/simdutf/lsx/bitmanipulation.h +25 -0
- package/deps/simdutf/src/simdutf/lsx/end.h +2 -0
- package/deps/simdutf/src/simdutf/lsx/implementation.h +309 -0
- package/deps/simdutf/src/simdutf/lsx/intrinsics.h +196 -0
- package/deps/simdutf/src/simdutf/lsx/simd.h +421 -0
- package/deps/simdutf/src/simdutf/lsx/simd16-inl.h +242 -0
- package/deps/simdutf/src/simdutf/lsx/simd32-inl.h +69 -0
- package/deps/simdutf/src/simdutf/lsx/simd64-inl.h +50 -0
- package/deps/simdutf/src/simdutf/lsx.h +52 -0
- package/deps/simdutf/src/simdutf/ppc64/begin.h +1 -0
- package/deps/simdutf/src/simdutf/ppc64/bitmanipulation.h +29 -0
- package/deps/simdutf/src/simdutf/ppc64/end.h +1 -0
- package/deps/simdutf/src/simdutf/ppc64/implementation.h +348 -0
- package/deps/simdutf/src/simdutf/ppc64/intrinsics.h +19 -0
- package/deps/simdutf/src/simdutf/ppc64/simd.h +177 -0
- package/deps/simdutf/src/simdutf/ppc64/simd16-inl.h +327 -0
- package/deps/simdutf/src/simdutf/ppc64/simd32-inl.h +247 -0
- package/deps/simdutf/src/simdutf/ppc64/simd8-inl.h +618 -0
- package/deps/simdutf/src/simdutf/ppc64.h +40 -0
- package/deps/simdutf/src/simdutf/rvv/begin.h +7 -0
- package/deps/simdutf/src/simdutf/rvv/end.h +7 -0
- package/deps/simdutf/src/simdutf/rvv/implementation.h +321 -0
- package/deps/simdutf/src/simdutf/rvv/intrinsics.h +131 -0
- package/deps/simdutf/src/simdutf/rvv.h +41 -0
- package/deps/simdutf/src/simdutf/westmere/begin.h +8 -0
- package/deps/simdutf/src/simdutf/westmere/bitmanipulation.h +37 -0
- package/deps/simdutf/src/simdutf/westmere/end.h +8 -0
- package/deps/simdutf/src/simdutf/westmere/implementation.h +338 -0
- package/deps/simdutf/src/simdutf/westmere/intrinsics.h +38 -0
- package/deps/simdutf/src/simdutf/westmere/simd.h +379 -0
- package/deps/simdutf/src/simdutf/westmere/simd16-inl.h +242 -0
- package/deps/simdutf/src/simdutf/westmere/simd32-inl.h +151 -0
- package/deps/simdutf/src/simdutf/westmere/simd64-inl.h +33 -0
- package/deps/simdutf/src/simdutf/westmere.h +59 -0
- package/deps/simdutf/src/simdutf.cpp +152 -0
- package/deps/simdutf/src/simdutf_c.cpp +525 -0
- package/deps/simdutf/src/tables/utf16_to_utf8_tables.h +768 -0
- package/deps/simdutf/src/tables/utf32_to_utf16_tables.h +53 -0
- package/deps/simdutf/src/tables/utf8_to_utf16_tables.h +826 -0
- package/deps/simdutf/src/westmere/implementation.cpp +1479 -0
- package/deps/simdutf/src/westmere/internal/loader.cpp +7 -0
- package/deps/simdutf/src/westmere/internal/write_v_u16_11bits_to_utf8.cpp +66 -0
- package/deps/simdutf/src/westmere/sse_base64.cpp +672 -0
- package/deps/simdutf/src/westmere/sse_convert_latin1_to_utf16.cpp +21 -0
- package/deps/simdutf/src/westmere/sse_convert_latin1_to_utf32.cpp +31 -0
- package/deps/simdutf/src/westmere/sse_convert_latin1_to_utf8.cpp +71 -0
- package/deps/simdutf/src/westmere/sse_convert_utf16_to_latin1.cpp +70 -0
- package/deps/simdutf/src/westmere/sse_convert_utf16_to_utf32.cpp +206 -0
- package/deps/simdutf/src/westmere/sse_convert_utf16_to_utf8.cpp +504 -0
- package/deps/simdutf/src/westmere/sse_convert_utf32_to_latin1.cpp +82 -0
- package/deps/simdutf/src/westmere/sse_convert_utf32_to_utf16.cpp +209 -0
- package/deps/simdutf/src/westmere/sse_convert_utf32_to_utf8.cpp +589 -0
- package/deps/simdutf/src/westmere/sse_convert_utf8_to_latin1.cpp +58 -0
- package/deps/simdutf/src/westmere/sse_convert_utf8_to_utf16.cpp +197 -0
- package/deps/simdutf/src/westmere/sse_convert_utf8_to_utf32.cpp +141 -0
- package/deps/simdutf/src/westmere/sse_utf16fix.cpp +82 -0
- package/deps/simdutf/src/westmere/sse_validate_utf16.cpp +17 -0
- package/deps/simdutf/tests/CMakeLists.txt +483 -0
- package/deps/simdutf/tests/atomic_base64_tests.cpp +2845 -0
- package/deps/simdutf/tests/base64_tests.cpp +3617 -0
- package/deps/simdutf/tests/basic_fuzzer.cpp +805 -0
- package/deps/simdutf/tests/bele_tests.cpp +182 -0
- package/deps/simdutf/tests/constexpr_base64_tests.cpp +387 -0
- package/deps/simdutf/tests/convert_latin1_to_utf16be_tests.cpp +52 -0
- package/deps/simdutf/tests/convert_latin1_to_utf16le_tests.cpp +80 -0
- package/deps/simdutf/tests/convert_latin1_to_utf32_tests.cpp +66 -0
- package/deps/simdutf/tests/convert_latin1_to_utf8_tests.cpp +120 -0
- package/deps/simdutf/tests/convert_utf16_to_utf8_safe_tests.cpp +203 -0
- package/deps/simdutf/tests/convert_utf16_to_utf8_with_replacement_tests.cpp +276 -0
- package/deps/simdutf/tests/convert_utf16be_to_latin1_tests.cpp +109 -0
- package/deps/simdutf/tests/convert_utf16be_to_latin1_tests_with_errors.cpp +136 -0
- package/deps/simdutf/tests/convert_utf16be_to_utf32_tests.cpp +193 -0
- package/deps/simdutf/tests/convert_utf16be_to_utf32_with_errors_tests.cpp +381 -0
- package/deps/simdutf/tests/convert_utf16be_to_utf8_tests.cpp +259 -0
- package/deps/simdutf/tests/convert_utf16be_to_utf8_with_errors_tests.cpp +266 -0
- package/deps/simdutf/tests/convert_utf16le_to_latin1_tests.cpp +148 -0
- package/deps/simdutf/tests/convert_utf16le_to_latin1_tests_with_errors.cpp +176 -0
- package/deps/simdutf/tests/convert_utf16le_to_utf32_tests.cpp +213 -0
- package/deps/simdutf/tests/convert_utf16le_to_utf32_with_errors_tests.cpp +318 -0
- package/deps/simdutf/tests/convert_utf16le_to_utf8_tests.cpp +343 -0
- package/deps/simdutf/tests/convert_utf16le_to_utf8_with_errors_tests.cpp +271 -0
- package/deps/simdutf/tests/convert_utf32_to_latin1_tests.cpp +111 -0
- package/deps/simdutf/tests/convert_utf32_to_latin1_with_errors_tests.cpp +96 -0
- package/deps/simdutf/tests/convert_utf32_to_utf16be_tests.cpp +148 -0
- package/deps/simdutf/tests/convert_utf32_to_utf16be_with_errors_tests.cpp +192 -0
- package/deps/simdutf/tests/convert_utf32_to_utf16le_tests.cpp +166 -0
- package/deps/simdutf/tests/convert_utf32_to_utf16le_with_errors_tests.cpp +215 -0
- package/deps/simdutf/tests/convert_utf32_to_utf8_tests.cpp +181 -0
- package/deps/simdutf/tests/convert_utf32_to_utf8_with_errors_tests.cpp +261 -0
- package/deps/simdutf/tests/convert_utf8_to_latin1_tests.cpp +516 -0
- package/deps/simdutf/tests/convert_utf8_to_latin1_with_errors_tests.cpp +579 -0
- package/deps/simdutf/tests/convert_utf8_to_utf16be_tests.cpp +412 -0
- package/deps/simdutf/tests/convert_utf8_to_utf16be_with_errors_tests.cpp +480 -0
- package/deps/simdutf/tests/convert_utf8_to_utf16le_tests.cpp +671 -0
- package/deps/simdutf/tests/convert_utf8_to_utf16le_with_errors_tests.cpp +455 -0
- package/deps/simdutf/tests/convert_utf8_to_utf32_tests.cpp +1204 -0
- package/deps/simdutf/tests/convert_utf8_to_utf32_with_errors_tests.cpp +337 -0
- package/deps/simdutf/tests/convert_valid_utf16be_to_latin1_tests.cpp +37 -0
- package/deps/simdutf/tests/convert_valid_utf16be_to_utf32_tests.cpp +97 -0
- package/deps/simdutf/tests/convert_valid_utf16be_to_utf8_tests.cpp +126 -0
- package/deps/simdutf/tests/convert_valid_utf16le_to_latin1_tests.cpp +71 -0
- package/deps/simdutf/tests/convert_valid_utf16le_to_utf32_tests.cpp +122 -0
- package/deps/simdutf/tests/convert_valid_utf16le_to_utf8_tests.cpp +244 -0
- package/deps/simdutf/tests/convert_valid_utf32_to_latin1_tests.cpp +49 -0
- package/deps/simdutf/tests/convert_valid_utf32_to_utf16be_tests.cpp +92 -0
- package/deps/simdutf/tests/convert_valid_utf32_to_utf16le_tests.cpp +114 -0
- package/deps/simdutf/tests/convert_valid_utf32_to_utf8_tests.cpp +109 -0
- package/deps/simdutf/tests/convert_valid_utf8_to_latin1_tests.cpp +84 -0
- package/deps/simdutf/tests/convert_valid_utf8_to_utf16be_tests.cpp +124 -0
- package/deps/simdutf/tests/convert_valid_utf8_to_utf16le_tests.cpp +221 -0
- package/deps/simdutf/tests/convert_valid_utf8_to_utf32_tests.cpp +155 -0
- package/deps/simdutf/tests/count_utf16be.cpp +64 -0
- package/deps/simdutf/tests/count_utf16le.cpp +61 -0
- package/deps/simdutf/tests/count_utf8.cpp +87 -0
- package/deps/simdutf/tests/detect_encodings_tests.cpp +312 -0
- package/deps/simdutf/tests/embed/valid_utf8.txt +1 -0
- package/deps/simdutf/tests/embed_tests.cpp +22 -0
- package/deps/simdutf/tests/find_tests.cpp +77 -0
- package/deps/simdutf/tests/fixed_string_tests.cpp +153 -0
- package/deps/simdutf/tests/helpers/CMakeLists.txt +25 -0
- package/deps/simdutf/tests/helpers/compiletime_conversions.h +222 -0
- package/deps/simdutf/tests/helpers/fixed_string.h +267 -0
- package/deps/simdutf/tests/helpers/random_int.cpp +30 -0
- package/deps/simdutf/tests/helpers/random_int.h +39 -0
- package/deps/simdutf/tests/helpers/random_utf16.cpp +123 -0
- package/deps/simdutf/tests/helpers/random_utf16.h +52 -0
- package/deps/simdutf/tests/helpers/random_utf32.cpp +41 -0
- package/deps/simdutf/tests/helpers/random_utf32.h +40 -0
- package/deps/simdutf/tests/helpers/random_utf8.cpp +93 -0
- package/deps/simdutf/tests/helpers/random_utf8.h +36 -0
- package/deps/simdutf/tests/helpers/test.cpp +231 -0
- package/deps/simdutf/tests/helpers/test.h +193 -0
- package/deps/simdutf/tests/helpers/transcode_test_base.cpp +1257 -0
- package/deps/simdutf/tests/helpers/transcode_test_base.h +683 -0
- package/deps/simdutf/tests/helpers/utf16.h +27 -0
- package/deps/simdutf/tests/installation_tests/find/CMakeLists.txt +43 -0
- package/deps/simdutf/tests/installation_tests/from_fetch/CMakeLists.txt +47 -0
- package/deps/simdutf/tests/internal_tests.cpp +27 -0
- package/deps/simdutf/tests/null_safety_tests.cpp +94 -0
- package/deps/simdutf/tests/random_fuzzer.cpp +779 -0
- package/deps/simdutf/tests/readme_tests.cpp +274 -0
- package/deps/simdutf/tests/reference/CMakeLists.txt +23 -0
- package/deps/simdutf/tests/reference/decode_utf16.h +81 -0
- package/deps/simdutf/tests/reference/decode_utf32.h +47 -0
- package/deps/simdutf/tests/reference/encode_latin1.cpp +1 -0
- package/deps/simdutf/tests/reference/encode_latin1.h +32 -0
- package/deps/simdutf/tests/reference/encode_utf16.cpp +49 -0
- package/deps/simdutf/tests/reference/encode_utf16.h +20 -0
- package/deps/simdutf/tests/reference/encode_utf32.cpp +1 -0
- package/deps/simdutf/tests/reference/encode_utf32.h +36 -0
- package/deps/simdutf/tests/reference/encode_utf8.cpp +1 -0
- package/deps/simdutf/tests/reference/encode_utf8.h +40 -0
- package/deps/simdutf/tests/reference/validate_utf16.cpp +60 -0
- package/deps/simdutf/tests/reference/validate_utf16.h +14 -0
- package/deps/simdutf/tests/reference/validate_utf16_to_latin1.cpp +35 -0
- package/deps/simdutf/tests/reference/validate_utf16_to_latin1.h +13 -0
- package/deps/simdutf/tests/reference/validate_utf32.cpp +27 -0
- package/deps/simdutf/tests/reference/validate_utf32.h +12 -0
- package/deps/simdutf/tests/reference/validate_utf32_to_latin1.cpp +27 -0
- package/deps/simdutf/tests/reference/validate_utf32_to_latin1.h +12 -0
- package/deps/simdutf/tests/reference/validate_utf8.cpp +82 -0
- package/deps/simdutf/tests/reference/validate_utf8.h +11 -0
- package/deps/simdutf/tests/reference/validate_utf8_to_latin1.cpp +43 -0
- package/deps/simdutf/tests/reference/validate_utf8_to_latin1.h +12 -0
- package/deps/simdutf/tests/select_implementation.cpp +43 -0
- package/deps/simdutf/tests/simdutf_c_tests.cpp +244 -0
- package/deps/simdutf/tests/span_tests.cpp +401 -0
- package/deps/simdutf/tests/special_tests.cpp +559 -0
- package/deps/simdutf/tests/straight_c_test.c +187 -0
- package/deps/simdutf/tests/text_encoding_tests.cpp +77 -0
- package/deps/simdutf/tests/to_well_formed_utf16_tests.cpp +377 -0
- package/deps/simdutf/tests/utf8_length_from_utf16_tests.cpp +202 -0
- package/deps/simdutf/tests/validate_ascii_basic_tests.cpp +165 -0
- package/deps/simdutf/tests/validate_ascii_with_errors_tests.cpp +77 -0
- package/deps/simdutf/tests/validate_utf16be_basic_tests.cpp +175 -0
- package/deps/simdutf/tests/validate_utf16be_with_errors_tests.cpp +188 -0
- package/deps/simdutf/tests/validate_utf16le_basic_tests.cpp +268 -0
- package/deps/simdutf/tests/validate_utf16le_with_errors_tests.cpp +274 -0
- package/deps/simdutf/tests/validate_utf32_basic_tests.cpp +92 -0
- package/deps/simdutf/tests/validate_utf32_with_errors_tests.cpp +114 -0
- package/deps/simdutf/tests/validate_utf8_basic_tests.cpp +178 -0
- package/deps/simdutf/tests/validate_utf8_brute_force_tests.cpp +88 -0
- package/deps/simdutf/tests/validate_utf8_puzzler_tests.cpp +33 -0
- package/deps/simdutf/tests/validate_utf8_with_errors_tests.cpp +228 -0
- package/deps/simdutf/tools/CMakeLists.txt +85 -0
- package/deps/simdutf/tools/fastbase64.cpp +250 -0
- package/deps/simdutf/tools/sutf.cpp +556 -0
- package/deps/simdutf/tools/sutf.h +40 -0
- package/lib/commonjs/blake3.js +2 -1
- package/lib/commonjs/blake3.js.map +1 -1
- package/lib/commonjs/diffie-hellman.js +5 -4
- package/lib/commonjs/diffie-hellman.js.map +1 -1
- package/lib/commonjs/ecdh.js +5 -4
- package/lib/commonjs/ecdh.js.map +1 -1
- package/lib/module/blake3.js +2 -1
- package/lib/module/blake3.js.map +1 -1
- package/lib/module/diffie-hellman.js +5 -4
- package/lib/module/diffie-hellman.js.map +1 -1
- package/lib/module/ecdh.js +5 -4
- package/lib/module/ecdh.js.map +1 -1
- package/lib/tsconfig.tsbuildinfo +1 -1
- package/lib/typescript/blake3.d.ts.map +1 -1
- package/lib/typescript/diffie-hellman.d.ts.map +1 -1
- package/lib/typescript/ecdh.d.ts.map +1 -1
- package/package.json +2 -2
- package/src/blake3.ts +2 -1
- package/src/diffie-hellman.ts +5 -7
- package/src/ecdh.ts +5 -8
|
@@ -0,0 +1,1903 @@
|
|
|
1
|
+
#include <tuple>
|
|
2
|
+
#include <utility>
|
|
3
|
+
#include "simdutf/icelake/intrinsics.h"
|
|
4
|
+
|
|
5
|
+
#include "simdutf/icelake/begin.h"
|
|
6
|
+
namespace simdutf {
|
|
7
|
+
namespace SIMDUTF_IMPLEMENTATION {
|
|
8
|
+
namespace {
|
|
9
|
+
#ifndef SIMDUTF_ICELAKE_H
|
|
10
|
+
#error "icelake.h must be included"
|
|
11
|
+
#endif
|
|
12
|
+
using namespace simd;
|
|
13
|
+
|
|
14
|
+
#include "icelake/icelake_macros.inl.cpp"
|
|
15
|
+
#include "icelake/icelake_common.inl.cpp"
|
|
16
|
+
#if SIMDUTF_FEATURE_UTF8
|
|
17
|
+
#include "icelake/icelake_utf8_common.inl.cpp"
|
|
18
|
+
#endif // SIMDUTF_FEATURE_UTF8
|
|
19
|
+
|
|
20
|
+
#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING
|
|
21
|
+
#include "icelake/icelake_utf8_validation.inl.cpp"
|
|
22
|
+
#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING
|
|
23
|
+
|
|
24
|
+
#if SIMDUTF_FEATURE_UTF8 && \
|
|
25
|
+
(SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_LATIN1)
|
|
26
|
+
#include "icelake/icelake_from_valid_utf8.inl.cpp"
|
|
27
|
+
#include "icelake/icelake_from_utf8.inl.cpp"
|
|
28
|
+
#endif // SIMDUTF_FEATURE_UTF8 && (SIMDUTF_FEATURE_UTF16 ||
|
|
29
|
+
// SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_LATIN1)
|
|
30
|
+
|
|
31
|
+
#if SIMDUTF_FEATURE_UTF16
|
|
32
|
+
#include "icelake/icelake_utf16fix.cpp"
|
|
33
|
+
#endif // SIMDUTF_FEATURE_UTF16
|
|
34
|
+
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
|
|
35
|
+
#include "icelake/icelake_convert_utf8_to_latin1.inl.cpp"
|
|
36
|
+
#include "icelake/icelake_convert_valid_utf8_to_latin1.inl.cpp"
|
|
37
|
+
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
|
|
38
|
+
|
|
39
|
+
#if SIMDUTF_FEATURE_UTF16
|
|
40
|
+
#include "icelake/icelake_convert_utf16_to_latin1.inl.cpp"
|
|
41
|
+
#endif // SIMDUTF_FEATURE_UTF16
|
|
42
|
+
|
|
43
|
+
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
|
|
44
|
+
#include "icelake/icelake_convert_utf16_to_utf8.inl.cpp"
|
|
45
|
+
#include "icelake/icelake_convert_utf8_to_utf16.inl.cpp"
|
|
46
|
+
#include "icelake/icelake_utf8_length_from_utf16.inl.cpp"
|
|
47
|
+
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
|
|
48
|
+
|
|
49
|
+
#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
|
|
50
|
+
#include "icelake/icelake_convert_utf16_to_utf32.inl.cpp"
|
|
51
|
+
#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
|
|
52
|
+
|
|
53
|
+
#if SIMDUTF_FEATURE_UTF32
|
|
54
|
+
#include "icelake/icelake_convert_utf32_to_latin1.inl.cpp"
|
|
55
|
+
#endif // SIMDUTF_FEATURE_UTF32
|
|
56
|
+
|
|
57
|
+
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
|
|
58
|
+
#include "icelake/icelake_convert_utf32_to_utf8.inl.cpp"
|
|
59
|
+
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
|
|
60
|
+
|
|
61
|
+
#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
|
|
62
|
+
#include "icelake/icelake_convert_utf32_to_utf16.inl.cpp"
|
|
63
|
+
#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
|
|
64
|
+
|
|
65
|
+
#if SIMDUTF_FEATURE_ASCII
|
|
66
|
+
#include "icelake/icelake_ascii_validation.inl.cpp"
|
|
67
|
+
#endif // SIMDUTF_FEATURE_ASCII
|
|
68
|
+
#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING
|
|
69
|
+
#include "icelake/icelake_utf32_validation.inl.cpp"
|
|
70
|
+
#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING
|
|
71
|
+
#if SIMDUTF_FEATURE_UTF8
|
|
72
|
+
#include "icelake/icelake_convert_latin1_to_utf8.inl.cpp"
|
|
73
|
+
#endif // SIMDUTF_FEATURE_UTF8
|
|
74
|
+
#if SIMDUTF_FEATURE_UTF16
|
|
75
|
+
#include "icelake/icelake_convert_latin1_to_utf16.inl.cpp"
|
|
76
|
+
#endif // SIMDUTF_FEATURE_UTF16
|
|
77
|
+
#if SIMDUTF_FEATURE_UTF32
|
|
78
|
+
#include "icelake/icelake_convert_latin1_to_utf32.inl.cpp"
|
|
79
|
+
#endif // SIMDUTF_FEATURE_UTF32
|
|
80
|
+
#if SIMDUTF_FEATURE_BASE64
|
|
81
|
+
#include "icelake/icelake_base64.inl.cpp"
|
|
82
|
+
#include "icelake/icelake_find.inl.cpp"
|
|
83
|
+
#endif // SIMDUTF_FEATURE_BASE64
|
|
84
|
+
|
|
85
|
+
#include <cstdint>
|
|
86
|
+
|
|
87
|
+
} // namespace
|
|
88
|
+
} // namespace SIMDUTF_IMPLEMENTATION
|
|
89
|
+
} // namespace simdutf
|
|
90
|
+
|
|
91
|
+
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
|
|
92
|
+
#include "generic/utf32.h"
|
|
93
|
+
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
|
|
94
|
+
|
|
95
|
+
namespace simdutf {
|
|
96
|
+
namespace SIMDUTF_IMPLEMENTATION {
|
|
97
|
+
|
|
98
|
+
#if SIMDUTF_FEATURE_DETECT_ENCODING
|
|
99
|
+
/**
 * Guess which encodings the byte buffer could be valid in.
 *
 * @param input  start of the buffer to inspect
 * @param length number of bytes in the buffer
 * @return the BOM-derived encoding when a byte-order mark is recognised;
 *         otherwise a bitwise OR of encoding_type::UTF8,
 *         encoding_type::UTF16_LE and encoding_type::UTF32_LE for every
 *         encoding whose checks found no error (0 when none pass).
 */
simdutf_warn_unused int
implementation::detect_encodings(const char *input,
                                 size_t length) const noexcept {
  // A byte-order mark, when present, is taken at face value.
  const auto from_bom = simdutf::BOM::check_bom(input, length);
  if (from_bom != encoding_type::unspecified) {
    return from_bom;
  }

  // Subtracting 0xD800 from each 16-bit word maps 0xD800..0xDFFF onto
  // 0x0000..0x07FF, with the "high" half landing in 0x0000..0x03FF.
  const __m512i surrogate_base = _mm512_set1_epi16(uint16_t(0xD800));
  const __m512i any_surrogate_span = _mm512_set1_epi16(uint16_t(0x0800));
  const __m512i high_surrogate_span = _mm512_set1_epi16(uint16_t(0x0400));
  // Adding 0xffff2000 to a 32-bit word moves 0xD800..0xDFFF up to
  // 0xfffff800..0xffffffff, so a running maximum can expose them.
  const __m512i utf32_bias = _mm512_set1_epi32((uint32_t)0xffff2000);

  // A byte count that is not a multiple of the code-unit size is already an
  // error for the corresponding encoding.
  uint32_t utf16_bad = (length % 2);
  uint32_t utf32_bad = (length % 4);
  // 1 when the previous 64-byte block ended on a high surrogate.
  uint32_t carry_high = 0;
  avx512_utf8_checker utf8_state{};
  __m512i max_seen = _mm512_setzero_si512();
  __m512i max_seen_biased = _mm512_setzero_si512();

  const char *cursor = input;
  const char *const stop = input + length;
  while (stop - cursor >= 64) {
    const __m512i block = _mm512_loadu_si512((const __m512i *)cursor);

    // UTF-8
    utf8_state.check_next_input(block);

    // UTF-16LE: every high surrogate must be immediately followed by a low
    // surrogate, and every low surrogate must be preceded by a high one.
    const __m512i rebased = _mm512_sub_epi16(block, surrogate_base);
    const __mmask32 any_sur =
        _mm512_cmplt_epu16_mask(rebased, any_surrogate_span);
    const __mmask32 high_sur =
        _mm512_cmplt_epu16_mask(rebased, high_surrogate_span);
    const __mmask32 low_sur = any_sur ^ high_sur;
    utf16_bad |= (((high_sur << 1) | carry_high) != low_sur);
    carry_high = ((high_sur & 0x80000000) != 0);

    // UTF-32LE: only track running maxima here; they are judged once at the
    // end.
    max_seen_biased = _mm512_max_epu32(_mm512_add_epi32(block, utf32_bias),
                                       max_seen_biased);
    max_seen = _mm512_max_epu32(block, max_seen);

    cursor += 64;
  }

  // Final partial block (0 to 63 bytes); lanes past the end read as zero.
  const __mmask64 tail_mask = (__mmask64(1) << (stop - cursor)) - 1;
  const __m512i tail =
      _mm512_maskz_loadu_epi8(tail_mask, (const __m512i *)cursor);
  utf8_state.check_next_input(tail);

  const __m512i tail_rebased = _mm512_sub_epi16(tail, surrogate_base);
  const __mmask32 tail_any_sur =
      _mm512_cmplt_epu16_mask(tail_rebased, any_surrogate_span);
  const __mmask32 tail_high_sur =
      _mm512_cmplt_epu16_mask(tail_rebased, high_surrogate_span);
  const __mmask32 tail_low_sur = tail_any_sur ^ tail_high_sur;
  utf16_bad |= (((tail_high_sur << 1) | carry_high) != tail_low_sur);

  max_seen_biased = _mm512_max_epu32(_mm512_add_epi32(tail, utf32_bias),
                                     max_seen_biased);
  max_seen = _mm512_max_epu32(tail, max_seen);

  // max(x, limit) ^ limit is non-zero exactly when some lane exceeded limit.
  const __m512i utf32_limit = _mm512_set1_epi32((uint32_t)0x10ffff);
  const __m512i utf32_biased_limit = _mm512_set1_epi32((uint32_t)0xfffff7ff);
  const __m512i over_limit = _mm512_xor_si512(
      _mm512_max_epu32(max_seen, utf32_limit), utf32_limit);
  utf32_bad |= (_mm512_test_epi8_mask(over_limit, over_limit) != 0);
  const __m512i over_biased_limit = _mm512_xor_si512(
      _mm512_max_epu32(max_seen_biased, utf32_biased_limit),
      utf32_biased_limit);
  utf32_bad |=
      (_mm512_test_epi8_mask(over_biased_limit, over_biased_limit) != 0);

  utf8_state.check_eof();
  const bool utf8_ok = !utf8_state.errors();

  // Collect one flag per encoding that survived its checks.
  int detected = 0;
  if (utf8_ok) {
    detected |= encoding_type::UTF8;
  }
  if (utf16_bad == 0) {
    detected |= encoding_type::UTF16_LE;
  }
  if (utf32_bad == 0) {
    detected |= encoding_type::UTF32_LE;
  }
  return detected;
}
|
|
177
|
+
#endif // SIMDUTF_FEATURE_DETECT_ENCODING
|
|
178
|
+
|
|
179
|
+
#if SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING
|
|
180
|
+
/**
 * Check whether a byte buffer is valid UTF-8.
 *
 * @param buf start of the buffer
 * @param len number of bytes in the buffer
 * @return true when the checker reports no error (an empty buffer is
 *         accepted immediately), false otherwise.
 */
simdutf_warn_unused bool
implementation::validate_utf8(const char *buf, size_t len) const noexcept {
  if (simdutf_unlikely(len == 0)) {
    return true;
  }

  avx512_utf8_checker utf8_state{};
  const char *cursor = buf;
  const char *const stop = buf + len;

  // Full 64-byte blocks.
  while (stop - cursor >= 64) {
    const __m512i block = _mm512_loadu_si512((const __m512i *)cursor);
    utf8_state.check_next_input(block);
    cursor += 64;
  }

  // Remaining 1 to 63 bytes: masked load, unread lanes are zero.
  if (stop != cursor) {
    const __mmask64 tail_mask = ~UINT64_C(0) >> (64 - (stop - cursor));
    const __m512i tail =
        _mm512_maskz_loadu_epi8(tail_mask, (const __m512i *)cursor);
    utf8_state.check_next_input(tail);
  }

  utf8_state.check_eof();
  return !utf8_state.errors();
}
|
|
200
|
+
#endif // SIMDUTF_FEATURE_UTF8 || SIMDUTF_FEATURE_DETECT_ENCODING
|
|
201
|
+
|
|
202
|
+
#if SIMDUTF_FEATURE_UTF8
|
|
203
|
+
/**
 * Validate a byte buffer as UTF-8 and report where validation failed.
 *
 * The vectorised checker only tells us that an error exists; to obtain the
 * error details the scalar routine is re-run starting near the last block
 * that was known to be clean.
 *
 * @param buf start of the buffer
 * @param len number of bytes in the buffer
 * @return result(error_code::SUCCESS, len) when no error is found; otherwise
 *         the result produced by
 *         scalar::utf8::rewind_and_validate_with_errors with its count
 *         shifted by the offset at which the scalar pass was started.
 */
simdutf_warn_unused result implementation::validate_utf8_with_errors(
    const char *buf, size_t len) const noexcept {
  if (simdutf_unlikely(len == 0)) {
    return result(error_code::SUCCESS, len);
  }

  avx512_utf8_checker utf8_state{};
  const char *cursor = buf;
  const char *const stop = buf + len;
  // Bytes covered by full blocks that passed without a reported error.
  size_t consumed{0};
  bool failed = false;

  while (stop - cursor >= 64) {
    const __m512i block = _mm512_loadu_si512((const __m512i *)cursor);
    utf8_state.check_next_input(block);
    if (utf8_state.errors()) {
      failed = true;
      break;
    }
    consumed += 64;
    cursor += 64;
  }

  if (!failed) {
    // Remaining 1 to 63 bytes: masked load, unread lanes are zero.
    if (stop != cursor) {
      const __mmask64 tail_mask = ~UINT64_C(0) >> (64 - (stop - cursor));
      const __m512i tail =
          _mm512_maskz_loadu_epi8(tail_mask, (const __m512i *)cursor);
      utf8_state.check_next_input(tail);
    }
    utf8_state.check_eof();
    if (utf8_state.errors()) {
      failed = true;
    }
  }

  if (!failed) {
    return result(error_code::SUCCESS, len);
  }

  // Sometimes the error is only detected in the next chunk, so step back one
  // byte before handing over to the scalar routine.
  if (consumed != 0) {
    consumed--;
  }
  result res = scalar::utf8::rewind_and_validate_with_errors(
      buf, buf + consumed, len - consumed);
  res.count += consumed;
  return res;
}
|
|
245
|
+
#endif // SIMDUTF_FEATURE_UTF8
|
|
246
|
+
|
|
247
|
+
#if SIMDUTF_FEATURE_ASCII
|
|
248
|
+
/**
 * Check whether a byte buffer is ASCII.
 *
 * Forwards to icelake::validate_ascii and returns its verdict unchanged.
 *
 * @param buf start of the buffer
 * @param len number of bytes in the buffer
 */
simdutf_warn_unused bool
implementation::validate_ascii(const char *buf, size_t len) const noexcept {
  const bool verdict = icelake::validate_ascii(buf, len);
  return verdict;
}
|
|
252
|
+
|
|
253
|
+
/**
 * Validate a byte buffer as ASCII and report the first offending byte.
 *
 * @param buf start of the buffer
 * @param len number of bytes in the buffer
 * @return result(error_code::SUCCESS, len) when every byte is below 0x80;
 *         otherwise result(error_code::TOO_LARGE, i) where i is the index of
 *         the first byte that is 0x80 or greater.
 */
simdutf_warn_unused result implementation::validate_ascii_with_errors(
    const char *buf, size_t len) const noexcept {
  const char *const start = buf;
  const char *const stop = buf + len;
  const char *cursor = buf;
  // Smallest byte value that is not ASCII.
  const __m512i first_non_ascii = _mm512_set1_epi8((uint8_t)0x80);

  // Full 64-byte blocks.
  while (stop - cursor >= 64) {
    const __m512i block = _mm512_loadu_si512((const __m512i *)cursor);
    // One mask bit per byte that is not-less-than 0x80 (unsigned compare).
    const __mmask64 offenders =
        _mm512_cmp_epu8_mask(block, first_non_ascii, _MM_CMPINT_NLT);
    if (offenders) {
      // The lowest set bit marks the first bad byte inside this block.
      return result(error_code::TOO_LARGE,
                    cursor - start + _tzcnt_u64(offenders));
    }
    cursor += 64;
  }

  // Remaining 1 to 63 bytes: masked load, unread lanes are zero (ASCII).
  if (stop != cursor) {
    const __mmask64 tail_mask = ~UINT64_C(0) >> (64 - (stop - cursor));
    const __m512i tail =
        _mm512_maskz_loadu_epi8(tail_mask, (const __m512i *)cursor);
    const __mmask64 offenders =
        _mm512_cmp_epu8_mask(tail, first_non_ascii, _MM_CMPINT_NLT);
    if (offenders) {
      return result(error_code::TOO_LARGE,
                    cursor - start + _tzcnt_u64(offenders));
    }
  }

  return result(error_code::SUCCESS, len);
}
|
|
277
|
+
#endif // SIMDUTF_FEATURE_ASCII
|
|
278
|
+
#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII
|
|
279
|
+
/**
 * Check that every 16-bit code unit in the buffer is at most 0x007F.
 *
 * Code units are compared as loaded, with no byte swap.
 *
 * @param buf start of the buffer
 * @param len number of 16-bit code units in the buffer
 * @return false as soon as a code unit above 0x007F is seen, true otherwise
 *         (including for an empty buffer).
 */
simdutf_warn_unused bool
implementation::validate_utf16le_as_ascii(const char16_t *buf,
                                          size_t len) const noexcept {
  const char16_t *const stop = buf + len;
  const char16_t *cursor = buf;
  const __m512i ascii_max = _mm512_set1_epi16(uint16_t(0x007F));

  // Full blocks of 32 code units.
  while (stop - cursor >= 32) {
    const __m512i block = _mm512_loadu_si512((const __m512i *)cursor);
    const auto too_big = _mm512_cmpgt_epu16_mask(block, ascii_max);
    if (too_big) {
      return false;
    }
    cursor += 32;
  }

  // Remaining 1 to 31 code units: masked load, unread lanes are zero.
  if (cursor < stop) {
    const __m512i tail = _mm512_maskz_loadu_epi16(
        (1U << (stop - cursor)) - 1, (const __m512i *)cursor);
    const auto too_big = _mm512_cmpgt_epu16_mask(tail, ascii_max);
    if (too_big) {
      return false;
    }
  }

  return true;
}
|
|
302
|
+
|
|
303
|
+
/**
 * Check that every 16-bit code unit in the buffer, after swapping its two
 * bytes, is at most 0x007F.
 *
 * @param buf start of the buffer
 * @param len number of 16-bit code units in the buffer
 * @return false as soon as a byte-swapped code unit above 0x007F is seen,
 *         true otherwise (including for an empty buffer).
 */
simdutf_warn_unused bool
implementation::validate_utf16be_as_ascii(const char16_t *buf,
                                          size_t len) const noexcept {
  const char16_t *const stop = buf + len;
  const char16_t *cursor = buf;
  // Shuffle control that exchanges the two bytes of every 16-bit lane.
  const __m512i swap_bytes = _mm512_setr_epi64(
      0x0607040502030001, 0x0e0f0c0d0a0b0809, 0x0607040502030001,
      0x0e0f0c0d0a0b0809, 0x0607040502030001, 0x0e0f0c0d0a0b0809,
      0x0607040502030001, 0x0e0f0c0d0a0b0809);
  const __m512i ascii_max = _mm512_set1_epi16(uint16_t(0x007F));

  // Full blocks of 32 code units.
  while (stop - cursor >= 32) {
    const __m512i raw = _mm512_loadu_si512((const __m512i *)cursor);
    const __m512i swapped = _mm512_shuffle_epi8(raw, swap_bytes);
    const auto too_big = _mm512_cmpgt_epu16_mask(swapped, ascii_max);
    if (too_big) {
      return false;
    }
    cursor += 32;
  }

  // Remaining 1 to 31 code units: masked load, unread lanes are zero.
  if (cursor < stop) {
    const __m512i raw = _mm512_maskz_loadu_epi16(
        (1U << (stop - cursor)) - 1, (const __m512i *)cursor);
    const __m512i swapped = _mm512_shuffle_epi8(raw, swap_bytes);
    const auto too_big = _mm512_cmpgt_epu16_mask(swapped, ascii_max);
    if (too_big) {
      return false;
    }
  }

  return true;
}
|
|
332
|
+
#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_ASCII
|
|
333
|
+
#if SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING
|
|
334
|
+
// Returns true when the surrogates in [buf, buf + len) are correctly paired:
// every high surrogate (0xD800..0xDBFF) is directly followed by a low
// surrogate (0xDC00..0xDFFF) and no low surrogate stands alone.
simdutf_warn_unused bool
implementation::validate_utf16le(const char16_t *buf,
                                 size_t len) const noexcept {
  const char16_t *const stop = buf + len;

  // (unit - 0xD800) mod 2^16 is below 0x0800 for any surrogate and below
  // 0x0400 for a high surrogate.
  const __m512i surrogate_first = _mm512_set1_epi16(uint16_t(0xD800));
  const __m512i surrogate_span = _mm512_set1_epi16(uint16_t(0x0800));
  const __m512i high_span = _mm512_set1_epi16(uint16_t(0x0400));

  // Fast path: two 512-bit blocks (64 code units) per iteration.
  while (stop - buf >= 64) {
    const __m512i first =
        _mm512_loadu_si512(reinterpret_cast<const __m512i *>(buf));
    const __m512i second =
        _mm512_loadu_si512(reinterpret_cast<const __m512i *>(buf + 32));
    const __m512i first_off = _mm512_sub_epi16(first, surrogate_first);
    const __m512i second_off = _mm512_sub_epi16(second, surrogate_first);
    const __mmask32 first_any =
        _mm512_cmplt_epu16_mask(first_off, surrogate_span);
    const __mmask32 second_any =
        _mm512_cmplt_epu16_mask(second_off, surrogate_span);

    if ((first_any | second_any) == 0) {
      buf += 64;
      continue;
    }

    const __mmask32 first_high =
        _mm512_cmplt_epu16_mask(first_off, high_span);
    const __mmask32 first_low = first_any ^ first_high;
    const __mmask32 second_high =
        _mm512_cmplt_epu16_mask(second_off, high_span);
    const __mmask32 second_low = second_any ^ second_high;

    // Inside the first block every high must be followed by a low. A high in
    // the last lane is shifted out of the 32-bit mask and is checked against
    // the second block below.
    if ((first_high << 1) != first_low) {
      return false;
    }

    // A high surrogate in the last lane of the first block needs a low
    // surrogate in lane 0 of the second block.
    const bool carry_high = (first_high & 0x80000000) != 0;
    const bool second_opens_low = (second_low & 0x1) != 0;
    if (carry_high && !second_opens_low) {
      return false;
    }

    // Low surrogates expected in the second block, including the carried one.
    __mmask32 wanted_low = second_high << 1;
    if (carry_high) {
      wanted_low |= 0x1;
    }
    if (wanted_low != second_low) {
      return false;
    }

    // If the second block ends on a high surrogate, step back by one so the
    // next round starts on it.
    if ((second_high & 0x80000000) != 0) {
      buf += 63;
    } else {
      buf += 64;
    }
  }

  // Single 512-bit blocks while at least 32 code units remain.
  while (stop - buf >= 32) {
    const __m512i units =
        _mm512_loadu_si512(reinterpret_cast<const __m512i *>(buf));
    const __m512i off = _mm512_sub_epi16(units, surrogate_first);
    const __mmask32 any = _mm512_cmplt_epu16_mask(off, surrogate_span);
    if (any == 0) {
      buf += 32;
      continue;
    }
    const __mmask32 highs = _mm512_cmplt_epu16_mask(off, high_span);
    const __mmask32 lows = any ^ highs;
    if ((highs << 1) != lows) {
      return false;
    }
    // Re-read a trailing high surrogate at the start of the next round.
    if ((highs & 0x80000000) != 0) {
      buf += 31;
    } else {
      buf += 32;
    }
  }

  // Final 1..31 code units via a zero-filling masked load. A trailing high
  // surrogate fails here because the lane after it holds no low surrogate.
  if (buf < stop) {
    const __mmask32 live_lanes = (1U << (stop - buf)) - 1;
    const __m512i units = _mm512_maskz_loadu_epi16(
        live_lanes, reinterpret_cast<const __m512i *>(buf));
    const __m512i off = _mm512_sub_epi16(units, surrogate_first);
    const __mmask32 any = _mm512_cmplt_epu16_mask(off, surrogate_span);
    if (any != 0) {
      const __mmask32 highs = _mm512_cmplt_epu16_mask(off, high_span);
      const __mmask32 lows = any ^ highs;
      if ((highs << 1) != lows) {
        return false;
      }
    }
  }
  return true;
}
|
|
434
|
+
#endif // SIMDUTF_FEATURE_UTF16 || SIMDUTF_FEATURE_DETECT_ENCODING
|
|
435
|
+
|
|
436
|
+
#if SIMDUTF_FEATURE_UTF16
|
|
437
|
+
// Returns true when the surrogates in the big-endian UTF-16 input
// [buf, buf + len) are correctly paired: every high surrogate
// (0xD800..0xDBFF) is directly followed by a low surrogate (0xDC00..0xDFFF)
// and no low surrogate stands alone.
simdutf_warn_unused bool
implementation::validate_utf16be(const char16_t *buf,
                                 size_t len) const noexcept {
  const char16_t *end = buf + len;

  // Loop-invariant constants. (unit - 0xD800) mod 2^16 is below 0x0800 for
  // any surrogate and below 0x0400 for a high surrogate.
  const __m512i surr_base = _mm512_set1_epi16(uint16_t(0xD800));
  const __m512i surr_range = _mm512_set1_epi16(uint16_t(0x0800));
  const __m512i high_range = _mm512_set1_epi16(uint16_t(0x0400));

  for (; end - buf >= 32;) {
    // Shift every 16-bit lane left by 8 so that the lane's low byte moves
    // into its high byte and the low byte becomes zero. The range bounds
    // are multiples of 0x100, so only the high byte decides the outcome.
    // The shift is per 16-bit lane (not per 32-bit lane) so that no byte of
    // a neighbouring code unit leaks in, matching the tail below and
    // validate_utf16be_with_errors.
    __m512i in = _mm512_slli_epi16(_mm512_loadu_si512((__m512i *)buf), 8);
    __m512i diff = _mm512_sub_epi16(in, surr_base);
    __mmask32 surrogates = _mm512_cmplt_epu16_mask(diff, surr_range);
    if (surrogates) {
      __mmask32 highsurrogates = _mm512_cmplt_epu16_mask(diff, high_range);
      __mmask32 lowsurrogates = surrogates ^ highsurrogates;
      // high must be followed by low
      if ((highsurrogates << 1) != lowsurrogates) {
        return false;
      }
      bool ends_with_high = ((highsurrogates & 0x80000000) != 0);
      if (ends_with_high) {
        buf += 31; // advance only by 31 code units so that we start with the
                   // high surrogate on the next round.
      } else {
        buf += 32;
      }
    } else {
      buf += 32;
    }
  }
  if (buf < end) {
    // Remaining 1..31 code units: the masked load zero-fills unused lanes.
    __m512i in = _mm512_slli_epi16(
        _mm512_maskz_loadu_epi16((1U << (end - buf)) - 1, (__m512i *)buf), 8);
    __m512i diff = _mm512_sub_epi16(in, surr_base);
    __mmask32 surrogates = _mm512_cmplt_epu16_mask(diff, surr_range);
    if (surrogates) {
      __mmask32 highsurrogates = _mm512_cmplt_epu16_mask(diff, high_range);
      __mmask32 lowsurrogates = surrogates ^ highsurrogates;
      // high must be followed by low
      if ((highsurrogates << 1) != lowsurrogates) {
        return false;
      }
    }
  }
  return true;
}
|
|
484
|
+
|
|
485
|
+
// Validates surrogate pairing in [buf, buf + len). Returns
// result(error_code::SURROGATE, index) for the first unpaired surrogate
// found, or result(error_code::SUCCESS, len) when there is none.
simdutf_warn_unused result implementation::validate_utf16le_with_errors(
    const char16_t *buf, size_t len) const noexcept {
  const char16_t *const origin = buf;
  const char16_t *const stop = buf + len;

  // (unit - 0xD800) mod 2^16 is below 0x0800 for any surrogate and below
  // 0x0400 for a high surrogate.
  const __m512i surrogate_first = _mm512_set1_epi16(uint16_t(0xD800));
  const __m512i surrogate_span = _mm512_set1_epi16(uint16_t(0x0800));
  const __m512i high_span = _mm512_set1_epi16(uint16_t(0x0400));

  // Whole 512-bit blocks: 32 code units per load.
  while (stop - buf >= 32) {
    const __m512i units =
        _mm512_loadu_si512(reinterpret_cast<const __m512i *>(buf));
    const __m512i off = _mm512_sub_epi16(units, surrogate_first);
    const __mmask32 any = _mm512_cmplt_epu16_mask(off, surrogate_span);
    if (any == 0) {
      buf += 32;
      continue;
    }
    const __mmask32 highs = _mm512_cmplt_epu16_mask(off, high_span);
    const __mmask32 lows = any ^ highs;
    if ((highs << 1) != lows) {
      // First low without a preceding high, and first high without a
      // following low; report whichever comes first.
      const uint32_t stray_low = _tzcnt_u32(lows & ~(highs << 1));
      const uint32_t stray_high = _tzcnt_u32(highs & ~(lows >> 1));
      const uint32_t first_bad =
          stray_low < stray_high ? stray_low : stray_high;
      return result(error_code::SURROGATE, (buf - origin) + first_bad);
    }
    // Re-read a trailing high surrogate at the start of the next round.
    if ((highs & 0x80000000) != 0) {
      buf += 31;
    } else {
      buf += 32;
    }
  }

  // Final 1..31 code units via a zero-filling masked load.
  if (buf < stop) {
    const __mmask32 live_lanes = (1U << (stop - buf)) - 1;
    const __m512i units = _mm512_maskz_loadu_epi16(
        live_lanes, reinterpret_cast<const __m512i *>(buf));
    const __m512i off = _mm512_sub_epi16(units, surrogate_first);
    const __mmask32 any = _mm512_cmplt_epu16_mask(off, surrogate_span);
    if (any != 0) {
      const __mmask32 highs = _mm512_cmplt_epu16_mask(off, high_span);
      const __mmask32 lows = any ^ highs;
      if ((highs << 1) != lows) {
        const uint32_t stray_low = _tzcnt_u32(lows & ~(highs << 1));
        const uint32_t stray_high = _tzcnt_u32(highs & ~(lows >> 1));
        const uint32_t first_bad =
            stray_low < stray_high ? stray_low : stray_high;
        return result(error_code::SURROGATE, (buf - origin) + first_bad);
      }
    }
  }
  return result(error_code::SUCCESS, len);
}
|
|
541
|
+
|
|
542
|
+
// Big-endian counterpart of the surrogate-pairing check. Returns
// result(error_code::SURROGATE, index) for the first unpaired surrogate
// found, or result(error_code::SUCCESS, len) when there is none.
simdutf_warn_unused result implementation::validate_utf16be_with_errors(
    const char16_t *buf, size_t len) const noexcept {
  const char16_t *const origin = buf;
  const char16_t *const stop = buf + len;

  // (unit - 0xD800) mod 2^16 is below 0x0800 for any surrogate and below
  // 0x0400 for a high surrogate.
  const __m512i surrogate_first = _mm512_set1_epi16(uint16_t(0xD800));
  const __m512i surrogate_span = _mm512_set1_epi16(uint16_t(0x0800));
  const __m512i high_span = _mm512_set1_epi16(uint16_t(0x0400));

  // Whole 512-bit blocks: 32 code units per load.
  while (stop - buf >= 32) {
    // Shifting each 16-bit lane left by 8 moves its low byte into the high
    // byte; the range bounds are multiples of 0x100, so only that byte
    // matters.
    const __m512i units = _mm512_slli_epi16(
        _mm512_loadu_si512(reinterpret_cast<const __m512i *>(buf)), 8);
    const __m512i off = _mm512_sub_epi16(units, surrogate_first);
    const __mmask32 any = _mm512_cmplt_epu16_mask(off, surrogate_span);
    if (any == 0) {
      buf += 32;
      continue;
    }
    const __mmask32 highs = _mm512_cmplt_epu16_mask(off, high_span);
    const __mmask32 lows = any ^ highs;
    if ((highs << 1) != lows) {
      // First low without a preceding high, and first high without a
      // following low; report whichever comes first.
      const uint32_t stray_low = _tzcnt_u32(lows & ~(highs << 1));
      const uint32_t stray_high = _tzcnt_u32(highs & ~(lows >> 1));
      const uint32_t first_bad =
          stray_low < stray_high ? stray_low : stray_high;
      return result(error_code::SURROGATE, (buf - origin) + first_bad);
    }
    // Re-read a trailing high surrogate at the start of the next round.
    if ((highs & 0x80000000) != 0) {
      buf += 31;
    } else {
      buf += 32;
    }
  }

  // Final 1..31 code units via a zero-filling masked load.
  if (buf < stop) {
    const __mmask32 live_lanes = (1U << (stop - buf)) - 1;
    const __m512i units = _mm512_slli_epi16(
        _mm512_maskz_loadu_epi16(live_lanes,
                                 reinterpret_cast<const __m512i *>(buf)),
        8);
    const __m512i off = _mm512_sub_epi16(units, surrogate_first);
    const __mmask32 any = _mm512_cmplt_epu16_mask(off, surrogate_span);
    if (any != 0) {
      const __mmask32 highs = _mm512_cmplt_epu16_mask(off, high_span);
      const __mmask32 lows = any ^ highs;
      if ((highs << 1) != lows) {
        const uint32_t stray_low = _tzcnt_u32(lows & ~(highs << 1));
        const uint32_t stray_high = _tzcnt_u32(highs & ~(lows >> 1));
        const uint32_t first_bad =
            stray_low < stray_high ? stray_low : stray_high;
        return result(error_code::SURROGATE, (buf - origin) + first_bad);
      }
    }
  }
  return result(error_code::SUCCESS, len);
}
|
|
599
|
+
|
|
600
|
+
void implementation::to_well_formed_utf16le(const char16_t *input, size_t len,
|
|
601
|
+
char16_t *output) const noexcept {
|
|
602
|
+
return utf16fix_avx512<endianness::LITTLE>(input, len, output);
|
|
603
|
+
}
|
|
604
|
+
|
|
605
|
+
void implementation::to_well_formed_utf16be(const char16_t *input, size_t len,
|
|
606
|
+
char16_t *output) const noexcept {
|
|
607
|
+
return utf16fix_avx512<endianness::BIG>(input, len, output);
|
|
608
|
+
}
|
|
609
|
+
#endif // SIMDUTF_FEATURE_UTF16
|
|
610
|
+
|
|
611
|
+
#if SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING
|
|
612
|
+
// Forwards UTF-32 validation to the icelake kernel and returns its verdict.
simdutf_warn_unused bool
implementation::validate_utf32(const char32_t *buf, size_t len) const noexcept {
  const bool is_valid = icelake::validate_utf32(buf, len);
  return is_valid;
}
|
|
616
|
+
#endif // SIMDUTF_FEATURE_UTF32 || SIMDUTF_FEATURE_DETECT_ENCODING
|
|
617
|
+
|
|
618
|
+
#if SIMDUTF_FEATURE_UTF32
|
|
619
|
+
// Validates UTF-32 and reports the first offending code point. Returns
// result(error_code::TOO_LARGE, index) for a value above 0x10FFFF,
// result(error_code::SURROGATE, index) for a value in 0xD800..0xDFFF, or
// result(error_code::SUCCESS, len) when every value is acceptable.
simdutf_warn_unused result implementation::validate_utf32_with_errors(
    const char32_t *buf, size_t len) const noexcept {
  const char32_t *const origin = buf;
  const char32_t *const stop = buf + len;

  const __m512i max_code_point = _mm512_set1_epi32(0x10ffff);
  // Adding 0xffff2000 maps 0xD800..0xDFFF onto 0xfffff800..0xffffffff, so an
  // unsigned "> 0xfffff7ff" test singles out the surrogates.
  const __m512i surrogate_shift = _mm512_set1_epi32(0xffff2000);
  const __m512i surrogate_floor = _mm512_set1_epi32(0xfffff7ff);

  // Whole 512-bit blocks: 16 code points per load.
  while (stop - buf >= 16) {
    const __m512i values =
        _mm512_loadu_si512(reinterpret_cast<const __m512i *>(buf));
    const __mmask16 too_large =
        _mm512_cmp_epu32_mask(values, max_code_point, _MM_CMPINT_GT);
    const __m512i shifted = _mm512_add_epi32(values, surrogate_shift);
    const __mmask16 is_surrogate =
        _mm512_cmp_epu32_mask(shifted, surrogate_floor, _MM_CMPINT_GT);

    if ((too_large | is_surrogate) != 0) {
      const auto large_at = _tzcnt_u32(too_large);
      const auto surrogate_at = _tzcnt_u32(is_surrogate);
      if (large_at < surrogate_at) {
        return result(error_code::TOO_LARGE, buf - origin + large_at);
      }
      return result(error_code::SURROGATE, buf - origin + surrogate_at);
    }
    buf += 16;
  }

  // Remaining 0..15 code points; with nothing left the mask is zero and the
  // vector is all zeros.
  if (len > 0) {
    const __mmask16 live_lanes = __mmask16((1U << (origin + len - buf)) - 1);
    const __m512i values = _mm512_maskz_loadu_epi32(
        live_lanes, reinterpret_cast<const __m512i *>(buf));
    const __mmask16 too_large =
        _mm512_cmp_epu32_mask(values, max_code_point, _MM_CMPINT_GT);
    const __m512i shifted = _mm512_add_epi32(values, surrogate_shift);
    const __mmask16 is_surrogate =
        _mm512_cmp_epu32_mask(shifted, surrogate_floor, _MM_CMPINT_GT);

    if ((too_large | is_surrogate) != 0) {
      const auto large_at = _tzcnt_u32(too_large);
      const auto surrogate_at = _tzcnt_u32(is_surrogate);
      if (large_at < surrogate_at) {
        return result(error_code::TOO_LARGE, buf - origin + large_at);
      }
      return result(error_code::SURROGATE, buf - origin + surrogate_at);
    }
  }

  return result(error_code::SUCCESS, len);
}
|
|
671
|
+
#endif // SIMDUTF_FEATURE_UTF32
|
|
672
|
+
|
|
673
|
+
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
|
|
674
|
+
// Latin-1 -> UTF-8: forwards to the icelake AVX-512 routine and returns the
// value it reports.
simdutf_warn_unused size_t implementation::convert_latin1_to_utf8(
    const char *buf, size_t len, char *utf8_output) const noexcept {
  const size_t produced =
      icelake::latin1_to_utf8_avx512_start(buf, len, utf8_output);
  return produced;
}
|
|
678
|
+
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
|
|
679
|
+
|
|
680
|
+
#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
|
|
681
|
+
// Latin-1 -> UTF-16LE: forwards to the endianness-templated icelake routine.
simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le(
    const char *buf, size_t len, char16_t *utf16_output) const noexcept {
  const size_t produced =
      icelake_convert_latin1_to_utf16<endianness::LITTLE>(buf, len,
                                                          utf16_output);
  return produced;
}
|
|
686
|
+
|
|
687
|
+
// Latin-1 -> UTF-16BE: forwards to the endianness-templated icelake routine.
simdutf_warn_unused size_t implementation::convert_latin1_to_utf16be(
    const char *buf, size_t len, char16_t *utf16_output) const noexcept {
  const size_t produced =
      icelake_convert_latin1_to_utf16<endianness::BIG>(buf, len, utf16_output);
  return produced;
}
|
|
692
|
+
#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
|
|
693
|
+
|
|
694
|
+
#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
|
|
695
|
+
// Latin-1 -> UTF-32: runs the AVX-512 widening routine, then reports len as
// the output count.
simdutf_warn_unused size_t implementation::convert_latin1_to_utf32(
    const char *buf, size_t len, char32_t *utf32_output) const noexcept {
  const size_t produced = len;
  avx512_convert_latin1_to_utf32(buf, len, utf32_output);
  return produced;
}
|
|
700
|
+
#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
|
|
701
|
+
|
|
702
|
+
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
|
|
703
|
+
// UTF-8 -> Latin-1: forwards to the icelake AVX-512 routine and returns the
// value it reports.
simdutf_warn_unused size_t implementation::convert_utf8_to_latin1(
    const char *buf, size_t len, char *latin1_output) const noexcept {
  const size_t produced =
      icelake::utf8_to_latin1_avx512(buf, len, latin1_output);
  return produced;
}
|
|
707
|
+
|
|
708
|
+
// UTF-8 -> Latin-1 with error reporting. The SIMD routine runs first; if it
// did not consume the whole input, the scalar rewind-and-convert routine is
// run from the point where it stopped and its result is returned with the
// already-consumed input length added to count.
simdutf_warn_unused result implementation::convert_utf8_to_latin1_with_errors(
    const char *buf, size_t len, char *latin1_output) const noexcept {
  // Working copies handed to the SIMD routine. NOTE(review): the check below
  // relies on the routine advancing in_cursor, so it presumably takes both
  // cursors by reference - confirm against its declaration.
  const char *in_cursor = buf;
  char *out_cursor = latin1_output;
  const size_t written =
      icelake::utf8_to_latin1_avx512(in_cursor, len, out_cursor);

  // Everything was consumed: nothing left for the scalar path.
  const char *const stop = buf + len;
  if (in_cursor == stop) {
    return {simdutf::SUCCESS, written};
  }

  // Hand the remainder to the scalar routine, then rebase its count on the
  // full input.
  const size_t pos = in_cursor - buf;
  result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors(
      pos, buf + pos, len - pos, latin1_output);
  res.count += pos;
  return res;
}
|
|
725
|
+
|
|
726
|
+
// Valid UTF-8 -> Latin-1: forwards to the icelake routine for pre-validated
// input.
simdutf_warn_unused size_t implementation::convert_valid_utf8_to_latin1(
    const char *buf, size_t len, char *latin1_output) const noexcept {
  const size_t produced =
      icelake::valid_utf8_to_latin1_avx512(buf, len, latin1_output);
  return produced;
}
|
|
730
|
+
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
|
|
731
|
+
|
|
732
|
+
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
|
|
733
|
+
// UTF-8 -> UTF-16LE. Returns the distance between the output cursor reported
// by the AVX-512 routine and utf16_output, or 0 when that cursor is null.
simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le(
    const char *buf, size_t len, char16_t *utf16_output) const noexcept {
  const utf8_to_utf16_result outcome =
      fast_avx512_convert_utf8_to_utf16<endianness::LITTLE>(buf, len,
                                                            utf16_output);
  const bool failed = (outcome.second == nullptr);
  if (!failed) {
    return outcome.second - utf16_output;
  }
  return 0;
}
|
|
743
|
+
|
|
744
|
+
// UTF-8 -> UTF-16BE. Returns the distance between the output cursor reported
// by the AVX-512 routine and utf16_output, or 0 when that cursor is null.
simdutf_warn_unused size_t implementation::convert_utf8_to_utf16be(
    const char *buf, size_t len, char16_t *utf16_output) const noexcept {
  const utf8_to_utf16_result outcome =
      fast_avx512_convert_utf8_to_utf16<endianness::BIG>(buf, len,
                                                         utf16_output);
  const bool failed = (outcome.second == nullptr);
  if (!failed) {
    return outcome.second - utf16_output;
  }
  return 0;
}
|
|
753
|
+
|
|
754
|
+
// UTF-8 -> UTF-16LE with error reporting: forwards to the AVX-512 routine and
// returns its result unchanged.
simdutf_warn_unused result implementation::convert_utf8_to_utf16le_with_errors(
    const char *buf, size_t len, char16_t *utf16_output) const noexcept {
  const result outcome =
      fast_avx512_convert_utf8_to_utf16_with_errors<endianness::LITTLE>(
          buf, len, utf16_output);
  return outcome;
}
|
|
759
|
+
|
|
760
|
+
// UTF-8 -> UTF-16BE with error reporting: forwards to the AVX-512 routine and
// returns its result unchanged.
simdutf_warn_unused result implementation::convert_utf8_to_utf16be_with_errors(
    const char *buf, size_t len, char16_t *utf16_output) const noexcept {
  const result outcome =
      fast_avx512_convert_utf8_to_utf16_with_errors<endianness::BIG>(
          buf, len, utf16_output);
  return outcome;
}
|
|
765
|
+
|
|
766
|
+
// Valid UTF-8 -> UTF-16LE. The AVX-512 routine converts the bulk of the
// input; whatever it leaves is finished by the scalar converter. Returns the
// total number of char16_t written, or 0 when the scalar pass produced
// nothing.
simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16le(
    const char *buf, size_t len, char16_t *utf16_output) const noexcept {
  const utf8_to_utf16_result simd =
      icelake::valid_utf8_to_fixed_length<endianness::LITTLE, char16_t>(
          buf, len, utf16_output);
  const char *cursor = simd.first;
  const char *const stop = buf + len;
  size_t produced = simd.second - utf16_output;

  if (cursor == stop) {
    return produced;
  }

  // The AVX-512 procedure looks up to 4 bytes ahead and correctly converts
  // multi-byte characters even when their continuation bytes lie outside the
  // 16-byte window. Those continuation bytes were therefore already
  // consumed, so skip any that sit at the resume position.
  for (; cursor != stop && (uint8_t(*cursor) & 0xc0) == 0x80; cursor += 1) {
  }

  if (cursor == stop) {
    return produced;
  }

  const size_t scalar_produced =
      scalar::utf8_to_utf16::convert_valid<endianness::LITTLE>(
          cursor, len - (cursor - buf), simd.second);
  if (scalar_produced == 0) {
    return 0;
  }
  produced += scalar_produced;
  return produced;
}
|
|
798
|
+
|
|
799
|
+
// Valid UTF-8 -> UTF-16BE. The AVX-512 routine converts the bulk of the
// input; whatever it leaves is finished by the scalar converter. Returns the
// total number of char16_t written, or 0 when the scalar pass produced
// nothing.
simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16be(
    const char *buf, size_t len, char16_t *utf16_output) const noexcept {
  const utf8_to_utf16_result simd =
      icelake::valid_utf8_to_fixed_length<endianness::BIG, char16_t>(
          buf, len, utf16_output);
  const char *cursor = simd.first;
  const char *const stop = buf + len;
  size_t produced = simd.second - utf16_output;

  if (cursor == stop) {
    return produced;
  }

  // The AVX-512 procedure looks up to 4 bytes ahead and correctly converts
  // multi-byte characters even when their continuation bytes lie outside the
  // 16-byte window. Those continuation bytes were therefore already
  // consumed, so skip any that sit at the resume position.
  for (; cursor != stop && (uint8_t(*cursor) & 0xc0) == 0x80; cursor += 1) {
  }

  if (cursor == stop) {
    return produced;
  }

  const size_t scalar_produced =
      scalar::utf8_to_utf16::convert_valid<endianness::BIG>(
          cursor, len - (cursor - buf), simd.second);
  if (scalar_produced == 0) {
    return 0;
  }
  produced += scalar_produced;
  return produced;
}
|
|
831
|
+
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
|
|
832
|
+
|
|
833
|
+
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
|
|
834
|
+
// UTF-8 -> UTF-32 with validation. Returns the number of char32_t written,
// or 0 when the SIMD routine reports a null output cursor or the scalar
// finishing pass produces nothing.
simdutf_warn_unused size_t implementation::convert_utf8_to_utf32(
    const char *buf, size_t len, char32_t *utf32_out) const noexcept {
  uint32_t *utf32_output = reinterpret_cast<uint32_t *>(utf32_out);
  const utf8_to_utf32_result simd =
      icelake::validating_utf8_to_fixed_length<endianness::LITTLE, uint32_t>(
          buf, len, utf32_output);
  if (simd.second == nullptr) {
    return 0;
  }

  const char *cursor = simd.first;
  const char *const stop = buf + len;
  size_t produced = simd.second - utf32_output;
  if (cursor == stop) {
    return produced;
  }

  // The AVX-512 procedure looks up to 4 bytes ahead and correctly converts
  // multi-byte characters even when their continuation bytes lie outside the
  // 16-byte window. Those continuation bytes were therefore already
  // consumed, so skip any that sit at the resume position.
  for (; cursor != stop && (uint8_t(*cursor) & 0xc0) == 0x80; cursor += 1) {
  }

  if (cursor == stop) {
    return produced;
  }

  const size_t scalar_produced = scalar::utf8_to_utf32::convert(
      cursor, len - (cursor - buf), utf32_out + produced);
  if (scalar_produced == 0) {
    return 0;
  }
  produced += scalar_produced;
  return produced;
}
|
|
868
|
+
|
|
869
|
+
// UTF-8 -> UTF-32 with error reporting. Empty input succeeds with count 0.
// When the SIMD routine flags a failure, the scalar rewind-and-convert
// routine is run from the (adjusted) failure position. Otherwise any input
// the SIMD routine left over is finished by the scalar converter.
simdutf_warn_unused result implementation::convert_utf8_to_utf32_with_errors(
    const char *buf, size_t len, char32_t *utf32) const noexcept {
  if (simdutf_unlikely(len == 0)) {
    return {error_code::SUCCESS, 0};
  }
  uint32_t *utf32_output = reinterpret_cast<uint32_t *>(utf32);
  auto simd = icelake::validating_utf8_to_fixed_length_with_constant_checks<
      endianness::LITTLE, uint32_t>(buf, len, utf32_output);
  auto cursor = std::get<0>(simd);

  if (!std::get<2>(simd)) {
    size_t pos = cursor - buf;
    // The error may sit right before pos; that is only a concern when
    // buf[pos] is not a continuation byte.
    const bool at_continuation = (buf[pos] & 0xc0) == 0x80;
    if (pos >= 64) {
      if (!at_continuation) {
        pos -= 1;
      } else {
        // Check whether this is a fourth consecutive continuation byte.
        const bool prev1 = (buf[pos - 1] & 0xc0) == 0x80;
        const bool prev2 = (buf[pos - 2] & 0xc0) == 0x80;
        const bool prev3 = (buf[pos - 3] & 0xc0) == 0x80;
        if (prev1 && prev2 && prev3) {
          return {simdutf::TOO_LONG, pos};
        }
      }
    }
    // todo: the scalar pass is given the start of the output buffer (utf32)
    // rather than the output cursor returned by the SIMD routine, because
    // validating_utf8_to_fixed_length_with_constant_checks may have
    // processed data beyond the error.
    result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(
        pos, buf + pos, len - pos, utf32);
    res.count += pos;
    return res;
  }

  const size_t produced = std::get<1>(simd) - utf32_output;
  const char *const stop = buf + len;
  if (cursor == stop) {
    return {simdutf::SUCCESS, produced};
  }

  // The AVX-512 procedure looks up to 4 bytes ahead and correctly converts
  // multi-byte characters even when their continuation bytes lie outside the
  // 16-byte window. Those continuation bytes were therefore already
  // consumed, so skip any that sit at the resume position.
  for (; cursor != stop && (uint8_t(*cursor) & 0xc0) == 0x80; cursor += 1) {
  }

  if (cursor == stop) {
    return {simdutf::SUCCESS, size_t(std::get<1>(simd) - utf32_output)};
  }

  auto tail = scalar::utf8_to_utf32::convert_with_errors(
      cursor, len - (cursor - buf),
      reinterpret_cast<char32_t *>(utf32_output) + produced);
  if (tail.error == simdutf::SUCCESS) {
    // Success: count becomes the total number of code points written.
    tail.count += produced;
  } else {
    // Failure: count is rebased onto the full input.
    tail.count += (cursor - buf);
  }
  return tail;
}
|
|
932
|
+
|
|
933
|
+
// Valid UTF-8 -> UTF-32. The AVX-512 routine converts the bulk of the input;
// whatever it leaves is finished by the scalar converter. Returns the total
// number of char32_t written, or 0 when the scalar pass produced nothing.
simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32(
    const char *buf, size_t len, char32_t *utf32_out) const noexcept {
  uint32_t *utf32_output = reinterpret_cast<uint32_t *>(utf32_out);
  const utf8_to_utf32_result simd =
      icelake::valid_utf8_to_fixed_length<endianness::LITTLE, uint32_t>(
          buf, len, utf32_output);
  const char *cursor = simd.first;
  const char *const stop = buf + len;
  size_t produced = simd.second - utf32_output;

  if (cursor == stop) {
    return produced;
  }

  // The AVX-512 procedure looks up to 4 bytes ahead and correctly converts
  // multi-byte characters even when their continuation bytes lie outside the
  // 16-byte window. Those continuation bytes were therefore already
  // consumed, so skip any that sit at the resume position.
  for (; cursor != stop && (uint8_t(*cursor) & 0xc0) == 0x80; cursor += 1) {
  }

  if (cursor == stop) {
    return produced;
  }

  const size_t scalar_produced = scalar::utf8_to_utf32::convert_valid(
      cursor, len - (cursor - buf), utf32_out + produced);
  if (scalar_produced == 0) {
    return 0;
  }
  produced += scalar_produced;
  return produced;
}
|
|
965
|
+
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
|
|
966
|
+
|
|
967
|
+
#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
|
|
968
|
+
// UTF-16LE -> Latin-1: forwards to the endianness-templated icelake routine.
simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1(
    const char16_t *buf, size_t len, char *latin1_output) const noexcept {
  const size_t produced =
      icelake_convert_utf16_to_latin1<endianness::LITTLE>(buf, len,
                                                          latin1_output);
  return produced;
}
|
|
973
|
+
|
|
974
|
+
// UTF-16BE -> Latin-1: forwards to the endianness-templated icelake routine.
simdutf_warn_unused size_t implementation::convert_utf16be_to_latin1(
    const char16_t *buf, size_t len, char *latin1_output) const noexcept {
  const size_t produced =
      icelake_convert_utf16_to_latin1<endianness::BIG>(buf, len,
                                                       latin1_output);
  return produced;
}
|
|
979
|
+
|
|
980
|
+
// UTF-16LE -> Latin-1 with error reporting: returns the `first` member of
// what the icelake routine yields.
simdutf_warn_unused result
implementation::convert_utf16le_to_latin1_with_errors(
    const char16_t *buf, size_t len, char *latin1_output) const noexcept {
  const auto outcome =
      icelake_convert_utf16_to_latin1_with_errors<endianness::LITTLE>(
          buf, len, latin1_output);
  return outcome.first;
}
|
|
987
|
+
|
|
988
|
+
// UTF-16BE -> Latin-1 with error reporting: returns the `first` member of
// what the icelake routine yields.
simdutf_warn_unused result
implementation::convert_utf16be_to_latin1_with_errors(
    const char16_t *buf, size_t len, char *latin1_output) const noexcept {
  const auto outcome =
      icelake_convert_utf16_to_latin1_with_errors<endianness::BIG>(
          buf, len, latin1_output);
  return outcome.first;
}
|
|
995
|
+
|
|
996
|
+
// Valid UTF-16BE -> Latin-1. No dedicated fast path yet (optimization
// opportunity: implement a custom function); reuses the validating converter.
simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_latin1(
    const char16_t *buf, size_t len, char *latin1_output) const noexcept {
  const size_t produced = this->convert_utf16be_to_latin1(buf, len,
                                                          latin1_output);
  return produced;
}
|
|
1001
|
+
|
|
1002
|
+
// Valid UTF-16LE -> Latin-1. No dedicated fast path yet (optimization
// opportunity: implement a custom function); reuses the validating converter.
simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_latin1(
    const char16_t *buf, size_t len, char *latin1_output) const noexcept {
  const size_t produced = this->convert_utf16le_to_latin1(buf, len,
                                                          latin1_output);
  return produced;
}
|
|
1007
|
+
#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_LATIN1
|
|
1008
|
+
|
|
1009
|
+
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
|
|
1010
|
+
// UTF-16LE -> UTF-8 using the AVX-512 kernel.
// Returns the value the kernel stored through its output-length pointer when
// the kernel reports having consumed all `len` code units, and 0 otherwise.
simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8(
    const char16_t *buf, size_t len, char *utf8_output) const noexcept {
  size_t produced; // out-parameter handed to the kernel
  const size_t consumed = utf16_to_utf8_avx512i<endianness::LITTLE>(
      buf, len, reinterpret_cast<unsigned char *>(utf8_output), &produced);
  return (consumed == len) ? produced : 0;
}
|
|
1020
|
+
|
|
1021
|
+
// UTF-16BE -> UTF-8 using the AVX-512 kernel.
// Returns the value the kernel stored through its output-length pointer when
// the kernel reports having consumed all `len` code units, and 0 otherwise.
simdutf_warn_unused size_t implementation::convert_utf16be_to_utf8(
    const char16_t *buf, size_t len, char *utf8_output) const noexcept {
  size_t produced; // out-parameter handed to the kernel
  const size_t consumed = utf16_to_utf8_avx512i<endianness::BIG>(
      buf, len, reinterpret_cast<unsigned char *>(utf8_output), &produced);
  return (consumed == len) ? produced : 0;
}
|
|
1031
|
+
|
|
1032
|
+
// UTF-16LE -> UTF-8 with error reporting.
// If the AVX-512 kernel consumed the whole input, SUCCESS is returned with the
// kernel's output length. Otherwise the scalar converter is run from the
// point where the kernel stopped, and its result is returned with `count`
// shifted by the number of code units the kernel had already consumed.
simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors(
    const char16_t *buf, size_t len, char *utf8_output) const noexcept {
  size_t produced; // out-parameter handed to the kernel
  const size_t consumed = utf16_to_utf8_avx512i<endianness::LITTLE>(
      buf, len, reinterpret_cast<unsigned char *>(utf8_output), &produced);
  if (consumed == len) {
    return result(simdutf::SUCCESS, produced);
  }
  result tail = scalar::utf16_to_utf8::convert_with_errors<endianness::LITTLE>(
      buf + consumed, len - consumed, utf8_output + produced);
  tail.count += consumed;
  return tail;
}
|
|
1045
|
+
|
|
1046
|
+
// UTF-16BE -> UTF-8 with error reporting.
// If the AVX-512 kernel consumed the whole input, SUCCESS is returned with the
// kernel's output length. Otherwise the scalar converter is run from the
// point where the kernel stopped, and its result is returned with `count`
// shifted by the number of code units the kernel had already consumed.
simdutf_warn_unused result implementation::convert_utf16be_to_utf8_with_errors(
    const char16_t *buf, size_t len, char *utf8_output) const noexcept {
  size_t produced; // out-parameter handed to the kernel
  const size_t consumed = utf16_to_utf8_avx512i<endianness::BIG>(
      buf, len, reinterpret_cast<unsigned char *>(utf8_output), &produced);
  if (consumed == len) {
    return result(simdutf::SUCCESS, produced);
  }
  result tail = scalar::utf16_to_utf8::convert_with_errors<endianness::BIG>(
      buf + consumed, len - consumed, utf8_output + produced);
  tail.count += consumed;
  return tail;
}
|
|
1059
|
+
|
|
1060
|
+
// "Valid input" variant for UTF-16LE -> UTF-8; reuses the validating routine.
simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf8(
    const char16_t *buf, size_t len, char *utf8_output) const noexcept {
  return this->convert_utf16le_to_utf8(buf, len, utf8_output);
}
|
|
1064
|
+
|
|
1065
|
+
// "Valid input" variant for UTF-16BE -> UTF-8; reuses the validating routine.
simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8(
    const char16_t *buf, size_t len, char *utf8_output) const noexcept {
  return this->convert_utf16be_to_utf8(buf, len, utf8_output);
}
|
|
1069
|
+
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
|
|
1070
|
+
|
|
1071
|
+
#if SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
|
|
1072
|
+
// UTF-32 -> Latin-1: forwards to the Icelake kernel and returns its result.
simdutf_warn_unused size_t implementation::convert_utf32_to_latin1(
    const char32_t *buf, size_t len, char *latin1_output) const noexcept {
  const size_t kernel_result =
      icelake_convert_utf32_to_latin1(buf, len, latin1_output);
  return kernel_result;
}
|
|
1076
|
+
|
|
1077
|
+
// UTF-32 -> Latin-1 with error reporting. The Icelake kernel returns a pair;
// only its first member (the result) is handed back to the caller.
simdutf_warn_unused result implementation::convert_utf32_to_latin1_with_errors(
    const char32_t *buf, size_t len, char *latin1_output) const noexcept {
  const auto kernel_pair =
      icelake_convert_utf32_to_latin1_with_errors(buf, len, latin1_output);
  return kernel_pair.first;
}
|
|
1082
|
+
|
|
1083
|
+
// "Valid input" variant for UTF-32 -> Latin-1; calls the same Icelake kernel
// as the validating routine.
simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1(
    const char32_t *buf, size_t len, char *latin1_output) const noexcept {
  const size_t kernel_result =
      icelake_convert_utf32_to_latin1(buf, len, latin1_output);
  return kernel_result;
}
|
|
1087
|
+
#endif // SIMDUTF_FEATURE_UTF32 && SIMDUTF_FEATURE_LATIN1
|
|
1088
|
+
|
|
1089
|
+
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
|
|
1090
|
+
// UTF-32 -> UTF-8.
// The AVX-512 kernel returns (next input position, next output position); a
// null input position makes this function return 0. Any input the kernel
// left unprocessed is converted by the scalar routine, and a scalar result of
// 0 also makes this function return 0. Otherwise the total number of chars
// written to `utf8_output` is returned.
simdutf_warn_unused size_t implementation::convert_utf32_to_utf8(
    const char32_t *buf, size_t len, char *utf8_output) const noexcept {
  const std::pair<const char32_t *, char *> simd =
      avx512_convert_utf32_to_utf8(buf, len, utf8_output);
  const char32_t *const src = simd.first;
  char *const dst = simd.second;
  if (src == nullptr) {
    return 0;
  }
  const size_t simd_bytes = dst - utf8_output;
  if (src == buf + len) {
    return simd_bytes; // kernel handled everything
  }
  const size_t tail_bytes =
      scalar::utf32_to_utf8::convert(src, len - (src - buf), dst);
  return (tail_bytes == 0) ? 0 : simd_bytes + tail_bytes;
}
|
|
1108
|
+
|
|
1109
|
+
// UTF-32 -> UTF-8 with error reporting.
// The kernel's result.count is always an input position (not an output
// length), even when the kernel finished. If that position is short of
// `len`, the scalar converter continues from there: on error its count is
// rebased to the full input and returned; on success the output cursor is
// advanced. On the success path, count is finally overwritten with the number
// of 8-bit code units written.
simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors(
    const char32_t *buf, size_t len, char *utf8_output) const noexcept {
  std::pair<result, char *> simd =
      icelake::avx512_convert_utf32_to_utf8_with_errors(buf, len, utf8_output);
  result status = simd.first;
  char *out_end = simd.second;
  const size_t consumed = status.count;
  if (consumed != len) {
    result tail = scalar::utf32_to_utf8::convert_with_errors(
        buf + consumed, len - consumed, out_end);
    if (tail.error) {
      tail.count += consumed;
      return tail;
    }
    out_end += tail.count;
  }
  status.count = out_end - utf8_output;
  return status;
}
|
|
1130
|
+
|
|
1131
|
+
// "Valid input" variant for UTF-32 -> UTF-8; reuses the validating routine.
simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8(
    const char32_t *buf, size_t len, char *utf8_output) const noexcept {
  return this->convert_utf32_to_utf8(buf, len, utf8_output);
}
|
|
1135
|
+
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
|
|
1136
|
+
|
|
1137
|
+
#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
|
|
1138
|
+
// UTF-32 -> UTF-16LE. The kernel returns (input position, output position);
// a null input position yields 0, otherwise the number of char16_t elements
// between `utf16_output` and the returned output position is returned.
simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le(
    const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept {
  const std::pair<const char32_t *, char16_t *> simd =
      avx512_convert_utf32_to_utf16<endianness::LITTLE>(buf, len, utf16_output);
  if (simd.first == nullptr) {
    return 0;
  }
  const size_t units_written = simd.second - utf16_output;
  return units_written;
}
|
|
1148
|
+
|
|
1149
|
+
// UTF-32 -> UTF-16BE. The kernel returns (input position, output position);
// a null input position yields 0, otherwise the number of char16_t elements
// between `utf16_output` and the returned output position is returned.
simdutf_warn_unused size_t implementation::convert_utf32_to_utf16be(
    const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept {
  const std::pair<const char32_t *, char16_t *> simd =
      avx512_convert_utf32_to_utf16<endianness::BIG>(buf, len, utf16_output);
  if (simd.first == nullptr) {
    return 0;
  }
  const size_t units_written = simd.second - utf16_output;
  return units_written;
}
|
|
1159
|
+
|
|
1160
|
+
// UTF-32 -> UTF-16LE with error reporting.
// The kernel's result.count is always an input position, even on success.
// On error the kernel result is returned untouched; on success count is
// replaced with the number of 16-bit code units written to `utf16_output`.
simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors(
    const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept {
  const std::pair<result, char16_t *> simd =
      avx512_convert_utf32_to_utf16_with_errors<endianness::LITTLE>(
          buf, len, utf16_output);
  result status = simd.first;
  if (!status.error) {
    status.count = simd.second - utf16_output;
  }
  return status;
}
|
|
1175
|
+
|
|
1176
|
+
// UTF-32 -> UTF-16BE with error reporting.
// The kernel's result.count is always an input position, even on success.
// On error the kernel result is returned untouched; on success count is
// replaced with the number of 16-bit code units written to `utf16_output`.
simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors(
    const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept {
  const std::pair<result, char16_t *> simd =
      avx512_convert_utf32_to_utf16_with_errors<endianness::BIG>(buf, len,
                                                                 utf16_output);
  result status = simd.first;
  if (!status.error) {
    status.count = simd.second - utf16_output;
  }
  return status;
}
|
|
1191
|
+
|
|
1192
|
+
// "Valid input" variant for UTF-32 -> UTF-16LE; reuses the validating routine.
simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16le(
    const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept {
  return this->convert_utf32_to_utf16le(buf, len, utf16_output);
}
|
|
1196
|
+
|
|
1197
|
+
// "Valid input" variant for UTF-32 -> UTF-16BE; reuses the validating routine.
simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16be(
    const char32_t *buf, size_t len, char16_t *utf16_output) const noexcept {
  return this->convert_utf32_to_utf16be(buf, len, utf16_output);
}
|
|
1201
|
+
|
|
1202
|
+
// UTF-16LE -> UTF-32.
// The Icelake kernel returns (next input position, next output position,
// success flag). A cleared flag yields 0. Input the kernel left unprocessed
// is converted by the scalar routine; a scalar result of 0 also yields 0.
// Otherwise the total number of char32_t elements written is returned.
simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32(
    const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept {
  const std::tuple<const char16_t *, char32_t *, bool> simd =
      icelake::convert_utf16_to_utf32<endianness::LITTLE>(buf, len,
                                                          utf32_output);
  const char16_t *const src = std::get<0>(simd);
  char32_t *const dst = std::get<1>(simd);
  if (!std::get<2>(simd)) {
    return 0;
  }
  const size_t simd_units = dst - utf32_output;
  if (src == buf + len) {
    return simd_units; // kernel handled everything
  }
  const size_t tail_units =
      scalar::utf16_to_utf32::convert<endianness::LITTLE>(
          src, len - (src - buf), dst);
  return (tail_units == 0) ? 0 : simd_units + tail_units;
}
|
|
1222
|
+
|
|
1223
|
+
// UTF-16BE -> UTF-32.
// The Icelake kernel returns (next input position, next output position,
// success flag). A cleared flag yields 0. Input the kernel left unprocessed
// is converted by the scalar routine; a scalar result of 0 also yields 0.
// Otherwise the total number of char32_t elements written is returned.
simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32(
    const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept {
  const std::tuple<const char16_t *, char32_t *, bool> simd =
      icelake::convert_utf16_to_utf32<endianness::BIG>(buf, len, utf32_output);
  const char16_t *const src = std::get<0>(simd);
  char32_t *const dst = std::get<1>(simd);
  if (!std::get<2>(simd)) {
    return 0;
  }
  const size_t simd_units = dst - utf32_output;
  if (src == buf + len) {
    return simd_units; // kernel handled everything
  }
  const size_t tail_units = scalar::utf16_to_utf32::convert<endianness::BIG>(
      src, len - (src - buf), dst);
  return (tail_units == 0) ? 0 : simd_units + tail_units;
}
|
|
1242
|
+
|
|
1243
|
+
// UTF-16LE -> UTF-32 with error reporting.
// The Icelake kernel returns (next input position, next output position,
// success flag).
//  - Flag set and all input consumed: SUCCESS with the output length.
//  - Otherwise the scalar converter runs on the remaining input. Its count is
//    rebased by the consumed input length when the kernel flag was cleared or
//    the scalar pass reports an error, and by the kernel's output length when
//    the scalar pass succeeds after a successful kernel run.
simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors(
    const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept {
  const std::tuple<const char16_t *, char32_t *, bool> simd =
      icelake::convert_utf16_to_utf32<endianness::LITTLE>(buf, len,
                                                          utf32_output);
  const char16_t *const src = std::get<0>(simd);
  char32_t *const dst = std::get<1>(simd);
  const bool simd_ok = std::get<2>(simd);

  if (simd_ok && src == buf + len) {
    const size_t written = dst - utf32_output;
    return simdutf::result(simdutf::SUCCESS, written);
  }

  const size_t consumed = src - buf;
  result tail = scalar::utf16_to_utf32::convert_with_errors<endianness::LITTLE>(
      src, len - consumed, dst);
  if (!simd_ok || tail.error) {
    tail.count += consumed;
  } else {
    const size_t written = dst - utf32_output;
    tail.count += written;
  }
  return tail;
}
|
|
1270
|
+
|
|
1271
|
+
// UTF-16BE -> UTF-32 with error reporting.
// The Icelake kernel returns (next input position, next output position,
// success flag).
//  - Flag set and all input consumed: SUCCESS with the output length.
//  - Otherwise the scalar converter runs on the remaining input. Its count is
//    rebased by the consumed input length when the kernel flag was cleared or
//    the scalar pass reports an error, and by the kernel's output length when
//    the scalar pass succeeds after a successful kernel run.
simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors(
    const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept {
  const std::tuple<const char16_t *, char32_t *, bool> simd =
      icelake::convert_utf16_to_utf32<endianness::BIG>(buf, len, utf32_output);
  const char16_t *const src = std::get<0>(simd);
  char32_t *const dst = std::get<1>(simd);
  const bool simd_ok = std::get<2>(simd);

  if (simd_ok && src == buf + len) {
    const size_t written = dst - utf32_output;
    return simdutf::result(simdutf::SUCCESS, written);
  }

  const size_t consumed = src - buf;
  result tail = scalar::utf16_to_utf32::convert_with_errors<endianness::BIG>(
      src, len - consumed, dst);
  if (!simd_ok || tail.error) {
    tail.count += consumed;
  } else {
    const size_t written = dst - utf32_output;
    tail.count += written;
  }
  return tail;
}
|
|
1297
|
+
|
|
1298
|
+
// "Valid input" variant for UTF-16LE -> UTF-32.
// Runs the Icelake kernel, which returns (next input position, next output
// position, success flag). A cleared flag yields 0; leftover input goes to
// the scalar converter, and a scalar result of 0 also yields 0. Otherwise the
// total number of char32_t elements written is returned.
simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf32(
    const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept {
  const std::tuple<const char16_t *, char32_t *, bool> simd =
      icelake::convert_utf16_to_utf32<endianness::LITTLE>(buf, len,
                                                          utf32_output);
  const char16_t *const src = std::get<0>(simd);
  char32_t *const dst = std::get<1>(simd);
  if (!std::get<2>(simd)) {
    return 0;
  }
  const size_t simd_units = dst - utf32_output;
  if (src == buf + len) {
    return simd_units;
  }
  const size_t tail_units =
      scalar::utf16_to_utf32::convert<endianness::LITTLE>(
          src, len - (src - buf), dst);
  return (tail_units == 0) ? 0 : simd_units + tail_units;
}
|
|
1318
|
+
|
|
1319
|
+
// "Valid input" variant for UTF-16BE -> UTF-32.
// Runs the Icelake kernel, which returns (next input position, next output
// position, success flag). A cleared flag yields 0; leftover input goes to
// the scalar converter, and a scalar result of 0 also yields 0. Otherwise the
// total number of char32_t elements written is returned.
simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32(
    const char16_t *buf, size_t len, char32_t *utf32_output) const noexcept {
  const std::tuple<const char16_t *, char32_t *, bool> simd =
      icelake::convert_utf16_to_utf32<endianness::BIG>(buf, len, utf32_output);
  const char16_t *const src = std::get<0>(simd);
  char32_t *const dst = std::get<1>(simd);
  if (!std::get<2>(simd)) {
    return 0;
  }
  const size_t simd_units = dst - utf32_output;
  if (src == buf + len) {
    return simd_units;
  }
  const size_t tail_units = scalar::utf16_to_utf32::convert<endianness::BIG>(
      src, len - (src - buf), dst);
  return (tail_units == 0) ? 0 : simd_units + tail_units;
}
|
|
1338
|
+
#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
|
|
1339
|
+
|
|
1340
|
+
#if SIMDUTF_FEATURE_UTF16
|
|
1341
|
+
void implementation::change_endianness_utf16(const char16_t *input,
|
|
1342
|
+
size_t length,
|
|
1343
|
+
char16_t *output) const noexcept {
|
|
1344
|
+
size_t pos = 0;
|
|
1345
|
+
const __m512i byteflip = _mm512_setr_epi64(
|
|
1346
|
+
0x0607040502030001, 0x0e0f0c0d0a0b0809, 0x0607040502030001,
|
|
1347
|
+
0x0e0f0c0d0a0b0809, 0x0607040502030001, 0x0e0f0c0d0a0b0809,
|
|
1348
|
+
0x0607040502030001, 0x0e0f0c0d0a0b0809);
|
|
1349
|
+
while (pos + 32 <= length) {
|
|
1350
|
+
__m512i utf16 = _mm512_loadu_si512((const __m512i *)(input + pos));
|
|
1351
|
+
utf16 = _mm512_shuffle_epi8(utf16, byteflip);
|
|
1352
|
+
_mm512_storeu_si512(output + pos, utf16);
|
|
1353
|
+
pos += 32;
|
|
1354
|
+
}
|
|
1355
|
+
if (pos < length) {
|
|
1356
|
+
__mmask32 m((1U << (length - pos)) - 1);
|
|
1357
|
+
__m512i utf16 = _mm512_maskz_loadu_epi16(m, (const __m512i *)(input + pos));
|
|
1358
|
+
utf16 = _mm512_shuffle_epi8(utf16, byteflip);
|
|
1359
|
+
_mm512_mask_storeu_epi16(output + pos, m, utf16);
|
|
1360
|
+
}
|
|
1361
|
+
}
|
|
1362
|
+
|
|
1363
|
+
// Counts code points in little-endian UTF-16 by counting every code unit
// that lies outside 0xDC00..0xDFFF. Blocks of 32 code units are handled with
// AVX-512; the remainder is delegated to the scalar counter.
simdutf_warn_unused size_t implementation::count_utf16le(
    const char16_t *input, size_t length) const noexcept {
  const __m512i range_lo = _mm512_set1_epi16(static_cast<uint16_t>(0xdc00));
  const __m512i range_hi = _mm512_set1_epi16(static_cast<uint16_t>(0xdfff));

  size_t pos = 0;
  size_t total = 0;
  for (; pos + 32 <= length; pos += 32) {
    const __m512i units =
        _mm512_loadu_si512(reinterpret_cast<const __m512i *>(input + pos));
    // Bit set for each unit that is above 0xDFFF or below 0xDC00.
    const uint64_t outside_range =
        static_cast<uint64_t>(_mm512_cmpgt_epu16_mask(units, range_hi) |
                              _mm512_cmplt_epu16_mask(units, range_lo));
    total += count_ones(outside_range);
  }

  return total + scalar::utf16::count_code_points<endianness::LITTLE>(
                     input + pos, length - pos);
}
|
|
1387
|
+
|
|
1388
|
+
// Counts code points in big-endian UTF-16. Each 32-unit block is byte-swapped
// and every code unit outside 0xDC00..0xDFFF is counted; the remainder is
// delegated to the scalar counter.
simdutf_warn_unused size_t implementation::count_utf16be(
    const char16_t *input, size_t length) const noexcept {
  const __m512i range_lo = _mm512_set1_epi16(static_cast<uint16_t>(0xdc00));
  const __m512i range_hi = _mm512_set1_epi16(static_cast<uint16_t>(0xdfff));
  // Byte-shuffle control that exchanges the bytes of each 16-bit lane.
  const long long swap_lo = 0x0607040502030001;
  const long long swap_hi = 0x0e0f0c0d0a0b0809;
  const __m512i byteflip =
      _mm512_setr_epi64(swap_lo, swap_hi, swap_lo, swap_hi, swap_lo, swap_hi,
                        swap_lo, swap_hi);

  size_t pos = 0;
  size_t total = 0;
  for (; pos + 32 <= length; pos += 32) {
    const __m512i raw =
        _mm512_loadu_si512(reinterpret_cast<const __m512i *>(input + pos));
    const __m512i units = _mm512_shuffle_epi8(raw, byteflip);
    // Bit set for each unit that is above 0xDFFF or below 0xDC00.
    const uint64_t outside_range =
        static_cast<uint64_t>(_mm512_cmpgt_epu16_mask(units, range_hi) |
                              _mm512_cmplt_epu16_mask(units, range_lo));
    total += count_ones(outside_range);
  }

  return total + scalar::utf16::count_code_points<endianness::BIG>(
                     input + pos, length - pos);
}
|
|
1417
|
+
#endif // SIMDUTF_FEATURE_UTF16
|
|
1418
|
+
|
|
1419
|
+
#if SIMDUTF_FEATURE_UTF8
|
|
1420
|
+
// Counts UTF-8 code points as "bytes that are not continuation bytes".
// `answer` starts as the number of bytes covered by whole 64-byte vectors and
// has the number of bytes that compare <= 0xBF as signed 8-bit values (i.e.
// 0x80..0xBF) subtracted from it. Groups of eight vectors accumulate their
// popcounts in a vector register; leftover single vectors are counted
// directly. The final (< 64 byte) tail goes to the scalar counter.
simdutf_warn_unused size_t
implementation::count_utf8(const char *input, size_t length) const noexcept {
  constexpr size_t kVec = sizeof(__m512i);
  const uint8_t *bytes = reinterpret_cast<const uint8_t *>(input);
  size_t answer = length / kVec * kVec; // bytes covered by whole vectors
  size_t i = 0;
  __m512i batched_popcount = _mm512_setzero_si512();

  const __m512i continuation = _mm512_set1_epi8(static_cast<char>(0xBF));

  while (i + kVec <= length) {
    const size_t full_vectors = (length - i) / kVec;
    // Start offset of the last whole vector.
    const size_t last_start = i + full_vectors * kVec - kVec;

    for (; i + 8 * kVec <= last_start; i += 8 * kVec) {
      const __m512i v0 =
          _mm512_loadu_si512(reinterpret_cast<const __m512i *>(bytes + i));
      const __m512i v1 = _mm512_loadu_si512(
          reinterpret_cast<const __m512i *>(bytes + i + kVec));
      const __m512i v2 = _mm512_loadu_si512(
          reinterpret_cast<const __m512i *>(bytes + i + 2 * kVec));
      const __m512i v3 = _mm512_loadu_si512(
          reinterpret_cast<const __m512i *>(bytes + i + 3 * kVec));
      const __m512i v4 = _mm512_loadu_si512(
          reinterpret_cast<const __m512i *>(bytes + i + 4 * kVec));
      const __m512i v5 = _mm512_loadu_si512(
          reinterpret_cast<const __m512i *>(bytes + i + 5 * kVec));
      const __m512i v6 = _mm512_loadu_si512(
          reinterpret_cast<const __m512i *>(bytes + i + 6 * kVec));
      const __m512i v7 = _mm512_loadu_si512(
          reinterpret_cast<const __m512i *>(bytes + i + 7 * kVec));

      const __mmask64 m0 = _mm512_cmple_epi8_mask(v0, continuation);
      const __mmask64 m1 = _mm512_cmple_epi8_mask(v1, continuation);
      const __mmask64 m2 = _mm512_cmple_epi8_mask(v2, continuation);
      const __mmask64 m3 = _mm512_cmple_epi8_mask(v3, continuation);
      const __mmask64 m4 = _mm512_cmple_epi8_mask(v4, continuation);
      const __mmask64 m5 = _mm512_cmple_epi8_mask(v5, continuation);
      const __mmask64 m6 = _mm512_cmple_epi8_mask(v6, continuation);
      const __mmask64 m7 = _mm512_cmple_epi8_mask(v7, continuation);

      // One 64-bit lane per mask, popcounted lane-wise and accumulated.
      const __m512i packed_masks =
          _mm512_setr_epi64(m0, m1, m2, m3, m4, m5, m6, m7);
      batched_popcount = _mm512_add_epi64(batched_popcount,
                                          _mm512_popcnt_epi64(packed_masks));
    }

    for (; i <= last_start; i += kVec) {
      const __m512i v =
          _mm512_loadu_si512(reinterpret_cast<const __m512i *>(bytes + i));
      const uint64_t continuation_bits =
          static_cast<uint64_t>(_mm512_cmple_epi8_mask(v, continuation));
      answer -= count_ones(continuation_bits);
    }
  }

  answer -= _mm512_reduce_add_epi64(batched_popcount);

  return answer + scalar::utf8::count_code_points(
                      reinterpret_cast<const char *>(bytes + i), length - i);
}
|
|
1481
|
+
#endif // SIMDUTF_FEATURE_UTF8
|
|
1482
|
+
|
|
1483
|
+
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
|
|
1484
|
+
// Latin-1 length of a UTF-8 buffer; computed as the result of count_utf8.
simdutf_warn_unused size_t implementation::latin1_length_from_utf8(
    const char *buf, size_t len) const noexcept {
  return this->count_utf8(buf, len);
}
|
|
1488
|
+
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
|
|
1489
|
+
|
|
1490
|
+
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
|
|
1491
|
+
// UTF-8 length of a UTF-16LE buffer; forwards to the Icelake kernel.
simdutf_warn_unused size_t implementation::utf8_length_from_utf16le(
    const char16_t *input, size_t length) const noexcept {
  const size_t kernel_result =
      icelake_utf8_length_from_utf16<endianness::LITTLE>(input, length);
  return kernel_result;
}
|
|
1495
|
+
|
|
1496
|
+
// UTF-8 length of a UTF-16BE buffer; forwards to the Icelake kernel.
simdutf_warn_unused size_t implementation::utf8_length_from_utf16be(
    const char16_t *input, size_t length) const noexcept {
  const size_t kernel_result =
      icelake_utf8_length_from_utf16<endianness::BIG>(input, length);
  return kernel_result;
}
|
|
1500
|
+
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
|
|
1501
|
+
|
|
1502
|
+
#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
|
|
1503
|
+
// UTF-32 length of a UTF-16LE buffer; computed as the result of this
// implementation's count_utf16le (called with explicit class qualification).
simdutf_warn_unused size_t implementation::utf32_length_from_utf16le(
    const char16_t *input, size_t length) const noexcept {
  const size_t code_points = implementation::count_utf16le(input, length);
  return code_points;
}
|
|
1507
|
+
|
|
1508
|
+
// UTF-32 length of a UTF-16BE buffer; computed as the result of this
// implementation's count_utf16be (called with explicit class qualification).
simdutf_warn_unused size_t implementation::utf32_length_from_utf16be(
    const char16_t *input, size_t length) const noexcept {
  const size_t code_points = implementation::count_utf16be(input, length);
  return code_points;
}
|
|
1512
|
+
#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
|
|
1513
|
+
|
|
1514
|
+
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
|
|
1515
|
+
// UTF-8 length of a Latin-1 buffer: one byte per input byte plus one extra
// byte for every input byte with its top bit set.
// `answer` starts as the number of bytes covered by whole 64-byte vectors.
//  - Long inputs (>= 2048 such bytes): per-byte counters are accumulated for
//    at most 255 vectors at a time (so an 8-bit counter cannot wrap), then
//    folded into 64-bit lanes with a sum-of-absolute-differences against 0.
//  - Shorter inputs with at least one whole vector: the sign-bit mask of each
//    vector is popcounted directly.
// The remaining tail is handled by the scalar routine.
simdutf_warn_unused size_t implementation::utf8_length_from_latin1(
    const char *input, size_t length) const noexcept {
  constexpr size_t kVec = sizeof(__m512i);
  const uint8_t *bytes = reinterpret_cast<const uint8_t *>(input);
  size_t answer = length / kVec * kVec;
  size_t i = 0;

  if (answer >= 2048) { // long strings optimization
    const unsigned char all_ones = 0xff;
    const __m512i zero = _mm512_setzero_si512();
    __m512i wide_totals = _mm512_setzero_si512();

    while (i + kVec <= length) {
      __m512i byte_counters = _mm512_setzero_si512();
      size_t batch = (length - i) / kVec;
      if (batch > 255) {
        batch = 255;
      }
      // Start offset of the last vector in this batch.
      const size_t last_start = i + batch * kVec - kVec;

      for (; i + 4 * kVec <= last_start; i += 4 * kVec) {
        const __m512i v0 =
            _mm512_loadu_si512(reinterpret_cast<const __m512i *>(bytes + i));
        const __m512i v1 = _mm512_loadu_si512(
            reinterpret_cast<const __m512i *>(bytes + i + kVec));
        const __m512i v2 = _mm512_loadu_si512(
            reinterpret_cast<const __m512i *>(bytes + i + 2 * kVec));
        const __m512i v3 = _mm512_loadu_si512(
            reinterpret_cast<const __m512i *>(bytes + i + 3 * kVec));

        // Bytes that are negative as signed 8-bit values.
        const __mmask64 neg0 = _mm512_cmpgt_epi8_mask(zero, v0);
        const __mmask64 neg1 = _mm512_cmpgt_epi8_mask(zero, v1);
        const __mmask64 neg2 = _mm512_cmpgt_epi8_mask(zero, v2);
        const __mmask64 neg3 = _mm512_cmpgt_epi8_mask(zero, v3);

        // 0xFF in flagged lanes; subtracting 0xFF adds 1 to an 8-bit counter.
        const __m512i flag0 = _mm512_mask_set1_epi8(zero, neg0, all_ones);
        const __m512i flag1 = _mm512_mask_set1_epi8(zero, neg1, all_ones);
        const __m512i flag2 = _mm512_mask_set1_epi8(zero, neg2, all_ones);
        const __m512i flag3 = _mm512_mask_set1_epi8(zero, neg3, all_ones);

        byte_counters = _mm512_sub_epi8(byte_counters, flag0);
        byte_counters = _mm512_sub_epi8(byte_counters, flag1);
        byte_counters = _mm512_sub_epi8(byte_counters, flag2);
        byte_counters = _mm512_sub_epi8(byte_counters, flag3);
      }

      for (; i <= last_start; i += kVec) {
        const __m512i v =
            _mm512_loadu_si512(reinterpret_cast<const __m512i *>(bytes + i));
        const __mmask64 neg = _mm512_cmpgt_epi8_mask(zero, v);
        const __m512i flag = _mm512_mask_set1_epi8(zero, neg, all_ones);
        byte_counters = _mm512_sub_epi8(byte_counters, flag);
      }

      wide_totals =
          _mm512_add_epi64(wide_totals, _mm512_sad_epu8(byte_counters, zero));
    }

    answer += _mm512_reduce_add_epi64(wide_totals);
  } else if (answer > 0) {
    for (; i + kVec <= length; i += kVec) {
      const __m512i latin =
          _mm512_loadu_si512(reinterpret_cast<const __m512i *>(bytes + i));
      const uint64_t top_bits = _mm512_movepi8_mask(latin);
      answer += count_ones(top_bits);
    }
  }

  return answer + scalar::latin1::utf8_length_from_latin1(
                      reinterpret_cast<const char *>(bytes + i), length - i);
}
|
|
1591
|
+
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_LATIN1
|
|
1592
|
+
|
|
1593
|
+
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
|
|
1594
|
+
// UTF-16 length of a UTF-8 buffer.
// Every byte is mapped, via its high nibble, to the number of UTF-16 code
// units it contributes. The per-byte values are summed in 8-bit counters that
// are flushed into 64-bit counters every 127 vectors (127 * 2 fits in a
// byte). The remaining (< 64 byte) tail is handled by the scalar routine.
simdutf_warn_unused size_t implementation::utf16_length_from_utf8(
    const char *input, size_t length) const noexcept {
  // Contribution per high nibble of a UTF-8 byte:
  //   0000..0111 (ASCII)              -> 1
  //   1000..1011 (continuation byte)  -> 0
  //   1100..1110 (2-/3-byte leader)   -> 1
  //   1111       (4-byte leader)      -> 2 (surrogate pair)
  const __m128i nibble_to_units = _mm_setr_epi8(
      1, 1, 1, 1, 1, 1, 1, 1, // 0000 .. 0111
      0, 0, 0, 0,             // 1000 .. 1011
      1, 1, 1,                // 1100 .. 1110
      2                       // 1111
  );
  const __m512i char_length = broadcast_128bit_lane(nibble_to_units);

  constexpr size_t max_pending = 255 / 2;

  const __m512i zero = _mm512_setzero_si512();
  __m512i byte_sums = _mm512_setzero_si512(); // byte-wise counters
  __m512i wide_sums = _mm512_setzero_si512(); // 64-bit counters
  size_t pending = 0; // vectors accumulated since the last flush
  size_t pos = 0;

  while (pos + 64 <= length) {
    const __m512i utf8 =
        _mm512_loadu_si512(reinterpret_cast<const __m512i *>(input + pos));
    const __m512i shifted = _mm512_srli_epi32(utf8, 4);
    const __m512i high_nibbles =
        _mm512_and_si512(shifted, _mm512_set1_epi8(0xf));
    byte_sums = _mm512_add_epi8(
        byte_sums, _mm512_shuffle_epi8(char_length, high_nibbles));
    pos += 64;

    pending += 1;
    if (pending == max_pending) {
      wide_sums = _mm512_add_epi64(wide_sums, _mm512_sad_epu8(byte_sums, zero));
      byte_sums = zero;
      pending = 0;
    }
  }

  size_t count = 0;

  if (pos > 0) { // skip the reduction entirely for short strings
    if (pending > 0) {
      wide_sums = _mm512_add_epi64(wide_sums, _mm512_sad_epu8(byte_sums, zero));
    }

    // Horizontal sum of the eight 64-bit counters.
    const __m128i q0 = _mm512_extracti32x4_epi32(wide_sums, 0);
    const __m128i q1 = _mm512_extracti32x4_epi32(wide_sums, 1);
    const __m128i q2 = _mm512_extracti32x4_epi32(wide_sums, 2);
    const __m128i q3 = _mm512_extracti32x4_epi32(wide_sums, 3);
    const __m128i pair_sums =
        _mm_add_epi64(_mm_add_epi64(q0, q1), _mm_add_epi64(q2, q3));

    count = uint64_t(_mm_extract_epi64(pair_sums, 0)) +
            uint64_t(_mm_extract_epi64(pair_sums, 1));
  }

  return count +
         scalar::utf8::utf16_length_from_utf8(input + pos, length - pos);
}
|
|
1669
|
+
// UTF-8 length of a UTF-16LE buffer, "with replacement" variant; forwards to
// the Icelake kernel and returns its result unchanged.
simdutf_warn_unused result
implementation::utf8_length_from_utf16le_with_replacement(
    const char16_t *input, size_t length) const noexcept {
  result kernel_result =
      icelake_utf8_length_from_utf16_with_replacement<endianness::LITTLE>(
          input, length);
  return kernel_result;
}
|
|
1675
|
+
|
|
1676
|
+
// UTF-8 length of a UTF-16BE buffer, "with replacement" variant; forwards to
// the Icelake kernel and returns its result unchanged.
simdutf_warn_unused result
implementation::utf8_length_from_utf16be_with_replacement(
    const char16_t *input, size_t length) const noexcept {
  result kernel_result =
      icelake_utf8_length_from_utf16_with_replacement<endianness::BIG>(
          input, length);
  return kernel_result;
}
|
|
1682
|
+
|
|
1683
|
+
// UTF-16LE -> UTF-8, "with replacement" variant. No SIMD path here: the work
// is delegated to the scalar converter.
simdutf_warn_unused size_t
implementation::convert_utf16le_to_utf8_with_replacement(
    const char16_t *input, size_t length, char *utf8_buffer) const noexcept {
  const size_t scalar_result =
      scalar::utf16_to_utf8::convert_with_replacement<endianness::LITTLE>(
          input, length, utf8_buffer);
  return scalar_result;
}
|
|
1689
|
+
|
|
1690
|
+
// UTF-16BE -> UTF-8 conversion "with replacement".
// This implementation has no vectorized path of its own here: the call is
// handed straight to scalar::utf16_to_utf8::convert_with_replacement
// instantiated for endianness::BIG, and its return value is passed through.
//
// input       - UTF-16 code units to convert
// length      - number of char16_t elements in `input`
// utf8_buffer - destination buffer written by the scalar routine
simdutf_warn_unused size_t
implementation::convert_utf16be_to_utf8_with_replacement(
    const char16_t *input, size_t length, char *utf8_buffer) const noexcept {
  const size_t written =
      scalar::utf16_to_utf8::convert_with_replacement<endianness::BIG>(
          input, length, utf8_buffer);
  return written;
}
|
|
1696
|
+
|
|
1697
|
+
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF16
|
|
1698
|
+
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
|
|
1699
|
+
// Reports the UTF-8 length for a UTF-32 buffer by delegating to
// utf32::utf8_length_from_utf32 and returning its result unchanged.
//
// input  - pointer to the UTF-32 code points
// length - number of char32_t elements in `input`
simdutf_warn_unused size_t implementation::utf8_length_from_utf32(
    const char32_t *input, size_t length) const noexcept {
  const size_t utf8_bytes = utf32::utf8_length_from_utf32(input, length);
  return utf8_bytes;
}
|
|
1703
|
+
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
|
|
1704
|
+
#if SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
|
|
1705
|
+
// Counts how many UTF-16 code units are needed for a UTF-32 buffer.
//
// Full blocks of 16 code points are processed with 512-bit loads: every lane
// contributes one unit, plus one more for each lane whose value compares
// (unsigned) greater than 0x0000ffff. Whatever is left after the last full
// block is handed to scalar::utf32::utf16_length_from_utf32.
//
// input  - pointer to the UTF-32 code points
// length - number of char32_t elements in `input`
simdutf_warn_unused size_t implementation::utf16_length_from_utf32(
    const char32_t *input, size_t length) const noexcept {
  size_t consumed = 0; // code points already handled by the vector loop
  size_t units{0};     // running total of UTF-16 code units

  if (length >= 16) {
    // Highest start index from which a whole 16-lane block can still be read.
    const size_t last_block_start = length - 16;
    const __m512i bmp_limit = _mm512_set1_epi32((uint32_t)0x0000ffff);

    for (; consumed <= last_block_start; consumed += 16) {
      const __m512i block =
          _mm512_loadu_si512((const __m512i *)(input + consumed));
      // One bit per lane holding a value above 0xffff; each such lane costs
      // one extra code unit on top of the baseline 16.
      const __mmask16 above_bmp = _mm512_cmpgt_epu32_mask(block, bmp_limit);
      units += 16 + count_ones(above_bmp);
    }
  }

  // Scalar tail for the remaining (fewer than 16) code points.
  return units + scalar::utf32::utf16_length_from_utf32(input + consumed,
                                                        length - consumed);
}
|
|
1728
|
+
#endif // SIMDUTF_FEATURE_UTF16 && SIMDUTF_FEATURE_UTF32
|
|
1729
|
+
|
|
1730
|
+
#if SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
|
|
1731
|
+
// Reports the UTF-32 length for a UTF-8 buffer. The answer is exactly what
// implementation::count_utf8 returns for the same buffer, so this simply
// forwards to it.
//
// input  - pointer to the UTF-8 bytes
// length - number of bytes in `input`
simdutf_warn_unused size_t implementation::utf32_length_from_utf8(
    const char *input, size_t length) const noexcept {
  const size_t code_points = implementation::count_utf8(input, length);
  return code_points;
}
|
|
1735
|
+
#endif // SIMDUTF_FEATURE_UTF8 && SIMDUTF_FEATURE_UTF32
|
|
1736
|
+
|
|
1737
|
+
#if SIMDUTF_FEATURE_BASE64
|
|
1738
|
+
// Decodes base64 text stored in a `char` buffer into `output`.
//
// The function only selects which compile-time instantiation of
// compress_decode_base64 to run, based on `options`; all arguments are
// forwarded unchanged.
// NOTE(review): judging from the dispatch below, the three template flags look
// like <url alphabet, accept garbage, default-or-url> - confirm against the
// declaration of compress_decode_base64.
//
// input              - base64 characters to decode
// length             - number of characters in `input`
// output             - destination buffer for the decoded bytes
// options            - base64 variant selector
// last_chunk_options - forwarded as-is to the decoder
simdutf_warn_unused result implementation::base64_to_binary(
    const char *input, size_t length, char *output, base64_options options,
    last_chunk_handling_options last_chunk_options) const noexcept {
  // Variants with the base64_default_or_url bit set.
  if (options & base64_default_or_url) {
    if (options == base64_options::base64_default_or_url_accept_garbage) {
      return compress_decode_base64<false, true, true>(
          output, input, length, options, last_chunk_options);
    }
    return compress_decode_base64<false, false, true>(
        output, input, length, options, last_chunk_options);
  }

  // Variants with the base64_url bit set.
  if (options & base64_url) {
    if (options == base64_options::base64_url_accept_garbage) {
      return compress_decode_base64<true, true, false>(
          output, input, length, options, last_chunk_options);
    }
    return compress_decode_base64<true, false, false>(
        output, input, length, options, last_chunk_options);
  }

  // Everything else is handled by the default instantiations.
  if (options == base64_options::base64_default_accept_garbage) {
    return compress_decode_base64<false, true, false>(
        output, input, length, options, last_chunk_options);
  }
  return compress_decode_base64<false, false, false>(
      output, input, length, options, last_chunk_options);
}
|
|
1767
|
+
|
|
1768
|
+
// Decodes base64 text stored in a `char` buffer into `output`, returning the
// decoder's full_result.
//
// The function only selects which compile-time instantiation of
// compress_decode_base64 to run, based on `options`; all arguments are
// forwarded unchanged.
// NOTE(review): judging from the dispatch below, the three template flags look
// like <url alphabet, accept garbage, default-or-url> - confirm against the
// declaration of compress_decode_base64.
//
// input              - base64 characters to decode
// length             - number of characters in `input`
// output             - destination buffer for the decoded bytes
// options            - base64 variant selector
// last_chunk_options - forwarded as-is to the decoder
simdutf_warn_unused full_result implementation::base64_to_binary_details(
    const char *input, size_t length, char *output, base64_options options,
    last_chunk_handling_options last_chunk_options) const noexcept {
  // Variants with the base64_default_or_url bit set.
  if (options & base64_default_or_url) {
    if (options == base64_options::base64_default_or_url_accept_garbage) {
      return compress_decode_base64<false, true, true>(
          output, input, length, options, last_chunk_options);
    }
    return compress_decode_base64<false, false, true>(
        output, input, length, options, last_chunk_options);
  }

  // Variants with the base64_url bit set.
  if (options & base64_url) {
    if (options == base64_options::base64_url_accept_garbage) {
      return compress_decode_base64<true, true, false>(
          output, input, length, options, last_chunk_options);
    }
    return compress_decode_base64<true, false, false>(
        output, input, length, options, last_chunk_options);
  }

  // Everything else is handled by the default instantiations.
  if (options == base64_options::base64_default_accept_garbage) {
    return compress_decode_base64<false, true, false>(
        output, input, length, options, last_chunk_options);
  }
  return compress_decode_base64<false, false, false>(
      output, input, length, options, last_chunk_options);
}
|
|
1797
|
+
|
|
1798
|
+
// Decodes base64 text stored in a `char16_t` buffer into `output`.
//
// The function only selects which compile-time instantiation of
// compress_decode_base64 to run, based on `options`; all arguments are
// forwarded unchanged.
// NOTE(review): judging from the dispatch below, the three template flags look
// like <url alphabet, accept garbage, default-or-url> - confirm against the
// declaration of compress_decode_base64.
//
// input              - base64 characters (16-bit units) to decode
// length             - number of char16_t elements in `input`
// output             - destination buffer for the decoded bytes
// options            - base64 variant selector
// last_chunk_options - forwarded as-is to the decoder
simdutf_warn_unused result implementation::base64_to_binary(
    const char16_t *input, size_t length, char *output, base64_options options,
    last_chunk_handling_options last_chunk_options) const noexcept {
  // Variants with the base64_default_or_url bit set.
  if (options & base64_default_or_url) {
    if (options == base64_options::base64_default_or_url_accept_garbage) {
      return compress_decode_base64<false, true, true>(
          output, input, length, options, last_chunk_options);
    }
    return compress_decode_base64<false, false, true>(
        output, input, length, options, last_chunk_options);
  }

  // Variants with the base64_url bit set.
  if (options & base64_url) {
    if (options == base64_options::base64_url_accept_garbage) {
      return compress_decode_base64<true, true, false>(
          output, input, length, options, last_chunk_options);
    }
    return compress_decode_base64<true, false, false>(
        output, input, length, options, last_chunk_options);
  }

  // Everything else is handled by the default instantiations.
  if (options == base64_options::base64_default_accept_garbage) {
    return compress_decode_base64<false, true, false>(
        output, input, length, options, last_chunk_options);
  }
  return compress_decode_base64<false, false, false>(
      output, input, length, options, last_chunk_options);
}
|
|
1827
|
+
|
|
1828
|
+
// Decodes base64 text stored in a `char16_t` buffer into `output`, returning
// the decoder's full_result.
//
// The function only selects which compile-time instantiation of
// compress_decode_base64 to run, based on `options`; all arguments are
// forwarded unchanged.
// NOTE(review): judging from the dispatch below, the three template flags look
// like <url alphabet, accept garbage, default-or-url> - confirm against the
// declaration of compress_decode_base64.
//
// input              - base64 characters (16-bit units) to decode
// length             - number of char16_t elements in `input`
// output             - destination buffer for the decoded bytes
// options            - base64 variant selector
// last_chunk_options - forwarded as-is to the decoder
simdutf_warn_unused full_result implementation::base64_to_binary_details(
    const char16_t *input, size_t length, char *output, base64_options options,
    last_chunk_handling_options last_chunk_options) const noexcept {
  // Variants with the base64_default_or_url bit set.
  if (options & base64_default_or_url) {
    if (options == base64_options::base64_default_or_url_accept_garbage) {
      return compress_decode_base64<false, true, true>(
          output, input, length, options, last_chunk_options);
    }
    return compress_decode_base64<false, false, true>(
        output, input, length, options, last_chunk_options);
  }

  // Variants with the base64_url bit set.
  if (options & base64_url) {
    if (options == base64_options::base64_url_accept_garbage) {
      return compress_decode_base64<true, true, false>(
          output, input, length, options, last_chunk_options);
    }
    return compress_decode_base64<true, false, false>(
        output, input, length, options, last_chunk_options);
  }

  // Everything else is handled by the default instantiations.
  if (options == base64_options::base64_default_accept_garbage) {
    return compress_decode_base64<false, true, false>(
        output, input, length, options, last_chunk_options);
  }
  return compress_decode_base64<false, false, false>(
      output, input, length, options, last_chunk_options);
}
|
|
1857
|
+
|
|
1858
|
+
size_t implementation::binary_to_base64(const char *input, size_t length,
|
|
1859
|
+
char *output,
|
|
1860
|
+
base64_options options) const noexcept {
|
|
1861
|
+
if (options & base64_url) {
|
|
1862
|
+
return encode_base64<true>(output, input, length, options);
|
|
1863
|
+
} else {
|
|
1864
|
+
return encode_base64<false>(output, input, length, options);
|
|
1865
|
+
}
|
|
1866
|
+
}
|
|
1867
|
+
|
|
1868
|
+
size_t implementation::binary_to_base64_with_lines(
|
|
1869
|
+
const char *input, size_t length, char *output, size_t line_length,
|
|
1870
|
+
base64_options options) const noexcept {
|
|
1871
|
+
if (options & base64_url) {
|
|
1872
|
+
return encode_base64_impl<true, true>(output, input, length, options,
|
|
1873
|
+
line_length);
|
|
1874
|
+
} else {
|
|
1875
|
+
return encode_base64_impl<false, true>(output, input, length, options,
|
|
1876
|
+
line_length);
|
|
1877
|
+
}
|
|
1878
|
+
}
|
|
1879
|
+
|
|
1880
|
+
const char *implementation::find(const char *start, const char *end,
|
|
1881
|
+
char character) const noexcept {
|
|
1882
|
+
return util_find(start, end, character);
|
|
1883
|
+
}
|
|
1884
|
+
const char16_t *implementation::find(const char16_t *start, const char16_t *end,
|
|
1885
|
+
char16_t character) const noexcept {
|
|
1886
|
+
return util_find(start, end, character);
|
|
1887
|
+
}
|
|
1888
|
+
|
|
1889
|
+
// Reports the binary length for base64 text held in a `char` buffer by
// forwarding to icelake_binary_length_from_base64.
//
// input  - base64 characters
// length - number of characters in `input`
simdutf_warn_unused size_t implementation::binary_length_from_base64(
    const char *input, size_t length) const noexcept {
  const size_t decoded_size = icelake_binary_length_from_base64(input, length);
  return decoded_size;
}
|
|
1893
|
+
|
|
1894
|
+
// Reports the binary length for base64 text held in a `char16_t` buffer by
// forwarding to icelake_binary_length_from_base64.
//
// input  - base64 characters (16-bit units)
// length - number of char16_t elements in `input`
simdutf_warn_unused size_t implementation::binary_length_from_base64(
    const char16_t *input, size_t length) const noexcept {
  const size_t decoded_size = icelake_binary_length_from_base64(input, length);
  return decoded_size;
}
|
|
1898
|
+
#endif // SIMDUTF_FEATURE_BASE64
|
|
1899
|
+
|
|
1900
|
+
} // namespace SIMDUTF_IMPLEMENTATION
|
|
1901
|
+
} // namespace simdutf
|
|
1902
|
+
|
|
1903
|
+
#include "simdutf/icelake/end.h"
|