react-native-quick-crypto 1.0.19 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/QuickCrypto.podspec +12 -38
- package/README.md +2 -0
- package/android/CMakeLists.txt +3 -0
- package/cpp/utils/HybridUtils.cpp +39 -77
- package/deps/simdutf/.clang-format +4 -0
- package/deps/simdutf/.github/ISSUE_TEMPLATE/bug_report.md +62 -0
- package/deps/simdutf/.github/ISSUE_TEMPLATE/config.yml +1 -0
- package/deps/simdutf/.github/ISSUE_TEMPLATE/feature_request.md +35 -0
- package/deps/simdutf/.github/ISSUE_TEMPLATE/standard-issue-template.md +29 -0
- package/deps/simdutf/.github/pull_request_template.md +51 -0
- package/deps/simdutf/.github/workflows/aarch64.yml +39 -0
- package/deps/simdutf/.github/workflows/alpine.yml +27 -0
- package/deps/simdutf/.github/workflows/amalgamation_demos.yml +34 -0
- package/deps/simdutf/.github/workflows/armv7.yml +32 -0
- package/deps/simdutf/.github/workflows/atomic_fuzz.yml +25 -0
- package/deps/simdutf/.github/workflows/cifuzz.yml +37 -0
- package/deps/simdutf/.github/workflows/clangformat.yml +36 -0
- package/deps/simdutf/.github/workflows/debian-latestcxxstandards.yml +40 -0
- package/deps/simdutf/.github/workflows/debian.yml +33 -0
- package/deps/simdutf/.github/workflows/documentation.yml +36 -0
- package/deps/simdutf/.github/workflows/emscripten.yml +19 -0
- package/deps/simdutf/.github/workflows/loongarch64-gcc-14.2.yml +39 -0
- package/deps/simdutf/.github/workflows/macos-latest.yml +29 -0
- package/deps/simdutf/.github/workflows/msys2-clang.yml +48 -0
- package/deps/simdutf/.github/workflows/msys2.yml +50 -0
- package/deps/simdutf/.github/workflows/ppc64le.yml +29 -0
- package/deps/simdutf/.github/workflows/rvv-1024-clang-18.yml +35 -0
- package/deps/simdutf/.github/workflows/rvv-128-clang-17.yml +35 -0
- package/deps/simdutf/.github/workflows/rvv-256-gcc-14.yml +31 -0
- package/deps/simdutf/.github/workflows/s390x.yml +29 -0
- package/deps/simdutf/.github/workflows/selective-amalgamation.yml +29 -0
- package/deps/simdutf/.github/workflows/typos.yml +19 -0
- package/deps/simdutf/.github/workflows/ubuntu22-cxx20.yml +30 -0
- package/deps/simdutf/.github/workflows/ubuntu22.yml +32 -0
- package/deps/simdutf/.github/workflows/ubuntu22_gcc12.yml +27 -0
- package/deps/simdutf/.github/workflows/ubuntu22sani.yml +29 -0
- package/deps/simdutf/.github/workflows/ubuntu24-cxxstandards.yml +34 -0
- package/deps/simdutf/.github/workflows/ubuntu24-unsignedchar.yml +34 -0
- package/deps/simdutf/.github/workflows/ubuntu24.yml +32 -0
- package/deps/simdutf/.github/workflows/ubuntu24sani.yml +36 -0
- package/deps/simdutf/.github/workflows/ubuntu24sani_clang.yml +29 -0
- package/deps/simdutf/.github/workflows/vs17-arm-ci.yml +21 -0
- package/deps/simdutf/.github/workflows/vs17-ci-cxx20.yml +41 -0
- package/deps/simdutf/.github/workflows/vs17-ci.yml +41 -0
- package/deps/simdutf/.github/workflows/vs17-clang-ci.yml +41 -0
- package/deps/simdutf/.github/workflows/vs17-cxxstandards.yml +36 -0
- package/deps/simdutf/AI_USAGE_POLICY.md +56 -0
- package/deps/simdutf/AUTHORS +6 -0
- package/deps/simdutf/CMakeLists.txt +231 -0
- package/deps/simdutf/CONTRIBUTING.md +214 -0
- package/deps/simdutf/CONTRIBUTORS +1 -0
- package/deps/simdutf/Doxyfile +2584 -0
- package/deps/simdutf/LICENSE-APACHE +201 -0
- package/deps/simdutf/LICENSE-MIT +18 -0
- package/deps/simdutf/Makefile.crosscompile +54 -0
- package/deps/simdutf/README-RVV.md +16 -0
- package/deps/simdutf/README.md +2782 -0
- package/deps/simdutf/SECURITY.md +8 -0
- package/deps/simdutf/benchmarks/CMakeLists.txt +101 -0
- package/deps/simdutf/benchmarks/alignment.cpp +150 -0
- package/deps/simdutf/benchmarks/base64/CMakeLists.txt +30 -0
- package/deps/simdutf/benchmarks/base64/benchmark_base64.cpp +875 -0
- package/deps/simdutf/benchmarks/base64/libbase64_spaces.h +49 -0
- package/deps/simdutf/benchmarks/base64/node_base64.h +227 -0
- package/deps/simdutf/benchmarks/base64/openssl3_base64.h +334 -0
- package/deps/simdutf/benchmarks/benchmark.cpp +65 -0
- package/deps/simdutf/benchmarks/benchmark_to_well_formed_utf16.cpp +347 -0
- package/deps/simdutf/benchmarks/competition/.clang-format-ignore +5 -0
- package/deps/simdutf/benchmarks/competition/CppCon2018/utf_utils.cpp +1276 -0
- package/deps/simdutf/benchmarks/competition/CppCon2018/utf_utils.h +595 -0
- package/deps/simdutf/benchmarks/competition/README.md +7 -0
- package/deps/simdutf/benchmarks/competition/hoehrmann/hoehrmann.h +91 -0
- package/deps/simdutf/benchmarks/competition/inoue2008/inoue_utf8_to_utf16.h +444 -0
- package/deps/simdutf/benchmarks/competition/inoue2008/inoue_utf8_to_utf16_tables.h +13183 -0
- package/deps/simdutf/benchmarks/competition/inoue2008/script.py +73 -0
- package/deps/simdutf/benchmarks/competition/llvm/ConvertUTF.cpp +738 -0
- package/deps/simdutf/benchmarks/competition/llvm/ConvertUTF.h +293 -0
- package/deps/simdutf/benchmarks/competition/u8u16/COPYRIGHT +8 -0
- package/deps/simdutf/benchmarks/competition/u8u16/Makefile +44 -0
- package/deps/simdutf/benchmarks/competition/u8u16/OSL3.0.txt +169 -0
- package/deps/simdutf/benchmarks/competition/u8u16/Profiling/BOM_Profiler.h +148 -0
- package/deps/simdutf/benchmarks/competition/u8u16/Profiling/i386_timer.h +45 -0
- package/deps/simdutf/benchmarks/competition/u8u16/Profiling/ppc_timer.c +34 -0
- package/deps/simdutf/benchmarks/competition/u8u16/README +56 -0
- package/deps/simdutf/benchmarks/competition/u8u16/config/config_defs.h +43 -0
- package/deps/simdutf/benchmarks/competition/u8u16/config/g4_config.h +27 -0
- package/deps/simdutf/benchmarks/competition/u8u16/config/mmx_config.h +16 -0
- package/deps/simdutf/benchmarks/competition/u8u16/config/p4_config.h +18 -0
- package/deps/simdutf/benchmarks/competition/u8u16/config/p4_ideal_config.h +16 -0
- package/deps/simdutf/benchmarks/competition/u8u16/config/spu_config.h +28 -0
- package/deps/simdutf/benchmarks/competition/u8u16/config/ssse3_config.h +20 -0
- package/deps/simdutf/benchmarks/competition/u8u16/iconv_u8u16.c +2 -0
- package/deps/simdutf/benchmarks/competition/u8u16/lib/altivec_simd.h +440 -0
- package/deps/simdutf/benchmarks/competition/u8u16/lib/libgen/make_basic_ops.py +121 -0
- package/deps/simdutf/benchmarks/competition/u8u16/lib/libgen/make_half_operand_versions.py +158 -0
- package/deps/simdutf/benchmarks/competition/u8u16/lib/libgen/make_test.py +270 -0
- package/deps/simdutf/benchmarks/competition/u8u16/lib/mmx_simd.h +141 -0
- package/deps/simdutf/benchmarks/competition/u8u16/lib/mmx_simd_basic.h +216 -0
- package/deps/simdutf/benchmarks/competition/u8u16/lib/mmx_simd_built_in.h +119 -0
- package/deps/simdutf/benchmarks/competition/u8u16/lib/mmx_simd_modified.h +2430 -0
- package/deps/simdutf/benchmarks/competition/u8u16/lib/outline.txt +39 -0
- package/deps/simdutf/benchmarks/competition/u8u16/lib/spu_simd.h +421 -0
- package/deps/simdutf/benchmarks/competition/u8u16/lib/sse_simd.h +836 -0
- package/deps/simdutf/benchmarks/competition/u8u16/lib/stdint.h +222 -0
- package/deps/simdutf/benchmarks/competition/u8u16/libu8u16_BE.c +4 -0
- package/deps/simdutf/benchmarks/competition/u8u16/libu8u16_LE.c +5 -0
- package/deps/simdutf/benchmarks/competition/u8u16/proto/u8u16.py +390 -0
- package/deps/simdutf/benchmarks/competition/u8u16/src/Makefile +18 -0
- package/deps/simdutf/benchmarks/competition/u8u16/src/bytelex.h +448 -0
- package/deps/simdutf/benchmarks/competition/u8u16/src/charsets/ASCII_EBCDIC.h +284 -0
- package/deps/simdutf/benchmarks/competition/u8u16/src/libu8u16.c +1975 -0
- package/deps/simdutf/benchmarks/competition/u8u16/src/libu8u16.pdf +0 -0
- package/deps/simdutf/benchmarks/competition/u8u16/src/libu8u16.w +2263 -0
- package/deps/simdutf/benchmarks/competition/u8u16/src/multiliteral.h +239 -0
- package/deps/simdutf/benchmarks/competition/u8u16/src/u8u16.c +232 -0
- package/deps/simdutf/benchmarks/competition/u8u16/src/x8x16.c +194 -0
- package/deps/simdutf/benchmarks/competition/u8u16/src/xml_error.c +193 -0
- package/deps/simdutf/benchmarks/competition/u8u16/src/xml_error.h +167 -0
- package/deps/simdutf/benchmarks/competition/u8u16/src/xmldecl.c +288 -0
- package/deps/simdutf/benchmarks/competition/u8u16/src/xmldecl.h +117 -0
- package/deps/simdutf/benchmarks/competition/u8u16/u8u16_g4.c +2 -0
- package/deps/simdutf/benchmarks/competition/u8u16/u8u16_mmx.c +2 -0
- package/deps/simdutf/benchmarks/competition/u8u16/u8u16_p4.c +3 -0
- package/deps/simdutf/benchmarks/competition/u8u16/u8u16_p4_ideal.c +2 -0
- package/deps/simdutf/benchmarks/competition/u8u16/u8u16_spu.c +2 -0
- package/deps/simdutf/benchmarks/competition/u8u16/u8u16_ssse3.c +3 -0
- package/deps/simdutf/benchmarks/competition/u8u16/x8x16_p4.c +2 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/LICENSE +23 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/data/test_minimal.txt +44 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/readme.md +106 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_clang_corr_tests.cmd +11 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_clang_corr_tests.sh +13 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_gcc_corr_tests.sh +13 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_gcc_example.sh +13 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_gcc_file_conv.sh +14 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_gcc_iconv_lib.sh +11 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_gcc_iconv_sample.sh +8 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_mingw_corr_tests.cmd +12 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_mingw_example.cmd +13 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_mingw_file_conv.cmd +14 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_mingw_iconv_lib.cmd +11 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_mingw_iconv_sample.cmd +8 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_msvc_corr_tests.cmd +11 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_msvc_example.cmd +12 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_msvc_file_conv.cmd +13 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_msvc_iconv_lib.cmd +10 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_msvc_iconv_sample.cmd +9 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/html_table.py +25 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/measure.py +94 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/resize.py +20 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/wipe_all.cmd +2 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/wipe_interm.cmd +1 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/base/CustomMemcpy.h +75 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/base/PerfDefs.h +47 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/base/Timing.cpp +17 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/base/Timing.h +76 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/AllProcessors.cpp +35 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/BaseBufferProcessor.cpp +117 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/BaseBufferProcessor.h +210 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/BufferDecoder.h +158 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/BufferEncoder.h +104 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/ProcessorPlugins.h +334 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/ProcessorSelector.h +186 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/core/DecoderLut.cpp +140 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/core/DecoderLut.h +42 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/core/DecoderProcess.h +100 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/core/Dfa.h +57 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/core/EncoderLut.cpp +85 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/core/EncoderLut.h +27 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/core/EncoderProcess.h +126 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/core/ProcessTrivial.h +108 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/iconv/iconv.cpp +139 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/iconv/iconv.h +74 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/message/MessageConverter.cpp +65 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/message/MessageConverter.h +91 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/tests/CorrectnessTests.cpp +772 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/tests/Example.cpp +12 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/tests/FileConverter.cpp +486 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/tests/iconv_sample.c +162 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/utf8lut.h +15 -0
- package/deps/simdutf/benchmarks/competition/utf8sse4/fromutf8-sse.cpp +292 -0
- package/deps/simdutf/benchmarks/competition/utfcpp/LICENSE +23 -0
- package/deps/simdutf/benchmarks/competition/utfcpp/README.md +1503 -0
- package/deps/simdutf/benchmarks/competition/utfcpp/source/utf8/checked.h +335 -0
- package/deps/simdutf/benchmarks/competition/utfcpp/source/utf8/core.h +338 -0
- package/deps/simdutf/benchmarks/competition/utfcpp/source/utf8/cpp11.h +103 -0
- package/deps/simdutf/benchmarks/competition/utfcpp/source/utf8/cpp17.h +103 -0
- package/deps/simdutf/benchmarks/competition/utfcpp/source/utf8/unchecked.h +274 -0
- package/deps/simdutf/benchmarks/competition/utfcpp/source/utf8.h +34 -0
- package/deps/simdutf/benchmarks/dataset/README.md +155 -0
- package/deps/simdutf/benchmarks/dataset/emoji.txt +204 -0
- package/deps/simdutf/benchmarks/dataset/scripts/utf8type.py +40 -0
- package/deps/simdutf/benchmarks/dataset/wikipedia_mars/Makefile +80 -0
- package/deps/simdutf/benchmarks/dataset/wikipedia_mars/convert_to_utf6.py +20 -0
- package/deps/simdutf/benchmarks/find/CMakeLists.txt +6 -0
- package/deps/simdutf/benchmarks/find/findbenchmark.cpp +63 -0
- package/deps/simdutf/benchmarks/find/findbenchmarker.h +46 -0
- package/deps/simdutf/benchmarks/shortbench.cpp +555 -0
- package/deps/simdutf/benchmarks/src/CMakeLists.txt +52 -0
- package/deps/simdutf/benchmarks/src/apple_arm_events.h +1104 -0
- package/deps/simdutf/benchmarks/src/benchmark.cpp +3899 -0
- package/deps/simdutf/benchmarks/src/benchmark.h +317 -0
- package/deps/simdutf/benchmarks/src/benchmark_base.cpp +144 -0
- package/deps/simdutf/benchmarks/src/benchmark_base.h +98 -0
- package/deps/simdutf/benchmarks/src/cmdline.cpp +176 -0
- package/deps/simdutf/benchmarks/src/cmdline.h +35 -0
- package/deps/simdutf/benchmarks/src/event_counter.h +162 -0
- package/deps/simdutf/benchmarks/src/linux-perf-events.h +104 -0
- package/deps/simdutf/benchmarks/stream.cpp +209 -0
- package/deps/simdutf/benchmarks/threaded.cpp +123 -0
- package/deps/simdutf/cmake/CPM.cmake +1363 -0
- package/deps/simdutf/cmake/JoinPaths.cmake +23 -0
- package/deps/simdutf/cmake/add_cpp_test.cmake +68 -0
- package/deps/simdutf/cmake/simdutf-config.cmake.in +2 -0
- package/deps/simdutf/cmake/simdutf-flags.cmake +26 -0
- package/deps/simdutf/cmake/toolchains-ci/riscv64-linux-gnu.cmake +4 -0
- package/deps/simdutf/cmake/toolchains-dev/README.md +32 -0
- package/deps/simdutf/cmake/toolchains-dev/aarch64.cmake +14 -0
- package/deps/simdutf/cmake/toolchains-dev/loongarch64.cmake +22 -0
- package/deps/simdutf/cmake/toolchains-dev/powerpc64.cmake +16 -0
- package/deps/simdutf/cmake/toolchains-dev/powerpc64le.cmake +16 -0
- package/deps/simdutf/cmake/toolchains-dev/riscv64.cmake +16 -0
- package/deps/simdutf/cmake/toolchains-dev/rvv-spike.cmake +38 -0
- package/deps/simdutf/doc/avx512.png +0 -0
- package/deps/simdutf/doc/logo.png +0 -0
- package/deps/simdutf/doc/logo.svg +165 -0
- package/deps/simdutf/doc/node2023.png +0 -0
- package/deps/simdutf/doc/shortinput.md +78 -0
- package/deps/simdutf/doc/utf16utf8.png +0 -0
- package/deps/simdutf/doc/utf8utf16.png +0 -0
- package/deps/simdutf/doc/widelogo.png +0 -0
- package/deps/simdutf/doxygen.py +50 -0
- package/deps/simdutf/fuzz/.clang-format +9 -0
- package/deps/simdutf/fuzz/CMakeLists.txt +45 -0
- package/deps/simdutf/fuzz/README.md +168 -0
- package/deps/simdutf/fuzz/atomic_base64.cpp +448 -0
- package/deps/simdutf/fuzz/base64.cpp +278 -0
- package/deps/simdutf/fuzz/build.sh +83 -0
- package/deps/simdutf/fuzz/conversion.cpp +669 -0
- package/deps/simdutf/fuzz/helpers/.clang-format-ignore +1 -0
- package/deps/simdutf/fuzz/helpers/common.h +135 -0
- package/deps/simdutf/fuzz/helpers/nameof.hpp +1258 -0
- package/deps/simdutf/fuzz/main.cpp +72 -0
- package/deps/simdutf/fuzz/minimize_and_cleanse.sh +87 -0
- package/deps/simdutf/fuzz/misc.cpp +216 -0
- package/deps/simdutf/fuzz/random_fuzz.sh +154 -0
- package/deps/simdutf/fuzz/roundtrip.cpp +588 -0
- package/deps/simdutf/fuzz/safe_conversion.cpp +104 -0
- package/deps/simdutf/include/simdutf/avx512.h +79 -0
- package/deps/simdutf/include/simdutf/base64_implementation.h +158 -0
- package/deps/simdutf/include/simdutf/base64_tables.h +887 -0
- package/deps/simdutf/include/simdutf/common_defs.h +186 -0
- package/deps/simdutf/include/simdutf/compiler_check.h +50 -0
- package/deps/simdutf/include/simdutf/constexpr_ptr.h +138 -0
- package/deps/simdutf/include/simdutf/encoding_types.h +189 -0
- package/deps/simdutf/include/simdutf/error.h +126 -0
- package/deps/simdutf/include/simdutf/implementation.h +7081 -0
- package/deps/simdutf/include/simdutf/internal/isadetection.h +325 -0
- package/deps/simdutf/include/simdutf/portability.h +285 -0
- package/deps/simdutf/include/simdutf/scalar/ascii.h +86 -0
- package/deps/simdutf/include/simdutf/scalar/atomic_util.h +105 -0
- package/deps/simdutf/include/simdutf/scalar/base64.h +911 -0
- package/deps/simdutf/include/simdutf/scalar/latin1.h +26 -0
- package/deps/simdutf/include/simdutf/scalar/latin1_to_utf16/latin1_to_utf16.h +52 -0
- package/deps/simdutf/include/simdutf/scalar/latin1_to_utf32/latin1_to_utf32.h +27 -0
- package/deps/simdutf/include/simdutf/scalar/latin1_to_utf8/latin1_to_utf8.h +191 -0
- package/deps/simdutf/include/simdutf/scalar/swap_bytes.h +35 -0
- package/deps/simdutf/include/simdutf/scalar/utf16.h +226 -0
- package/deps/simdutf/include/simdutf/scalar/utf16_to_latin1/utf16_to_latin1.h +108 -0
- package/deps/simdutf/include/simdutf/scalar/utf16_to_latin1/valid_utf16_to_latin1.h +40 -0
- package/deps/simdutf/include/simdutf/scalar/utf16_to_utf32/utf16_to_utf32.h +86 -0
- package/deps/simdutf/include/simdutf/scalar/utf16_to_utf32/valid_utf16_to_utf32.h +44 -0
- package/deps/simdutf/include/simdutf/scalar/utf16_to_utf8/utf16_to_utf8.h +295 -0
- package/deps/simdutf/include/simdutf/scalar/utf16_to_utf8/valid_utf16_to_utf8.h +91 -0
- package/deps/simdutf/include/simdutf/scalar/utf32.h +82 -0
- package/deps/simdutf/include/simdutf/scalar/utf32_to_latin1/utf32_to_latin1.h +68 -0
- package/deps/simdutf/include/simdutf/scalar/utf32_to_latin1/valid_utf32_to_latin1.h +67 -0
- package/deps/simdutf/include/simdutf/scalar/utf32_to_utf16/utf32_to_utf16.h +84 -0
- package/deps/simdutf/include/simdutf/scalar/utf32_to_utf16/valid_utf32_to_utf16.h +44 -0
- package/deps/simdutf/include/simdutf/scalar/utf32_to_utf8/utf32_to_utf8.h +142 -0
- package/deps/simdutf/include/simdutf/scalar/utf32_to_utf8/valid_utf32_to_utf8.h +72 -0
- package/deps/simdutf/include/simdutf/scalar/utf8.h +326 -0
- package/deps/simdutf/include/simdutf/scalar/utf8_to_latin1/utf8_to_latin1.h +225 -0
- package/deps/simdutf/include/simdutf/scalar/utf8_to_latin1/valid_utf8_to_latin1.h +87 -0
- package/deps/simdutf/include/simdutf/scalar/utf8_to_utf16/utf8_to_utf16.h +342 -0
- package/deps/simdutf/include/simdutf/scalar/utf8_to_utf16/valid_utf8_to_utf16.h +106 -0
- package/deps/simdutf/include/simdutf/scalar/utf8_to_utf32/utf8_to_utf32.h +299 -0
- package/deps/simdutf/include/simdutf/scalar/utf8_to_utf32/valid_utf8_to_utf32.h +83 -0
- package/deps/simdutf/include/simdutf/simdutf_version.h +26 -0
- package/deps/simdutf/include/simdutf.h +26 -0
- package/deps/simdutf/include/simdutf_c.h +342 -0
- package/deps/simdutf/riscv/Dockerfile +16 -0
- package/deps/simdutf/riscv/README.md +24 -0
- package/deps/simdutf/riscv/remove-docker-station +8 -0
- package/deps/simdutf/riscv/run-docker-station +31 -0
- package/deps/simdutf/scripts/.flake8 +2 -0
- package/deps/simdutf/scripts/Makefile +2 -0
- package/deps/simdutf/scripts/README_ADD_FUNCTION.md +49 -0
- package/deps/simdutf/scripts/add_function.py +330 -0
- package/deps/simdutf/scripts/amalgamation_tests.py +156 -0
- package/deps/simdutf/scripts/base64/Makefile +2 -0
- package/deps/simdutf/scripts/base64/README.md +2 -0
- package/deps/simdutf/scripts/base64/avx512.py +76 -0
- package/deps/simdutf/scripts/base64/neon_decode.py +143 -0
- package/deps/simdutf/scripts/base64/neon_generate_lut.py +101 -0
- package/deps/simdutf/scripts/base64/sse.py +252 -0
- package/deps/simdutf/scripts/base64/sseregular.py +160 -0
- package/deps/simdutf/scripts/base64/sseurl.py +283 -0
- package/deps/simdutf/scripts/base64/table.py +59 -0
- package/deps/simdutf/scripts/base64bench_print.py +145 -0
- package/deps/simdutf/scripts/benchmark-all.py +119 -0
- package/deps/simdutf/scripts/benchmark_print.py +324 -0
- package/deps/simdutf/scripts/check_feature_macros.py +156 -0
- package/deps/simdutf/scripts/check_typos.sh +13 -0
- package/deps/simdutf/scripts/clang_format.sh +35 -0
- package/deps/simdutf/scripts/clang_format_docker.sh +38 -0
- package/deps/simdutf/scripts/common.py +24 -0
- package/deps/simdutf/scripts/compilation_benchmark.py +55 -0
- package/deps/simdutf/scripts/compile_many_variations.sh +64 -0
- package/deps/simdutf/scripts/create_latex_table.py +62 -0
- package/deps/simdutf/scripts/docker/Dockerfile +14 -0
- package/deps/simdutf/scripts/docker/Makefile +9 -0
- package/deps/simdutf/scripts/docker/README.md +30 -0
- package/deps/simdutf/scripts/docker/llvm.gpg +0 -0
- package/deps/simdutf/scripts/ppc64_convert_utf16_to_utf8.py +155 -0
- package/deps/simdutf/scripts/prepare_doxygen.sh +21 -0
- package/deps/simdutf/scripts/release.py +197 -0
- package/deps/simdutf/scripts/shortinputplots.py +97 -0
- package/deps/simdutf/scripts/sse_convert_utf16_to_utf8.py +422 -0
- package/deps/simdutf/scripts/sse_convert_utf32_to_utf16.py +105 -0
- package/deps/simdutf/scripts/sse_utf8_utf16_decode.py +186 -0
- package/deps/simdutf/scripts/sse_validate_utf16le_proof.py +137 -0
- package/deps/simdutf/scripts/sse_validate_utf16le_testcases.py +129 -0
- package/deps/simdutf/scripts/table.py +207 -0
- package/deps/simdutf/scripts/tests/new.txt +33 -0
- package/deps/simdutf/scripts/tests/old.txt +33 -0
- package/deps/simdutf/scripts/tests/results.txt +272 -0
- package/deps/simdutf/simdutf.pc.in +11 -0
- package/deps/simdutf/singleheader/.flake8 +2 -0
- package/deps/simdutf/singleheader/CMakeLists.txt +64 -0
- package/deps/simdutf/singleheader/README-dev.md +81 -0
- package/deps/simdutf/singleheader/README.md +19 -0
- package/deps/simdutf/singleheader/amalgamate.py +513 -0
- package/deps/simdutf/singleheader/amalgamation_demo.c +59 -0
- package/deps/simdutf/singleheader/amalgamation_demo.cpp +54 -0
- package/deps/simdutf/singleheader/test-features.py +262 -0
- package/deps/simdutf/src/CMakeLists.txt +78 -0
- package/deps/simdutf/src/arm64/arm_base64.cpp +791 -0
- package/deps/simdutf/src/arm64/arm_convert_latin1_to_utf16.cpp +24 -0
- package/deps/simdutf/src/arm64/arm_convert_latin1_to_utf32.cpp +24 -0
- package/deps/simdutf/src/arm64/arm_convert_latin1_to_utf8.cpp +70 -0
- package/deps/simdutf/src/arm64/arm_convert_utf16_to_latin1.cpp +61 -0
- package/deps/simdutf/src/arm64/arm_convert_utf16_to_utf32.cpp +185 -0
- package/deps/simdutf/src/arm64/arm_convert_utf16_to_utf8.cpp +780 -0
- package/deps/simdutf/src/arm64/arm_convert_utf32_to_latin1.cpp +60 -0
- package/deps/simdutf/src/arm64/arm_convert_utf32_to_utf16.cpp +208 -0
- package/deps/simdutf/src/arm64/arm_convert_utf32_to_utf8.cpp +505 -0
- package/deps/simdutf/src/arm64/arm_convert_utf8_to_latin1.cpp +69 -0
- package/deps/simdutf/src/arm64/arm_convert_utf8_to_utf16.cpp +313 -0
- package/deps/simdutf/src/arm64/arm_convert_utf8_to_utf32.cpp +179 -0
- package/deps/simdutf/src/arm64/arm_find.cpp +199 -0
- package/deps/simdutf/src/arm64/arm_utf16fix.cpp +185 -0
- package/deps/simdutf/src/arm64/arm_validate_utf16.cpp +165 -0
- package/deps/simdutf/src/arm64/arm_validate_utf32le.cpp +65 -0
- package/deps/simdutf/src/arm64/implementation.cpp +1442 -0
- package/deps/simdutf/src/encoding_types.cpp +67 -0
- package/deps/simdutf/src/error.cpp +3 -0
- package/deps/simdutf/src/fallback/implementation.cpp +589 -0
- package/deps/simdutf/src/generic/ascii_validation.h +50 -0
- package/deps/simdutf/src/generic/base64.h +233 -0
- package/deps/simdutf/src/generic/base64lengths.h +63 -0
- package/deps/simdutf/src/generic/buf_block_reader.h +109 -0
- package/deps/simdutf/src/generic/find.h +75 -0
- package/deps/simdutf/src/generic/utf16/change_endianness.h +24 -0
- package/deps/simdutf/src/generic/utf16/count_code_points_bytemask.h +58 -0
- package/deps/simdutf/src/generic/utf16/to_well_formed.h +93 -0
- package/deps/simdutf/src/generic/utf16/utf32_length_from_utf16.h +15 -0
- package/deps/simdutf/src/generic/utf16/utf8_length_from_utf16.h +35 -0
- package/deps/simdutf/src/generic/utf16/utf8_length_from_utf16_bytemask.h +199 -0
- package/deps/simdutf/src/generic/utf16.h +73 -0
- package/deps/simdutf/src/generic/utf32.h +136 -0
- package/deps/simdutf/src/generic/utf8/utf16_length_from_utf8_bytemask.h +53 -0
- package/deps/simdutf/src/generic/utf8.h +92 -0
- package/deps/simdutf/src/generic/utf8_to_latin1/utf8_to_latin1.h +316 -0
- package/deps/simdutf/src/generic/utf8_to_latin1/valid_utf8_to_latin1.h +78 -0
- package/deps/simdutf/src/generic/utf8_to_utf16/utf8_to_utf16.h +332 -0
- package/deps/simdutf/src/generic/utf8_to_utf16/valid_utf8_to_utf16.h +74 -0
- package/deps/simdutf/src/generic/utf8_to_utf32/utf8_to_utf32.h +318 -0
- package/deps/simdutf/src/generic/utf8_to_utf32/valid_utf8_to_utf32.h +42 -0
- package/deps/simdutf/src/generic/utf8_validation/utf8_lookup4_algorithm.h +223 -0
- package/deps/simdutf/src/generic/utf8_validation/utf8_validator.h +84 -0
- package/deps/simdutf/src/generic/validate_utf16.h +164 -0
- package/deps/simdutf/src/generic/validate_utf32.h +99 -0
- package/deps/simdutf/src/haswell/avx2_base64.cpp +837 -0
- package/deps/simdutf/src/haswell/avx2_convert_latin1_to_utf16.cpp +28 -0
- package/deps/simdutf/src/haswell/avx2_convert_latin1_to_utf32.cpp +20 -0
- package/deps/simdutf/src/haswell/avx2_convert_latin1_to_utf8.cpp +83 -0
- package/deps/simdutf/src/haswell/avx2_convert_utf16_to_latin1.cpp +83 -0
- package/deps/simdutf/src/haswell/avx2_convert_utf16_to_utf32.cpp +210 -0
- package/deps/simdutf/src/haswell/avx2_convert_utf16_to_utf8.cpp +602 -0
- package/deps/simdutf/src/haswell/avx2_convert_utf32_to_latin1.cpp +116 -0
- package/deps/simdutf/src/haswell/avx2_convert_utf32_to_utf16.cpp +164 -0
- package/deps/simdutf/src/haswell/avx2_convert_utf32_to_utf8.cpp +569 -0
- package/deps/simdutf/src/haswell/avx2_convert_utf8_to_latin1.cpp +60 -0
- package/deps/simdutf/src/haswell/avx2_convert_utf8_to_utf16.cpp +195 -0
- package/deps/simdutf/src/haswell/avx2_convert_utf8_to_utf32.cpp +135 -0
- package/deps/simdutf/src/haswell/avx2_utf16fix.cpp +173 -0
- package/deps/simdutf/src/haswell/avx2_validate_utf16.cpp +17 -0
- package/deps/simdutf/src/haswell/implementation.cpp +1447 -0
- package/deps/simdutf/src/icelake/icelake_ascii_validation.inl.cpp +19 -0
- package/deps/simdutf/src/icelake/icelake_base64.inl.cpp +630 -0
- package/deps/simdutf/src/icelake/icelake_common.inl.cpp +37 -0
- package/deps/simdutf/src/icelake/icelake_convert_latin1_to_utf16.inl.cpp +36 -0
- package/deps/simdutf/src/icelake/icelake_convert_latin1_to_utf32.inl.cpp +23 -0
- package/deps/simdutf/src/icelake/icelake_convert_latin1_to_utf8.inl.cpp +107 -0
- package/deps/simdutf/src/icelake/icelake_convert_utf16_to_latin1.inl.cpp +103 -0
- package/deps/simdutf/src/icelake/icelake_convert_utf16_to_utf32.inl.cpp +136 -0
- package/deps/simdutf/src/icelake/icelake_convert_utf16_to_utf8.inl.cpp +206 -0
- package/deps/simdutf/src/icelake/icelake_convert_utf32_to_latin1.inl.cpp +74 -0
- package/deps/simdutf/src/icelake/icelake_convert_utf32_to_utf16.inl.cpp +338 -0
- package/deps/simdutf/src/icelake/icelake_convert_utf32_to_utf8.inl.cpp +574 -0
- package/deps/simdutf/src/icelake/icelake_convert_utf8_to_latin1.inl.cpp +104 -0
- package/deps/simdutf/src/icelake/icelake_convert_utf8_to_utf16.inl.cpp +75 -0
- package/deps/simdutf/src/icelake/icelake_convert_valid_utf8_to_latin1.inl.cpp +69 -0
- package/deps/simdutf/src/icelake/icelake_find.inl.cpp +146 -0
- package/deps/simdutf/src/icelake/icelake_from_utf8.inl.cpp +266 -0
- package/deps/simdutf/src/icelake/icelake_from_valid_utf8.inl.cpp +136 -0
- package/deps/simdutf/src/icelake/icelake_macros.inl.cpp +143 -0
- package/deps/simdutf/src/icelake/icelake_utf16fix.cpp +138 -0
- package/deps/simdutf/src/icelake/icelake_utf32_validation.inl.cpp +63 -0
- package/deps/simdutf/src/icelake/icelake_utf8_common.inl.cpp +753 -0
- package/deps/simdutf/src/icelake/icelake_utf8_length_from_utf16.inl.cpp +269 -0
- package/deps/simdutf/src/icelake/icelake_utf8_validation.inl.cpp +116 -0
- package/deps/simdutf/src/icelake/implementation.cpp +1903 -0
- package/deps/simdutf/src/implementation.cpp +2526 -0
- package/deps/simdutf/src/lasx/implementation.cpp +1531 -0
- package/deps/simdutf/src/lasx/lasx_base64.cpp +695 -0
- package/deps/simdutf/src/lasx/lasx_convert_latin1_to_utf16.cpp +76 -0
- package/deps/simdutf/src/lasx/lasx_convert_latin1_to_utf32.cpp +55 -0
- package/deps/simdutf/src/lasx/lasx_convert_latin1_to_utf8.cpp +65 -0
- package/deps/simdutf/src/lasx/lasx_convert_utf16_to_latin1.cpp +64 -0
- package/deps/simdutf/src/lasx/lasx_convert_utf16_to_utf32.cpp +183 -0
- package/deps/simdutf/src/lasx/lasx_convert_utf16_to_utf8.cpp +550 -0
- package/deps/simdutf/src/lasx/lasx_convert_utf32_to_latin1.cpp +73 -0
- package/deps/simdutf/src/lasx/lasx_convert_utf32_to_utf16.cpp +218 -0
- package/deps/simdutf/src/lasx/lasx_convert_utf32_to_utf8.cpp +589 -0
- package/deps/simdutf/src/lasx/lasx_convert_utf8_to_latin1.cpp +72 -0
- package/deps/simdutf/src/lasx/lasx_convert_utf8_to_utf16.cpp +296 -0
- package/deps/simdutf/src/lasx/lasx_convert_utf8_to_utf32.cpp +190 -0
- package/deps/simdutf/src/lasx/lasx_find.cpp +64 -0
- package/deps/simdutf/src/lasx/lasx_validate_utf16.cpp +13 -0
- package/deps/simdutf/src/lasx/lasx_validate_utf32le.cpp +84 -0
- package/deps/simdutf/src/lsx/implementation.cpp +1417 -0
- package/deps/simdutf/src/lsx/lsx_base64.cpp +675 -0
- package/deps/simdutf/src/lsx/lsx_convert_latin1_to_utf16.cpp +39 -0
- package/deps/simdutf/src/lsx/lsx_convert_latin1_to_utf32.cpp +27 -0
- package/deps/simdutf/src/lsx/lsx_convert_latin1_to_utf8.cpp +56 -0
- package/deps/simdutf/src/lsx/lsx_convert_utf16_to_latin1.cpp +64 -0
- package/deps/simdutf/src/lsx/lsx_convert_utf16_to_utf32.cpp +133 -0
- package/deps/simdutf/src/lsx/lsx_convert_utf16_to_utf8.cpp +518 -0
- package/deps/simdutf/src/lsx/lsx_convert_utf32_to_latin1.cpp +66 -0
- package/deps/simdutf/src/lsx/lsx_convert_utf32_to_utf16.cpp +155 -0
- package/deps/simdutf/src/lsx/lsx_convert_utf32_to_utf8.cpp +459 -0
- package/deps/simdutf/src/lsx/lsx_convert_utf8_to_latin1.cpp +75 -0
- package/deps/simdutf/src/lsx/lsx_convert_utf8_to_utf16.cpp +291 -0
- package/deps/simdutf/src/lsx/lsx_convert_utf8_to_utf32.cpp +179 -0
- package/deps/simdutf/src/lsx/lsx_find.cpp +60 -0
- package/deps/simdutf/src/lsx/lsx_validate_utf16.cpp +13 -0
- package/deps/simdutf/src/lsx/lsx_validate_utf32le.cpp +68 -0
- package/deps/simdutf/src/ppc64/implementation.cpp +992 -0
- package/deps/simdutf/src/ppc64/ppc64_base64.cpp +480 -0
- package/deps/simdutf/src/ppc64/ppc64_base64_internal_tests.cpp +401 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_latin1_to_utf16.cpp +12 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_latin1_to_utf32.cpp +12 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_latin1_to_utf8.cpp +149 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_utf16_to_latin1.cpp +67 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_utf16_to_utf32.cpp +87 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_utf16_to_utf8.cpp +296 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_utf32_to_latin1.cpp +57 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_utf32_to_utf16.cpp +117 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_utf32_to_utf8.cpp +166 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_utf8_to_latin1.cpp +69 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_utf8_to_utf16.cpp +211 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_utf8_to_utf32.cpp +153 -0
- package/deps/simdutf/src/ppc64/ppc64_utf16_to_utf8_tables.h +1011 -0
- package/deps/simdutf/src/ppc64/ppc64_utf8_length_from_latin1.cpp +37 -0
- package/deps/simdutf/src/ppc64/ppc64_validate_utf16.cpp +19 -0
- package/deps/simdutf/src/ppc64/templates.cpp +91 -0
- package/deps/simdutf/src/rvv/implementation.cpp +138 -0
- package/deps/simdutf/src/rvv/rvv_find.cpp +27 -0
- package/deps/simdutf/src/rvv/rvv_helpers.inl.cpp +23 -0
- package/deps/simdutf/src/rvv/rvv_latin1_to.inl.cpp +71 -0
- package/deps/simdutf/src/rvv/rvv_length_from.inl.cpp +164 -0
- package/deps/simdutf/src/rvv/rvv_utf16_to.inl.cpp +399 -0
- package/deps/simdutf/src/rvv/rvv_utf16fix.cpp +110 -0
- package/deps/simdutf/src/rvv/rvv_utf32_to.inl.cpp +307 -0
- package/deps/simdutf/src/rvv/rvv_utf8_to.inl.cpp +435 -0
- package/deps/simdutf/src/rvv/rvv_validate.inl.cpp +275 -0
- package/deps/simdutf/src/simdutf/arm64/begin.h +2 -0
- package/deps/simdutf/src/simdutf/arm64/bitmanipulation.h +34 -0
- package/deps/simdutf/src/simdutf/arm64/end.h +2 -0
- package/deps/simdutf/src/simdutf/arm64/implementation.h +307 -0
- package/deps/simdutf/src/simdutf/arm64/intrinsics.h +10 -0
- package/deps/simdutf/src/simdutf/arm64/simd.h +547 -0
- package/deps/simdutf/src/simdutf/arm64/simd16-inl.h +403 -0
- package/deps/simdutf/src/simdutf/arm64/simd32-inl.h +129 -0
- package/deps/simdutf/src/simdutf/arm64/simd64-inl.h +28 -0
- package/deps/simdutf/src/simdutf/arm64.h +43 -0
- package/deps/simdutf/src/simdutf/fallback/begin.h +1 -0
- package/deps/simdutf/src/simdutf/fallback/bitmanipulation.h +13 -0
- package/deps/simdutf/src/simdutf/fallback/end.h +1 -0
- package/deps/simdutf/src/simdutf/fallback/implementation.h +331 -0
- package/deps/simdutf/src/simdutf/fallback.h +42 -0
- package/deps/simdutf/src/simdutf/haswell/begin.h +15 -0
- package/deps/simdutf/src/simdutf/haswell/bitmanipulation.h +35 -0
- package/deps/simdutf/src/simdutf/haswell/end.h +13 -0
- package/deps/simdutf/src/simdutf/haswell/implementation.h +338 -0
- package/deps/simdutf/src/simdutf/haswell/intrinsics.h +67 -0
- package/deps/simdutf/src/simdutf/haswell/simd.h +363 -0
- package/deps/simdutf/src/simdutf/haswell/simd16-inl.h +261 -0
- package/deps/simdutf/src/simdutf/haswell/simd32-inl.h +111 -0
- package/deps/simdutf/src/simdutf/haswell/simd64-inl.h +34 -0
- package/deps/simdutf/src/simdutf/haswell.h +63 -0
- package/deps/simdutf/src/simdutf/icelake/begin.h +14 -0
- package/deps/simdutf/src/simdutf/icelake/bitmanipulation.h +44 -0
- package/deps/simdutf/src/simdutf/icelake/end.h +12 -0
- package/deps/simdutf/src/simdutf/icelake/implementation.h +346 -0
- package/deps/simdutf/src/simdutf/icelake/intrinsics.h +138 -0
- package/deps/simdutf/src/simdutf/icelake/simd.h +17 -0
- package/deps/simdutf/src/simdutf/icelake/simd16-inl.h +90 -0
- package/deps/simdutf/src/simdutf/icelake/simd32-inl.h +47 -0
- package/deps/simdutf/src/simdutf/icelake.h +81 -0
- package/deps/simdutf/src/simdutf/lasx/begin.h +8 -0
- package/deps/simdutf/src/simdutf/lasx/bitmanipulation.h +25 -0
- package/deps/simdutf/src/simdutf/lasx/end.h +8 -0
- package/deps/simdutf/src/simdutf/lasx/implementation.h +310 -0
- package/deps/simdutf/src/simdutf/lasx/intrinsics.h +319 -0
- package/deps/simdutf/src/simdutf/lasx/simd.h +551 -0
- package/deps/simdutf/src/simdutf/lasx/simd16-inl.h +234 -0
- package/deps/simdutf/src/simdutf/lasx/simd32-inl.h +74 -0
- package/deps/simdutf/src/simdutf/lasx/simd64-inl.h +52 -0
- package/deps/simdutf/src/simdutf/lasx.h +49 -0
- package/deps/simdutf/src/simdutf/lsx/begin.h +2 -0
- package/deps/simdutf/src/simdutf/lsx/bitmanipulation.h +25 -0
- package/deps/simdutf/src/simdutf/lsx/end.h +2 -0
- package/deps/simdutf/src/simdutf/lsx/implementation.h +309 -0
- package/deps/simdutf/src/simdutf/lsx/intrinsics.h +196 -0
- package/deps/simdutf/src/simdutf/lsx/simd.h +421 -0
- package/deps/simdutf/src/simdutf/lsx/simd16-inl.h +242 -0
- package/deps/simdutf/src/simdutf/lsx/simd32-inl.h +69 -0
- package/deps/simdutf/src/simdutf/lsx/simd64-inl.h +50 -0
- package/deps/simdutf/src/simdutf/lsx.h +52 -0
- package/deps/simdutf/src/simdutf/ppc64/begin.h +1 -0
- package/deps/simdutf/src/simdutf/ppc64/bitmanipulation.h +29 -0
- package/deps/simdutf/src/simdutf/ppc64/end.h +1 -0
- package/deps/simdutf/src/simdutf/ppc64/implementation.h +348 -0
- package/deps/simdutf/src/simdutf/ppc64/intrinsics.h +19 -0
- package/deps/simdutf/src/simdutf/ppc64/simd.h +177 -0
- package/deps/simdutf/src/simdutf/ppc64/simd16-inl.h +327 -0
- package/deps/simdutf/src/simdutf/ppc64/simd32-inl.h +247 -0
- package/deps/simdutf/src/simdutf/ppc64/simd8-inl.h +618 -0
- package/deps/simdutf/src/simdutf/ppc64.h +40 -0
- package/deps/simdutf/src/simdutf/rvv/begin.h +7 -0
- package/deps/simdutf/src/simdutf/rvv/end.h +7 -0
- package/deps/simdutf/src/simdutf/rvv/implementation.h +321 -0
- package/deps/simdutf/src/simdutf/rvv/intrinsics.h +131 -0
- package/deps/simdutf/src/simdutf/rvv.h +41 -0
- package/deps/simdutf/src/simdutf/westmere/begin.h +8 -0
- package/deps/simdutf/src/simdutf/westmere/bitmanipulation.h +37 -0
- package/deps/simdutf/src/simdutf/westmere/end.h +8 -0
- package/deps/simdutf/src/simdutf/westmere/implementation.h +338 -0
- package/deps/simdutf/src/simdutf/westmere/intrinsics.h +38 -0
- package/deps/simdutf/src/simdutf/westmere/simd.h +379 -0
- package/deps/simdutf/src/simdutf/westmere/simd16-inl.h +242 -0
- package/deps/simdutf/src/simdutf/westmere/simd32-inl.h +151 -0
- package/deps/simdutf/src/simdutf/westmere/simd64-inl.h +33 -0
- package/deps/simdutf/src/simdutf/westmere.h +59 -0
- package/deps/simdutf/src/simdutf.cpp +152 -0
- package/deps/simdutf/src/simdutf_c.cpp +525 -0
- package/deps/simdutf/src/tables/utf16_to_utf8_tables.h +768 -0
- package/deps/simdutf/src/tables/utf32_to_utf16_tables.h +53 -0
- package/deps/simdutf/src/tables/utf8_to_utf16_tables.h +826 -0
- package/deps/simdutf/src/westmere/implementation.cpp +1479 -0
- package/deps/simdutf/src/westmere/internal/loader.cpp +7 -0
- package/deps/simdutf/src/westmere/internal/write_v_u16_11bits_to_utf8.cpp +66 -0
- package/deps/simdutf/src/westmere/sse_base64.cpp +672 -0
- package/deps/simdutf/src/westmere/sse_convert_latin1_to_utf16.cpp +21 -0
- package/deps/simdutf/src/westmere/sse_convert_latin1_to_utf32.cpp +31 -0
- package/deps/simdutf/src/westmere/sse_convert_latin1_to_utf8.cpp +71 -0
- package/deps/simdutf/src/westmere/sse_convert_utf16_to_latin1.cpp +70 -0
- package/deps/simdutf/src/westmere/sse_convert_utf16_to_utf32.cpp +206 -0
- package/deps/simdutf/src/westmere/sse_convert_utf16_to_utf8.cpp +504 -0
- package/deps/simdutf/src/westmere/sse_convert_utf32_to_latin1.cpp +82 -0
- package/deps/simdutf/src/westmere/sse_convert_utf32_to_utf16.cpp +209 -0
- package/deps/simdutf/src/westmere/sse_convert_utf32_to_utf8.cpp +589 -0
- package/deps/simdutf/src/westmere/sse_convert_utf8_to_latin1.cpp +58 -0
- package/deps/simdutf/src/westmere/sse_convert_utf8_to_utf16.cpp +197 -0
- package/deps/simdutf/src/westmere/sse_convert_utf8_to_utf32.cpp +141 -0
- package/deps/simdutf/src/westmere/sse_utf16fix.cpp +82 -0
- package/deps/simdutf/src/westmere/sse_validate_utf16.cpp +17 -0
- package/deps/simdutf/tests/CMakeLists.txt +483 -0
- package/deps/simdutf/tests/atomic_base64_tests.cpp +2845 -0
- package/deps/simdutf/tests/base64_tests.cpp +3617 -0
- package/deps/simdutf/tests/basic_fuzzer.cpp +805 -0
- package/deps/simdutf/tests/bele_tests.cpp +182 -0
- package/deps/simdutf/tests/constexpr_base64_tests.cpp +387 -0
- package/deps/simdutf/tests/convert_latin1_to_utf16be_tests.cpp +52 -0
- package/deps/simdutf/tests/convert_latin1_to_utf16le_tests.cpp +80 -0
- package/deps/simdutf/tests/convert_latin1_to_utf32_tests.cpp +66 -0
- package/deps/simdutf/tests/convert_latin1_to_utf8_tests.cpp +120 -0
- package/deps/simdutf/tests/convert_utf16_to_utf8_safe_tests.cpp +203 -0
- package/deps/simdutf/tests/convert_utf16_to_utf8_with_replacement_tests.cpp +276 -0
- package/deps/simdutf/tests/convert_utf16be_to_latin1_tests.cpp +109 -0
- package/deps/simdutf/tests/convert_utf16be_to_latin1_tests_with_errors.cpp +136 -0
- package/deps/simdutf/tests/convert_utf16be_to_utf32_tests.cpp +193 -0
- package/deps/simdutf/tests/convert_utf16be_to_utf32_with_errors_tests.cpp +381 -0
- package/deps/simdutf/tests/convert_utf16be_to_utf8_tests.cpp +259 -0
- package/deps/simdutf/tests/convert_utf16be_to_utf8_with_errors_tests.cpp +266 -0
- package/deps/simdutf/tests/convert_utf16le_to_latin1_tests.cpp +148 -0
- package/deps/simdutf/tests/convert_utf16le_to_latin1_tests_with_errors.cpp +176 -0
- package/deps/simdutf/tests/convert_utf16le_to_utf32_tests.cpp +213 -0
- package/deps/simdutf/tests/convert_utf16le_to_utf32_with_errors_tests.cpp +318 -0
- package/deps/simdutf/tests/convert_utf16le_to_utf8_tests.cpp +343 -0
- package/deps/simdutf/tests/convert_utf16le_to_utf8_with_errors_tests.cpp +271 -0
- package/deps/simdutf/tests/convert_utf32_to_latin1_tests.cpp +111 -0
- package/deps/simdutf/tests/convert_utf32_to_latin1_with_errors_tests.cpp +96 -0
- package/deps/simdutf/tests/convert_utf32_to_utf16be_tests.cpp +148 -0
- package/deps/simdutf/tests/convert_utf32_to_utf16be_with_errors_tests.cpp +192 -0
- package/deps/simdutf/tests/convert_utf32_to_utf16le_tests.cpp +166 -0
- package/deps/simdutf/tests/convert_utf32_to_utf16le_with_errors_tests.cpp +215 -0
- package/deps/simdutf/tests/convert_utf32_to_utf8_tests.cpp +181 -0
- package/deps/simdutf/tests/convert_utf32_to_utf8_with_errors_tests.cpp +261 -0
- package/deps/simdutf/tests/convert_utf8_to_latin1_tests.cpp +516 -0
- package/deps/simdutf/tests/convert_utf8_to_latin1_with_errors_tests.cpp +579 -0
- package/deps/simdutf/tests/convert_utf8_to_utf16be_tests.cpp +412 -0
- package/deps/simdutf/tests/convert_utf8_to_utf16be_with_errors_tests.cpp +480 -0
- package/deps/simdutf/tests/convert_utf8_to_utf16le_tests.cpp +671 -0
- package/deps/simdutf/tests/convert_utf8_to_utf16le_with_errors_tests.cpp +455 -0
- package/deps/simdutf/tests/convert_utf8_to_utf32_tests.cpp +1204 -0
- package/deps/simdutf/tests/convert_utf8_to_utf32_with_errors_tests.cpp +337 -0
- package/deps/simdutf/tests/convert_valid_utf16be_to_latin1_tests.cpp +37 -0
- package/deps/simdutf/tests/convert_valid_utf16be_to_utf32_tests.cpp +97 -0
- package/deps/simdutf/tests/convert_valid_utf16be_to_utf8_tests.cpp +126 -0
- package/deps/simdutf/tests/convert_valid_utf16le_to_latin1_tests.cpp +71 -0
- package/deps/simdutf/tests/convert_valid_utf16le_to_utf32_tests.cpp +122 -0
- package/deps/simdutf/tests/convert_valid_utf16le_to_utf8_tests.cpp +244 -0
- package/deps/simdutf/tests/convert_valid_utf32_to_latin1_tests.cpp +49 -0
- package/deps/simdutf/tests/convert_valid_utf32_to_utf16be_tests.cpp +92 -0
- package/deps/simdutf/tests/convert_valid_utf32_to_utf16le_tests.cpp +114 -0
- package/deps/simdutf/tests/convert_valid_utf32_to_utf8_tests.cpp +109 -0
- package/deps/simdutf/tests/convert_valid_utf8_to_latin1_tests.cpp +84 -0
- package/deps/simdutf/tests/convert_valid_utf8_to_utf16be_tests.cpp +124 -0
- package/deps/simdutf/tests/convert_valid_utf8_to_utf16le_tests.cpp +221 -0
- package/deps/simdutf/tests/convert_valid_utf8_to_utf32_tests.cpp +155 -0
- package/deps/simdutf/tests/count_utf16be.cpp +64 -0
- package/deps/simdutf/tests/count_utf16le.cpp +61 -0
- package/deps/simdutf/tests/count_utf8.cpp +87 -0
- package/deps/simdutf/tests/detect_encodings_tests.cpp +312 -0
- package/deps/simdutf/tests/embed/valid_utf8.txt +1 -0
- package/deps/simdutf/tests/embed_tests.cpp +22 -0
- package/deps/simdutf/tests/find_tests.cpp +77 -0
- package/deps/simdutf/tests/fixed_string_tests.cpp +153 -0
- package/deps/simdutf/tests/helpers/CMakeLists.txt +25 -0
- package/deps/simdutf/tests/helpers/compiletime_conversions.h +222 -0
- package/deps/simdutf/tests/helpers/fixed_string.h +267 -0
- package/deps/simdutf/tests/helpers/random_int.cpp +30 -0
- package/deps/simdutf/tests/helpers/random_int.h +39 -0
- package/deps/simdutf/tests/helpers/random_utf16.cpp +123 -0
- package/deps/simdutf/tests/helpers/random_utf16.h +52 -0
- package/deps/simdutf/tests/helpers/random_utf32.cpp +41 -0
- package/deps/simdutf/tests/helpers/random_utf32.h +40 -0
- package/deps/simdutf/tests/helpers/random_utf8.cpp +93 -0
- package/deps/simdutf/tests/helpers/random_utf8.h +36 -0
- package/deps/simdutf/tests/helpers/test.cpp +231 -0
- package/deps/simdutf/tests/helpers/test.h +193 -0
- package/deps/simdutf/tests/helpers/transcode_test_base.cpp +1257 -0
- package/deps/simdutf/tests/helpers/transcode_test_base.h +683 -0
- package/deps/simdutf/tests/helpers/utf16.h +27 -0
- package/deps/simdutf/tests/installation_tests/find/CMakeLists.txt +43 -0
- package/deps/simdutf/tests/installation_tests/from_fetch/CMakeLists.txt +47 -0
- package/deps/simdutf/tests/internal_tests.cpp +27 -0
- package/deps/simdutf/tests/null_safety_tests.cpp +94 -0
- package/deps/simdutf/tests/random_fuzzer.cpp +779 -0
- package/deps/simdutf/tests/readme_tests.cpp +274 -0
- package/deps/simdutf/tests/reference/CMakeLists.txt +23 -0
- package/deps/simdutf/tests/reference/decode_utf16.h +81 -0
- package/deps/simdutf/tests/reference/decode_utf32.h +47 -0
- package/deps/simdutf/tests/reference/encode_latin1.cpp +1 -0
- package/deps/simdutf/tests/reference/encode_latin1.h +32 -0
- package/deps/simdutf/tests/reference/encode_utf16.cpp +49 -0
- package/deps/simdutf/tests/reference/encode_utf16.h +20 -0
- package/deps/simdutf/tests/reference/encode_utf32.cpp +1 -0
- package/deps/simdutf/tests/reference/encode_utf32.h +36 -0
- package/deps/simdutf/tests/reference/encode_utf8.cpp +1 -0
- package/deps/simdutf/tests/reference/encode_utf8.h +40 -0
- package/deps/simdutf/tests/reference/validate_utf16.cpp +60 -0
- package/deps/simdutf/tests/reference/validate_utf16.h +14 -0
- package/deps/simdutf/tests/reference/validate_utf16_to_latin1.cpp +35 -0
- package/deps/simdutf/tests/reference/validate_utf16_to_latin1.h +13 -0
- package/deps/simdutf/tests/reference/validate_utf32.cpp +27 -0
- package/deps/simdutf/tests/reference/validate_utf32.h +12 -0
- package/deps/simdutf/tests/reference/validate_utf32_to_latin1.cpp +27 -0
- package/deps/simdutf/tests/reference/validate_utf32_to_latin1.h +12 -0
- package/deps/simdutf/tests/reference/validate_utf8.cpp +82 -0
- package/deps/simdutf/tests/reference/validate_utf8.h +11 -0
- package/deps/simdutf/tests/reference/validate_utf8_to_latin1.cpp +43 -0
- package/deps/simdutf/tests/reference/validate_utf8_to_latin1.h +12 -0
- package/deps/simdutf/tests/select_implementation.cpp +43 -0
- package/deps/simdutf/tests/simdutf_c_tests.cpp +244 -0
- package/deps/simdutf/tests/span_tests.cpp +401 -0
- package/deps/simdutf/tests/special_tests.cpp +559 -0
- package/deps/simdutf/tests/straight_c_test.c +187 -0
- package/deps/simdutf/tests/text_encoding_tests.cpp +77 -0
- package/deps/simdutf/tests/to_well_formed_utf16_tests.cpp +377 -0
- package/deps/simdutf/tests/utf8_length_from_utf16_tests.cpp +202 -0
- package/deps/simdutf/tests/validate_ascii_basic_tests.cpp +165 -0
- package/deps/simdutf/tests/validate_ascii_with_errors_tests.cpp +77 -0
- package/deps/simdutf/tests/validate_utf16be_basic_tests.cpp +175 -0
- package/deps/simdutf/tests/validate_utf16be_with_errors_tests.cpp +188 -0
- package/deps/simdutf/tests/validate_utf16le_basic_tests.cpp +268 -0
- package/deps/simdutf/tests/validate_utf16le_with_errors_tests.cpp +274 -0
- package/deps/simdutf/tests/validate_utf32_basic_tests.cpp +92 -0
- package/deps/simdutf/tests/validate_utf32_with_errors_tests.cpp +114 -0
- package/deps/simdutf/tests/validate_utf8_basic_tests.cpp +178 -0
- package/deps/simdutf/tests/validate_utf8_brute_force_tests.cpp +88 -0
- package/deps/simdutf/tests/validate_utf8_puzzler_tests.cpp +33 -0
- package/deps/simdutf/tests/validate_utf8_with_errors_tests.cpp +228 -0
- package/deps/simdutf/tools/CMakeLists.txt +85 -0
- package/deps/simdutf/tools/fastbase64.cpp +250 -0
- package/deps/simdutf/tools/sutf.cpp +556 -0
- package/deps/simdutf/tools/sutf.h +40 -0
- package/package.json +2 -2
|
@@ -0,0 +1,318 @@
|
|
|
1
|
+
namespace simdutf {
|
|
2
|
+
namespace SIMDUTF_IMPLEMENTATION {
|
|
3
|
+
namespace {
|
|
4
|
+
namespace utf8_to_utf32 {
|
|
5
|
+
using namespace simd;
|
|
6
|
+
|
|
7
|
+
// Classifies each (prev1, input) byte pair against the UTF-8 error classes
// listed below. Three 16-entry nibble lookups are performed (high nibble of
// prev1, low nibble of prev1, high nibble of input) and AND-ed together, so a
// bit survives in a lane only when all three tables flag that error class.
//
// Error bits:
//   Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII)
//   Bit 1 = Too Long (ASCII followed by continuation)
//   Bit 2 = Overlong 3-byte
//   Bit 3 = Too Large
//   Bit 4 = Surrogate
//   Bit 5 = Overlong 2-byte
//   Bit 6 = Too Large (second byte 1000____) / Overlong 4-byte (shared bit)
//   Bit 7 = Two Continuations
simdutf_really_inline simd8<uint8_t>
check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) {
  constexpr const uint8_t TOO_SHORT = 1 << 0;  // 11______ 0_______
                                               // 11______ 11______
  constexpr const uint8_t TOO_LONG = 1 << 1;   // 0_______ 10______
  constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____
  constexpr const uint8_t TOO_LARGE = 1 << 3;  // 11110100 1001____
                                               // 11110100 101_____
                                               // 11110101 1001____
                                               // 11110101 101_____
                                               // 1111011_ 1001____
                                               // 1111011_ 101_____
                                               // 11111___ 1001____
                                               // 11111___ 101_____
  constexpr const uint8_t SURROGATE = 1 << 4;  // 11101101 101_____
  constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______
  constexpr const uint8_t TOO_LARGE_1000 = 1 << 6;
  // 11110101 1000____
  // 1111011_ 1000____
  // 11111___ 1000____
  constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____
  constexpr const uint8_t TWO_CONTS = 1 << 7;  // 10______ 10______

  // Classes that do not depend on the low nibble of byte 1 (____ in byte 1).
  constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS;
  // Low-nibble entry shared by every value from ____0101 upwards.
  constexpr const uint8_t CARRY_TOO_LARGE =
      CARRY | TOO_LARGE | TOO_LARGE_1000;
  // Classes common to every continuation value (10______) in byte 2.
  constexpr const uint8_t CONT_COMMON = TOO_LONG | OVERLONG_2 | TWO_CONTS;

  // Table 1: indexed by the high nibble of the previous byte.
  const simd8<uint8_t> prev_high_flags = prev1.shr<4>().lookup_16<uint8_t>(
      // 0_______ ________ <ASCII in byte 1>
      TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG,
      TOO_LONG,
      // 10______ ________ <continuation in byte 1>
      TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS,
      // 1100____ ________ <two byte lead in byte 1>
      TOO_SHORT | OVERLONG_2,
      // 1101____ ________ <two byte lead in byte 1>
      TOO_SHORT,
      // 1110____ ________ <three byte lead in byte 1>
      TOO_SHORT | OVERLONG_3 | SURROGATE,
      // 1111____ ________ <four+ byte lead in byte 1>
      TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4);

  // Table 2: indexed by the low nibble of the previous byte.
  const simd8<uint8_t> prev_low_flags =
      (prev1 & 0x0F)
          .lookup_16<uint8_t>(
              // ____0000 ________
              CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4,
              // ____0001 ________
              CARRY | OVERLONG_2,
              // ____001_ ________
              CARRY, CARRY,

              // ____0100 ________
              CARRY | TOO_LARGE,
              // ____0101 ________
              CARRY_TOO_LARGE,
              // ____011_ ________
              CARRY_TOO_LARGE, CARRY_TOO_LARGE,

              // ____1___ ________
              CARRY_TOO_LARGE, CARRY_TOO_LARGE, CARRY_TOO_LARGE,
              CARRY_TOO_LARGE, CARRY_TOO_LARGE,
              // ____1101 ________
              CARRY_TOO_LARGE | SURROGATE,
              CARRY_TOO_LARGE, CARRY_TOO_LARGE);

  // Table 3: indexed by the high nibble of the current byte.
  const simd8<uint8_t> cur_high_flags = input.shr<4>().lookup_16<uint8_t>(
      // ________ 0_______ <ASCII in byte 2>
      TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT,
      TOO_SHORT, TOO_SHORT,

      // ________ 1000____
      CONT_COMMON | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4,
      // ________ 1001____
      CONT_COMMON | OVERLONG_3 | TOO_LARGE,
      // ________ 101_____
      CONT_COMMON | SURROGATE | TOO_LARGE,
      CONT_COMMON | SURROGATE | TOO_LARGE,

      // ________ 11______
      TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT);

  // An error class is reported only when all three lookups agree on it.
  return (prev_high_flags & prev_low_flags & cur_high_flags);
}
|
|
98
|
+
// Combines the special-case flags `sc` with the positions where a 2nd or 3rd
// continuation byte is required.
//
// `must_be_2_3_continuation` is evaluated on the bytes two and three positions
// back; only bit 7 (0x80) of its result is kept and XOR-ed into `sc`. The
// returned vector is what the caller accumulates as its error state.
simdutf_really_inline simd8<uint8_t>
check_multibyte_lengths(const simd8<uint8_t> input,
                        const simd8<uint8_t> prev_input,
                        const simd8<uint8_t> sc) {
  simd8<uint8_t> two_back = input.prev<2>(prev_input);
  simd8<uint8_t> three_back = input.prev<3>(prev_input);
  simd8<uint8_t> continuation_required =
      simd8<uint8_t>(must_be_2_3_continuation(two_back, three_back));
  // Keep only the top bit so it lines up with the 0x80 flag carried in `sc`.
  simd8<uint8_t> continuation_bit = continuation_required & uint8_t(0x80);
  return continuation_bit ^ sc;
}
|
|
109
|
+
|
|
110
|
+
struct validating_transcoder {
|
|
111
|
+
// If this is nonzero, there has been a UTF-8 error.
|
|
112
|
+
simd8<uint8_t> error;
|
|
113
|
+
|
|
114
|
+
validating_transcoder() : error(uint8_t(0)) {}
|
|
115
|
+
//
|
|
116
|
+
// Check whether the current bytes are valid UTF-8.
|
|
117
|
+
//
|
|
118
|
+
simdutf_really_inline void check_utf8_bytes(const simd8<uint8_t> input,
|
|
119
|
+
const simd8<uint8_t> prev_input) {
|
|
120
|
+
// Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+
|
|
121
|
+
// lead bytes (2, 3, 4-byte leads become large positive numbers instead of
|
|
122
|
+
// small negative numbers)
|
|
123
|
+
simd8<uint8_t> prev1 = input.prev<1>(prev_input);
|
|
124
|
+
simd8<uint8_t> sc = check_special_cases(input, prev1);
|
|
125
|
+
this->error |= check_multibyte_lengths(input, prev_input, sc);
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
simdutf_really_inline size_t convert(const char *in, size_t size,
|
|
129
|
+
char32_t *utf32_output) {
|
|
130
|
+
size_t pos = 0;
|
|
131
|
+
char32_t *start{utf32_output};
|
|
132
|
+
// In the worst case, we have the haswell kernel which can cause an overflow
|
|
133
|
+
// of 8 words when calling convert_masked_utf8_to_utf32. If you skip the
|
|
134
|
+
// last 16 bytes, and if the data is valid, then it is entirely safe because
|
|
135
|
+
// 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot
|
|
136
|
+
// generally assume that you have valid UTF-8 input, so we are going to go
|
|
137
|
+
// back from the end counting 16 leading bytes, to give us a good margin.
|
|
138
|
+
size_t leading_byte = 0;
|
|
139
|
+
size_t margin = size;
|
|
140
|
+
for (; margin > 0 && leading_byte < 8; margin--) {
|
|
141
|
+
leading_byte += (int8_t(in[margin - 1]) > -65);
|
|
142
|
+
}
|
|
143
|
+
// If the input is long enough, then we have that margin-1 is the fourth
|
|
144
|
+
// last leading byte.
|
|
145
|
+
const size_t safety_margin = size - margin + 1; // to avoid overruns!
|
|
146
|
+
while (pos + 64 + safety_margin <= size) {
|
|
147
|
+
simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
|
|
148
|
+
if (input.is_ascii()) {
|
|
149
|
+
input.store_ascii_as_utf32(utf32_output);
|
|
150
|
+
utf32_output += 64;
|
|
151
|
+
pos += 64;
|
|
152
|
+
} else {
|
|
153
|
+
// you might think that a for-loop would work, but under Visual Studio,
|
|
154
|
+
// it is not good enough.
|
|
155
|
+
static_assert(
|
|
156
|
+
(simd8x64<uint8_t>::NUM_CHUNKS == 2) ||
|
|
157
|
+
(simd8x64<uint8_t>::NUM_CHUNKS == 4),
|
|
158
|
+
"We support either two or four chunks per 64-byte block.");
|
|
159
|
+
auto zero = simd8<uint8_t>{uint8_t(0)};
|
|
160
|
+
if (simd8x64<uint8_t>::NUM_CHUNKS == 2) {
|
|
161
|
+
this->check_utf8_bytes(input.chunks[0], zero);
|
|
162
|
+
this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
|
|
163
|
+
} else if (simd8x64<uint8_t>::NUM_CHUNKS == 4) {
|
|
164
|
+
this->check_utf8_bytes(input.chunks[0], zero);
|
|
165
|
+
this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
|
|
166
|
+
this->check_utf8_bytes(input.chunks[2], input.chunks[1]);
|
|
167
|
+
this->check_utf8_bytes(input.chunks[3], input.chunks[2]);
|
|
168
|
+
}
|
|
169
|
+
uint64_t utf8_continuation_mask = input.lt(-65 + 1);
|
|
170
|
+
if (utf8_continuation_mask & 1) {
|
|
171
|
+
return 0; // we have an error
|
|
172
|
+
}
|
|
173
|
+
uint64_t utf8_leading_mask = ~utf8_continuation_mask;
|
|
174
|
+
uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1;
|
|
175
|
+
// We process in blocks of up to 12 bytes except possibly
|
|
176
|
+
// for fast paths which may process up to 16 bytes. For the
|
|
177
|
+
// slow path to work, we should have at least 12 input bytes left.
|
|
178
|
+
size_t max_starting_point = (pos + 64) - 12;
|
|
179
|
+
// Next loop is going to run at least five times.
|
|
180
|
+
while (pos < max_starting_point) {
|
|
181
|
+
// Performance note: our ability to compute 'consumed' and
|
|
182
|
+
// then shift and recompute is critical. If there is a
|
|
183
|
+
// latency of, say, 4 cycles on getting 'consumed', then
|
|
184
|
+
// the inner loop might have a total latency of about 6 cycles.
|
|
185
|
+
// Yet we process between 6 to 12 inputs bytes, thus we get
|
|
186
|
+
// a speed limit between 1 cycle/byte and 0.5 cycle/byte
|
|
187
|
+
// for this section of the code. Hence, there is a limit
|
|
188
|
+
// to how much we can further increase this latency before
|
|
189
|
+
// it seriously harms performance.
|
|
190
|
+
size_t consumed = convert_masked_utf8_to_utf32(
|
|
191
|
+
in + pos, utf8_end_of_code_point_mask, utf32_output);
|
|
192
|
+
pos += consumed;
|
|
193
|
+
utf8_end_of_code_point_mask >>= consumed;
|
|
194
|
+
}
|
|
195
|
+
// At this point there may remain between 0 and 12 bytes in the
|
|
196
|
+
// 64-byte block. These bytes will be processed again. So we have an
|
|
197
|
+
// 80% efficiency (in the worst case). In practice we expect an
|
|
198
|
+
// 85% to 90% efficiency.
|
|
199
|
+
}
|
|
200
|
+
}
|
|
201
|
+
if (errors()) {
|
|
202
|
+
return 0;
|
|
203
|
+
}
|
|
204
|
+
if (pos < size) {
|
|
205
|
+
size_t howmany =
|
|
206
|
+
scalar::utf8_to_utf32::convert(in + pos, size - pos, utf32_output);
|
|
207
|
+
if (howmany == 0) {
|
|
208
|
+
return 0;
|
|
209
|
+
}
|
|
210
|
+
utf32_output += howmany;
|
|
211
|
+
}
|
|
212
|
+
return utf32_output - start;
|
|
213
|
+
}
|
|
214
|
+
|
|
215
|
+
simdutf_really_inline result convert_with_errors(const char *in, size_t size,
|
|
216
|
+
char32_t *utf32_output) {
|
|
217
|
+
size_t pos = 0;
|
|
218
|
+
char32_t *start{utf32_output};
|
|
219
|
+
// In the worst case, we have the haswell kernel which can cause an overflow
|
|
220
|
+
// of 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the
|
|
221
|
+
// last 16 bytes, and if the data is valid, then it is entirely safe because
|
|
222
|
+
// 16 UTF-8 bytes generate much more than 8 bytes. However, you cannot
|
|
223
|
+
// generally assume that you have valid UTF-8 input, so we are going to go
|
|
224
|
+
// back from the end counting 8 leading bytes, to give us a good margin.
|
|
225
|
+
size_t leading_byte = 0;
|
|
226
|
+
size_t margin = size;
|
|
227
|
+
for (; margin > 0 && leading_byte < 8; margin--) {
|
|
228
|
+
leading_byte += (int8_t(in[margin - 1]) > -65);
|
|
229
|
+
}
|
|
230
|
+
// If the input is long enough, then we have that margin-1 is the fourth
|
|
231
|
+
// last leading byte.
|
|
232
|
+
const size_t safety_margin = size - margin + 1; // to avoid overruns!
|
|
233
|
+
while (pos + 64 + safety_margin <= size) {
|
|
234
|
+
simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
|
|
235
|
+
if (input.is_ascii()) {
|
|
236
|
+
input.store_ascii_as_utf32(utf32_output);
|
|
237
|
+
utf32_output += 64;
|
|
238
|
+
pos += 64;
|
|
239
|
+
} else {
|
|
240
|
+
// you might think that a for-loop would work, but under Visual Studio,
|
|
241
|
+
// it is not good enough.
|
|
242
|
+
static_assert(
|
|
243
|
+
(simd8x64<uint8_t>::NUM_CHUNKS == 2) ||
|
|
244
|
+
(simd8x64<uint8_t>::NUM_CHUNKS == 4),
|
|
245
|
+
"We support either two or four chunks per 64-byte block.");
|
|
246
|
+
auto zero = simd8<uint8_t>{uint8_t(0)};
|
|
247
|
+
if (simd8x64<uint8_t>::NUM_CHUNKS == 2) {
|
|
248
|
+
this->check_utf8_bytes(input.chunks[0], zero);
|
|
249
|
+
this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
|
|
250
|
+
} else if (simd8x64<uint8_t>::NUM_CHUNKS == 4) {
|
|
251
|
+
this->check_utf8_bytes(input.chunks[0], zero);
|
|
252
|
+
this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
|
|
253
|
+
this->check_utf8_bytes(input.chunks[2], input.chunks[1]);
|
|
254
|
+
this->check_utf8_bytes(input.chunks[3], input.chunks[2]);
|
|
255
|
+
}
|
|
256
|
+
uint64_t utf8_continuation_mask = input.lt(-65 + 1);
|
|
257
|
+
if (errors() || (utf8_continuation_mask & 1)) {
|
|
258
|
+
result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(
|
|
259
|
+
pos, in + pos, size - pos, utf32_output);
|
|
260
|
+
res.count += pos;
|
|
261
|
+
return res;
|
|
262
|
+
}
|
|
263
|
+
uint64_t utf8_leading_mask = ~utf8_continuation_mask;
|
|
264
|
+
uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1;
|
|
265
|
+
// We process in blocks of up to 12 bytes except possibly
|
|
266
|
+
// for fast paths which may process up to 16 bytes. For the
|
|
267
|
+
// slow path to work, we should have at least 12 input bytes left.
|
|
268
|
+
size_t max_starting_point = (pos + 64) - 12;
|
|
269
|
+
// Next loop is going to run at least five times.
|
|
270
|
+
while (pos < max_starting_point) {
|
|
271
|
+
// Performance note: our ability to compute 'consumed' and
|
|
272
|
+
// then shift and recompute is critical. If there is a
|
|
273
|
+
// latency of, say, 4 cycles on getting 'consumed', then
|
|
274
|
+
// the inner loop might have a total latency of about 6 cycles.
|
|
275
|
+
// Yet we process between 6 to 12 inputs bytes, thus we get
|
|
276
|
+
// a speed limit between 1 cycle/byte and 0.5 cycle/byte
|
|
277
|
+
// for this section of the code. Hence, there is a limit
|
|
278
|
+
// to how much we can further increase this latency before
|
|
279
|
+
// it seriously harms performance.
|
|
280
|
+
size_t consumed = convert_masked_utf8_to_utf32(
|
|
281
|
+
in + pos, utf8_end_of_code_point_mask, utf32_output);
|
|
282
|
+
pos += consumed;
|
|
283
|
+
utf8_end_of_code_point_mask >>= consumed;
|
|
284
|
+
}
|
|
285
|
+
// At this point there may remain between 0 and 12 bytes in the
|
|
286
|
+
// 64-byte block. These bytes will be processed again. So we have an
|
|
287
|
+
// 80% efficiency (in the worst case). In practice we expect an
|
|
288
|
+
// 85% to 90% efficiency.
|
|
289
|
+
}
|
|
290
|
+
}
|
|
291
|
+
if (errors()) {
|
|
292
|
+
result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(
|
|
293
|
+
pos, in + pos, size - pos, utf32_output);
|
|
294
|
+
res.count += pos;
|
|
295
|
+
return res;
|
|
296
|
+
}
|
|
297
|
+
if (pos < size) {
|
|
298
|
+
result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(
|
|
299
|
+
pos, in + pos, size - pos, utf32_output);
|
|
300
|
+
if (res.error) { // In case of error, we want the error position
|
|
301
|
+
res.count += pos;
|
|
302
|
+
return res;
|
|
303
|
+
} else { // In case of success, we want the number of word written
|
|
304
|
+
utf32_output += res.count;
|
|
305
|
+
}
|
|
306
|
+
}
|
|
307
|
+
return result(error_code::SUCCESS, utf32_output - start);
|
|
308
|
+
}
|
|
309
|
+
|
|
310
|
+
// Reports whether any bit is set anywhere in the accumulated `error` vector.
simdutf_really_inline bool errors() const {
  const bool any_error_bit = this->error.any_bits_set_anywhere();
  return any_error_bit;
}
|
|
313
|
+
|
|
314
|
+
}; // struct utf8_checker
|
|
315
|
+
} // namespace utf8_to_utf32
|
|
316
|
+
} // unnamed namespace
|
|
317
|
+
} // namespace SIMDUTF_IMPLEMENTATION
|
|
318
|
+
} // namespace simdutf
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
namespace simdutf {
|
|
2
|
+
namespace SIMDUTF_IMPLEMENTATION {
|
|
3
|
+
namespace {
|
|
4
|
+
namespace utf8_to_utf32 {
|
|
5
|
+
|
|
6
|
+
using namespace simd;
|
|
7
|
+
|
|
8
|
+
/**
 * Converts UTF-8 to UTF-32 without validating the input.
 *
 * The input is consumed in 64-byte blocks for as long as at least
 * 64 + 16 bytes remain; whatever is left is handed to
 * scalar::utf8_to_utf32::convert_valid.
 *
 * @param input        start of the UTF-8 bytes
 * @param size         number of input bytes
 * @param utf32_output destination buffer for the UTF-32 code units
 * @return number of char32_t values written to the destination
 */
simdutf_warn_unused size_t convert_valid(const char *input, size_t size,
                                         char32_t *utf32_output) noexcept {
  char32_t *const output_begin = utf32_output;
  const size_t safety_margin = 16; // to avoid overruns!
  size_t pos = 0;

  while (pos + 64 + safety_margin <= size) {
    simd8x64<int8_t> in(reinterpret_cast<const int8_t *>(input + pos));

    // Fast path: a pure-ASCII block widens directly to 64 UTF-32 units.
    if (in.is_ascii()) {
      in.store_ascii_as_utf32(utf32_output);
      utf32_output += 64;
      pos += 64;
      continue;
    }

    // -65 is 0b10111111 in two's complement, i.e. the largest possible
    // continuation byte, so "< -64" selects exactly the continuation bytes.
    const uint64_t continuation_mask = in.lt(-65 + 1);
    const uint64_t leading_mask = ~continuation_mask;
    // A byte ends a code point when the byte after it is not a continuation.
    uint64_t end_of_code_point_mask = leading_mask >> 1;

    // Stop starting new conversions once fewer than 12 bytes of this block
    // remain; the leftover bytes are picked up by the next iteration.
    const size_t last_start = (pos + 64) - 12;
    while (pos < last_start) {
      const size_t bytes_used = convert_masked_utf8_to_utf32(
          input + pos, end_of_code_point_mask, utf32_output);
      pos += bytes_used;
      end_of_code_point_mask >>= bytes_used;
    }
  }

  // Scalar conversion of the remaining tail.
  utf32_output += scalar::utf8_to_utf32::convert_valid(input + pos, size - pos,
                                                       utf32_output);
  return utf32_output - output_begin;
}
|
|
38
|
+
|
|
39
|
+
} // namespace utf8_to_utf32
|
|
40
|
+
} // unnamed namespace
|
|
41
|
+
} // namespace SIMDUTF_IMPLEMENTATION
|
|
42
|
+
} // namespace simdutf
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
namespace simdutf {
|
|
2
|
+
namespace SIMDUTF_IMPLEMENTATION {
|
|
3
|
+
namespace {
|
|
4
|
+
namespace utf8_validation {
|
|
5
|
+
|
|
6
|
+
using namespace simd;
|
|
7
|
+
|
|
8
|
+
/**
 * Classifies each (prev1, input) byte pair with three 16-entry nibble
 * lookups: the high nibble of prev1, the low nibble of prev1 and the high
 * nibble of input. Each table entry is a set of error-class bits; the
 * function returns the lane-wise AND of the three lookups, so a bit survives
 * only where all three nibbles are compatible with that error class.
 *
 * @param input current bytes ("byte 2" of each pair)
 * @param prev1 the same bytes shifted back by one ("byte 1" of each pair)
 */
simdutf_really_inline simd8<uint8_t>
check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) {
  // Error classes, one bit each. Patterns are written <byte 1> <byte 2>.
  //
  // Too short (lead byte/ASCII followed by lead byte/ASCII):
  //   11______ 0_______
  //   11______ 11______
  constexpr uint8_t TOO_SHORT = 1 << 0;
  // Too long (ASCII followed by continuation): 0_______ 10______
  constexpr uint8_t TOO_LONG = 1 << 1;
  // Overlong 3-byte: 11100000 100_____
  constexpr uint8_t OVERLONG_3 = 1 << 2;
  // Too large:
  //   11110100 1001____
  //   11110100 101_____
  //   11110101 1001____
  //   11110101 101_____
  //   1111011_ 1001____
  //   1111011_ 101_____
  //   11111___ 1001____
  //   11111___ 101_____
  constexpr uint8_t TOO_LARGE = 1 << 3;
  // Surrogate: 11101101 101_____
  constexpr uint8_t SURROGATE = 1 << 4;
  // Overlong 2-byte: 1100000_ 10______
  constexpr uint8_t OVERLONG_2 = 1 << 5;
  // Too large, second byte 1000____ (shares bit 6 with OVERLONG_4):
  //   11110101 1000____
  //   1111011_ 1000____
  //   11111___ 1000____
  constexpr uint8_t TOO_LARGE_1000 = 1 << 6;
  // Overlong 4-byte: 11110000 1000____
  constexpr uint8_t OVERLONG_4 = 1 << 6;
  // Two continuations: 10______ 10______
  constexpr uint8_t TWO_CONTS = 1 << 7;

  // These classes do not depend on the low nibble of byte 1 (____ there).
  constexpr uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS;
  // Entry shared by most low nibbles of byte 1 from 0101 upward.
  constexpr uint8_t CARRY_LARGE = CARRY | TOO_LARGE | TOO_LARGE_1000;
  // Classes common to every continuation value of byte 2.
  constexpr uint8_t CONT = TOO_LONG | OVERLONG_2 | TWO_CONTS;

  const simd8<uint8_t> byte_1_high = prev1.shr<4>().lookup_16<uint8_t>(
      // 0_______ ________ <ASCII in byte 1>
      TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG,
      TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG,
      // 10______ ________ <continuation in byte 1>
      TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS,
      // 1100____ ________ <two byte lead in byte 1>
      TOO_SHORT | OVERLONG_2,
      // 1101____ ________ <two byte lead in byte 1>
      TOO_SHORT,
      // 1110____ ________ <three byte lead in byte 1>
      TOO_SHORT | OVERLONG_3 | SURROGATE,
      // 1111____ ________ <four+ byte lead in byte 1>
      TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4);

  const simd8<uint8_t> byte_1_low =
      (prev1 & 0x0F)
          .lookup_16<uint8_t>(
              // ____0000 ________
              CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4,
              // ____0001 ________
              CARRY | OVERLONG_2,
              // ____001_ ________
              CARRY, CARRY,
              // ____0100 ________
              CARRY | TOO_LARGE,
              // ____0101 ________
              CARRY_LARGE,
              // ____011_ ________
              CARRY_LARGE, CARRY_LARGE,
              // ____1000 .. ____1100 ________
              CARRY_LARGE, CARRY_LARGE, CARRY_LARGE, CARRY_LARGE, CARRY_LARGE,
              // ____1101 ________
              CARRY_LARGE | SURROGATE,
              // ____111_ ________
              CARRY_LARGE, CARRY_LARGE);

  const simd8<uint8_t> byte_2_high = input.shr<4>().lookup_16<uint8_t>(
      // ________ 0_______ <ASCII in byte 2>
      TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT,
      TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT,
      // ________ 1000____
      CONT | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4,
      // ________ 1001____
      CONT | OVERLONG_3 | TOO_LARGE,
      // ________ 101_____
      CONT | SURROGATE | TOO_LARGE,
      CONT | SURROGATE | TOO_LARGE,
      // ________ 11______
      TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT);

  return (byte_1_high & byte_1_low & byte_2_high);
}
|
|
99
|
+
/**
 * Combines the special-case classification with the positions that
 * must_be_2_3_continuation reports for the bytes two and three places back.
 *
 * Only the top bit (0x80) of that report is kept and XOR-ed into `sc`.
 *
 * @param input      current bytes
 * @param prev_input the bytes that preceded `input`
 * @param sc         result of check_special_cases for `input`
 * @return `sc` with bit 7 toggled wherever must_be_2_3_continuation sets it
 */
simdutf_really_inline simd8<uint8_t>
check_multibyte_lengths(const simd8<uint8_t> input,
                        const simd8<uint8_t> prev_input,
                        const simd8<uint8_t> sc) {
  simd8<uint8_t> two_back = input.prev<2>(prev_input);
  simd8<uint8_t> three_back = input.prev<3>(prev_input);
  simd8<uint8_t> needs_continuation(
      must_be_2_3_continuation(two_back, three_back));
  simd8<uint8_t> needs_continuation_80 = needs_continuation & uint8_t(0x80);
  return needs_continuation_80 ^ sc;
}
|
|
110
|
+
|
|
111
|
+
//
|
|
112
|
+
// Return nonzero if there are incomplete multibyte characters at the end of the
|
|
113
|
+
// block: e.g. if there is a 4-byte character, but it is 3 bytes from the end.
|
|
114
|
+
//
|
|
115
|
+
simdutf_really_inline simd8<uint8_t> is_incomplete(const simd8<uint8_t> input) {
|
|
116
|
+
// If the previous input's last 3 bytes match this, they're too short (they
|
|
117
|
+
// ended at EOF):
|
|
118
|
+
// ... 1111____ 111_____ 11______
|
|
119
|
+
static const uint8_t max_array[32] = {255,
|
|
120
|
+
255,
|
|
121
|
+
255,
|
|
122
|
+
255,
|
|
123
|
+
255,
|
|
124
|
+
255,
|
|
125
|
+
255,
|
|
126
|
+
255,
|
|
127
|
+
255,
|
|
128
|
+
255,
|
|
129
|
+
255,
|
|
130
|
+
255,
|
|
131
|
+
255,
|
|
132
|
+
255,
|
|
133
|
+
255,
|
|
134
|
+
255,
|
|
135
|
+
255,
|
|
136
|
+
255,
|
|
137
|
+
255,
|
|
138
|
+
255,
|
|
139
|
+
255,
|
|
140
|
+
255,
|
|
141
|
+
255,
|
|
142
|
+
255,
|
|
143
|
+
255,
|
|
144
|
+
255,
|
|
145
|
+
255,
|
|
146
|
+
255,
|
|
147
|
+
255,
|
|
148
|
+
0b11110000u - 1,
|
|
149
|
+
0b11100000u - 1,
|
|
150
|
+
0b11000000u - 1};
|
|
151
|
+
const simd8<uint8_t> max_value(
|
|
152
|
+
&max_array[sizeof(max_array) - sizeof(simd8<uint8_t>)]);
|
|
153
|
+
return input.gt_bits(max_value);
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
struct utf8_checker {
|
|
157
|
+
// If this is nonzero, there has been a UTF-8 error.
|
|
158
|
+
simd8<uint8_t> error;
|
|
159
|
+
// The last input we received
|
|
160
|
+
simd8<uint8_t> prev_input_block;
|
|
161
|
+
// Whether the last input we received was incomplete (used for ASCII fast
|
|
162
|
+
// path)
|
|
163
|
+
simd8<uint8_t> prev_incomplete;
|
|
164
|
+
|
|
165
|
+
//
|
|
166
|
+
// Check whether the current bytes are valid UTF-8.
|
|
167
|
+
//
|
|
168
|
+
simdutf_really_inline void check_utf8_bytes(const simd8<uint8_t> input,
|
|
169
|
+
const simd8<uint8_t> prev_input) {
|
|
170
|
+
// Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+
|
|
171
|
+
// lead bytes (2, 3, 4-byte leads become large positive numbers instead of
|
|
172
|
+
// small negative numbers)
|
|
173
|
+
simd8<uint8_t> prev1 = input.prev<1>(prev_input);
|
|
174
|
+
simd8<uint8_t> sc = check_special_cases(input, prev1);
|
|
175
|
+
this->error |= check_multibyte_lengths(input, prev_input, sc);
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
// The only problem that can happen at EOF is that a multibyte character is
|
|
179
|
+
// too short or a byte value too large in the last bytes: check_special_cases
|
|
180
|
+
// only checks for bytes too large in the first of two bytes.
|
|
181
|
+
simdutf_really_inline void check_eof() {
|
|
182
|
+
// If the previous block had incomplete UTF-8 characters at the end, an
|
|
183
|
+
// ASCII block can't possibly finish them.
|
|
184
|
+
this->error |= this->prev_incomplete;
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
simdutf_really_inline void check_next_input(const simd8x64<uint8_t> &input) {
|
|
188
|
+
if (simdutf_likely(is_ascii(input))) {
|
|
189
|
+
this->error |= this->prev_incomplete;
|
|
190
|
+
} else {
|
|
191
|
+
// you might think that a for-loop would work, but under Visual Studio, it
|
|
192
|
+
// is not good enough.
|
|
193
|
+
static_assert((simd8x64<uint8_t>::NUM_CHUNKS == 2) ||
|
|
194
|
+
(simd8x64<uint8_t>::NUM_CHUNKS == 4),
|
|
195
|
+
"We support either two or four chunks per 64-byte block.");
|
|
196
|
+
if (simd8x64<uint8_t>::NUM_CHUNKS == 2) {
|
|
197
|
+
this->check_utf8_bytes(input.chunks[0], this->prev_input_block);
|
|
198
|
+
this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
|
|
199
|
+
} else if (simd8x64<uint8_t>::NUM_CHUNKS == 4) {
|
|
200
|
+
this->check_utf8_bytes(input.chunks[0], this->prev_input_block);
|
|
201
|
+
this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
|
|
202
|
+
this->check_utf8_bytes(input.chunks[2], input.chunks[1]);
|
|
203
|
+
this->check_utf8_bytes(input.chunks[3], input.chunks[2]);
|
|
204
|
+
}
|
|
205
|
+
this->prev_incomplete =
|
|
206
|
+
is_incomplete(input.chunks[simd8x64<uint8_t>::NUM_CHUNKS - 1]);
|
|
207
|
+
this->prev_input_block = input.chunks[simd8x64<uint8_t>::NUM_CHUNKS - 1];
|
|
208
|
+
}
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
// do not forget to call check_eof!
|
|
212
|
+
simdutf_really_inline bool errors() const {
|
|
213
|
+
return this->error.any_bits_set_anywhere();
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
}; // struct utf8_checker
|
|
217
|
+
} // namespace utf8_validation
|
|
218
|
+
|
|
219
|
+
using utf8_validation::utf8_checker;
|
|
220
|
+
|
|
221
|
+
} // unnamed namespace
|
|
222
|
+
} // namespace SIMDUTF_IMPLEMENTATION
|
|
223
|
+
} // namespace simdutf
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
namespace simdutf {
|
|
2
|
+
namespace SIMDUTF_IMPLEMENTATION {
|
|
3
|
+
namespace {
|
|
4
|
+
namespace utf8_validation {
|
|
5
|
+
|
|
6
|
+
/**
|
|
7
|
+
* Validates that the string is actual UTF-8.
|
|
8
|
+
*/
|
|
9
|
+
template <class checker>
|
|
10
|
+
bool generic_validate_utf8(const uint8_t *input, size_t length) {
|
|
11
|
+
checker c{};
|
|
12
|
+
buf_block_reader<64> reader(input, length);
|
|
13
|
+
while (reader.has_full_block()) {
|
|
14
|
+
simd::simd8x64<uint8_t> in(reader.full_block());
|
|
15
|
+
c.check_next_input(in);
|
|
16
|
+
reader.advance();
|
|
17
|
+
}
|
|
18
|
+
uint8_t block[64]{};
|
|
19
|
+
reader.get_remainder(block);
|
|
20
|
+
simd::simd8x64<uint8_t> in(block);
|
|
21
|
+
c.check_next_input(in);
|
|
22
|
+
reader.advance();
|
|
23
|
+
c.check_eof();
|
|
24
|
+
return !c.errors();
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
bool generic_validate_utf8(const char *input, size_t length) {
|
|
28
|
+
return generic_validate_utf8<utf8_checker>(
|
|
29
|
+
reinterpret_cast<const uint8_t *>(input), length);
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
/**
|
|
33
|
+
* Validates that the string is actual UTF-8 and stops on errors.
|
|
34
|
+
*/
|
|
35
|
+
template <class checker>
|
|
36
|
+
result generic_validate_utf8_with_errors(const uint8_t *input, size_t length) {
|
|
37
|
+
checker c{};
|
|
38
|
+
buf_block_reader<64> reader(input, length);
|
|
39
|
+
size_t count{0};
|
|
40
|
+
while (reader.has_full_block()) {
|
|
41
|
+
simd::simd8x64<uint8_t> in(reader.full_block());
|
|
42
|
+
c.check_next_input(in);
|
|
43
|
+
if (c.errors()) {
|
|
44
|
+
if (count != 0) {
|
|
45
|
+
count--;
|
|
46
|
+
} // Sometimes the error is only detected in the next chunk
|
|
47
|
+
result res = scalar::utf8::rewind_and_validate_with_errors(
|
|
48
|
+
reinterpret_cast<const char *>(input),
|
|
49
|
+
reinterpret_cast<const char *>(input + count), length - count);
|
|
50
|
+
res.count += count;
|
|
51
|
+
return res;
|
|
52
|
+
}
|
|
53
|
+
reader.advance();
|
|
54
|
+
count += 64;
|
|
55
|
+
}
|
|
56
|
+
uint8_t block[64]{};
|
|
57
|
+
reader.get_remainder(block);
|
|
58
|
+
simd::simd8x64<uint8_t> in(block);
|
|
59
|
+
c.check_next_input(in);
|
|
60
|
+
reader.advance();
|
|
61
|
+
c.check_eof();
|
|
62
|
+
if (c.errors()) {
|
|
63
|
+
if (count != 0) {
|
|
64
|
+
count--;
|
|
65
|
+
} // Sometimes the error is only detected in the next chunk
|
|
66
|
+
result res = scalar::utf8::rewind_and_validate_with_errors(
|
|
67
|
+
reinterpret_cast<const char *>(input),
|
|
68
|
+
reinterpret_cast<const char *>(input) + count, length - count);
|
|
69
|
+
res.count += count;
|
|
70
|
+
return res;
|
|
71
|
+
} else {
|
|
72
|
+
return result(error_code::SUCCESS, length);
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
// Convenience overload for `const char *` input: reinterprets the bytes as
// uint8_t and runs the error-reporting validation with utf8_checker.
result generic_validate_utf8_with_errors(const char *input, size_t length) {
  const uint8_t *bytes = reinterpret_cast<const uint8_t *>(input);
  return generic_validate_utf8_with_errors<utf8_checker>(bytes, length);
}
|
|
80
|
+
|
|
81
|
+
} // namespace utf8_validation
|
|
82
|
+
} // unnamed namespace
|
|
83
|
+
} // namespace SIMDUTF_IMPLEMENTATION
|
|
84
|
+
} // namespace simdutf
|