react-native-quick-crypto 1.0.19 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/QuickCrypto.podspec +12 -38
- package/README.md +2 -0
- package/android/CMakeLists.txt +3 -0
- package/cpp/utils/HybridUtils.cpp +39 -77
- package/deps/simdutf/.clang-format +4 -0
- package/deps/simdutf/.github/ISSUE_TEMPLATE/bug_report.md +62 -0
- package/deps/simdutf/.github/ISSUE_TEMPLATE/config.yml +1 -0
- package/deps/simdutf/.github/ISSUE_TEMPLATE/feature_request.md +35 -0
- package/deps/simdutf/.github/ISSUE_TEMPLATE/standard-issue-template.md +29 -0
- package/deps/simdutf/.github/pull_request_template.md +51 -0
- package/deps/simdutf/.github/workflows/aarch64.yml +39 -0
- package/deps/simdutf/.github/workflows/alpine.yml +27 -0
- package/deps/simdutf/.github/workflows/amalgamation_demos.yml +34 -0
- package/deps/simdutf/.github/workflows/armv7.yml +32 -0
- package/deps/simdutf/.github/workflows/atomic_fuzz.yml +25 -0
- package/deps/simdutf/.github/workflows/cifuzz.yml +37 -0
- package/deps/simdutf/.github/workflows/clangformat.yml +36 -0
- package/deps/simdutf/.github/workflows/debian-latestcxxstandards.yml +40 -0
- package/deps/simdutf/.github/workflows/debian.yml +33 -0
- package/deps/simdutf/.github/workflows/documentation.yml +36 -0
- package/deps/simdutf/.github/workflows/emscripten.yml +19 -0
- package/deps/simdutf/.github/workflows/loongarch64-gcc-14.2.yml +39 -0
- package/deps/simdutf/.github/workflows/macos-latest.yml +29 -0
- package/deps/simdutf/.github/workflows/msys2-clang.yml +48 -0
- package/deps/simdutf/.github/workflows/msys2.yml +50 -0
- package/deps/simdutf/.github/workflows/ppc64le.yml +29 -0
- package/deps/simdutf/.github/workflows/rvv-1024-clang-18.yml +35 -0
- package/deps/simdutf/.github/workflows/rvv-128-clang-17.yml +35 -0
- package/deps/simdutf/.github/workflows/rvv-256-gcc-14.yml +31 -0
- package/deps/simdutf/.github/workflows/s390x.yml +29 -0
- package/deps/simdutf/.github/workflows/selective-amalgamation.yml +29 -0
- package/deps/simdutf/.github/workflows/typos.yml +19 -0
- package/deps/simdutf/.github/workflows/ubuntu22-cxx20.yml +30 -0
- package/deps/simdutf/.github/workflows/ubuntu22.yml +32 -0
- package/deps/simdutf/.github/workflows/ubuntu22_gcc12.yml +27 -0
- package/deps/simdutf/.github/workflows/ubuntu22sani.yml +29 -0
- package/deps/simdutf/.github/workflows/ubuntu24-cxxstandards.yml +34 -0
- package/deps/simdutf/.github/workflows/ubuntu24-unsignedchar.yml +34 -0
- package/deps/simdutf/.github/workflows/ubuntu24.yml +32 -0
- package/deps/simdutf/.github/workflows/ubuntu24sani.yml +36 -0
- package/deps/simdutf/.github/workflows/ubuntu24sani_clang.yml +29 -0
- package/deps/simdutf/.github/workflows/vs17-arm-ci.yml +21 -0
- package/deps/simdutf/.github/workflows/vs17-ci-cxx20.yml +41 -0
- package/deps/simdutf/.github/workflows/vs17-ci.yml +41 -0
- package/deps/simdutf/.github/workflows/vs17-clang-ci.yml +41 -0
- package/deps/simdutf/.github/workflows/vs17-cxxstandards.yml +36 -0
- package/deps/simdutf/AI_USAGE_POLICY.md +56 -0
- package/deps/simdutf/AUTHORS +6 -0
- package/deps/simdutf/CMakeLists.txt +231 -0
- package/deps/simdutf/CONTRIBUTING.md +214 -0
- package/deps/simdutf/CONTRIBUTORS +1 -0
- package/deps/simdutf/Doxyfile +2584 -0
- package/deps/simdutf/LICENSE-APACHE +201 -0
- package/deps/simdutf/LICENSE-MIT +18 -0
- package/deps/simdutf/Makefile.crosscompile +54 -0
- package/deps/simdutf/README-RVV.md +16 -0
- package/deps/simdutf/README.md +2782 -0
- package/deps/simdutf/SECURITY.md +8 -0
- package/deps/simdutf/benchmarks/CMakeLists.txt +101 -0
- package/deps/simdutf/benchmarks/alignment.cpp +150 -0
- package/deps/simdutf/benchmarks/base64/CMakeLists.txt +30 -0
- package/deps/simdutf/benchmarks/base64/benchmark_base64.cpp +875 -0
- package/deps/simdutf/benchmarks/base64/libbase64_spaces.h +49 -0
- package/deps/simdutf/benchmarks/base64/node_base64.h +227 -0
- package/deps/simdutf/benchmarks/base64/openssl3_base64.h +334 -0
- package/deps/simdutf/benchmarks/benchmark.cpp +65 -0
- package/deps/simdutf/benchmarks/benchmark_to_well_formed_utf16.cpp +347 -0
- package/deps/simdutf/benchmarks/competition/.clang-format-ignore +5 -0
- package/deps/simdutf/benchmarks/competition/CppCon2018/utf_utils.cpp +1276 -0
- package/deps/simdutf/benchmarks/competition/CppCon2018/utf_utils.h +595 -0
- package/deps/simdutf/benchmarks/competition/README.md +7 -0
- package/deps/simdutf/benchmarks/competition/hoehrmann/hoehrmann.h +91 -0
- package/deps/simdutf/benchmarks/competition/inoue2008/inoue_utf8_to_utf16.h +444 -0
- package/deps/simdutf/benchmarks/competition/inoue2008/inoue_utf8_to_utf16_tables.h +13183 -0
- package/deps/simdutf/benchmarks/competition/inoue2008/script.py +73 -0
- package/deps/simdutf/benchmarks/competition/llvm/ConvertUTF.cpp +738 -0
- package/deps/simdutf/benchmarks/competition/llvm/ConvertUTF.h +293 -0
- package/deps/simdutf/benchmarks/competition/u8u16/COPYRIGHT +8 -0
- package/deps/simdutf/benchmarks/competition/u8u16/Makefile +44 -0
- package/deps/simdutf/benchmarks/competition/u8u16/OSL3.0.txt +169 -0
- package/deps/simdutf/benchmarks/competition/u8u16/Profiling/BOM_Profiler.h +148 -0
- package/deps/simdutf/benchmarks/competition/u8u16/Profiling/i386_timer.h +45 -0
- package/deps/simdutf/benchmarks/competition/u8u16/Profiling/ppc_timer.c +34 -0
- package/deps/simdutf/benchmarks/competition/u8u16/README +56 -0
- package/deps/simdutf/benchmarks/competition/u8u16/config/config_defs.h +43 -0
- package/deps/simdutf/benchmarks/competition/u8u16/config/g4_config.h +27 -0
- package/deps/simdutf/benchmarks/competition/u8u16/config/mmx_config.h +16 -0
- package/deps/simdutf/benchmarks/competition/u8u16/config/p4_config.h +18 -0
- package/deps/simdutf/benchmarks/competition/u8u16/config/p4_ideal_config.h +16 -0
- package/deps/simdutf/benchmarks/competition/u8u16/config/spu_config.h +28 -0
- package/deps/simdutf/benchmarks/competition/u8u16/config/ssse3_config.h +20 -0
- package/deps/simdutf/benchmarks/competition/u8u16/iconv_u8u16.c +2 -0
- package/deps/simdutf/benchmarks/competition/u8u16/lib/altivec_simd.h +440 -0
- package/deps/simdutf/benchmarks/competition/u8u16/lib/libgen/make_basic_ops.py +121 -0
- package/deps/simdutf/benchmarks/competition/u8u16/lib/libgen/make_half_operand_versions.py +158 -0
- package/deps/simdutf/benchmarks/competition/u8u16/lib/libgen/make_test.py +270 -0
- package/deps/simdutf/benchmarks/competition/u8u16/lib/mmx_simd.h +141 -0
- package/deps/simdutf/benchmarks/competition/u8u16/lib/mmx_simd_basic.h +216 -0
- package/deps/simdutf/benchmarks/competition/u8u16/lib/mmx_simd_built_in.h +119 -0
- package/deps/simdutf/benchmarks/competition/u8u16/lib/mmx_simd_modified.h +2430 -0
- package/deps/simdutf/benchmarks/competition/u8u16/lib/outline.txt +39 -0
- package/deps/simdutf/benchmarks/competition/u8u16/lib/spu_simd.h +421 -0
- package/deps/simdutf/benchmarks/competition/u8u16/lib/sse_simd.h +836 -0
- package/deps/simdutf/benchmarks/competition/u8u16/lib/stdint.h +222 -0
- package/deps/simdutf/benchmarks/competition/u8u16/libu8u16_BE.c +4 -0
- package/deps/simdutf/benchmarks/competition/u8u16/libu8u16_LE.c +5 -0
- package/deps/simdutf/benchmarks/competition/u8u16/proto/u8u16.py +390 -0
- package/deps/simdutf/benchmarks/competition/u8u16/src/Makefile +18 -0
- package/deps/simdutf/benchmarks/competition/u8u16/src/bytelex.h +448 -0
- package/deps/simdutf/benchmarks/competition/u8u16/src/charsets/ASCII_EBCDIC.h +284 -0
- package/deps/simdutf/benchmarks/competition/u8u16/src/libu8u16.c +1975 -0
- package/deps/simdutf/benchmarks/competition/u8u16/src/libu8u16.pdf +0 -0
- package/deps/simdutf/benchmarks/competition/u8u16/src/libu8u16.w +2263 -0
- package/deps/simdutf/benchmarks/competition/u8u16/src/multiliteral.h +239 -0
- package/deps/simdutf/benchmarks/competition/u8u16/src/u8u16.c +232 -0
- package/deps/simdutf/benchmarks/competition/u8u16/src/x8x16.c +194 -0
- package/deps/simdutf/benchmarks/competition/u8u16/src/xml_error.c +193 -0
- package/deps/simdutf/benchmarks/competition/u8u16/src/xml_error.h +167 -0
- package/deps/simdutf/benchmarks/competition/u8u16/src/xmldecl.c +288 -0
- package/deps/simdutf/benchmarks/competition/u8u16/src/xmldecl.h +117 -0
- package/deps/simdutf/benchmarks/competition/u8u16/u8u16_g4.c +2 -0
- package/deps/simdutf/benchmarks/competition/u8u16/u8u16_mmx.c +2 -0
- package/deps/simdutf/benchmarks/competition/u8u16/u8u16_p4.c +3 -0
- package/deps/simdutf/benchmarks/competition/u8u16/u8u16_p4_ideal.c +2 -0
- package/deps/simdutf/benchmarks/competition/u8u16/u8u16_spu.c +2 -0
- package/deps/simdutf/benchmarks/competition/u8u16/u8u16_ssse3.c +3 -0
- package/deps/simdutf/benchmarks/competition/u8u16/x8x16_p4.c +2 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/LICENSE +23 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/data/test_minimal.txt +44 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/readme.md +106 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_clang_corr_tests.cmd +11 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_clang_corr_tests.sh +13 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_gcc_corr_tests.sh +13 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_gcc_example.sh +13 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_gcc_file_conv.sh +14 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_gcc_iconv_lib.sh +11 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_gcc_iconv_sample.sh +8 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_mingw_corr_tests.cmd +12 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_mingw_example.cmd +13 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_mingw_file_conv.cmd +14 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_mingw_iconv_lib.cmd +11 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_mingw_iconv_sample.cmd +8 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_msvc_corr_tests.cmd +11 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_msvc_example.cmd +12 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_msvc_file_conv.cmd +13 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_msvc_iconv_lib.cmd +10 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_msvc_iconv_sample.cmd +9 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/html_table.py +25 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/measure.py +94 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/resize.py +20 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/wipe_all.cmd +2 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/wipe_interm.cmd +1 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/base/CustomMemcpy.h +75 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/base/PerfDefs.h +47 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/base/Timing.cpp +17 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/base/Timing.h +76 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/AllProcessors.cpp +35 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/BaseBufferProcessor.cpp +117 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/BaseBufferProcessor.h +210 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/BufferDecoder.h +158 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/BufferEncoder.h +104 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/ProcessorPlugins.h +334 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/ProcessorSelector.h +186 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/core/DecoderLut.cpp +140 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/core/DecoderLut.h +42 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/core/DecoderProcess.h +100 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/core/Dfa.h +57 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/core/EncoderLut.cpp +85 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/core/EncoderLut.h +27 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/core/EncoderProcess.h +126 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/core/ProcessTrivial.h +108 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/iconv/iconv.cpp +139 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/iconv/iconv.h +74 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/message/MessageConverter.cpp +65 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/message/MessageConverter.h +91 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/tests/CorrectnessTests.cpp +772 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/tests/Example.cpp +12 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/tests/FileConverter.cpp +486 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/tests/iconv_sample.c +162 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/utf8lut.h +15 -0
- package/deps/simdutf/benchmarks/competition/utf8sse4/fromutf8-sse.cpp +292 -0
- package/deps/simdutf/benchmarks/competition/utfcpp/LICENSE +23 -0
- package/deps/simdutf/benchmarks/competition/utfcpp/README.md +1503 -0
- package/deps/simdutf/benchmarks/competition/utfcpp/source/utf8/checked.h +335 -0
- package/deps/simdutf/benchmarks/competition/utfcpp/source/utf8/core.h +338 -0
- package/deps/simdutf/benchmarks/competition/utfcpp/source/utf8/cpp11.h +103 -0
- package/deps/simdutf/benchmarks/competition/utfcpp/source/utf8/cpp17.h +103 -0
- package/deps/simdutf/benchmarks/competition/utfcpp/source/utf8/unchecked.h +274 -0
- package/deps/simdutf/benchmarks/competition/utfcpp/source/utf8.h +34 -0
- package/deps/simdutf/benchmarks/dataset/README.md +155 -0
- package/deps/simdutf/benchmarks/dataset/emoji.txt +204 -0
- package/deps/simdutf/benchmarks/dataset/scripts/utf8type.py +40 -0
- package/deps/simdutf/benchmarks/dataset/wikipedia_mars/Makefile +80 -0
- package/deps/simdutf/benchmarks/dataset/wikipedia_mars/convert_to_utf6.py +20 -0
- package/deps/simdutf/benchmarks/find/CMakeLists.txt +6 -0
- package/deps/simdutf/benchmarks/find/findbenchmark.cpp +63 -0
- package/deps/simdutf/benchmarks/find/findbenchmarker.h +46 -0
- package/deps/simdutf/benchmarks/shortbench.cpp +555 -0
- package/deps/simdutf/benchmarks/src/CMakeLists.txt +52 -0
- package/deps/simdutf/benchmarks/src/apple_arm_events.h +1104 -0
- package/deps/simdutf/benchmarks/src/benchmark.cpp +3899 -0
- package/deps/simdutf/benchmarks/src/benchmark.h +317 -0
- package/deps/simdutf/benchmarks/src/benchmark_base.cpp +144 -0
- package/deps/simdutf/benchmarks/src/benchmark_base.h +98 -0
- package/deps/simdutf/benchmarks/src/cmdline.cpp +176 -0
- package/deps/simdutf/benchmarks/src/cmdline.h +35 -0
- package/deps/simdutf/benchmarks/src/event_counter.h +162 -0
- package/deps/simdutf/benchmarks/src/linux-perf-events.h +104 -0
- package/deps/simdutf/benchmarks/stream.cpp +209 -0
- package/deps/simdutf/benchmarks/threaded.cpp +123 -0
- package/deps/simdutf/cmake/CPM.cmake +1363 -0
- package/deps/simdutf/cmake/JoinPaths.cmake +23 -0
- package/deps/simdutf/cmake/add_cpp_test.cmake +68 -0
- package/deps/simdutf/cmake/simdutf-config.cmake.in +2 -0
- package/deps/simdutf/cmake/simdutf-flags.cmake +26 -0
- package/deps/simdutf/cmake/toolchains-ci/riscv64-linux-gnu.cmake +4 -0
- package/deps/simdutf/cmake/toolchains-dev/README.md +32 -0
- package/deps/simdutf/cmake/toolchains-dev/aarch64.cmake +14 -0
- package/deps/simdutf/cmake/toolchains-dev/loongarch64.cmake +22 -0
- package/deps/simdutf/cmake/toolchains-dev/powerpc64.cmake +16 -0
- package/deps/simdutf/cmake/toolchains-dev/powerpc64le.cmake +16 -0
- package/deps/simdutf/cmake/toolchains-dev/riscv64.cmake +16 -0
- package/deps/simdutf/cmake/toolchains-dev/rvv-spike.cmake +38 -0
- package/deps/simdutf/doc/avx512.png +0 -0
- package/deps/simdutf/doc/logo.png +0 -0
- package/deps/simdutf/doc/logo.svg +165 -0
- package/deps/simdutf/doc/node2023.png +0 -0
- package/deps/simdutf/doc/shortinput.md +78 -0
- package/deps/simdutf/doc/utf16utf8.png +0 -0
- package/deps/simdutf/doc/utf8utf16.png +0 -0
- package/deps/simdutf/doc/widelogo.png +0 -0
- package/deps/simdutf/doxygen.py +50 -0
- package/deps/simdutf/fuzz/.clang-format +9 -0
- package/deps/simdutf/fuzz/CMakeLists.txt +45 -0
- package/deps/simdutf/fuzz/README.md +168 -0
- package/deps/simdutf/fuzz/atomic_base64.cpp +448 -0
- package/deps/simdutf/fuzz/base64.cpp +278 -0
- package/deps/simdutf/fuzz/build.sh +83 -0
- package/deps/simdutf/fuzz/conversion.cpp +669 -0
- package/deps/simdutf/fuzz/helpers/.clang-format-ignore +1 -0
- package/deps/simdutf/fuzz/helpers/common.h +135 -0
- package/deps/simdutf/fuzz/helpers/nameof.hpp +1258 -0
- package/deps/simdutf/fuzz/main.cpp +72 -0
- package/deps/simdutf/fuzz/minimize_and_cleanse.sh +87 -0
- package/deps/simdutf/fuzz/misc.cpp +216 -0
- package/deps/simdutf/fuzz/random_fuzz.sh +154 -0
- package/deps/simdutf/fuzz/roundtrip.cpp +588 -0
- package/deps/simdutf/fuzz/safe_conversion.cpp +104 -0
- package/deps/simdutf/include/simdutf/avx512.h +79 -0
- package/deps/simdutf/include/simdutf/base64_implementation.h +158 -0
- package/deps/simdutf/include/simdutf/base64_tables.h +887 -0
- package/deps/simdutf/include/simdutf/common_defs.h +186 -0
- package/deps/simdutf/include/simdutf/compiler_check.h +50 -0
- package/deps/simdutf/include/simdutf/constexpr_ptr.h +138 -0
- package/deps/simdutf/include/simdutf/encoding_types.h +189 -0
- package/deps/simdutf/include/simdutf/error.h +126 -0
- package/deps/simdutf/include/simdutf/implementation.h +7081 -0
- package/deps/simdutf/include/simdutf/internal/isadetection.h +325 -0
- package/deps/simdutf/include/simdutf/portability.h +285 -0
- package/deps/simdutf/include/simdutf/scalar/ascii.h +86 -0
- package/deps/simdutf/include/simdutf/scalar/atomic_util.h +105 -0
- package/deps/simdutf/include/simdutf/scalar/base64.h +911 -0
- package/deps/simdutf/include/simdutf/scalar/latin1.h +26 -0
- package/deps/simdutf/include/simdutf/scalar/latin1_to_utf16/latin1_to_utf16.h +52 -0
- package/deps/simdutf/include/simdutf/scalar/latin1_to_utf32/latin1_to_utf32.h +27 -0
- package/deps/simdutf/include/simdutf/scalar/latin1_to_utf8/latin1_to_utf8.h +191 -0
- package/deps/simdutf/include/simdutf/scalar/swap_bytes.h +35 -0
- package/deps/simdutf/include/simdutf/scalar/utf16.h +226 -0
- package/deps/simdutf/include/simdutf/scalar/utf16_to_latin1/utf16_to_latin1.h +108 -0
- package/deps/simdutf/include/simdutf/scalar/utf16_to_latin1/valid_utf16_to_latin1.h +40 -0
- package/deps/simdutf/include/simdutf/scalar/utf16_to_utf32/utf16_to_utf32.h +86 -0
- package/deps/simdutf/include/simdutf/scalar/utf16_to_utf32/valid_utf16_to_utf32.h +44 -0
- package/deps/simdutf/include/simdutf/scalar/utf16_to_utf8/utf16_to_utf8.h +295 -0
- package/deps/simdutf/include/simdutf/scalar/utf16_to_utf8/valid_utf16_to_utf8.h +91 -0
- package/deps/simdutf/include/simdutf/scalar/utf32.h +82 -0
- package/deps/simdutf/include/simdutf/scalar/utf32_to_latin1/utf32_to_latin1.h +68 -0
- package/deps/simdutf/include/simdutf/scalar/utf32_to_latin1/valid_utf32_to_latin1.h +67 -0
- package/deps/simdutf/include/simdutf/scalar/utf32_to_utf16/utf32_to_utf16.h +84 -0
- package/deps/simdutf/include/simdutf/scalar/utf32_to_utf16/valid_utf32_to_utf16.h +44 -0
- package/deps/simdutf/include/simdutf/scalar/utf32_to_utf8/utf32_to_utf8.h +142 -0
- package/deps/simdutf/include/simdutf/scalar/utf32_to_utf8/valid_utf32_to_utf8.h +72 -0
- package/deps/simdutf/include/simdutf/scalar/utf8.h +326 -0
- package/deps/simdutf/include/simdutf/scalar/utf8_to_latin1/utf8_to_latin1.h +225 -0
- package/deps/simdutf/include/simdutf/scalar/utf8_to_latin1/valid_utf8_to_latin1.h +87 -0
- package/deps/simdutf/include/simdutf/scalar/utf8_to_utf16/utf8_to_utf16.h +342 -0
- package/deps/simdutf/include/simdutf/scalar/utf8_to_utf16/valid_utf8_to_utf16.h +106 -0
- package/deps/simdutf/include/simdutf/scalar/utf8_to_utf32/utf8_to_utf32.h +299 -0
- package/deps/simdutf/include/simdutf/scalar/utf8_to_utf32/valid_utf8_to_utf32.h +83 -0
- package/deps/simdutf/include/simdutf/simdutf_version.h +26 -0
- package/deps/simdutf/include/simdutf.h +26 -0
- package/deps/simdutf/include/simdutf_c.h +342 -0
- package/deps/simdutf/riscv/Dockerfile +16 -0
- package/deps/simdutf/riscv/README.md +24 -0
- package/deps/simdutf/riscv/remove-docker-station +8 -0
- package/deps/simdutf/riscv/run-docker-station +31 -0
- package/deps/simdutf/scripts/.flake8 +2 -0
- package/deps/simdutf/scripts/Makefile +2 -0
- package/deps/simdutf/scripts/README_ADD_FUNCTION.md +49 -0
- package/deps/simdutf/scripts/add_function.py +330 -0
- package/deps/simdutf/scripts/amalgamation_tests.py +156 -0
- package/deps/simdutf/scripts/base64/Makefile +2 -0
- package/deps/simdutf/scripts/base64/README.md +2 -0
- package/deps/simdutf/scripts/base64/avx512.py +76 -0
- package/deps/simdutf/scripts/base64/neon_decode.py +143 -0
- package/deps/simdutf/scripts/base64/neon_generate_lut.py +101 -0
- package/deps/simdutf/scripts/base64/sse.py +252 -0
- package/deps/simdutf/scripts/base64/sseregular.py +160 -0
- package/deps/simdutf/scripts/base64/sseurl.py +283 -0
- package/deps/simdutf/scripts/base64/table.py +59 -0
- package/deps/simdutf/scripts/base64bench_print.py +145 -0
- package/deps/simdutf/scripts/benchmark-all.py +119 -0
- package/deps/simdutf/scripts/benchmark_print.py +324 -0
- package/deps/simdutf/scripts/check_feature_macros.py +156 -0
- package/deps/simdutf/scripts/check_typos.sh +13 -0
- package/deps/simdutf/scripts/clang_format.sh +35 -0
- package/deps/simdutf/scripts/clang_format_docker.sh +38 -0
- package/deps/simdutf/scripts/common.py +24 -0
- package/deps/simdutf/scripts/compilation_benchmark.py +55 -0
- package/deps/simdutf/scripts/compile_many_variations.sh +64 -0
- package/deps/simdutf/scripts/create_latex_table.py +62 -0
- package/deps/simdutf/scripts/docker/Dockerfile +14 -0
- package/deps/simdutf/scripts/docker/Makefile +9 -0
- package/deps/simdutf/scripts/docker/README.md +30 -0
- package/deps/simdutf/scripts/docker/llvm.gpg +0 -0
- package/deps/simdutf/scripts/ppc64_convert_utf16_to_utf8.py +155 -0
- package/deps/simdutf/scripts/prepare_doxygen.sh +21 -0
- package/deps/simdutf/scripts/release.py +197 -0
- package/deps/simdutf/scripts/shortinputplots.py +97 -0
- package/deps/simdutf/scripts/sse_convert_utf16_to_utf8.py +422 -0
- package/deps/simdutf/scripts/sse_convert_utf32_to_utf16.py +105 -0
- package/deps/simdutf/scripts/sse_utf8_utf16_decode.py +186 -0
- package/deps/simdutf/scripts/sse_validate_utf16le_proof.py +137 -0
- package/deps/simdutf/scripts/sse_validate_utf16le_testcases.py +129 -0
- package/deps/simdutf/scripts/table.py +207 -0
- package/deps/simdutf/scripts/tests/new.txt +33 -0
- package/deps/simdutf/scripts/tests/old.txt +33 -0
- package/deps/simdutf/scripts/tests/results.txt +272 -0
- package/deps/simdutf/simdutf.pc.in +11 -0
- package/deps/simdutf/singleheader/.flake8 +2 -0
- package/deps/simdutf/singleheader/CMakeLists.txt +64 -0
- package/deps/simdutf/singleheader/README-dev.md +81 -0
- package/deps/simdutf/singleheader/README.md +19 -0
- package/deps/simdutf/singleheader/amalgamate.py +513 -0
- package/deps/simdutf/singleheader/amalgamation_demo.c +59 -0
- package/deps/simdutf/singleheader/amalgamation_demo.cpp +54 -0
- package/deps/simdutf/singleheader/test-features.py +262 -0
- package/deps/simdutf/src/CMakeLists.txt +78 -0
- package/deps/simdutf/src/arm64/arm_base64.cpp +791 -0
- package/deps/simdutf/src/arm64/arm_convert_latin1_to_utf16.cpp +24 -0
- package/deps/simdutf/src/arm64/arm_convert_latin1_to_utf32.cpp +24 -0
- package/deps/simdutf/src/arm64/arm_convert_latin1_to_utf8.cpp +70 -0
- package/deps/simdutf/src/arm64/arm_convert_utf16_to_latin1.cpp +61 -0
- package/deps/simdutf/src/arm64/arm_convert_utf16_to_utf32.cpp +185 -0
- package/deps/simdutf/src/arm64/arm_convert_utf16_to_utf8.cpp +780 -0
- package/deps/simdutf/src/arm64/arm_convert_utf32_to_latin1.cpp +60 -0
- package/deps/simdutf/src/arm64/arm_convert_utf32_to_utf16.cpp +208 -0
- package/deps/simdutf/src/arm64/arm_convert_utf32_to_utf8.cpp +505 -0
- package/deps/simdutf/src/arm64/arm_convert_utf8_to_latin1.cpp +69 -0
- package/deps/simdutf/src/arm64/arm_convert_utf8_to_utf16.cpp +313 -0
- package/deps/simdutf/src/arm64/arm_convert_utf8_to_utf32.cpp +179 -0
- package/deps/simdutf/src/arm64/arm_find.cpp +199 -0
- package/deps/simdutf/src/arm64/arm_utf16fix.cpp +185 -0
- package/deps/simdutf/src/arm64/arm_validate_utf16.cpp +165 -0
- package/deps/simdutf/src/arm64/arm_validate_utf32le.cpp +65 -0
- package/deps/simdutf/src/arm64/implementation.cpp +1442 -0
- package/deps/simdutf/src/encoding_types.cpp +67 -0
- package/deps/simdutf/src/error.cpp +3 -0
- package/deps/simdutf/src/fallback/implementation.cpp +589 -0
- package/deps/simdutf/src/generic/ascii_validation.h +50 -0
- package/deps/simdutf/src/generic/base64.h +233 -0
- package/deps/simdutf/src/generic/base64lengths.h +63 -0
- package/deps/simdutf/src/generic/buf_block_reader.h +109 -0
- package/deps/simdutf/src/generic/find.h +75 -0
- package/deps/simdutf/src/generic/utf16/change_endianness.h +24 -0
- package/deps/simdutf/src/generic/utf16/count_code_points_bytemask.h +58 -0
- package/deps/simdutf/src/generic/utf16/to_well_formed.h +93 -0
- package/deps/simdutf/src/generic/utf16/utf32_length_from_utf16.h +15 -0
- package/deps/simdutf/src/generic/utf16/utf8_length_from_utf16.h +35 -0
- package/deps/simdutf/src/generic/utf16/utf8_length_from_utf16_bytemask.h +199 -0
- package/deps/simdutf/src/generic/utf16.h +73 -0
- package/deps/simdutf/src/generic/utf32.h +136 -0
- package/deps/simdutf/src/generic/utf8/utf16_length_from_utf8_bytemask.h +53 -0
- package/deps/simdutf/src/generic/utf8.h +92 -0
- package/deps/simdutf/src/generic/utf8_to_latin1/utf8_to_latin1.h +316 -0
- package/deps/simdutf/src/generic/utf8_to_latin1/valid_utf8_to_latin1.h +78 -0
- package/deps/simdutf/src/generic/utf8_to_utf16/utf8_to_utf16.h +332 -0
- package/deps/simdutf/src/generic/utf8_to_utf16/valid_utf8_to_utf16.h +74 -0
- package/deps/simdutf/src/generic/utf8_to_utf32/utf8_to_utf32.h +318 -0
- package/deps/simdutf/src/generic/utf8_to_utf32/valid_utf8_to_utf32.h +42 -0
- package/deps/simdutf/src/generic/utf8_validation/utf8_lookup4_algorithm.h +223 -0
- package/deps/simdutf/src/generic/utf8_validation/utf8_validator.h +84 -0
- package/deps/simdutf/src/generic/validate_utf16.h +164 -0
- package/deps/simdutf/src/generic/validate_utf32.h +99 -0
- package/deps/simdutf/src/haswell/avx2_base64.cpp +837 -0
- package/deps/simdutf/src/haswell/avx2_convert_latin1_to_utf16.cpp +28 -0
- package/deps/simdutf/src/haswell/avx2_convert_latin1_to_utf32.cpp +20 -0
- package/deps/simdutf/src/haswell/avx2_convert_latin1_to_utf8.cpp +83 -0
- package/deps/simdutf/src/haswell/avx2_convert_utf16_to_latin1.cpp +83 -0
- package/deps/simdutf/src/haswell/avx2_convert_utf16_to_utf32.cpp +210 -0
- package/deps/simdutf/src/haswell/avx2_convert_utf16_to_utf8.cpp +602 -0
- package/deps/simdutf/src/haswell/avx2_convert_utf32_to_latin1.cpp +116 -0
- package/deps/simdutf/src/haswell/avx2_convert_utf32_to_utf16.cpp +164 -0
- package/deps/simdutf/src/haswell/avx2_convert_utf32_to_utf8.cpp +569 -0
- package/deps/simdutf/src/haswell/avx2_convert_utf8_to_latin1.cpp +60 -0
- package/deps/simdutf/src/haswell/avx2_convert_utf8_to_utf16.cpp +195 -0
- package/deps/simdutf/src/haswell/avx2_convert_utf8_to_utf32.cpp +135 -0
- package/deps/simdutf/src/haswell/avx2_utf16fix.cpp +173 -0
- package/deps/simdutf/src/haswell/avx2_validate_utf16.cpp +17 -0
- package/deps/simdutf/src/haswell/implementation.cpp +1447 -0
- package/deps/simdutf/src/icelake/icelake_ascii_validation.inl.cpp +19 -0
- package/deps/simdutf/src/icelake/icelake_base64.inl.cpp +630 -0
- package/deps/simdutf/src/icelake/icelake_common.inl.cpp +37 -0
- package/deps/simdutf/src/icelake/icelake_convert_latin1_to_utf16.inl.cpp +36 -0
- package/deps/simdutf/src/icelake/icelake_convert_latin1_to_utf32.inl.cpp +23 -0
- package/deps/simdutf/src/icelake/icelake_convert_latin1_to_utf8.inl.cpp +107 -0
- package/deps/simdutf/src/icelake/icelake_convert_utf16_to_latin1.inl.cpp +103 -0
- package/deps/simdutf/src/icelake/icelake_convert_utf16_to_utf32.inl.cpp +136 -0
- package/deps/simdutf/src/icelake/icelake_convert_utf16_to_utf8.inl.cpp +206 -0
- package/deps/simdutf/src/icelake/icelake_convert_utf32_to_latin1.inl.cpp +74 -0
- package/deps/simdutf/src/icelake/icelake_convert_utf32_to_utf16.inl.cpp +338 -0
- package/deps/simdutf/src/icelake/icelake_convert_utf32_to_utf8.inl.cpp +574 -0
- package/deps/simdutf/src/icelake/icelake_convert_utf8_to_latin1.inl.cpp +104 -0
- package/deps/simdutf/src/icelake/icelake_convert_utf8_to_utf16.inl.cpp +75 -0
- package/deps/simdutf/src/icelake/icelake_convert_valid_utf8_to_latin1.inl.cpp +69 -0
- package/deps/simdutf/src/icelake/icelake_find.inl.cpp +146 -0
- package/deps/simdutf/src/icelake/icelake_from_utf8.inl.cpp +266 -0
- package/deps/simdutf/src/icelake/icelake_from_valid_utf8.inl.cpp +136 -0
- package/deps/simdutf/src/icelake/icelake_macros.inl.cpp +143 -0
- package/deps/simdutf/src/icelake/icelake_utf16fix.cpp +138 -0
- package/deps/simdutf/src/icelake/icelake_utf32_validation.inl.cpp +63 -0
- package/deps/simdutf/src/icelake/icelake_utf8_common.inl.cpp +753 -0
- package/deps/simdutf/src/icelake/icelake_utf8_length_from_utf16.inl.cpp +269 -0
- package/deps/simdutf/src/icelake/icelake_utf8_validation.inl.cpp +116 -0
- package/deps/simdutf/src/icelake/implementation.cpp +1903 -0
- package/deps/simdutf/src/implementation.cpp +2526 -0
- package/deps/simdutf/src/lasx/implementation.cpp +1531 -0
- package/deps/simdutf/src/lasx/lasx_base64.cpp +695 -0
- package/deps/simdutf/src/lasx/lasx_convert_latin1_to_utf16.cpp +76 -0
- package/deps/simdutf/src/lasx/lasx_convert_latin1_to_utf32.cpp +55 -0
- package/deps/simdutf/src/lasx/lasx_convert_latin1_to_utf8.cpp +65 -0
- package/deps/simdutf/src/lasx/lasx_convert_utf16_to_latin1.cpp +64 -0
- package/deps/simdutf/src/lasx/lasx_convert_utf16_to_utf32.cpp +183 -0
- package/deps/simdutf/src/lasx/lasx_convert_utf16_to_utf8.cpp +550 -0
- package/deps/simdutf/src/lasx/lasx_convert_utf32_to_latin1.cpp +73 -0
- package/deps/simdutf/src/lasx/lasx_convert_utf32_to_utf16.cpp +218 -0
- package/deps/simdutf/src/lasx/lasx_convert_utf32_to_utf8.cpp +589 -0
- package/deps/simdutf/src/lasx/lasx_convert_utf8_to_latin1.cpp +72 -0
- package/deps/simdutf/src/lasx/lasx_convert_utf8_to_utf16.cpp +296 -0
- package/deps/simdutf/src/lasx/lasx_convert_utf8_to_utf32.cpp +190 -0
- package/deps/simdutf/src/lasx/lasx_find.cpp +64 -0
- package/deps/simdutf/src/lasx/lasx_validate_utf16.cpp +13 -0
- package/deps/simdutf/src/lasx/lasx_validate_utf32le.cpp +84 -0
- package/deps/simdutf/src/lsx/implementation.cpp +1417 -0
- package/deps/simdutf/src/lsx/lsx_base64.cpp +675 -0
- package/deps/simdutf/src/lsx/lsx_convert_latin1_to_utf16.cpp +39 -0
- package/deps/simdutf/src/lsx/lsx_convert_latin1_to_utf32.cpp +27 -0
- package/deps/simdutf/src/lsx/lsx_convert_latin1_to_utf8.cpp +56 -0
- package/deps/simdutf/src/lsx/lsx_convert_utf16_to_latin1.cpp +64 -0
- package/deps/simdutf/src/lsx/lsx_convert_utf16_to_utf32.cpp +133 -0
- package/deps/simdutf/src/lsx/lsx_convert_utf16_to_utf8.cpp +518 -0
- package/deps/simdutf/src/lsx/lsx_convert_utf32_to_latin1.cpp +66 -0
- package/deps/simdutf/src/lsx/lsx_convert_utf32_to_utf16.cpp +155 -0
- package/deps/simdutf/src/lsx/lsx_convert_utf32_to_utf8.cpp +459 -0
- package/deps/simdutf/src/lsx/lsx_convert_utf8_to_latin1.cpp +75 -0
- package/deps/simdutf/src/lsx/lsx_convert_utf8_to_utf16.cpp +291 -0
- package/deps/simdutf/src/lsx/lsx_convert_utf8_to_utf32.cpp +179 -0
- package/deps/simdutf/src/lsx/lsx_find.cpp +60 -0
- package/deps/simdutf/src/lsx/lsx_validate_utf16.cpp +13 -0
- package/deps/simdutf/src/lsx/lsx_validate_utf32le.cpp +68 -0
- package/deps/simdutf/src/ppc64/implementation.cpp +992 -0
- package/deps/simdutf/src/ppc64/ppc64_base64.cpp +480 -0
- package/deps/simdutf/src/ppc64/ppc64_base64_internal_tests.cpp +401 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_latin1_to_utf16.cpp +12 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_latin1_to_utf32.cpp +12 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_latin1_to_utf8.cpp +149 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_utf16_to_latin1.cpp +67 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_utf16_to_utf32.cpp +87 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_utf16_to_utf8.cpp +296 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_utf32_to_latin1.cpp +57 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_utf32_to_utf16.cpp +117 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_utf32_to_utf8.cpp +166 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_utf8_to_latin1.cpp +69 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_utf8_to_utf16.cpp +211 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_utf8_to_utf32.cpp +153 -0
- package/deps/simdutf/src/ppc64/ppc64_utf16_to_utf8_tables.h +1011 -0
- package/deps/simdutf/src/ppc64/ppc64_utf8_length_from_latin1.cpp +37 -0
- package/deps/simdutf/src/ppc64/ppc64_validate_utf16.cpp +19 -0
- package/deps/simdutf/src/ppc64/templates.cpp +91 -0
- package/deps/simdutf/src/rvv/implementation.cpp +138 -0
- package/deps/simdutf/src/rvv/rvv_find.cpp +27 -0
- package/deps/simdutf/src/rvv/rvv_helpers.inl.cpp +23 -0
- package/deps/simdutf/src/rvv/rvv_latin1_to.inl.cpp +71 -0
- package/deps/simdutf/src/rvv/rvv_length_from.inl.cpp +164 -0
- package/deps/simdutf/src/rvv/rvv_utf16_to.inl.cpp +399 -0
- package/deps/simdutf/src/rvv/rvv_utf16fix.cpp +110 -0
- package/deps/simdutf/src/rvv/rvv_utf32_to.inl.cpp +307 -0
- package/deps/simdutf/src/rvv/rvv_utf8_to.inl.cpp +435 -0
- package/deps/simdutf/src/rvv/rvv_validate.inl.cpp +275 -0
- package/deps/simdutf/src/simdutf/arm64/begin.h +2 -0
- package/deps/simdutf/src/simdutf/arm64/bitmanipulation.h +34 -0
- package/deps/simdutf/src/simdutf/arm64/end.h +2 -0
- package/deps/simdutf/src/simdutf/arm64/implementation.h +307 -0
- package/deps/simdutf/src/simdutf/arm64/intrinsics.h +10 -0
- package/deps/simdutf/src/simdutf/arm64/simd.h +547 -0
- package/deps/simdutf/src/simdutf/arm64/simd16-inl.h +403 -0
- package/deps/simdutf/src/simdutf/arm64/simd32-inl.h +129 -0
- package/deps/simdutf/src/simdutf/arm64/simd64-inl.h +28 -0
- package/deps/simdutf/src/simdutf/arm64.h +43 -0
- package/deps/simdutf/src/simdutf/fallback/begin.h +1 -0
- package/deps/simdutf/src/simdutf/fallback/bitmanipulation.h +13 -0
- package/deps/simdutf/src/simdutf/fallback/end.h +1 -0
- package/deps/simdutf/src/simdutf/fallback/implementation.h +331 -0
- package/deps/simdutf/src/simdutf/fallback.h +42 -0
- package/deps/simdutf/src/simdutf/haswell/begin.h +15 -0
- package/deps/simdutf/src/simdutf/haswell/bitmanipulation.h +35 -0
- package/deps/simdutf/src/simdutf/haswell/end.h +13 -0
- package/deps/simdutf/src/simdutf/haswell/implementation.h +338 -0
- package/deps/simdutf/src/simdutf/haswell/intrinsics.h +67 -0
- package/deps/simdutf/src/simdutf/haswell/simd.h +363 -0
- package/deps/simdutf/src/simdutf/haswell/simd16-inl.h +261 -0
- package/deps/simdutf/src/simdutf/haswell/simd32-inl.h +111 -0
- package/deps/simdutf/src/simdutf/haswell/simd64-inl.h +34 -0
- package/deps/simdutf/src/simdutf/haswell.h +63 -0
- package/deps/simdutf/src/simdutf/icelake/begin.h +14 -0
- package/deps/simdutf/src/simdutf/icelake/bitmanipulation.h +44 -0
- package/deps/simdutf/src/simdutf/icelake/end.h +12 -0
- package/deps/simdutf/src/simdutf/icelake/implementation.h +346 -0
- package/deps/simdutf/src/simdutf/icelake/intrinsics.h +138 -0
- package/deps/simdutf/src/simdutf/icelake/simd.h +17 -0
- package/deps/simdutf/src/simdutf/icelake/simd16-inl.h +90 -0
- package/deps/simdutf/src/simdutf/icelake/simd32-inl.h +47 -0
- package/deps/simdutf/src/simdutf/icelake.h +81 -0
- package/deps/simdutf/src/simdutf/lasx/begin.h +8 -0
- package/deps/simdutf/src/simdutf/lasx/bitmanipulation.h +25 -0
- package/deps/simdutf/src/simdutf/lasx/end.h +8 -0
- package/deps/simdutf/src/simdutf/lasx/implementation.h +310 -0
- package/deps/simdutf/src/simdutf/lasx/intrinsics.h +319 -0
- package/deps/simdutf/src/simdutf/lasx/simd.h +551 -0
- package/deps/simdutf/src/simdutf/lasx/simd16-inl.h +234 -0
- package/deps/simdutf/src/simdutf/lasx/simd32-inl.h +74 -0
- package/deps/simdutf/src/simdutf/lasx/simd64-inl.h +52 -0
- package/deps/simdutf/src/simdutf/lasx.h +49 -0
- package/deps/simdutf/src/simdutf/lsx/begin.h +2 -0
- package/deps/simdutf/src/simdutf/lsx/bitmanipulation.h +25 -0
- package/deps/simdutf/src/simdutf/lsx/end.h +2 -0
- package/deps/simdutf/src/simdutf/lsx/implementation.h +309 -0
- package/deps/simdutf/src/simdutf/lsx/intrinsics.h +196 -0
- package/deps/simdutf/src/simdutf/lsx/simd.h +421 -0
- package/deps/simdutf/src/simdutf/lsx/simd16-inl.h +242 -0
- package/deps/simdutf/src/simdutf/lsx/simd32-inl.h +69 -0
- package/deps/simdutf/src/simdutf/lsx/simd64-inl.h +50 -0
- package/deps/simdutf/src/simdutf/lsx.h +52 -0
- package/deps/simdutf/src/simdutf/ppc64/begin.h +1 -0
- package/deps/simdutf/src/simdutf/ppc64/bitmanipulation.h +29 -0
- package/deps/simdutf/src/simdutf/ppc64/end.h +1 -0
- package/deps/simdutf/src/simdutf/ppc64/implementation.h +348 -0
- package/deps/simdutf/src/simdutf/ppc64/intrinsics.h +19 -0
- package/deps/simdutf/src/simdutf/ppc64/simd.h +177 -0
- package/deps/simdutf/src/simdutf/ppc64/simd16-inl.h +327 -0
- package/deps/simdutf/src/simdutf/ppc64/simd32-inl.h +247 -0
- package/deps/simdutf/src/simdutf/ppc64/simd8-inl.h +618 -0
- package/deps/simdutf/src/simdutf/ppc64.h +40 -0
- package/deps/simdutf/src/simdutf/rvv/begin.h +7 -0
- package/deps/simdutf/src/simdutf/rvv/end.h +7 -0
- package/deps/simdutf/src/simdutf/rvv/implementation.h +321 -0
- package/deps/simdutf/src/simdutf/rvv/intrinsics.h +131 -0
- package/deps/simdutf/src/simdutf/rvv.h +41 -0
- package/deps/simdutf/src/simdutf/westmere/begin.h +8 -0
- package/deps/simdutf/src/simdutf/westmere/bitmanipulation.h +37 -0
- package/deps/simdutf/src/simdutf/westmere/end.h +8 -0
- package/deps/simdutf/src/simdutf/westmere/implementation.h +338 -0
- package/deps/simdutf/src/simdutf/westmere/intrinsics.h +38 -0
- package/deps/simdutf/src/simdutf/westmere/simd.h +379 -0
- package/deps/simdutf/src/simdutf/westmere/simd16-inl.h +242 -0
- package/deps/simdutf/src/simdutf/westmere/simd32-inl.h +151 -0
- package/deps/simdutf/src/simdutf/westmere/simd64-inl.h +33 -0
- package/deps/simdutf/src/simdutf/westmere.h +59 -0
- package/deps/simdutf/src/simdutf.cpp +152 -0
- package/deps/simdutf/src/simdutf_c.cpp +525 -0
- package/deps/simdutf/src/tables/utf16_to_utf8_tables.h +768 -0
- package/deps/simdutf/src/tables/utf32_to_utf16_tables.h +53 -0
- package/deps/simdutf/src/tables/utf8_to_utf16_tables.h +826 -0
- package/deps/simdutf/src/westmere/implementation.cpp +1479 -0
- package/deps/simdutf/src/westmere/internal/loader.cpp +7 -0
- package/deps/simdutf/src/westmere/internal/write_v_u16_11bits_to_utf8.cpp +66 -0
- package/deps/simdutf/src/westmere/sse_base64.cpp +672 -0
- package/deps/simdutf/src/westmere/sse_convert_latin1_to_utf16.cpp +21 -0
- package/deps/simdutf/src/westmere/sse_convert_latin1_to_utf32.cpp +31 -0
- package/deps/simdutf/src/westmere/sse_convert_latin1_to_utf8.cpp +71 -0
- package/deps/simdutf/src/westmere/sse_convert_utf16_to_latin1.cpp +70 -0
- package/deps/simdutf/src/westmere/sse_convert_utf16_to_utf32.cpp +206 -0
- package/deps/simdutf/src/westmere/sse_convert_utf16_to_utf8.cpp +504 -0
- package/deps/simdutf/src/westmere/sse_convert_utf32_to_latin1.cpp +82 -0
- package/deps/simdutf/src/westmere/sse_convert_utf32_to_utf16.cpp +209 -0
- package/deps/simdutf/src/westmere/sse_convert_utf32_to_utf8.cpp +589 -0
- package/deps/simdutf/src/westmere/sse_convert_utf8_to_latin1.cpp +58 -0
- package/deps/simdutf/src/westmere/sse_convert_utf8_to_utf16.cpp +197 -0
- package/deps/simdutf/src/westmere/sse_convert_utf8_to_utf32.cpp +141 -0
- package/deps/simdutf/src/westmere/sse_utf16fix.cpp +82 -0
- package/deps/simdutf/src/westmere/sse_validate_utf16.cpp +17 -0
- package/deps/simdutf/tests/CMakeLists.txt +483 -0
- package/deps/simdutf/tests/atomic_base64_tests.cpp +2845 -0
- package/deps/simdutf/tests/base64_tests.cpp +3617 -0
- package/deps/simdutf/tests/basic_fuzzer.cpp +805 -0
- package/deps/simdutf/tests/bele_tests.cpp +182 -0
- package/deps/simdutf/tests/constexpr_base64_tests.cpp +387 -0
- package/deps/simdutf/tests/convert_latin1_to_utf16be_tests.cpp +52 -0
- package/deps/simdutf/tests/convert_latin1_to_utf16le_tests.cpp +80 -0
- package/deps/simdutf/tests/convert_latin1_to_utf32_tests.cpp +66 -0
- package/deps/simdutf/tests/convert_latin1_to_utf8_tests.cpp +120 -0
- package/deps/simdutf/tests/convert_utf16_to_utf8_safe_tests.cpp +203 -0
- package/deps/simdutf/tests/convert_utf16_to_utf8_with_replacement_tests.cpp +276 -0
- package/deps/simdutf/tests/convert_utf16be_to_latin1_tests.cpp +109 -0
- package/deps/simdutf/tests/convert_utf16be_to_latin1_tests_with_errors.cpp +136 -0
- package/deps/simdutf/tests/convert_utf16be_to_utf32_tests.cpp +193 -0
- package/deps/simdutf/tests/convert_utf16be_to_utf32_with_errors_tests.cpp +381 -0
- package/deps/simdutf/tests/convert_utf16be_to_utf8_tests.cpp +259 -0
- package/deps/simdutf/tests/convert_utf16be_to_utf8_with_errors_tests.cpp +266 -0
- package/deps/simdutf/tests/convert_utf16le_to_latin1_tests.cpp +148 -0
- package/deps/simdutf/tests/convert_utf16le_to_latin1_tests_with_errors.cpp +176 -0
- package/deps/simdutf/tests/convert_utf16le_to_utf32_tests.cpp +213 -0
- package/deps/simdutf/tests/convert_utf16le_to_utf32_with_errors_tests.cpp +318 -0
- package/deps/simdutf/tests/convert_utf16le_to_utf8_tests.cpp +343 -0
- package/deps/simdutf/tests/convert_utf16le_to_utf8_with_errors_tests.cpp +271 -0
- package/deps/simdutf/tests/convert_utf32_to_latin1_tests.cpp +111 -0
- package/deps/simdutf/tests/convert_utf32_to_latin1_with_errors_tests.cpp +96 -0
- package/deps/simdutf/tests/convert_utf32_to_utf16be_tests.cpp +148 -0
- package/deps/simdutf/tests/convert_utf32_to_utf16be_with_errors_tests.cpp +192 -0
- package/deps/simdutf/tests/convert_utf32_to_utf16le_tests.cpp +166 -0
- package/deps/simdutf/tests/convert_utf32_to_utf16le_with_errors_tests.cpp +215 -0
- package/deps/simdutf/tests/convert_utf32_to_utf8_tests.cpp +181 -0
- package/deps/simdutf/tests/convert_utf32_to_utf8_with_errors_tests.cpp +261 -0
- package/deps/simdutf/tests/convert_utf8_to_latin1_tests.cpp +516 -0
- package/deps/simdutf/tests/convert_utf8_to_latin1_with_errors_tests.cpp +579 -0
- package/deps/simdutf/tests/convert_utf8_to_utf16be_tests.cpp +412 -0
- package/deps/simdutf/tests/convert_utf8_to_utf16be_with_errors_tests.cpp +480 -0
- package/deps/simdutf/tests/convert_utf8_to_utf16le_tests.cpp +671 -0
- package/deps/simdutf/tests/convert_utf8_to_utf16le_with_errors_tests.cpp +455 -0
- package/deps/simdutf/tests/convert_utf8_to_utf32_tests.cpp +1204 -0
- package/deps/simdutf/tests/convert_utf8_to_utf32_with_errors_tests.cpp +337 -0
- package/deps/simdutf/tests/convert_valid_utf16be_to_latin1_tests.cpp +37 -0
- package/deps/simdutf/tests/convert_valid_utf16be_to_utf32_tests.cpp +97 -0
- package/deps/simdutf/tests/convert_valid_utf16be_to_utf8_tests.cpp +126 -0
- package/deps/simdutf/tests/convert_valid_utf16le_to_latin1_tests.cpp +71 -0
- package/deps/simdutf/tests/convert_valid_utf16le_to_utf32_tests.cpp +122 -0
- package/deps/simdutf/tests/convert_valid_utf16le_to_utf8_tests.cpp +244 -0
- package/deps/simdutf/tests/convert_valid_utf32_to_latin1_tests.cpp +49 -0
- package/deps/simdutf/tests/convert_valid_utf32_to_utf16be_tests.cpp +92 -0
- package/deps/simdutf/tests/convert_valid_utf32_to_utf16le_tests.cpp +114 -0
- package/deps/simdutf/tests/convert_valid_utf32_to_utf8_tests.cpp +109 -0
- package/deps/simdutf/tests/convert_valid_utf8_to_latin1_tests.cpp +84 -0
- package/deps/simdutf/tests/convert_valid_utf8_to_utf16be_tests.cpp +124 -0
- package/deps/simdutf/tests/convert_valid_utf8_to_utf16le_tests.cpp +221 -0
- package/deps/simdutf/tests/convert_valid_utf8_to_utf32_tests.cpp +155 -0
- package/deps/simdutf/tests/count_utf16be.cpp +64 -0
- package/deps/simdutf/tests/count_utf16le.cpp +61 -0
- package/deps/simdutf/tests/count_utf8.cpp +87 -0
- package/deps/simdutf/tests/detect_encodings_tests.cpp +312 -0
- package/deps/simdutf/tests/embed/valid_utf8.txt +1 -0
- package/deps/simdutf/tests/embed_tests.cpp +22 -0
- package/deps/simdutf/tests/find_tests.cpp +77 -0
- package/deps/simdutf/tests/fixed_string_tests.cpp +153 -0
- package/deps/simdutf/tests/helpers/CMakeLists.txt +25 -0
- package/deps/simdutf/tests/helpers/compiletime_conversions.h +222 -0
- package/deps/simdutf/tests/helpers/fixed_string.h +267 -0
- package/deps/simdutf/tests/helpers/random_int.cpp +30 -0
- package/deps/simdutf/tests/helpers/random_int.h +39 -0
- package/deps/simdutf/tests/helpers/random_utf16.cpp +123 -0
- package/deps/simdutf/tests/helpers/random_utf16.h +52 -0
- package/deps/simdutf/tests/helpers/random_utf32.cpp +41 -0
- package/deps/simdutf/tests/helpers/random_utf32.h +40 -0
- package/deps/simdutf/tests/helpers/random_utf8.cpp +93 -0
- package/deps/simdutf/tests/helpers/random_utf8.h +36 -0
- package/deps/simdutf/tests/helpers/test.cpp +231 -0
- package/deps/simdutf/tests/helpers/test.h +193 -0
- package/deps/simdutf/tests/helpers/transcode_test_base.cpp +1257 -0
- package/deps/simdutf/tests/helpers/transcode_test_base.h +683 -0
- package/deps/simdutf/tests/helpers/utf16.h +27 -0
- package/deps/simdutf/tests/installation_tests/find/CMakeLists.txt +43 -0
- package/deps/simdutf/tests/installation_tests/from_fetch/CMakeLists.txt +47 -0
- package/deps/simdutf/tests/internal_tests.cpp +27 -0
- package/deps/simdutf/tests/null_safety_tests.cpp +94 -0
- package/deps/simdutf/tests/random_fuzzer.cpp +779 -0
- package/deps/simdutf/tests/readme_tests.cpp +274 -0
- package/deps/simdutf/tests/reference/CMakeLists.txt +23 -0
- package/deps/simdutf/tests/reference/decode_utf16.h +81 -0
- package/deps/simdutf/tests/reference/decode_utf32.h +47 -0
- package/deps/simdutf/tests/reference/encode_latin1.cpp +1 -0
- package/deps/simdutf/tests/reference/encode_latin1.h +32 -0
- package/deps/simdutf/tests/reference/encode_utf16.cpp +49 -0
- package/deps/simdutf/tests/reference/encode_utf16.h +20 -0
- package/deps/simdutf/tests/reference/encode_utf32.cpp +1 -0
- package/deps/simdutf/tests/reference/encode_utf32.h +36 -0
- package/deps/simdutf/tests/reference/encode_utf8.cpp +1 -0
- package/deps/simdutf/tests/reference/encode_utf8.h +40 -0
- package/deps/simdutf/tests/reference/validate_utf16.cpp +60 -0
- package/deps/simdutf/tests/reference/validate_utf16.h +14 -0
- package/deps/simdutf/tests/reference/validate_utf16_to_latin1.cpp +35 -0
- package/deps/simdutf/tests/reference/validate_utf16_to_latin1.h +13 -0
- package/deps/simdutf/tests/reference/validate_utf32.cpp +27 -0
- package/deps/simdutf/tests/reference/validate_utf32.h +12 -0
- package/deps/simdutf/tests/reference/validate_utf32_to_latin1.cpp +27 -0
- package/deps/simdutf/tests/reference/validate_utf32_to_latin1.h +12 -0
- package/deps/simdutf/tests/reference/validate_utf8.cpp +82 -0
- package/deps/simdutf/tests/reference/validate_utf8.h +11 -0
- package/deps/simdutf/tests/reference/validate_utf8_to_latin1.cpp +43 -0
- package/deps/simdutf/tests/reference/validate_utf8_to_latin1.h +12 -0
- package/deps/simdutf/tests/select_implementation.cpp +43 -0
- package/deps/simdutf/tests/simdutf_c_tests.cpp +244 -0
- package/deps/simdutf/tests/span_tests.cpp +401 -0
- package/deps/simdutf/tests/special_tests.cpp +559 -0
- package/deps/simdutf/tests/straight_c_test.c +187 -0
- package/deps/simdutf/tests/text_encoding_tests.cpp +77 -0
- package/deps/simdutf/tests/to_well_formed_utf16_tests.cpp +377 -0
- package/deps/simdutf/tests/utf8_length_from_utf16_tests.cpp +202 -0
- package/deps/simdutf/tests/validate_ascii_basic_tests.cpp +165 -0
- package/deps/simdutf/tests/validate_ascii_with_errors_tests.cpp +77 -0
- package/deps/simdutf/tests/validate_utf16be_basic_tests.cpp +175 -0
- package/deps/simdutf/tests/validate_utf16be_with_errors_tests.cpp +188 -0
- package/deps/simdutf/tests/validate_utf16le_basic_tests.cpp +268 -0
- package/deps/simdutf/tests/validate_utf16le_with_errors_tests.cpp +274 -0
- package/deps/simdutf/tests/validate_utf32_basic_tests.cpp +92 -0
- package/deps/simdutf/tests/validate_utf32_with_errors_tests.cpp +114 -0
- package/deps/simdutf/tests/validate_utf8_basic_tests.cpp +178 -0
- package/deps/simdutf/tests/validate_utf8_brute_force_tests.cpp +88 -0
- package/deps/simdutf/tests/validate_utf8_puzzler_tests.cpp +33 -0
- package/deps/simdutf/tests/validate_utf8_with_errors_tests.cpp +228 -0
- package/deps/simdutf/tools/CMakeLists.txt +85 -0
- package/deps/simdutf/tools/fastbase64.cpp +250 -0
- package/deps/simdutf/tools/sutf.cpp +556 -0
- package/deps/simdutf/tools/sutf.h +40 -0
- package/package.json +2 -2
|
@@ -0,0 +1,595 @@
|
|
|
1
|
+
//==================================================================================================
|
|
2
|
+
// File: unicode_utils.h
|
|
3
|
+
//
|
|
4
|
+
// Summary: Header file for fast UTF-8 to UTF-32/UTF-16 conversion routines.
|
|
5
|
+
//
|
|
6
|
+
// Copyright (c) 2018 Bob Steagall and KEWB Computing, All Rights Reserved
|
|
7
|
+
//==================================================================================================
|
|
8
|
+
//
|
|
9
|
+
// modified by D. Lemire for benchmarking in simdutf in April 2021.
|
|
10
|
+
#ifndef KEWB_UNICODE_UTILS_H_DEFINED
|
|
11
|
+
#define KEWB_UNICODE_UTILS_H_DEFINED
|
|
12
|
+
#include "simdutf.h"
|
|
13
|
+
|
|
14
|
+
//#include <cstdint>
|
|
15
|
+
#include <string>
|
|
16
|
+
|
|
17
|
+
//- Detect the compiler; only Clang, GCC, and Visual C++ are currently supported.
|
|
18
|
+
//
|
|
19
|
+
#if defined(__clang__) || defined(__GNUG__) || defined(__GNUC__) || defined(__MSYS__)

    //- Clang and the GCC family use the same attribute syntax; only the
    //  KEWB_COMPILER_* tag differs.  Clang is identified first.
    #if defined(__clang__)
        #define KEWB_COMPILER_CLANG
    #else
        #define KEWB_COMPILER_GCC
    #endif

    //- Inlining is forced only when __OPTIMIZE__ is defined.
    #if defined(__OPTIMIZE__)
        #define KEWB_FORCE_INLINE   inline __attribute__ ((always_inline))
    #else
        #define KEWB_FORCE_INLINE   inline
    #endif

    #define KEWB_ALIGN_FN   __attribute__ ((aligned (128)))

#elif defined(_MSC_VER)

    #define KEWB_COMPILER_MSVC

    //- Inlining is forced only when NDEBUG is defined.
    #if defined(NDEBUG)
        #define KEWB_FORCE_INLINE   inline __forceinline
    #else
        #define KEWB_FORCE_INLINE   inline
    #endif

    #define KEWB_ALIGN_FN

#else

    //- Unrecognized compiler: plain inline and no function alignment attribute.
    #define KEWB_FORCE_INLINE   inline
    #define KEWB_ALIGN_FN

#endif
|
|
53
|
+
|
|
54
|
+
//- Detect OS and include relevant SSE headers; only Linux and Windows are currently supported.
|
|
55
|
+
//
|
|
56
|
+
#if !defined(_WIN32)
    //- Every target that does not define _WIN32 is tagged as Linux.
    #define KEWB_PLATFORM_LINUX
#else
    #define KEWB_PLATFORM_WINDOWS
#endif
|
|
61
|
+
SIMDUTF_TARGET_WESTMERE
|
|
62
|
+
|
|
63
|
+
namespace uu {
|
|
64
|
+
//--------------------------------------------------------------------------------------------------
|
|
65
|
+
/// \brief Traits style class to perform conversions from UTF-8 to UTF-32/UTF-16
|
|
66
|
+
///
|
|
67
|
+
/// \details
|
|
68
|
+
/// This traits-style class provides a demonstration of functions for converting strings
|
|
69
|
+
/// of UTF-8 code units to strings of UTF-32 code points, as well as transcoding UTF-8
|
|
70
|
+
/// into strings of UTF-16 code units. Its focus is on converting _from_ UTF-8 as quickly
|
|
71
|
+
/// as possible, although it does include member functions for converting a UTF-32 code
|
|
72
|
+
/// point into sequences of UTF-8/UTF-16 code units.
|
|
73
|
+
///
|
|
74
|
+
/// It implements conversion from UTF-8 in three different, but related ways:
|
|
75
|
+
/// * using a purely DFA-based approach to recognizing valid sequences of UTF-8 code units;
|
|
76
|
+
/// * using the DFA-based approach with a short-circuit optimization for ASCII code units;
|
|
77
|
+
/// * using the DFA-based approach with an SSE-based optimization for ASCII code units.
|
|
78
|
+
///
|
|
79
|
+
/// The member functions implement STL-style argument ordering, with source arguments on the
|
|
80
|
+
/// left and destination arguments on the right. The string-to-string conversion member
|
|
81
|
+
/// functions are analogous to std::copy() in that the first two arguments define an input
|
|
82
|
+
/// range and the third argument defines the starting point of the output range.
|
|
83
|
+
///
|
|
84
|
+
/// This class is not intended for production usage, as it does not currently provide a
|
|
85
|
+
/// mechanism for reporting/handling errors. No checking is done for null pointers; it
|
|
86
|
+
/// is assumed that the input and output pointers sensibly point to buffers that exist.
|
|
87
|
+
///
|
|
88
|
+
/// Finally, please note that this was developed and tested on x64/x86 hardware, and so
|
|
89
|
+
/// there is an implicit assumption that UTF-32 code points and UTF-16 code units are
|
|
90
|
+
/// little endian.
|
|
91
|
+
//--------------------------------------------------------------------------------------------------
|
|
92
|
+
//
|
|
93
|
+
class UtfUtils
{
  public:
    //- UTF-8 code unit type and the signed count type returned by the converters.
    using char8_t   = unsigned char;
    using ptrdiff_t = std::ptrdiff_t;

    //- Single code point helpers: decode one code point from UTF-8, or encode one
    //  code point as UTF-8 / UTF-16 code units (advancing pDst).
    //
    static bool     GetCodePoint(const char8_t* pSrc, const char8_t* pSrcEnd, char32_t& cdpt) noexcept;

    static uint32_t GetCodeUnits(char32_t cdpt, char8_t*& pDst) noexcept;
    static uint32_t GetCodeUnits(char32_t cdpt, char16_t*& pDst) noexcept;

    //- Default converters to UTF-32 / UTF-16 (lookup/computation on the first code unit).
    //  These wrap the '*BigTableConvert' and '*SmallTableConvert' members declared below.
    //
    static ptrdiff_t    BasicConvert(const char8_t* pSrc, const char8_t* pSrcEnd, char32_t* pDst) noexcept;
    static ptrdiff_t    FastConvert(const char8_t* pSrc, const char8_t* pSrcEnd, char32_t* pDst) noexcept;
    static ptrdiff_t    SseConvert(const char8_t* pSrc, const char8_t* pSrcEnd, char32_t* pDst) noexcept;

    static ptrdiff_t    BasicConvert(const char8_t* pSrc, const char8_t* pSrcEnd, char16_t* pDst) noexcept;
    static ptrdiff_t    FastConvert(const char8_t* pSrc, const char8_t* pSrcEnd, char16_t* pDst) noexcept;
    static ptrdiff_t    SseConvert(const char8_t* pSrc, const char8_t* pSrcEnd, char16_t* pDst) noexcept;

    //- Converters that use the pre-computed first-code-unit lookup table.
    //
    static ptrdiff_t    BasicBigTableConvert(const char8_t* pSrc, const char8_t* pSrcEnd, char32_t* pDst) noexcept;
    static ptrdiff_t    FastBigTableConvert(const char8_t* pSrc, const char8_t* pSrcEnd, char32_t* pDst) noexcept;
    static ptrdiff_t    SseBigTableConvert(const char8_t* pSrc, const char8_t* pSrcEnd, char32_t* pDst) noexcept;

    static ptrdiff_t    BasicBigTableConvert(const char8_t* pSrc, const char8_t* pSrcEnd, char16_t* pDst) noexcept;
    static ptrdiff_t    FastBigTableConvert(const char8_t* pSrc, const char8_t* pSrcEnd, char16_t* pDst) noexcept;
    static ptrdiff_t    SseBigTableConvert(const char8_t* pSrc, const char8_t* pSrcEnd, char16_t* pDst) noexcept;

    //- Converters that use the small lookup table plus masking of the first code unit.
    //
    static ptrdiff_t    BasicSmallTableConvert(const char8_t* pSrc, const char8_t* pSrcEnd, char32_t* pDst) noexcept;
    static ptrdiff_t    FastSmallTableConvert(const char8_t* pSrc, const char8_t* pSrcEnd, char32_t* pDst) noexcept;
    static ptrdiff_t    SseSmallTableConvert(const char8_t* pSrc, const char8_t* pSrcEnd, char32_t* pDst) noexcept;

    static ptrdiff_t    BasicSmallTableConvert(const char8_t* pSrc, const char8_t* pSrcEnd, char16_t* pDst) noexcept;
    static ptrdiff_t    FastSmallTableConvert(const char8_t* pSrc, const char8_t* pSrcEnd, char16_t* pDst) noexcept;
    static ptrdiff_t    SseSmallTableConvert(const char8_t* pSrc, const char8_t* pSrcEnd, char16_t* pDst) noexcept;

    //- Converters that trace their path through the DFA, writing to stdout.
    //
    static ptrdiff_t    ConvertWithTrace(const char8_t* pSrc, const char8_t* pSrcEnd, char32_t* pDst) noexcept;
    static ptrdiff_t    ConvertWithTrace(const char8_t* pSrc, const char8_t* pSrcEnd, char16_t* pDst) noexcept;

  private:
    //- Classification of a single octet (12 classes).
    //
    enum CharClass : uint8_t
    {
        ILL = 0,    //- C0..C1, F5..FF  Illegal octets; never valid in a UTF-8 sequence

        ASC = 1,    //- 00..7F          ASCII leading byte range

        CR1 = 2,    //- 80..8F          Continuation range 1
        CR2 = 3,    //- 90..9F          Continuation range 2
        CR3 = 4,    //- A0..BF          Continuation range 3

        L2A = 5,    //- C2..DF          Leading byte range A / 2-byte sequence

        L3A = 6,    //- E0              Leading byte range A / 3-byte sequence
        L3B = 7,    //- E1..EC, EE..EF  Leading byte range B / 3-byte sequence
        L3C = 8,    //- ED              Leading byte range C / 3-byte sequence

        L4A = 9,    //- F0              Leading byte range A / 4-byte sequence
        L4B = 10,   //- F1..F3          Leading byte range B / 4-byte sequence
        L4C = 11,   //- F4              Leading byte range C / 4-byte sequence
    };

    //- DFA states.  Values are multiples of 12, the number of CharClass values
    //  (presumably row offsets into maTransitions; confirm against the implementation).
    //
    enum State : uint8_t
    {
        BGN = 0,    //- Start
        ERR = 12,   //- Invalid sequence

        CS1 = 24,   //- Continuation state 1
        CS2 = 36,   //- Continuation state 2
        CS3 = 48,   //- Continuation state 3

        P3A = 60,   //- Partial 3-byte sequence state A
        P3B = 72,   //- Partial 3-byte sequence state B

        P4A = 84,   //- Partial 4-byte sequence state A
        P4B = 96,   //- Partial 4-byte sequence state B

        END = BGN,  //- The end state is the start state
        err = ERR,  //- Lower-case alias for readability in the state transition table
    };

    //- Entry of the first-code-unit table: an octet value paired with the next DFA state.
    //
    struct FirstUnitInfo
    {
        char8_t mFirstOctet;
        State   mNextState;
    };

    //- All lookup tables grouped in a single 2048-byte-aligned object.
    //  maTransitions holds 108 entries (9 distinct states x 12 character classes).
    //
    struct alignas(2048) LookupTables
    {
        FirstUnitInfo   maFirstUnitTable[256];
        CharClass       maOctetCategory[256];
        State           maTransitions[108];
        std::uint8_t    maFirstOctetMask[16];
    };

    //- Static table data, plus name arrays sized for the 12 classes and 9 states.
    //
    static const LookupTables   smTables;
    static const char*          smClassNames[12];
    static const char*          smStateNames[9];

    //- Decoders for one code point; pSrc is passed by reference.
    //
    static int32_t  AdvanceWithBigTable(const char8_t*& pSrc, const char8_t* pSrcEnd, char32_t& cdpt) noexcept;
    static int32_t  AdvanceWithSmallTable(const char8_t*& pSrc, const char8_t* pSrcEnd, char32_t& cdpt) noexcept;
    static State    AdvanceWithTrace(const char8_t*& pSrc, const char8_t* pSrcEnd, char32_t& cdpt) noexcept;

    //- Helpers for the SSE-based ASCII path.
    //
    static void     ConvertAsciiWithSse(const char8_t*& pSrc, char32_t*& pDst) noexcept;
    static void     ConvertAsciiWithSse(const char8_t*& pSrc, char16_t*& pDst) noexcept;
    static int32_t  GetTrailingZeros(int32_t x) noexcept;

    //- Trace output helper (not noexcept).
    //
    static void     PrintStateData(State curr, CharClass type, uint32_t unit, State next);
};
|
|
213
|
+
|
|
214
|
+
//--------------------------------------------------------------------------------------------------
|
|
215
|
+
/// \brief Converts a sequence of UTF-8 code units to a UTF-32 code point.
|
|
216
|
+
///
|
|
217
|
+
/// \param pSrc
|
|
218
|
+
/// A non-null pointer defining the beginning of the input range of code units.
|
|
219
|
+
/// \param pSrcEnd
|
|
220
|
+
/// A non-null past-the-end pointer defining the end of the input range.
|
|
221
|
+
/// \param cdpt
|
|
222
|
+
/// A mutable reference to a char32_t variable which will receive the code point.
|
|
223
|
+
///
|
|
224
|
+
/// \returns
|
|
225
|
+
/// Boolean value `true` on success.
|
|
226
|
+
//--------------------------------------------------------------------------------------------------
|
|
227
|
+
//
|
|
228
|
+
KEWB_FORCE_INLINE bool
UtfUtils::GetCodePoint(char8_t const* pSrc, char8_t const* const pSrcEnd, char32_t& cdpt) noexcept
{
    //- An empty (or inverted) input range cannot yield a code point.
    if (pSrc >= pSrcEnd)
    {
        return false;
    }

    //- Decode with the small-table DFA; any result other than ERR counts as success.
    return AdvanceWithSmallTable(pSrc, pSrcEnd, cdpt) != ERR;
}
|
|
233
|
+
|
|
234
|
+
//--------------------------------------------------------------------------------------------------
|
|
235
|
+
/// \brief Converts a UTF-32 code point to a sequence of one to four UTF-8 code units.
|
|
236
|
+
///
|
|
237
|
+
/// \details
|
|
238
|
+
/// Note that the conversion performed by this member function is "unsafe", in that it does
|
|
239
|
+
/// not check for invalid/illegal values of the input code point.
|
|
240
|
+
///
|
|
241
|
+
/// \param cdpt
|
|
242
|
+
/// The source code point.
|
|
243
|
+
/// \param pDst
|
|
244
|
+
/// A reference to non-null pointer defining the beginning of the output range of code units.
|
|
245
|
+
///
|
|
246
|
+
/// \returns
|
|
247
|
+
/// The number of UTF-8 code units resulting from the conversion of `cdpt`.
|
|
248
|
+
//--------------------------------------------------------------------------------------------------
|
|
249
|
+
//
|
|
250
|
+
KEWB_FORCE_INLINE std::uint32_t
UtfUtils::GetCodeUnits(char32_t cdpt, char8_t*& pDst) noexcept
{
    //- Writes 1 to 4 code units through pDst (advancing it) and returns how many
    //  were written.  Values above 0x10FFFF write nothing and return 0.

    //- 00..7F: a single code unit.
    if (cdpt <= 0x7F)
    {
        *pDst++ = static_cast<char8_t>(cdpt);
        return 1;
    }

    //- 80..7FF: leading byte 110xxxxx plus one continuation byte.
    if (cdpt <= 0x7FF)
    {
        auto const  lead  = 0xC0 | ((cdpt >> 6) & 0x1F);
        auto const  tail0 = 0x80 | (cdpt & 0x3F);

        *pDst++ = static_cast<char8_t>(lead);
        *pDst++ = static_cast<char8_t>(tail0);
        return 2;
    }

    //- 800..FFFF: leading byte 1110xxxx plus two continuation bytes.
    if (cdpt <= 0xFFFF)
    {
        auto const  lead  = 0xE0 | ((cdpt >> 12) & 0x0F);
        auto const  tail1 = 0x80 | ((cdpt >> 6) & 0x3F);
        auto const  tail0 = 0x80 | (cdpt & 0x3F);

        *pDst++ = static_cast<char8_t>(lead);
        *pDst++ = static_cast<char8_t>(tail1);
        *pDst++ = static_cast<char8_t>(tail0);
        return 3;
    }

    //- 10000..10FFFF: leading byte 11110xxx plus three continuation bytes.
    if (cdpt <= 0x10FFFF)
    {
        auto const  lead  = 0xF0 | ((cdpt >> 18) & 0x07);
        auto const  tail2 = 0x80 | ((cdpt >> 12) & 0x3F);
        auto const  tail1 = 0x80 | ((cdpt >> 6) & 0x3F);
        auto const  tail0 = 0x80 | (cdpt & 0x3F);

        *pDst++ = static_cast<char8_t>(lead);
        *pDst++ = static_cast<char8_t>(tail2);
        *pDst++ = static_cast<char8_t>(tail1);
        *pDst++ = static_cast<char8_t>(tail0);
        return 4;
    }

    return 0;
}
|
|
281
|
+
|
|
282
|
+
//--------------------------------------------------------------------------------------------------
|
|
283
|
+
/// \brief Converts a UTF-32 code point to a sequence of one or two UTF-16 code units.
|
|
284
|
+
///
|
|
285
|
+
/// \details
|
|
286
|
+
/// Note that the conversion performed by this member function is "unsafe", in that it does
|
|
287
|
+
/// not check for invalid/illegal values of the input code point.
|
|
288
|
+
///
|
|
289
|
+
/// \param cdpt
|
|
290
|
+
/// The source code point to convert.
|
|
291
|
+
/// \param pDst
|
|
292
|
+
/// A reference to a non-null pointer defining the beginning of the output range of code units.
|
|
293
|
+
///
|
|
294
|
+
/// \returns
|
|
295
|
+
/// The number of UTF-16 code units resulting from the conversion of `cdpt`.
|
|
296
|
+
//--------------------------------------------------------------------------------------------------
|
|
297
|
+
//
|
|
298
|
+
KEWB_FORCE_INLINE std::uint32_t
UtfUtils::GetCodeUnits(char32_t cdpt, char16_t*& pDst) noexcept
{
    //- Encodes `cdpt` as UTF-16 code units written through `pDst`, which is advanced past
    //- the units written.  Returns the number of units written (1 or 2).  No range check
    //- is made on `cdpt`.

    if (cdpt >= 0x10000)
    {
        //- Two units (surrogate pair).  0xD7C0 equals 0xD800 - (0x10000 >> 10), so the
        //- subtraction of 0x10000 is folded into the base added to the high ten bits.
        char16_t const first  = static_cast<char16_t>(0xD7C0 + (cdpt >> 10));
        char16_t const second = static_cast<char16_t>(0xDC00 + (cdpt & 0x3FF));

        *pDst++ = first;
        *pDst++ = second;
        return 2;
    }

    //- Below 0x10000: the value is stored directly as a single unit.
    *pDst++ = static_cast<char16_t>(cdpt);
    return 1;
}
|
|
313
|
+
|
|
314
|
+
//--------------------------------------------------------------------------------------------------
|
|
315
|
+
/// \brief Converts a sequence of UTF-8 code units to a sequence of UTF-32 code points.
|
|
316
|
+
///
|
|
317
|
+
/// \param pSrc
|
|
318
|
+
/// A non-null pointer defining the beginning of the code unit input range.
|
|
319
|
+
/// \param pSrcEnd
|
|
320
|
+
/// A non-null past-the-end pointer defining the end of the code unit input range.
|
|
321
|
+
/// \param pDst
|
|
322
|
+
/// A non-null pointer defining the beginning of the code point output range.
|
|
323
|
+
///
|
|
324
|
+
/// \returns
|
|
325
|
+
/// If successful, the number of UTF-32 code points written; otherwise -1 is returned to
|
|
326
|
+
/// indicate an error was encountered.
|
|
327
|
+
//--------------------------------------------------------------------------------------------------
|
|
328
|
+
//
|
|
329
|
+
KEWB_FORCE_INLINE ptrdiff_t
UtfUtils::BasicConvert(char8_t const* pSrc, char8_t const* pSrcEnd, char32_t* pDst) noexcept
{
    //- Forwarding wrapper: hands the three pointers to `BasicBigTableConvert` and returns
    //- its result unchanged.
    ptrdiff_t const result = BasicBigTableConvert(pSrc, pSrcEnd, pDst);

    return result;
}
|
|
334
|
+
|
|
335
|
+
//--------------------------------------------------------------------------------------------------
|
|
336
|
+
/// \brief Converts a sequence of UTF-8 code units to a sequence of UTF-32 code points.
|
|
337
|
+
///
|
|
338
|
+
/// \param pSrc
|
|
339
|
+
/// A non-null pointer defining the beginning of the code unit input range.
|
|
340
|
+
/// \param pSrcEnd
|
|
341
|
+
/// A non-null past-the-end pointer defining the end of the code unit input range.
|
|
342
|
+
/// \param pDst
|
|
343
|
+
/// A non-null pointer defining the beginning of the code point output range.
|
|
344
|
+
///
|
|
345
|
+
/// \returns
|
|
346
|
+
/// If successful, the number of UTF-32 code points written; otherwise -1 is returned to
|
|
347
|
+
/// indicate an error was encountered.
|
|
348
|
+
//--------------------------------------------------------------------------------------------------
|
|
349
|
+
//
|
|
350
|
+
KEWB_FORCE_INLINE ptrdiff_t
UtfUtils::FastConvert(char8_t const* pSrc, char8_t const* pSrcEnd, char32_t* pDst) noexcept
{
    //- Forwarding wrapper: hands the three pointers to `FastBigTableConvert` and returns
    //- its result unchanged.
    ptrdiff_t const result = FastBigTableConvert(pSrc, pSrcEnd, pDst);

    return result;
}
|
|
355
|
+
|
|
356
|
+
//--------------------------------------------------------------------------------------------------
|
|
357
|
+
/// \brief Converts a sequence of UTF-8 code units to a sequence of UTF-32 code points.
|
|
358
|
+
///
|
|
359
|
+
/// \param pSrc
|
|
360
|
+
/// A non-null pointer defining the beginning of the code unit input range.
|
|
361
|
+
/// \param pSrcEnd
|
|
362
|
+
/// A non-null past-the-end pointer defining the end of the code unit input range.
|
|
363
|
+
/// \param pDst
|
|
364
|
+
/// A non-null pointer defining the beginning of the code point output range.
|
|
365
|
+
///
|
|
366
|
+
/// \returns
|
|
367
|
+
/// If successful, the number of UTF-32 code points written; otherwise -1 is returned to
|
|
368
|
+
/// indicate an error was encountered.
|
|
369
|
+
//--------------------------------------------------------------------------------------------------
|
|
370
|
+
//
|
|
371
|
+
ptrdiff_t
|
|
372
|
+
UtfUtils::SseConvert(char8_t const* pSrc, char8_t const* pSrcEnd, char32_t* pDst) noexcept
|
|
373
|
+
{
|
|
374
|
+
return SseBigTableConvert(pSrc, pSrcEnd, pDst);
|
|
375
|
+
}
|
|
376
|
+
|
|
377
|
+
//--------------------------------------------------------------------------------------------------
|
|
378
|
+
/// \brief Converts a sequence of UTF-8 code units to a sequence of UTF-16 code units.
|
|
379
|
+
///
|
|
380
|
+
/// \param pSrc
|
|
381
|
+
/// A non-null pointer defining the beginning of the code unit input range.
|
|
382
|
+
/// \param pSrcEnd
|
|
383
|
+
/// A non-null past-the-end pointer defining the end of the code unit input range.
|
|
384
|
+
/// \param pDst
|
|
385
|
+
/// A non-null pointer defining the beginning of the code unit output range.
|
|
386
|
+
///
|
|
387
|
+
/// \returns
|
|
388
|
+
/// If successful, the number of UTF-16 code units written; otherwise -1 is returned to
|
|
389
|
+
/// indicate an error was encountered.
|
|
390
|
+
//--------------------------------------------------------------------------------------------------
|
|
391
|
+
//
|
|
392
|
+
KEWB_FORCE_INLINE ptrdiff_t
UtfUtils::BasicConvert(char8_t const* pSrc, char8_t const* pSrcEnd, char16_t* pDst) noexcept
{
    //- Forwarding wrapper: hands the three pointers to `BasicBigTableConvert` and returns
    //- its result unchanged.
    ptrdiff_t const result = BasicBigTableConvert(pSrc, pSrcEnd, pDst);

    return result;
}
|
|
397
|
+
|
|
398
|
+
//--------------------------------------------------------------------------------------------------
|
|
399
|
+
/// \brief Converts a sequence of UTF-8 code units to a sequence of UTF-16 code units.
|
|
400
|
+
///
|
|
401
|
+
/// \param pSrc
|
|
402
|
+
/// A non-null pointer defining the beginning of the code unit input range.
|
|
403
|
+
/// \param pSrcEnd
|
|
404
|
+
/// A non-null past-the-end pointer defining the end of the code unit input range.
|
|
405
|
+
/// \param pDst
|
|
406
|
+
/// A non-null pointer defining the beginning of the code unit output range.
|
|
407
|
+
///
|
|
408
|
+
/// \returns
|
|
409
|
+
/// If successful, the number of UTF-16 code units written; otherwise -1 is returned to
|
|
410
|
+
/// indicate an error was encountered.
|
|
411
|
+
//--------------------------------------------------------------------------------------------------
|
|
412
|
+
//
|
|
413
|
+
KEWB_FORCE_INLINE ptrdiff_t
UtfUtils::FastConvert(char8_t const* pSrc, char8_t const* pSrcEnd, char16_t* pDst) noexcept
{
    //- Forwarding wrapper: hands the three pointers to `FastSmallTableConvert` and returns
    //- its result unchanged.
    //- NOTE(review): the char32_t overload of FastConvert forwards to the big-table
    //- variant instead; confirm the small-table choice here is intentional.
    ptrdiff_t const result = FastSmallTableConvert(pSrc, pSrcEnd, pDst);

    return result;
}
|
|
418
|
+
|
|
419
|
+
//--------------------------------------------------------------------------------------------------
|
|
420
|
+
/// \brief Converts a sequence of UTF-8 code units to a sequence of UTF-16 code units.
|
|
421
|
+
///
|
|
422
|
+
/// \param pSrc
|
|
423
|
+
/// A non-null pointer defining the beginning of the code unit input range.
|
|
424
|
+
/// \param pSrcEnd
|
|
425
|
+
/// A non-null past-the-end pointer defining the end of the code unit input range.
|
|
426
|
+
/// \param pDst
|
|
427
|
+
/// A non-null pointer defining the beginning of the code unit output range.
|
|
428
|
+
///
|
|
429
|
+
/// \returns
|
|
430
|
+
/// If successful, the number of UTF-16 code units written; otherwise -1 is returned to
|
|
431
|
+
/// indicate an error was encountered.
|
|
432
|
+
//--------------------------------------------------------------------------------------------------
|
|
433
|
+
//
|
|
434
|
+
ptrdiff_t
|
|
435
|
+
UtfUtils::SseConvert(char8_t const* pSrc, char8_t const* pSrcEnd, char16_t* pDst) noexcept
|
|
436
|
+
{
|
|
437
|
+
return SseBigTableConvert(pSrc, pSrcEnd, pDst);
|
|
438
|
+
}
|
|
439
|
+
|
|
440
|
+
//--------------------------------------------------------------------------------------------------
|
|
441
|
+
/// \brief Converts a sequence of UTF-8 code units to a UTF-32 code point.
|
|
442
|
+
///
|
|
443
|
+
/// \details
|
|
444
|
+
/// This static member function reads input octets and uses them to traverse a DFA that
|
|
445
|
+
/// recognizes valid sequences of UTF-8 code units. It is the heart of all non-ASCII
|
|
446
|
+
/// conversions in all member functions of this class. This function uses the "big"
|
|
447
|
+
/// first-unit lookup table and the state machine table to traverse the DFA.
|
|
448
|
+
///
|
|
449
|
+
/// \param pSrc
|
|
450
|
+
/// A reference to a non-null pointer defining the beginning of the code unit input range.
|
|
451
|
+
/// \param pSrcEnd
|
|
452
|
+
/// A non-null past-the-end pointer defining the end of the code unit input range.
|
|
453
|
+
/// \param cdpt
|
|
454
|
+
/// A reference to the output code point.
|
|
455
|
+
///
|
|
456
|
+
/// \returns
|
|
457
|
+
/// An internal flag describing the current DFA state.
|
|
458
|
+
//--------------------------------------------------------------------------------------------------
|
|
459
|
+
//
|
|
460
|
+
KEWB_FORCE_INLINE int32_t
UtfUtils::AdvanceWithBigTable(char8_t const*& pSrc, char8_t const* const pSrcEnd, char32_t& cdpt) noexcept
{
    //- Consume the first code unit.  Its descriptor supplies both the initial code point
    //- value and the state to continue from.  This first read is not compared against
    //- `pSrcEnd`.
    FirstUnitInfo const lead = smTables.maFirstUnitTable[*pSrc++];

    cdpt = lead.mFirstOctet;

    int32_t state = lead.mNextState;

    //- Keep consuming code units for as long as the state compares greater than ERR.
    while (state > ERR)
    {
        //- The input ran out before the sequence completed.
        if (!(pSrc < pSrcEnd))
        {
            return ERR;
        }

        char32_t const octet = *pSrc++;                     //- The current code unit

        cdpt = (cdpt << 6) | (octet & 0x3F);                //- Append its low six bits

        int32_t const klass = smTables.maOctetCategory[octet];  //- Its character class

        state = smTables.maTransitions[state + klass];      //- Follow the transition
    }

    return state;
}
|
|
488
|
+
|
|
489
|
+
//--------------------------------------------------------------------------------------------------
|
|
490
|
+
/// \brief Converts a sequence of UTF-8 code units to a UTF-32 code point.
|
|
491
|
+
///
|
|
492
|
+
/// \details
|
|
493
|
+
/// This static member function reads input octets and uses them to traverse a DFA that
|
|
494
|
+
/// recognizes valid sequences of UTF-8 code units. It is the heart of all non-ASCII
|
|
495
|
+
/// conversions in all member functions of this class. This function uses the "small"
|
|
496
|
+
/// first-unit lookup table and the state machine table to traverse the DFA.
|
|
497
|
+
///
|
|
498
|
+
/// \param pSrc
|
|
499
|
+
/// A reference to a non-null pointer defining the beginning of the code unit input range.
|
|
500
|
+
/// \param pSrcEnd
|
|
501
|
+
/// A non-null past-the-end pointer defining the end of the code unit input range.
|
|
502
|
+
/// \param cdpt
|
|
503
|
+
/// A reference to the output code point.
|
|
504
|
+
///
|
|
505
|
+
/// \returns
|
|
506
|
+
/// An internal flag describing the current DFA state.
|
|
507
|
+
//--------------------------------------------------------------------------------------------------
|
|
508
|
+
//
|
|
509
|
+
KEWB_FORCE_INLINE int32_t
UtfUtils::AdvanceWithSmallTable(char8_t const*& pSrc, char8_t const* const pSrcEnd, char32_t& cdpt) noexcept
{
    //- Consume the first code unit and classify it.  This first read is not compared
    //- against `pSrcEnd`.
    char32_t octet = *pSrc++;
    int32_t  klass = smTables.maOctetCategory[octet];

    //- The class selects a mask that keeps only the payload bits of the first unit.
    cdpt = smTables.maFirstOctetMask[klass] & octet;

    //- The first transition is indexed by the class alone.
    int32_t state = smTables.maTransitions[klass];

    //- Keep consuming code units for as long as the state compares greater than ERR.
    while (state > ERR)
    {
        //- The input ran out before the sequence completed.
        if (!(pSrc < pSrcEnd))
        {
            return ERR;
        }

        octet = *pSrc++;                                    //- The current code unit
        cdpt  = (cdpt << 6) | (octet & 0x3F);               //- Append its low six bits
        klass = smTables.maOctetCategory[octet];            //- Its character class
        state = smTables.maTransitions[state + klass];      //- Follow the transition
    }

    return state;
}
|
|
537
|
+
|
|
538
|
+
//--------------------------------------------------------------------------------------------------
|
|
539
|
+
/// \brief Converts a sequence of UTF-8 code units to a UTF-32 code point.
|
|
540
|
+
///
|
|
541
|
+
/// \details
|
|
542
|
+
/// This static member function reads input octets and uses them to traverse a DFA that
|
|
543
|
+
/// recognizes valid sequences of UTF-8 code units. It prints state information to `stdout`
|
|
544
|
+
/// while doing so. It is simpler and slower than its counterpart `NextCodePoint`.
|
|
545
|
+
///
|
|
546
|
+
/// \param pSrc
|
|
547
|
+
/// A reference to a non-null pointer defining the beginning of the code unit input range.
|
|
548
|
+
/// \param pSrcEnd
|
|
549
|
+
/// A non-null past-the-end pointer defining the end of the code unit input range.
|
|
550
|
+
/// \param cdpt
|
|
551
|
+
/// A reference to the output code point.
|
|
552
|
+
///
|
|
553
|
+
/// \returns
|
|
554
|
+
/// An internal flag describing the current DFA state.
|
|
555
|
+
//--------------------------------------------------------------------------------------------------
|
|
556
|
+
//
|
|
557
|
+
KEWB_FORCE_INLINE UtfUtils::State
UtfUtils::AdvanceWithTrace(char8_t const*& pSrc, char8_t const* pSrcEnd, char32_t& cdpt) noexcept
{
    //- Consume and classify the first code unit.  This first read is not compared
    //- against `pSrcEnd`.
    char32_t  octet = *pSrc++;
    CharClass klass = smTables.maOctetCategory[octet];

    //- Keep only the payload bits of the first unit.
    cdpt = smTables.maFirstOctetMask[klass] & octet;

    //- The walk starts in BGN; the first transition is indexed by the class alone.
    State from = BGN;
    State to   = smTables.maTransitions[klass];

    PrintStateData(from, klass, static_cast<char8_t>(octet), to);

    //- Keep consuming code units for as long as the pending state compares greater than
    //- ERR, reporting every transition taken.
    while (to > ERR)
    {
        //- The input ran out before the sequence completed.
        if (!(pSrc < pSrcEnd))
        {
            return ERR;
        }

        octet = *pSrc++;
        cdpt  = (cdpt << 6) | (octet & 0x3F);
        klass = smTables.maOctetCategory[octet];
        from  = to;
        to    = smTables.maTransitions[from + klass];

        PrintStateData(from, klass, static_cast<char8_t>(octet), to);
    }

    return to;
}
|
|
591
|
+
|
|
592
|
+
} //- namespace uu
|
|
593
|
+
SIMDUTF_UNTARGET_REGION
|
|
594
|
+
|
|
595
|
+
#endif //- KEWB_UNICODE_UTILS_H_DEFINED
|