react-native-quick-crypto 1.0.18 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/QuickCrypto.podspec +12 -38
- package/README.md +2 -0
- package/android/CMakeLists.txt +3 -0
- package/cpp/utils/HybridUtils.cpp +39 -77
- package/deps/simdutf/.clang-format +4 -0
- package/deps/simdutf/.github/ISSUE_TEMPLATE/bug_report.md +62 -0
- package/deps/simdutf/.github/ISSUE_TEMPLATE/config.yml +1 -0
- package/deps/simdutf/.github/ISSUE_TEMPLATE/feature_request.md +35 -0
- package/deps/simdutf/.github/ISSUE_TEMPLATE/standard-issue-template.md +29 -0
- package/deps/simdutf/.github/pull_request_template.md +51 -0
- package/deps/simdutf/.github/workflows/aarch64.yml +39 -0
- package/deps/simdutf/.github/workflows/alpine.yml +27 -0
- package/deps/simdutf/.github/workflows/amalgamation_demos.yml +34 -0
- package/deps/simdutf/.github/workflows/armv7.yml +32 -0
- package/deps/simdutf/.github/workflows/atomic_fuzz.yml +25 -0
- package/deps/simdutf/.github/workflows/cifuzz.yml +37 -0
- package/deps/simdutf/.github/workflows/clangformat.yml +36 -0
- package/deps/simdutf/.github/workflows/debian-latestcxxstandards.yml +40 -0
- package/deps/simdutf/.github/workflows/debian.yml +33 -0
- package/deps/simdutf/.github/workflows/documentation.yml +36 -0
- package/deps/simdutf/.github/workflows/emscripten.yml +19 -0
- package/deps/simdutf/.github/workflows/loongarch64-gcc-14.2.yml +39 -0
- package/deps/simdutf/.github/workflows/macos-latest.yml +29 -0
- package/deps/simdutf/.github/workflows/msys2-clang.yml +48 -0
- package/deps/simdutf/.github/workflows/msys2.yml +50 -0
- package/deps/simdutf/.github/workflows/ppc64le.yml +29 -0
- package/deps/simdutf/.github/workflows/rvv-1024-clang-18.yml +35 -0
- package/deps/simdutf/.github/workflows/rvv-128-clang-17.yml +35 -0
- package/deps/simdutf/.github/workflows/rvv-256-gcc-14.yml +31 -0
- package/deps/simdutf/.github/workflows/s390x.yml +29 -0
- package/deps/simdutf/.github/workflows/selective-amalgamation.yml +29 -0
- package/deps/simdutf/.github/workflows/typos.yml +19 -0
- package/deps/simdutf/.github/workflows/ubuntu22-cxx20.yml +30 -0
- package/deps/simdutf/.github/workflows/ubuntu22.yml +32 -0
- package/deps/simdutf/.github/workflows/ubuntu22_gcc12.yml +27 -0
- package/deps/simdutf/.github/workflows/ubuntu22sani.yml +29 -0
- package/deps/simdutf/.github/workflows/ubuntu24-cxxstandards.yml +34 -0
- package/deps/simdutf/.github/workflows/ubuntu24-unsignedchar.yml +34 -0
- package/deps/simdutf/.github/workflows/ubuntu24.yml +32 -0
- package/deps/simdutf/.github/workflows/ubuntu24sani.yml +36 -0
- package/deps/simdutf/.github/workflows/ubuntu24sani_clang.yml +29 -0
- package/deps/simdutf/.github/workflows/vs17-arm-ci.yml +21 -0
- package/deps/simdutf/.github/workflows/vs17-ci-cxx20.yml +41 -0
- package/deps/simdutf/.github/workflows/vs17-ci.yml +41 -0
- package/deps/simdutf/.github/workflows/vs17-clang-ci.yml +41 -0
- package/deps/simdutf/.github/workflows/vs17-cxxstandards.yml +36 -0
- package/deps/simdutf/AI_USAGE_POLICY.md +56 -0
- package/deps/simdutf/AUTHORS +6 -0
- package/deps/simdutf/CMakeLists.txt +231 -0
- package/deps/simdutf/CONTRIBUTING.md +214 -0
- package/deps/simdutf/CONTRIBUTORS +1 -0
- package/deps/simdutf/Doxyfile +2584 -0
- package/deps/simdutf/LICENSE-APACHE +201 -0
- package/deps/simdutf/LICENSE-MIT +18 -0
- package/deps/simdutf/Makefile.crosscompile +54 -0
- package/deps/simdutf/README-RVV.md +16 -0
- package/deps/simdutf/README.md +2782 -0
- package/deps/simdutf/SECURITY.md +8 -0
- package/deps/simdutf/benchmarks/CMakeLists.txt +101 -0
- package/deps/simdutf/benchmarks/alignment.cpp +150 -0
- package/deps/simdutf/benchmarks/base64/CMakeLists.txt +30 -0
- package/deps/simdutf/benchmarks/base64/benchmark_base64.cpp +875 -0
- package/deps/simdutf/benchmarks/base64/libbase64_spaces.h +49 -0
- package/deps/simdutf/benchmarks/base64/node_base64.h +227 -0
- package/deps/simdutf/benchmarks/base64/openssl3_base64.h +334 -0
- package/deps/simdutf/benchmarks/benchmark.cpp +65 -0
- package/deps/simdutf/benchmarks/benchmark_to_well_formed_utf16.cpp +347 -0
- package/deps/simdutf/benchmarks/competition/.clang-format-ignore +5 -0
- package/deps/simdutf/benchmarks/competition/CppCon2018/utf_utils.cpp +1276 -0
- package/deps/simdutf/benchmarks/competition/CppCon2018/utf_utils.h +595 -0
- package/deps/simdutf/benchmarks/competition/README.md +7 -0
- package/deps/simdutf/benchmarks/competition/hoehrmann/hoehrmann.h +91 -0
- package/deps/simdutf/benchmarks/competition/inoue2008/inoue_utf8_to_utf16.h +444 -0
- package/deps/simdutf/benchmarks/competition/inoue2008/inoue_utf8_to_utf16_tables.h +13183 -0
- package/deps/simdutf/benchmarks/competition/inoue2008/script.py +73 -0
- package/deps/simdutf/benchmarks/competition/llvm/ConvertUTF.cpp +738 -0
- package/deps/simdutf/benchmarks/competition/llvm/ConvertUTF.h +293 -0
- package/deps/simdutf/benchmarks/competition/u8u16/COPYRIGHT +8 -0
- package/deps/simdutf/benchmarks/competition/u8u16/Makefile +44 -0
- package/deps/simdutf/benchmarks/competition/u8u16/OSL3.0.txt +169 -0
- package/deps/simdutf/benchmarks/competition/u8u16/Profiling/BOM_Profiler.h +148 -0
- package/deps/simdutf/benchmarks/competition/u8u16/Profiling/i386_timer.h +45 -0
- package/deps/simdutf/benchmarks/competition/u8u16/Profiling/ppc_timer.c +34 -0
- package/deps/simdutf/benchmarks/competition/u8u16/README +56 -0
- package/deps/simdutf/benchmarks/competition/u8u16/config/config_defs.h +43 -0
- package/deps/simdutf/benchmarks/competition/u8u16/config/g4_config.h +27 -0
- package/deps/simdutf/benchmarks/competition/u8u16/config/mmx_config.h +16 -0
- package/deps/simdutf/benchmarks/competition/u8u16/config/p4_config.h +18 -0
- package/deps/simdutf/benchmarks/competition/u8u16/config/p4_ideal_config.h +16 -0
- package/deps/simdutf/benchmarks/competition/u8u16/config/spu_config.h +28 -0
- package/deps/simdutf/benchmarks/competition/u8u16/config/ssse3_config.h +20 -0
- package/deps/simdutf/benchmarks/competition/u8u16/iconv_u8u16.c +2 -0
- package/deps/simdutf/benchmarks/competition/u8u16/lib/altivec_simd.h +440 -0
- package/deps/simdutf/benchmarks/competition/u8u16/lib/libgen/make_basic_ops.py +121 -0
- package/deps/simdutf/benchmarks/competition/u8u16/lib/libgen/make_half_operand_versions.py +158 -0
- package/deps/simdutf/benchmarks/competition/u8u16/lib/libgen/make_test.py +270 -0
- package/deps/simdutf/benchmarks/competition/u8u16/lib/mmx_simd.h +141 -0
- package/deps/simdutf/benchmarks/competition/u8u16/lib/mmx_simd_basic.h +216 -0
- package/deps/simdutf/benchmarks/competition/u8u16/lib/mmx_simd_built_in.h +119 -0
- package/deps/simdutf/benchmarks/competition/u8u16/lib/mmx_simd_modified.h +2430 -0
- package/deps/simdutf/benchmarks/competition/u8u16/lib/outline.txt +39 -0
- package/deps/simdutf/benchmarks/competition/u8u16/lib/spu_simd.h +421 -0
- package/deps/simdutf/benchmarks/competition/u8u16/lib/sse_simd.h +836 -0
- package/deps/simdutf/benchmarks/competition/u8u16/lib/stdint.h +222 -0
- package/deps/simdutf/benchmarks/competition/u8u16/libu8u16_BE.c +4 -0
- package/deps/simdutf/benchmarks/competition/u8u16/libu8u16_LE.c +5 -0
- package/deps/simdutf/benchmarks/competition/u8u16/proto/u8u16.py +390 -0
- package/deps/simdutf/benchmarks/competition/u8u16/src/Makefile +18 -0
- package/deps/simdutf/benchmarks/competition/u8u16/src/bytelex.h +448 -0
- package/deps/simdutf/benchmarks/competition/u8u16/src/charsets/ASCII_EBCDIC.h +284 -0
- package/deps/simdutf/benchmarks/competition/u8u16/src/libu8u16.c +1975 -0
- package/deps/simdutf/benchmarks/competition/u8u16/src/libu8u16.pdf +0 -0
- package/deps/simdutf/benchmarks/competition/u8u16/src/libu8u16.w +2263 -0
- package/deps/simdutf/benchmarks/competition/u8u16/src/multiliteral.h +239 -0
- package/deps/simdutf/benchmarks/competition/u8u16/src/u8u16.c +232 -0
- package/deps/simdutf/benchmarks/competition/u8u16/src/x8x16.c +194 -0
- package/deps/simdutf/benchmarks/competition/u8u16/src/xml_error.c +193 -0
- package/deps/simdutf/benchmarks/competition/u8u16/src/xml_error.h +167 -0
- package/deps/simdutf/benchmarks/competition/u8u16/src/xmldecl.c +288 -0
- package/deps/simdutf/benchmarks/competition/u8u16/src/xmldecl.h +117 -0
- package/deps/simdutf/benchmarks/competition/u8u16/u8u16_g4.c +2 -0
- package/deps/simdutf/benchmarks/competition/u8u16/u8u16_mmx.c +2 -0
- package/deps/simdutf/benchmarks/competition/u8u16/u8u16_p4.c +3 -0
- package/deps/simdutf/benchmarks/competition/u8u16/u8u16_p4_ideal.c +2 -0
- package/deps/simdutf/benchmarks/competition/u8u16/u8u16_spu.c +2 -0
- package/deps/simdutf/benchmarks/competition/u8u16/u8u16_ssse3.c +3 -0
- package/deps/simdutf/benchmarks/competition/u8u16/x8x16_p4.c +2 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/LICENSE +23 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/data/test_minimal.txt +44 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/readme.md +106 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_clang_corr_tests.cmd +11 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_clang_corr_tests.sh +13 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_gcc_corr_tests.sh +13 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_gcc_example.sh +13 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_gcc_file_conv.sh +14 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_gcc_iconv_lib.sh +11 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_gcc_iconv_sample.sh +8 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_mingw_corr_tests.cmd +12 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_mingw_example.cmd +13 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_mingw_file_conv.cmd +14 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_mingw_iconv_lib.cmd +11 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_mingw_iconv_sample.cmd +8 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_msvc_corr_tests.cmd +11 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_msvc_example.cmd +12 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_msvc_file_conv.cmd +13 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_msvc_iconv_lib.cmd +10 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/build_msvc_iconv_sample.cmd +9 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/html_table.py +25 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/measure.py +94 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/resize.py +20 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/wipe_all.cmd +2 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/scripts/wipe_interm.cmd +1 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/base/CustomMemcpy.h +75 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/base/PerfDefs.h +47 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/base/Timing.cpp +17 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/base/Timing.h +76 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/AllProcessors.cpp +35 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/BaseBufferProcessor.cpp +117 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/BaseBufferProcessor.h +210 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/BufferDecoder.h +158 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/BufferEncoder.h +104 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/ProcessorPlugins.h +334 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/buffer/ProcessorSelector.h +186 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/core/DecoderLut.cpp +140 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/core/DecoderLut.h +42 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/core/DecoderProcess.h +100 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/core/Dfa.h +57 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/core/EncoderLut.cpp +85 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/core/EncoderLut.h +27 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/core/EncoderProcess.h +126 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/core/ProcessTrivial.h +108 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/iconv/iconv.cpp +139 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/iconv/iconv.h +74 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/message/MessageConverter.cpp +65 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/message/MessageConverter.h +91 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/tests/CorrectnessTests.cpp +772 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/tests/Example.cpp +12 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/tests/FileConverter.cpp +486 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/tests/iconv_sample.c +162 -0
- package/deps/simdutf/benchmarks/competition/utf8lut/src/utf8lut.h +15 -0
- package/deps/simdutf/benchmarks/competition/utf8sse4/fromutf8-sse.cpp +292 -0
- package/deps/simdutf/benchmarks/competition/utfcpp/LICENSE +23 -0
- package/deps/simdutf/benchmarks/competition/utfcpp/README.md +1503 -0
- package/deps/simdutf/benchmarks/competition/utfcpp/source/utf8/checked.h +335 -0
- package/deps/simdutf/benchmarks/competition/utfcpp/source/utf8/core.h +338 -0
- package/deps/simdutf/benchmarks/competition/utfcpp/source/utf8/cpp11.h +103 -0
- package/deps/simdutf/benchmarks/competition/utfcpp/source/utf8/cpp17.h +103 -0
- package/deps/simdutf/benchmarks/competition/utfcpp/source/utf8/unchecked.h +274 -0
- package/deps/simdutf/benchmarks/competition/utfcpp/source/utf8.h +34 -0
- package/deps/simdutf/benchmarks/dataset/README.md +155 -0
- package/deps/simdutf/benchmarks/dataset/emoji.txt +204 -0
- package/deps/simdutf/benchmarks/dataset/scripts/utf8type.py +40 -0
- package/deps/simdutf/benchmarks/dataset/wikipedia_mars/Makefile +80 -0
- package/deps/simdutf/benchmarks/dataset/wikipedia_mars/convert_to_utf6.py +20 -0
- package/deps/simdutf/benchmarks/find/CMakeLists.txt +6 -0
- package/deps/simdutf/benchmarks/find/findbenchmark.cpp +63 -0
- package/deps/simdutf/benchmarks/find/findbenchmarker.h +46 -0
- package/deps/simdutf/benchmarks/shortbench.cpp +555 -0
- package/deps/simdutf/benchmarks/src/CMakeLists.txt +52 -0
- package/deps/simdutf/benchmarks/src/apple_arm_events.h +1104 -0
- package/deps/simdutf/benchmarks/src/benchmark.cpp +3899 -0
- package/deps/simdutf/benchmarks/src/benchmark.h +317 -0
- package/deps/simdutf/benchmarks/src/benchmark_base.cpp +144 -0
- package/deps/simdutf/benchmarks/src/benchmark_base.h +98 -0
- package/deps/simdutf/benchmarks/src/cmdline.cpp +176 -0
- package/deps/simdutf/benchmarks/src/cmdline.h +35 -0
- package/deps/simdutf/benchmarks/src/event_counter.h +162 -0
- package/deps/simdutf/benchmarks/src/linux-perf-events.h +104 -0
- package/deps/simdutf/benchmarks/stream.cpp +209 -0
- package/deps/simdutf/benchmarks/threaded.cpp +123 -0
- package/deps/simdutf/cmake/CPM.cmake +1363 -0
- package/deps/simdutf/cmake/JoinPaths.cmake +23 -0
- package/deps/simdutf/cmake/add_cpp_test.cmake +68 -0
- package/deps/simdutf/cmake/simdutf-config.cmake.in +2 -0
- package/deps/simdutf/cmake/simdutf-flags.cmake +26 -0
- package/deps/simdutf/cmake/toolchains-ci/riscv64-linux-gnu.cmake +4 -0
- package/deps/simdutf/cmake/toolchains-dev/README.md +32 -0
- package/deps/simdutf/cmake/toolchains-dev/aarch64.cmake +14 -0
- package/deps/simdutf/cmake/toolchains-dev/loongarch64.cmake +22 -0
- package/deps/simdutf/cmake/toolchains-dev/powerpc64.cmake +16 -0
- package/deps/simdutf/cmake/toolchains-dev/powerpc64le.cmake +16 -0
- package/deps/simdutf/cmake/toolchains-dev/riscv64.cmake +16 -0
- package/deps/simdutf/cmake/toolchains-dev/rvv-spike.cmake +38 -0
- package/deps/simdutf/doc/avx512.png +0 -0
- package/deps/simdutf/doc/logo.png +0 -0
- package/deps/simdutf/doc/logo.svg +165 -0
- package/deps/simdutf/doc/node2023.png +0 -0
- package/deps/simdutf/doc/shortinput.md +78 -0
- package/deps/simdutf/doc/utf16utf8.png +0 -0
- package/deps/simdutf/doc/utf8utf16.png +0 -0
- package/deps/simdutf/doc/widelogo.png +0 -0
- package/deps/simdutf/doxygen.py +50 -0
- package/deps/simdutf/fuzz/.clang-format +9 -0
- package/deps/simdutf/fuzz/CMakeLists.txt +45 -0
- package/deps/simdutf/fuzz/README.md +168 -0
- package/deps/simdutf/fuzz/atomic_base64.cpp +448 -0
- package/deps/simdutf/fuzz/base64.cpp +278 -0
- package/deps/simdutf/fuzz/build.sh +83 -0
- package/deps/simdutf/fuzz/conversion.cpp +669 -0
- package/deps/simdutf/fuzz/helpers/.clang-format-ignore +1 -0
- package/deps/simdutf/fuzz/helpers/common.h +135 -0
- package/deps/simdutf/fuzz/helpers/nameof.hpp +1258 -0
- package/deps/simdutf/fuzz/main.cpp +72 -0
- package/deps/simdutf/fuzz/minimize_and_cleanse.sh +87 -0
- package/deps/simdutf/fuzz/misc.cpp +216 -0
- package/deps/simdutf/fuzz/random_fuzz.sh +154 -0
- package/deps/simdutf/fuzz/roundtrip.cpp +588 -0
- package/deps/simdutf/fuzz/safe_conversion.cpp +104 -0
- package/deps/simdutf/include/simdutf/avx512.h +79 -0
- package/deps/simdutf/include/simdutf/base64_implementation.h +158 -0
- package/deps/simdutf/include/simdutf/base64_tables.h +887 -0
- package/deps/simdutf/include/simdutf/common_defs.h +186 -0
- package/deps/simdutf/include/simdutf/compiler_check.h +50 -0
- package/deps/simdutf/include/simdutf/constexpr_ptr.h +138 -0
- package/deps/simdutf/include/simdutf/encoding_types.h +189 -0
- package/deps/simdutf/include/simdutf/error.h +126 -0
- package/deps/simdutf/include/simdutf/implementation.h +7081 -0
- package/deps/simdutf/include/simdutf/internal/isadetection.h +325 -0
- package/deps/simdutf/include/simdutf/portability.h +285 -0
- package/deps/simdutf/include/simdutf/scalar/ascii.h +86 -0
- package/deps/simdutf/include/simdutf/scalar/atomic_util.h +105 -0
- package/deps/simdutf/include/simdutf/scalar/base64.h +911 -0
- package/deps/simdutf/include/simdutf/scalar/latin1.h +26 -0
- package/deps/simdutf/include/simdutf/scalar/latin1_to_utf16/latin1_to_utf16.h +52 -0
- package/deps/simdutf/include/simdutf/scalar/latin1_to_utf32/latin1_to_utf32.h +27 -0
- package/deps/simdutf/include/simdutf/scalar/latin1_to_utf8/latin1_to_utf8.h +191 -0
- package/deps/simdutf/include/simdutf/scalar/swap_bytes.h +35 -0
- package/deps/simdutf/include/simdutf/scalar/utf16.h +226 -0
- package/deps/simdutf/include/simdutf/scalar/utf16_to_latin1/utf16_to_latin1.h +108 -0
- package/deps/simdutf/include/simdutf/scalar/utf16_to_latin1/valid_utf16_to_latin1.h +40 -0
- package/deps/simdutf/include/simdutf/scalar/utf16_to_utf32/utf16_to_utf32.h +86 -0
- package/deps/simdutf/include/simdutf/scalar/utf16_to_utf32/valid_utf16_to_utf32.h +44 -0
- package/deps/simdutf/include/simdutf/scalar/utf16_to_utf8/utf16_to_utf8.h +295 -0
- package/deps/simdutf/include/simdutf/scalar/utf16_to_utf8/valid_utf16_to_utf8.h +91 -0
- package/deps/simdutf/include/simdutf/scalar/utf32.h +82 -0
- package/deps/simdutf/include/simdutf/scalar/utf32_to_latin1/utf32_to_latin1.h +68 -0
- package/deps/simdutf/include/simdutf/scalar/utf32_to_latin1/valid_utf32_to_latin1.h +67 -0
- package/deps/simdutf/include/simdutf/scalar/utf32_to_utf16/utf32_to_utf16.h +84 -0
- package/deps/simdutf/include/simdutf/scalar/utf32_to_utf16/valid_utf32_to_utf16.h +44 -0
- package/deps/simdutf/include/simdutf/scalar/utf32_to_utf8/utf32_to_utf8.h +142 -0
- package/deps/simdutf/include/simdutf/scalar/utf32_to_utf8/valid_utf32_to_utf8.h +72 -0
- package/deps/simdutf/include/simdutf/scalar/utf8.h +326 -0
- package/deps/simdutf/include/simdutf/scalar/utf8_to_latin1/utf8_to_latin1.h +225 -0
- package/deps/simdutf/include/simdutf/scalar/utf8_to_latin1/valid_utf8_to_latin1.h +87 -0
- package/deps/simdutf/include/simdutf/scalar/utf8_to_utf16/utf8_to_utf16.h +342 -0
- package/deps/simdutf/include/simdutf/scalar/utf8_to_utf16/valid_utf8_to_utf16.h +106 -0
- package/deps/simdutf/include/simdutf/scalar/utf8_to_utf32/utf8_to_utf32.h +299 -0
- package/deps/simdutf/include/simdutf/scalar/utf8_to_utf32/valid_utf8_to_utf32.h +83 -0
- package/deps/simdutf/include/simdutf/simdutf_version.h +26 -0
- package/deps/simdutf/include/simdutf.h +26 -0
- package/deps/simdutf/include/simdutf_c.h +342 -0
- package/deps/simdutf/riscv/Dockerfile +16 -0
- package/deps/simdutf/riscv/README.md +24 -0
- package/deps/simdutf/riscv/remove-docker-station +8 -0
- package/deps/simdutf/riscv/run-docker-station +31 -0
- package/deps/simdutf/scripts/.flake8 +2 -0
- package/deps/simdutf/scripts/Makefile +2 -0
- package/deps/simdutf/scripts/README_ADD_FUNCTION.md +49 -0
- package/deps/simdutf/scripts/add_function.py +330 -0
- package/deps/simdutf/scripts/amalgamation_tests.py +156 -0
- package/deps/simdutf/scripts/base64/Makefile +2 -0
- package/deps/simdutf/scripts/base64/README.md +2 -0
- package/deps/simdutf/scripts/base64/avx512.py +76 -0
- package/deps/simdutf/scripts/base64/neon_decode.py +143 -0
- package/deps/simdutf/scripts/base64/neon_generate_lut.py +101 -0
- package/deps/simdutf/scripts/base64/sse.py +252 -0
- package/deps/simdutf/scripts/base64/sseregular.py +160 -0
- package/deps/simdutf/scripts/base64/sseurl.py +283 -0
- package/deps/simdutf/scripts/base64/table.py +59 -0
- package/deps/simdutf/scripts/base64bench_print.py +145 -0
- package/deps/simdutf/scripts/benchmark-all.py +119 -0
- package/deps/simdutf/scripts/benchmark_print.py +324 -0
- package/deps/simdutf/scripts/check_feature_macros.py +156 -0
- package/deps/simdutf/scripts/check_typos.sh +13 -0
- package/deps/simdutf/scripts/clang_format.sh +35 -0
- package/deps/simdutf/scripts/clang_format_docker.sh +38 -0
- package/deps/simdutf/scripts/common.py +24 -0
- package/deps/simdutf/scripts/compilation_benchmark.py +55 -0
- package/deps/simdutf/scripts/compile_many_variations.sh +64 -0
- package/deps/simdutf/scripts/create_latex_table.py +62 -0
- package/deps/simdutf/scripts/docker/Dockerfile +14 -0
- package/deps/simdutf/scripts/docker/Makefile +9 -0
- package/deps/simdutf/scripts/docker/README.md +30 -0
- package/deps/simdutf/scripts/docker/llvm.gpg +0 -0
- package/deps/simdutf/scripts/ppc64_convert_utf16_to_utf8.py +155 -0
- package/deps/simdutf/scripts/prepare_doxygen.sh +21 -0
- package/deps/simdutf/scripts/release.py +197 -0
- package/deps/simdutf/scripts/shortinputplots.py +97 -0
- package/deps/simdutf/scripts/sse_convert_utf16_to_utf8.py +422 -0
- package/deps/simdutf/scripts/sse_convert_utf32_to_utf16.py +105 -0
- package/deps/simdutf/scripts/sse_utf8_utf16_decode.py +186 -0
- package/deps/simdutf/scripts/sse_validate_utf16le_proof.py +137 -0
- package/deps/simdutf/scripts/sse_validate_utf16le_testcases.py +129 -0
- package/deps/simdutf/scripts/table.py +207 -0
- package/deps/simdutf/scripts/tests/new.txt +33 -0
- package/deps/simdutf/scripts/tests/old.txt +33 -0
- package/deps/simdutf/scripts/tests/results.txt +272 -0
- package/deps/simdutf/simdutf.pc.in +11 -0
- package/deps/simdutf/singleheader/.flake8 +2 -0
- package/deps/simdutf/singleheader/CMakeLists.txt +64 -0
- package/deps/simdutf/singleheader/README-dev.md +81 -0
- package/deps/simdutf/singleheader/README.md +19 -0
- package/deps/simdutf/singleheader/amalgamate.py +513 -0
- package/deps/simdutf/singleheader/amalgamation_demo.c +59 -0
- package/deps/simdutf/singleheader/amalgamation_demo.cpp +54 -0
- package/deps/simdutf/singleheader/test-features.py +262 -0
- package/deps/simdutf/src/CMakeLists.txt +78 -0
- package/deps/simdutf/src/arm64/arm_base64.cpp +791 -0
- package/deps/simdutf/src/arm64/arm_convert_latin1_to_utf16.cpp +24 -0
- package/deps/simdutf/src/arm64/arm_convert_latin1_to_utf32.cpp +24 -0
- package/deps/simdutf/src/arm64/arm_convert_latin1_to_utf8.cpp +70 -0
- package/deps/simdutf/src/arm64/arm_convert_utf16_to_latin1.cpp +61 -0
- package/deps/simdutf/src/arm64/arm_convert_utf16_to_utf32.cpp +185 -0
- package/deps/simdutf/src/arm64/arm_convert_utf16_to_utf8.cpp +780 -0
- package/deps/simdutf/src/arm64/arm_convert_utf32_to_latin1.cpp +60 -0
- package/deps/simdutf/src/arm64/arm_convert_utf32_to_utf16.cpp +208 -0
- package/deps/simdutf/src/arm64/arm_convert_utf32_to_utf8.cpp +505 -0
- package/deps/simdutf/src/arm64/arm_convert_utf8_to_latin1.cpp +69 -0
- package/deps/simdutf/src/arm64/arm_convert_utf8_to_utf16.cpp +313 -0
- package/deps/simdutf/src/arm64/arm_convert_utf8_to_utf32.cpp +179 -0
- package/deps/simdutf/src/arm64/arm_find.cpp +199 -0
- package/deps/simdutf/src/arm64/arm_utf16fix.cpp +185 -0
- package/deps/simdutf/src/arm64/arm_validate_utf16.cpp +165 -0
- package/deps/simdutf/src/arm64/arm_validate_utf32le.cpp +65 -0
- package/deps/simdutf/src/arm64/implementation.cpp +1442 -0
- package/deps/simdutf/src/encoding_types.cpp +67 -0
- package/deps/simdutf/src/error.cpp +3 -0
- package/deps/simdutf/src/fallback/implementation.cpp +589 -0
- package/deps/simdutf/src/generic/ascii_validation.h +50 -0
- package/deps/simdutf/src/generic/base64.h +233 -0
- package/deps/simdutf/src/generic/base64lengths.h +63 -0
- package/deps/simdutf/src/generic/buf_block_reader.h +109 -0
- package/deps/simdutf/src/generic/find.h +75 -0
- package/deps/simdutf/src/generic/utf16/change_endianness.h +24 -0
- package/deps/simdutf/src/generic/utf16/count_code_points_bytemask.h +58 -0
- package/deps/simdutf/src/generic/utf16/to_well_formed.h +93 -0
- package/deps/simdutf/src/generic/utf16/utf32_length_from_utf16.h +15 -0
- package/deps/simdutf/src/generic/utf16/utf8_length_from_utf16.h +35 -0
- package/deps/simdutf/src/generic/utf16/utf8_length_from_utf16_bytemask.h +199 -0
- package/deps/simdutf/src/generic/utf16.h +73 -0
- package/deps/simdutf/src/generic/utf32.h +136 -0
- package/deps/simdutf/src/generic/utf8/utf16_length_from_utf8_bytemask.h +53 -0
- package/deps/simdutf/src/generic/utf8.h +92 -0
- package/deps/simdutf/src/generic/utf8_to_latin1/utf8_to_latin1.h +316 -0
- package/deps/simdutf/src/generic/utf8_to_latin1/valid_utf8_to_latin1.h +78 -0
- package/deps/simdutf/src/generic/utf8_to_utf16/utf8_to_utf16.h +332 -0
- package/deps/simdutf/src/generic/utf8_to_utf16/valid_utf8_to_utf16.h +74 -0
- package/deps/simdutf/src/generic/utf8_to_utf32/utf8_to_utf32.h +318 -0
- package/deps/simdutf/src/generic/utf8_to_utf32/valid_utf8_to_utf32.h +42 -0
- package/deps/simdutf/src/generic/utf8_validation/utf8_lookup4_algorithm.h +223 -0
- package/deps/simdutf/src/generic/utf8_validation/utf8_validator.h +84 -0
- package/deps/simdutf/src/generic/validate_utf16.h +164 -0
- package/deps/simdutf/src/generic/validate_utf32.h +99 -0
- package/deps/simdutf/src/haswell/avx2_base64.cpp +837 -0
- package/deps/simdutf/src/haswell/avx2_convert_latin1_to_utf16.cpp +28 -0
- package/deps/simdutf/src/haswell/avx2_convert_latin1_to_utf32.cpp +20 -0
- package/deps/simdutf/src/haswell/avx2_convert_latin1_to_utf8.cpp +83 -0
- package/deps/simdutf/src/haswell/avx2_convert_utf16_to_latin1.cpp +83 -0
- package/deps/simdutf/src/haswell/avx2_convert_utf16_to_utf32.cpp +210 -0
- package/deps/simdutf/src/haswell/avx2_convert_utf16_to_utf8.cpp +602 -0
- package/deps/simdutf/src/haswell/avx2_convert_utf32_to_latin1.cpp +116 -0
- package/deps/simdutf/src/haswell/avx2_convert_utf32_to_utf16.cpp +164 -0
- package/deps/simdutf/src/haswell/avx2_convert_utf32_to_utf8.cpp +569 -0
- package/deps/simdutf/src/haswell/avx2_convert_utf8_to_latin1.cpp +60 -0
- package/deps/simdutf/src/haswell/avx2_convert_utf8_to_utf16.cpp +195 -0
- package/deps/simdutf/src/haswell/avx2_convert_utf8_to_utf32.cpp +135 -0
- package/deps/simdutf/src/haswell/avx2_utf16fix.cpp +173 -0
- package/deps/simdutf/src/haswell/avx2_validate_utf16.cpp +17 -0
- package/deps/simdutf/src/haswell/implementation.cpp +1447 -0
- package/deps/simdutf/src/icelake/icelake_ascii_validation.inl.cpp +19 -0
- package/deps/simdutf/src/icelake/icelake_base64.inl.cpp +630 -0
- package/deps/simdutf/src/icelake/icelake_common.inl.cpp +37 -0
- package/deps/simdutf/src/icelake/icelake_convert_latin1_to_utf16.inl.cpp +36 -0
- package/deps/simdutf/src/icelake/icelake_convert_latin1_to_utf32.inl.cpp +23 -0
- package/deps/simdutf/src/icelake/icelake_convert_latin1_to_utf8.inl.cpp +107 -0
- package/deps/simdutf/src/icelake/icelake_convert_utf16_to_latin1.inl.cpp +103 -0
- package/deps/simdutf/src/icelake/icelake_convert_utf16_to_utf32.inl.cpp +136 -0
- package/deps/simdutf/src/icelake/icelake_convert_utf16_to_utf8.inl.cpp +206 -0
- package/deps/simdutf/src/icelake/icelake_convert_utf32_to_latin1.inl.cpp +74 -0
- package/deps/simdutf/src/icelake/icelake_convert_utf32_to_utf16.inl.cpp +338 -0
- package/deps/simdutf/src/icelake/icelake_convert_utf32_to_utf8.inl.cpp +574 -0
- package/deps/simdutf/src/icelake/icelake_convert_utf8_to_latin1.inl.cpp +104 -0
- package/deps/simdutf/src/icelake/icelake_convert_utf8_to_utf16.inl.cpp +75 -0
- package/deps/simdutf/src/icelake/icelake_convert_valid_utf8_to_latin1.inl.cpp +69 -0
- package/deps/simdutf/src/icelake/icelake_find.inl.cpp +146 -0
- package/deps/simdutf/src/icelake/icelake_from_utf8.inl.cpp +266 -0
- package/deps/simdutf/src/icelake/icelake_from_valid_utf8.inl.cpp +136 -0
- package/deps/simdutf/src/icelake/icelake_macros.inl.cpp +143 -0
- package/deps/simdutf/src/icelake/icelake_utf16fix.cpp +138 -0
- package/deps/simdutf/src/icelake/icelake_utf32_validation.inl.cpp +63 -0
- package/deps/simdutf/src/icelake/icelake_utf8_common.inl.cpp +753 -0
- package/deps/simdutf/src/icelake/icelake_utf8_length_from_utf16.inl.cpp +269 -0
- package/deps/simdutf/src/icelake/icelake_utf8_validation.inl.cpp +116 -0
- package/deps/simdutf/src/icelake/implementation.cpp +1903 -0
- package/deps/simdutf/src/implementation.cpp +2526 -0
- package/deps/simdutf/src/lasx/implementation.cpp +1531 -0
- package/deps/simdutf/src/lasx/lasx_base64.cpp +695 -0
- package/deps/simdutf/src/lasx/lasx_convert_latin1_to_utf16.cpp +76 -0
- package/deps/simdutf/src/lasx/lasx_convert_latin1_to_utf32.cpp +55 -0
- package/deps/simdutf/src/lasx/lasx_convert_latin1_to_utf8.cpp +65 -0
- package/deps/simdutf/src/lasx/lasx_convert_utf16_to_latin1.cpp +64 -0
- package/deps/simdutf/src/lasx/lasx_convert_utf16_to_utf32.cpp +183 -0
- package/deps/simdutf/src/lasx/lasx_convert_utf16_to_utf8.cpp +550 -0
- package/deps/simdutf/src/lasx/lasx_convert_utf32_to_latin1.cpp +73 -0
- package/deps/simdutf/src/lasx/lasx_convert_utf32_to_utf16.cpp +218 -0
- package/deps/simdutf/src/lasx/lasx_convert_utf32_to_utf8.cpp +589 -0
- package/deps/simdutf/src/lasx/lasx_convert_utf8_to_latin1.cpp +72 -0
- package/deps/simdutf/src/lasx/lasx_convert_utf8_to_utf16.cpp +296 -0
- package/deps/simdutf/src/lasx/lasx_convert_utf8_to_utf32.cpp +190 -0
- package/deps/simdutf/src/lasx/lasx_find.cpp +64 -0
- package/deps/simdutf/src/lasx/lasx_validate_utf16.cpp +13 -0
- package/deps/simdutf/src/lasx/lasx_validate_utf32le.cpp +84 -0
- package/deps/simdutf/src/lsx/implementation.cpp +1417 -0
- package/deps/simdutf/src/lsx/lsx_base64.cpp +675 -0
- package/deps/simdutf/src/lsx/lsx_convert_latin1_to_utf16.cpp +39 -0
- package/deps/simdutf/src/lsx/lsx_convert_latin1_to_utf32.cpp +27 -0
- package/deps/simdutf/src/lsx/lsx_convert_latin1_to_utf8.cpp +56 -0
- package/deps/simdutf/src/lsx/lsx_convert_utf16_to_latin1.cpp +64 -0
- package/deps/simdutf/src/lsx/lsx_convert_utf16_to_utf32.cpp +133 -0
- package/deps/simdutf/src/lsx/lsx_convert_utf16_to_utf8.cpp +518 -0
- package/deps/simdutf/src/lsx/lsx_convert_utf32_to_latin1.cpp +66 -0
- package/deps/simdutf/src/lsx/lsx_convert_utf32_to_utf16.cpp +155 -0
- package/deps/simdutf/src/lsx/lsx_convert_utf32_to_utf8.cpp +459 -0
- package/deps/simdutf/src/lsx/lsx_convert_utf8_to_latin1.cpp +75 -0
- package/deps/simdutf/src/lsx/lsx_convert_utf8_to_utf16.cpp +291 -0
- package/deps/simdutf/src/lsx/lsx_convert_utf8_to_utf32.cpp +179 -0
- package/deps/simdutf/src/lsx/lsx_find.cpp +60 -0
- package/deps/simdutf/src/lsx/lsx_validate_utf16.cpp +13 -0
- package/deps/simdutf/src/lsx/lsx_validate_utf32le.cpp +68 -0
- package/deps/simdutf/src/ppc64/implementation.cpp +992 -0
- package/deps/simdutf/src/ppc64/ppc64_base64.cpp +480 -0
- package/deps/simdutf/src/ppc64/ppc64_base64_internal_tests.cpp +401 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_latin1_to_utf16.cpp +12 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_latin1_to_utf32.cpp +12 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_latin1_to_utf8.cpp +149 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_utf16_to_latin1.cpp +67 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_utf16_to_utf32.cpp +87 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_utf16_to_utf8.cpp +296 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_utf32_to_latin1.cpp +57 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_utf32_to_utf16.cpp +117 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_utf32_to_utf8.cpp +166 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_utf8_to_latin1.cpp +69 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_utf8_to_utf16.cpp +211 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_utf8_to_utf32.cpp +153 -0
- package/deps/simdutf/src/ppc64/ppc64_utf16_to_utf8_tables.h +1011 -0
- package/deps/simdutf/src/ppc64/ppc64_utf8_length_from_latin1.cpp +37 -0
- package/deps/simdutf/src/ppc64/ppc64_validate_utf16.cpp +19 -0
- package/deps/simdutf/src/ppc64/templates.cpp +91 -0
- package/deps/simdutf/src/rvv/implementation.cpp +138 -0
- package/deps/simdutf/src/rvv/rvv_find.cpp +27 -0
- package/deps/simdutf/src/rvv/rvv_helpers.inl.cpp +23 -0
- package/deps/simdutf/src/rvv/rvv_latin1_to.inl.cpp +71 -0
- package/deps/simdutf/src/rvv/rvv_length_from.inl.cpp +164 -0
- package/deps/simdutf/src/rvv/rvv_utf16_to.inl.cpp +399 -0
- package/deps/simdutf/src/rvv/rvv_utf16fix.cpp +110 -0
- package/deps/simdutf/src/rvv/rvv_utf32_to.inl.cpp +307 -0
- package/deps/simdutf/src/rvv/rvv_utf8_to.inl.cpp +435 -0
- package/deps/simdutf/src/rvv/rvv_validate.inl.cpp +275 -0
- package/deps/simdutf/src/simdutf/arm64/begin.h +2 -0
- package/deps/simdutf/src/simdutf/arm64/bitmanipulation.h +34 -0
- package/deps/simdutf/src/simdutf/arm64/end.h +2 -0
- package/deps/simdutf/src/simdutf/arm64/implementation.h +307 -0
- package/deps/simdutf/src/simdutf/arm64/intrinsics.h +10 -0
- package/deps/simdutf/src/simdutf/arm64/simd.h +547 -0
- package/deps/simdutf/src/simdutf/arm64/simd16-inl.h +403 -0
- package/deps/simdutf/src/simdutf/arm64/simd32-inl.h +129 -0
- package/deps/simdutf/src/simdutf/arm64/simd64-inl.h +28 -0
- package/deps/simdutf/src/simdutf/arm64.h +43 -0
- package/deps/simdutf/src/simdutf/fallback/begin.h +1 -0
- package/deps/simdutf/src/simdutf/fallback/bitmanipulation.h +13 -0
- package/deps/simdutf/src/simdutf/fallback/end.h +1 -0
- package/deps/simdutf/src/simdutf/fallback/implementation.h +331 -0
- package/deps/simdutf/src/simdutf/fallback.h +42 -0
- package/deps/simdutf/src/simdutf/haswell/begin.h +15 -0
- package/deps/simdutf/src/simdutf/haswell/bitmanipulation.h +35 -0
- package/deps/simdutf/src/simdutf/haswell/end.h +13 -0
- package/deps/simdutf/src/simdutf/haswell/implementation.h +338 -0
- package/deps/simdutf/src/simdutf/haswell/intrinsics.h +67 -0
- package/deps/simdutf/src/simdutf/haswell/simd.h +363 -0
- package/deps/simdutf/src/simdutf/haswell/simd16-inl.h +261 -0
- package/deps/simdutf/src/simdutf/haswell/simd32-inl.h +111 -0
- package/deps/simdutf/src/simdutf/haswell/simd64-inl.h +34 -0
- package/deps/simdutf/src/simdutf/haswell.h +63 -0
- package/deps/simdutf/src/simdutf/icelake/begin.h +14 -0
- package/deps/simdutf/src/simdutf/icelake/bitmanipulation.h +44 -0
- package/deps/simdutf/src/simdutf/icelake/end.h +12 -0
- package/deps/simdutf/src/simdutf/icelake/implementation.h +346 -0
- package/deps/simdutf/src/simdutf/icelake/intrinsics.h +138 -0
- package/deps/simdutf/src/simdutf/icelake/simd.h +17 -0
- package/deps/simdutf/src/simdutf/icelake/simd16-inl.h +90 -0
- package/deps/simdutf/src/simdutf/icelake/simd32-inl.h +47 -0
- package/deps/simdutf/src/simdutf/icelake.h +81 -0
- package/deps/simdutf/src/simdutf/lasx/begin.h +8 -0
- package/deps/simdutf/src/simdutf/lasx/bitmanipulation.h +25 -0
- package/deps/simdutf/src/simdutf/lasx/end.h +8 -0
- package/deps/simdutf/src/simdutf/lasx/implementation.h +310 -0
- package/deps/simdutf/src/simdutf/lasx/intrinsics.h +319 -0
- package/deps/simdutf/src/simdutf/lasx/simd.h +551 -0
- package/deps/simdutf/src/simdutf/lasx/simd16-inl.h +234 -0
- package/deps/simdutf/src/simdutf/lasx/simd32-inl.h +74 -0
- package/deps/simdutf/src/simdutf/lasx/simd64-inl.h +52 -0
- package/deps/simdutf/src/simdutf/lasx.h +49 -0
- package/deps/simdutf/src/simdutf/lsx/begin.h +2 -0
- package/deps/simdutf/src/simdutf/lsx/bitmanipulation.h +25 -0
- package/deps/simdutf/src/simdutf/lsx/end.h +2 -0
- package/deps/simdutf/src/simdutf/lsx/implementation.h +309 -0
- package/deps/simdutf/src/simdutf/lsx/intrinsics.h +196 -0
- package/deps/simdutf/src/simdutf/lsx/simd.h +421 -0
- package/deps/simdutf/src/simdutf/lsx/simd16-inl.h +242 -0
- package/deps/simdutf/src/simdutf/lsx/simd32-inl.h +69 -0
- package/deps/simdutf/src/simdutf/lsx/simd64-inl.h +50 -0
- package/deps/simdutf/src/simdutf/lsx.h +52 -0
- package/deps/simdutf/src/simdutf/ppc64/begin.h +1 -0
- package/deps/simdutf/src/simdutf/ppc64/bitmanipulation.h +29 -0
- package/deps/simdutf/src/simdutf/ppc64/end.h +1 -0
- package/deps/simdutf/src/simdutf/ppc64/implementation.h +348 -0
- package/deps/simdutf/src/simdutf/ppc64/intrinsics.h +19 -0
- package/deps/simdutf/src/simdutf/ppc64/simd.h +177 -0
- package/deps/simdutf/src/simdutf/ppc64/simd16-inl.h +327 -0
- package/deps/simdutf/src/simdutf/ppc64/simd32-inl.h +247 -0
- package/deps/simdutf/src/simdutf/ppc64/simd8-inl.h +618 -0
- package/deps/simdutf/src/simdutf/ppc64.h +40 -0
- package/deps/simdutf/src/simdutf/rvv/begin.h +7 -0
- package/deps/simdutf/src/simdutf/rvv/end.h +7 -0
- package/deps/simdutf/src/simdutf/rvv/implementation.h +321 -0
- package/deps/simdutf/src/simdutf/rvv/intrinsics.h +131 -0
- package/deps/simdutf/src/simdutf/rvv.h +41 -0
- package/deps/simdutf/src/simdutf/westmere/begin.h +8 -0
- package/deps/simdutf/src/simdutf/westmere/bitmanipulation.h +37 -0
- package/deps/simdutf/src/simdutf/westmere/end.h +8 -0
- package/deps/simdutf/src/simdutf/westmere/implementation.h +338 -0
- package/deps/simdutf/src/simdutf/westmere/intrinsics.h +38 -0
- package/deps/simdutf/src/simdutf/westmere/simd.h +379 -0
- package/deps/simdutf/src/simdutf/westmere/simd16-inl.h +242 -0
- package/deps/simdutf/src/simdutf/westmere/simd32-inl.h +151 -0
- package/deps/simdutf/src/simdutf/westmere/simd64-inl.h +33 -0
- package/deps/simdutf/src/simdutf/westmere.h +59 -0
- package/deps/simdutf/src/simdutf.cpp +152 -0
- package/deps/simdutf/src/simdutf_c.cpp +525 -0
- package/deps/simdutf/src/tables/utf16_to_utf8_tables.h +768 -0
- package/deps/simdutf/src/tables/utf32_to_utf16_tables.h +53 -0
- package/deps/simdutf/src/tables/utf8_to_utf16_tables.h +826 -0
- package/deps/simdutf/src/westmere/implementation.cpp +1479 -0
- package/deps/simdutf/src/westmere/internal/loader.cpp +7 -0
- package/deps/simdutf/src/westmere/internal/write_v_u16_11bits_to_utf8.cpp +66 -0
- package/deps/simdutf/src/westmere/sse_base64.cpp +672 -0
- package/deps/simdutf/src/westmere/sse_convert_latin1_to_utf16.cpp +21 -0
- package/deps/simdutf/src/westmere/sse_convert_latin1_to_utf32.cpp +31 -0
- package/deps/simdutf/src/westmere/sse_convert_latin1_to_utf8.cpp +71 -0
- package/deps/simdutf/src/westmere/sse_convert_utf16_to_latin1.cpp +70 -0
- package/deps/simdutf/src/westmere/sse_convert_utf16_to_utf32.cpp +206 -0
- package/deps/simdutf/src/westmere/sse_convert_utf16_to_utf8.cpp +504 -0
- package/deps/simdutf/src/westmere/sse_convert_utf32_to_latin1.cpp +82 -0
- package/deps/simdutf/src/westmere/sse_convert_utf32_to_utf16.cpp +209 -0
- package/deps/simdutf/src/westmere/sse_convert_utf32_to_utf8.cpp +589 -0
- package/deps/simdutf/src/westmere/sse_convert_utf8_to_latin1.cpp +58 -0
- package/deps/simdutf/src/westmere/sse_convert_utf8_to_utf16.cpp +197 -0
- package/deps/simdutf/src/westmere/sse_convert_utf8_to_utf32.cpp +141 -0
- package/deps/simdutf/src/westmere/sse_utf16fix.cpp +82 -0
- package/deps/simdutf/src/westmere/sse_validate_utf16.cpp +17 -0
- package/deps/simdutf/tests/CMakeLists.txt +483 -0
- package/deps/simdutf/tests/atomic_base64_tests.cpp +2845 -0
- package/deps/simdutf/tests/base64_tests.cpp +3617 -0
- package/deps/simdutf/tests/basic_fuzzer.cpp +805 -0
- package/deps/simdutf/tests/bele_tests.cpp +182 -0
- package/deps/simdutf/tests/constexpr_base64_tests.cpp +387 -0
- package/deps/simdutf/tests/convert_latin1_to_utf16be_tests.cpp +52 -0
- package/deps/simdutf/tests/convert_latin1_to_utf16le_tests.cpp +80 -0
- package/deps/simdutf/tests/convert_latin1_to_utf32_tests.cpp +66 -0
- package/deps/simdutf/tests/convert_latin1_to_utf8_tests.cpp +120 -0
- package/deps/simdutf/tests/convert_utf16_to_utf8_safe_tests.cpp +203 -0
- package/deps/simdutf/tests/convert_utf16_to_utf8_with_replacement_tests.cpp +276 -0
- package/deps/simdutf/tests/convert_utf16be_to_latin1_tests.cpp +109 -0
- package/deps/simdutf/tests/convert_utf16be_to_latin1_tests_with_errors.cpp +136 -0
- package/deps/simdutf/tests/convert_utf16be_to_utf32_tests.cpp +193 -0
- package/deps/simdutf/tests/convert_utf16be_to_utf32_with_errors_tests.cpp +381 -0
- package/deps/simdutf/tests/convert_utf16be_to_utf8_tests.cpp +259 -0
- package/deps/simdutf/tests/convert_utf16be_to_utf8_with_errors_tests.cpp +266 -0
- package/deps/simdutf/tests/convert_utf16le_to_latin1_tests.cpp +148 -0
- package/deps/simdutf/tests/convert_utf16le_to_latin1_tests_with_errors.cpp +176 -0
- package/deps/simdutf/tests/convert_utf16le_to_utf32_tests.cpp +213 -0
- package/deps/simdutf/tests/convert_utf16le_to_utf32_with_errors_tests.cpp +318 -0
- package/deps/simdutf/tests/convert_utf16le_to_utf8_tests.cpp +343 -0
- package/deps/simdutf/tests/convert_utf16le_to_utf8_with_errors_tests.cpp +271 -0
- package/deps/simdutf/tests/convert_utf32_to_latin1_tests.cpp +111 -0
- package/deps/simdutf/tests/convert_utf32_to_latin1_with_errors_tests.cpp +96 -0
- package/deps/simdutf/tests/convert_utf32_to_utf16be_tests.cpp +148 -0
- package/deps/simdutf/tests/convert_utf32_to_utf16be_with_errors_tests.cpp +192 -0
- package/deps/simdutf/tests/convert_utf32_to_utf16le_tests.cpp +166 -0
- package/deps/simdutf/tests/convert_utf32_to_utf16le_with_errors_tests.cpp +215 -0
- package/deps/simdutf/tests/convert_utf32_to_utf8_tests.cpp +181 -0
- package/deps/simdutf/tests/convert_utf32_to_utf8_with_errors_tests.cpp +261 -0
- package/deps/simdutf/tests/convert_utf8_to_latin1_tests.cpp +516 -0
- package/deps/simdutf/tests/convert_utf8_to_latin1_with_errors_tests.cpp +579 -0
- package/deps/simdutf/tests/convert_utf8_to_utf16be_tests.cpp +412 -0
- package/deps/simdutf/tests/convert_utf8_to_utf16be_with_errors_tests.cpp +480 -0
- package/deps/simdutf/tests/convert_utf8_to_utf16le_tests.cpp +671 -0
- package/deps/simdutf/tests/convert_utf8_to_utf16le_with_errors_tests.cpp +455 -0
- package/deps/simdutf/tests/convert_utf8_to_utf32_tests.cpp +1204 -0
- package/deps/simdutf/tests/convert_utf8_to_utf32_with_errors_tests.cpp +337 -0
- package/deps/simdutf/tests/convert_valid_utf16be_to_latin1_tests.cpp +37 -0
- package/deps/simdutf/tests/convert_valid_utf16be_to_utf32_tests.cpp +97 -0
- package/deps/simdutf/tests/convert_valid_utf16be_to_utf8_tests.cpp +126 -0
- package/deps/simdutf/tests/convert_valid_utf16le_to_latin1_tests.cpp +71 -0
- package/deps/simdutf/tests/convert_valid_utf16le_to_utf32_tests.cpp +122 -0
- package/deps/simdutf/tests/convert_valid_utf16le_to_utf8_tests.cpp +244 -0
- package/deps/simdutf/tests/convert_valid_utf32_to_latin1_tests.cpp +49 -0
- package/deps/simdutf/tests/convert_valid_utf32_to_utf16be_tests.cpp +92 -0
- package/deps/simdutf/tests/convert_valid_utf32_to_utf16le_tests.cpp +114 -0
- package/deps/simdutf/tests/convert_valid_utf32_to_utf8_tests.cpp +109 -0
- package/deps/simdutf/tests/convert_valid_utf8_to_latin1_tests.cpp +84 -0
- package/deps/simdutf/tests/convert_valid_utf8_to_utf16be_tests.cpp +124 -0
- package/deps/simdutf/tests/convert_valid_utf8_to_utf16le_tests.cpp +221 -0
- package/deps/simdutf/tests/convert_valid_utf8_to_utf32_tests.cpp +155 -0
- package/deps/simdutf/tests/count_utf16be.cpp +64 -0
- package/deps/simdutf/tests/count_utf16le.cpp +61 -0
- package/deps/simdutf/tests/count_utf8.cpp +87 -0
- package/deps/simdutf/tests/detect_encodings_tests.cpp +312 -0
- package/deps/simdutf/tests/embed/valid_utf8.txt +1 -0
- package/deps/simdutf/tests/embed_tests.cpp +22 -0
- package/deps/simdutf/tests/find_tests.cpp +77 -0
- package/deps/simdutf/tests/fixed_string_tests.cpp +153 -0
- package/deps/simdutf/tests/helpers/CMakeLists.txt +25 -0
- package/deps/simdutf/tests/helpers/compiletime_conversions.h +222 -0
- package/deps/simdutf/tests/helpers/fixed_string.h +267 -0
- package/deps/simdutf/tests/helpers/random_int.cpp +30 -0
- package/deps/simdutf/tests/helpers/random_int.h +39 -0
- package/deps/simdutf/tests/helpers/random_utf16.cpp +123 -0
- package/deps/simdutf/tests/helpers/random_utf16.h +52 -0
- package/deps/simdutf/tests/helpers/random_utf32.cpp +41 -0
- package/deps/simdutf/tests/helpers/random_utf32.h +40 -0
- package/deps/simdutf/tests/helpers/random_utf8.cpp +93 -0
- package/deps/simdutf/tests/helpers/random_utf8.h +36 -0
- package/deps/simdutf/tests/helpers/test.cpp +231 -0
- package/deps/simdutf/tests/helpers/test.h +193 -0
- package/deps/simdutf/tests/helpers/transcode_test_base.cpp +1257 -0
- package/deps/simdutf/tests/helpers/transcode_test_base.h +683 -0
- package/deps/simdutf/tests/helpers/utf16.h +27 -0
- package/deps/simdutf/tests/installation_tests/find/CMakeLists.txt +43 -0
- package/deps/simdutf/tests/installation_tests/from_fetch/CMakeLists.txt +47 -0
- package/deps/simdutf/tests/internal_tests.cpp +27 -0
- package/deps/simdutf/tests/null_safety_tests.cpp +94 -0
- package/deps/simdutf/tests/random_fuzzer.cpp +779 -0
- package/deps/simdutf/tests/readme_tests.cpp +274 -0
- package/deps/simdutf/tests/reference/CMakeLists.txt +23 -0
- package/deps/simdutf/tests/reference/decode_utf16.h +81 -0
- package/deps/simdutf/tests/reference/decode_utf32.h +47 -0
- package/deps/simdutf/tests/reference/encode_latin1.cpp +1 -0
- package/deps/simdutf/tests/reference/encode_latin1.h +32 -0
- package/deps/simdutf/tests/reference/encode_utf16.cpp +49 -0
- package/deps/simdutf/tests/reference/encode_utf16.h +20 -0
- package/deps/simdutf/tests/reference/encode_utf32.cpp +1 -0
- package/deps/simdutf/tests/reference/encode_utf32.h +36 -0
- package/deps/simdutf/tests/reference/encode_utf8.cpp +1 -0
- package/deps/simdutf/tests/reference/encode_utf8.h +40 -0
- package/deps/simdutf/tests/reference/validate_utf16.cpp +60 -0
- package/deps/simdutf/tests/reference/validate_utf16.h +14 -0
- package/deps/simdutf/tests/reference/validate_utf16_to_latin1.cpp +35 -0
- package/deps/simdutf/tests/reference/validate_utf16_to_latin1.h +13 -0
- package/deps/simdutf/tests/reference/validate_utf32.cpp +27 -0
- package/deps/simdutf/tests/reference/validate_utf32.h +12 -0
- package/deps/simdutf/tests/reference/validate_utf32_to_latin1.cpp +27 -0
- package/deps/simdutf/tests/reference/validate_utf32_to_latin1.h +12 -0
- package/deps/simdutf/tests/reference/validate_utf8.cpp +82 -0
- package/deps/simdutf/tests/reference/validate_utf8.h +11 -0
- package/deps/simdutf/tests/reference/validate_utf8_to_latin1.cpp +43 -0
- package/deps/simdutf/tests/reference/validate_utf8_to_latin1.h +12 -0
- package/deps/simdutf/tests/select_implementation.cpp +43 -0
- package/deps/simdutf/tests/simdutf_c_tests.cpp +244 -0
- package/deps/simdutf/tests/span_tests.cpp +401 -0
- package/deps/simdutf/tests/special_tests.cpp +559 -0
- package/deps/simdutf/tests/straight_c_test.c +187 -0
- package/deps/simdutf/tests/text_encoding_tests.cpp +77 -0
- package/deps/simdutf/tests/to_well_formed_utf16_tests.cpp +377 -0
- package/deps/simdutf/tests/utf8_length_from_utf16_tests.cpp +202 -0
- package/deps/simdutf/tests/validate_ascii_basic_tests.cpp +165 -0
- package/deps/simdutf/tests/validate_ascii_with_errors_tests.cpp +77 -0
- package/deps/simdutf/tests/validate_utf16be_basic_tests.cpp +175 -0
- package/deps/simdutf/tests/validate_utf16be_with_errors_tests.cpp +188 -0
- package/deps/simdutf/tests/validate_utf16le_basic_tests.cpp +268 -0
- package/deps/simdutf/tests/validate_utf16le_with_errors_tests.cpp +274 -0
- package/deps/simdutf/tests/validate_utf32_basic_tests.cpp +92 -0
- package/deps/simdutf/tests/validate_utf32_with_errors_tests.cpp +114 -0
- package/deps/simdutf/tests/validate_utf8_basic_tests.cpp +178 -0
- package/deps/simdutf/tests/validate_utf8_brute_force_tests.cpp +88 -0
- package/deps/simdutf/tests/validate_utf8_puzzler_tests.cpp +33 -0
- package/deps/simdutf/tests/validate_utf8_with_errors_tests.cpp +228 -0
- package/deps/simdutf/tools/CMakeLists.txt +85 -0
- package/deps/simdutf/tools/fastbase64.cpp +250 -0
- package/deps/simdutf/tools/sutf.cpp +556 -0
- package/deps/simdutf/tools/sutf.h +40 -0
- package/lib/commonjs/blake3.js +2 -1
- package/lib/commonjs/blake3.js.map +1 -1
- package/lib/commonjs/diffie-hellman.js +5 -4
- package/lib/commonjs/diffie-hellman.js.map +1 -1
- package/lib/commonjs/ecdh.js +5 -4
- package/lib/commonjs/ecdh.js.map +1 -1
- package/lib/module/blake3.js +2 -1
- package/lib/module/blake3.js.map +1 -1
- package/lib/module/diffie-hellman.js +5 -4
- package/lib/module/diffie-hellman.js.map +1 -1
- package/lib/module/ecdh.js +5 -4
- package/lib/module/ecdh.js.map +1 -1
- package/lib/tsconfig.tsbuildinfo +1 -1
- package/lib/typescript/blake3.d.ts.map +1 -1
- package/lib/typescript/diffie-hellman.d.ts.map +1 -1
- package/lib/typescript/ecdh.d.ts.map +1 -1
- package/package.json +2 -2
- package/src/blake3.ts +2 -1
- package/src/diffie-hellman.ts +5 -7
- package/src/ecdh.ts +5 -8
|
@@ -0,0 +1,1276 @@
|
|
|
1
|
+
//==================================================================================================
|
|
2
|
+
// File: unicode_utils.cpp
|
|
3
|
+
//
|
|
4
|
+
// Copyright (c) 2018 Bob Steagall and KEWB Computing, All Rights Reserved
|
|
5
|
+
//==================================================================================================
|
|
6
|
+
//
|
|
7
|
+
// modified by D. Lemire for benchmarking in simdutf in April 2021.
|
|
8
|
+
#include "utf_utils.h"
|
|
9
|
+
#include <cstdio>
|
|
10
|
+
|
|
11
|
+
#if defined KEWB_PLATFORM_LINUX
|
|
12
|
+
#include <emmintrin.h>
|
|
13
|
+
#include <immintrin.h>
|
|
14
|
+
#include <xmmintrin.h>
|
|
15
|
+
#elif defined KEWB_PLATFORM_WINDOWS
|
|
16
|
+
#include <intrin.h>
|
|
17
|
+
#endif
|
|
18
|
+
SIMDUTF_TARGET_WESTMERE
|
|
19
|
+
|
|
20
|
+
namespace uu {
|
|
21
|
+
//- Static member data init.
|
|
22
|
+
//
|
|
23
|
+
UtfUtils::LookupTables const UtfUtils::smTables =
|
|
24
|
+
{
|
|
25
|
+
//- Initialize the maFirstUnitTable member array. This array implements a lookup table that
|
|
26
|
+
// maps the first code unit of a sequence to: 1. a pre-masked value to start the computation
|
|
27
|
+
// of the resulting code point; and, 2. the next state in the DFA for this code unit.
|
|
28
|
+
//
|
|
29
|
+
{
|
|
30
|
+
{ 0x00, BGN }, //- 0x00
|
|
31
|
+
{ 0x01, BGN }, //- 0x01
|
|
32
|
+
{ 0x02, BGN }, //- 0x02
|
|
33
|
+
{ 0x03, BGN }, //- 0x03
|
|
34
|
+
{ 0x04, BGN }, //- 0x04
|
|
35
|
+
{ 0x05, BGN }, //- 0x05
|
|
36
|
+
{ 0x06, BGN }, //- 0x06
|
|
37
|
+
{ 0x07, BGN }, //- 0x07
|
|
38
|
+
{ 0x08, BGN }, //- 0x08
|
|
39
|
+
{ 0x09, BGN }, //- 0x09
|
|
40
|
+
{ 0x0A, BGN }, //- 0x0A
|
|
41
|
+
{ 0x0B, BGN }, //- 0x0B
|
|
42
|
+
{ 0x0C, BGN }, //- 0x0C
|
|
43
|
+
{ 0x0D, BGN }, //- 0x0D
|
|
44
|
+
{ 0x0E, BGN }, //- 0x0E
|
|
45
|
+
{ 0x0F, BGN }, //- 0x0F
|
|
46
|
+
|
|
47
|
+
{ 0x10, BGN }, //- 0x10
|
|
48
|
+
{ 0x11, BGN }, //- 0x11
|
|
49
|
+
{ 0x12, BGN }, //- 0x12
|
|
50
|
+
{ 0x13, BGN }, //- 0x13
|
|
51
|
+
{ 0x14, BGN }, //- 0x14
|
|
52
|
+
{ 0x15, BGN }, //- 0x15
|
|
53
|
+
{ 0x16, BGN }, //- 0x16
|
|
54
|
+
{ 0x17, BGN }, //- 0x17
|
|
55
|
+
{ 0x18, BGN }, //- 0x18
|
|
56
|
+
{ 0x19, BGN }, //- 0x19
|
|
57
|
+
{ 0x1A, BGN }, //- 0x1A
|
|
58
|
+
{ 0x1B, BGN }, //- 0x1B
|
|
59
|
+
{ 0x1C, BGN }, //- 0x1C
|
|
60
|
+
{ 0x1D, BGN }, //- 0x1D
|
|
61
|
+
{ 0x1E, BGN }, //- 0x1E
|
|
62
|
+
{ 0x1F, BGN }, //- 0x1F
|
|
63
|
+
|
|
64
|
+
{ 0x20, BGN }, //- 0x20
|
|
65
|
+
{ 0x21, BGN }, //- 0x21
|
|
66
|
+
{ 0x22, BGN }, //- 0x22
|
|
67
|
+
{ 0x23, BGN }, //- 0x23
|
|
68
|
+
{ 0x24, BGN }, //- 0x24
|
|
69
|
+
{ 0x25, BGN }, //- 0x25
|
|
70
|
+
{ 0x26, BGN }, //- 0x26
|
|
71
|
+
{ 0x27, BGN }, //- 0x27
|
|
72
|
+
{ 0x28, BGN }, //- 0x28
|
|
73
|
+
{ 0x29, BGN }, //- 0x29
|
|
74
|
+
{ 0x2A, BGN }, //- 0x2A
|
|
75
|
+
{ 0x2B, BGN }, //- 0x2B
|
|
76
|
+
{ 0x2C, BGN }, //- 0x2C
|
|
77
|
+
{ 0x2D, BGN }, //- 0x2D
|
|
78
|
+
{ 0x2E, BGN }, //- 0x2E
|
|
79
|
+
{ 0x2F, BGN }, //- 0x2F
|
|
80
|
+
|
|
81
|
+
{ 0x30, BGN }, //- 0x30
|
|
82
|
+
{ 0x31, BGN }, //- 0x31
|
|
83
|
+
{ 0x32, BGN }, //- 0x32
|
|
84
|
+
{ 0x33, BGN }, //- 0x33
|
|
85
|
+
{ 0x34, BGN }, //- 0x34
|
|
86
|
+
{ 0x35, BGN }, //- 0x35
|
|
87
|
+
{ 0x36, BGN }, //- 0x36
|
|
88
|
+
{ 0x37, BGN }, //- 0x37
|
|
89
|
+
{ 0x38, BGN }, //- 0x38
|
|
90
|
+
{ 0x39, BGN }, //- 0x39
|
|
91
|
+
{ 0x3A, BGN }, //- 0x3A
|
|
92
|
+
{ 0x3B, BGN }, //- 0x3B
|
|
93
|
+
{ 0x3C, BGN }, //- 0x3C
|
|
94
|
+
{ 0x3D, BGN }, //- 0x3D
|
|
95
|
+
{ 0x3E, BGN }, //- 0x3E
|
|
96
|
+
{ 0x3F, BGN }, //- 0x3F
|
|
97
|
+
|
|
98
|
+
{ 0x40, BGN }, //- 0x40
|
|
99
|
+
{ 0x41, BGN }, //- 0x41
|
|
100
|
+
{ 0x42, BGN }, //- 0x42
|
|
101
|
+
{ 0x43, BGN }, //- 0x43
|
|
102
|
+
{ 0x44, BGN }, //- 0x44
|
|
103
|
+
{ 0x45, BGN }, //- 0x45
|
|
104
|
+
{ 0x46, BGN }, //- 0x46
|
|
105
|
+
{ 0x47, BGN }, //- 0x47
|
|
106
|
+
{ 0x48, BGN }, //- 0x48
|
|
107
|
+
{ 0x49, BGN }, //- 0x49
|
|
108
|
+
{ 0x4A, BGN }, //- 0x4A
|
|
109
|
+
{ 0x4B, BGN }, //- 0x4B
|
|
110
|
+
{ 0x4C, BGN }, //- 0x4C
|
|
111
|
+
{ 0x4D, BGN }, //- 0x4D
|
|
112
|
+
{ 0x4E, BGN }, //- 0x4E
|
|
113
|
+
{ 0x4F, BGN }, //- 0x4F
|
|
114
|
+
|
|
115
|
+
{ 0x50, BGN }, //- 0x50
|
|
116
|
+
{ 0x51, BGN }, //- 0x51
|
|
117
|
+
{ 0x52, BGN }, //- 0x52
|
|
118
|
+
{ 0x53, BGN }, //- 0x53
|
|
119
|
+
{ 0x54, BGN }, //- 0x54
|
|
120
|
+
{ 0x55, BGN }, //- 0x55
|
|
121
|
+
{ 0x56, BGN }, //- 0x56
|
|
122
|
+
{ 0x57, BGN }, //- 0x57
|
|
123
|
+
{ 0x58, BGN }, //- 0x58
|
|
124
|
+
{ 0x59, BGN }, //- 0x59
|
|
125
|
+
{ 0x5A, BGN }, //- 0x5A
|
|
126
|
+
{ 0x5B, BGN }, //- 0x5B
|
|
127
|
+
{ 0x5C, BGN }, //- 0x5C
|
|
128
|
+
{ 0x5D, BGN }, //- 0x5D
|
|
129
|
+
{ 0x5E, BGN }, //- 0x5E
|
|
130
|
+
{ 0x5F, BGN }, //- 0x5F
|
|
131
|
+
|
|
132
|
+
{ 0x60, BGN }, //- 0x60
|
|
133
|
+
{ 0x61, BGN }, //- 0x61
|
|
134
|
+
{ 0x62, BGN }, //- 0x62
|
|
135
|
+
{ 0x63, BGN }, //- 0x63
|
|
136
|
+
{ 0x64, BGN }, //- 0x64
|
|
137
|
+
{ 0x65, BGN }, //- 0x65
|
|
138
|
+
{ 0x66, BGN }, //- 0x66
|
|
139
|
+
{ 0x67, BGN }, //- 0x67
|
|
140
|
+
{ 0x68, BGN }, //- 0x68
|
|
141
|
+
{ 0x69, BGN }, //- 0x69
|
|
142
|
+
{ 0x6A, BGN }, //- 0x6A
|
|
143
|
+
{ 0x6B, BGN }, //- 0x6B
|
|
144
|
+
{ 0x6C, BGN }, //- 0x6C
|
|
145
|
+
{ 0x6D, BGN }, //- 0x6D
|
|
146
|
+
{ 0x6E, BGN }, //- 0x6E
|
|
147
|
+
{ 0x6F, BGN }, //- 0x6F
|
|
148
|
+
|
|
149
|
+
{ 0x70, BGN }, //- 0x70
|
|
150
|
+
{ 0x71, BGN }, //- 0x71
|
|
151
|
+
{ 0x72, BGN }, //- 0x72
|
|
152
|
+
{ 0x73, BGN }, //- 0x73
|
|
153
|
+
{ 0x74, BGN }, //- 0x74
|
|
154
|
+
{ 0x75, BGN }, //- 0x75
|
|
155
|
+
{ 0x76, BGN }, //- 0x76
|
|
156
|
+
{ 0x77, BGN }, //- 0x77
|
|
157
|
+
{ 0x78, BGN }, //- 0x78
|
|
158
|
+
{ 0x79, BGN }, //- 0x79
|
|
159
|
+
{ 0x7A, BGN }, //- 0x7A
|
|
160
|
+
{ 0x7B, BGN }, //- 0x7B
|
|
161
|
+
{ 0x7C, BGN }, //- 0x7C
|
|
162
|
+
{ 0x7D, BGN }, //- 0x7D
|
|
163
|
+
{ 0x7E, BGN }, //- 0x7E
|
|
164
|
+
{ 0x7F, BGN }, //- 0x7F
|
|
165
|
+
|
|
166
|
+
{ 0x00, ERR }, //- 0x80
|
|
167
|
+
{ 0x01, ERR }, //- 0x81
|
|
168
|
+
{ 0x02, ERR }, //- 0x82
|
|
169
|
+
{ 0x03, ERR }, //- 0x83
|
|
170
|
+
{ 0x04, ERR }, //- 0x84
|
|
171
|
+
{ 0x05, ERR }, //- 0x85
|
|
172
|
+
{ 0x06, ERR }, //- 0x86
|
|
173
|
+
{ 0x07, ERR }, //- 0x87
|
|
174
|
+
{ 0x08, ERR }, //- 0x88
|
|
175
|
+
{ 0x09, ERR }, //- 0x89
|
|
176
|
+
{ 0x0A, ERR }, //- 0x8A
|
|
177
|
+
{ 0x0B, ERR }, //- 0x8B
|
|
178
|
+
{ 0x0C, ERR }, //- 0x8C
|
|
179
|
+
{ 0x0D, ERR }, //- 0x8D
|
|
180
|
+
{ 0x0E, ERR }, //- 0x8E
|
|
181
|
+
{ 0x0F, ERR }, //- 0x8F
|
|
182
|
+
|
|
183
|
+
{ 0x10, ERR }, //- 0x90
|
|
184
|
+
{ 0x11, ERR }, //- 0x91
|
|
185
|
+
{ 0x12, ERR }, //- 0x92
|
|
186
|
+
{ 0x13, ERR }, //- 0x93
|
|
187
|
+
{ 0x14, ERR }, //- 0x94
|
|
188
|
+
{ 0x15, ERR }, //- 0x95
|
|
189
|
+
{ 0x16, ERR }, //- 0x96
|
|
190
|
+
{ 0x17, ERR }, //- 0x97
|
|
191
|
+
{ 0x18, ERR }, //- 0x98
|
|
192
|
+
{ 0x19, ERR }, //- 0x99
|
|
193
|
+
{ 0x1A, ERR }, //- 0x9A
|
|
194
|
+
{ 0x1B, ERR }, //- 0x9B
|
|
195
|
+
{ 0x1C, ERR }, //- 0x9C
|
|
196
|
+
{ 0x1D, ERR }, //- 0x9D
|
|
197
|
+
{ 0x1E, ERR }, //- 0x9E
|
|
198
|
+
{ 0x1F, ERR }, //- 0x9F
|
|
199
|
+
|
|
200
|
+
{ 0x20, ERR }, //- 0xA0
|
|
201
|
+
{ 0x21, ERR }, //- 0xA1
|
|
202
|
+
{ 0x22, ERR }, //- 0xA2
|
|
203
|
+
{ 0x23, ERR }, //- 0xA3
|
|
204
|
+
{ 0x24, ERR }, //- 0xA4
|
|
205
|
+
{ 0x25, ERR }, //- 0xA5
|
|
206
|
+
{ 0x26, ERR }, //- 0xA6
|
|
207
|
+
{ 0x27, ERR }, //- 0xA7
|
|
208
|
+
{ 0x28, ERR }, //- 0xA8
|
|
209
|
+
{ 0x29, ERR }, //- 0xA9
|
|
210
|
+
{ 0x2A, ERR }, //- 0xAA
|
|
211
|
+
{ 0x2B, ERR }, //- 0xAB
|
|
212
|
+
{ 0x2C, ERR }, //- 0xAC
|
|
213
|
+
{ 0x2D, ERR }, //- 0xAD
|
|
214
|
+
{ 0x2E, ERR }, //- 0xAE
|
|
215
|
+
{ 0x2F, ERR }, //- 0xAF
|
|
216
|
+
|
|
217
|
+
{ 0x30, ERR }, //- 0xB0
|
|
218
|
+
{ 0x31, ERR }, //- 0xB1
|
|
219
|
+
{ 0x32, ERR }, //- 0xB2
|
|
220
|
+
{ 0x33, ERR }, //- 0xB3
|
|
221
|
+
{ 0x34, ERR }, //- 0xB4
|
|
222
|
+
{ 0x35, ERR }, //- 0xB5
|
|
223
|
+
{ 0x36, ERR }, //- 0xB6
|
|
224
|
+
{ 0x37, ERR }, //- 0xB7
|
|
225
|
+
{ 0x38, ERR }, //- 0xB8
|
|
226
|
+
{ 0x39, ERR }, //- 0xB9
|
|
227
|
+
{ 0x3A, ERR }, //- 0xBA
|
|
228
|
+
{ 0x3B, ERR }, //- 0xBB
|
|
229
|
+
{ 0x3C, ERR }, //- 0xBC
|
|
230
|
+
{ 0x3D, ERR }, //- 0xBD
|
|
231
|
+
{ 0x3E, ERR }, //- 0xBE
|
|
232
|
+
{ 0x3F, ERR }, //- 0xBF
|
|
233
|
+
|
|
234
|
+
{ 0xC0, ERR }, //- 0xC0
|
|
235
|
+
{ 0xC1, ERR }, //- 0xC1
|
|
236
|
+
{ 0x02, CS1 }, //- 0xC2
|
|
237
|
+
{ 0x03, CS1 }, //- 0xC3
|
|
238
|
+
{ 0x04, CS1 }, //- 0xC4
|
|
239
|
+
{ 0x05, CS1 }, //- 0xC5
|
|
240
|
+
{ 0x06, CS1 }, //- 0xC6
|
|
241
|
+
{ 0x07, CS1 }, //- 0xC7
|
|
242
|
+
{ 0x08, CS1 }, //- 0xC8
|
|
243
|
+
{ 0x09, CS1 }, //- 0xC9
|
|
244
|
+
{ 0x0A, CS1 }, //- 0xCA
|
|
245
|
+
{ 0x0B, CS1 }, //- 0xCB
|
|
246
|
+
{ 0x0C, CS1 }, //- 0xCC
|
|
247
|
+
{ 0x0D, CS1 }, //- 0xCD
|
|
248
|
+
{ 0x0E, CS1 }, //- 0xCE
|
|
249
|
+
{ 0x0F, CS1 }, //- 0xCF
|
|
250
|
+
|
|
251
|
+
{ 0x10, CS1 }, //- 0xD0
|
|
252
|
+
{ 0x11, CS1 }, //- 0xD1
|
|
253
|
+
{ 0x12, CS1 }, //- 0xD2
|
|
254
|
+
{ 0x13, CS1 }, //- 0xD3
|
|
255
|
+
{ 0x14, CS1 }, //- 0xD4
|
|
256
|
+
{ 0x15, CS1 }, //- 0xD5
|
|
257
|
+
{ 0x16, CS1 }, //- 0xD6
|
|
258
|
+
{ 0x17, CS1 }, //- 0xD7
|
|
259
|
+
{ 0x18, CS1 }, //- 0xD8
|
|
260
|
+
{ 0x19, CS1 }, //- 0xD9
|
|
261
|
+
{ 0x1A, CS1 }, //- 0xDA
|
|
262
|
+
{ 0x1B, CS1 }, //- 0xDB
|
|
263
|
+
{ 0x1C, CS1 }, //- 0xDC
|
|
264
|
+
{ 0x1D, CS1 }, //- 0xDD
|
|
265
|
+
{ 0x1E, CS1 }, //- 0xDE
|
|
266
|
+
{ 0x1F, CS1 }, //- 0xDF
|
|
267
|
+
|
|
268
|
+
{ 0x00, P3A }, //- 0xE0
|
|
269
|
+
{ 0x01, CS2 }, //- 0xE1
|
|
270
|
+
{ 0x02, CS2 }, //- 0xE2
|
|
271
|
+
{ 0x03, CS2 }, //- 0xE3
|
|
272
|
+
{ 0x04, CS2 }, //- 0xE4
|
|
273
|
+
{ 0x05, CS2 }, //- 0xE5
|
|
274
|
+
{ 0x06, CS2 }, //- 0xE6
|
|
275
|
+
{ 0x07, CS2 }, //- 0xE7
|
|
276
|
+
{ 0x08, CS2 }, //- 0xE8
|
|
277
|
+
{ 0x09, CS2 }, //- 0xE9
|
|
278
|
+
{ 0x0A, CS2 }, //- 0xEA
|
|
279
|
+
{ 0x0B, CS2 }, //- 0xEB
|
|
280
|
+
{ 0x0C, CS2 }, //- 0xEC
|
|
281
|
+
{ 0x0D, P3B }, //- 0xED
|
|
282
|
+
{ 0x0E, CS2 }, //- 0xEE
|
|
283
|
+
{ 0x0F, CS2 }, //- 0xEF
|
|
284
|
+
|
|
285
|
+
{ 0x00, P4A }, //- 0xF0
|
|
286
|
+
{ 0x01, CS3 }, //- 0xF1
|
|
287
|
+
{ 0x02, CS3 }, //- 0xF2
|
|
288
|
+
{ 0x03, CS3 }, //- 0xF3
|
|
289
|
+
{ 0x04, P4B }, //- 0xF4
|
|
290
|
+
{ 0xF5, ERR }, //- 0xF5
|
|
291
|
+
{ 0xF6, ERR }, //- 0xF6
|
|
292
|
+
{ 0xF7, ERR }, //- 0xF7
|
|
293
|
+
{ 0xF8, ERR }, //- 0xF8
|
|
294
|
+
{ 0xF9, ERR }, //- 0xF9
|
|
295
|
+
{ 0xFA, ERR }, //- 0xFA
|
|
296
|
+
{ 0xFB, ERR }, //- 0xFB
|
|
297
|
+
{ 0xFC, ERR }, //- 0xFC
|
|
298
|
+
{ 0xFD, ERR }, //- 0xFD
|
|
299
|
+
{ 0xFE, ERR }, //- 0xFE
|
|
300
|
+
{ 0xFF, ERR }, //- 0xFF
|
|
301
|
+
},
|
|
302
|
+
|
|
303
|
+
//- Initialize the maOctetCategory member array. This array implements a lookup table
|
|
304
|
+
// that maps an input octet to a corresponding octet category.
|
|
305
|
+
//
|
|
306
|
+
// 0 1 2 3 4 5 6 7 8 9 A B C D E F
|
|
307
|
+
//============================================================================================
|
|
308
|
+
{
|
|
309
|
+
ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, //- 00..0F
|
|
310
|
+
ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, //- 10..1F
|
|
311
|
+
ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, //- 20..2F
|
|
312
|
+
ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, //- 30..3F
|
|
313
|
+
//
|
|
314
|
+
ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, //- 40..4F
|
|
315
|
+
ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, //- 50..5F
|
|
316
|
+
ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, //- 60..6F
|
|
317
|
+
ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, //- 70..7F
|
|
318
|
+
//
|
|
319
|
+
CR1, CR1, CR1, CR1, CR1, CR1, CR1, CR1, CR1, CR1, CR1, CR1, CR1, CR1, CR1, CR1, //- 80..8F
|
|
320
|
+
CR2, CR2, CR2, CR2, CR2, CR2, CR2, CR2, CR2, CR2, CR2, CR2, CR2, CR2, CR2, CR2, //- 90..9F
|
|
321
|
+
CR3, CR3, CR3, CR3, CR3, CR3, CR3, CR3, CR3, CR3, CR3, CR3, CR3, CR3, CR3, CR3, //- A0..AF
|
|
322
|
+
CR3, CR3, CR3, CR3, CR3, CR3, CR3, CR3, CR3, CR3, CR3, CR3, CR3, CR3, CR3, CR3, //- B0..BF
|
|
323
|
+
//
|
|
324
|
+
ILL, ILL, L2A, L2A, L2A, L2A, L2A, L2A, L2A, L2A, L2A, L2A, L2A, L2A, L2A, L2A, //- C0..CF
|
|
325
|
+
L2A, L2A, L2A, L2A, L2A, L2A, L2A, L2A, L2A, L2A, L2A, L2A, L2A, L2A, L2A, L2A, //- D0..DF
|
|
326
|
+
L3A, L3B, L3B, L3B, L3B, L3B, L3B, L3B, L3B, L3B, L3B, L3B, L3B, L3C, L3B, L3B, //- E0..EF
|
|
327
|
+
L4A, L4B, L4B, L4B, L4C, ILL, ILL, ILL, ILL, ILL, ILL, ILL, ILL, ILL, ILL, ILL, //- F0..FF
|
|
328
|
+
},
|
|
329
|
+
|
|
330
|
+
//- Initialize the maTransitions member array. This array implements a lookup table that,
|
|
331
|
+
// given the current DFA state and an input code unit, indicates the next DFA state.
|
|
332
|
+
//
|
|
333
|
+
// ILL ASC CR1 CR2 CR3 L2A L3A L3B L3C L4A L4B L4C CLASS/STATE
|
|
334
|
+
//=========================================================================
|
|
335
|
+
{
|
|
336
|
+
err, END, err, err, err, CS1, P3A, CS2, P3B, P4A, CS3, P4B, //- BGN|END
|
|
337
|
+
err, err, err, err, err, err, err, err, err, err, err, err, //- ERR
|
|
338
|
+
//
|
|
339
|
+
err, err, END, END, END, err, err, err, err, err, err, err, //- CS1
|
|
340
|
+
err, err, CS1, CS1, CS1, err, err, err, err, err, err, err, //- CS2
|
|
341
|
+
err, err, CS2, CS2, CS2, err, err, err, err, err, err, err, //- CS3
|
|
342
|
+
//
|
|
343
|
+
err, err, err, err, CS1, err, err, err, err, err, err, err, //- P3A
|
|
344
|
+
err, err, CS1, CS1, err, err, err, err, err, err, err, err, //- P3B
|
|
345
|
+
//
|
|
346
|
+
err, err, err, CS2, CS2, err, err, err, err, err, err, err, //- P4A
|
|
347
|
+
err, err, CS2, err, err, err, err, err, err, err, err, err, //- P4B
|
|
348
|
+
},
|
|
349
|
+
|
|
350
|
+
//- Initialize the maFirstOctetMask member array. This array implements a lookup table that
|
|
351
|
+
// maps a character class to a mask that is applied to the first code unit in a sequence.
|
|
352
|
+
//
|
|
353
|
+
{
|
|
354
|
+
0xFF, //- ILL - C0..C1, F5..FF Illegal code unit
|
|
355
|
+
//
|
|
356
|
+
0x7F, //- ASC - 00..7F ASCII byte range
|
|
357
|
+
//
|
|
358
|
+
0x3F, //- CR1 - 80..8F Continuation range 1
|
|
359
|
+
0x3F, //- CR2 - 90..9F Continuation range 2
|
|
360
|
+
0x3F, //- CR3 - A0..BF Continuation range 3
|
|
361
|
+
//
|
|
362
|
+
0x1F, //- L2A - C2..DF Leading byte range 2A / 2-byte sequence
|
|
363
|
+
//
|
|
364
|
+
0x0F, //- L3A - E0 Leading byte range 3A / 3-byte sequence
|
|
365
|
+
0x0F, //- L3B - E1..EC, EE..EF Leading byte range 3B / 3-byte sequence
|
|
366
|
+
0x0F, //- L3C - ED Leading byte range 3C / 3-byte sequence
|
|
367
|
+
//
|
|
368
|
+
0x07, //- L4A - F0 Leading byte range 4A / 4-byte sequence
|
|
369
|
+
0x07, //- L4B - F1..F3 Leading byte range 4B / 4-byte sequence
|
|
370
|
+
0x07, //- L4C - F4 Leading byte range 4C / 4-byte sequence
|
|
371
|
+
},
|
|
372
|
+
};
|
|
373
|
+
|
|
374
|
+
//- These are the human-readable names assigned to the code unit categories.
|
|
375
|
+
//
|
|
376
|
+
char const* UtfUtils::smClassNames[12] =
{
    "ILL",                  //- Illegal code unit
    "ASC",                  //- ASCII byte range
    "CR1", "CR2", "CR3",    //- Continuation ranges 1..3
    "L2A",                  //- Leading byte of a 2-byte sequence
    "L3A", "L3B", "L3C",    //- Leading bytes of a 3-byte sequence
    "L4A", "L4B", "L4C",    //- Leading bytes of a 4-byte sequence
};
|
|
380
|
+
|
|
381
|
+
//- These are the human-readable names assigned to the various states comprising the DFA.
|
|
382
|
+
//
|
|
383
|
+
//- Listed in the same order as the rows of the transition table.
char const* UtfUtils::smStateNames[9] =
{
    "BGN", "ERR",
    "CS1", "CS2", "CS3",
    "P3A", "P3B",
    "P4A", "P4B",
};
|
|
387
|
+
|
|
388
|
+
//--------------------------------------------------------------------------------------------------
|
|
389
|
+
/// \brief Converts a sequence of UTF-8 code units to a sequence of UTF-32 code points.
|
|
390
|
+
///
|
|
391
|
+
/// \details
|
|
392
|
+
/// This static member function reads an input sequence of UTF-8 code units and converts
|
|
393
|
+
/// it to an output sequence of UTF-32 code points. It performs conversion by traversing
|
|
394
|
+
/// the DFA without any optimizations using the `AdvanceWithBigTable` member function to
|
|
395
|
+
/// read and convert input.
|
|
396
|
+
///
|
|
397
|
+
/// \param pSrc
|
|
398
|
+
/// A non-null pointer defining the beginning of the code unit input range.
|
|
399
|
+
/// \param pSrcEnd
|
|
400
|
+
/// A non-null past-the-end pointer defining the end of the code unit input range.
|
|
401
|
+
/// \param pDst
|
|
402
|
+
/// A non-null pointer defining the beginning of the code point output range.
|
|
403
|
+
///
|
|
404
|
+
/// \returns
|
|
405
|
+
/// If successful, the number of UTF-32 code points written; otherwise -1 is returned to
|
|
406
|
+
/// indicate an error was encountered.
|
|
407
|
+
//--------------------------------------------------------------------------------------------------
|
|
408
|
+
//
|
|
409
|
+
KEWB_ALIGN_FN std::ptrdiff_t
UtfUtils::BasicBigTableConvert(char8_t const* pSrc, char8_t const* pSrcEnd, char32_t* pDst) noexcept
{
    char32_t* const pDstBegin = pDst;   //- Remembered so the output count can be computed
    char32_t        codePoint;

    while (pSrc < pSrcEnd)
    {
        //- A single malformed sequence aborts the whole conversion.
        if (AdvanceWithBigTable(pSrc, pSrcEnd, codePoint) == ERR)
        {
            return -1;
        }

        *pDst = codePoint;
        ++pDst;
    }

    return pDst - pDstBegin;
}
|
|
429
|
+
|
|
430
|
+
//--------------------------------------------------------------------------------------------------
|
|
431
|
+
/// \brief Converts a sequence of UTF-8 code units to a sequence of UTF-32 code points.
|
|
432
|
+
///
|
|
433
|
+
/// \details
|
|
434
|
+
/// This static member function reads an input sequence of UTF-8 code units and converts
|
|
435
|
+
/// it to an output sequence of UTF-32 code points. It uses the DFA to perform non-ascii
|
|
436
|
+
/// code-unit sequence conversions, but optimizes by checking for ASCII code units and
|
|
437
|
+
/// converting them directly to code points. It uses the `AdvanceWithBigTable` member
|
|
438
|
+
/// function to read and convert input.
|
|
439
|
+
///
|
|
440
|
+
/// \param pSrc
|
|
441
|
+
/// A non-null pointer defining the beginning of the code unit input range.
|
|
442
|
+
/// \param pSrcEnd
|
|
443
|
+
/// A non-null past-the-end pointer defining the end of the code unit input range.
|
|
444
|
+
/// \param pDst
|
|
445
|
+
/// A non-null pointer defining the beginning of the code point output range.
|
|
446
|
+
///
|
|
447
|
+
/// \returns
|
|
448
|
+
/// If successful, the number of UTF-32 code points written; otherwise -1 is returned to
|
|
449
|
+
/// indicate an error was encountered.
|
|
450
|
+
//--------------------------------------------------------------------------------------------------
|
|
451
|
+
//
|
|
452
|
+
KEWB_ALIGN_FN std::ptrdiff_t
UtfUtils::FastBigTableConvert(char8_t const* pSrc, char8_t const* pSrcEnd, char32_t* pDst) noexcept
{
    char32_t* const pDstBegin = pDst;   //- Remembered so the output count can be computed
    char32_t        codePoint;

    while (pSrc < pSrcEnd)
    {
        //- ASCII shortcut: the code unit is copied straight to the output.
        if (*pSrc < 0x80)
        {
            *pDst = *pSrc;
            ++pDst;
            ++pSrc;
            continue;
        }

        //- Everything else goes through the DFA; an error aborts the conversion.
        if (AdvanceWithBigTable(pSrc, pSrcEnd, codePoint) == ERR)
        {
            return -1;
        }

        *pDst = codePoint;
        ++pDst;
    }

    return pDst - pDstBegin;
}
|
|
479
|
+
|
|
480
|
+
//--------------------------------------------------------------------------------------------------
|
|
481
|
+
/// \brief Converts a sequence of UTF-8 code units to a sequence of UTF-32 code points.
|
|
482
|
+
///
|
|
483
|
+
/// \details
|
|
484
|
+
/// This static member function reads an input sequence of UTF-8 code units and converts
|
|
485
|
+
/// it to an output sequence of UTF-32 code points. It uses the DFA to perform non-ascii
|
|
486
|
+
/// code-unit sequence conversions, but optimizes by converting contiguous sequences of
|
|
487
|
+
/// ASCII code units using SSE intrinsics. It uses the `AdvanceWithBigTable` member
|
|
488
|
+
/// function to read and convert input.
|
|
489
|
+
///
|
|
490
|
+
/// \param pSrc
|
|
491
|
+
/// A non-null pointer defining the beginning of the code unit input range.
|
|
492
|
+
/// \param pSrcEnd
|
|
493
|
+
/// A non-null past-the-end pointer defining the end of the code unit input range.
|
|
494
|
+
/// \param pDst
|
|
495
|
+
/// A non-null pointer defining the beginning of the code point output range.
|
|
496
|
+
///
|
|
497
|
+
/// \returns
|
|
498
|
+
/// If successful, the number of UTF-32 code points written; otherwise -1 is returned to
|
|
499
|
+
/// indicate an error was encountered.
|
|
500
|
+
//--------------------------------------------------------------------------------------------------
|
|
501
|
+
//
|
|
502
|
+
KEWB_ALIGN_FN std::ptrdiff_t
UtfUtils::SseBigTableConvert(char8_t const* pSrc, char8_t const* pSrcEnd, char32_t* pDst) noexcept
{
    //- Width, in bytes, of one SSE register.
    constexpr std::ptrdiff_t kChunkSize = static_cast<std::ptrdiff_t>(sizeof(__m128i));

    char32_t* const pDstOrig = pDst;
    char32_t        cdpt;

    //- Bulk phase: runs only while more than one full register of input remains, so the
    //  SSE helper is never invoked with fewer than sizeof(__m128i) readable bytes.
    //
    //  The remaining length is compared directly. Computing `pSrcEnd - sizeof(__m128i)`
    //  would form a pointer before the start of the buffer (undefined behavior) whenever
    //  the input is shorter than one register.
    //
    while ((pSrcEnd - pSrc) > kChunkSize)
    {
        if (*pSrc < 0x80)
        {
            ConvertAsciiWithSse(pSrc, pDst);
        }
        else
        {
            if (AdvanceWithBigTable(pSrc, pSrcEnd, cdpt) != ERR)
            {
                *pDst++ = cdpt;
            }
            else
            {
                return -1;
            }
        }
    }

    //- Tail phase: the last few code units are converted one at a time.
    //
    while (pSrc < pSrcEnd)
    {
        if (*pSrc < 0x80)
        {
            *pDst++ = *pSrc++;
        }
        else
        {
            if (AdvanceWithBigTable(pSrc, pSrcEnd, cdpt) != ERR)
            {
                *pDst++ = cdpt;
            }
            else
            {
                return -1;
            }
        }
    }

    return pDst - pDstOrig;
}
|
|
548
|
+
|
|
549
|
+
//--------------------------------------------------------------------------------------------------
|
|
550
|
+
/// \brief Converts a sequence of UTF-8 code units to a sequence of UTF-16 code units.
|
|
551
|
+
///
|
|
552
|
+
/// \details
|
|
553
|
+
/// This static member function reads an input sequence of UTF-8 code units and converts
|
|
554
|
+
/// it to an output sequence of UTF-16 code units. It performs conversion by traversing
|
|
555
|
+
/// the DFA without any optimizations using the `AdvanceWithBigTable` member function to
|
|
556
|
+
/// read and convert input.
|
|
557
|
+
///
|
|
558
|
+
/// \param pSrc
|
|
559
|
+
/// A non-null pointer defining the beginning of the code unit input range.
|
|
560
|
+
/// \param pSrcEnd
|
|
561
|
+
/// A non-null past-the-end pointer defining the end of the code unit input range.
|
|
562
|
+
/// \param pDst
|
|
563
|
+
/// A non-null pointer defining the beginning of the code unit output range.
|
|
564
|
+
///
|
|
565
|
+
/// \returns
|
|
566
|
+
/// If successful, the number of UTF-16 code units written; otherwise -1 is returned to
|
|
567
|
+
/// indicate an error was encountered.
|
|
568
|
+
//--------------------------------------------------------------------------------------------------
|
|
569
|
+
//
|
|
570
|
+
KEWB_ALIGN_FN std::ptrdiff_t
UtfUtils::BasicBigTableConvert(char8_t const* pSrc, char8_t const* pSrcEnd, char16_t* pDst) noexcept
{
    char16_t* const pDstBegin = pDst;   //- Remembered so the output count can be computed
    char32_t        codePoint;

    while (pSrc < pSrcEnd)
    {
        //- A single malformed sequence aborts the whole conversion.
        if (AdvanceWithBigTable(pSrc, pSrcEnd, codePoint) == ERR)
        {
            return -1;
        }

        //- Emit the decoded code point as UTF-16 through the shared helper.
        GetCodeUnits(codePoint, pDst);
    }

    return pDst - pDstBegin;
}
|
|
590
|
+
|
|
591
|
+
//--------------------------------------------------------------------------------------------------
|
|
592
|
+
/// \brief Converts a sequence of UTF-8 code units to a sequence of UTF-16 code units.
|
|
593
|
+
///
|
|
594
|
+
/// \details
|
|
595
|
+
/// This static member function reads an input sequence of UTF-8 code units and converts
|
|
596
|
+
/// it to an output sequence of UTF-16 code units. It uses the DFA to perform non-ascii
|
|
597
|
+
/// code-unit sequence conversions, but optimizes by checking for ASCII code units and
|
|
598
|
+
/// converting them directly to code points. It uses the `AdvanceWithBigTable` member
|
|
599
|
+
/// function to read and convert input.
|
|
600
|
+
///
|
|
601
|
+
/// \param pSrc
|
|
602
|
+
/// A non-null pointer defining the beginning of the code unit input range.
|
|
603
|
+
/// \param pSrcEnd
|
|
604
|
+
/// A non-null past-the-end pointer defining the end of the code unit input range.
|
|
605
|
+
/// \param pDst
|
|
606
|
+
/// A non-null pointer defining the beginning of the code unit output range.
|
|
607
|
+
///
|
|
608
|
+
/// \returns
|
|
609
|
+
/// If successful, the number of UTF-16 code units written; otherwise -1 is returned to
|
|
610
|
+
/// indicate an error was encountered.
|
|
611
|
+
//--------------------------------------------------------------------------------------------------
|
|
612
|
+
//
|
|
613
|
+
KEWB_ALIGN_FN std::ptrdiff_t
UtfUtils::FastBigTableConvert(char8_t const* pSrc, char8_t const* pSrcEnd, char16_t* pDst) noexcept
{
    char16_t* const pDstBegin = pDst;   //- Remembered so the output count can be computed
    char32_t        codePoint;

    while (pSrc < pSrcEnd)
    {
        //- ASCII shortcut: the code unit is copied straight to the output.
        if (*pSrc < 0x80)
        {
            *pDst = *pSrc;
            ++pDst;
            ++pSrc;
            continue;
        }

        //- Everything else goes through the DFA; an error aborts the conversion.
        if (AdvanceWithBigTable(pSrc, pSrcEnd, codePoint) == ERR)
        {
            return -1;
        }

        GetCodeUnits(codePoint, pDst);
    }

    return pDst - pDstBegin;
}
|
|
640
|
+
|
|
641
|
+
//--------------------------------------------------------------------------------------------------
|
|
642
|
+
/// \brief Converts a sequence of UTF-8 code units to a sequence of UTF-16 code units.
|
|
643
|
+
///
|
|
644
|
+
/// \details
|
|
645
|
+
/// This static member function reads an input sequence of UTF-8 code units and converts
|
|
646
|
+
/// it to an output sequence of UTF-16 code units. It uses the DFA to perform non-ascii
|
|
647
|
+
/// code-unit sequence conversions, but optimizes by converting contiguous sequences of
|
|
648
|
+
/// ASCII code units using SSE intrinsics. It uses the `AdvanceWithBigTable` member
|
|
649
|
+
/// function to read and convert input.
|
|
650
|
+
///
|
|
651
|
+
/// \param pSrc
|
|
652
|
+
/// A non-null pointer defining the beginning of the code unit input range.
|
|
653
|
+
/// \param pSrcEnd
|
|
654
|
+
/// A non-null past-the-end pointer defining the end of the code unit input range.
|
|
655
|
+
/// \param pDst
|
|
656
|
+
/// A non-null pointer defining the beginning of the code unit output range.
|
|
657
|
+
///
|
|
658
|
+
/// \returns
|
|
659
|
+
/// If successful, the number of UTF-16 code units written; otherwise -1 is returned to
|
|
660
|
+
/// indicate an error was encountered.
|
|
661
|
+
//--------------------------------------------------------------------------------------------------
|
|
662
|
+
//
|
|
663
|
+
KEWB_ALIGN_FN std::ptrdiff_t
UtfUtils::SseBigTableConvert(char8_t const* pSrc, char8_t const* pSrcEnd, char16_t* pDst) noexcept
{
    //- Width, in bytes, of one SSE register.
    constexpr std::ptrdiff_t kChunkSize = static_cast<std::ptrdiff_t>(sizeof(__m128i));

    char16_t* const pDstOrig = pDst;
    char32_t        cdpt;

    //- Bulk phase: runs only while more than one full register of input remains, so the
    //  SSE helper is never invoked with fewer than sizeof(__m128i) readable bytes.
    //
    //  The remaining length is compared directly. Computing `pSrcEnd - sizeof(__m128i)`
    //  would form a pointer before the start of the buffer (undefined behavior) whenever
    //  the input is shorter than one register.
    //
    while ((pSrcEnd - pSrc) > kChunkSize)
    {
        if (*pSrc < 0x80)
        {
            ConvertAsciiWithSse(pSrc, pDst);
        }
        else
        {
            if (AdvanceWithBigTable(pSrc, pSrcEnd, cdpt) != ERR)
            {
                GetCodeUnits(cdpt, pDst);
            }
            else
            {
                return -1;
            }
        }
    }

    //- Tail phase: the last few code units are converted one at a time.
    //
    while (pSrc < pSrcEnd)
    {
        if (*pSrc < 0x80)
        {
            *pDst++ = *pSrc++;
        }
        else
        {
            if (AdvanceWithBigTable(pSrc, pSrcEnd, cdpt) != ERR)
            {
                GetCodeUnits(cdpt, pDst);
            }
            else
            {
                return -1;
            }
        }
    }

    return pDst - pDstOrig;
}
|
|
709
|
+
|
|
710
|
+
//--------------------------------------------------------------------------------------------------
|
|
711
|
+
/// \brief Converts a sequence of UTF-8 code units to a sequence of UTF-32 code points.
|
|
712
|
+
///
|
|
713
|
+
/// \details
|
|
714
|
+
/// This static member function reads an input sequence of UTF-8 code units and converts
|
|
715
|
+
/// it to an output sequence of UTF-32 code points. It performs conversion by traversing
|
|
716
|
+
/// the DFA without any optimizations using the `AdvanceWithSmallTable` member function to
|
|
717
|
+
/// read and convert input.
|
|
718
|
+
///
|
|
719
|
+
/// \param pSrc
|
|
720
|
+
/// A non-null pointer defining the beginning of the code unit input range.
|
|
721
|
+
/// \param pSrcEnd
|
|
722
|
+
/// A non-null past-the-end pointer defining the end of the code unit input range.
|
|
723
|
+
/// \param pDst
|
|
724
|
+
/// A non-null pointer defining the beginning of the code point output range.
|
|
725
|
+
///
|
|
726
|
+
/// \returns
|
|
727
|
+
/// If successful, the number of UTF-32 code points written; otherwise -1 is returned to
|
|
728
|
+
/// indicate an error was encountered.
|
|
729
|
+
//--------------------------------------------------------------------------------------------------
|
|
730
|
+
//
|
|
731
|
+
KEWB_ALIGN_FN std::ptrdiff_t
UtfUtils::BasicSmallTableConvert(char8_t const* pSrc, char8_t const* pSrcEnd, char32_t* pDst) noexcept
{
    char32_t* const pDstBegin = pDst;   //- Remembered so the output count can be computed
    char32_t        codePoint;

    while (pSrc < pSrcEnd)
    {
        //- A single malformed sequence aborts the whole conversion.
        if (AdvanceWithSmallTable(pSrc, pSrcEnd, codePoint) == ERR)
        {
            return -1;
        }

        *pDst = codePoint;
        ++pDst;
    }

    return pDst - pDstBegin;
}
|
|
751
|
+
|
|
752
|
+
//--------------------------------------------------------------------------------------------------
|
|
753
|
+
/// \brief Converts a sequence of UTF-8 code units to a sequence of UTF-32 code points.
|
|
754
|
+
///
|
|
755
|
+
/// \details
|
|
756
|
+
/// This static member function reads an input sequence of UTF-8 code units and converts
|
|
757
|
+
/// it to an output sequence of UTF-32 code points. It uses the DFA to perform non-ascii
|
|
758
|
+
/// code-unit sequence conversions, but optimizes by checking for ASCII code units and
|
|
759
|
+
/// converting them directly to code points. It uses the `AdvanceWithSmallTable` member
|
|
760
|
+
/// function to read and convert input.
|
|
761
|
+
///
|
|
762
|
+
/// \param pSrc
|
|
763
|
+
/// A non-null pointer defining the beginning of the code unit input range.
|
|
764
|
+
/// \param pSrcEnd
|
|
765
|
+
/// A non-null past-the-end pointer defining the end of the code unit input range.
|
|
766
|
+
/// \param pDst
|
|
767
|
+
/// A non-null pointer defining the beginning of the code point output range.
|
|
768
|
+
///
|
|
769
|
+
/// \returns
|
|
770
|
+
/// If successful, the number of UTF-32 code points written; otherwise -1 is returned to
|
|
771
|
+
/// indicate an error was encountered.
|
|
772
|
+
//--------------------------------------------------------------------------------------------------
|
|
773
|
+
//
|
|
774
|
+
KEWB_ALIGN_FN std::ptrdiff_t
UtfUtils::FastSmallTableConvert(char8_t const* pSrc, char8_t const* pSrcEnd, char32_t* pDst) noexcept
{
    char32_t* const pDstBegin = pDst;   //- Remembered so the output count can be computed
    char32_t        codePoint;

    while (pSrc < pSrcEnd)
    {
        //- ASCII shortcut: the code unit is copied straight to the output.
        if (*pSrc < 0x80)
        {
            *pDst = *pSrc;
            ++pDst;
            ++pSrc;
            continue;
        }

        //- Everything else goes through the DFA; an error aborts the conversion.
        if (AdvanceWithSmallTable(pSrc, pSrcEnd, codePoint) == ERR)
        {
            return -1;
        }

        *pDst = codePoint;
        ++pDst;
    }

    return pDst - pDstBegin;
}
|
|
801
|
+
|
|
802
|
+
//--------------------------------------------------------------------------------------------------
|
|
803
|
+
/// \brief Converts a sequence of UTF-8 code units to a sequence of UTF-32 code points.
|
|
804
|
+
///
|
|
805
|
+
/// \details
|
|
806
|
+
/// This static member function reads an input sequence of UTF-8 code units and converts
|
|
807
|
+
/// it to an output sequence of UTF-32 code points. It uses the DFA to perform non-ascii
|
|
808
|
+
/// code-unit sequence conversions, but optimizes by converting contiguous sequences of
|
|
809
|
+
/// ASCII code units using SSE intrinsics. It uses the `AdvanceWithSmallTable` member
|
|
810
|
+
/// function to read and convert input.
|
|
811
|
+
///
|
|
812
|
+
/// \param pSrc
|
|
813
|
+
/// A non-null pointer defining the beginning of the code unit input range.
|
|
814
|
+
/// \param pSrcEnd
|
|
815
|
+
/// A non-null past-the-end pointer defining the end of the code unit input range.
|
|
816
|
+
/// \param pDst
|
|
817
|
+
/// A non-null pointer defining the beginning of the code point output range.
|
|
818
|
+
///
|
|
819
|
+
/// \returns
|
|
820
|
+
/// If successful, the number of UTF-32 code points written; otherwise -1 is returned to
|
|
821
|
+
/// indicate an error was encountered.
|
|
822
|
+
//--------------------------------------------------------------------------------------------------
|
|
823
|
+
//
|
|
824
|
+
KEWB_ALIGN_FN std::ptrdiff_t
UtfUtils::SseSmallTableConvert(char8_t const* pSrc, char8_t const* pSrcEnd, char32_t* pDst) noexcept
{
    //- Width, in bytes, of one SSE register.
    constexpr std::ptrdiff_t kChunkSize = static_cast<std::ptrdiff_t>(sizeof(__m128i));

    char32_t* const pDstOrig = pDst;
    char32_t        cdpt;

    //- Bulk phase: runs only while more than one full register of input remains, so the
    //  SSE helper is never invoked with fewer than sizeof(__m128i) readable bytes.
    //
    //  The remaining length is compared directly. Computing `pSrcEnd - sizeof(__m128i)`
    //  would form a pointer before the start of the buffer (undefined behavior) whenever
    //  the input is shorter than one register.
    //
    while ((pSrcEnd - pSrc) > kChunkSize)
    {
        if (*pSrc < 0x80)
        {
            ConvertAsciiWithSse(pSrc, pDst);
        }
        else
        {
            if (AdvanceWithSmallTable(pSrc, pSrcEnd, cdpt) != ERR)
            {
                *pDst++ = cdpt;
            }
            else
            {
                return -1;
            }
        }
    }

    //- Tail phase: the last few code units are converted one at a time.
    //
    while (pSrc < pSrcEnd)
    {
        if (*pSrc < 0x80)
        {
            *pDst++ = *pSrc++;
        }
        else
        {
            if (AdvanceWithSmallTable(pSrc, pSrcEnd, cdpt) != ERR)
            {
                *pDst++ = cdpt;
            }
            else
            {
                return -1;
            }
        }
    }

    return pDst - pDstOrig;
}
|
|
870
|
+
|
|
871
|
+
//--------------------------------------------------------------------------------------------------
|
|
872
|
+
/// \brief Converts a sequence of UTF-8 code units to a sequence of UTF-16 code units.
|
|
873
|
+
///
|
|
874
|
+
/// \details
|
|
875
|
+
/// This static member function reads an input sequence of UTF-8 code units and converts
|
|
876
|
+
/// it to an output sequence of UTF-16 code units. It performs conversion by traversing
|
|
877
|
+
/// the DFA without any optimizations using the `AdvanceWithSmallTable` member function to
|
|
878
|
+
/// read and convert input.
|
|
879
|
+
///
|
|
880
|
+
/// \param pSrc
|
|
881
|
+
/// A non-null pointer defining the beginning of the code unit input range.
|
|
882
|
+
/// \param pSrcEnd
|
|
883
|
+
/// A non-null past-the-end pointer defining the end of the code unit input range.
|
|
884
|
+
/// \param pDst
|
|
885
|
+
/// A non-null pointer defining the beginning of the code unit output range.
|
|
886
|
+
///
|
|
887
|
+
/// \returns
|
|
888
|
+
/// If successful, the number of UTF-16 code units written; otherwise -1 is returned to
|
|
889
|
+
/// indicate an error was encountered.
|
|
890
|
+
//--------------------------------------------------------------------------------------------------
|
|
891
|
+
//
|
|
892
|
+
KEWB_ALIGN_FN std::ptrdiff_t
UtfUtils::BasicSmallTableConvert(char8_t const* pSrc, char8_t const* pSrcEnd, char16_t* pDst) noexcept
{
    char16_t* const pDstBegin = pDst;   //- Remembered so the output count can be computed
    char32_t        codePoint;

    while (pSrc < pSrcEnd)
    {
        //- A single malformed sequence aborts the whole conversion.
        if (AdvanceWithSmallTable(pSrc, pSrcEnd, codePoint) == ERR)
        {
            return -1;
        }

        //- Emit the decoded code point as UTF-16 through the shared helper.
        GetCodeUnits(codePoint, pDst);
    }

    return pDst - pDstBegin;
}
|
|
912
|
+
|
|
913
|
+
//--------------------------------------------------------------------------------------------------
|
|
914
|
+
/// \brief Converts a sequence of UTF-8 code units to a sequence of UTF-16 code units.
|
|
915
|
+
///
|
|
916
|
+
/// \details
|
|
917
|
+
/// This static member function reads an input sequence of UTF-8 code units and converts
|
|
918
|
+
/// it to an output sequence of UTF-16 code units. It uses the DFA to perform non-ascii
|
|
919
|
+
/// code-unit sequence conversions, but optimizes by checking for ASCII code units and
|
|
920
|
+
/// converting them directly to code points. It uses the `AdvanceWithSmallTable` member
|
|
921
|
+
/// function to read and convert input.
|
|
922
|
+
///
|
|
923
|
+
/// \param pSrc
|
|
924
|
+
/// A non-null pointer defining the beginning of the code unit input range.
|
|
925
|
+
/// \param pSrcEnd
|
|
926
|
+
/// A non-null past-the-end pointer defining the end of the code unit input range.
|
|
927
|
+
/// \param pDst
|
|
928
|
+
/// A non-null pointer defining the beginning of the code unit output range.
|
|
929
|
+
///
|
|
930
|
+
/// \returns
|
|
931
|
+
/// If successful, the number of UTF-16 code units written; otherwise -1 is returned to
|
|
932
|
+
/// indicate an error was encountered.
|
|
933
|
+
//--------------------------------------------------------------------------------------------------
|
|
934
|
+
//
|
|
935
|
+
KEWB_ALIGN_FN std::ptrdiff_t
UtfUtils::FastSmallTableConvert(char8_t const* pSrc, char8_t const* pSrcEnd, char16_t* pDst) noexcept
{
    char16_t* const pDstBegin = pDst;   //- Remembered so the output count can be computed
    char32_t        codePoint;

    while (pSrc < pSrcEnd)
    {
        //- ASCII shortcut: the code unit is copied straight to the output.
        if (*pSrc < 0x80)
        {
            *pDst = *pSrc;
            ++pDst;
            ++pSrc;
            continue;
        }

        //- Everything else goes through the DFA; an error aborts the conversion.
        if (AdvanceWithSmallTable(pSrc, pSrcEnd, codePoint) == ERR)
        {
            return -1;
        }

        GetCodeUnits(codePoint, pDst);
    }

    return pDst - pDstBegin;
}
|
|
962
|
+
|
|
963
|
+
//--------------------------------------------------------------------------------------------------
|
|
964
|
+
/// \brief Converts a sequence of UTF-8 code units to a sequence of UTF-16 code units.
|
|
965
|
+
///
|
|
966
|
+
/// \details
|
|
967
|
+
/// This static member function reads an input sequence of UTF-8 code units and converts
|
|
968
|
+
/// it to an output sequence of UTF-16 code units. It uses the DFA to perform non-ascii
|
|
969
|
+
/// code-unit sequence conversions, but optimizes by converting contiguous sequences of
|
|
970
|
+
/// ASCII code units using SSE intrinsics. It uses the `AdvanceWithSmallTable` member
|
|
971
|
+
/// function to read and convert input.
|
|
972
|
+
///
|
|
973
|
+
/// \param pSrc
|
|
974
|
+
/// A non-null pointer defining the beginning of the code unit input range.
|
|
975
|
+
/// \param pSrcEnd
|
|
976
|
+
/// A non-null past-the-end pointer defining the end of the code unit input range.
|
|
977
|
+
/// \param pDst
|
|
978
|
+
/// A non-null pointer defining the beginning of the code unit output range.
|
|
979
|
+
///
|
|
980
|
+
/// \returns
|
|
981
|
+
/// If successful, the number of UTF-16 code units written; otherwise -1 is returned to
|
|
982
|
+
/// indicate an error was encountered.
|
|
983
|
+
//--------------------------------------------------------------------------------------------------
|
|
984
|
+
//
|
|
985
|
+
KEWB_ALIGN_FN std::ptrdiff_t
UtfUtils::SseSmallTableConvert(char8_t const* pSrc, char8_t const* pSrcEnd, char16_t* pDst) noexcept
{
    //- Width, in bytes, of one SSE register.
    constexpr std::ptrdiff_t kChunkSize = static_cast<std::ptrdiff_t>(sizeof(__m128i));

    char16_t* const pDstOrig = pDst;
    char32_t        cdpt;

    //- Bulk phase: runs only while more than one full register of input remains, so the
    //  SSE helper is never invoked with fewer than sizeof(__m128i) readable bytes.
    //
    //  The remaining length is compared directly. Computing `pSrcEnd - sizeof(__m128i)`
    //  would form a pointer before the start of the buffer (undefined behavior) whenever
    //  the input is shorter than one register.
    //
    while ((pSrcEnd - pSrc) > kChunkSize)
    {
        if (*pSrc < 0x80)
        {
            ConvertAsciiWithSse(pSrc, pDst);
        }
        else
        {
            if (AdvanceWithSmallTable(pSrc, pSrcEnd, cdpt) != ERR)
            {
                GetCodeUnits(cdpt, pDst);
            }
            else
            {
                return -1;
            }
        }
    }

    //- Tail phase: the last few code units are converted one at a time.
    //
    while (pSrc < pSrcEnd)
    {
        if (*pSrc < 0x80)
        {
            *pDst++ = *pSrc++;
        }
        else
        {
            if (AdvanceWithSmallTable(pSrc, pSrcEnd, cdpt) != ERR)
            {
                GetCodeUnits(cdpt, pDst);
            }
            else
            {
                return -1;
            }
        }
    }

    return pDst - pDstOrig;
}
|
|
1031
|
+
|
|
1032
|
+
//--------------------------------------------------------------------------------------------------
|
|
1033
|
+
/// \brief Trace converts a sequence of UTF-8 code units to a sequence of UTF-32 code points.
|
|
1034
|
+
///
|
|
1035
|
+
/// \details
|
|
1036
|
+
/// This static member function reads an input sequence of UTF-8 code units and converts
|
|
1037
|
+
/// it to an output sequence of UTF-32 code points. It uses only the DFA to perform
|
|
1038
|
+
/// conversion. It prints current and next state transition information as it proceeds.
|
|
1039
|
+
///
|
|
1040
|
+
/// \param pSrc
|
|
1041
|
+
/// A non-null pointer defining the beginning of the code unit input range.
|
|
1042
|
+
/// \param pSrcEnd
|
|
1043
|
+
/// A non-null past-the-end pointer defining the end of the code unit input range.
|
|
1044
|
+
/// \param pDst
|
|
1045
|
+
/// A non-null pointer defining the beginning of the code point output range.
|
|
1046
|
+
///
|
|
1047
|
+
/// \returns
|
|
1048
|
+
/// If successful, the number of UTF-32 code points written; otherwise -1 is returned to
|
|
1049
|
+
/// indicate an error was encountered.
|
|
1050
|
+
//--------------------------------------------------------------------------------------------------
|
|
1051
|
+
//
|
|
1052
|
+
std::ptrdiff_t
|
|
1053
|
+
UtfUtils::ConvertWithTrace(char8_t const* pSrc, char8_t const* pSrcEnd, char32_t* pDst) noexcept
|
|
1054
|
+
{
|
|
1055
|
+
char32_t* pDstOrig = pDst;
|
|
1056
|
+
char32_t cdpt;
|
|
1057
|
+
|
|
1058
|
+
while (pSrc < pSrcEnd)
|
|
1059
|
+
{
|
|
1060
|
+
if (AdvanceWithTrace(pSrc, pSrcEnd, cdpt) != ERR)
|
|
1061
|
+
{
|
|
1062
|
+
*pDst++ = cdpt;
|
|
1063
|
+
}
|
|
1064
|
+
else
|
|
1065
|
+
{
|
|
1066
|
+
return -1;
|
|
1067
|
+
}
|
|
1068
|
+
}
|
|
1069
|
+
|
|
1070
|
+
return pDst - pDstOrig;
|
|
1071
|
+
}
|
|
1072
|
+
|
|
1073
|
+
//--------------------------------------------------------------------------------------------------
|
|
1074
|
+
/// \brief Trace converts a sequence of UTF-8 code units to a sequence of UTF-16 code units.
|
|
1075
|
+
///
|
|
1076
|
+
/// \details
|
|
1077
|
+
/// This static member function reads an input sequence of UTF-8 code units and converts
|
|
1078
|
+
/// it to an output sequence of UTF-16 code units. It uses only the DFA to perform
|
|
1079
|
+
/// conversion. It prints current and next state transition information as it proceeds.
|
|
1080
|
+
///
|
|
1081
|
+
/// \param pSrc
|
|
1082
|
+
/// A non-null pointer defining the beginning of the code unit input range.
|
|
1083
|
+
/// \param pSrcEnd
|
|
1084
|
+
/// A non-null past-the-end pointer defining the end of the code unit input range.
|
|
1085
|
+
/// \param pDst
|
|
1086
|
+
/// A non-null pointer defining the beginning of the code unit output range.
|
|
1087
|
+
///
|
|
1088
|
+
/// \returns
|
|
1089
|
+
/// If successful, the number of UTF-16 code units written; otherwise -1 is returned to
|
|
1090
|
+
/// indicate an error was encountered.
|
|
1091
|
+
//--------------------------------------------------------------------------------------------------
|
|
1092
|
+
//
|
|
1093
|
+
std::ptrdiff_t
|
|
1094
|
+
UtfUtils::ConvertWithTrace(char8_t const* pSrc, char8_t const* pSrcEnd, char16_t* pDst) noexcept
|
|
1095
|
+
{
|
|
1096
|
+
char16_t* pDstOrig = pDst;
|
|
1097
|
+
char32_t cdpt;
|
|
1098
|
+
|
|
1099
|
+
while (pSrc < pSrcEnd)
|
|
1100
|
+
{
|
|
1101
|
+
if (AdvanceWithTrace(pSrc, pSrcEnd, cdpt) != ERR)
|
|
1102
|
+
{
|
|
1103
|
+
GetCodeUnits(cdpt, pDst);
|
|
1104
|
+
}
|
|
1105
|
+
else
|
|
1106
|
+
{
|
|
1107
|
+
return -1;
|
|
1108
|
+
}
|
|
1109
|
+
}
|
|
1110
|
+
|
|
1111
|
+
return pDst - pDstOrig;
|
|
1112
|
+
}
|
|
1113
|
+
|
|
1114
|
+
//--------------------------------------------------------------------------------------------------
|
|
1115
|
+
/// \brief Converts a sequence of ASCII UTF-8 code units to a sequence of UTF-32 code points.
|
|
1116
|
+
///
|
|
1117
|
+
/// \details
|
|
1118
|
+
/// This static member function uses SSE intrinsics to convert a register of ASCII code
|
|
1119
|
+
/// units to four registers of equivalent UTF-32 code units.
|
|
1120
|
+
///
|
|
1121
|
+
/// \param pSrc
|
|
1122
|
+
/// A reference to a non-null pointer defining the start of the code unit input range.
|
|
1123
|
+
/// \param pDst
|
|
1124
|
+
/// A reference to a non-null pointer defining the start of the code point output range.
|
|
1125
|
+
//--------------------------------------------------------------------------------------------------
|
|
1126
|
+
//
|
|
1127
|
+
KEWB_FORCE_INLINE void
|
|
1128
|
+
UtfUtils::ConvertAsciiWithSse(char8_t const*& pSrc, char32_t*& pDst) noexcept
|
|
1129
|
+
{
|
|
1130
|
+
__m128i chunk, half, qrtr, zero;
|
|
1131
|
+
int32_t mask, incr;
|
|
1132
|
+
|
|
1133
|
+
zero = _mm_set1_epi8(0); //- Zero out the interleave register
|
|
1134
|
+
chunk = _mm_loadu_si128((__m128i const*) pSrc); //- Load a register with 8-bit bytes
|
|
1135
|
+
mask = _mm_movemask_epi8(chunk); //- Determine which octets have high bit set
|
|
1136
|
+
|
|
1137
|
+
half = _mm_unpacklo_epi8(chunk, zero); //- Unpack bytes 0-7 into 16-bit words
|
|
1138
|
+
qrtr = _mm_unpacklo_epi16(half, zero); //- Unpack words 0-3 into 32-bit dwords
|
|
1139
|
+
_mm_storeu_si128((__m128i*) pDst, qrtr); //- Write to memory
|
|
1140
|
+
qrtr = _mm_unpackhi_epi16(half, zero); //- Unpack words 4-7 into 32-bit dwords
|
|
1141
|
+
_mm_storeu_si128((__m128i*) (pDst + 4), qrtr); //- Write to memory
|
|
1142
|
+
|
|
1143
|
+
half = _mm_unpackhi_epi8(chunk, zero); //- Unpack bytes 8-15 into 16-bit words
|
|
1144
|
+
qrtr = _mm_unpacklo_epi16(half, zero); //- Unpack words 8-11 into 32-bit dwords
|
|
1145
|
+
_mm_storeu_si128((__m128i*) (pDst + 8), qrtr); //- Write to memory
|
|
1146
|
+
qrtr = _mm_unpackhi_epi16(half, zero); //- Unpack words 12-15 into 32-bit dwords
|
|
1147
|
+
_mm_storeu_si128((__m128i*) (pDst + 12), qrtr); //- Write to memory
|
|
1148
|
+
|
|
1149
|
+
//- If no bits were set in the mask, then all 16 code units were ASCII, and therefore
|
|
1150
|
+
// both pointers are advanced by 16.
|
|
1151
|
+
//
|
|
1152
|
+
if (mask == 0)
|
|
1153
|
+
{
|
|
1154
|
+
pSrc += 16;
|
|
1155
|
+
pDst += 16;
|
|
1156
|
+
}
|
|
1157
|
+
|
|
1158
|
+
//- Otherwise, the number of trailing (low-order) zero bits in the mask indicates the number
|
|
1159
|
+
// of ASCII code units starting from the lowest byte address.
|
|
1160
|
+
else
|
|
1161
|
+
{
|
|
1162
|
+
incr = GetTrailingZeros(mask);
|
|
1163
|
+
pSrc += incr;
|
|
1164
|
+
pDst += incr;
|
|
1165
|
+
}
|
|
1166
|
+
}
|
|
1167
|
+
|
|
1168
|
+
//--------------------------------------------------------------------------------------------------
|
|
1169
|
+
/// \brief Converts a sequence of ASCII UTF-8 code units to a sequence of UTF-16 code units.
|
|
1170
|
+
///
|
|
1171
|
+
/// \details
|
|
1172
|
+
/// This static member function uses SSE intrinsics to convert a register of ASCII code
|
|
1173
|
+
/// units to two registers of equivalent UTF-16 code units.
|
|
1174
|
+
///
|
|
1175
|
+
/// \param pSrc
|
|
1176
|
+
/// A reference to a non-null pointer defining the start of the code unit input range.
|
|
1177
|
+
/// \param pDst
|
|
1178
|
+
/// A reference to a non-null pointer defining the start of the code unit output range.
|
|
1179
|
+
//--------------------------------------------------------------------------------------------------
|
|
1180
|
+
//
|
|
1181
|
+
KEWB_FORCE_INLINE void
|
|
1182
|
+
UtfUtils::ConvertAsciiWithSse(char8_t const*& pSrc, char16_t*& pDst) noexcept
|
|
1183
|
+
{
|
|
1184
|
+
__m128i chunk, half;
|
|
1185
|
+
int32_t mask, incr;
|
|
1186
|
+
|
|
1187
|
+
chunk = _mm_loadu_si128((__m128i const*) pSrc); //- Load the register with 8-bit bytes
|
|
1188
|
+
mask = _mm_movemask_epi8(chunk); //- Determine which octets have high bit set
|
|
1189
|
+
|
|
1190
|
+
half = _mm_unpacklo_epi8(chunk, _mm_set1_epi8(0)); //- Unpack lower half into 16-bit words
|
|
1191
|
+
_mm_storeu_si128((__m128i*) pDst, half); //- Write to memory
|
|
1192
|
+
|
|
1193
|
+
half = _mm_unpackhi_epi8(chunk, _mm_set1_epi8(0)); //- Unpack upper half into 16-bit words
|
|
1194
|
+
_mm_storeu_si128((__m128i*) (pDst + 8), half); //- Write to memory
|
|
1195
|
+
|
|
1196
|
+
//- If no bits were set in the mask, then all 16 code units were ASCII, and therefore
|
|
1197
|
+
// both pointers are advanced by 16.
|
|
1198
|
+
//
|
|
1199
|
+
if (mask == 0)
|
|
1200
|
+
{
|
|
1201
|
+
pSrc += 16;
|
|
1202
|
+
pDst += 16;
|
|
1203
|
+
}
|
|
1204
|
+
|
|
1205
|
+
//- Otherwise, the number of trailing (low-order) zero bits in the mask indicates the number
|
|
1206
|
+
// of ASCII code units starting from the lowest byte address.
|
|
1207
|
+
else
|
|
1208
|
+
{
|
|
1209
|
+
incr = GetTrailingZeros(mask);
|
|
1210
|
+
pSrc += incr;
|
|
1211
|
+
pDst += incr;
|
|
1212
|
+
}
|
|
1213
|
+
}
|
|
1214
|
+
|
|
1215
|
+
//--------------------------------------------------------------------------------------------------
/// \brief  Returns the number of trailing 0-bits in an integer, starting with the least
///         significant bit.
///
/// \details
///     This static member function uses compiler intrinsics to count the trailing (i.e.,
///     low-order) zero bits of an `int32_t` parameter.  For example, an input value of 8
///     (0000 1000) yields 3, and an input value of 64 (0100 0000) yields 6.  The Clang/GCC
///     variant forwards to `__builtin_ctz`; the MSVC variant forwards to `_BitScanForward`.
///
/// \param x
///     An `int32_t` value whose number of trailing zero bits is to be determined.
///
/// \returns
///     The number of trailing zero bits, as an `int32_t`.
//--------------------------------------------------------------------------------------------------
//
#if defined KEWB_COMPILER_CLANG || defined KEWB_COMPILER_GCC
// previous line modified by D. Lemire on June 23rd 2021: we want support for not just Linux!
KEWB_FORCE_INLINE int32_t
UtfUtils::GetTrailingZeros(int32_t x) noexcept
{
    return __builtin_ctz(static_cast<unsigned int>(x));
}

#elif defined KEWB_PLATFORM_WINDOWS && defined KEWB_COMPILER_MSVC

KEWB_FORCE_INLINE int32_t
UtfUtils::GetTrailingZeros(int32_t x) noexcept
{
    unsigned long bitIndex;

    //- Receives the position of the lowest set bit through the out-parameter.
    //
    _BitScanForward(&bitIndex, static_cast<unsigned long>(x));

    return static_cast<int32_t>(bitIndex);
}
#endif
|
|
1250
|
+
|
|
1251
|
+
//--------------------------------------------------------------------------------------------------
|
|
1252
|
+
/// \brief Prints state information for tracing versions of converters.
|
|
1253
|
+
///
|
|
1254
|
+
/// \param curr
|
|
1255
|
+
/// The current DFA state.
|
|
1256
|
+
/// \param type
|
|
1257
|
+
/// The character class of the lookahead input octet.
|
|
1258
|
+
/// \param unit
|
|
1259
|
+
/// The lookahead input octet.
|
|
1260
|
+
/// \param next
|
|
1261
|
+
/// The next DFA state, based on the current state and lookahead character class.
|
|
1262
|
+
//--------------------------------------------------------------------------------------------------
|
|
1263
|
+
//
|
|
1264
|
+
void
|
|
1265
|
+
UtfUtils::PrintStateData(State curr, CharClass type, uint32_t unit, State next)
|
|
1266
|
+
{
|
|
1267
|
+
uint32_t currState = ((uint32_t) curr) / 12;
|
|
1268
|
+
uint32_t nextState = ((uint32_t) next) / 12;
|
|
1269
|
+
uint32_t unitValue = unit & 0xFF;
|
|
1270
|
+
|
|
1271
|
+
printf("[%s, %s (0x%02X)] ==>> %s\n", smStateNames[currState],
|
|
1272
|
+
smClassNames[type], unitValue, smStateNames[nextState]);
|
|
1273
|
+
}
|
|
1274
|
+
|
|
1275
|
+
} //- Namespace uu
|
|
1276
|
+
SIMDUTF_UNTARGET_REGION
|