react-native-quick-crypto 1.0.19 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/QuickCrypto.podspec +12 -38
- package/README.md +2 -0
- package/android/CMakeLists.txt +3 -0
- package/android/build.gradle +5 -1
- package/cpp/argon2/HybridArgon2.cpp +10 -3
- package/cpp/blake3/HybridBlake3.cpp +5 -3
- package/cpp/cipher/CCMCipher.cpp +29 -16
- package/cpp/cipher/CCMCipher.hpp +2 -4
- package/cpp/cipher/ChaCha20Cipher.cpp +14 -18
- package/cpp/cipher/ChaCha20Cipher.hpp +2 -4
- package/cpp/cipher/ChaCha20Poly1305Cipher.cpp +34 -23
- package/cpp/cipher/ChaCha20Poly1305Cipher.hpp +2 -4
- package/cpp/cipher/GCMCipher.cpp +14 -15
- package/cpp/cipher/HybridCipher.cpp +39 -36
- package/cpp/cipher/HybridCipher.hpp +17 -1
- package/cpp/cipher/HybridRsaCipher.cpp +74 -29
- package/cpp/cipher/OCBCipher.cpp +4 -3
- package/cpp/cipher/XChaCha20Poly1305Cipher.cpp +14 -13
- package/cpp/cipher/XSalsa20Cipher.cpp +72 -6
- package/cpp/cipher/XSalsa20Cipher.hpp +25 -3
- package/cpp/cipher/XSalsa20Poly1305Cipher.cpp +21 -25
- package/cpp/dh/HybridDiffieHellman.cpp +29 -0
- package/cpp/ec/HybridEcKeyPair.cpp +35 -33
- package/cpp/ec/HybridEcKeyPair.hpp +3 -7
- package/cpp/ecdh/HybridECDH.cpp +23 -0
- package/cpp/ed25519/HybridEdKeyPair.cpp +73 -117
- package/cpp/ed25519/HybridEdKeyPair.hpp +5 -9
- package/cpp/hash/HybridHash.cpp +5 -7
- package/cpp/hkdf/HybridHkdf.cpp +6 -4
- package/cpp/hmac/HybridHmac.cpp +4 -6
- package/cpp/kmac/HybridKmac.cpp +4 -4
- package/cpp/mldsa/HybridMlDsaKeyPair.cpp +37 -49
- package/cpp/mlkem/HybridMlKemKeyPair.cpp +39 -43
- package/cpp/pbkdf2/HybridPbkdf2.cpp +7 -8
- package/cpp/rsa/HybridRsaKeyPair.cpp +5 -8
- package/cpp/rsa/HybridRsaKeyPair.hpp +4 -7
- package/cpp/scrypt/HybridScrypt.cpp +6 -4
- package/cpp/sign/HybridSignHandle.cpp +25 -68
- package/cpp/sign/HybridVerifyHandle.cpp +23 -60
- package/cpp/utils/HybridUtils.cpp +213 -111
- package/cpp/utils/HybridUtils.hpp +9 -2
- package/cpp/utils/QuickCryptoUtils.hpp +72 -0
- package/deps/simdutf/LICENSE-APACHE +201 -0
- package/deps/simdutf/LICENSE-MIT +18 -0
- package/deps/simdutf/README.md +2782 -0
- package/deps/simdutf/include/simdutf/avx512.h +79 -0
- package/deps/simdutf/include/simdutf/base64_implementation.h +158 -0
- package/deps/simdutf/include/simdutf/base64_tables.h +887 -0
- package/deps/simdutf/include/simdutf/common_defs.h +186 -0
- package/deps/simdutf/include/simdutf/compiler_check.h +50 -0
- package/deps/simdutf/include/simdutf/constexpr_ptr.h +138 -0
- package/deps/simdutf/include/simdutf/encoding_types.h +189 -0
- package/deps/simdutf/include/simdutf/error.h +126 -0
- package/deps/simdutf/include/simdutf/implementation.h +7081 -0
- package/deps/simdutf/include/simdutf/internal/isadetection.h +325 -0
- package/deps/simdutf/include/simdutf/portability.h +285 -0
- package/deps/simdutf/include/simdutf/scalar/ascii.h +86 -0
- package/deps/simdutf/include/simdutf/scalar/atomic_util.h +105 -0
- package/deps/simdutf/include/simdutf/scalar/base64.h +911 -0
- package/deps/simdutf/include/simdutf/scalar/latin1.h +26 -0
- package/deps/simdutf/include/simdutf/scalar/latin1_to_utf16/latin1_to_utf16.h +52 -0
- package/deps/simdutf/include/simdutf/scalar/latin1_to_utf32/latin1_to_utf32.h +27 -0
- package/deps/simdutf/include/simdutf/scalar/latin1_to_utf8/latin1_to_utf8.h +191 -0
- package/deps/simdutf/include/simdutf/scalar/swap_bytes.h +35 -0
- package/deps/simdutf/include/simdutf/scalar/utf16.h +226 -0
- package/deps/simdutf/include/simdutf/scalar/utf16_to_latin1/utf16_to_latin1.h +108 -0
- package/deps/simdutf/include/simdutf/scalar/utf16_to_latin1/valid_utf16_to_latin1.h +40 -0
- package/deps/simdutf/include/simdutf/scalar/utf16_to_utf32/utf16_to_utf32.h +86 -0
- package/deps/simdutf/include/simdutf/scalar/utf16_to_utf32/valid_utf16_to_utf32.h +44 -0
- package/deps/simdutf/include/simdutf/scalar/utf16_to_utf8/utf16_to_utf8.h +295 -0
- package/deps/simdutf/include/simdutf/scalar/utf16_to_utf8/valid_utf16_to_utf8.h +91 -0
- package/deps/simdutf/include/simdutf/scalar/utf32.h +82 -0
- package/deps/simdutf/include/simdutf/scalar/utf32_to_latin1/utf32_to_latin1.h +68 -0
- package/deps/simdutf/include/simdutf/scalar/utf32_to_latin1/valid_utf32_to_latin1.h +67 -0
- package/deps/simdutf/include/simdutf/scalar/utf32_to_utf16/utf32_to_utf16.h +84 -0
- package/deps/simdutf/include/simdutf/scalar/utf32_to_utf16/valid_utf32_to_utf16.h +44 -0
- package/deps/simdutf/include/simdutf/scalar/utf32_to_utf8/utf32_to_utf8.h +142 -0
- package/deps/simdutf/include/simdutf/scalar/utf32_to_utf8/valid_utf32_to_utf8.h +72 -0
- package/deps/simdutf/include/simdutf/scalar/utf8.h +326 -0
- package/deps/simdutf/include/simdutf/scalar/utf8_to_latin1/utf8_to_latin1.h +225 -0
- package/deps/simdutf/include/simdutf/scalar/utf8_to_latin1/valid_utf8_to_latin1.h +87 -0
- package/deps/simdutf/include/simdutf/scalar/utf8_to_utf16/utf8_to_utf16.h +342 -0
- package/deps/simdutf/include/simdutf/scalar/utf8_to_utf16/valid_utf8_to_utf16.h +106 -0
- package/deps/simdutf/include/simdutf/scalar/utf8_to_utf32/utf8_to_utf32.h +299 -0
- package/deps/simdutf/include/simdutf/scalar/utf8_to_utf32/valid_utf8_to_utf32.h +83 -0
- package/deps/simdutf/include/simdutf/simdutf_version.h +26 -0
- package/deps/simdutf/include/simdutf.h +26 -0
- package/deps/simdutf/include/simdutf_c.h +342 -0
- package/deps/simdutf/src/arm64/arm_base64.cpp +791 -0
- package/deps/simdutf/src/arm64/arm_convert_latin1_to_utf16.cpp +24 -0
- package/deps/simdutf/src/arm64/arm_convert_latin1_to_utf32.cpp +24 -0
- package/deps/simdutf/src/arm64/arm_convert_latin1_to_utf8.cpp +70 -0
- package/deps/simdutf/src/arm64/arm_convert_utf16_to_latin1.cpp +61 -0
- package/deps/simdutf/src/arm64/arm_convert_utf16_to_utf32.cpp +185 -0
- package/deps/simdutf/src/arm64/arm_convert_utf16_to_utf8.cpp +780 -0
- package/deps/simdutf/src/arm64/arm_convert_utf32_to_latin1.cpp +60 -0
- package/deps/simdutf/src/arm64/arm_convert_utf32_to_utf16.cpp +208 -0
- package/deps/simdutf/src/arm64/arm_convert_utf32_to_utf8.cpp +505 -0
- package/deps/simdutf/src/arm64/arm_convert_utf8_to_latin1.cpp +69 -0
- package/deps/simdutf/src/arm64/arm_convert_utf8_to_utf16.cpp +313 -0
- package/deps/simdutf/src/arm64/arm_convert_utf8_to_utf32.cpp +179 -0
- package/deps/simdutf/src/arm64/arm_find.cpp +199 -0
- package/deps/simdutf/src/arm64/arm_utf16fix.cpp +185 -0
- package/deps/simdutf/src/arm64/arm_validate_utf16.cpp +165 -0
- package/deps/simdutf/src/arm64/arm_validate_utf32le.cpp +65 -0
- package/deps/simdutf/src/arm64/implementation.cpp +1442 -0
- package/deps/simdutf/src/encoding_types.cpp +67 -0
- package/deps/simdutf/src/error.cpp +3 -0
- package/deps/simdutf/src/fallback/implementation.cpp +589 -0
- package/deps/simdutf/src/generic/ascii_validation.h +50 -0
- package/deps/simdutf/src/generic/base64.h +233 -0
- package/deps/simdutf/src/generic/base64lengths.h +63 -0
- package/deps/simdutf/src/generic/buf_block_reader.h +109 -0
- package/deps/simdutf/src/generic/find.h +75 -0
- package/deps/simdutf/src/generic/utf16/change_endianness.h +24 -0
- package/deps/simdutf/src/generic/utf16/count_code_points_bytemask.h +58 -0
- package/deps/simdutf/src/generic/utf16/to_well_formed.h +93 -0
- package/deps/simdutf/src/generic/utf16/utf32_length_from_utf16.h +15 -0
- package/deps/simdutf/src/generic/utf16/utf8_length_from_utf16.h +35 -0
- package/deps/simdutf/src/generic/utf16/utf8_length_from_utf16_bytemask.h +199 -0
- package/deps/simdutf/src/generic/utf16.h +73 -0
- package/deps/simdutf/src/generic/utf32.h +136 -0
- package/deps/simdutf/src/generic/utf8/utf16_length_from_utf8_bytemask.h +53 -0
- package/deps/simdutf/src/generic/utf8.h +92 -0
- package/deps/simdutf/src/generic/utf8_to_latin1/utf8_to_latin1.h +316 -0
- package/deps/simdutf/src/generic/utf8_to_latin1/valid_utf8_to_latin1.h +78 -0
- package/deps/simdutf/src/generic/utf8_to_utf16/utf8_to_utf16.h +332 -0
- package/deps/simdutf/src/generic/utf8_to_utf16/valid_utf8_to_utf16.h +74 -0
- package/deps/simdutf/src/generic/utf8_to_utf32/utf8_to_utf32.h +318 -0
- package/deps/simdutf/src/generic/utf8_to_utf32/valid_utf8_to_utf32.h +42 -0
- package/deps/simdutf/src/generic/utf8_validation/utf8_lookup4_algorithm.h +223 -0
- package/deps/simdutf/src/generic/utf8_validation/utf8_validator.h +84 -0
- package/deps/simdutf/src/generic/validate_utf16.h +164 -0
- package/deps/simdutf/src/generic/validate_utf32.h +99 -0
- package/deps/simdutf/src/haswell/avx2_base64.cpp +837 -0
- package/deps/simdutf/src/haswell/avx2_convert_latin1_to_utf16.cpp +28 -0
- package/deps/simdutf/src/haswell/avx2_convert_latin1_to_utf32.cpp +20 -0
- package/deps/simdutf/src/haswell/avx2_convert_latin1_to_utf8.cpp +83 -0
- package/deps/simdutf/src/haswell/avx2_convert_utf16_to_latin1.cpp +83 -0
- package/deps/simdutf/src/haswell/avx2_convert_utf16_to_utf32.cpp +210 -0
- package/deps/simdutf/src/haswell/avx2_convert_utf16_to_utf8.cpp +602 -0
- package/deps/simdutf/src/haswell/avx2_convert_utf32_to_latin1.cpp +116 -0
- package/deps/simdutf/src/haswell/avx2_convert_utf32_to_utf16.cpp +164 -0
- package/deps/simdutf/src/haswell/avx2_convert_utf32_to_utf8.cpp +569 -0
- package/deps/simdutf/src/haswell/avx2_convert_utf8_to_latin1.cpp +60 -0
- package/deps/simdutf/src/haswell/avx2_convert_utf8_to_utf16.cpp +195 -0
- package/deps/simdutf/src/haswell/avx2_convert_utf8_to_utf32.cpp +135 -0
- package/deps/simdutf/src/haswell/avx2_utf16fix.cpp +173 -0
- package/deps/simdutf/src/haswell/avx2_validate_utf16.cpp +17 -0
- package/deps/simdutf/src/haswell/implementation.cpp +1447 -0
- package/deps/simdutf/src/icelake/icelake_ascii_validation.inl.cpp +19 -0
- package/deps/simdutf/src/icelake/icelake_base64.inl.cpp +630 -0
- package/deps/simdutf/src/icelake/icelake_common.inl.cpp +37 -0
- package/deps/simdutf/src/icelake/icelake_convert_latin1_to_utf16.inl.cpp +36 -0
- package/deps/simdutf/src/icelake/icelake_convert_latin1_to_utf32.inl.cpp +23 -0
- package/deps/simdutf/src/icelake/icelake_convert_latin1_to_utf8.inl.cpp +107 -0
- package/deps/simdutf/src/icelake/icelake_convert_utf16_to_latin1.inl.cpp +103 -0
- package/deps/simdutf/src/icelake/icelake_convert_utf16_to_utf32.inl.cpp +136 -0
- package/deps/simdutf/src/icelake/icelake_convert_utf16_to_utf8.inl.cpp +206 -0
- package/deps/simdutf/src/icelake/icelake_convert_utf32_to_latin1.inl.cpp +74 -0
- package/deps/simdutf/src/icelake/icelake_convert_utf32_to_utf16.inl.cpp +338 -0
- package/deps/simdutf/src/icelake/icelake_convert_utf32_to_utf8.inl.cpp +574 -0
- package/deps/simdutf/src/icelake/icelake_convert_utf8_to_latin1.inl.cpp +104 -0
- package/deps/simdutf/src/icelake/icelake_convert_utf8_to_utf16.inl.cpp +75 -0
- package/deps/simdutf/src/icelake/icelake_convert_valid_utf8_to_latin1.inl.cpp +69 -0
- package/deps/simdutf/src/icelake/icelake_find.inl.cpp +146 -0
- package/deps/simdutf/src/icelake/icelake_from_utf8.inl.cpp +266 -0
- package/deps/simdutf/src/icelake/icelake_from_valid_utf8.inl.cpp +136 -0
- package/deps/simdutf/src/icelake/icelake_macros.inl.cpp +143 -0
- package/deps/simdutf/src/icelake/icelake_utf16fix.cpp +138 -0
- package/deps/simdutf/src/icelake/icelake_utf32_validation.inl.cpp +63 -0
- package/deps/simdutf/src/icelake/icelake_utf8_common.inl.cpp +753 -0
- package/deps/simdutf/src/icelake/icelake_utf8_length_from_utf16.inl.cpp +269 -0
- package/deps/simdutf/src/icelake/icelake_utf8_validation.inl.cpp +116 -0
- package/deps/simdutf/src/icelake/implementation.cpp +1903 -0
- package/deps/simdutf/src/implementation.cpp +2526 -0
- package/deps/simdutf/src/lasx/implementation.cpp +1531 -0
- package/deps/simdutf/src/lasx/lasx_base64.cpp +695 -0
- package/deps/simdutf/src/lasx/lasx_convert_latin1_to_utf16.cpp +76 -0
- package/deps/simdutf/src/lasx/lasx_convert_latin1_to_utf32.cpp +55 -0
- package/deps/simdutf/src/lasx/lasx_convert_latin1_to_utf8.cpp +65 -0
- package/deps/simdutf/src/lasx/lasx_convert_utf16_to_latin1.cpp +64 -0
- package/deps/simdutf/src/lasx/lasx_convert_utf16_to_utf32.cpp +183 -0
- package/deps/simdutf/src/lasx/lasx_convert_utf16_to_utf8.cpp +550 -0
- package/deps/simdutf/src/lasx/lasx_convert_utf32_to_latin1.cpp +73 -0
- package/deps/simdutf/src/lasx/lasx_convert_utf32_to_utf16.cpp +218 -0
- package/deps/simdutf/src/lasx/lasx_convert_utf32_to_utf8.cpp +589 -0
- package/deps/simdutf/src/lasx/lasx_convert_utf8_to_latin1.cpp +72 -0
- package/deps/simdutf/src/lasx/lasx_convert_utf8_to_utf16.cpp +296 -0
- package/deps/simdutf/src/lasx/lasx_convert_utf8_to_utf32.cpp +190 -0
- package/deps/simdutf/src/lasx/lasx_find.cpp +64 -0
- package/deps/simdutf/src/lasx/lasx_validate_utf16.cpp +13 -0
- package/deps/simdutf/src/lasx/lasx_validate_utf32le.cpp +84 -0
- package/deps/simdutf/src/lsx/implementation.cpp +1417 -0
- package/deps/simdutf/src/lsx/lsx_base64.cpp +675 -0
- package/deps/simdutf/src/lsx/lsx_convert_latin1_to_utf16.cpp +39 -0
- package/deps/simdutf/src/lsx/lsx_convert_latin1_to_utf32.cpp +27 -0
- package/deps/simdutf/src/lsx/lsx_convert_latin1_to_utf8.cpp +56 -0
- package/deps/simdutf/src/lsx/lsx_convert_utf16_to_latin1.cpp +64 -0
- package/deps/simdutf/src/lsx/lsx_convert_utf16_to_utf32.cpp +133 -0
- package/deps/simdutf/src/lsx/lsx_convert_utf16_to_utf8.cpp +518 -0
- package/deps/simdutf/src/lsx/lsx_convert_utf32_to_latin1.cpp +66 -0
- package/deps/simdutf/src/lsx/lsx_convert_utf32_to_utf16.cpp +155 -0
- package/deps/simdutf/src/lsx/lsx_convert_utf32_to_utf8.cpp +459 -0
- package/deps/simdutf/src/lsx/lsx_convert_utf8_to_latin1.cpp +75 -0
- package/deps/simdutf/src/lsx/lsx_convert_utf8_to_utf16.cpp +291 -0
- package/deps/simdutf/src/lsx/lsx_convert_utf8_to_utf32.cpp +179 -0
- package/deps/simdutf/src/lsx/lsx_find.cpp +60 -0
- package/deps/simdutf/src/lsx/lsx_validate_utf16.cpp +13 -0
- package/deps/simdutf/src/lsx/lsx_validate_utf32le.cpp +68 -0
- package/deps/simdutf/src/ppc64/implementation.cpp +992 -0
- package/deps/simdutf/src/ppc64/ppc64_base64.cpp +480 -0
- package/deps/simdutf/src/ppc64/ppc64_base64_internal_tests.cpp +401 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_latin1_to_utf16.cpp +12 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_latin1_to_utf32.cpp +12 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_latin1_to_utf8.cpp +149 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_utf16_to_latin1.cpp +67 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_utf16_to_utf32.cpp +87 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_utf16_to_utf8.cpp +296 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_utf32_to_latin1.cpp +57 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_utf32_to_utf16.cpp +117 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_utf32_to_utf8.cpp +166 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_utf8_to_latin1.cpp +69 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_utf8_to_utf16.cpp +211 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_utf8_to_utf32.cpp +153 -0
- package/deps/simdutf/src/ppc64/ppc64_utf16_to_utf8_tables.h +1011 -0
- package/deps/simdutf/src/ppc64/ppc64_utf8_length_from_latin1.cpp +37 -0
- package/deps/simdutf/src/ppc64/ppc64_validate_utf16.cpp +19 -0
- package/deps/simdutf/src/ppc64/templates.cpp +91 -0
- package/deps/simdutf/src/rvv/implementation.cpp +138 -0
- package/deps/simdutf/src/rvv/rvv_find.cpp +27 -0
- package/deps/simdutf/src/rvv/rvv_helpers.inl.cpp +23 -0
- package/deps/simdutf/src/rvv/rvv_latin1_to.inl.cpp +71 -0
- package/deps/simdutf/src/rvv/rvv_length_from.inl.cpp +164 -0
- package/deps/simdutf/src/rvv/rvv_utf16_to.inl.cpp +399 -0
- package/deps/simdutf/src/rvv/rvv_utf16fix.cpp +110 -0
- package/deps/simdutf/src/rvv/rvv_utf32_to.inl.cpp +307 -0
- package/deps/simdutf/src/rvv/rvv_utf8_to.inl.cpp +435 -0
- package/deps/simdutf/src/rvv/rvv_validate.inl.cpp +275 -0
- package/deps/simdutf/src/simdutf/arm64/begin.h +2 -0
- package/deps/simdutf/src/simdutf/arm64/bitmanipulation.h +34 -0
- package/deps/simdutf/src/simdutf/arm64/end.h +2 -0
- package/deps/simdutf/src/simdutf/arm64/implementation.h +307 -0
- package/deps/simdutf/src/simdutf/arm64/intrinsics.h +10 -0
- package/deps/simdutf/src/simdutf/arm64/simd.h +547 -0
- package/deps/simdutf/src/simdutf/arm64/simd16-inl.h +403 -0
- package/deps/simdutf/src/simdutf/arm64/simd32-inl.h +129 -0
- package/deps/simdutf/src/simdutf/arm64/simd64-inl.h +28 -0
- package/deps/simdutf/src/simdutf/arm64.h +43 -0
- package/deps/simdutf/src/simdutf/fallback/begin.h +1 -0
- package/deps/simdutf/src/simdutf/fallback/bitmanipulation.h +13 -0
- package/deps/simdutf/src/simdutf/fallback/end.h +1 -0
- package/deps/simdutf/src/simdutf/fallback/implementation.h +331 -0
- package/deps/simdutf/src/simdutf/fallback.h +42 -0
- package/deps/simdutf/src/simdutf/haswell/begin.h +15 -0
- package/deps/simdutf/src/simdutf/haswell/bitmanipulation.h +35 -0
- package/deps/simdutf/src/simdutf/haswell/end.h +13 -0
- package/deps/simdutf/src/simdutf/haswell/implementation.h +338 -0
- package/deps/simdutf/src/simdutf/haswell/intrinsics.h +67 -0
- package/deps/simdutf/src/simdutf/haswell/simd.h +363 -0
- package/deps/simdutf/src/simdutf/haswell/simd16-inl.h +261 -0
- package/deps/simdutf/src/simdutf/haswell/simd32-inl.h +111 -0
- package/deps/simdutf/src/simdutf/haswell/simd64-inl.h +34 -0
- package/deps/simdutf/src/simdutf/haswell.h +63 -0
- package/deps/simdutf/src/simdutf/icelake/begin.h +14 -0
- package/deps/simdutf/src/simdutf/icelake/bitmanipulation.h +44 -0
- package/deps/simdutf/src/simdutf/icelake/end.h +12 -0
- package/deps/simdutf/src/simdutf/icelake/implementation.h +346 -0
- package/deps/simdutf/src/simdutf/icelake/intrinsics.h +138 -0
- package/deps/simdutf/src/simdutf/icelake/simd.h +17 -0
- package/deps/simdutf/src/simdutf/icelake/simd16-inl.h +90 -0
- package/deps/simdutf/src/simdutf/icelake/simd32-inl.h +47 -0
- package/deps/simdutf/src/simdutf/icelake.h +81 -0
- package/deps/simdutf/src/simdutf/lasx/begin.h +8 -0
- package/deps/simdutf/src/simdutf/lasx/bitmanipulation.h +25 -0
- package/deps/simdutf/src/simdutf/lasx/end.h +8 -0
- package/deps/simdutf/src/simdutf/lasx/implementation.h +310 -0
- package/deps/simdutf/src/simdutf/lasx/intrinsics.h +319 -0
- package/deps/simdutf/src/simdutf/lasx/simd.h +551 -0
- package/deps/simdutf/src/simdutf/lasx/simd16-inl.h +234 -0
- package/deps/simdutf/src/simdutf/lasx/simd32-inl.h +74 -0
- package/deps/simdutf/src/simdutf/lasx/simd64-inl.h +52 -0
- package/deps/simdutf/src/simdutf/lasx.h +49 -0
- package/deps/simdutf/src/simdutf/lsx/begin.h +2 -0
- package/deps/simdutf/src/simdutf/lsx/bitmanipulation.h +25 -0
- package/deps/simdutf/src/simdutf/lsx/end.h +2 -0
- package/deps/simdutf/src/simdutf/lsx/implementation.h +309 -0
- package/deps/simdutf/src/simdutf/lsx/intrinsics.h +196 -0
- package/deps/simdutf/src/simdutf/lsx/simd.h +421 -0
- package/deps/simdutf/src/simdutf/lsx/simd16-inl.h +242 -0
- package/deps/simdutf/src/simdutf/lsx/simd32-inl.h +69 -0
- package/deps/simdutf/src/simdutf/lsx/simd64-inl.h +50 -0
- package/deps/simdutf/src/simdutf/lsx.h +52 -0
- package/deps/simdutf/src/simdutf/ppc64/begin.h +1 -0
- package/deps/simdutf/src/simdutf/ppc64/bitmanipulation.h +29 -0
- package/deps/simdutf/src/simdutf/ppc64/end.h +1 -0
- package/deps/simdutf/src/simdutf/ppc64/implementation.h +348 -0
- package/deps/simdutf/src/simdutf/ppc64/intrinsics.h +19 -0
- package/deps/simdutf/src/simdutf/ppc64/simd.h +177 -0
- package/deps/simdutf/src/simdutf/ppc64/simd16-inl.h +327 -0
- package/deps/simdutf/src/simdutf/ppc64/simd32-inl.h +247 -0
- package/deps/simdutf/src/simdutf/ppc64/simd8-inl.h +618 -0
- package/deps/simdutf/src/simdutf/ppc64.h +40 -0
- package/deps/simdutf/src/simdutf/rvv/begin.h +7 -0
- package/deps/simdutf/src/simdutf/rvv/end.h +7 -0
- package/deps/simdutf/src/simdutf/rvv/implementation.h +321 -0
- package/deps/simdutf/src/simdutf/rvv/intrinsics.h +131 -0
- package/deps/simdutf/src/simdutf/rvv.h +41 -0
- package/deps/simdutf/src/simdutf/westmere/begin.h +8 -0
- package/deps/simdutf/src/simdutf/westmere/bitmanipulation.h +37 -0
- package/deps/simdutf/src/simdutf/westmere/end.h +8 -0
- package/deps/simdutf/src/simdutf/westmere/implementation.h +338 -0
- package/deps/simdutf/src/simdutf/westmere/intrinsics.h +38 -0
- package/deps/simdutf/src/simdutf/westmere/simd.h +379 -0
- package/deps/simdutf/src/simdutf/westmere/simd16-inl.h +242 -0
- package/deps/simdutf/src/simdutf/westmere/simd32-inl.h +151 -0
- package/deps/simdutf/src/simdutf/westmere/simd64-inl.h +33 -0
- package/deps/simdutf/src/simdutf/westmere.h +59 -0
- package/deps/simdutf/src/simdutf.cpp +152 -0
- package/deps/simdutf/src/simdutf_c.cpp +525 -0
- package/deps/simdutf/src/tables/utf16_to_utf8_tables.h +768 -0
- package/deps/simdutf/src/tables/utf32_to_utf16_tables.h +53 -0
- package/deps/simdutf/src/tables/utf8_to_utf16_tables.h +826 -0
- package/deps/simdutf/src/westmere/implementation.cpp +1479 -0
- package/deps/simdutf/src/westmere/internal/loader.cpp +7 -0
- package/deps/simdutf/src/westmere/internal/write_v_u16_11bits_to_utf8.cpp +66 -0
- package/deps/simdutf/src/westmere/sse_base64.cpp +672 -0
- package/deps/simdutf/src/westmere/sse_convert_latin1_to_utf16.cpp +21 -0
- package/deps/simdutf/src/westmere/sse_convert_latin1_to_utf32.cpp +31 -0
- package/deps/simdutf/src/westmere/sse_convert_latin1_to_utf8.cpp +71 -0
- package/deps/simdutf/src/westmere/sse_convert_utf16_to_latin1.cpp +70 -0
- package/deps/simdutf/src/westmere/sse_convert_utf16_to_utf32.cpp +206 -0
- package/deps/simdutf/src/westmere/sse_convert_utf16_to_utf8.cpp +504 -0
- package/deps/simdutf/src/westmere/sse_convert_utf32_to_latin1.cpp +82 -0
- package/deps/simdutf/src/westmere/sse_convert_utf32_to_utf16.cpp +209 -0
- package/deps/simdutf/src/westmere/sse_convert_utf32_to_utf8.cpp +589 -0
- package/deps/simdutf/src/westmere/sse_convert_utf8_to_latin1.cpp +58 -0
- package/deps/simdutf/src/westmere/sse_convert_utf8_to_utf16.cpp +197 -0
- package/deps/simdutf/src/westmere/sse_convert_utf8_to_utf32.cpp +141 -0
- package/deps/simdutf/src/westmere/sse_utf16fix.cpp +82 -0
- package/deps/simdutf/src/westmere/sse_validate_utf16.cpp +17 -0
- package/lib/commonjs/argon2.js +51 -2
- package/lib/commonjs/argon2.js.map +1 -1
- package/lib/commonjs/cipher.js +109 -11
- package/lib/commonjs/cipher.js.map +1 -1
- package/lib/commonjs/dsa.js +8 -2
- package/lib/commonjs/dsa.js.map +1 -1
- package/lib/commonjs/hash.js +15 -5
- package/lib/commonjs/hash.js.map +1 -1
- package/lib/commonjs/hkdf.js +33 -6
- package/lib/commonjs/hkdf.js.map +1 -1
- package/lib/commonjs/hmac.js +15 -5
- package/lib/commonjs/hmac.js.map +1 -1
- package/lib/commonjs/keys/publicCipher.js +10 -4
- package/lib/commonjs/keys/publicCipher.js.map +1 -1
- package/lib/commonjs/random.js +11 -2
- package/lib/commonjs/random.js.map +1 -1
- package/lib/commonjs/rsa.js +12 -5
- package/lib/commonjs/rsa.js.map +1 -1
- package/lib/commonjs/scrypt.js +47 -6
- package/lib/commonjs/scrypt.js.map +1 -1
- package/lib/commonjs/subtle.js +76 -5
- package/lib/commonjs/subtle.js.map +1 -1
- package/lib/commonjs/utils/cipher.js +18 -7
- package/lib/commonjs/utils/cipher.js.map +1 -1
- package/lib/commonjs/utils/conversion.js +33 -9
- package/lib/commonjs/utils/conversion.js.map +1 -1
- package/lib/commonjs/utils/timingSafeEqual.js +7 -2
- package/lib/commonjs/utils/timingSafeEqual.js.map +1 -1
- package/lib/commonjs/x509certificate.js +6 -6
- package/lib/commonjs/x509certificate.js.map +1 -1
- package/lib/module/argon2.js +51 -2
- package/lib/module/argon2.js.map +1 -1
- package/lib/module/cipher.js +109 -11
- package/lib/module/cipher.js.map +1 -1
- package/lib/module/dsa.js +8 -2
- package/lib/module/dsa.js.map +1 -1
- package/lib/module/hash.js +15 -5
- package/lib/module/hash.js.map +1 -1
- package/lib/module/hkdf.js +33 -6
- package/lib/module/hkdf.js.map +1 -1
- package/lib/module/hmac.js +15 -5
- package/lib/module/hmac.js.map +1 -1
- package/lib/module/keys/publicCipher.js +10 -4
- package/lib/module/keys/publicCipher.js.map +1 -1
- package/lib/module/random.js +11 -2
- package/lib/module/random.js.map +1 -1
- package/lib/module/rsa.js +11 -4
- package/lib/module/rsa.js.map +1 -1
- package/lib/module/scrypt.js +47 -6
- package/lib/module/scrypt.js.map +1 -1
- package/lib/module/subtle.js +76 -5
- package/lib/module/subtle.js.map +1 -1
- package/lib/module/utils/cipher.js +18 -7
- package/lib/module/utils/cipher.js.map +1 -1
- package/lib/module/utils/conversion.js +33 -9
- package/lib/module/utils/conversion.js.map +1 -1
- package/lib/module/utils/timingSafeEqual.js +8 -3
- package/lib/module/utils/timingSafeEqual.js.map +1 -1
- package/lib/module/x509certificate.js +6 -6
- package/lib/module/x509certificate.js.map +1 -1
- package/lib/typescript/argon2.d.ts.map +1 -1
- package/lib/typescript/cipher.d.ts +2 -2
- package/lib/typescript/cipher.d.ts.map +1 -1
- package/lib/typescript/dsa.d.ts.map +1 -1
- package/lib/typescript/hash.d.ts +2 -2
- package/lib/typescript/hash.d.ts.map +1 -1
- package/lib/typescript/hkdf.d.ts.map +1 -1
- package/lib/typescript/hmac.d.ts +2 -2
- package/lib/typescript/hmac.d.ts.map +1 -1
- package/lib/typescript/index.d.ts +1 -1
- package/lib/typescript/index.d.ts.map +1 -1
- package/lib/typescript/keys/publicCipher.d.ts.map +1 -1
- package/lib/typescript/random.d.ts.map +1 -1
- package/lib/typescript/rsa.d.ts.map +1 -1
- package/lib/typescript/scrypt.d.ts.map +1 -1
- package/lib/typescript/specs/utils.nitro.d.ts +0 -2
- package/lib/typescript/specs/utils.nitro.d.ts.map +1 -1
- package/lib/typescript/subtle.d.ts.map +1 -1
- package/lib/typescript/utils/cipher.d.ts +13 -1
- package/lib/typescript/utils/cipher.d.ts.map +1 -1
- package/lib/typescript/utils/conversion.d.ts +9 -6
- package/lib/typescript/utils/conversion.d.ts.map +1 -1
- package/lib/typescript/utils/timingSafeEqual.d.ts.map +1 -1
- package/lib/typescript/x509certificate.d.ts.map +1 -1
- package/nitrogen/generated/shared/c++/HybridUtilsSpec.cpp +0 -2
- package/nitrogen/generated/shared/c++/HybridUtilsSpec.hpp +0 -3
- package/package.json +38 -6
- package/src/argon2.ts +80 -2
- package/src/cipher.ts +139 -15
- package/src/dsa.ts +11 -2
- package/src/hash.ts +17 -7
- package/src/hkdf.ts +44 -6
- package/src/hmac.ts +17 -7
- package/src/keys/publicCipher.ts +10 -4
- package/src/random.ts +11 -2
- package/src/rsa.ts +18 -4
- package/src/scrypt.ts +73 -6
- package/src/specs/utils.nitro.ts +0 -2
- package/src/subtle.ts +90 -8
- package/src/utils/cipher.ts +30 -8
- package/src/utils/conversion.ts +58 -20
- package/src/utils/timingSafeEqual.ts +8 -3
- package/src/x509certificate.ts +5 -6
- package/deps/blake3/.cargo/config.toml +0 -2
- package/deps/blake3/.git-blame-ignore-revs +0 -2
- package/deps/blake3/.github/workflows/build_b3sum.py +0 -38
- package/deps/blake3/.github/workflows/ci.yml +0 -491
- package/deps/blake3/.github/workflows/tag.yml +0 -43
- package/deps/blake3/.github/workflows/upload_github_release_asset.py +0 -73
- package/deps/blake3/CONTRIBUTING.md +0 -31
- package/deps/blake3/Cargo.toml +0 -135
- package/deps/blake3/b3sum/Cargo.lock +0 -513
- package/deps/blake3/b3sum/Cargo.toml +0 -26
- package/deps/blake3/b3sum/README.md +0 -72
- package/deps/blake3/b3sum/src/main.rs +0 -564
- package/deps/blake3/b3sum/src/unit_tests.rs +0 -235
- package/deps/blake3/b3sum/tests/cli_tests.rs +0 -680
- package/deps/blake3/b3sum/what_does_check_do.md +0 -176
- package/deps/blake3/benches/bench.rs +0 -623
- package/deps/blake3/build.rs +0 -389
- package/deps/blake3/c/CMakeLists.txt +0 -383
- package/deps/blake3/c/CMakePresets.json +0 -73
- package/deps/blake3/c/Makefile.testing +0 -82
- package/deps/blake3/c/blake3-config.cmake.in +0 -14
- package/deps/blake3/c/blake3_avx2.c +0 -326
- package/deps/blake3/c/blake3_avx2_x86-64_unix.S +0 -1815
- package/deps/blake3/c/blake3_avx2_x86-64_windows_gnu.S +0 -1817
- package/deps/blake3/c/blake3_avx2_x86-64_windows_msvc.asm +0 -1828
- package/deps/blake3/c/blake3_avx512.c +0 -1388
- package/deps/blake3/c/blake3_avx512_x86-64_unix.S +0 -4824
- package/deps/blake3/c/blake3_avx512_x86-64_windows_gnu.S +0 -2615
- package/deps/blake3/c/blake3_avx512_x86-64_windows_msvc.asm +0 -2634
- package/deps/blake3/c/blake3_c_rust_bindings/Cargo.toml +0 -32
- package/deps/blake3/c/blake3_c_rust_bindings/README.md +0 -4
- package/deps/blake3/c/blake3_c_rust_bindings/benches/bench.rs +0 -477
- package/deps/blake3/c/blake3_c_rust_bindings/build.rs +0 -253
- package/deps/blake3/c/blake3_c_rust_bindings/cross_test.sh +0 -31
- package/deps/blake3/c/blake3_c_rust_bindings/src/lib.rs +0 -333
- package/deps/blake3/c/blake3_c_rust_bindings/src/test.rs +0 -696
- package/deps/blake3/c/blake3_sse2.c +0 -566
- package/deps/blake3/c/blake3_sse2_x86-64_unix.S +0 -2291
- package/deps/blake3/c/blake3_sse2_x86-64_windows_gnu.S +0 -2332
- package/deps/blake3/c/blake3_sse2_x86-64_windows_msvc.asm +0 -2350
- package/deps/blake3/c/blake3_sse41.c +0 -560
- package/deps/blake3/c/blake3_sse41_x86-64_unix.S +0 -2028
- package/deps/blake3/c/blake3_sse41_x86-64_windows_gnu.S +0 -2069
- package/deps/blake3/c/blake3_sse41_x86-64_windows_msvc.asm +0 -2089
- package/deps/blake3/c/blake3_tbb.cpp +0 -37
- package/deps/blake3/c/dependencies/CMakeLists.txt +0 -3
- package/deps/blake3/c/dependencies/tbb/CMakeLists.txt +0 -28
- package/deps/blake3/c/example.c +0 -36
- package/deps/blake3/c/example_tbb.c +0 -57
- package/deps/blake3/c/libblake3.pc.in +0 -12
- package/deps/blake3/c/main.c +0 -166
- package/deps/blake3/c/test.py +0 -97
- package/deps/blake3/media/B3.svg +0 -70
- package/deps/blake3/media/BLAKE3.svg +0 -85
- package/deps/blake3/media/speed.svg +0 -1474
- package/deps/blake3/reference_impl/Cargo.toml +0 -8
- package/deps/blake3/reference_impl/README.md +0 -14
- package/deps/blake3/reference_impl/reference_impl.rs +0 -374
- package/deps/blake3/src/ffi_avx2.rs +0 -65
- package/deps/blake3/src/ffi_avx512.rs +0 -169
- package/deps/blake3/src/ffi_neon.rs +0 -82
- package/deps/blake3/src/ffi_sse2.rs +0 -126
- package/deps/blake3/src/ffi_sse41.rs +0 -126
- package/deps/blake3/src/guts.rs +0 -60
- package/deps/blake3/src/hazmat.rs +0 -704
- package/deps/blake3/src/io.rs +0 -64
- package/deps/blake3/src/join.rs +0 -92
- package/deps/blake3/src/lib.rs +0 -1835
- package/deps/blake3/src/platform.rs +0 -587
- package/deps/blake3/src/portable.rs +0 -198
- package/deps/blake3/src/rust_avx2.rs +0 -474
- package/deps/blake3/src/rust_sse2.rs +0 -775
- package/deps/blake3/src/rust_sse41.rs +0 -766
- package/deps/blake3/src/test.rs +0 -1049
- package/deps/blake3/src/traits.rs +0 -227
- package/deps/blake3/src/wasm32_simd.rs +0 -794
- package/deps/blake3/test_vectors/Cargo.toml +0 -19
- package/deps/blake3/test_vectors/cross_test.sh +0 -25
- package/deps/blake3/test_vectors/src/bin/generate.rs +0 -4
- package/deps/blake3/test_vectors/src/lib.rs +0 -350
- package/deps/blake3/test_vectors/test_vectors.json +0 -217
- package/deps/blake3/tools/compiler_version/Cargo.toml +0 -7
- package/deps/blake3/tools/compiler_version/build.rs +0 -6
- package/deps/blake3/tools/compiler_version/src/main.rs +0 -27
- package/deps/blake3/tools/instruction_set_support/Cargo.toml +0 -6
- package/deps/blake3/tools/instruction_set_support/src/main.rs +0 -10
- package/deps/blake3/tools/release.md +0 -16
- package/deps/ncrypto/.bazelignore +0 -4
- package/deps/ncrypto/.bazelrc +0 -1
- package/deps/ncrypto/.bazelversion +0 -1
- package/deps/ncrypto/.clang-format +0 -111
- package/deps/ncrypto/.github/workflows/bazel.yml +0 -58
- package/deps/ncrypto/.github/workflows/commitlint.yml +0 -16
- package/deps/ncrypto/.github/workflows/linter.yml +0 -38
- package/deps/ncrypto/.github/workflows/macos.yml +0 -43
- package/deps/ncrypto/.github/workflows/release-please.yml +0 -16
- package/deps/ncrypto/.github/workflows/ubuntu.yml +0 -128
- package/deps/ncrypto/.github/workflows/visual-studio.yml +0 -49
- package/deps/ncrypto/.python-version +0 -1
- package/deps/ncrypto/.release-please-manifest.json +0 -3
- package/deps/ncrypto/BUILD.bazel +0 -44
- package/deps/ncrypto/CHANGELOG.md +0 -37
- package/deps/ncrypto/CMakeLists.txt +0 -79
- package/deps/ncrypto/MODULE.bazel +0 -16
- package/deps/ncrypto/MODULE.bazel.lock +0 -461
- package/deps/ncrypto/cmake/CPM.cmake +0 -1225
- package/deps/ncrypto/cmake/ncrypto-flags.cmake +0 -17
- package/deps/ncrypto/ncrypto.pc.in +0 -10
- package/deps/ncrypto/patches/0001-Expose-libdecrepit-so-NodeJS-can-use-it-for-ncrypto.patch +0 -28
- package/deps/ncrypto/pyproject.toml +0 -38
- package/deps/ncrypto/release-please-config.json +0 -11
- package/deps/ncrypto/src/CMakeLists.txt +0 -40
- package/deps/ncrypto/tests/BUILD.bazel +0 -11
- package/deps/ncrypto/tests/CMakeLists.txt +0 -7
- package/deps/ncrypto/tests/basic.cpp +0 -856
- package/deps/ncrypto/tools/run-clang-format.sh +0 -42
- package/lib/tsconfig.tsbuildinfo +0 -1
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
struct utf32_to_utf8_t {
|
|
2
|
+
error_code err;
|
|
3
|
+
const char32_t *input;
|
|
4
|
+
char *output;
|
|
5
|
+
};
|
|
6
|
+
|
|
7
|
+
template <ErrorReporting er>
|
|
8
|
+
utf32_to_utf8_t ppc64_convert_utf32_to_utf8(const char32_t *buf, size_t len,
|
|
9
|
+
char *utf8_output) {
|
|
10
|
+
const char32_t *end = buf + len;
|
|
11
|
+
|
|
12
|
+
const auto v_f800 = vector_u16::splat(0xf800);
|
|
13
|
+
const auto v_d800 = vector_u16::splat(0xd800);
|
|
14
|
+
|
|
15
|
+
const auto v_ffff0000 = vector_u32::splat(0xffff0000);
|
|
16
|
+
const auto v_00000000 = vector_u32::zero();
|
|
17
|
+
auto forbidden_bytemask = simd16<bool>();
|
|
18
|
+
const size_t safety_margin =
|
|
19
|
+
12; // to avoid overruns, see issue
|
|
20
|
+
// https://github.com/simdutf/simdutf/issues/92
|
|
21
|
+
|
|
22
|
+
while (end - buf >=
|
|
23
|
+
std::ptrdiff_t(
|
|
24
|
+
16 + safety_margin)) { // buf is a char32_t pointer, each char32_t
|
|
25
|
+
// has 4 bytes or 32 bits, thus buf + 16 *
|
|
26
|
+
// char_32t = 512 bits = 64 bytes
|
|
27
|
+
// We load two 16 bytes registers for a total of 32 bytes or 16 characters.
|
|
28
|
+
// These two values can hold only 8 UTF32 chars
|
|
29
|
+
auto in0 = vector_u32::load(buf);
|
|
30
|
+
auto in1 = vector_u32::load(buf + vector_u32::ELEMENTS);
|
|
31
|
+
|
|
32
|
+
// Pack 32-bit UTF-32 code units to 16-bit UTF-16 code units with unsigned
|
|
33
|
+
// saturation
|
|
34
|
+
auto in = vector_u32::pack(in0, in1);
|
|
35
|
+
|
|
36
|
+
// Try to apply UTF-16 => UTF-8 from ./ppc64_convert_utf16_to_utf8.cpp
|
|
37
|
+
|
|
38
|
+
// Check for ASCII fast path
|
|
39
|
+
|
|
40
|
+
// ASCII fast path!!!!
|
|
41
|
+
// We eagerly load another 32 bytes, hoping that they will be ASCII too.
|
|
42
|
+
// The intuition is that we try to collect 16 ASCII characters which
|
|
43
|
+
// requires a total of 64 bytes of input. If we fail, we just pass thirdin
|
|
44
|
+
// and fourthin as our new inputs.
|
|
45
|
+
if (in.is_ascii()) { // if the first two blocks are ASCII
|
|
46
|
+
const auto in2 = vector_u32::load(buf + 2 * vector_u32::ELEMENTS);
|
|
47
|
+
const auto in3 = vector_u32::load(buf + 3 * vector_u32::ELEMENTS);
|
|
48
|
+
|
|
49
|
+
const auto next = vector_u32::pack(in2, in3);
|
|
50
|
+
if (next.is_ascii()) {
|
|
51
|
+
// 1. pack the bytes
|
|
52
|
+
const auto utf8_packed = vector_u16::pack(in, next);
|
|
53
|
+
// 2. store (16 bytes)
|
|
54
|
+
utf8_packed.store(utf8_output);
|
|
55
|
+
// 3. adjust pointers
|
|
56
|
+
buf += 16;
|
|
57
|
+
utf8_output += 16;
|
|
58
|
+
continue; // we are done for this round!
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
// `next` is not ASCII, write `in` and carry on with next
|
|
62
|
+
|
|
63
|
+
// 1. pack the bytes
|
|
64
|
+
const auto utf8_packed = vector_u16::pack(in, in);
|
|
65
|
+
utf8_packed.store(utf8_output);
|
|
66
|
+
// 3. adjust pointers
|
|
67
|
+
buf += 8;
|
|
68
|
+
utf8_output += 8;
|
|
69
|
+
|
|
70
|
+
// Proceed with next input
|
|
71
|
+
in = next;
|
|
72
|
+
in0 = in2;
|
|
73
|
+
in1 = in3;
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
// no bits set above 7th bit
|
|
77
|
+
const auto one_byte_bytemask = in < uint16_t(1 << 7);
|
|
78
|
+
const uint16_t one_byte_bitmask = one_byte_bytemask.to_bitmask();
|
|
79
|
+
|
|
80
|
+
// no bits set above 11th bit
|
|
81
|
+
const auto one_or_two_bytes_bytemask = in < uint16_t(1 << 11);
|
|
82
|
+
const uint16_t one_or_two_bytes_bitmask =
|
|
83
|
+
one_or_two_bytes_bytemask.to_bitmask();
|
|
84
|
+
|
|
85
|
+
if (one_or_two_bytes_bitmask == 0xffff) {
|
|
86
|
+
write_v_u16_11bits_to_utf8(
|
|
87
|
+
in, utf8_output, as_vector_u8(one_byte_bytemask), one_byte_bitmask);
|
|
88
|
+
buf += 8;
|
|
89
|
+
continue;
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
// Check for overflow in packing
|
|
93
|
+
const auto saturation_bytemask = ((in0 | in1) & v_ffff0000) == v_00000000;
|
|
94
|
+
const uint16_t saturation_bitmask = saturation_bytemask.to_bitmask();
|
|
95
|
+
if (saturation_bitmask == 0xffff) {
|
|
96
|
+
switch (er) {
|
|
97
|
+
case ErrorReporting::precise: {
|
|
98
|
+
const auto forbidden = (in & v_f800) == v_d800;
|
|
99
|
+
if (forbidden.any()) {
|
|
100
|
+
// We return no error code, instead we force the scalar procedure
|
|
101
|
+
// to rescan the portion of input where we've just found an error.
|
|
102
|
+
return utf32_to_utf8_t{error_code::SUCCESS, buf, utf8_output};
|
|
103
|
+
}
|
|
104
|
+
} break;
|
|
105
|
+
case ErrorReporting::at_the_end:
|
|
106
|
+
forbidden_bytemask |= (in & v_f800) == v_d800;
|
|
107
|
+
break;
|
|
108
|
+
case ErrorReporting::none:
|
|
109
|
+
break;
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
ppc64_convert_utf16_to_1_2_3_bytes_of_utf8(
|
|
113
|
+
in, one_byte_bitmask, one_or_two_bytes_bytemask,
|
|
114
|
+
one_or_two_bytes_bitmask, utf8_output);
|
|
115
|
+
buf += 8;
|
|
116
|
+
} else {
|
|
117
|
+
// case: at least one 32-bit word produce a surrogate pair in UTF-16 <=>
|
|
118
|
+
// will produce four UTF-8 bytes Let us do a scalar fallback. It may seem
|
|
119
|
+
// wasteful to use scalar code, but being efficient with SIMD in the
|
|
120
|
+
// presence of surrogate pairs may require non-trivial tables.
|
|
121
|
+
size_t forward = 15;
|
|
122
|
+
size_t k = 0;
|
|
123
|
+
if (size_t(end - buf) < forward + 1) {
|
|
124
|
+
forward = size_t(end - buf - 1);
|
|
125
|
+
}
|
|
126
|
+
for (; k < forward; k++) {
|
|
127
|
+
uint32_t word = buf[k];
|
|
128
|
+
if ((word & 0xFFFFFF80) == 0) {
|
|
129
|
+
*utf8_output++ = char(word);
|
|
130
|
+
} else if ((word & 0xFFFFF800) == 0) {
|
|
131
|
+
*utf8_output++ = char((word >> 6) | 0b11000000);
|
|
132
|
+
*utf8_output++ = char((word & 0b111111) | 0b10000000);
|
|
133
|
+
} else if ((word & 0xFFFF0000) == 0) {
|
|
134
|
+
if (er != ErrorReporting::none and
|
|
135
|
+
(word >= 0xD800 && word <= 0xDFFF)) {
|
|
136
|
+
return utf32_to_utf8_t{error_code::SURROGATE, buf + k, utf8_output};
|
|
137
|
+
}
|
|
138
|
+
*utf8_output++ = char((word >> 12) | 0b11100000);
|
|
139
|
+
*utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000);
|
|
140
|
+
*utf8_output++ = char((word & 0b111111) | 0b10000000);
|
|
141
|
+
} else {
|
|
142
|
+
if (er != ErrorReporting::none and (word > 0x10FFFF)) {
|
|
143
|
+
return utf32_to_utf8_t{error_code::TOO_LARGE, buf + k, utf8_output};
|
|
144
|
+
}
|
|
145
|
+
*utf8_output++ = char((word >> 18) | 0b11110000);
|
|
146
|
+
*utf8_output++ = char(((word >> 12) & 0b111111) | 0b10000000);
|
|
147
|
+
*utf8_output++ = char(((word >> 6) & 0b111111) | 0b10000000);
|
|
148
|
+
*utf8_output++ = char((word & 0b111111) | 0b10000000);
|
|
149
|
+
}
|
|
150
|
+
}
|
|
151
|
+
buf += k;
|
|
152
|
+
}
|
|
153
|
+
} // while
|
|
154
|
+
|
|
155
|
+
if (er == ErrorReporting::at_the_end) {
|
|
156
|
+
if (forbidden_bytemask.any()) {
|
|
157
|
+
return utf32_to_utf8_t{error_code::SURROGATE, buf, utf8_output};
|
|
158
|
+
}
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
return utf32_to_utf8_t{
|
|
162
|
+
error_code::SUCCESS,
|
|
163
|
+
buf,
|
|
164
|
+
utf8_output,
|
|
165
|
+
};
|
|
166
|
+
}
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
// depends on "tables/utf8_to_utf16_tables.h"
|
|
2
|
+
|
|
3
|
+
// Convert up to 12 bytes from utf8 to latin1 using a mask indicating the
|
|
4
|
+
// end of the code points. Only the least significant 12 bits of the mask
|
|
5
|
+
// are accessed.
|
|
6
|
+
// It returns how many bytes were consumed (up to 12).
|
|
7
|
+
size_t convert_masked_utf8_to_latin1(const char *input,
|
|
8
|
+
uint64_t utf8_end_of_code_point_mask,
|
|
9
|
+
char *&latin1_output) {
|
|
10
|
+
// we use an approach where we try to process up to 12 input bytes.
|
|
11
|
+
// Why 12 input bytes and not 16? Because we are concerned with the size of
|
|
12
|
+
// the lookup tables. Also 12 is nicely divisible by two and three.
|
|
13
|
+
//
|
|
14
|
+
//
|
|
15
|
+
// Optimization note: our main path below is load-latency dependent. Thus it
|
|
16
|
+
// is maybe beneficial to have fast paths that depend on branch prediction but
|
|
17
|
+
// have less latency. This results in more instructions but, potentially, also
|
|
18
|
+
// higher speeds.
|
|
19
|
+
//
|
|
20
|
+
const auto in = vector_u8::load(input);
|
|
21
|
+
const uint16_t input_utf8_end_of_code_point_mask =
|
|
22
|
+
utf8_end_of_code_point_mask &
|
|
23
|
+
0xfff; // we are only processing 12 bytes in case it is not all ASCII
|
|
24
|
+
if (utf8_end_of_code_point_mask == 0xfff) {
|
|
25
|
+
// We process the data in chunks of 12 bytes.
|
|
26
|
+
in.store(latin1_output);
|
|
27
|
+
latin1_output += 12; // We wrote 12 characters.
|
|
28
|
+
return 12; // We consumed 12 bytes.
|
|
29
|
+
}
|
|
30
|
+
/// We do not have a fast path available, so we fallback.
|
|
31
|
+
const uint8_t idx =
|
|
32
|
+
tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][0];
|
|
33
|
+
const uint8_t consumed =
|
|
34
|
+
tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][1];
|
|
35
|
+
// this indicates an invalid input:
|
|
36
|
+
if (idx >= 64) {
|
|
37
|
+
return consumed;
|
|
38
|
+
}
|
|
39
|
+
// Here we should have (idx < 64), if not, there is a bug in the validation or
|
|
40
|
+
// elsewhere. SIX (6) input code-code units this is a relatively easy scenario
|
|
41
|
+
// we process SIX (6) input code-code units. The max length in bytes of six
|
|
42
|
+
// code code units spanning between 1 and 2 bytes each is 12 bytes. On
|
|
43
|
+
// processors where pdep/pext is fast, we might be able to use a small lookup
|
|
44
|
+
// table.
|
|
45
|
+
|
|
46
|
+
const auto reshuffle = vector_u8::load(&tables::utf8_to_utf16::shufutf8[idx]);
|
|
47
|
+
const auto perm8 = reshuffle.lookup_32(in, vector_u8::zero());
|
|
48
|
+
#if SIMDUTF_IS_BIG_ENDIAN
|
|
49
|
+
const auto perm16 = as_vector_u16(perm8).swap_bytes();
|
|
50
|
+
#else
|
|
51
|
+
const auto perm16 = as_vector_u16(perm8);
|
|
52
|
+
#endif // SIMDUTF_IS_BIG_ENDIAN
|
|
53
|
+
const auto ascii = perm16 & uint16_t(0x7f);
|
|
54
|
+
const auto highbyte = perm16 & uint16_t(0x1f00);
|
|
55
|
+
const auto composed = ascii | highbyte.shr<2>();
|
|
56
|
+
|
|
57
|
+
const auto latin1_packed = vector_u16::pack(composed, composed);
|
|
58
|
+
#if defined(__clang__)
|
|
59
|
+
__attribute__((aligned(16))) char buf[16];
|
|
60
|
+
latin1_packed.store(buf);
|
|
61
|
+
memcpy(latin1_output, buf, 6);
|
|
62
|
+
#else
|
|
63
|
+
// writing 8 bytes even though we only care about the first 6 bytes.
|
|
64
|
+
const auto tmp = vec_u64_t(latin1_packed.value);
|
|
65
|
+
memcpy(latin1_output, &tmp[0], 8);
|
|
66
|
+
#endif
|
|
67
|
+
latin1_output += 6; // We wrote 6 bytes.
|
|
68
|
+
return consumed;
|
|
69
|
+
}
|
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
// depends on "tables/utf8_to_utf16_tables.h"
|
|
2
|
+
|
|
3
|
+
// Convert up to 12 bytes from utf8 to utf16 using a mask indicating the
|
|
4
|
+
// end of the code points. Only the least significant 12 bits of the mask
|
|
5
|
+
// are accessed.
|
|
6
|
+
// It returns how many bytes were consumed (up to 12).
|
|
7
|
+
template <endianness big_endian>
|
|
8
|
+
size_t convert_masked_utf8_to_utf16(const char *input,
|
|
9
|
+
uint64_t utf8_end_of_code_point_mask,
|
|
10
|
+
char16_t *&utf16_output) {
|
|
11
|
+
// we use an approach where we try to process up to 12 input bytes.
|
|
12
|
+
// Why 12 input bytes and not 16? Because we are concerned with the size of
|
|
13
|
+
// the lookup tables. Also 12 is nicely divisible by two and three.
|
|
14
|
+
//
|
|
15
|
+
//
|
|
16
|
+
// Optimization note: our main path below is load-latency dependent. Thus it
|
|
17
|
+
// is maybe beneficial to have fast paths that depend on branch prediction but
|
|
18
|
+
// have less latency. This results in more instructions but, potentially, also
|
|
19
|
+
// higher speeds.
|
|
20
|
+
//
|
|
21
|
+
// We first try a few fast paths.
|
|
22
|
+
const auto in = vector_u8::load(input);
|
|
23
|
+
const uint16_t input_utf8_end_of_code_point_mask =
|
|
24
|
+
utf8_end_of_code_point_mask & 0xfff;
|
|
25
|
+
if (utf8_end_of_code_point_mask == 0xfff) {
|
|
26
|
+
// We process the data in chunks of 12 bytes.
|
|
27
|
+
// Note: using 16 bytes is unsafe, see issue_ossfuzz_71218
|
|
28
|
+
in.store_bytes_as_utf16<big_endian>(utf16_output);
|
|
29
|
+
utf16_output += 12; // We wrote 12 16-bit characters.
|
|
30
|
+
return 12; // We consumed 12 bytes.
|
|
31
|
+
}
|
|
32
|
+
if (((utf8_end_of_code_point_mask & 0xFFFF) == 0xaaaa)) {
|
|
33
|
+
// We want to take 8 2-byte UTF-8 code units and turn them into 8 2-byte
|
|
34
|
+
// UTF-16 code units.
|
|
35
|
+
#if SIMDUTF_IS_BIG_ENDIAN
|
|
36
|
+
const auto in16 = as_vector_u16(in);
|
|
37
|
+
#else
|
|
38
|
+
const auto in16 = as_vector_u16(in).swap_bytes();
|
|
39
|
+
#endif // SIMDUTF_IS_BIG_ENDIAN
|
|
40
|
+
const auto lo = in16 & uint16_t(0x007f);
|
|
41
|
+
const auto hi = in16.shr<2>();
|
|
42
|
+
|
|
43
|
+
auto composed = select(uint16_t(0x1f00 >> 2), hi, lo);
|
|
44
|
+
if simdutf_constexpr (!match_system(big_endian)) {
|
|
45
|
+
composed = composed.swap_bytes();
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
composed.store(utf16_output);
|
|
49
|
+
utf16_output += 8; // We wrote 16 bytes, 8 code points.
|
|
50
|
+
return 16;
|
|
51
|
+
}
|
|
52
|
+
if (input_utf8_end_of_code_point_mask == 0x924) {
|
|
53
|
+
// We want to take 4 3-byte UTF-8 code units and turn them into 4 2-byte
|
|
54
|
+
// UTF-16 code units. There is probably a more efficient sequence, but the
|
|
55
|
+
// following might do.
|
|
56
|
+
|
|
57
|
+
// AltiVec: it might be done better, for now SSE translation
|
|
58
|
+
|
|
59
|
+
const auto sh =
|
|
60
|
+
vector_u8(2, 1, 0, 16, 5, 4, 3, 16, 8, 7, 6, 16, 11, 10, 9, 16);
|
|
61
|
+
#if SIMDUTF_IS_BIG_ENDIAN
|
|
62
|
+
const auto perm =
|
|
63
|
+
as_vector_u32(sh.lookup_32(in, vector_u8::zero())).swap_bytes();
|
|
64
|
+
#else
|
|
65
|
+
const auto perm = as_vector_u32(sh.lookup_32(in, vector_u8::zero()));
|
|
66
|
+
#endif // SIMDUTF_IS_BIG_ENDIAN
|
|
67
|
+
const auto b0 = perm & uint32_t(0x0000007f);
|
|
68
|
+
const auto b1 = select(uint32_t(0x00003f00 >> 2), perm.shr<2>(), b0);
|
|
69
|
+
const auto b2 = select(uint32_t(0x000f0000 >> 4), perm.shr<4>(), b1);
|
|
70
|
+
const auto composed = b2;
|
|
71
|
+
auto packed = vector_u32::pack(composed, composed);
|
|
72
|
+
|
|
73
|
+
if simdutf_constexpr (!match_system(big_endian)) {
|
|
74
|
+
packed = packed.swap_bytes();
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
packed.store(utf16_output);
|
|
78
|
+
utf16_output += 4;
|
|
79
|
+
return 12;
|
|
80
|
+
}
|
|
81
|
+
/// We do not have a fast path available, so we fallback.
|
|
82
|
+
|
|
83
|
+
const uint8_t idx =
|
|
84
|
+
tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][0];
|
|
85
|
+
const uint8_t consumed =
|
|
86
|
+
tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][1];
|
|
87
|
+
|
|
88
|
+
if (idx < 64) {
|
|
89
|
+
// SIX (6) input code-code units
|
|
90
|
+
// this is a relatively easy scenario
|
|
91
|
+
// we process SIX (6) input code-code units. The max length in bytes of six
|
|
92
|
+
// code code units spanning between 1 and 2 bytes each is 12 bytes. On
|
|
93
|
+
// processors where pdep/pext is fast, we might be able to use a small
|
|
94
|
+
// lookup table.
|
|
95
|
+
const auto sh = vector_u8::load(&tables::utf8_to_utf16::shufutf8[idx]);
|
|
96
|
+
#if SIMDUTF_IS_BIG_ENDIAN
|
|
97
|
+
const auto perm =
|
|
98
|
+
as_vector_u16(sh.lookup_32(in, vector_u8::zero())).swap_bytes();
|
|
99
|
+
#else
|
|
100
|
+
const auto perm = as_vector_u16(sh.lookup_32(in, vector_u8::zero()));
|
|
101
|
+
#endif // SIMDUTF_IS_BIG_ENDIAN
|
|
102
|
+
const auto b0 = perm & uint16_t(0x007f);
|
|
103
|
+
const auto b1 = perm & uint16_t(0x1f00);
|
|
104
|
+
|
|
105
|
+
auto composed = b0 | b1.shr<2>();
|
|
106
|
+
|
|
107
|
+
if simdutf_constexpr (!match_system(big_endian)) {
|
|
108
|
+
composed = composed.swap_bytes();
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
composed.store(utf16_output);
|
|
112
|
+
utf16_output += 6; // We wrote 12 bytes, 6 code points.
|
|
113
|
+
} else if (idx < 145) {
|
|
114
|
+
// FOUR (4) input code-code units
|
|
115
|
+
const auto sh = vector_u8::load(&tables::utf8_to_utf16::shufutf8[idx]);
|
|
116
|
+
#if SIMDUTF_IS_BIG_ENDIAN
|
|
117
|
+
const auto perm =
|
|
118
|
+
as_vector_u32(sh.lookup_32(in, vector_u8::zero())).swap_bytes();
|
|
119
|
+
#else
|
|
120
|
+
const auto perm = as_vector_u32(sh.lookup_32(in, vector_u8::zero()));
|
|
121
|
+
#endif // SIMDUTF_IS_BIG_ENDIAN
|
|
122
|
+
const auto b0 = perm & uint32_t(0x0000007f);
|
|
123
|
+
const auto b1 = perm & uint32_t(0x00003f00);
|
|
124
|
+
const auto b2 = perm & uint32_t(0x000f0000);
|
|
125
|
+
|
|
126
|
+
const auto composed = b0 | b1.shr<2>() | b2.shr<4>();
|
|
127
|
+
|
|
128
|
+
auto packed = vector_u32::pack(composed, composed);
|
|
129
|
+
|
|
130
|
+
if simdutf_constexpr (!match_system(big_endian)) {
|
|
131
|
+
packed = packed.swap_bytes();
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
packed.store(utf16_output);
|
|
135
|
+
utf16_output += 4;
|
|
136
|
+
} else if (idx < 209) {
|
|
137
|
+
// TWO (2) input code-code units
|
|
138
|
+
//////////////
|
|
139
|
+
// There might be garbage inputs where a leading byte mascarades as a
|
|
140
|
+
// four-byte leading byte (by being followed by 3 continuation byte), but is
|
|
141
|
+
// not greater than 0xf0. This could trigger a buffer overflow if we only
|
|
142
|
+
// counted leading bytes of the form 0xf0 as generating surrogate pairs,
|
|
143
|
+
// without further UTF-8 validation. Thus we must be careful to ensure that
|
|
144
|
+
// only leading bytes at least as large as 0xf0 generate surrogate pairs. We
|
|
145
|
+
// do as at the cost of an extra mask.
|
|
146
|
+
/////////////
|
|
147
|
+
const auto sh = vector_u8::load(&tables::utf8_to_utf16::shufutf8[idx]);
|
|
148
|
+
#if SIMDUTF_IS_BIG_ENDIAN
|
|
149
|
+
const auto perm =
|
|
150
|
+
as_vector_u32(sh.lookup_32(in, vector_u8::zero())).swap_bytes();
|
|
151
|
+
#else
|
|
152
|
+
const auto perm = as_vector_u32(sh.lookup_32(in, vector_u8::zero()));
|
|
153
|
+
#endif // SIMDUTF_IS_BIG_ENDIAN
|
|
154
|
+
const auto ascii = perm & uint32_t(0x00000007f);
|
|
155
|
+
const auto middlebyte = perm & uint32_t(0x00003f00);
|
|
156
|
+
const auto middlebyte_shifted = middlebyte.shr<2>();
|
|
157
|
+
|
|
158
|
+
auto middlehighbyte = perm & uint32_t(0x003f0000);
|
|
159
|
+
// correct for spurious high bit
|
|
160
|
+
|
|
161
|
+
const auto correct = (perm & uint32_t(0x00400000)).shr<1>();
|
|
162
|
+
middlehighbyte = correct ^ middlehighbyte;
|
|
163
|
+
const auto middlehighbyte_shifted = middlehighbyte.shr<4>();
|
|
164
|
+
// We deliberately carry the leading four bits in highbyte if they are
|
|
165
|
+
// present, we remove them later when computing hightenbits.
|
|
166
|
+
const auto highbyte = perm & uint32_t(0xff000000);
|
|
167
|
+
const auto highbyte_shifted = highbyte.shr<6>();
|
|
168
|
+
// When we need to generate a surrogate pair (leading byte > 0xF0), then
|
|
169
|
+
// the corresponding 32-bit value in 'composed' will be greater than
|
|
170
|
+
// > (0xff00000>>6) or > 0x3c00000. This can be used later to identify the
|
|
171
|
+
// location of the surrogate pairs.
|
|
172
|
+
const auto composed =
|
|
173
|
+
ascii | middlebyte_shifted | highbyte_shifted | middlehighbyte_shifted;
|
|
174
|
+
|
|
175
|
+
const auto composedminus = composed - uint32_t(0x10000);
|
|
176
|
+
const auto lowtenbits = composedminus & uint32_t(0x3ff);
|
|
177
|
+
// Notice the 0x3ff mask:
|
|
178
|
+
const auto hightenbits = composedminus.shr<10>() & uint32_t(0x3ff);
|
|
179
|
+
const auto lowtenbitsadd = lowtenbits + uint32_t(0xDC00);
|
|
180
|
+
const auto hightenbitsadd = hightenbits + uint32_t(0xD800);
|
|
181
|
+
const auto lowtenbitsaddshifted = lowtenbitsadd.shl<16>();
|
|
182
|
+
auto surrogates = hightenbitsadd | lowtenbitsaddshifted;
|
|
183
|
+
|
|
184
|
+
uint32_t basic_buffer[4];
|
|
185
|
+
composed.store(basic_buffer);
|
|
186
|
+
uint32_t surrogate_buffer[4];
|
|
187
|
+
surrogates.swap_bytes().store(surrogate_buffer);
|
|
188
|
+
|
|
189
|
+
for (size_t i = 0; i < 3; i++) {
|
|
190
|
+
if (basic_buffer[i] > 0x3c00000) {
|
|
191
|
+
const auto ch0 = uint16_t(surrogate_buffer[i] & 0xffff);
|
|
192
|
+
const auto ch1 = uint16_t(surrogate_buffer[i] >> 16);
|
|
193
|
+
if (match_system(big_endian)) {
|
|
194
|
+
utf16_output[1] = scalar::u16_swap_bytes(ch0);
|
|
195
|
+
utf16_output[0] = scalar::u16_swap_bytes(ch1);
|
|
196
|
+
} else {
|
|
197
|
+
utf16_output[1] = ch0;
|
|
198
|
+
utf16_output[0] = ch1;
|
|
199
|
+
}
|
|
200
|
+
utf16_output += 2;
|
|
201
|
+
} else {
|
|
202
|
+
const auto chr = uint16_t(basic_buffer[i]);
|
|
203
|
+
utf16_output[0] = scalar::utf16::swap_if_needed<big_endian>(chr);
|
|
204
|
+
utf16_output++;
|
|
205
|
+
}
|
|
206
|
+
}
|
|
207
|
+
} else {
|
|
208
|
+
// here we know that there is an error but we do not handle errors
|
|
209
|
+
}
|
|
210
|
+
return consumed;
|
|
211
|
+
}
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
// depends on "tables/utf8_to_utf16_tables.h"
|
|
2
|
+
|
|
3
|
+
// Convert up to 12 bytes from utf8 to utf32 using a mask indicating the
|
|
4
|
+
// end of the code points. Only the least significant 12 bits of the mask
|
|
5
|
+
// are accessed.
|
|
6
|
+
// It returns how many bytes were consumed (up to 12).
|
|
7
|
+
size_t convert_masked_utf8_to_utf32(const char *input,
|
|
8
|
+
uint64_t utf8_end_of_code_point_mask,
|
|
9
|
+
char32_t *&utf32_output) {
|
|
10
|
+
// we use an approach where we try to process up to 12 input bytes.
|
|
11
|
+
// Why 12 input bytes and not 16? Because we are concerned with the size of
|
|
12
|
+
// the lookup tables. Also 12 is nicely divisible by two and three.
|
|
13
|
+
//
|
|
14
|
+
//
|
|
15
|
+
// Optimization note: our main path below is load-latency dependent. Thus it
|
|
16
|
+
// is maybe beneficial to have fast paths that depend on branch prediction but
|
|
17
|
+
// have less latency. This results in more instructions but, potentially, also
|
|
18
|
+
// higher speeds.
|
|
19
|
+
//
|
|
20
|
+
// We first try a few fast paths.
|
|
21
|
+
const auto in = vector_u8::load(input);
|
|
22
|
+
const uint16_t input_utf8_end_of_code_point_mask =
|
|
23
|
+
utf8_end_of_code_point_mask & 0xfff;
|
|
24
|
+
if (utf8_end_of_code_point_mask == 0xfff) {
|
|
25
|
+
// We process the data in chunks of 12 bytes.
|
|
26
|
+
in.store_bytes_as_utf32(utf32_output);
|
|
27
|
+
utf32_output += 12; // We wrote 12 32-bit characters.
|
|
28
|
+
return 12; // We consumed 12 bytes.
|
|
29
|
+
}
|
|
30
|
+
if (((utf8_end_of_code_point_mask & 0xffff) == 0xaaaa)) {
|
|
31
|
+
// We want to take 8 2-byte UTF-8 code units and turn them into 8 4-byte
|
|
32
|
+
// UTF-32 code units.
|
|
33
|
+
#if SIMDUTF_IS_BIG_ENDIAN
|
|
34
|
+
const auto perm = as_vector_u16(in);
|
|
35
|
+
#else
|
|
36
|
+
const auto perm = as_vector_u16(in).swap_bytes();
|
|
37
|
+
#endif // SIMDUTF_IS_BIG_ENDIAN
|
|
38
|
+
// in = [110aaaaa|10bbbbbb]
|
|
39
|
+
// t0 = [00000000|00bbbbbb]
|
|
40
|
+
const auto t0 = perm & uint16_t(0x007f);
|
|
41
|
+
|
|
42
|
+
// t1 = [00110aaa|aabbbbbb]
|
|
43
|
+
const auto t1 = perm.shr<2>();
|
|
44
|
+
const auto composed = select(uint16_t(0x1f00 >> 2), t1, t0);
|
|
45
|
+
|
|
46
|
+
const auto composed8 = as_vector_u8(composed);
|
|
47
|
+
composed8.store_words_as_utf32(utf32_output);
|
|
48
|
+
|
|
49
|
+
utf32_output += 8; // We wrote 32 bytes, 8 code points.
|
|
50
|
+
return 16;
|
|
51
|
+
}
|
|
52
|
+
if (input_utf8_end_of_code_point_mask == 0x924) {
|
|
53
|
+
// We want to take 4 3-byte UTF-8 code units and turn them into 4 4-byte
|
|
54
|
+
// UTF-32 code units.
|
|
55
|
+
#if SIMDUTF_IS_BIG_ENDIAN
|
|
56
|
+
const auto sh =
|
|
57
|
+
vector_u8(-1, 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, 10, 11);
|
|
58
|
+
#else
|
|
59
|
+
const auto sh =
|
|
60
|
+
vector_u8(2, 1, 0, -1, 5, 4, 3, -1, 8, 7, 6, -1, 11, 10, 9, -1);
|
|
61
|
+
#endif // SIMDUTF_IS_BIG_ENDIAN
|
|
62
|
+
const auto perm = as_vector_u32(sh.lookup_32(in, vector_u8::zero()));
|
|
63
|
+
|
|
64
|
+
// in = [1110aaaa|10bbbbbb|10cccccc]
|
|
65
|
+
|
|
66
|
+
// t0 = [00000000|00000000|00cccccc]
|
|
67
|
+
const auto t0 = perm & uint32_t(0x0000007f);
|
|
68
|
+
|
|
69
|
+
// t2 = [00000000|0000bbbb|bbcccccc]
|
|
70
|
+
const auto t1 = perm.shr<2>();
|
|
71
|
+
const auto t2 = select(uint32_t(0x00003f00 >> 2), t1, t0);
|
|
72
|
+
|
|
73
|
+
// t4 = [00000000|aaaabbbb|bbcccccc]
|
|
74
|
+
const auto t3 = perm.shr<4>();
|
|
75
|
+
const auto t4 = select(uint32_t(0x0f0000 >> 4), t3, t2);
|
|
76
|
+
|
|
77
|
+
t4.store(utf32_output);
|
|
78
|
+
utf32_output += 4;
|
|
79
|
+
return 12;
|
|
80
|
+
}
|
|
81
|
+
/// We do not have a fast path available, so we fallback.
|
|
82
|
+
|
|
83
|
+
const uint8_t idx =
|
|
84
|
+
tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][0];
|
|
85
|
+
const uint8_t consumed =
|
|
86
|
+
tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][1];
|
|
87
|
+
if (idx < 64) {
|
|
88
|
+
// SIX (6) input code-code units
|
|
89
|
+
// this is a relatively easy scenario
|
|
90
|
+
// we process SIX (6) input code-code units. The max length in bytes of six
|
|
91
|
+
// code code units spanning between 1 and 2 bytes each is 12 bytes. On
|
|
92
|
+
// processors where pdep/pext is fast, we might be able to use a small
|
|
93
|
+
// lookup table.
|
|
94
|
+
const auto sh = vector_u8::load(&tables::utf8_to_utf16::shufutf8[idx]);
|
|
95
|
+
#if SIMDUTF_IS_BIG_ENDIAN
|
|
96
|
+
const auto perm =
|
|
97
|
+
as_vector_u16(sh.lookup_32(in, vector_u8::zero())).swap_bytes();
|
|
98
|
+
#else
|
|
99
|
+
const auto perm = as_vector_u16(sh.lookup_32(in, vector_u8::zero()));
|
|
100
|
+
#endif // SIMDUTF_IS_BIG_ENDIAN
|
|
101
|
+
const auto ascii = perm & uint16_t(0x7f);
|
|
102
|
+
const auto highbyte = perm & uint16_t(0x1f00);
|
|
103
|
+
const auto composed = ascii | highbyte.shr<2>();
|
|
104
|
+
|
|
105
|
+
as_vector_u8(composed).store_words_as_utf32(utf32_output);
|
|
106
|
+
utf32_output += 6; // We wrote 12 bytes, 6 code points.
|
|
107
|
+
} else if (idx < 145) {
|
|
108
|
+
// FOUR (4) input code-code units
|
|
109
|
+
const auto sh = vector_u8::load(&tables::utf8_to_utf16::shufutf8[idx]);
|
|
110
|
+
#if SIMDUTF_IS_BIG_ENDIAN
|
|
111
|
+
const auto perm =
|
|
112
|
+
as_vector_u32(sh.lookup_32(in, vector_u8::zero())).swap_bytes();
|
|
113
|
+
#else
|
|
114
|
+
const auto perm = as_vector_u32(sh.lookup_32(in, vector_u8::zero()));
|
|
115
|
+
#endif // SIMDUTF_IS_BIG_ENDIAN
|
|
116
|
+
const auto ascii = perm & uint32_t(0x7f);
|
|
117
|
+
const auto middlebyte = perm & uint32_t(0x3f00);
|
|
118
|
+
const auto middlebyte_shifted = middlebyte.shr<2>();
|
|
119
|
+
const auto highbyte = perm & uint32_t(0x0f0000);
|
|
120
|
+
const auto highbyte_shifted = highbyte.shr<4>();
|
|
121
|
+
const auto composed = ascii | middlebyte_shifted | highbyte_shifted;
|
|
122
|
+
|
|
123
|
+
composed.store(utf32_output);
|
|
124
|
+
utf32_output += 4;
|
|
125
|
+
} else if (idx < 209) {
|
|
126
|
+
// TWO (2) input code-code units
|
|
127
|
+
const auto sh = vector_u8::load(&tables::utf8_to_utf16::shufutf8[idx]);
|
|
128
|
+
#if SIMDUTF_IS_BIG_ENDIAN
|
|
129
|
+
const auto perm =
|
|
130
|
+
as_vector_u32(sh.lookup_32(in, vector_u8::zero())).swap_bytes();
|
|
131
|
+
#else
|
|
132
|
+
const auto perm = as_vector_u32(sh.lookup_32(in, vector_u8::zero()));
|
|
133
|
+
#endif // SIMDUTF_IS_BIG_ENDIAN
|
|
134
|
+
const auto ascii = perm & uint32_t(0x0000007f);
|
|
135
|
+
const auto middlebyte = perm & uint32_t(0x3f00);
|
|
136
|
+
const auto middlebyte_shifted = middlebyte.shr<2>();
|
|
137
|
+
auto middlehighbyte = perm & uint32_t(0x003f0000);
|
|
138
|
+
// correct for spurious high bit
|
|
139
|
+
const auto correct0 = perm & uint32_t(0x00400000);
|
|
140
|
+
const auto correct = correct0.shr<1>();
|
|
141
|
+
middlehighbyte = correct ^ middlehighbyte;
|
|
142
|
+
const auto middlehighbyte_shifted = middlehighbyte.shr<4>();
|
|
143
|
+
const auto highbyte = perm & uint32_t(0x07000000);
|
|
144
|
+
const auto highbyte_shifted = highbyte.shr<6>();
|
|
145
|
+
const auto composed =
|
|
146
|
+
ascii | middlebyte_shifted | highbyte_shifted | middlehighbyte_shifted;
|
|
147
|
+
composed.store(utf32_output);
|
|
148
|
+
utf32_output += 3;
|
|
149
|
+
} else {
|
|
150
|
+
// here we know that there is an error but we do not handle errors
|
|
151
|
+
}
|
|
152
|
+
return consumed;
|
|
153
|
+
}
|