react-native-quick-crypto 1.0.19 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/QuickCrypto.podspec +12 -38
- package/README.md +2 -0
- package/android/CMakeLists.txt +3 -0
- package/android/build.gradle +5 -1
- package/cpp/argon2/HybridArgon2.cpp +10 -3
- package/cpp/blake3/HybridBlake3.cpp +5 -3
- package/cpp/cipher/CCMCipher.cpp +29 -16
- package/cpp/cipher/CCMCipher.hpp +2 -4
- package/cpp/cipher/ChaCha20Cipher.cpp +14 -18
- package/cpp/cipher/ChaCha20Cipher.hpp +2 -4
- package/cpp/cipher/ChaCha20Poly1305Cipher.cpp +34 -23
- package/cpp/cipher/ChaCha20Poly1305Cipher.hpp +2 -4
- package/cpp/cipher/GCMCipher.cpp +14 -15
- package/cpp/cipher/HybridCipher.cpp +39 -36
- package/cpp/cipher/HybridCipher.hpp +17 -1
- package/cpp/cipher/HybridRsaCipher.cpp +74 -29
- package/cpp/cipher/OCBCipher.cpp +4 -3
- package/cpp/cipher/XChaCha20Poly1305Cipher.cpp +14 -13
- package/cpp/cipher/XSalsa20Cipher.cpp +72 -6
- package/cpp/cipher/XSalsa20Cipher.hpp +25 -3
- package/cpp/cipher/XSalsa20Poly1305Cipher.cpp +21 -25
- package/cpp/dh/HybridDiffieHellman.cpp +29 -0
- package/cpp/ec/HybridEcKeyPair.cpp +35 -33
- package/cpp/ec/HybridEcKeyPair.hpp +3 -7
- package/cpp/ecdh/HybridECDH.cpp +23 -0
- package/cpp/ed25519/HybridEdKeyPair.cpp +73 -117
- package/cpp/ed25519/HybridEdKeyPair.hpp +5 -9
- package/cpp/hash/HybridHash.cpp +5 -7
- package/cpp/hkdf/HybridHkdf.cpp +6 -4
- package/cpp/hmac/HybridHmac.cpp +4 -6
- package/cpp/kmac/HybridKmac.cpp +4 -4
- package/cpp/mldsa/HybridMlDsaKeyPair.cpp +37 -49
- package/cpp/mlkem/HybridMlKemKeyPair.cpp +39 -43
- package/cpp/pbkdf2/HybridPbkdf2.cpp +7 -8
- package/cpp/rsa/HybridRsaKeyPair.cpp +5 -8
- package/cpp/rsa/HybridRsaKeyPair.hpp +4 -7
- package/cpp/scrypt/HybridScrypt.cpp +6 -4
- package/cpp/sign/HybridSignHandle.cpp +25 -68
- package/cpp/sign/HybridVerifyHandle.cpp +23 -60
- package/cpp/utils/HybridUtils.cpp +213 -111
- package/cpp/utils/HybridUtils.hpp +9 -2
- package/cpp/utils/QuickCryptoUtils.hpp +72 -0
- package/deps/simdutf/LICENSE-APACHE +201 -0
- package/deps/simdutf/LICENSE-MIT +18 -0
- package/deps/simdutf/README.md +2782 -0
- package/deps/simdutf/include/simdutf/avx512.h +79 -0
- package/deps/simdutf/include/simdutf/base64_implementation.h +158 -0
- package/deps/simdutf/include/simdutf/base64_tables.h +887 -0
- package/deps/simdutf/include/simdutf/common_defs.h +186 -0
- package/deps/simdutf/include/simdutf/compiler_check.h +50 -0
- package/deps/simdutf/include/simdutf/constexpr_ptr.h +138 -0
- package/deps/simdutf/include/simdutf/encoding_types.h +189 -0
- package/deps/simdutf/include/simdutf/error.h +126 -0
- package/deps/simdutf/include/simdutf/implementation.h +7081 -0
- package/deps/simdutf/include/simdutf/internal/isadetection.h +325 -0
- package/deps/simdutf/include/simdutf/portability.h +285 -0
- package/deps/simdutf/include/simdutf/scalar/ascii.h +86 -0
- package/deps/simdutf/include/simdutf/scalar/atomic_util.h +105 -0
- package/deps/simdutf/include/simdutf/scalar/base64.h +911 -0
- package/deps/simdutf/include/simdutf/scalar/latin1.h +26 -0
- package/deps/simdutf/include/simdutf/scalar/latin1_to_utf16/latin1_to_utf16.h +52 -0
- package/deps/simdutf/include/simdutf/scalar/latin1_to_utf32/latin1_to_utf32.h +27 -0
- package/deps/simdutf/include/simdutf/scalar/latin1_to_utf8/latin1_to_utf8.h +191 -0
- package/deps/simdutf/include/simdutf/scalar/swap_bytes.h +35 -0
- package/deps/simdutf/include/simdutf/scalar/utf16.h +226 -0
- package/deps/simdutf/include/simdutf/scalar/utf16_to_latin1/utf16_to_latin1.h +108 -0
- package/deps/simdutf/include/simdutf/scalar/utf16_to_latin1/valid_utf16_to_latin1.h +40 -0
- package/deps/simdutf/include/simdutf/scalar/utf16_to_utf32/utf16_to_utf32.h +86 -0
- package/deps/simdutf/include/simdutf/scalar/utf16_to_utf32/valid_utf16_to_utf32.h +44 -0
- package/deps/simdutf/include/simdutf/scalar/utf16_to_utf8/utf16_to_utf8.h +295 -0
- package/deps/simdutf/include/simdutf/scalar/utf16_to_utf8/valid_utf16_to_utf8.h +91 -0
- package/deps/simdutf/include/simdutf/scalar/utf32.h +82 -0
- package/deps/simdutf/include/simdutf/scalar/utf32_to_latin1/utf32_to_latin1.h +68 -0
- package/deps/simdutf/include/simdutf/scalar/utf32_to_latin1/valid_utf32_to_latin1.h +67 -0
- package/deps/simdutf/include/simdutf/scalar/utf32_to_utf16/utf32_to_utf16.h +84 -0
- package/deps/simdutf/include/simdutf/scalar/utf32_to_utf16/valid_utf32_to_utf16.h +44 -0
- package/deps/simdutf/include/simdutf/scalar/utf32_to_utf8/utf32_to_utf8.h +142 -0
- package/deps/simdutf/include/simdutf/scalar/utf32_to_utf8/valid_utf32_to_utf8.h +72 -0
- package/deps/simdutf/include/simdutf/scalar/utf8.h +326 -0
- package/deps/simdutf/include/simdutf/scalar/utf8_to_latin1/utf8_to_latin1.h +225 -0
- package/deps/simdutf/include/simdutf/scalar/utf8_to_latin1/valid_utf8_to_latin1.h +87 -0
- package/deps/simdutf/include/simdutf/scalar/utf8_to_utf16/utf8_to_utf16.h +342 -0
- package/deps/simdutf/include/simdutf/scalar/utf8_to_utf16/valid_utf8_to_utf16.h +106 -0
- package/deps/simdutf/include/simdutf/scalar/utf8_to_utf32/utf8_to_utf32.h +299 -0
- package/deps/simdutf/include/simdutf/scalar/utf8_to_utf32/valid_utf8_to_utf32.h +83 -0
- package/deps/simdutf/include/simdutf/simdutf_version.h +26 -0
- package/deps/simdutf/include/simdutf.h +26 -0
- package/deps/simdutf/include/simdutf_c.h +342 -0
- package/deps/simdutf/src/arm64/arm_base64.cpp +791 -0
- package/deps/simdutf/src/arm64/arm_convert_latin1_to_utf16.cpp +24 -0
- package/deps/simdutf/src/arm64/arm_convert_latin1_to_utf32.cpp +24 -0
- package/deps/simdutf/src/arm64/arm_convert_latin1_to_utf8.cpp +70 -0
- package/deps/simdutf/src/arm64/arm_convert_utf16_to_latin1.cpp +61 -0
- package/deps/simdutf/src/arm64/arm_convert_utf16_to_utf32.cpp +185 -0
- package/deps/simdutf/src/arm64/arm_convert_utf16_to_utf8.cpp +780 -0
- package/deps/simdutf/src/arm64/arm_convert_utf32_to_latin1.cpp +60 -0
- package/deps/simdutf/src/arm64/arm_convert_utf32_to_utf16.cpp +208 -0
- package/deps/simdutf/src/arm64/arm_convert_utf32_to_utf8.cpp +505 -0
- package/deps/simdutf/src/arm64/arm_convert_utf8_to_latin1.cpp +69 -0
- package/deps/simdutf/src/arm64/arm_convert_utf8_to_utf16.cpp +313 -0
- package/deps/simdutf/src/arm64/arm_convert_utf8_to_utf32.cpp +179 -0
- package/deps/simdutf/src/arm64/arm_find.cpp +199 -0
- package/deps/simdutf/src/arm64/arm_utf16fix.cpp +185 -0
- package/deps/simdutf/src/arm64/arm_validate_utf16.cpp +165 -0
- package/deps/simdutf/src/arm64/arm_validate_utf32le.cpp +65 -0
- package/deps/simdutf/src/arm64/implementation.cpp +1442 -0
- package/deps/simdutf/src/encoding_types.cpp +67 -0
- package/deps/simdutf/src/error.cpp +3 -0
- package/deps/simdutf/src/fallback/implementation.cpp +589 -0
- package/deps/simdutf/src/generic/ascii_validation.h +50 -0
- package/deps/simdutf/src/generic/base64.h +233 -0
- package/deps/simdutf/src/generic/base64lengths.h +63 -0
- package/deps/simdutf/src/generic/buf_block_reader.h +109 -0
- package/deps/simdutf/src/generic/find.h +75 -0
- package/deps/simdutf/src/generic/utf16/change_endianness.h +24 -0
- package/deps/simdutf/src/generic/utf16/count_code_points_bytemask.h +58 -0
- package/deps/simdutf/src/generic/utf16/to_well_formed.h +93 -0
- package/deps/simdutf/src/generic/utf16/utf32_length_from_utf16.h +15 -0
- package/deps/simdutf/src/generic/utf16/utf8_length_from_utf16.h +35 -0
- package/deps/simdutf/src/generic/utf16/utf8_length_from_utf16_bytemask.h +199 -0
- package/deps/simdutf/src/generic/utf16.h +73 -0
- package/deps/simdutf/src/generic/utf32.h +136 -0
- package/deps/simdutf/src/generic/utf8/utf16_length_from_utf8_bytemask.h +53 -0
- package/deps/simdutf/src/generic/utf8.h +92 -0
- package/deps/simdutf/src/generic/utf8_to_latin1/utf8_to_latin1.h +316 -0
- package/deps/simdutf/src/generic/utf8_to_latin1/valid_utf8_to_latin1.h +78 -0
- package/deps/simdutf/src/generic/utf8_to_utf16/utf8_to_utf16.h +332 -0
- package/deps/simdutf/src/generic/utf8_to_utf16/valid_utf8_to_utf16.h +74 -0
- package/deps/simdutf/src/generic/utf8_to_utf32/utf8_to_utf32.h +318 -0
- package/deps/simdutf/src/generic/utf8_to_utf32/valid_utf8_to_utf32.h +42 -0
- package/deps/simdutf/src/generic/utf8_validation/utf8_lookup4_algorithm.h +223 -0
- package/deps/simdutf/src/generic/utf8_validation/utf8_validator.h +84 -0
- package/deps/simdutf/src/generic/validate_utf16.h +164 -0
- package/deps/simdutf/src/generic/validate_utf32.h +99 -0
- package/deps/simdutf/src/haswell/avx2_base64.cpp +837 -0
- package/deps/simdutf/src/haswell/avx2_convert_latin1_to_utf16.cpp +28 -0
- package/deps/simdutf/src/haswell/avx2_convert_latin1_to_utf32.cpp +20 -0
- package/deps/simdutf/src/haswell/avx2_convert_latin1_to_utf8.cpp +83 -0
- package/deps/simdutf/src/haswell/avx2_convert_utf16_to_latin1.cpp +83 -0
- package/deps/simdutf/src/haswell/avx2_convert_utf16_to_utf32.cpp +210 -0
- package/deps/simdutf/src/haswell/avx2_convert_utf16_to_utf8.cpp +602 -0
- package/deps/simdutf/src/haswell/avx2_convert_utf32_to_latin1.cpp +116 -0
- package/deps/simdutf/src/haswell/avx2_convert_utf32_to_utf16.cpp +164 -0
- package/deps/simdutf/src/haswell/avx2_convert_utf32_to_utf8.cpp +569 -0
- package/deps/simdutf/src/haswell/avx2_convert_utf8_to_latin1.cpp +60 -0
- package/deps/simdutf/src/haswell/avx2_convert_utf8_to_utf16.cpp +195 -0
- package/deps/simdutf/src/haswell/avx2_convert_utf8_to_utf32.cpp +135 -0
- package/deps/simdutf/src/haswell/avx2_utf16fix.cpp +173 -0
- package/deps/simdutf/src/haswell/avx2_validate_utf16.cpp +17 -0
- package/deps/simdutf/src/haswell/implementation.cpp +1447 -0
- package/deps/simdutf/src/icelake/icelake_ascii_validation.inl.cpp +19 -0
- package/deps/simdutf/src/icelake/icelake_base64.inl.cpp +630 -0
- package/deps/simdutf/src/icelake/icelake_common.inl.cpp +37 -0
- package/deps/simdutf/src/icelake/icelake_convert_latin1_to_utf16.inl.cpp +36 -0
- package/deps/simdutf/src/icelake/icelake_convert_latin1_to_utf32.inl.cpp +23 -0
- package/deps/simdutf/src/icelake/icelake_convert_latin1_to_utf8.inl.cpp +107 -0
- package/deps/simdutf/src/icelake/icelake_convert_utf16_to_latin1.inl.cpp +103 -0
- package/deps/simdutf/src/icelake/icelake_convert_utf16_to_utf32.inl.cpp +136 -0
- package/deps/simdutf/src/icelake/icelake_convert_utf16_to_utf8.inl.cpp +206 -0
- package/deps/simdutf/src/icelake/icelake_convert_utf32_to_latin1.inl.cpp +74 -0
- package/deps/simdutf/src/icelake/icelake_convert_utf32_to_utf16.inl.cpp +338 -0
- package/deps/simdutf/src/icelake/icelake_convert_utf32_to_utf8.inl.cpp +574 -0
- package/deps/simdutf/src/icelake/icelake_convert_utf8_to_latin1.inl.cpp +104 -0
- package/deps/simdutf/src/icelake/icelake_convert_utf8_to_utf16.inl.cpp +75 -0
- package/deps/simdutf/src/icelake/icelake_convert_valid_utf8_to_latin1.inl.cpp +69 -0
- package/deps/simdutf/src/icelake/icelake_find.inl.cpp +146 -0
- package/deps/simdutf/src/icelake/icelake_from_utf8.inl.cpp +266 -0
- package/deps/simdutf/src/icelake/icelake_from_valid_utf8.inl.cpp +136 -0
- package/deps/simdutf/src/icelake/icelake_macros.inl.cpp +143 -0
- package/deps/simdutf/src/icelake/icelake_utf16fix.cpp +138 -0
- package/deps/simdutf/src/icelake/icelake_utf32_validation.inl.cpp +63 -0
- package/deps/simdutf/src/icelake/icelake_utf8_common.inl.cpp +753 -0
- package/deps/simdutf/src/icelake/icelake_utf8_length_from_utf16.inl.cpp +269 -0
- package/deps/simdutf/src/icelake/icelake_utf8_validation.inl.cpp +116 -0
- package/deps/simdutf/src/icelake/implementation.cpp +1903 -0
- package/deps/simdutf/src/implementation.cpp +2526 -0
- package/deps/simdutf/src/lasx/implementation.cpp +1531 -0
- package/deps/simdutf/src/lasx/lasx_base64.cpp +695 -0
- package/deps/simdutf/src/lasx/lasx_convert_latin1_to_utf16.cpp +76 -0
- package/deps/simdutf/src/lasx/lasx_convert_latin1_to_utf32.cpp +55 -0
- package/deps/simdutf/src/lasx/lasx_convert_latin1_to_utf8.cpp +65 -0
- package/deps/simdutf/src/lasx/lasx_convert_utf16_to_latin1.cpp +64 -0
- package/deps/simdutf/src/lasx/lasx_convert_utf16_to_utf32.cpp +183 -0
- package/deps/simdutf/src/lasx/lasx_convert_utf16_to_utf8.cpp +550 -0
- package/deps/simdutf/src/lasx/lasx_convert_utf32_to_latin1.cpp +73 -0
- package/deps/simdutf/src/lasx/lasx_convert_utf32_to_utf16.cpp +218 -0
- package/deps/simdutf/src/lasx/lasx_convert_utf32_to_utf8.cpp +589 -0
- package/deps/simdutf/src/lasx/lasx_convert_utf8_to_latin1.cpp +72 -0
- package/deps/simdutf/src/lasx/lasx_convert_utf8_to_utf16.cpp +296 -0
- package/deps/simdutf/src/lasx/lasx_convert_utf8_to_utf32.cpp +190 -0
- package/deps/simdutf/src/lasx/lasx_find.cpp +64 -0
- package/deps/simdutf/src/lasx/lasx_validate_utf16.cpp +13 -0
- package/deps/simdutf/src/lasx/lasx_validate_utf32le.cpp +84 -0
- package/deps/simdutf/src/lsx/implementation.cpp +1417 -0
- package/deps/simdutf/src/lsx/lsx_base64.cpp +675 -0
- package/deps/simdutf/src/lsx/lsx_convert_latin1_to_utf16.cpp +39 -0
- package/deps/simdutf/src/lsx/lsx_convert_latin1_to_utf32.cpp +27 -0
- package/deps/simdutf/src/lsx/lsx_convert_latin1_to_utf8.cpp +56 -0
- package/deps/simdutf/src/lsx/lsx_convert_utf16_to_latin1.cpp +64 -0
- package/deps/simdutf/src/lsx/lsx_convert_utf16_to_utf32.cpp +133 -0
- package/deps/simdutf/src/lsx/lsx_convert_utf16_to_utf8.cpp +518 -0
- package/deps/simdutf/src/lsx/lsx_convert_utf32_to_latin1.cpp +66 -0
- package/deps/simdutf/src/lsx/lsx_convert_utf32_to_utf16.cpp +155 -0
- package/deps/simdutf/src/lsx/lsx_convert_utf32_to_utf8.cpp +459 -0
- package/deps/simdutf/src/lsx/lsx_convert_utf8_to_latin1.cpp +75 -0
- package/deps/simdutf/src/lsx/lsx_convert_utf8_to_utf16.cpp +291 -0
- package/deps/simdutf/src/lsx/lsx_convert_utf8_to_utf32.cpp +179 -0
- package/deps/simdutf/src/lsx/lsx_find.cpp +60 -0
- package/deps/simdutf/src/lsx/lsx_validate_utf16.cpp +13 -0
- package/deps/simdutf/src/lsx/lsx_validate_utf32le.cpp +68 -0
- package/deps/simdutf/src/ppc64/implementation.cpp +992 -0
- package/deps/simdutf/src/ppc64/ppc64_base64.cpp +480 -0
- package/deps/simdutf/src/ppc64/ppc64_base64_internal_tests.cpp +401 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_latin1_to_utf16.cpp +12 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_latin1_to_utf32.cpp +12 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_latin1_to_utf8.cpp +149 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_utf16_to_latin1.cpp +67 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_utf16_to_utf32.cpp +87 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_utf16_to_utf8.cpp +296 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_utf32_to_latin1.cpp +57 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_utf32_to_utf16.cpp +117 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_utf32_to_utf8.cpp +166 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_utf8_to_latin1.cpp +69 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_utf8_to_utf16.cpp +211 -0
- package/deps/simdutf/src/ppc64/ppc64_convert_utf8_to_utf32.cpp +153 -0
- package/deps/simdutf/src/ppc64/ppc64_utf16_to_utf8_tables.h +1011 -0
- package/deps/simdutf/src/ppc64/ppc64_utf8_length_from_latin1.cpp +37 -0
- package/deps/simdutf/src/ppc64/ppc64_validate_utf16.cpp +19 -0
- package/deps/simdutf/src/ppc64/templates.cpp +91 -0
- package/deps/simdutf/src/rvv/implementation.cpp +138 -0
- package/deps/simdutf/src/rvv/rvv_find.cpp +27 -0
- package/deps/simdutf/src/rvv/rvv_helpers.inl.cpp +23 -0
- package/deps/simdutf/src/rvv/rvv_latin1_to.inl.cpp +71 -0
- package/deps/simdutf/src/rvv/rvv_length_from.inl.cpp +164 -0
- package/deps/simdutf/src/rvv/rvv_utf16_to.inl.cpp +399 -0
- package/deps/simdutf/src/rvv/rvv_utf16fix.cpp +110 -0
- package/deps/simdutf/src/rvv/rvv_utf32_to.inl.cpp +307 -0
- package/deps/simdutf/src/rvv/rvv_utf8_to.inl.cpp +435 -0
- package/deps/simdutf/src/rvv/rvv_validate.inl.cpp +275 -0
- package/deps/simdutf/src/simdutf/arm64/begin.h +2 -0
- package/deps/simdutf/src/simdutf/arm64/bitmanipulation.h +34 -0
- package/deps/simdutf/src/simdutf/arm64/end.h +2 -0
- package/deps/simdutf/src/simdutf/arm64/implementation.h +307 -0
- package/deps/simdutf/src/simdutf/arm64/intrinsics.h +10 -0
- package/deps/simdutf/src/simdutf/arm64/simd.h +547 -0
- package/deps/simdutf/src/simdutf/arm64/simd16-inl.h +403 -0
- package/deps/simdutf/src/simdutf/arm64/simd32-inl.h +129 -0
- package/deps/simdutf/src/simdutf/arm64/simd64-inl.h +28 -0
- package/deps/simdutf/src/simdutf/arm64.h +43 -0
- package/deps/simdutf/src/simdutf/fallback/begin.h +1 -0
- package/deps/simdutf/src/simdutf/fallback/bitmanipulation.h +13 -0
- package/deps/simdutf/src/simdutf/fallback/end.h +1 -0
- package/deps/simdutf/src/simdutf/fallback/implementation.h +331 -0
- package/deps/simdutf/src/simdutf/fallback.h +42 -0
- package/deps/simdutf/src/simdutf/haswell/begin.h +15 -0
- package/deps/simdutf/src/simdutf/haswell/bitmanipulation.h +35 -0
- package/deps/simdutf/src/simdutf/haswell/end.h +13 -0
- package/deps/simdutf/src/simdutf/haswell/implementation.h +338 -0
- package/deps/simdutf/src/simdutf/haswell/intrinsics.h +67 -0
- package/deps/simdutf/src/simdutf/haswell/simd.h +363 -0
- package/deps/simdutf/src/simdutf/haswell/simd16-inl.h +261 -0
- package/deps/simdutf/src/simdutf/haswell/simd32-inl.h +111 -0
- package/deps/simdutf/src/simdutf/haswell/simd64-inl.h +34 -0
- package/deps/simdutf/src/simdutf/haswell.h +63 -0
- package/deps/simdutf/src/simdutf/icelake/begin.h +14 -0
- package/deps/simdutf/src/simdutf/icelake/bitmanipulation.h +44 -0
- package/deps/simdutf/src/simdutf/icelake/end.h +12 -0
- package/deps/simdutf/src/simdutf/icelake/implementation.h +346 -0
- package/deps/simdutf/src/simdutf/icelake/intrinsics.h +138 -0
- package/deps/simdutf/src/simdutf/icelake/simd.h +17 -0
- package/deps/simdutf/src/simdutf/icelake/simd16-inl.h +90 -0
- package/deps/simdutf/src/simdutf/icelake/simd32-inl.h +47 -0
- package/deps/simdutf/src/simdutf/icelake.h +81 -0
- package/deps/simdutf/src/simdutf/lasx/begin.h +8 -0
- package/deps/simdutf/src/simdutf/lasx/bitmanipulation.h +25 -0
- package/deps/simdutf/src/simdutf/lasx/end.h +8 -0
- package/deps/simdutf/src/simdutf/lasx/implementation.h +310 -0
- package/deps/simdutf/src/simdutf/lasx/intrinsics.h +319 -0
- package/deps/simdutf/src/simdutf/lasx/simd.h +551 -0
- package/deps/simdutf/src/simdutf/lasx/simd16-inl.h +234 -0
- package/deps/simdutf/src/simdutf/lasx/simd32-inl.h +74 -0
- package/deps/simdutf/src/simdutf/lasx/simd64-inl.h +52 -0
- package/deps/simdutf/src/simdutf/lasx.h +49 -0
- package/deps/simdutf/src/simdutf/lsx/begin.h +2 -0
- package/deps/simdutf/src/simdutf/lsx/bitmanipulation.h +25 -0
- package/deps/simdutf/src/simdutf/lsx/end.h +2 -0
- package/deps/simdutf/src/simdutf/lsx/implementation.h +309 -0
- package/deps/simdutf/src/simdutf/lsx/intrinsics.h +196 -0
- package/deps/simdutf/src/simdutf/lsx/simd.h +421 -0
- package/deps/simdutf/src/simdutf/lsx/simd16-inl.h +242 -0
- package/deps/simdutf/src/simdutf/lsx/simd32-inl.h +69 -0
- package/deps/simdutf/src/simdutf/lsx/simd64-inl.h +50 -0
- package/deps/simdutf/src/simdutf/lsx.h +52 -0
- package/deps/simdutf/src/simdutf/ppc64/begin.h +1 -0
- package/deps/simdutf/src/simdutf/ppc64/bitmanipulation.h +29 -0
- package/deps/simdutf/src/simdutf/ppc64/end.h +1 -0
- package/deps/simdutf/src/simdutf/ppc64/implementation.h +348 -0
- package/deps/simdutf/src/simdutf/ppc64/intrinsics.h +19 -0
- package/deps/simdutf/src/simdutf/ppc64/simd.h +177 -0
- package/deps/simdutf/src/simdutf/ppc64/simd16-inl.h +327 -0
- package/deps/simdutf/src/simdutf/ppc64/simd32-inl.h +247 -0
- package/deps/simdutf/src/simdutf/ppc64/simd8-inl.h +618 -0
- package/deps/simdutf/src/simdutf/ppc64.h +40 -0
- package/deps/simdutf/src/simdutf/rvv/begin.h +7 -0
- package/deps/simdutf/src/simdutf/rvv/end.h +7 -0
- package/deps/simdutf/src/simdutf/rvv/implementation.h +321 -0
- package/deps/simdutf/src/simdutf/rvv/intrinsics.h +131 -0
- package/deps/simdutf/src/simdutf/rvv.h +41 -0
- package/deps/simdutf/src/simdutf/westmere/begin.h +8 -0
- package/deps/simdutf/src/simdutf/westmere/bitmanipulation.h +37 -0
- package/deps/simdutf/src/simdutf/westmere/end.h +8 -0
- package/deps/simdutf/src/simdutf/westmere/implementation.h +338 -0
- package/deps/simdutf/src/simdutf/westmere/intrinsics.h +38 -0
- package/deps/simdutf/src/simdutf/westmere/simd.h +379 -0
- package/deps/simdutf/src/simdutf/westmere/simd16-inl.h +242 -0
- package/deps/simdutf/src/simdutf/westmere/simd32-inl.h +151 -0
- package/deps/simdutf/src/simdutf/westmere/simd64-inl.h +33 -0
- package/deps/simdutf/src/simdutf/westmere.h +59 -0
- package/deps/simdutf/src/simdutf.cpp +152 -0
- package/deps/simdutf/src/simdutf_c.cpp +525 -0
- package/deps/simdutf/src/tables/utf16_to_utf8_tables.h +768 -0
- package/deps/simdutf/src/tables/utf32_to_utf16_tables.h +53 -0
- package/deps/simdutf/src/tables/utf8_to_utf16_tables.h +826 -0
- package/deps/simdutf/src/westmere/implementation.cpp +1479 -0
- package/deps/simdutf/src/westmere/internal/loader.cpp +7 -0
- package/deps/simdutf/src/westmere/internal/write_v_u16_11bits_to_utf8.cpp +66 -0
- package/deps/simdutf/src/westmere/sse_base64.cpp +672 -0
- package/deps/simdutf/src/westmere/sse_convert_latin1_to_utf16.cpp +21 -0
- package/deps/simdutf/src/westmere/sse_convert_latin1_to_utf32.cpp +31 -0
- package/deps/simdutf/src/westmere/sse_convert_latin1_to_utf8.cpp +71 -0
- package/deps/simdutf/src/westmere/sse_convert_utf16_to_latin1.cpp +70 -0
- package/deps/simdutf/src/westmere/sse_convert_utf16_to_utf32.cpp +206 -0
- package/deps/simdutf/src/westmere/sse_convert_utf16_to_utf8.cpp +504 -0
- package/deps/simdutf/src/westmere/sse_convert_utf32_to_latin1.cpp +82 -0
- package/deps/simdutf/src/westmere/sse_convert_utf32_to_utf16.cpp +209 -0
- package/deps/simdutf/src/westmere/sse_convert_utf32_to_utf8.cpp +589 -0
- package/deps/simdutf/src/westmere/sse_convert_utf8_to_latin1.cpp +58 -0
- package/deps/simdutf/src/westmere/sse_convert_utf8_to_utf16.cpp +197 -0
- package/deps/simdutf/src/westmere/sse_convert_utf8_to_utf32.cpp +141 -0
- package/deps/simdutf/src/westmere/sse_utf16fix.cpp +82 -0
- package/deps/simdutf/src/westmere/sse_validate_utf16.cpp +17 -0
- package/lib/commonjs/argon2.js +51 -2
- package/lib/commonjs/argon2.js.map +1 -1
- package/lib/commonjs/cipher.js +109 -11
- package/lib/commonjs/cipher.js.map +1 -1
- package/lib/commonjs/dsa.js +8 -2
- package/lib/commonjs/dsa.js.map +1 -1
- package/lib/commonjs/hash.js +15 -5
- package/lib/commonjs/hash.js.map +1 -1
- package/lib/commonjs/hkdf.js +33 -6
- package/lib/commonjs/hkdf.js.map +1 -1
- package/lib/commonjs/hmac.js +15 -5
- package/lib/commonjs/hmac.js.map +1 -1
- package/lib/commonjs/keys/publicCipher.js +10 -4
- package/lib/commonjs/keys/publicCipher.js.map +1 -1
- package/lib/commonjs/random.js +11 -2
- package/lib/commonjs/random.js.map +1 -1
- package/lib/commonjs/rsa.js +12 -5
- package/lib/commonjs/rsa.js.map +1 -1
- package/lib/commonjs/scrypt.js +47 -6
- package/lib/commonjs/scrypt.js.map +1 -1
- package/lib/commonjs/subtle.js +76 -5
- package/lib/commonjs/subtle.js.map +1 -1
- package/lib/commonjs/utils/cipher.js +18 -7
- package/lib/commonjs/utils/cipher.js.map +1 -1
- package/lib/commonjs/utils/conversion.js +33 -9
- package/lib/commonjs/utils/conversion.js.map +1 -1
- package/lib/commonjs/utils/timingSafeEqual.js +7 -2
- package/lib/commonjs/utils/timingSafeEqual.js.map +1 -1
- package/lib/commonjs/x509certificate.js +6 -6
- package/lib/commonjs/x509certificate.js.map +1 -1
- package/lib/module/argon2.js +51 -2
- package/lib/module/argon2.js.map +1 -1
- package/lib/module/cipher.js +109 -11
- package/lib/module/cipher.js.map +1 -1
- package/lib/module/dsa.js +8 -2
- package/lib/module/dsa.js.map +1 -1
- package/lib/module/hash.js +15 -5
- package/lib/module/hash.js.map +1 -1
- package/lib/module/hkdf.js +33 -6
- package/lib/module/hkdf.js.map +1 -1
- package/lib/module/hmac.js +15 -5
- package/lib/module/hmac.js.map +1 -1
- package/lib/module/keys/publicCipher.js +10 -4
- package/lib/module/keys/publicCipher.js.map +1 -1
- package/lib/module/random.js +11 -2
- package/lib/module/random.js.map +1 -1
- package/lib/module/rsa.js +11 -4
- package/lib/module/rsa.js.map +1 -1
- package/lib/module/scrypt.js +47 -6
- package/lib/module/scrypt.js.map +1 -1
- package/lib/module/subtle.js +76 -5
- package/lib/module/subtle.js.map +1 -1
- package/lib/module/utils/cipher.js +18 -7
- package/lib/module/utils/cipher.js.map +1 -1
- package/lib/module/utils/conversion.js +33 -9
- package/lib/module/utils/conversion.js.map +1 -1
- package/lib/module/utils/timingSafeEqual.js +8 -3
- package/lib/module/utils/timingSafeEqual.js.map +1 -1
- package/lib/module/x509certificate.js +6 -6
- package/lib/module/x509certificate.js.map +1 -1
- package/lib/typescript/argon2.d.ts.map +1 -1
- package/lib/typescript/cipher.d.ts +2 -2
- package/lib/typescript/cipher.d.ts.map +1 -1
- package/lib/typescript/dsa.d.ts.map +1 -1
- package/lib/typescript/hash.d.ts +2 -2
- package/lib/typescript/hash.d.ts.map +1 -1
- package/lib/typescript/hkdf.d.ts.map +1 -1
- package/lib/typescript/hmac.d.ts +2 -2
- package/lib/typescript/hmac.d.ts.map +1 -1
- package/lib/typescript/index.d.ts +1 -1
- package/lib/typescript/index.d.ts.map +1 -1
- package/lib/typescript/keys/publicCipher.d.ts.map +1 -1
- package/lib/typescript/random.d.ts.map +1 -1
- package/lib/typescript/rsa.d.ts.map +1 -1
- package/lib/typescript/scrypt.d.ts.map +1 -1
- package/lib/typescript/specs/utils.nitro.d.ts +0 -2
- package/lib/typescript/specs/utils.nitro.d.ts.map +1 -1
- package/lib/typescript/subtle.d.ts.map +1 -1
- package/lib/typescript/utils/cipher.d.ts +13 -1
- package/lib/typescript/utils/cipher.d.ts.map +1 -1
- package/lib/typescript/utils/conversion.d.ts +9 -6
- package/lib/typescript/utils/conversion.d.ts.map +1 -1
- package/lib/typescript/utils/timingSafeEqual.d.ts.map +1 -1
- package/lib/typescript/x509certificate.d.ts.map +1 -1
- package/nitrogen/generated/shared/c++/HybridUtilsSpec.cpp +0 -2
- package/nitrogen/generated/shared/c++/HybridUtilsSpec.hpp +0 -3
- package/package.json +38 -6
- package/src/argon2.ts +80 -2
- package/src/cipher.ts +139 -15
- package/src/dsa.ts +11 -2
- package/src/hash.ts +17 -7
- package/src/hkdf.ts +44 -6
- package/src/hmac.ts +17 -7
- package/src/keys/publicCipher.ts +10 -4
- package/src/random.ts +11 -2
- package/src/rsa.ts +18 -4
- package/src/scrypt.ts +73 -6
- package/src/specs/utils.nitro.ts +0 -2
- package/src/subtle.ts +90 -8
- package/src/utils/cipher.ts +30 -8
- package/src/utils/conversion.ts +58 -20
- package/src/utils/timingSafeEqual.ts +8 -3
- package/src/x509certificate.ts +5 -6
- package/deps/blake3/.cargo/config.toml +0 -2
- package/deps/blake3/.git-blame-ignore-revs +0 -2
- package/deps/blake3/.github/workflows/build_b3sum.py +0 -38
- package/deps/blake3/.github/workflows/ci.yml +0 -491
- package/deps/blake3/.github/workflows/tag.yml +0 -43
- package/deps/blake3/.github/workflows/upload_github_release_asset.py +0 -73
- package/deps/blake3/CONTRIBUTING.md +0 -31
- package/deps/blake3/Cargo.toml +0 -135
- package/deps/blake3/b3sum/Cargo.lock +0 -513
- package/deps/blake3/b3sum/Cargo.toml +0 -26
- package/deps/blake3/b3sum/README.md +0 -72
- package/deps/blake3/b3sum/src/main.rs +0 -564
- package/deps/blake3/b3sum/src/unit_tests.rs +0 -235
- package/deps/blake3/b3sum/tests/cli_tests.rs +0 -680
- package/deps/blake3/b3sum/what_does_check_do.md +0 -176
- package/deps/blake3/benches/bench.rs +0 -623
- package/deps/blake3/build.rs +0 -389
- package/deps/blake3/c/CMakeLists.txt +0 -383
- package/deps/blake3/c/CMakePresets.json +0 -73
- package/deps/blake3/c/Makefile.testing +0 -82
- package/deps/blake3/c/blake3-config.cmake.in +0 -14
- package/deps/blake3/c/blake3_avx2.c +0 -326
- package/deps/blake3/c/blake3_avx2_x86-64_unix.S +0 -1815
- package/deps/blake3/c/blake3_avx2_x86-64_windows_gnu.S +0 -1817
- package/deps/blake3/c/blake3_avx2_x86-64_windows_msvc.asm +0 -1828
- package/deps/blake3/c/blake3_avx512.c +0 -1388
- package/deps/blake3/c/blake3_avx512_x86-64_unix.S +0 -4824
- package/deps/blake3/c/blake3_avx512_x86-64_windows_gnu.S +0 -2615
- package/deps/blake3/c/blake3_avx512_x86-64_windows_msvc.asm +0 -2634
- package/deps/blake3/c/blake3_c_rust_bindings/Cargo.toml +0 -32
- package/deps/blake3/c/blake3_c_rust_bindings/README.md +0 -4
- package/deps/blake3/c/blake3_c_rust_bindings/benches/bench.rs +0 -477
- package/deps/blake3/c/blake3_c_rust_bindings/build.rs +0 -253
- package/deps/blake3/c/blake3_c_rust_bindings/cross_test.sh +0 -31
- package/deps/blake3/c/blake3_c_rust_bindings/src/lib.rs +0 -333
- package/deps/blake3/c/blake3_c_rust_bindings/src/test.rs +0 -696
- package/deps/blake3/c/blake3_sse2.c +0 -566
- package/deps/blake3/c/blake3_sse2_x86-64_unix.S +0 -2291
- package/deps/blake3/c/blake3_sse2_x86-64_windows_gnu.S +0 -2332
- package/deps/blake3/c/blake3_sse2_x86-64_windows_msvc.asm +0 -2350
- package/deps/blake3/c/blake3_sse41.c +0 -560
- package/deps/blake3/c/blake3_sse41_x86-64_unix.S +0 -2028
- package/deps/blake3/c/blake3_sse41_x86-64_windows_gnu.S +0 -2069
- package/deps/blake3/c/blake3_sse41_x86-64_windows_msvc.asm +0 -2089
- package/deps/blake3/c/blake3_tbb.cpp +0 -37
- package/deps/blake3/c/dependencies/CMakeLists.txt +0 -3
- package/deps/blake3/c/dependencies/tbb/CMakeLists.txt +0 -28
- package/deps/blake3/c/example.c +0 -36
- package/deps/blake3/c/example_tbb.c +0 -57
- package/deps/blake3/c/libblake3.pc.in +0 -12
- package/deps/blake3/c/main.c +0 -166
- package/deps/blake3/c/test.py +0 -97
- package/deps/blake3/media/B3.svg +0 -70
- package/deps/blake3/media/BLAKE3.svg +0 -85
- package/deps/blake3/media/speed.svg +0 -1474
- package/deps/blake3/reference_impl/Cargo.toml +0 -8
- package/deps/blake3/reference_impl/README.md +0 -14
- package/deps/blake3/reference_impl/reference_impl.rs +0 -374
- package/deps/blake3/src/ffi_avx2.rs +0 -65
- package/deps/blake3/src/ffi_avx512.rs +0 -169
- package/deps/blake3/src/ffi_neon.rs +0 -82
- package/deps/blake3/src/ffi_sse2.rs +0 -126
- package/deps/blake3/src/ffi_sse41.rs +0 -126
- package/deps/blake3/src/guts.rs +0 -60
- package/deps/blake3/src/hazmat.rs +0 -704
- package/deps/blake3/src/io.rs +0 -64
- package/deps/blake3/src/join.rs +0 -92
- package/deps/blake3/src/lib.rs +0 -1835
- package/deps/blake3/src/platform.rs +0 -587
- package/deps/blake3/src/portable.rs +0 -198
- package/deps/blake3/src/rust_avx2.rs +0 -474
- package/deps/blake3/src/rust_sse2.rs +0 -775
- package/deps/blake3/src/rust_sse41.rs +0 -766
- package/deps/blake3/src/test.rs +0 -1049
- package/deps/blake3/src/traits.rs +0 -227
- package/deps/blake3/src/wasm32_simd.rs +0 -794
- package/deps/blake3/test_vectors/Cargo.toml +0 -19
- package/deps/blake3/test_vectors/cross_test.sh +0 -25
- package/deps/blake3/test_vectors/src/bin/generate.rs +0 -4
- package/deps/blake3/test_vectors/src/lib.rs +0 -350
- package/deps/blake3/test_vectors/test_vectors.json +0 -217
- package/deps/blake3/tools/compiler_version/Cargo.toml +0 -7
- package/deps/blake3/tools/compiler_version/build.rs +0 -6
- package/deps/blake3/tools/compiler_version/src/main.rs +0 -27
- package/deps/blake3/tools/instruction_set_support/Cargo.toml +0 -6
- package/deps/blake3/tools/instruction_set_support/src/main.rs +0 -10
- package/deps/blake3/tools/release.md +0 -16
- package/deps/ncrypto/.bazelignore +0 -4
- package/deps/ncrypto/.bazelrc +0 -1
- package/deps/ncrypto/.bazelversion +0 -1
- package/deps/ncrypto/.clang-format +0 -111
- package/deps/ncrypto/.github/workflows/bazel.yml +0 -58
- package/deps/ncrypto/.github/workflows/commitlint.yml +0 -16
- package/deps/ncrypto/.github/workflows/linter.yml +0 -38
- package/deps/ncrypto/.github/workflows/macos.yml +0 -43
- package/deps/ncrypto/.github/workflows/release-please.yml +0 -16
- package/deps/ncrypto/.github/workflows/ubuntu.yml +0 -128
- package/deps/ncrypto/.github/workflows/visual-studio.yml +0 -49
- package/deps/ncrypto/.python-version +0 -1
- package/deps/ncrypto/.release-please-manifest.json +0 -3
- package/deps/ncrypto/BUILD.bazel +0 -44
- package/deps/ncrypto/CHANGELOG.md +0 -37
- package/deps/ncrypto/CMakeLists.txt +0 -79
- package/deps/ncrypto/MODULE.bazel +0 -16
- package/deps/ncrypto/MODULE.bazel.lock +0 -461
- package/deps/ncrypto/cmake/CPM.cmake +0 -1225
- package/deps/ncrypto/cmake/ncrypto-flags.cmake +0 -17
- package/deps/ncrypto/ncrypto.pc.in +0 -10
- package/deps/ncrypto/patches/0001-Expose-libdecrepit-so-NodeJS-can-use-it-for-ncrypto.patch +0 -28
- package/deps/ncrypto/pyproject.toml +0 -38
- package/deps/ncrypto/release-please-config.json +0 -11
- package/deps/ncrypto/src/CMakeLists.txt +0 -40
- package/deps/ncrypto/tests/BUILD.bazel +0 -11
- package/deps/ncrypto/tests/CMakeLists.txt +0 -7
- package/deps/ncrypto/tests/basic.cpp +0 -856
- package/deps/ncrypto/tools/run-clang-format.sh +0 -42
- package/lib/tsconfig.tsbuildinfo +0 -1
|
@@ -0,0 +1,326 @@
|
|
|
1
|
+
#ifndef SIMDUTF_UTF8_H
|
|
2
|
+
#define SIMDUTF_UTF8_H
|
|
3
|
+
|
|
4
|
+
namespace simdutf {
|
|
5
|
+
namespace scalar {
|
|
6
|
+
namespace {
|
|
7
|
+
namespace utf8 {
|
|
8
|
+
|
|
9
|
+
// credit: based on code from Google Fuchsia (Apache Licensed)
|
|
10
|
+
template <class BytePtr>
|
|
11
|
+
simdutf_constexpr23 simdutf_warn_unused bool validate(BytePtr data,
|
|
12
|
+
size_t len) noexcept {
|
|
13
|
+
static_assert(
|
|
14
|
+
std::is_same<typename std::decay<decltype(*data)>::type, uint8_t>::value,
|
|
15
|
+
"dereferencing the data pointer must result in a uint8_t");
|
|
16
|
+
uint64_t pos = 0;
|
|
17
|
+
uint32_t code_point = 0;
|
|
18
|
+
while (pos < len) {
|
|
19
|
+
uint64_t next_pos;
|
|
20
|
+
#if SIMDUTF_CPLUSPLUS23
|
|
21
|
+
if !consteval
|
|
22
|
+
#endif
|
|
23
|
+
{ // check if the next 16 bytes are ascii.
|
|
24
|
+
next_pos = pos + 16;
|
|
25
|
+
if (next_pos <= len) { // if it is safe to read 16 more bytes, check
|
|
26
|
+
// that they are ascii
|
|
27
|
+
uint64_t v1{};
|
|
28
|
+
std::memcpy(&v1, data + pos, sizeof(uint64_t));
|
|
29
|
+
uint64_t v2{};
|
|
30
|
+
std::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
|
|
31
|
+
uint64_t v{v1 | v2};
|
|
32
|
+
if ((v & 0x8080808080808080) == 0) {
|
|
33
|
+
pos = next_pos;
|
|
34
|
+
continue;
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
unsigned char byte = data[pos];
|
|
40
|
+
|
|
41
|
+
while (byte < 0b10000000) {
|
|
42
|
+
if (++pos == len) {
|
|
43
|
+
return true;
|
|
44
|
+
}
|
|
45
|
+
byte = data[pos];
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
if ((byte & 0b11100000) == 0b11000000) {
|
|
49
|
+
next_pos = pos + 2;
|
|
50
|
+
if (next_pos > len) {
|
|
51
|
+
return false;
|
|
52
|
+
}
|
|
53
|
+
if ((data[pos + 1] & 0b11000000) != 0b10000000) {
|
|
54
|
+
return false;
|
|
55
|
+
}
|
|
56
|
+
// range check
|
|
57
|
+
code_point = (byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111);
|
|
58
|
+
if ((code_point < 0x80) || (0x7ff < code_point)) {
|
|
59
|
+
return false;
|
|
60
|
+
}
|
|
61
|
+
} else if ((byte & 0b11110000) == 0b11100000) {
|
|
62
|
+
next_pos = pos + 3;
|
|
63
|
+
if (next_pos > len) {
|
|
64
|
+
return false;
|
|
65
|
+
}
|
|
66
|
+
if ((data[pos + 1] & 0b11000000) != 0b10000000) {
|
|
67
|
+
return false;
|
|
68
|
+
}
|
|
69
|
+
if ((data[pos + 2] & 0b11000000) != 0b10000000) {
|
|
70
|
+
return false;
|
|
71
|
+
}
|
|
72
|
+
// range check
|
|
73
|
+
code_point = (byte & 0b00001111) << 12 |
|
|
74
|
+
(data[pos + 1] & 0b00111111) << 6 |
|
|
75
|
+
(data[pos + 2] & 0b00111111);
|
|
76
|
+
if ((code_point < 0x800) || (0xffff < code_point) ||
|
|
77
|
+
(0xd7ff < code_point && code_point < 0xe000)) {
|
|
78
|
+
return false;
|
|
79
|
+
}
|
|
80
|
+
} else if ((byte & 0b11111000) == 0b11110000) { // 0b11110000
|
|
81
|
+
next_pos = pos + 4;
|
|
82
|
+
if (next_pos > len) {
|
|
83
|
+
return false;
|
|
84
|
+
}
|
|
85
|
+
if ((data[pos + 1] & 0b11000000) != 0b10000000) {
|
|
86
|
+
return false;
|
|
87
|
+
}
|
|
88
|
+
if ((data[pos + 2] & 0b11000000) != 0b10000000) {
|
|
89
|
+
return false;
|
|
90
|
+
}
|
|
91
|
+
if ((data[pos + 3] & 0b11000000) != 0b10000000) {
|
|
92
|
+
return false;
|
|
93
|
+
}
|
|
94
|
+
// range check
|
|
95
|
+
code_point =
|
|
96
|
+
(byte & 0b00000111) << 18 | (data[pos + 1] & 0b00111111) << 12 |
|
|
97
|
+
(data[pos + 2] & 0b00111111) << 6 | (data[pos + 3] & 0b00111111);
|
|
98
|
+
if (code_point <= 0xffff || 0x10ffff < code_point) {
|
|
99
|
+
return false;
|
|
100
|
+
}
|
|
101
|
+
} else {
|
|
102
|
+
// we may have a continuation
|
|
103
|
+
return false;
|
|
104
|
+
}
|
|
105
|
+
pos = next_pos;
|
|
106
|
+
}
|
|
107
|
+
return true;
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
// Overload for `char` buffers: reinterprets the buffer as bytes and forwards
// to the templated validate() above.
simdutf_really_inline simdutf_warn_unused bool validate(const char *buf,
                                                        size_t len) noexcept {
  const uint8_t *const bytes = reinterpret_cast<const uint8_t *>(buf);
  return validate(bytes, len);
}
|
|
114
|
+
|
|
115
|
+
template <class BytePtr>
|
|
116
|
+
simdutf_constexpr23 simdutf_warn_unused result
|
|
117
|
+
validate_with_errors(BytePtr data, size_t len) noexcept {
|
|
118
|
+
static_assert(
|
|
119
|
+
std::is_same<typename std::decay<decltype(*data)>::type, uint8_t>::value,
|
|
120
|
+
"dereferencing the data pointer must result in a uint8_t");
|
|
121
|
+
size_t pos = 0;
|
|
122
|
+
uint32_t code_point = 0;
|
|
123
|
+
while (pos < len) {
|
|
124
|
+
// check of the next 16 bytes are ascii.
|
|
125
|
+
size_t next_pos = pos + 16;
|
|
126
|
+
if (next_pos <=
|
|
127
|
+
len) { // if it is safe to read 16 more bytes, check that they are ascii
|
|
128
|
+
uint64_t v1;
|
|
129
|
+
std::memcpy(&v1, data + pos, sizeof(uint64_t));
|
|
130
|
+
uint64_t v2;
|
|
131
|
+
std::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
|
|
132
|
+
uint64_t v{v1 | v2};
|
|
133
|
+
if ((v & 0x8080808080808080) == 0) {
|
|
134
|
+
pos = next_pos;
|
|
135
|
+
continue;
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
unsigned char byte = data[pos];
|
|
139
|
+
|
|
140
|
+
while (byte < 0b10000000) {
|
|
141
|
+
if (++pos == len) {
|
|
142
|
+
return result(error_code::SUCCESS, len);
|
|
143
|
+
}
|
|
144
|
+
byte = data[pos];
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
if ((byte & 0b11100000) == 0b11000000) {
|
|
148
|
+
next_pos = pos + 2;
|
|
149
|
+
if (next_pos > len) {
|
|
150
|
+
return result(error_code::TOO_SHORT, pos);
|
|
151
|
+
}
|
|
152
|
+
if ((data[pos + 1] & 0b11000000) != 0b10000000) {
|
|
153
|
+
return result(error_code::TOO_SHORT, pos);
|
|
154
|
+
}
|
|
155
|
+
// range check
|
|
156
|
+
code_point = (byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111);
|
|
157
|
+
if ((code_point < 0x80) || (0x7ff < code_point)) {
|
|
158
|
+
return result(error_code::OVERLONG, pos);
|
|
159
|
+
}
|
|
160
|
+
} else if ((byte & 0b11110000) == 0b11100000) {
|
|
161
|
+
next_pos = pos + 3;
|
|
162
|
+
if (next_pos > len) {
|
|
163
|
+
return result(error_code::TOO_SHORT, pos);
|
|
164
|
+
}
|
|
165
|
+
if ((data[pos + 1] & 0b11000000) != 0b10000000) {
|
|
166
|
+
return result(error_code::TOO_SHORT, pos);
|
|
167
|
+
}
|
|
168
|
+
if ((data[pos + 2] & 0b11000000) != 0b10000000) {
|
|
169
|
+
return result(error_code::TOO_SHORT, pos);
|
|
170
|
+
}
|
|
171
|
+
// range check
|
|
172
|
+
code_point = (byte & 0b00001111) << 12 |
|
|
173
|
+
(data[pos + 1] & 0b00111111) << 6 |
|
|
174
|
+
(data[pos + 2] & 0b00111111);
|
|
175
|
+
if ((code_point < 0x800) || (0xffff < code_point)) {
|
|
176
|
+
return result(error_code::OVERLONG, pos);
|
|
177
|
+
}
|
|
178
|
+
if (0xd7ff < code_point && code_point < 0xe000) {
|
|
179
|
+
return result(error_code::SURROGATE, pos);
|
|
180
|
+
}
|
|
181
|
+
} else if ((byte & 0b11111000) == 0b11110000) { // 0b11110000
|
|
182
|
+
next_pos = pos + 4;
|
|
183
|
+
if (next_pos > len) {
|
|
184
|
+
return result(error_code::TOO_SHORT, pos);
|
|
185
|
+
}
|
|
186
|
+
if ((data[pos + 1] & 0b11000000) != 0b10000000) {
|
|
187
|
+
return result(error_code::TOO_SHORT, pos);
|
|
188
|
+
}
|
|
189
|
+
if ((data[pos + 2] & 0b11000000) != 0b10000000) {
|
|
190
|
+
return result(error_code::TOO_SHORT, pos);
|
|
191
|
+
}
|
|
192
|
+
if ((data[pos + 3] & 0b11000000) != 0b10000000) {
|
|
193
|
+
return result(error_code::TOO_SHORT, pos);
|
|
194
|
+
}
|
|
195
|
+
// range check
|
|
196
|
+
code_point =
|
|
197
|
+
(byte & 0b00000111) << 18 | (data[pos + 1] & 0b00111111) << 12 |
|
|
198
|
+
(data[pos + 2] & 0b00111111) << 6 | (data[pos + 3] & 0b00111111);
|
|
199
|
+
if (code_point <= 0xffff) {
|
|
200
|
+
return result(error_code::OVERLONG, pos);
|
|
201
|
+
}
|
|
202
|
+
if (0x10ffff < code_point) {
|
|
203
|
+
return result(error_code::TOO_LARGE, pos);
|
|
204
|
+
}
|
|
205
|
+
} else {
|
|
206
|
+
// we either have too many continuation bytes or an invalid leading byte
|
|
207
|
+
if ((byte & 0b11000000) == 0b10000000) {
|
|
208
|
+
return result(error_code::TOO_LONG, pos);
|
|
209
|
+
} else {
|
|
210
|
+
return result(error_code::HEADER_BITS, pos);
|
|
211
|
+
}
|
|
212
|
+
}
|
|
213
|
+
pos = next_pos;
|
|
214
|
+
}
|
|
215
|
+
return result(error_code::SUCCESS, len);
|
|
216
|
+
}
|
|
217
|
+
|
|
218
|
+
// Overload for `char` buffers: reinterprets the buffer as bytes and forwards
// to the templated validate_with_errors() above.
simdutf_really_inline simdutf_warn_unused result
validate_with_errors(const char *buf, size_t len) noexcept {
  const uint8_t *const bytes = reinterpret_cast<const uint8_t *>(buf);
  return validate_with_errors(bytes, len);
}
|
|
222
|
+
|
|
223
|
+
// Finds the previous leading byte starting backward from buf and validates with
|
|
224
|
+
// errors from there Used to pinpoint the location of an error when an invalid
|
|
225
|
+
// chunk is detected We assume that the stream starts with a leading byte, and
|
|
226
|
+
// to check that it is the case, we ask that you pass a pointer to the start of
|
|
227
|
+
// the stream (start).
|
|
228
|
+
inline simdutf_warn_unused result rewind_and_validate_with_errors(
|
|
229
|
+
const char *start, const char *buf, size_t len) noexcept {
|
|
230
|
+
// First check that we start with a leading byte
|
|
231
|
+
if ((*start & 0b11000000) == 0b10000000) {
|
|
232
|
+
return result(error_code::TOO_LONG, 0);
|
|
233
|
+
}
|
|
234
|
+
size_t extra_len{0};
|
|
235
|
+
// A leading byte cannot be further than 4 bytes away
|
|
236
|
+
for (int i = 0; i < 5; i++) {
|
|
237
|
+
unsigned char byte = *buf;
|
|
238
|
+
if ((byte & 0b11000000) != 0b10000000) {
|
|
239
|
+
break;
|
|
240
|
+
} else {
|
|
241
|
+
buf--;
|
|
242
|
+
extra_len++;
|
|
243
|
+
}
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
result res = validate_with_errors(buf, len + extra_len);
|
|
247
|
+
res.count -= extra_len;
|
|
248
|
+
return res;
|
|
249
|
+
}
|
|
250
|
+
|
|
251
|
+
template <typename InputPtr>
|
|
252
|
+
#if SIMDUTF_CPLUSPLUS20
|
|
253
|
+
requires simdutf::detail::indexes_into_byte_like<InputPtr>
|
|
254
|
+
#endif
|
|
255
|
+
simdutf_constexpr23 size_t count_code_points(InputPtr data, size_t len) {
|
|
256
|
+
size_t counter{0};
|
|
257
|
+
for (size_t i = 0; i < len; i++) {
|
|
258
|
+
// -65 is 0b10111111, anything larger in two-complement's should start a new
|
|
259
|
+
// code point.
|
|
260
|
+
if (int8_t(data[i]) > -65) {
|
|
261
|
+
counter++;
|
|
262
|
+
}
|
|
263
|
+
}
|
|
264
|
+
return counter;
|
|
265
|
+
}
|
|
266
|
+
|
|
267
|
+
template <typename InputPtr>
|
|
268
|
+
#if SIMDUTF_CPLUSPLUS20
|
|
269
|
+
requires simdutf::detail::indexes_into_byte_like<InputPtr>
|
|
270
|
+
#endif
|
|
271
|
+
simdutf_constexpr23 size_t utf16_length_from_utf8(InputPtr data, size_t len) {
|
|
272
|
+
size_t counter{0};
|
|
273
|
+
for (size_t i = 0; i < len; i++) {
|
|
274
|
+
if (int8_t(data[i]) > -65) {
|
|
275
|
+
counter++;
|
|
276
|
+
}
|
|
277
|
+
if (uint8_t(data[i]) >= 240) {
|
|
278
|
+
counter++;
|
|
279
|
+
}
|
|
280
|
+
}
|
|
281
|
+
return counter;
|
|
282
|
+
}
|
|
283
|
+
|
|
284
|
+
template <typename InputPtr>
|
|
285
|
+
#if SIMDUTF_CPLUSPLUS20
|
|
286
|
+
requires simdutf::detail::indexes_into_byte_like<InputPtr>
|
|
287
|
+
#endif
|
|
288
|
+
simdutf_warn_unused simdutf_constexpr23 size_t
|
|
289
|
+
trim_partial_utf8(InputPtr input, size_t length) {
|
|
290
|
+
if (length < 3) {
|
|
291
|
+
switch (length) {
|
|
292
|
+
case 2:
|
|
293
|
+
if (uint8_t(input[length - 1]) >= 0xc0) {
|
|
294
|
+
return length - 1;
|
|
295
|
+
} // 2-, 3- and 4-byte characters with only 1 byte left
|
|
296
|
+
if (uint8_t(input[length - 2]) >= 0xe0) {
|
|
297
|
+
return length - 2;
|
|
298
|
+
} // 3- and 4-byte characters with only 2 bytes left
|
|
299
|
+
return length;
|
|
300
|
+
case 1:
|
|
301
|
+
if (uint8_t(input[length - 1]) >= 0xc0) {
|
|
302
|
+
return length - 1;
|
|
303
|
+
} // 2-, 3- and 4-byte characters with only 1 byte left
|
|
304
|
+
return length;
|
|
305
|
+
case 0:
|
|
306
|
+
return length;
|
|
307
|
+
}
|
|
308
|
+
}
|
|
309
|
+
if (uint8_t(input[length - 1]) >= 0xc0) {
|
|
310
|
+
return length - 1;
|
|
311
|
+
} // 2-, 3- and 4-byte characters with only 1 byte left
|
|
312
|
+
if (uint8_t(input[length - 2]) >= 0xe0) {
|
|
313
|
+
return length - 2;
|
|
314
|
+
} // 3- and 4-byte characters with only 1 byte left
|
|
315
|
+
if (uint8_t(input[length - 3]) >= 0xf0) {
|
|
316
|
+
return length - 3;
|
|
317
|
+
} // 4-byte characters with only 3 bytes left
|
|
318
|
+
return length;
|
|
319
|
+
}
|
|
320
|
+
|
|
321
|
+
} // namespace utf8
|
|
322
|
+
} // unnamed namespace
|
|
323
|
+
} // namespace scalar
|
|
324
|
+
} // namespace simdutf
|
|
325
|
+
|
|
326
|
+
#endif
|
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
#ifndef SIMDUTF_UTF8_TO_LATIN1_H
|
|
2
|
+
#define SIMDUTF_UTF8_TO_LATIN1_H
|
|
3
|
+
|
|
4
|
+
namespace simdutf {
|
|
5
|
+
namespace scalar {
|
|
6
|
+
namespace {
|
|
7
|
+
namespace utf8_to_latin1 {
|
|
8
|
+
|
|
9
|
+
template <typename InputPtr, typename OutputPtr>
|
|
10
|
+
#if SIMDUTF_CPLUSPLUS20
|
|
11
|
+
requires(simdutf::detail::indexes_into_byte_like<InputPtr> &&
|
|
12
|
+
simdutf::detail::indexes_into_byte_like<OutputPtr>)
|
|
13
|
+
#endif
|
|
14
|
+
simdutf_constexpr23 size_t convert(InputPtr data, size_t len,
|
|
15
|
+
OutputPtr latin_output) {
|
|
16
|
+
size_t pos = 0;
|
|
17
|
+
auto start = latin_output;
|
|
18
|
+
|
|
19
|
+
while (pos < len) {
|
|
20
|
+
#if SIMDUTF_CPLUSPLUS23
|
|
21
|
+
if !consteval
|
|
22
|
+
#endif
|
|
23
|
+
{
|
|
24
|
+
// try to convert the next block of 16 ASCII bytes
|
|
25
|
+
if (pos + 16 <= len) { // if it is safe to read 16 more bytes, check that
|
|
26
|
+
// they are ascii
|
|
27
|
+
uint64_t v1;
|
|
28
|
+
::memcpy(&v1, data + pos, sizeof(uint64_t));
|
|
29
|
+
uint64_t v2;
|
|
30
|
+
::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
|
|
31
|
+
uint64_t v{v1 | v2}; // We are only interested in these bits: 1000 1000
|
|
32
|
+
// 1000 1000 .... etc
|
|
33
|
+
if ((v & 0x8080808080808080) ==
|
|
34
|
+
0) { // if NONE of these are set, e.g. all of them are zero, then
|
|
35
|
+
// everything is ASCII
|
|
36
|
+
size_t final_pos = pos + 16;
|
|
37
|
+
while (pos < final_pos) {
|
|
38
|
+
*latin_output++ = char(data[pos]);
|
|
39
|
+
pos++;
|
|
40
|
+
}
|
|
41
|
+
continue;
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
// suppose it is not an all ASCII byte sequence
|
|
47
|
+
uint8_t leading_byte = data[pos]; // leading byte
|
|
48
|
+
if (leading_byte < 0b10000000) {
|
|
49
|
+
// converting one ASCII byte !!!
|
|
50
|
+
*latin_output++ = char(leading_byte);
|
|
51
|
+
pos++;
|
|
52
|
+
} else if ((leading_byte & 0b11100000) ==
|
|
53
|
+
0b11000000) { // the first three bits indicate:
|
|
54
|
+
// We have a two-byte UTF-8
|
|
55
|
+
if (pos + 1 >= len) {
|
|
56
|
+
return 0;
|
|
57
|
+
} // minimal bound checking
|
|
58
|
+
if ((data[pos + 1] & 0b11000000) != 0b10000000) {
|
|
59
|
+
return 0;
|
|
60
|
+
} // checks if the next byte is a valid continuation byte in UTF-8. A
|
|
61
|
+
// valid continuation byte starts with 10.
|
|
62
|
+
// range check -
|
|
63
|
+
uint32_t code_point =
|
|
64
|
+
(leading_byte & 0b00011111) << 6 |
|
|
65
|
+
(data[pos + 1] &
|
|
66
|
+
0b00111111); // assembles the Unicode code point from the two bytes.
|
|
67
|
+
// It does this by discarding the leading 110 and 10
|
|
68
|
+
// bits from the two bytes, shifting the remaining bits
|
|
69
|
+
// of the first byte, and then combining the results
|
|
70
|
+
// with a bitwise OR operation.
|
|
71
|
+
if (code_point < 0x80 || 0xFF < code_point) {
|
|
72
|
+
return 0; // We only care about the range 129-255 which is Non-ASCII
|
|
73
|
+
// latin1 characters. A code_point beneath 0x80 is invalid as
|
|
74
|
+
// it is already covered by bytes whose leading bit is zero.
|
|
75
|
+
}
|
|
76
|
+
*latin_output++ = char(code_point);
|
|
77
|
+
pos += 2;
|
|
78
|
+
} else {
|
|
79
|
+
return 0;
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
return latin_output - start;
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
template <typename InputPtr>
|
|
86
|
+
#if SIMDUTF_CPLUSPLUS20
|
|
87
|
+
requires simdutf::detail::indexes_into_byte_like<InputPtr>
|
|
88
|
+
#endif
|
|
89
|
+
simdutf_constexpr23 result convert_with_errors(InputPtr data, size_t len,
|
|
90
|
+
char *latin_output) {
|
|
91
|
+
size_t pos = 0;
|
|
92
|
+
char *start{latin_output};
|
|
93
|
+
|
|
94
|
+
while (pos < len) {
|
|
95
|
+
#if SIMDUTF_CPLUSPLUS23
|
|
96
|
+
if !consteval
|
|
97
|
+
#endif
|
|
98
|
+
{
|
|
99
|
+
// try to convert the next block of 16 ASCII bytes
|
|
100
|
+
if (pos + 16 <= len) { // if it is safe to read 16 more bytes, check that
|
|
101
|
+
// they are ascii
|
|
102
|
+
uint64_t v1;
|
|
103
|
+
::memcpy(&v1, data + pos, sizeof(uint64_t));
|
|
104
|
+
uint64_t v2;
|
|
105
|
+
::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
|
|
106
|
+
uint64_t v{v1 | v2}; // We are only interested in these bits: 1000 1000
|
|
107
|
+
// 1000 1000...etc
|
|
108
|
+
if ((v & 0x8080808080808080) ==
|
|
109
|
+
0) { // if NONE of these are set, e.g. all of them are zero, then
|
|
110
|
+
// everything is ASCII
|
|
111
|
+
size_t final_pos = pos + 16;
|
|
112
|
+
while (pos < final_pos) {
|
|
113
|
+
*latin_output++ = char(data[pos]);
|
|
114
|
+
pos++;
|
|
115
|
+
}
|
|
116
|
+
continue;
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
}
|
|
120
|
+
// suppose it is not an all ASCII byte sequence
|
|
121
|
+
uint8_t leading_byte = data[pos]; // leading byte
|
|
122
|
+
if (leading_byte < 0b10000000) {
|
|
123
|
+
// converting one ASCII byte !!!
|
|
124
|
+
*latin_output++ = char(leading_byte);
|
|
125
|
+
pos++;
|
|
126
|
+
} else if ((leading_byte & 0b11100000) ==
|
|
127
|
+
0b11000000) { // the first three bits indicate:
|
|
128
|
+
// We have a two-byte UTF-8
|
|
129
|
+
if (pos + 1 >= len) {
|
|
130
|
+
return result(error_code::TOO_SHORT, pos);
|
|
131
|
+
} // minimal bound checking
|
|
132
|
+
if ((data[pos + 1] & 0b11000000) != 0b10000000) {
|
|
133
|
+
return result(error_code::TOO_SHORT, pos);
|
|
134
|
+
} // checks if the next byte is a valid continuation byte in UTF-8. A
|
|
135
|
+
// valid continuation byte starts with 10.
|
|
136
|
+
// range check -
|
|
137
|
+
uint32_t code_point =
|
|
138
|
+
(leading_byte & 0b00011111) << 6 |
|
|
139
|
+
(data[pos + 1] &
|
|
140
|
+
0b00111111); // assembles the Unicode code point from the two bytes.
|
|
141
|
+
// It does this by discarding the leading 110 and 10
|
|
142
|
+
// bits from the two bytes, shifting the remaining bits
|
|
143
|
+
// of the first byte, and then combining the results
|
|
144
|
+
// with a bitwise OR operation.
|
|
145
|
+
if (code_point < 0x80) {
|
|
146
|
+
return result(error_code::OVERLONG, pos);
|
|
147
|
+
}
|
|
148
|
+
if (0xFF < code_point) {
|
|
149
|
+
return result(error_code::TOO_LARGE, pos);
|
|
150
|
+
} // We only care about the range 129-255 which is Non-ASCII latin1
|
|
151
|
+
// characters
|
|
152
|
+
*latin_output++ = char(code_point);
|
|
153
|
+
pos += 2;
|
|
154
|
+
} else if ((leading_byte & 0b11110000) == 0b11100000) {
|
|
155
|
+
// We have a three-byte UTF-8
|
|
156
|
+
return result(error_code::TOO_LARGE, pos);
|
|
157
|
+
} else if ((leading_byte & 0b11111000) == 0b11110000) { // 0b11110000
|
|
158
|
+
// we have a 4-byte UTF-8 word.
|
|
159
|
+
return result(error_code::TOO_LARGE, pos);
|
|
160
|
+
} else {
|
|
161
|
+
// we either have too many continuation bytes or an invalid leading byte
|
|
162
|
+
if ((leading_byte & 0b11000000) == 0b10000000) {
|
|
163
|
+
return result(error_code::TOO_LONG, pos);
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
return result(error_code::HEADER_BITS, pos);
|
|
167
|
+
}
|
|
168
|
+
}
|
|
169
|
+
return result(error_code::SUCCESS, latin_output - start);
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
// Steps backward from `buf` (by at most `prior_bytes` bytes) to the nearest
// byte that is not a continuation byte, then runs convert_with_errors from
// that position.
//
// The count of an error result is adjusted by the number of bytes rewound so
// that it is relative to the original `buf`. This means the count can be
// "negative": it is a size_t, so the value simply wraps around, which is
// well defined for unsigned arithmetic.
inline result rewind_and_convert_with_errors(size_t prior_bytes,
                                             const char *buf, size_t len,
                                             char *latin1_output) {
  size_t rewound{0};
  bool lead_located{false};
  // The bound is inclusive on purpose: offset `prior_bytes` is examined too.
  for (size_t back = 0; back <= prior_bytes; ++back) {
    const unsigned char candidate = buf[-static_cast<std::ptrdiff_t>(back)];
    if ((candidate & 0xC0) == 0x80) {
      continue; // still on a continuation byte, keep going back
    }
    lead_located = true;
    if (back > 0 && candidate < 128) {
      // We had to go back and landed on an ASCII byte, so the continuation
      // bytes after it cannot belong to it: stop right away.
      return result(error_code::TOO_LONG, 0 - back + 1);
    }
    buf -= back;
    rewound = back;
    break;
  }

  if (!lead_located) {
    // Every byte examined was a continuation byte: either there are too many
    // consecutive continuation bytes, or the stream does not start with a
    // leading byte.
    return result(error_code::TOO_LONG, 0 - prior_bytes);
  }

  result outcome = convert_with_errors(buf, len + rewound, latin1_output);
  if (outcome.error) {
    outcome.count -= rewound;
  }
  return outcome;
}
|
|
219
|
+
|
|
220
|
+
} // namespace utf8_to_latin1
|
|
221
|
+
} // unnamed namespace
|
|
222
|
+
} // namespace scalar
|
|
223
|
+
} // namespace simdutf
|
|
224
|
+
|
|
225
|
+
#endif
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
#ifndef SIMDUTF_VALID_UTF8_TO_LATIN1_H
|
|
2
|
+
#define SIMDUTF_VALID_UTF8_TO_LATIN1_H
|
|
3
|
+
|
|
4
|
+
namespace simdutf {
|
|
5
|
+
namespace scalar {
|
|
6
|
+
namespace {
|
|
7
|
+
namespace utf8_to_latin1 {
|
|
8
|
+
|
|
9
|
+
template <typename InputPtr>
|
|
10
|
+
#if SIMDUTF_CPLUSPLUS20
|
|
11
|
+
requires simdutf::detail::indexes_into_byte_like<InputPtr>
|
|
12
|
+
#endif
|
|
13
|
+
simdutf_constexpr23 size_t convert_valid(InputPtr data, size_t len,
|
|
14
|
+
char *latin_output) {
|
|
15
|
+
|
|
16
|
+
size_t pos = 0;
|
|
17
|
+
char *start{latin_output};
|
|
18
|
+
|
|
19
|
+
while (pos < len) {
|
|
20
|
+
#if SIMDUTF_CPLUSPLUS23
|
|
21
|
+
if !consteval
|
|
22
|
+
#endif
|
|
23
|
+
{
|
|
24
|
+
// try to convert the next block of 16 ASCII bytes
|
|
25
|
+
if (pos + 16 <= len) { // if it is safe to read 16 more bytes, check that
|
|
26
|
+
// they are ascii
|
|
27
|
+
uint64_t v1;
|
|
28
|
+
::memcpy(&v1, data + pos, sizeof(uint64_t));
|
|
29
|
+
uint64_t v2;
|
|
30
|
+
::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
|
|
31
|
+
uint64_t v{v1 |
|
|
32
|
+
v2}; // We are only interested in these bits: 1000 1000 1000
|
|
33
|
+
// 1000, so it makes sense to concatenate everything
|
|
34
|
+
if ((v & 0x8080808080808080) ==
|
|
35
|
+
0) { // if NONE of these are set, e.g. all of them are zero, then
|
|
36
|
+
// everything is ASCII
|
|
37
|
+
size_t final_pos = pos + 16;
|
|
38
|
+
while (pos < final_pos) {
|
|
39
|
+
*latin_output++ = uint8_t(data[pos]);
|
|
40
|
+
pos++;
|
|
41
|
+
}
|
|
42
|
+
continue;
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
// suppose it is not an all ASCII byte sequence
|
|
48
|
+
auto leading_byte = uint8_t(data[pos]); // leading byte
|
|
49
|
+
if (leading_byte < 0b10000000) {
|
|
50
|
+
// converting one ASCII byte !!!
|
|
51
|
+
*latin_output++ = char(leading_byte);
|
|
52
|
+
pos++;
|
|
53
|
+
} else if ((leading_byte & 0b11100000) ==
|
|
54
|
+
0b11000000) { // the first three bits indicate:
|
|
55
|
+
// We have a two-byte UTF-8
|
|
56
|
+
if (pos + 1 >= len) {
|
|
57
|
+
break;
|
|
58
|
+
} // minimal bound checking
|
|
59
|
+
if ((uint8_t(data[pos + 1]) & 0b11000000) != 0b10000000) {
|
|
60
|
+
return 0;
|
|
61
|
+
} // checks if the next byte is a valid continuation byte in UTF-8. A
|
|
62
|
+
// valid continuation byte starts with 10.
|
|
63
|
+
// range check -
|
|
64
|
+
uint32_t code_point =
|
|
65
|
+
(leading_byte & 0b00011111) << 6 |
|
|
66
|
+
(uint8_t(data[pos + 1]) &
|
|
67
|
+
0b00111111); // assembles the Unicode code point from the two bytes.
|
|
68
|
+
// It does this by discarding the leading 110 and 10
|
|
69
|
+
// bits from the two bytes, shifting the remaining bits
|
|
70
|
+
// of the first byte, and then combining the results
|
|
71
|
+
// with a bitwise OR operation.
|
|
72
|
+
*latin_output++ = char(code_point);
|
|
73
|
+
pos += 2;
|
|
74
|
+
} else {
|
|
75
|
+
// we may have a continuation but we do not do error checking
|
|
76
|
+
return 0;
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
return latin_output - start;
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
} // namespace utf8_to_latin1
|
|
83
|
+
} // unnamed namespace
|
|
84
|
+
} // namespace scalar
|
|
85
|
+
} // namespace simdutf
|
|
86
|
+
|
|
87
|
+
#endif
|