npm - react-native-quick-crypto - Versions diffs - 1.0.19 → 1.1.1 - Mend

react-native-quick-crypto 1.0.19 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (561)

package/QuickCrypto.podspec +12 -38
package/README.md +2 -0
package/android/CMakeLists.txt +3 -0
package/android/build.gradle +5 -1
package/cpp/argon2/HybridArgon2.cpp +10 -3
package/cpp/blake3/HybridBlake3.cpp +5 -3
package/cpp/cipher/CCMCipher.cpp +29 -16
package/cpp/cipher/CCMCipher.hpp +2 -4
package/cpp/cipher/ChaCha20Cipher.cpp +14 -18
package/cpp/cipher/ChaCha20Cipher.hpp +2 -4
package/cpp/cipher/ChaCha20Poly1305Cipher.cpp +34 -23
package/cpp/cipher/ChaCha20Poly1305Cipher.hpp +2 -4
package/cpp/cipher/GCMCipher.cpp +14 -15
package/cpp/cipher/HybridCipher.cpp +39 -36
package/cpp/cipher/HybridCipher.hpp +17 -1
package/cpp/cipher/HybridRsaCipher.cpp +74 -29
package/cpp/cipher/OCBCipher.cpp +4 -3
package/cpp/cipher/XChaCha20Poly1305Cipher.cpp +14 -13
package/cpp/cipher/XSalsa20Cipher.cpp +72 -6
package/cpp/cipher/XSalsa20Cipher.hpp +25 -3
package/cpp/cipher/XSalsa20Poly1305Cipher.cpp +21 -25
package/cpp/dh/HybridDiffieHellman.cpp +29 -0
package/cpp/ec/HybridEcKeyPair.cpp +35 -33
package/cpp/ec/HybridEcKeyPair.hpp +3 -7
package/cpp/ecdh/HybridECDH.cpp +23 -0
package/cpp/ed25519/HybridEdKeyPair.cpp +73 -117
package/cpp/ed25519/HybridEdKeyPair.hpp +5 -9
package/cpp/hash/HybridHash.cpp +5 -7
package/cpp/hkdf/HybridHkdf.cpp +6 -4
package/cpp/hmac/HybridHmac.cpp +4 -6
package/cpp/kmac/HybridKmac.cpp +4 -4
package/cpp/mldsa/HybridMlDsaKeyPair.cpp +37 -49
package/cpp/mlkem/HybridMlKemKeyPair.cpp +39 -43
package/cpp/pbkdf2/HybridPbkdf2.cpp +7 -8
package/cpp/rsa/HybridRsaKeyPair.cpp +5 -8
package/cpp/rsa/HybridRsaKeyPair.hpp +4 -7
package/cpp/scrypt/HybridScrypt.cpp +6 -4
package/cpp/sign/HybridSignHandle.cpp +25 -68
package/cpp/sign/HybridVerifyHandle.cpp +23 -60
package/cpp/utils/HybridUtils.cpp +213 -111
package/cpp/utils/HybridUtils.hpp +9 -2
package/cpp/utils/QuickCryptoUtils.hpp +72 -0
package/deps/simdutf/LICENSE-APACHE +201 -0
package/deps/simdutf/LICENSE-MIT +18 -0
package/deps/simdutf/README.md +2782 -0
package/deps/simdutf/include/simdutf/avx512.h +79 -0
package/deps/simdutf/include/simdutf/base64_implementation.h +158 -0
package/deps/simdutf/include/simdutf/base64_tables.h +887 -0
package/deps/simdutf/include/simdutf/common_defs.h +186 -0
package/deps/simdutf/include/simdutf/compiler_check.h +50 -0
package/deps/simdutf/include/simdutf/constexpr_ptr.h +138 -0
package/deps/simdutf/include/simdutf/encoding_types.h +189 -0
package/deps/simdutf/include/simdutf/error.h +126 -0
package/deps/simdutf/include/simdutf/implementation.h +7081 -0
package/deps/simdutf/include/simdutf/internal/isadetection.h +325 -0
package/deps/simdutf/include/simdutf/portability.h +285 -0
package/deps/simdutf/include/simdutf/scalar/ascii.h +86 -0
package/deps/simdutf/include/simdutf/scalar/atomic_util.h +105 -0
package/deps/simdutf/include/simdutf/scalar/base64.h +911 -0
package/deps/simdutf/include/simdutf/scalar/latin1.h +26 -0
package/deps/simdutf/include/simdutf/scalar/latin1_to_utf16/latin1_to_utf16.h +52 -0
package/deps/simdutf/include/simdutf/scalar/latin1_to_utf32/latin1_to_utf32.h +27 -0
package/deps/simdutf/include/simdutf/scalar/latin1_to_utf8/latin1_to_utf8.h +191 -0
package/deps/simdutf/include/simdutf/scalar/swap_bytes.h +35 -0
package/deps/simdutf/include/simdutf/scalar/utf16.h +226 -0
package/deps/simdutf/include/simdutf/scalar/utf16_to_latin1/utf16_to_latin1.h +108 -0
package/deps/simdutf/include/simdutf/scalar/utf16_to_latin1/valid_utf16_to_latin1.h +40 -0
package/deps/simdutf/include/simdutf/scalar/utf16_to_utf32/utf16_to_utf32.h +86 -0
package/deps/simdutf/include/simdutf/scalar/utf16_to_utf32/valid_utf16_to_utf32.h +44 -0
package/deps/simdutf/include/simdutf/scalar/utf16_to_utf8/utf16_to_utf8.h +295 -0
package/deps/simdutf/include/simdutf/scalar/utf16_to_utf8/valid_utf16_to_utf8.h +91 -0
package/deps/simdutf/include/simdutf/scalar/utf32.h +82 -0
package/deps/simdutf/include/simdutf/scalar/utf32_to_latin1/utf32_to_latin1.h +68 -0
package/deps/simdutf/include/simdutf/scalar/utf32_to_latin1/valid_utf32_to_latin1.h +67 -0
package/deps/simdutf/include/simdutf/scalar/utf32_to_utf16/utf32_to_utf16.h +84 -0
package/deps/simdutf/include/simdutf/scalar/utf32_to_utf16/valid_utf32_to_utf16.h +44 -0
package/deps/simdutf/include/simdutf/scalar/utf32_to_utf8/utf32_to_utf8.h +142 -0
package/deps/simdutf/include/simdutf/scalar/utf32_to_utf8/valid_utf32_to_utf8.h +72 -0
package/deps/simdutf/include/simdutf/scalar/utf8.h +326 -0
package/deps/simdutf/include/simdutf/scalar/utf8_to_latin1/utf8_to_latin1.h +225 -0
package/deps/simdutf/include/simdutf/scalar/utf8_to_latin1/valid_utf8_to_latin1.h +87 -0
package/deps/simdutf/include/simdutf/scalar/utf8_to_utf16/utf8_to_utf16.h +342 -0
package/deps/simdutf/include/simdutf/scalar/utf8_to_utf16/valid_utf8_to_utf16.h +106 -0
package/deps/simdutf/include/simdutf/scalar/utf8_to_utf32/utf8_to_utf32.h +299 -0
package/deps/simdutf/include/simdutf/scalar/utf8_to_utf32/valid_utf8_to_utf32.h +83 -0
package/deps/simdutf/include/simdutf/simdutf_version.h +26 -0
package/deps/simdutf/include/simdutf.h +26 -0
package/deps/simdutf/include/simdutf_c.h +342 -0
package/deps/simdutf/src/arm64/arm_base64.cpp +791 -0
package/deps/simdutf/src/arm64/arm_convert_latin1_to_utf16.cpp +24 -0
package/deps/simdutf/src/arm64/arm_convert_latin1_to_utf32.cpp +24 -0
package/deps/simdutf/src/arm64/arm_convert_latin1_to_utf8.cpp +70 -0
package/deps/simdutf/src/arm64/arm_convert_utf16_to_latin1.cpp +61 -0
package/deps/simdutf/src/arm64/arm_convert_utf16_to_utf32.cpp +185 -0
package/deps/simdutf/src/arm64/arm_convert_utf16_to_utf8.cpp +780 -0
package/deps/simdutf/src/arm64/arm_convert_utf32_to_latin1.cpp +60 -0
package/deps/simdutf/src/arm64/arm_convert_utf32_to_utf16.cpp +208 -0
package/deps/simdutf/src/arm64/arm_convert_utf32_to_utf8.cpp +505 -0
package/deps/simdutf/src/arm64/arm_convert_utf8_to_latin1.cpp +69 -0
package/deps/simdutf/src/arm64/arm_convert_utf8_to_utf16.cpp +313 -0
package/deps/simdutf/src/arm64/arm_convert_utf8_to_utf32.cpp +179 -0
package/deps/simdutf/src/arm64/arm_find.cpp +199 -0
package/deps/simdutf/src/arm64/arm_utf16fix.cpp +185 -0
package/deps/simdutf/src/arm64/arm_validate_utf16.cpp +165 -0
package/deps/simdutf/src/arm64/arm_validate_utf32le.cpp +65 -0
package/deps/simdutf/src/arm64/implementation.cpp +1442 -0
package/deps/simdutf/src/encoding_types.cpp +67 -0
package/deps/simdutf/src/error.cpp +3 -0
package/deps/simdutf/src/fallback/implementation.cpp +589 -0
package/deps/simdutf/src/generic/ascii_validation.h +50 -0
package/deps/simdutf/src/generic/base64.h +233 -0
package/deps/simdutf/src/generic/base64lengths.h +63 -0
package/deps/simdutf/src/generic/buf_block_reader.h +109 -0
package/deps/simdutf/src/generic/find.h +75 -0
package/deps/simdutf/src/generic/utf16/change_endianness.h +24 -0
package/deps/simdutf/src/generic/utf16/count_code_points_bytemask.h +58 -0
package/deps/simdutf/src/generic/utf16/to_well_formed.h +93 -0
package/deps/simdutf/src/generic/utf16/utf32_length_from_utf16.h +15 -0
package/deps/simdutf/src/generic/utf16/utf8_length_from_utf16.h +35 -0
package/deps/simdutf/src/generic/utf16/utf8_length_from_utf16_bytemask.h +199 -0
package/deps/simdutf/src/generic/utf16.h +73 -0
package/deps/simdutf/src/generic/utf32.h +136 -0
package/deps/simdutf/src/generic/utf8/utf16_length_from_utf8_bytemask.h +53 -0
package/deps/simdutf/src/generic/utf8.h +92 -0
package/deps/simdutf/src/generic/utf8_to_latin1/utf8_to_latin1.h +316 -0
package/deps/simdutf/src/generic/utf8_to_latin1/valid_utf8_to_latin1.h +78 -0
package/deps/simdutf/src/generic/utf8_to_utf16/utf8_to_utf16.h +332 -0
package/deps/simdutf/src/generic/utf8_to_utf16/valid_utf8_to_utf16.h +74 -0
package/deps/simdutf/src/generic/utf8_to_utf32/utf8_to_utf32.h +318 -0
package/deps/simdutf/src/generic/utf8_to_utf32/valid_utf8_to_utf32.h +42 -0
package/deps/simdutf/src/generic/utf8_validation/utf8_lookup4_algorithm.h +223 -0
package/deps/simdutf/src/generic/utf8_validation/utf8_validator.h +84 -0
package/deps/simdutf/src/generic/validate_utf16.h +164 -0
package/deps/simdutf/src/generic/validate_utf32.h +99 -0
package/deps/simdutf/src/haswell/avx2_base64.cpp +837 -0
package/deps/simdutf/src/haswell/avx2_convert_latin1_to_utf16.cpp +28 -0
package/deps/simdutf/src/haswell/avx2_convert_latin1_to_utf32.cpp +20 -0
package/deps/simdutf/src/haswell/avx2_convert_latin1_to_utf8.cpp +83 -0
package/deps/simdutf/src/haswell/avx2_convert_utf16_to_latin1.cpp +83 -0
package/deps/simdutf/src/haswell/avx2_convert_utf16_to_utf32.cpp +210 -0
package/deps/simdutf/src/haswell/avx2_convert_utf16_to_utf8.cpp +602 -0
package/deps/simdutf/src/haswell/avx2_convert_utf32_to_latin1.cpp +116 -0
package/deps/simdutf/src/haswell/avx2_convert_utf32_to_utf16.cpp +164 -0
package/deps/simdutf/src/haswell/avx2_convert_utf32_to_utf8.cpp +569 -0
package/deps/simdutf/src/haswell/avx2_convert_utf8_to_latin1.cpp +60 -0
package/deps/simdutf/src/haswell/avx2_convert_utf8_to_utf16.cpp +195 -0
package/deps/simdutf/src/haswell/avx2_convert_utf8_to_utf32.cpp +135 -0
package/deps/simdutf/src/haswell/avx2_utf16fix.cpp +173 -0
package/deps/simdutf/src/haswell/avx2_validate_utf16.cpp +17 -0
package/deps/simdutf/src/haswell/implementation.cpp +1447 -0
package/deps/simdutf/src/icelake/icelake_ascii_validation.inl.cpp +19 -0
package/deps/simdutf/src/icelake/icelake_base64.inl.cpp +630 -0
package/deps/simdutf/src/icelake/icelake_common.inl.cpp +37 -0
package/deps/simdutf/src/icelake/icelake_convert_latin1_to_utf16.inl.cpp +36 -0
package/deps/simdutf/src/icelake/icelake_convert_latin1_to_utf32.inl.cpp +23 -0
package/deps/simdutf/src/icelake/icelake_convert_latin1_to_utf8.inl.cpp +107 -0
package/deps/simdutf/src/icelake/icelake_convert_utf16_to_latin1.inl.cpp +103 -0
package/deps/simdutf/src/icelake/icelake_convert_utf16_to_utf32.inl.cpp +136 -0
package/deps/simdutf/src/icelake/icelake_convert_utf16_to_utf8.inl.cpp +206 -0
package/deps/simdutf/src/icelake/icelake_convert_utf32_to_latin1.inl.cpp +74 -0
package/deps/simdutf/src/icelake/icelake_convert_utf32_to_utf16.inl.cpp +338 -0
package/deps/simdutf/src/icelake/icelake_convert_utf32_to_utf8.inl.cpp +574 -0
package/deps/simdutf/src/icelake/icelake_convert_utf8_to_latin1.inl.cpp +104 -0
package/deps/simdutf/src/icelake/icelake_convert_utf8_to_utf16.inl.cpp +75 -0
package/deps/simdutf/src/icelake/icelake_convert_valid_utf8_to_latin1.inl.cpp +69 -0
package/deps/simdutf/src/icelake/icelake_find.inl.cpp +146 -0
package/deps/simdutf/src/icelake/icelake_from_utf8.inl.cpp +266 -0
package/deps/simdutf/src/icelake/icelake_from_valid_utf8.inl.cpp +136 -0
package/deps/simdutf/src/icelake/icelake_macros.inl.cpp +143 -0
package/deps/simdutf/src/icelake/icelake_utf16fix.cpp +138 -0
package/deps/simdutf/src/icelake/icelake_utf32_validation.inl.cpp +63 -0
package/deps/simdutf/src/icelake/icelake_utf8_common.inl.cpp +753 -0
package/deps/simdutf/src/icelake/icelake_utf8_length_from_utf16.inl.cpp +269 -0
package/deps/simdutf/src/icelake/icelake_utf8_validation.inl.cpp +116 -0
package/deps/simdutf/src/icelake/implementation.cpp +1903 -0
package/deps/simdutf/src/implementation.cpp +2526 -0
package/deps/simdutf/src/lasx/implementation.cpp +1531 -0
package/deps/simdutf/src/lasx/lasx_base64.cpp +695 -0
package/deps/simdutf/src/lasx/lasx_convert_latin1_to_utf16.cpp +76 -0
package/deps/simdutf/src/lasx/lasx_convert_latin1_to_utf32.cpp +55 -0
package/deps/simdutf/src/lasx/lasx_convert_latin1_to_utf8.cpp +65 -0
package/deps/simdutf/src/lasx/lasx_convert_utf16_to_latin1.cpp +64 -0
package/deps/simdutf/src/lasx/lasx_convert_utf16_to_utf32.cpp +183 -0
package/deps/simdutf/src/lasx/lasx_convert_utf16_to_utf8.cpp +550 -0
package/deps/simdutf/src/lasx/lasx_convert_utf32_to_latin1.cpp +73 -0
package/deps/simdutf/src/lasx/lasx_convert_utf32_to_utf16.cpp +218 -0
package/deps/simdutf/src/lasx/lasx_convert_utf32_to_utf8.cpp +589 -0
package/deps/simdutf/src/lasx/lasx_convert_utf8_to_latin1.cpp +72 -0
package/deps/simdutf/src/lasx/lasx_convert_utf8_to_utf16.cpp +296 -0
package/deps/simdutf/src/lasx/lasx_convert_utf8_to_utf32.cpp +190 -0
package/deps/simdutf/src/lasx/lasx_find.cpp +64 -0
package/deps/simdutf/src/lasx/lasx_validate_utf16.cpp +13 -0
package/deps/simdutf/src/lasx/lasx_validate_utf32le.cpp +84 -0
package/deps/simdutf/src/lsx/implementation.cpp +1417 -0
package/deps/simdutf/src/lsx/lsx_base64.cpp +675 -0
package/deps/simdutf/src/lsx/lsx_convert_latin1_to_utf16.cpp +39 -0
package/deps/simdutf/src/lsx/lsx_convert_latin1_to_utf32.cpp +27 -0
package/deps/simdutf/src/lsx/lsx_convert_latin1_to_utf8.cpp +56 -0
package/deps/simdutf/src/lsx/lsx_convert_utf16_to_latin1.cpp +64 -0
package/deps/simdutf/src/lsx/lsx_convert_utf16_to_utf32.cpp +133 -0
package/deps/simdutf/src/lsx/lsx_convert_utf16_to_utf8.cpp +518 -0
package/deps/simdutf/src/lsx/lsx_convert_utf32_to_latin1.cpp +66 -0
package/deps/simdutf/src/lsx/lsx_convert_utf32_to_utf16.cpp +155 -0
package/deps/simdutf/src/lsx/lsx_convert_utf32_to_utf8.cpp +459 -0
package/deps/simdutf/src/lsx/lsx_convert_utf8_to_latin1.cpp +75 -0
package/deps/simdutf/src/lsx/lsx_convert_utf8_to_utf16.cpp +291 -0
package/deps/simdutf/src/lsx/lsx_convert_utf8_to_utf32.cpp +179 -0
package/deps/simdutf/src/lsx/lsx_find.cpp +60 -0
package/deps/simdutf/src/lsx/lsx_validate_utf16.cpp +13 -0
package/deps/simdutf/src/lsx/lsx_validate_utf32le.cpp +68 -0
package/deps/simdutf/src/ppc64/implementation.cpp +992 -0
package/deps/simdutf/src/ppc64/ppc64_base64.cpp +480 -0
package/deps/simdutf/src/ppc64/ppc64_base64_internal_tests.cpp +401 -0
package/deps/simdutf/src/ppc64/ppc64_convert_latin1_to_utf16.cpp +12 -0
package/deps/simdutf/src/ppc64/ppc64_convert_latin1_to_utf32.cpp +12 -0
package/deps/simdutf/src/ppc64/ppc64_convert_latin1_to_utf8.cpp +149 -0
package/deps/simdutf/src/ppc64/ppc64_convert_utf16_to_latin1.cpp +67 -0
package/deps/simdutf/src/ppc64/ppc64_convert_utf16_to_utf32.cpp +87 -0
package/deps/simdutf/src/ppc64/ppc64_convert_utf16_to_utf8.cpp +296 -0
package/deps/simdutf/src/ppc64/ppc64_convert_utf32_to_latin1.cpp +57 -0
package/deps/simdutf/src/ppc64/ppc64_convert_utf32_to_utf16.cpp +117 -0
package/deps/simdutf/src/ppc64/ppc64_convert_utf32_to_utf8.cpp +166 -0
package/deps/simdutf/src/ppc64/ppc64_convert_utf8_to_latin1.cpp +69 -0
package/deps/simdutf/src/ppc64/ppc64_convert_utf8_to_utf16.cpp +211 -0
package/deps/simdutf/src/ppc64/ppc64_convert_utf8_to_utf32.cpp +153 -0
package/deps/simdutf/src/ppc64/ppc64_utf16_to_utf8_tables.h +1011 -0
package/deps/simdutf/src/ppc64/ppc64_utf8_length_from_latin1.cpp +37 -0
package/deps/simdutf/src/ppc64/ppc64_validate_utf16.cpp +19 -0
package/deps/simdutf/src/ppc64/templates.cpp +91 -0
package/deps/simdutf/src/rvv/implementation.cpp +138 -0
package/deps/simdutf/src/rvv/rvv_find.cpp +27 -0
package/deps/simdutf/src/rvv/rvv_helpers.inl.cpp +23 -0
package/deps/simdutf/src/rvv/rvv_latin1_to.inl.cpp +71 -0
package/deps/simdutf/src/rvv/rvv_length_from.inl.cpp +164 -0
package/deps/simdutf/src/rvv/rvv_utf16_to.inl.cpp +399 -0
package/deps/simdutf/src/rvv/rvv_utf16fix.cpp +110 -0
package/deps/simdutf/src/rvv/rvv_utf32_to.inl.cpp +307 -0
package/deps/simdutf/src/rvv/rvv_utf8_to.inl.cpp +435 -0
package/deps/simdutf/src/rvv/rvv_validate.inl.cpp +275 -0
package/deps/simdutf/src/simdutf/arm64/begin.h +2 -0
package/deps/simdutf/src/simdutf/arm64/bitmanipulation.h +34 -0
package/deps/simdutf/src/simdutf/arm64/end.h +2 -0
package/deps/simdutf/src/simdutf/arm64/implementation.h +307 -0
package/deps/simdutf/src/simdutf/arm64/intrinsics.h +10 -0
package/deps/simdutf/src/simdutf/arm64/simd.h +547 -0
package/deps/simdutf/src/simdutf/arm64/simd16-inl.h +403 -0
package/deps/simdutf/src/simdutf/arm64/simd32-inl.h +129 -0
package/deps/simdutf/src/simdutf/arm64/simd64-inl.h +28 -0
package/deps/simdutf/src/simdutf/arm64.h +43 -0
package/deps/simdutf/src/simdutf/fallback/begin.h +1 -0
package/deps/simdutf/src/simdutf/fallback/bitmanipulation.h +13 -0
package/deps/simdutf/src/simdutf/fallback/end.h +1 -0
package/deps/simdutf/src/simdutf/fallback/implementation.h +331 -0
package/deps/simdutf/src/simdutf/fallback.h +42 -0
package/deps/simdutf/src/simdutf/haswell/begin.h +15 -0
package/deps/simdutf/src/simdutf/haswell/bitmanipulation.h +35 -0
package/deps/simdutf/src/simdutf/haswell/end.h +13 -0
package/deps/simdutf/src/simdutf/haswell/implementation.h +338 -0
package/deps/simdutf/src/simdutf/haswell/intrinsics.h +67 -0
package/deps/simdutf/src/simdutf/haswell/simd.h +363 -0
package/deps/simdutf/src/simdutf/haswell/simd16-inl.h +261 -0
package/deps/simdutf/src/simdutf/haswell/simd32-inl.h +111 -0
package/deps/simdutf/src/simdutf/haswell/simd64-inl.h +34 -0
package/deps/simdutf/src/simdutf/haswell.h +63 -0
package/deps/simdutf/src/simdutf/icelake/begin.h +14 -0
package/deps/simdutf/src/simdutf/icelake/bitmanipulation.h +44 -0
package/deps/simdutf/src/simdutf/icelake/end.h +12 -0
package/deps/simdutf/src/simdutf/icelake/implementation.h +346 -0
package/deps/simdutf/src/simdutf/icelake/intrinsics.h +138 -0
package/deps/simdutf/src/simdutf/icelake/simd.h +17 -0
package/deps/simdutf/src/simdutf/icelake/simd16-inl.h +90 -0
package/deps/simdutf/src/simdutf/icelake/simd32-inl.h +47 -0
package/deps/simdutf/src/simdutf/icelake.h +81 -0
package/deps/simdutf/src/simdutf/lasx/begin.h +8 -0
package/deps/simdutf/src/simdutf/lasx/bitmanipulation.h +25 -0
package/deps/simdutf/src/simdutf/lasx/end.h +8 -0
package/deps/simdutf/src/simdutf/lasx/implementation.h +310 -0
package/deps/simdutf/src/simdutf/lasx/intrinsics.h +319 -0
package/deps/simdutf/src/simdutf/lasx/simd.h +551 -0
package/deps/simdutf/src/simdutf/lasx/simd16-inl.h +234 -0
package/deps/simdutf/src/simdutf/lasx/simd32-inl.h +74 -0
package/deps/simdutf/src/simdutf/lasx/simd64-inl.h +52 -0
package/deps/simdutf/src/simdutf/lasx.h +49 -0
package/deps/simdutf/src/simdutf/lsx/begin.h +2 -0
package/deps/simdutf/src/simdutf/lsx/bitmanipulation.h +25 -0
package/deps/simdutf/src/simdutf/lsx/end.h +2 -0
package/deps/simdutf/src/simdutf/lsx/implementation.h +309 -0
package/deps/simdutf/src/simdutf/lsx/intrinsics.h +196 -0
package/deps/simdutf/src/simdutf/lsx/simd.h +421 -0
package/deps/simdutf/src/simdutf/lsx/simd16-inl.h +242 -0
package/deps/simdutf/src/simdutf/lsx/simd32-inl.h +69 -0
package/deps/simdutf/src/simdutf/lsx/simd64-inl.h +50 -0
package/deps/simdutf/src/simdutf/lsx.h +52 -0
package/deps/simdutf/src/simdutf/ppc64/begin.h +1 -0
package/deps/simdutf/src/simdutf/ppc64/bitmanipulation.h +29 -0
package/deps/simdutf/src/simdutf/ppc64/end.h +1 -0
package/deps/simdutf/src/simdutf/ppc64/implementation.h +348 -0
package/deps/simdutf/src/simdutf/ppc64/intrinsics.h +19 -0
package/deps/simdutf/src/simdutf/ppc64/simd.h +177 -0
package/deps/simdutf/src/simdutf/ppc64/simd16-inl.h +327 -0
package/deps/simdutf/src/simdutf/ppc64/simd32-inl.h +247 -0
package/deps/simdutf/src/simdutf/ppc64/simd8-inl.h +618 -0
package/deps/simdutf/src/simdutf/ppc64.h +40 -0
package/deps/simdutf/src/simdutf/rvv/begin.h +7 -0
package/deps/simdutf/src/simdutf/rvv/end.h +7 -0
package/deps/simdutf/src/simdutf/rvv/implementation.h +321 -0
package/deps/simdutf/src/simdutf/rvv/intrinsics.h +131 -0
package/deps/simdutf/src/simdutf/rvv.h +41 -0
package/deps/simdutf/src/simdutf/westmere/begin.h +8 -0
package/deps/simdutf/src/simdutf/westmere/bitmanipulation.h +37 -0
package/deps/simdutf/src/simdutf/westmere/end.h +8 -0
package/deps/simdutf/src/simdutf/westmere/implementation.h +338 -0
package/deps/simdutf/src/simdutf/westmere/intrinsics.h +38 -0
package/deps/simdutf/src/simdutf/westmere/simd.h +379 -0
package/deps/simdutf/src/simdutf/westmere/simd16-inl.h +242 -0
package/deps/simdutf/src/simdutf/westmere/simd32-inl.h +151 -0
package/deps/simdutf/src/simdutf/westmere/simd64-inl.h +33 -0
package/deps/simdutf/src/simdutf/westmere.h +59 -0
package/deps/simdutf/src/simdutf.cpp +152 -0
package/deps/simdutf/src/simdutf_c.cpp +525 -0
package/deps/simdutf/src/tables/utf16_to_utf8_tables.h +768 -0
package/deps/simdutf/src/tables/utf32_to_utf16_tables.h +53 -0
package/deps/simdutf/src/tables/utf8_to_utf16_tables.h +826 -0
package/deps/simdutf/src/westmere/implementation.cpp +1479 -0
package/deps/simdutf/src/westmere/internal/loader.cpp +7 -0
package/deps/simdutf/src/westmere/internal/write_v_u16_11bits_to_utf8.cpp +66 -0
package/deps/simdutf/src/westmere/sse_base64.cpp +672 -0
package/deps/simdutf/src/westmere/sse_convert_latin1_to_utf16.cpp +21 -0
package/deps/simdutf/src/westmere/sse_convert_latin1_to_utf32.cpp +31 -0
package/deps/simdutf/src/westmere/sse_convert_latin1_to_utf8.cpp +71 -0
package/deps/simdutf/src/westmere/sse_convert_utf16_to_latin1.cpp +70 -0
package/deps/simdutf/src/westmere/sse_convert_utf16_to_utf32.cpp +206 -0
package/deps/simdutf/src/westmere/sse_convert_utf16_to_utf8.cpp +504 -0
package/deps/simdutf/src/westmere/sse_convert_utf32_to_latin1.cpp +82 -0
package/deps/simdutf/src/westmere/sse_convert_utf32_to_utf16.cpp +209 -0
package/deps/simdutf/src/westmere/sse_convert_utf32_to_utf8.cpp +589 -0
package/deps/simdutf/src/westmere/sse_convert_utf8_to_latin1.cpp +58 -0
package/deps/simdutf/src/westmere/sse_convert_utf8_to_utf16.cpp +197 -0
package/deps/simdutf/src/westmere/sse_convert_utf8_to_utf32.cpp +141 -0
package/deps/simdutf/src/westmere/sse_utf16fix.cpp +82 -0
package/deps/simdutf/src/westmere/sse_validate_utf16.cpp +17 -0
package/lib/commonjs/argon2.js +51 -2
package/lib/commonjs/argon2.js.map +1 -1
package/lib/commonjs/cipher.js +109 -11
package/lib/commonjs/cipher.js.map +1 -1
package/lib/commonjs/dsa.js +8 -2
package/lib/commonjs/dsa.js.map +1 -1
package/lib/commonjs/hash.js +15 -5
package/lib/commonjs/hash.js.map +1 -1
package/lib/commonjs/hkdf.js +33 -6
package/lib/commonjs/hkdf.js.map +1 -1
package/lib/commonjs/hmac.js +15 -5
package/lib/commonjs/hmac.js.map +1 -1
package/lib/commonjs/keys/publicCipher.js +10 -4
package/lib/commonjs/keys/publicCipher.js.map +1 -1
package/lib/commonjs/random.js +11 -2
package/lib/commonjs/random.js.map +1 -1
package/lib/commonjs/rsa.js +12 -5
package/lib/commonjs/rsa.js.map +1 -1
package/lib/commonjs/scrypt.js +47 -6
package/lib/commonjs/scrypt.js.map +1 -1
package/lib/commonjs/subtle.js +76 -5
package/lib/commonjs/subtle.js.map +1 -1
package/lib/commonjs/utils/cipher.js +18 -7
package/lib/commonjs/utils/cipher.js.map +1 -1
package/lib/commonjs/utils/conversion.js +33 -9
package/lib/commonjs/utils/conversion.js.map +1 -1
package/lib/commonjs/utils/timingSafeEqual.js +7 -2
package/lib/commonjs/utils/timingSafeEqual.js.map +1 -1
package/lib/commonjs/x509certificate.js +6 -6
package/lib/commonjs/x509certificate.js.map +1 -1
package/lib/module/argon2.js +51 -2
package/lib/module/argon2.js.map +1 -1
package/lib/module/cipher.js +109 -11
package/lib/module/cipher.js.map +1 -1
package/lib/module/dsa.js +8 -2
package/lib/module/dsa.js.map +1 -1
package/lib/module/hash.js +15 -5
package/lib/module/hash.js.map +1 -1
package/lib/module/hkdf.js +33 -6
package/lib/module/hkdf.js.map +1 -1
package/lib/module/hmac.js +15 -5
package/lib/module/hmac.js.map +1 -1
package/lib/module/keys/publicCipher.js +10 -4
package/lib/module/keys/publicCipher.js.map +1 -1
package/lib/module/random.js +11 -2
package/lib/module/random.js.map +1 -1
package/lib/module/rsa.js +11 -4
package/lib/module/rsa.js.map +1 -1
package/lib/module/scrypt.js +47 -6
package/lib/module/scrypt.js.map +1 -1
package/lib/module/subtle.js +76 -5
package/lib/module/subtle.js.map +1 -1
package/lib/module/utils/cipher.js +18 -7
package/lib/module/utils/cipher.js.map +1 -1
package/lib/module/utils/conversion.js +33 -9
package/lib/module/utils/conversion.js.map +1 -1
package/lib/module/utils/timingSafeEqual.js +8 -3
package/lib/module/utils/timingSafeEqual.js.map +1 -1
package/lib/module/x509certificate.js +6 -6
package/lib/module/x509certificate.js.map +1 -1
package/lib/typescript/argon2.d.ts.map +1 -1
package/lib/typescript/cipher.d.ts +2 -2
package/lib/typescript/cipher.d.ts.map +1 -1
package/lib/typescript/dsa.d.ts.map +1 -1
package/lib/typescript/hash.d.ts +2 -2
package/lib/typescript/hash.d.ts.map +1 -1
package/lib/typescript/hkdf.d.ts.map +1 -1
package/lib/typescript/hmac.d.ts +2 -2
package/lib/typescript/hmac.d.ts.map +1 -1
package/lib/typescript/index.d.ts +1 -1
package/lib/typescript/index.d.ts.map +1 -1
package/lib/typescript/keys/publicCipher.d.ts.map +1 -1
package/lib/typescript/random.d.ts.map +1 -1
package/lib/typescript/rsa.d.ts.map +1 -1
package/lib/typescript/scrypt.d.ts.map +1 -1
package/lib/typescript/specs/utils.nitro.d.ts +0 -2
package/lib/typescript/specs/utils.nitro.d.ts.map +1 -1
package/lib/typescript/subtle.d.ts.map +1 -1
package/lib/typescript/utils/cipher.d.ts +13 -1
package/lib/typescript/utils/cipher.d.ts.map +1 -1
package/lib/typescript/utils/conversion.d.ts +9 -6
package/lib/typescript/utils/conversion.d.ts.map +1 -1
package/lib/typescript/utils/timingSafeEqual.d.ts.map +1 -1
package/lib/typescript/x509certificate.d.ts.map +1 -1
package/nitrogen/generated/shared/c++/HybridUtilsSpec.cpp +0 -2
package/nitrogen/generated/shared/c++/HybridUtilsSpec.hpp +0 -3
package/package.json +38 -6
package/src/argon2.ts +80 -2
package/src/cipher.ts +139 -15
package/src/dsa.ts +11 -2
package/src/hash.ts +17 -7
package/src/hkdf.ts +44 -6
package/src/hmac.ts +17 -7
package/src/keys/publicCipher.ts +10 -4
package/src/random.ts +11 -2
package/src/rsa.ts +18 -4
package/src/scrypt.ts +73 -6
package/src/specs/utils.nitro.ts +0 -2
package/src/subtle.ts +90 -8
package/src/utils/cipher.ts +30 -8
package/src/utils/conversion.ts +58 -20
package/src/utils/timingSafeEqual.ts +8 -3
package/src/x509certificate.ts +5 -6
package/deps/blake3/.cargo/config.toml +0 -2
package/deps/blake3/.git-blame-ignore-revs +0 -2
package/deps/blake3/.github/workflows/build_b3sum.py +0 -38
package/deps/blake3/.github/workflows/ci.yml +0 -491
package/deps/blake3/.github/workflows/tag.yml +0 -43
package/deps/blake3/.github/workflows/upload_github_release_asset.py +0 -73
package/deps/blake3/CONTRIBUTING.md +0 -31
package/deps/blake3/Cargo.toml +0 -135
package/deps/blake3/b3sum/Cargo.lock +0 -513
package/deps/blake3/b3sum/Cargo.toml +0 -26
package/deps/blake3/b3sum/README.md +0 -72
package/deps/blake3/b3sum/src/main.rs +0 -564
package/deps/blake3/b3sum/src/unit_tests.rs +0 -235
package/deps/blake3/b3sum/tests/cli_tests.rs +0 -680
package/deps/blake3/b3sum/what_does_check_do.md +0 -176
package/deps/blake3/benches/bench.rs +0 -623
package/deps/blake3/build.rs +0 -389
package/deps/blake3/c/CMakeLists.txt +0 -383
package/deps/blake3/c/CMakePresets.json +0 -73
package/deps/blake3/c/Makefile.testing +0 -82
package/deps/blake3/c/blake3-config.cmake.in +0 -14
package/deps/blake3/c/blake3_avx2.c +0 -326
package/deps/blake3/c/blake3_avx2_x86-64_unix.S +0 -1815
package/deps/blake3/c/blake3_avx2_x86-64_windows_gnu.S +0 -1817
package/deps/blake3/c/blake3_avx2_x86-64_windows_msvc.asm +0 -1828
package/deps/blake3/c/blake3_avx512.c +0 -1388
package/deps/blake3/c/blake3_avx512_x86-64_unix.S +0 -4824
package/deps/blake3/c/blake3_avx512_x86-64_windows_gnu.S +0 -2615
package/deps/blake3/c/blake3_avx512_x86-64_windows_msvc.asm +0 -2634
package/deps/blake3/c/blake3_c_rust_bindings/Cargo.toml +0 -32
package/deps/blake3/c/blake3_c_rust_bindings/README.md +0 -4
package/deps/blake3/c/blake3_c_rust_bindings/benches/bench.rs +0 -477
package/deps/blake3/c/blake3_c_rust_bindings/build.rs +0 -253
package/deps/blake3/c/blake3_c_rust_bindings/cross_test.sh +0 -31
package/deps/blake3/c/blake3_c_rust_bindings/src/lib.rs +0 -333
package/deps/blake3/c/blake3_c_rust_bindings/src/test.rs +0 -696
package/deps/blake3/c/blake3_sse2.c +0 -566
package/deps/blake3/c/blake3_sse2_x86-64_unix.S +0 -2291
package/deps/blake3/c/blake3_sse2_x86-64_windows_gnu.S +0 -2332
package/deps/blake3/c/blake3_sse2_x86-64_windows_msvc.asm +0 -2350
package/deps/blake3/c/blake3_sse41.c +0 -560
package/deps/blake3/c/blake3_sse41_x86-64_unix.S +0 -2028
package/deps/blake3/c/blake3_sse41_x86-64_windows_gnu.S +0 -2069
package/deps/blake3/c/blake3_sse41_x86-64_windows_msvc.asm +0 -2089
package/deps/blake3/c/blake3_tbb.cpp +0 -37
package/deps/blake3/c/dependencies/CMakeLists.txt +0 -3
package/deps/blake3/c/dependencies/tbb/CMakeLists.txt +0 -28
package/deps/blake3/c/example.c +0 -36
package/deps/blake3/c/example_tbb.c +0 -57
package/deps/blake3/c/libblake3.pc.in +0 -12
package/deps/blake3/c/main.c +0 -166
package/deps/blake3/c/test.py +0 -97
package/deps/blake3/media/B3.svg +0 -70
package/deps/blake3/media/BLAKE3.svg +0 -85
package/deps/blake3/media/speed.svg +0 -1474
package/deps/blake3/reference_impl/Cargo.toml +0 -8
package/deps/blake3/reference_impl/README.md +0 -14
package/deps/blake3/reference_impl/reference_impl.rs +0 -374
package/deps/blake3/src/ffi_avx2.rs +0 -65
package/deps/blake3/src/ffi_avx512.rs +0 -169
package/deps/blake3/src/ffi_neon.rs +0 -82
package/deps/blake3/src/ffi_sse2.rs +0 -126
package/deps/blake3/src/ffi_sse41.rs +0 -126
package/deps/blake3/src/guts.rs +0 -60
package/deps/blake3/src/hazmat.rs +0 -704
package/deps/blake3/src/io.rs +0 -64
package/deps/blake3/src/join.rs +0 -92
package/deps/blake3/src/lib.rs +0 -1835
package/deps/blake3/src/platform.rs +0 -587
package/deps/blake3/src/portable.rs +0 -198
package/deps/blake3/src/rust_avx2.rs +0 -474
package/deps/blake3/src/rust_sse2.rs +0 -775
package/deps/blake3/src/rust_sse41.rs +0 -766
package/deps/blake3/src/test.rs +0 -1049
package/deps/blake3/src/traits.rs +0 -227
package/deps/blake3/src/wasm32_simd.rs +0 -794
package/deps/blake3/test_vectors/Cargo.toml +0 -19
package/deps/blake3/test_vectors/cross_test.sh +0 -25
package/deps/blake3/test_vectors/src/bin/generate.rs +0 -4
package/deps/blake3/test_vectors/src/lib.rs +0 -350
package/deps/blake3/test_vectors/test_vectors.json +0 -217
package/deps/blake3/tools/compiler_version/Cargo.toml +0 -7
package/deps/blake3/tools/compiler_version/build.rs +0 -6
package/deps/blake3/tools/compiler_version/src/main.rs +0 -27
package/deps/blake3/tools/instruction_set_support/Cargo.toml +0 -6
package/deps/blake3/tools/instruction_set_support/src/main.rs +0 -10
package/deps/blake3/tools/release.md +0 -16
package/deps/ncrypto/.bazelignore +0 -4
package/deps/ncrypto/.bazelrc +0 -1
package/deps/ncrypto/.bazelversion +0 -1
package/deps/ncrypto/.clang-format +0 -111
package/deps/ncrypto/.github/workflows/bazel.yml +0 -58
package/deps/ncrypto/.github/workflows/commitlint.yml +0 -16
package/deps/ncrypto/.github/workflows/linter.yml +0 -38
package/deps/ncrypto/.github/workflows/macos.yml +0 -43
package/deps/ncrypto/.github/workflows/release-please.yml +0 -16
package/deps/ncrypto/.github/workflows/ubuntu.yml +0 -128
package/deps/ncrypto/.github/workflows/visual-studio.yml +0 -49
package/deps/ncrypto/.python-version +0 -1
package/deps/ncrypto/.release-please-manifest.json +0 -3
package/deps/ncrypto/BUILD.bazel +0 -44
package/deps/ncrypto/CHANGELOG.md +0 -37
package/deps/ncrypto/CMakeLists.txt +0 -79
package/deps/ncrypto/MODULE.bazel +0 -16
package/deps/ncrypto/MODULE.bazel.lock +0 -461
package/deps/ncrypto/cmake/CPM.cmake +0 -1225
package/deps/ncrypto/cmake/ncrypto-flags.cmake +0 -17
package/deps/ncrypto/ncrypto.pc.in +0 -10
package/deps/ncrypto/patches/0001-Expose-libdecrepit-so-NodeJS-can-use-it-for-ncrypto.patch +0 -28
package/deps/ncrypto/pyproject.toml +0 -38
package/deps/ncrypto/release-please-config.json +0 -11
package/deps/ncrypto/src/CMakeLists.txt +0 -40
package/deps/ncrypto/tests/BUILD.bazel +0 -11
package/deps/ncrypto/tests/CMakeLists.txt +0 -7
package/deps/ncrypto/tests/basic.cpp +0 -856
package/deps/ncrypto/tools/run-clang-format.sh +0 -42
package/lib/tsconfig.tsbuildinfo +0 -1

package/deps/simdutf/src/arm64/arm_base64.cpp ADDED

@@ -0,0 +1,791 @@
+/**
+ * References and further reading:
+ *
+ * Wojciech Muła, Daniel Lemire, Base64 encoding and decoding at almost the
+ * speed of a memory copy, Software: Practice and Experience 50 (2), 2020.
+ * https://arxiv.org/abs/1910.05109
+ *
+ * Wojciech Muła, Daniel Lemire, Faster Base64 Encoding and Decoding using AVX2
+ * Instructions, ACM Transactions on the Web 12 (3), 2018.
+ * https://arxiv.org/abs/1704.00605
+ *
+ * Simon Josefsson. 2006. The Base16, Base32, and Base64 Data Encodings.
+ * https://tools.ietf.org/html/rfc4648. (2006). Internet Engineering Task Force,
+ * Request for Comments: 4648.
+ *
+ * Alfred Klomp. 2014a. Fast Base64 encoding/decoding with SSE vectorization.
+ * http://www.alfredklomp.com/programming/sse-base64/. (2014).
+ *
+ * Alfred Klomp. 2014b. Fast Base64 stream encoder/decoder in C99, with SIMD
+ * acceleration. https://github.com/aklomp/base64. (2014).
+ *
+ * Hanson Char. 2014. A Fast and Correct Base 64 Codec. (2014).
+ * https://aws.amazon.com/blogs/developer/a-fast-and-correct-base-64-codec/
+ *
+ * Nick Kopp. 2013. Base64 Encoding on a GPU.
+ * https://www.codeproject.com/Articles/276993/Base-Encoding-on-a-GPU. (2013).
+ */
+/**
+ * Builds a 16-byte vector in which lane K (K in [0,16)) holds a line feed.
+ * Lanes below K keep their input byte, lanes above K receive the input byte
+ * from one lane earlier. Input lane 15 never appears in the result.
+ */
+inline uint8x16_t insert_line_feed16(uint8x16_t input, size_t K) {
+  // Row K is the lookup pattern for a break at lane K: 0x80 marks the lane
+  // reserved for the line feed, every other entry names the source lane.
+  static const uint8_t shuffle_masks[16][16] = {
+      {0x80, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14},
+      {0, 0x80, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14},
+      {0, 1, 0x80, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14},
+      {0, 1, 2, 0x80, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14},
+      {0, 1, 2, 3, 0x80, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14},
+      {0, 1, 2, 3, 4, 0x80, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14},
+      {0, 1, 2, 3, 4, 5, 0x80, 6, 7, 8, 9, 10, 11, 12, 13, 14},
+      {0, 1, 2, 3, 4, 5, 6, 0x80, 7, 8, 9, 10, 11, 12, 13, 14},
+      {0, 1, 2, 3, 4, 5, 6, 7, 0x80, 8, 9, 10, 11, 12, 13, 14},
+      {0, 1, 2, 3, 4, 5, 6, 7, 8, 0x80, 9, 10, 11, 12, 13, 14},
+      {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0x80, 10, 11, 12, 13, 14},
+      {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0x80, 11, 12, 13, 14},
+      {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0x80, 12, 13, 14},
+      {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 0x80, 13, 14},
+      {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 0x80, 14},
+      {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0x80}};
+  const uint8x16_t pattern = vld1q_u8(shuffle_masks[K]);
+  // Spread the input bytes around the reserved lane; whatever the lookup
+  // leaves in the reserved lane is replaced by the select below.
+  const uint8x16_t spread = vqtbl1q_u8(input, pattern);
+  // All-ones in the lane carrying the 0x80 marker, zero in the others.
+  const uint8x16_t is_break = vceqq_u8(pattern, vdupq_n_u8(0x80));
+  return vbslq_u8(is_break, vdupq_n_u8('\n'), spread);
+}
+// Writes the 16 characters of src at dst, adding '\n' wherever the current
+// line reaches line_length characters, and returns the number of bytes
+// written. offset is the number of characters already on the current line
+// (0 to line_length inclusive) and is updated for the next call; when
+// offset == line_length a line feed is emitted before anything else.
+size_t write_output_with_line_feeds(uint8_t *dst, uint8x16_t src,
+                                    size_t line_length, size_t &offset) {
+  // All 16 characters fit on the current line: plain store.
+  if (offset + 16 <= line_length) {
+    vst1q_u8(dst, src);
+    offset += 16; // may now be equal to line_length
+    return 16;
+  }
+  // From here on, offset + 16 > line_length.
+  if (simdutf_likely(line_length >= 16)) {
+    // Exactly one break is needed, after break_at characters, where
+    // 0 <= break_at < 16 because offset <= line_length < offset + 16.
+    const size_t break_at = line_length - offset;
+    vst1q_u8(dst, insert_line_feed16(src, break_at));
+    // The insertion pushed the last character out of the vector; store it
+    // separately right after the 16 bytes.
+    dst[16] = vgetq_lane_u8(src, 15);
+    offset = 16 - break_at; // characters placed after the line feed
+    return 16 + 1;
+  }
+  // Rare and slow: lines shorter than 16 characters may need several breaks,
+  // so go through the characters one by one.
+  uint8_t staged[16];
+  vst1q_u8(staged, src);
+  size_t written = 0;
+  size_t column = offset;
+  for (size_t k = 0; k < 16; k++) {
+    if (column == line_length) {
+      dst[written++] = '\n';
+      column = 0;
+    }
+    dst[written++] = staged[k];
+    column++;
+  }
+  offset = column;
+  return written;
+}
+/**
+ * Encodes srclen bytes at src as base64 characters at dst and returns the
+ * number of bytes written to dst.
+ *
+ * The URL alphabet ('-' and '_') is used when (options & base64_url) is set,
+ * the standard alphabet ('+' and '/') otherwise. When insert_line_feeds is
+ * true a '\n' is written each time line_length characters have been put on the
+ * current line; line_length values below 4 are raised to 4.
+ *
+ * The input is consumed 48 bytes at a time (64 characters out), then in at
+ * most one 24-byte step (32 characters out); whatever remains is handed to
+ * scalar::base64::tail_encode_base64_impl.
+ */
+template <bool insert_line_feeds>
+size_t encode_base64_impl(char *dst, const char *src, size_t srclen,
+                          base64_options options,
+                          size_t line_length = simdutf::default_line_length) {
+  // Number of characters already written on the current output line.
+  size_t offset = 0;
+  if (line_length < 4) {
+    line_length = 4; // We do not support line_length less than 4
+  }
+  // credit: Wojciech Muła
+  uint8_t *out = (uint8_t *)dst;
+  // Both alphabets are stored interleaved by four so that the de-interleaving
+  // load below yields four 16-byte registers holding characters 0-15, 16-31,
+  // 32-47 and 48-63 of the alphabet.
+  constexpr static uint8_t source_table[64] = {
+      'A', 'Q', 'g', 'w', 'B', 'R', 'h', 'x', 'C', 'S', 'i', 'y', 'D',
+      'T', 'j', 'z', 'E', 'U', 'k', '0', 'F', 'V', 'l', '1', 'G', 'W',
+      'm', '2', 'H', 'X', 'n', '3', 'I', 'Y', 'o', '4', 'J', 'Z', 'p',
+      '5', 'K', 'a', 'q', '6', 'L', 'b', 'r', '7', 'M', 'c', 's', '8',
+      'N', 'd', 't', '9', 'O', 'e', 'u', '+', 'P', 'f', 'v', '/',
+  };
+  constexpr static uint8_t source_table_url[64] = {
+      'A', 'Q', 'g', 'w', 'B', 'R', 'h', 'x', 'C', 'S', 'i', 'y', 'D',
+      'T', 'j', 'z', 'E', 'U', 'k', '0', 'F', 'V', 'l', '1', 'G', 'W',
+      'm', '2', 'H', 'X', 'n', '3', 'I', 'Y', 'o', '4', 'J', 'Z', 'p',
+      '5', 'K', 'a', 'q', '6', 'L', 'b', 'r', '7', 'M', 'c', 's', '8',
+      'N', 'd', 't', '9', 'O', 'e', 'u', '-', 'P', 'f', 'v', '_',
+  };
+  const uint8x16_t v3f = vdupq_n_u8(0x3f);
+#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
+  // When trying to load a uint8_t array, Visual Studio might
+  // error with: error C2664: '__n128x4 neon_ld4m_q8(const char *)':
+  // cannot convert argument 1 from 'const uint8_t [64]' to 'const char *
+  // The cast therefore has to wrap the whole conditional expression (the
+  // selected table), not just its condition.
+  const uint8x16x4_t table = vld4q_u8(reinterpret_cast<const char *>(
+      (options & base64_url) ? source_table_url : source_table));
+#else
+  const uint8x16x4_t table =
+      vld4q_u8((options & base64_url) ? source_table_url : source_table);
+#endif
+  size_t i = 0;
+  // Main loop: 48 input bytes -> 64 output characters.
+  for (; i + 16 * 3 <= srclen; i += 16 * 3) {
+    // De-interleaving load: val[k] holds byte k of each of the 16 triplets.
+    const uint8x16x3_t in = vld3q_u8((const uint8_t *)src + i);
+    uint8x16x4_t result;
+    // Split every 24-bit triplet into four 6-bit indices.
+    result.val[0] = vshrq_n_u8(in.val[0], 2);
+    result.val[1] =
+        vandq_u8(vsliq_n_u8(vshrq_n_u8(in.val[1], 4), in.val[0], 4), v3f);
+    result.val[2] =
+        vandq_u8(vsliq_n_u8(vshrq_n_u8(in.val[2], 6), in.val[1], 2), v3f);
+    result.val[3] = vandq_u8(in.val[2], v3f);
+    // Map the 6-bit indices to alphabet characters.
+    result.val[0] = vqtbl4q_u8(table, result.val[0]);
+    result.val[1] = vqtbl4q_u8(table, result.val[1]);
+    result.val[2] = vqtbl4q_u8(table, result.val[2]);
+    result.val[3] = vqtbl4q_u8(table, result.val[3]);
+    if (insert_line_feeds) {
+      if (line_length >= 64) { // fast path
+        // At most one line feed can fall inside these 64 characters: store
+        // them, then open a one-byte gap if the line ends within the range.
+        vst4q_u8(out, result);
+        if (offset + 64 > line_length) {
+          size_t location_end = line_length - offset;
+          size_t to_move = 64 - location_end;
+          std::memmove(out + location_end + 1, out + location_end, to_move);
+          out[location_end] = '\n';
+          offset = to_move;
+          out += 64 + 1;
+        } else {
+          offset += 64;
+          out += 64;
+        }
+      } else { // slow path
+        // Re-interleave the four registers into output order (T0..T3 are
+        // consecutive 16-character pieces) and write them one at a time.
+        uint8x16x2_t Z0 = vzipq_u8(result.val[0], result.val[1]);
+        uint8x16x2_t Z1 = vzipq_u8(result.val[2], result.val[3]);
+        uint16x8x2_t Z2 = vzipq_u16(vreinterpretq_u16_u8(Z0.val[0]),
+                                    vreinterpretq_u16_u8(Z1.val[0]));
+        uint16x8x2_t Z3 = vzipq_u16(vreinterpretq_u16_u8(Z0.val[1]),
+                                    vreinterpretq_u16_u8(Z1.val[1]));
+        uint8x16_t T0 = vreinterpretq_u8_u16(Z2.val[0]);
+        uint8x16_t T1 = vreinterpretq_u8_u16(Z2.val[1]);
+        uint8x16_t T2 = vreinterpretq_u8_u16(Z3.val[0]);
+        uint8x16_t T3 = vreinterpretq_u8_u16(Z3.val[1]);
+        out += write_output_with_line_feeds(out, T0, line_length, offset);
+        out += write_output_with_line_feeds(out, T1, line_length, offset);
+        out += write_output_with_line_feeds(out, T2, line_length, offset);
+        out += write_output_with_line_feeds(out, T3, line_length, offset);
+      }
+    } else {
+      vst4q_u8(out, result);
+      out += 64;
+    }
+  }
+  // Optional half-size step: 24 input bytes -> 32 output characters, same
+  // scheme as above with 8-byte registers.
+  if (i + 24 <= srclen) {
+    const uint8x8_t v3f_d = vdup_n_u8(0x3f);
+    const uint8x8x3_t in = vld3_u8((const uint8_t *)src + i);
+    uint8x8x4_t result;
+    result.val[0] = vshr_n_u8(in.val[0], 2);
+    result.val[1] =
+        vand_u8(vsli_n_u8(vshr_n_u8(in.val[1], 4), in.val[0], 4), v3f_d);
+    result.val[2] =
+        vand_u8(vsli_n_u8(vshr_n_u8(in.val[2], 6), in.val[1], 2), v3f_d);
+    result.val[3] = vand_u8(in.val[2], v3f_d);
+    result.val[0] = vqtbl4_u8(table, result.val[0]);
+    result.val[1] = vqtbl4_u8(table, result.val[1]);
+    result.val[2] = vqtbl4_u8(table, result.val[2]);
+    result.val[3] = vqtbl4_u8(table, result.val[3]);
+    if (insert_line_feeds) {
+      if (line_length >= 32) { // fast path
+        vst4_u8(out, result);
+        if (offset + 32 > line_length) {
+          size_t location_end = line_length - offset;
+          size_t to_move = 32 - location_end;
+          std::memmove(out + location_end + 1, out + location_end, to_move);
+          out[location_end] = '\n';
+          offset = to_move;
+          out += 32 + 1;
+        } else {
+          offset += 32;
+          out += 32;
+        }
+      } else { // slow path
+        uint8x8x2_t Z0 = vzip_u8(result.val[0], result.val[1]);
+        uint8x8x2_t Z1 = vzip_u8(result.val[2], result.val[3]);
+        uint16x4x2_t Z2 = vzip_u16(vreinterpret_u16_u8(Z0.val[0]),
+                                   vreinterpret_u16_u8(Z1.val[0]));
+        uint16x4x2_t Z3 = vzip_u16(vreinterpret_u16_u8(Z0.val[1]),
+                                   vreinterpret_u16_u8(Z1.val[1]));
+        uint8x8_t T0 = vreinterpret_u8_u16(Z2.val[0]);
+        uint8x8_t T1 = vreinterpret_u8_u16(Z2.val[1]);
+        uint8x8_t T2 = vreinterpret_u8_u16(Z3.val[0]);
+        uint8x8_t T3 = vreinterpret_u8_u16(Z3.val[1]);
+        uint8x16_t TT0 = vcombine_u8(T0, T1);
+        uint8x16_t TT1 = vcombine_u8(T2, T3);
+        out += write_output_with_line_feeds(out, TT0, line_length, offset);
+        out += write_output_with_line_feeds(out, TT1, line_length, offset);
+      }
+    } else {
+      vst4_u8(out, result);
+      out += 32;
+    }
+    i += 24;
+  }
+  // Fewer than 24 bytes remain: finish with the scalar encoder, which
+  // continues from the current line offset.
+  out += scalar::base64::tail_encode_base64_impl<insert_line_feeds>(
+      (char *)out, src + i, srclen - i, options, line_length, offset);
+  return size_t((char *)out - dst);
+}
+// Base64-encodes srclen bytes from src into dst without inserting any line
+// feeds; returns the number of characters written.
+size_t encode_base64(char *dst, const char *src, size_t srclen,
+                     base64_options options) {
+  const size_t written = encode_base64_impl<false>(dst, src, srclen, options);
+  return written;
+}
+// Squeezes out of `data` the lanes whose bit is set in `mask` (bit i stands
+// for lane i) and stores a full 16 bytes at `output`: the kept bytes come
+// first, the bytes after them are filler.
+static inline void compress(uint8x16_t data, uint16_t mask, char *output) {
+  uint8_t *const out = (uint8_t *)output;
+  if (mask == 0) {
+    // Nothing to remove.
+    vst1q_u8(out, data);
+    return;
+  }
+  const uint8_t low_mask = uint8_t(mask);       // lanes 0..7
+  const uint8_t high_mask = uint8_t(mask >> 8); // lanes 8..15
+  // One 8-byte shuffle per half, selected by that half's own mask.
+  const uint64x2_t half_shuffles = {tables::base64::thintable_epi8[low_mask],
+                                    tables::base64::thintable_epi8[high_mask]};
+#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
+  const uint8x16_t high_bias =
+      simdutf_make_uint8x16_t(0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8);
+#else
+  const uint8x16_t high_bias = {0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8};
+#endif
+  // Add 8 to the upper half's indices so they address lanes 8..15.
+  const uint8x16_t per_half =
+      vaddq_u8(vreinterpretq_u8_u64(half_shuffles), high_bias);
+  const uint8x16_t halves_packed = vqtbl1q_u8(data, per_half);
+  // Second shuffle, chosen from the low mask: keep the bytes that survived in
+  // the first half and continue directly with the bytes of the second half.
+  const int combine_index = tables::base64::BitsSetTable256mul2[low_mask];
+  const uint8x16_t combine =
+      vld1q_u8(tables::base64::pshufb_combine_table + combine_index * 8);
+  vst1q_u8(out, vqtbl1q_u8(halves_packed, combine));
+}
+struct block64 { // 64 bytes of input held as four 16-byte NEON vectors
+  uint8x16_t chunks[4]; // chunks[k] covers bytes 16*k .. 16*k+15
+};
+static_assert(sizeof(block64) == 64, "block64 is not 64 bytes"); // size guard
+/**
+ * Classifies the 64 characters in *b and converts them in place.
+ *
+ * Every byte is looked up by its low nibble (lut_lo) and by its value shifted
+ * right by 3 (lut_hi); the AND of the two lookups is 0 for an invalid byte, 1
+ * for a skippable (space) byte and greater than 1 for a valid base64
+ * character of the selected alphabet (default_or_url accepts both alphabets,
+ * otherwise base64_url selects URL vs. standard).
+ *
+ * On return:
+ *  - the result has bit i set when byte i classified as 0 or 1, i.e. when it
+ *    has to be removed before decoding (0 when every byte is valid);
+ *  - *error is true when at least one byte classified as 0;
+ *  - every byte of *b has had a per-byte "roll" value added, which is what
+ *    maps valid characters to the value used by the decoding step.
+ */
+template <bool base64_url, bool default_or_url>
+uint64_t to_base64_mask(block64 *b, bool *error) {
+  uint8x16_t v0f = vdupq_n_u8(0xf);
+  uint8x16_t v01 = vdupq_n_u8(0x1);
+  uint8x16_t lo_nibbles0 = vandq_u8(b->chunks[0], v0f);
+  uint8x16_t lo_nibbles1 = vandq_u8(b->chunks[1], v0f);
+  uint8x16_t lo_nibbles2 = vandq_u8(b->chunks[2], v0f);
+  uint8x16_t lo_nibbles3 = vandq_u8(b->chunks[3], v0f);
+  // Needed by the decoding step.
+  uint8x16_t hi_bits0 = vshrq_n_u8(b->chunks[0], 3);
+  uint8x16_t hi_bits1 = vshrq_n_u8(b->chunks[1], 3);
+  uint8x16_t hi_bits2 = vshrq_n_u8(b->chunks[2], 3);
+  uint8x16_t hi_bits3 = vshrq_n_u8(b->chunks[3], 3);
+  // Classification table indexed by the low nibble of each byte.
+  uint8x16_t lut_lo;
+#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
+  if (default_or_url) {
+    lut_lo =
+        simdutf_make_uint8x16_t(0xa9, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8,
+                                0xf8, 0xf9, 0xf1, 0xa2, 0xa1, 0xa5, 0xa0, 0xa6);
+  } else if (base64_url) {
+    lut_lo =
+        simdutf_make_uint8x16_t(0xa9, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8,
+                                0xf8, 0xf9, 0xf1, 0xa0, 0xa1, 0xa5, 0xa0, 0xa2);
+  } else {
+    lut_lo =
+        simdutf_make_uint8x16_t(0xa9, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8,
+                                0xf8, 0xf9, 0xf1, 0xa2, 0xa1, 0xa1, 0xa0, 0xa4);
+  }
+#else
+  if (default_or_url) {
+    lut_lo = uint8x16_t{0xa9, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8,
+                        0xf8, 0xf9, 0xf1, 0xa2, 0xa1, 0xa5, 0xa0, 0xa6};
+  } else if (base64_url) {
+    lut_lo = uint8x16_t{0xa9, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8,
+                        0xf8, 0xf9, 0xf1, 0xa0, 0xa1, 0xa5, 0xa0, 0xa2};
+  } else {
+    lut_lo = uint8x16_t{0xa9, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8,
+                        0xf8, 0xf9, 0xf1, 0xa2, 0xa1, 0xa1, 0xa0, 0xa4};
+  }
+#endif
+  uint8x16_t lo0 = vqtbl1q_u8(lut_lo, lo_nibbles0);
+  uint8x16_t lo1 = vqtbl1q_u8(lut_lo, lo_nibbles1);
+  uint8x16_t lo2 = vqtbl1q_u8(lut_lo, lo_nibbles2);
+  uint8x16_t lo3 = vqtbl1q_u8(lut_lo, lo_nibbles3);
+  // Classification table indexed by (byte >> 3). Both preprocessor branches
+  // must hold the same values. For the URL-only alphabet entry 5 (bytes
+  // 0x28..0x2F) is 0x4 so that only '-' passes: with 0x6, '/' would AND to
+  // 0x2 against lut_lo and be treated as a valid character.
+  uint8x16_t lut_hi;
+#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
+  if (default_or_url) {
+    lut_hi =
+        simdutf_make_uint8x16_t(0x0, 0x1, 0x0, 0x0, 0x1, 0x6, 0x8, 0x8, 0x10,
+                                0x20, 0x20, 0x12, 0x40, 0x80, 0x80, 0x40);
+  } else if (base64_url) {
+    lut_hi =
+        simdutf_make_uint8x16_t(0x0, 0x1, 0x0, 0x0, 0x1, 0x4, 0x8, 0x8, 0x10,
+                                0x20, 0x20, 0x12, 0x40, 0x80, 0x80, 0x40);
+  } else {
+    lut_hi =
+        simdutf_make_uint8x16_t(0x0, 0x1, 0x0, 0x0, 0x1, 0x6, 0x8, 0x8, 0x10,
+                                0x20, 0x20, 0x10, 0x40, 0x80, 0x80, 0x40);
+  }
+#else
+  if (default_or_url) {
+    lut_hi = uint8x16_t{0x0,  0x1,  0x0,  0x0,  0x1,  0x6,  0x8,  0x8,
+                        0x10, 0x20, 0x20, 0x12, 0x40, 0x80, 0x80, 0x40};
+  } else if (base64_url) {
+    lut_hi = uint8x16_t{0x0,  0x1,  0x0,  0x0,  0x1,  0x4,  0x8,  0x8,
+                        0x10, 0x20, 0x20, 0x12, 0x40, 0x80, 0x80, 0x40};
+  } else {
+    lut_hi = uint8x16_t{0x0,  0x1,  0x0,  0x0,  0x1,  0x6,  0x8,  0x8,
+                        0x10, 0x20, 0x20, 0x10, 0x40, 0x80, 0x80, 0x40};
+  }
+#endif
+  uint8x16_t hi0 = vqtbl1q_u8(lut_hi, hi_bits0);
+  uint8x16_t hi1 = vqtbl1q_u8(lut_hi, hi_bits1);
+  uint8x16_t hi2 = vqtbl1q_u8(lut_hi, hi_bits2);
+  uint8x16_t hi3 = vqtbl1q_u8(lut_hi, hi_bits3);
+  // maps error byte to 0 and space byte to 1, valid bytes are >1
+  uint8x16_t res0 = vandq_u8(lo0, hi0);
+  uint8x16_t res1 = vandq_u8(lo1, hi1);
+  uint8x16_t res2 = vandq_u8(lo2, hi2);
+  uint8x16_t res3 = vandq_u8(lo3, hi3);
+  // Smallest classification over all 64 bytes: 0 means at least one error,
+  // 1 means no error but at least one space.
+  uint8_t checks =
+      vminvq_u8(vminq_u8(vminq_u8(res0, res1), vminq_u8(res2, res3)));
+#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
+  const uint8x16_t bit_mask =
+      simdutf_make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80,
+                              0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80);
+#else
+  const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80,
+                               0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80};
+#endif
+  uint64_t badcharmask = 0;
+  *error = checks == 0;
+  // The 64-bit mask is only built when something has to be removed.
+  if (checks <= 1) {
+    // Add each of the elements next to each other, successively, to stuff each
+    // 8 byte mask into one.
+    uint8x16_t test0 = vcleq_u8(res0, v01);
+    uint8x16_t test1 = vcleq_u8(res1, v01);
+    uint8x16_t test2 = vcleq_u8(res2, v01);
+    uint8x16_t test3 = vcleq_u8(res3, v01);
+    uint8x16_t sum0 =
+        vpaddq_u8(vandq_u8(test0, bit_mask), vandq_u8(test1, bit_mask));
+    uint8x16_t sum1 =
+        vpaddq_u8(vandq_u8(test2, bit_mask), vandq_u8(test3, bit_mask));
+    sum0 = vpaddq_u8(sum0, sum1);
+    sum0 = vpaddq_u8(sum0, sum0);
+    badcharmask = vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0);
+  }
+  // This is the transformation step that can be done while we are waiting for
+  // sum0
+  uint8x16_t roll_lut;
+#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
+  if (default_or_url) {
+    roll_lut =
+        simdutf_make_uint8x16_t(0xBF, 0xE0, 0xB9, 0x13, 0x04, 0xBF, 0xBF, 0xB9,
+                                0xB9, 0x00, 0xFF, 0x11, 0xFF, 0xBF, 0x10, 0xB9);
+  } else if (base64_url) {
+    roll_lut =
+        simdutf_make_uint8x16_t(0xB9, 0xB9, 0xBF, 0xBF, 0x04, 0x11, 0xE0, 0x00,
+                                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
+  } else {
+    roll_lut =
+        simdutf_make_uint8x16_t(0xB9, 0xB9, 0xBF, 0xBF, 0x04, 0x10, 0x13, 0x00,
+                                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
+  }
+#else
+  if (default_or_url) {
+    roll_lut = uint8x16_t{0xBF, 0xE0, 0xB9, 0x13, 0x04, 0xBF, 0xBF, 0xB9,
+                          0xB9, 0x00, 0xFF, 0x11, 0xFF, 0xBF, 0x10, 0xB9};
+  } else if (base64_url) {
+    roll_lut = uint8x16_t{0xB9, 0xB9, 0xBF, 0xBF, 0x04, 0x11, 0xE0, 0x00,
+                          0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
+  } else {
+    roll_lut = uint8x16_t{0xB9, 0xB9, 0xBF, 0xBF, 0x04, 0x10, 0x13, 0x00,
+                          0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
+  }
+#endif
+  uint8x16_t roll0, roll1, roll2, roll3;
+  if (default_or_url) {
+#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
+    const uint8x16_t delta_asso =
+        simdutf_make_uint8x16_t(0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+                                0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x16);
+#else
+    const uint8x16_t delta_asso =
+        uint8x16_t{0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+                   0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x16};
+#endif
+    // the logic of translating is based on westmere
+    uint8x16_t delta_hash0 =
+        vrhaddq_u8(vqtbl1q_u8(delta_asso, lo_nibbles0), hi_bits0);
+    uint8x16_t delta_hash1 =
+        vrhaddq_u8(vqtbl1q_u8(delta_asso, lo_nibbles1), hi_bits1);
+    uint8x16_t delta_hash2 =
+        vrhaddq_u8(vqtbl1q_u8(delta_asso, lo_nibbles2), hi_bits2);
+    uint8x16_t delta_hash3 =
+        vrhaddq_u8(vqtbl1q_u8(delta_asso, lo_nibbles3), hi_bits3);
+    // The table is duplicated so that hash values 16..31 index it as well.
+    const uint8x16x2_t roll_lut_2 = {roll_lut, roll_lut};
+    roll0 = vqtbl2q_u8(roll_lut_2, delta_hash0);
+    roll1 = vqtbl2q_u8(roll_lut_2, delta_hash1);
+    roll2 = vqtbl2q_u8(roll_lut_2, delta_hash2);
+    roll3 = vqtbl2q_u8(roll_lut_2, delta_hash3);
+  } else {
+    // Single-alphabet case: the roll is selected by the number of leading
+    // zero bits of the classification byte.
+    uint8x16_t delta_hash0 = vclzq_u8(res0);
+    uint8x16_t delta_hash1 = vclzq_u8(res1);
+    uint8x16_t delta_hash2 = vclzq_u8(res2);
+    uint8x16_t delta_hash3 = vclzq_u8(res3);
+    roll0 = vqtbl1q_u8(roll_lut, delta_hash0);
+    roll1 = vqtbl1q_u8(roll_lut, delta_hash1);
+    roll2 = vqtbl1q_u8(roll_lut, delta_hash2);
+    roll3 = vqtbl1q_u8(roll_lut, delta_hash3);
+  }
+  // Convert in place by adding the per-byte roll (wrapping 8-bit addition).
+  b->chunks[0] = vaddq_u8(b->chunks[0], roll0);
+  b->chunks[1] = vaddq_u8(b->chunks[1], roll1);
+  b->chunks[2] = vaddq_u8(b->chunks[2], roll2);
+  b->chunks[3] = vaddq_u8(b->chunks[3], roll3);
+  return badcharmask;
+}
+// Stores the 64 bytes of *b, unchanged and in order, at output.
+void copy_block(block64 *b, char *output) {
+  uint8_t *const out = (uint8_t *)output;
+  for (size_t k = 0; k < 4; k++) {
+    vst1q_u8(out + 16 * k, b->chunks[k]);
+  }
+}
+// Removes from the 64-byte block every byte whose bit is set in mask, writes
+// the remaining bytes contiguously at output and returns how many remain.
+// Each 16-byte chunk is compressed separately and stored with a full 16-byte
+// write at the position where the previous chunk's kept bytes end.
+uint64_t compress_block(block64 *b, uint64_t mask, char *output) {
+  // Byte k holds the number of zero bits (kept bytes) in byte k of mask.
+  const uint64_t kept_per_byte =
+      vget_lane_u64(vreinterpret_u64_u8(vcnt_u8(vcreate_u8(~mask))), 0);
+  // After the multiplication byte k holds the total for mask bytes 0..k.
+  const uint64_t running_total = kept_per_byte * 0x0101010101010101;
+  const uint64_t start1 = (running_total >> 8) & 0xFF;  // kept in bits 0..15
+  const uint64_t start2 = (running_total >> 24) & 0xFF; // kept in bits 0..31
+  const uint64_t start3 = (running_total >> 40) & 0xFF; // kept in bits 0..47
+  compress(b->chunks[0], uint16_t(mask), output);
+  compress(b->chunks[1], uint16_t(mask >> 16), &output[start1]);
+  compress(b->chunks[2], uint16_t(mask >> 32), &output[start2]);
+  compress(b->chunks[3], uint16_t(mask >> 48), &output[start3]);
+  return running_total >> 56;
+}
+// Reads 64 bytes starting at src into the four chunks of *b. The caller must
+// guarantee that 64 bytes are readable at src.
+void load_block(block64 *b, const char *src) {
+  const uint8_t *const bytes = reinterpret_cast<const uint8_t *>(src);
+  for (size_t k = 0; k < 4; k++) {
+    b->chunks[k] = vld1q_u8(bytes + 16 * k);
+  }
+}
+// Reads sixteen 16-bit words (32 bytes) at data and narrows them to sixteen
+// bytes with unsigned saturation. The caller must guarantee that 32 bytes are
+// readable at data.
+inline uint8x16_t load_satured(const uint16_t *data) {
+  const uint16x8_t first_words = vld1q_u16(data);
+  const uint16x8_t last_words = vld1q_u16(data + 8);
+  const uint8x8_t first_bytes = vqmovn_u16(first_words);
+  // Narrow the second group into the upper half of the result.
+  return vqmovn_high_u16(first_bytes, last_words);
+}
+// Reads 64 UTF-16 code units (128 bytes) starting at src and stores them in
+// *b as 64 bytes, each unit narrowed with saturation. The caller must
+// guarantee that 128 bytes are readable at src.
+void load_block(block64 *b, const char16_t *src) {
+  const uint16_t *const units = reinterpret_cast<const uint16_t *>(src);
+  for (size_t k = 0; k < 4; k++) {
+    b->chunks[k] = load_satured(units + 16 * k);
+  }
+}
+// Packs 64 six-bit values read at src (one per byte) into 48 bytes at out.
+void base64_decode_block(char *out, const char *src) {
+  // De-interleaving load: sextets.val[k] holds value k of each group of four.
+  const uint8x16x4_t sextets = vld4q_u8((uint8_t *)src);
+  uint8x16x3_t bytes;
+  // byte 0 = value0 << 2 | top two bits of value1
+  bytes.val[0] =
+      vsliq_n_u8(vshrq_n_u8(sextets.val[1], 4), sextets.val[0], 2);
+  // byte 1 = value1 << 4 | top four bits of value2
+  bytes.val[1] =
+      vsliq_n_u8(vshrq_n_u8(sextets.val[2], 2), sextets.val[1], 4);
+  // byte 2 = value2 << 6 | value3
+  bytes.val[2] = vsliq_n_u8(sextets.val[3], sextets.val[2], 6);
+  // Interleaving store puts the three byte streams back in order.
+  vst3q_u8((uint8_t *)out, bytes);
+}
+// Variant of compress_block for a mask with exactly one bit set: removes that
+// single byte from the 64-byte block, writes the result at output and returns
+// 63. Chunks before the affected one are stored as they are, the affected
+// chunk is shuffled to close the gap, and the following chunks are stored one
+// byte earlier.
+static size_t compress_block_single(block64 *b, uint64_t mask, char *output) {
+  const size_t bad_index = trailing_zeroes(mask);
+  const int8_t lane = bad_index & 0xf; // position inside its 16-byte chunk
+  const size_t chunk = bad_index >> 4; // which of the four chunks
+  // Identity lane indices 0..15.
+#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
+  const uint8x16_t identity = simdutf_make_uint8x16_t(
+      0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+#else  // SIMDUTF_REGULAR_VISUAL_STUDIO
+  const uint8x16_t identity = {0, 1, 2,  3,  4,  5,  6,  7,
+                               8, 9, 10, 11, 12, 13, 14, 15};
+#endif // SIMDUTF_REGULAR_VISUAL_STUDIO
+  if (chunk > 3) {
+    return 63; // no chunk selected: nothing is stored
+  }
+  // Lanes at or after the removed one read from the next lane: the signed
+  // comparison yields 0xFF (-1) there, and subtracting -1 adds one.
+  const uint8x16_t threshold = vmovq_n_u8((uint8_t)(lane - 1));
+  const uint8x16_t at_or_after = vcgtq_s8(vreinterpretq_s8_u8(identity),
+                                          vreinterpretq_s8_u8(threshold));
+  const uint8x16_t shuffle = vsubq_u8(identity, at_or_after);
+  uint8_t *const out = (uint8_t *)output;
+  // Stores go in increasing address order; each later store overwrites the
+  // last byte of the shortened data before it.
+  for (size_t k = 0; k < chunk; k++) {
+    vst1q_u8(out + k * 16, b->chunks[k]);
+  }
+  vst1q_u8(out + chunk * 16, vqtbl1q_u8(b->chunks[chunk], shuffle));
+  for (size_t k = chunk + 1; k < 4; k++) {
+    vst1q_u8(out + k * 16 - 1, b->chunks[k]);
+  }
+  return 63;
+}
+// True when x has at most one bit set (note: this includes x == 0).
+template <typename T> bool is_power_of_two(T x) {
+  // Clearing the lowest set bit leaves nothing behind in that case.
+  return !(x & (x - 1));
+}
+/**
+ * Decodes base64 text from src into dst.
+ *
+ * Template parameters: base64_url / default_or_url select the accepted
+ * alphabet, ignore_garbage makes invalid characters be dropped instead of
+ * reported, char_type is the input character type.
+ *
+ * Outline:
+ *  1. scalar::base64::find_end reduces srclen and reports the padding found
+ *     (equalsigns, equallocation) and the untrimmed full_input_length.
+ *  2. The input is read in blocks of 64 characters; to_base64_mask converts
+ *     each block and flags the characters to remove, which are squeezed out
+ *     while the block is appended to a local buffer.
+ *  3. Whenever the buffer holds block_size - 1 full blocks they are decoded
+ *     (64 buffered values -> 48 output bytes each).
+ *  4. After the loop the buffer is topped up from the remaining input when a
+ *     full block can be completed, full blocks and groups of four values are
+ *     decoded, and src is moved back over any 1 to 3 buffered values that
+ *     could not be decoded so that the scalar tail decoder handles them.
+ *
+ * Returns a full_result made of the error code, an input position or count,
+ * and the number of bytes written to dst. On success without a scalar tail
+ * the input count is full_input_length, the same value used by the
+ * empty-input success return and passed to patch_tail_result.
+ */
+template <bool base64_url, bool ignore_garbage, bool default_or_url,
+          typename char_type>
+full_result
+compress_decode_base64(char *dst, const char_type *src, size_t srclen,
+                       base64_options options,
+                       last_chunk_handling_options last_chunk_options) {
+  const uint8_t *to_base64 =
+      default_or_url ? tables::base64::to_base64_default_or_url_value
+                     : (base64_url ? tables::base64::to_base64_url_value
+                                   : tables::base64::to_base64_value);
+  auto ri = simdutf::scalar::base64::find_end(src, srclen, options);
+  size_t equallocation = ri.equallocation;
+  size_t equalsigns = ri.equalsigns;
+  srclen = ri.srclen;
+  size_t full_input_length = ri.full_input_length;
+  if (srclen == 0) {
+    if (!ignore_garbage && equalsigns > 0) {
+      return {INVALID_BASE64_CHARACTER, equallocation, 0};
+    }
+    return {SUCCESS, full_input_length, 0};
+  }
+  const char_type *const srcinit = src;
+  const char *const dstinit = dst;
+  const char_type *const srcend = src + srclen;
+  constexpr size_t block_size = 10;
+  // Staging area for converted values with the removable characters taken
+  // out.
+  char buffer[block_size * 64];
+  char *bufferptr = buffer;
+  if (srclen >= 64) {
+    const char_type *const srcend64 = src + srclen - 64;
+    while (src <= srcend64) {
+      block64 b;
+      load_block(&b, src);
+      src += 64;
+      bool error = false;
+      uint64_t badcharmask =
+          to_base64_mask<base64_url, default_or_url>(&b, &error);
+      if (badcharmask != 0) {
+        if (error && !ignore_garbage) {
+          // Rescan the block with the scalar table to locate the first
+          // offending character and report its position.
+          src -= 64;
+          while (src < srcend && scalar::base64::is_eight_byte(*src) &&
+                 to_base64[uint8_t(*src)] <= 64) {
+            src++;
+          }
+          return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit),
+                  size_t(dst - dstinit)};
+        }
+        // optimization opportunity: check for simple masks like those made of
+        // continuous 1s followed by continuous 0s. And masks containing a
+        // single bad character.
+        if (is_power_of_two(badcharmask)) {
+          bufferptr += compress_block_single(&b, badcharmask, bufferptr);
+        } else {
+          bufferptr += compress_block(&b, badcharmask, bufferptr);
+        }
+      } else {
+        // optimization opportunity: if bufferptr == buffer and mask == 0, we
+        // can avoid the call to compress_block and decode directly.
+        copy_block(&b, bufferptr);
+        bufferptr += 64;
+      }
+      // Flush once block_size - 1 full blocks are buffered; the partially
+      // filled last block is moved to the front of the buffer.
+      if (bufferptr >= (block_size - 1) * 64 + buffer) {
+        for (size_t i = 0; i < (block_size - 1); i++) {
+          base64_decode_block(dst, buffer + i * 64);
+          dst += 48;
+        }
+        std::memcpy(buffer, buffer + (block_size - 1) * 64,
+                    64); // 64 might be too much
+        bufferptr -= (block_size - 1) * 64;
+      }
+    }
+  }
+  char *buffer_start = buffer;
+  // Optimization note: if this is almost full, then it is worth our
+  // time, otherwise, we should just decode directly.
+  int last_block = (int)((bufferptr - buffer_start) % 64);
+  if (last_block != 0 && srcend - src + last_block >= 64) {
+    // Complete the partially filled block one character at a time.
+    while ((bufferptr - buffer_start) % 64 != 0 && src < srcend) {
+      uint8_t val = to_base64[uint8_t(*src)];
+      *bufferptr = char(val);
+      if ((!scalar::base64::is_eight_byte(*src) || val > 64) &&
+          !ignore_garbage) {
+        return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit),
+                size_t(dst - dstinit)};
+      }
+      // Only values 0..63 are kept; anything else is overwritten next round.
+      bufferptr += (val <= 63);
+      src++;
+    }
+  }
+  for (; buffer_start + 64 <= bufferptr; buffer_start += 64) {
+    base64_decode_block(dst, buffer_start);
+    dst += 48;
+  }
+  if ((bufferptr - buffer_start) % 64 != 0) {
+    // Decode the remaining buffered values four at a time. While more than
+    // four remain, a 4-byte store is used (the extra byte is overwritten by
+    // the next group).
+    while (buffer_start + 4 < bufferptr) {
+      uint32_t triple = ((uint32_t(uint8_t(buffer_start[0])) << 3 * 6) +
+                         (uint32_t(uint8_t(buffer_start[1])) << 2 * 6) +
+                         (uint32_t(uint8_t(buffer_start[2])) << 1 * 6) +
+                         (uint32_t(uint8_t(buffer_start[3])) << 0 * 6))
+                        << 8;
+#if !SIMDUTF_IS_BIG_ENDIAN
+      triple = scalar::u32_swap_bytes(triple);
+#endif
+      std::memcpy(dst, &triple, 4);
+      dst += 3;
+      buffer_start += 4;
+    }
+    // Last complete group: write exactly three bytes.
+    if (buffer_start + 4 <= bufferptr) {
+      uint32_t triple = ((uint32_t(uint8_t(buffer_start[0])) << 3 * 6) +
+                         (uint32_t(uint8_t(buffer_start[1])) << 2 * 6) +
+                         (uint32_t(uint8_t(buffer_start[2])) << 1 * 6) +
+                         (uint32_t(uint8_t(buffer_start[3])) << 0 * 6))
+                        << 8;
+#if !SIMDUTF_IS_BIG_ENDIAN
+      triple = scalar::u32_swap_bytes(triple);
+#endif
+      std::memcpy(dst, &triple, 3);
+      dst += 3;
+      buffer_start += 4;
+    }
+    // we may have 1, 2 or 3 bytes left and we need to decode them so let us
+    // backtrack
+    int leftover = int(bufferptr - buffer_start);
+    while (leftover > 0) {
+      // Step back over characters that produced no buffered value, then over
+      // one character that did.
+      if (!ignore_garbage) {
+        while (to_base64[uint8_t(*(src - 1))] == 64) {
+          src--;
+        }
+      } else {
+        while (to_base64[uint8_t(*(src - 1))] >= 64) {
+          src--;
+        }
+      }
+      src--;
+      leftover--;
+    }
+  }
+  if (src < srcend + equalsigns) {
+    full_result r = scalar::base64::base64_tail_decode(
+        dst, src, srcend - src, equalsigns, options, last_chunk_options);
+    r = scalar::base64::patch_tail_result(
+        r, size_t(src - srcinit), size_t(dst - dstinit), equallocation,
+        full_input_length, last_chunk_options);
+    // When is_partial(last_chunk_options) is true, we must either end with
+    // the end of the stream (beyond whitespace) or right after a non-ignorable
+    // character or at the very beginning of the stream.
+    // See https://tc39.es/proposal-arraybuffer-base64/spec/#sec-frombase64
+    if (is_partial(last_chunk_options) && r.error == error_code::SUCCESS &&
+        r.input_count < full_input_length) {
+      // First check if we can extend the input to the end of the stream
+      while (r.input_count < full_input_length &&
+             base64_ignorable(*(srcinit + r.input_count), options)) {
+        r.input_count++;
+      }
+      // If we are still not at the end of the stream, then we must backtrack
+      // to the last non-ignorable character.
+      if (r.input_count < full_input_length) {
+        while (r.input_count > 0 &&
+               base64_ignorable(*(srcinit + r.input_count - 1), options)) {
+          r.input_count--;
+        }
+      }
+    }
+    return r;
+  }
+  if (equalsigns > 0 && !ignore_garbage) {
+    if ((size_t(dst - dstinit) % 3 == 0) ||
+        ((size_t(dst - dstinit) % 3) + 1 + equalsigns != 4)) {
+      return {INVALID_BASE64_CHARACTER, equallocation, size_t(dst - dstinit)};
+    }
+  }
+  // Everything was consumed: report the untrimmed input length, not the
+  // reduced srclen.
+  return {SUCCESS, full_input_length, size_t(dst - dstinit)};
+}