digest-xxhash 0.2.5 → 0.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/digest/xxhash/ext.c +1 -1
- data/ext/digest/xxhash/xxhash.h +820 -480
- data/lib/digest/xxhash/version.rb +1 -1
- metadata +3 -3
    
        data/ext/digest/xxhash/xxhash.h
    CHANGED
    
    | @@ -716,8 +716,15 @@ XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canoni | |
| 716 716 | 
             
            # define XXH_HAS_ATTRIBUTE(x) 0
         | 
| 717 717 | 
             
            #endif
         | 
| 718 718 |  | 
| 719 | 
            +
            /*
         | 
| 720 | 
            +
             * C23 __STDC_VERSION__ number hasn't been specified yet. For now
         | 
| 721 | 
            +
             * leave as `201711L` (C17 + 1).
         | 
| 722 | 
            +
             * TODO: Update to correct value when it's been specified.
         | 
| 723 | 
            +
             */
         | 
| 724 | 
            +
            #define XXH_C23_VN 201711L
         | 
| 725 | 
            +
             | 
| 719 726 | 
             
            /* C-language Attributes are added in C23. */
         | 
| 720 | 
            -
            #if defined(__STDC_VERSION__) && (__STDC_VERSION__  | 
| 727 | 
            +
            #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= XXH_C23_VN) && defined(__has_c_attribute)
         | 
| 721 728 | 
             
            # define XXH_HAS_C_ATTRIBUTE(x) __has_c_attribute(x)
         | 
| 722 729 | 
             
            #else
         | 
| 723 730 | 
             
            # define XXH_HAS_C_ATTRIBUTE(x) 0
         | 
| @@ -743,6 +750,18 @@ XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canoni | |
| 743 750 | 
             
            # define XXH_FALLTHROUGH /* fallthrough */
         | 
| 744 751 | 
             
            #endif
         | 
| 745 752 |  | 
| 753 | 
            +
            /*
         | 
| 754 | 
            +
             * Define XXH_NOESCAPE for annotated pointers in public API.
         | 
| 755 | 
            +
             * https://clang.llvm.org/docs/AttributeReference.html#noescape
         | 
| 756 | 
            +
             * As of writing this, only supported by clang.
         | 
| 757 | 
            +
             */
         | 
| 758 | 
            +
            #if XXH_HAS_ATTRIBUTE(noescape)
         | 
| 759 | 
            +
            # define XXH_NOESCAPE __attribute__((noescape))
         | 
| 760 | 
            +
            #else
         | 
| 761 | 
            +
            # define XXH_NOESCAPE
         | 
| 762 | 
            +
            #endif
         | 
| 763 | 
            +
             | 
| 764 | 
            +
             | 
| 746 765 | 
             
            /*!
         | 
| 747 766 | 
             
             * @}
         | 
| 748 767 | 
             
             * @ingroup public
         | 
| @@ -813,7 +832,7 @@ typedef uint64_t XXH64_hash_t; | |
| 813 832 | 
             
             * @see
         | 
| 814 833 | 
             
             *    XXH64_createState(), XXH64_update(), XXH64_digest(): Streaming version.
         | 
| 815 834 | 
             
             */
         | 
| 816 | 
            -
            XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64(const void* input, size_t length, XXH64_hash_t seed);
         | 
| 835 | 
            +
            XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64(XXH_NOESCAPE const void* input, size_t length, XXH64_hash_t seed);
         | 
| 817 836 |  | 
| 818 837 | 
             
            /*******   Streaming   *******/
         | 
| 819 838 | 
             
            #ifndef XXH_NO_STREAM
         | 
| @@ -825,16 +844,16 @@ XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64(const void* input, size_t length, XX | |
| 825 844 | 
             
            typedef struct XXH64_state_s XXH64_state_t;   /* incomplete type */
         | 
| 826 845 | 
             
            XXH_PUBLIC_API XXH_MALLOCF XXH64_state_t* XXH64_createState(void);
         | 
| 827 846 | 
             
            XXH_PUBLIC_API XXH_errorcode  XXH64_freeState(XXH64_state_t* statePtr);
         | 
| 828 | 
            -
            XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dst_state, const XXH64_state_t* src_state);
         | 
| 847 | 
            +
            XXH_PUBLIC_API void XXH64_copyState(XXH_NOESCAPE XXH64_state_t* dst_state, const XXH64_state_t* src_state);
         | 
| 829 848 |  | 
| 830 | 
            -
            XXH_PUBLIC_API XXH_errorcode XXH64_reset  (XXH64_state_t* statePtr, XXH64_hash_t seed);
         | 
| 831 | 
            -
            XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
         | 
| 832 | 
            -
            XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr);
         | 
| 849 | 
            +
            XXH_PUBLIC_API XXH_errorcode XXH64_reset  (XXH_NOESCAPE XXH64_state_t* statePtr, XXH64_hash_t seed);
         | 
| 850 | 
            +
            XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH_NOESCAPE XXH64_state_t* statePtr, XXH_NOESCAPE const void* input, size_t length);
         | 
| 851 | 
            +
            XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64_digest (XXH_NOESCAPE const XXH64_state_t* statePtr);
         | 
| 833 852 | 
             
            #endif /* !XXH_NO_STREAM */
         | 
| 834 853 | 
             
            /*******   Canonical representation   *******/
         | 
| 835 854 | 
             
            typedef struct { unsigned char digest[sizeof(XXH64_hash_t)]; } XXH64_canonical_t;
         | 
| 836 | 
            -
            XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash);
         | 
| 837 | 
            -
            XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src);
         | 
| 855 | 
            +
            XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH_NOESCAPE XXH64_canonical_t* dst, XXH64_hash_t hash);
         | 
| 856 | 
            +
            XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_canonical_t* src);
         | 
| 838 857 |  | 
| 839 858 | 
             
            #ifndef XXH_NO_XXH3
         | 
| 840 859 |  | 
| @@ -872,7 +891,7 @@ XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canoni | |
| 872 891 | 
             
             *
         | 
| 873 892 | 
             
             * XXH3 implementation is portable:
         | 
| 874 893 | 
             
             * it has a generic C90 formulation that can be compiled on any platform,
         | 
| 875 | 
            -
             * all implementations  | 
| 894 | 
            +
             * all implementations generate exactly the same hash value on all platforms.
         | 
| 876 895 | 
             
             * Starting from v0.8.0, it's also labelled "stable", meaning that
         | 
| 877 896 | 
             
             * any future version will also generate the same hash value.
         | 
| 878 897 | 
             
             *
         | 
| @@ -902,7 +921,7 @@ XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canoni | |
| 902 921 | 
             
             * @see
         | 
| 903 922 | 
             
             *    XXH3_64bits_reset(), XXH3_64bits_update(), XXH3_64bits_digest(): Streaming version.
         | 
| 904 923 | 
             
             */
         | 
| 905 | 
            -
            XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits(const void* input, size_t length);
         | 
| 924 | 
            +
            XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits(XXH_NOESCAPE const void* input, size_t length);
         | 
| 906 925 |  | 
| 907 926 | 
             
            /*!
         | 
| 908 927 | 
             
             * @brief 64-bit seeded variant of XXH3
         | 
| @@ -919,7 +938,7 @@ XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits(const void* input, size_t leng | |
| 919 938 | 
             
             * @param length The length
         | 
| 920 939 | 
             
             * @param seed The 64-bit seed to alter the state.
         | 
| 921 940 | 
             
             */
         | 
| 922 | 
            -
            XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSeed(const void* input, size_t length, XXH64_hash_t seed);
         | 
| 941 | 
            +
            XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSeed(XXH_NOESCAPE const void* input, size_t length, XXH64_hash_t seed);
         | 
| 923 942 |  | 
| 924 943 | 
             
            /*!
         | 
| 925 944 | 
             
             * The bare minimum size for a custom secret.
         | 
| @@ -948,7 +967,7 @@ XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSeed(const void* input, si | |
| 948 967 | 
             
             * This is not necessarily the case when using the blob of bytes directly
         | 
| 949 968 | 
             
             * because, when hashing _small_ inputs, only a portion of the secret is employed.
         | 
| 950 969 | 
             
             */
         | 
| 951 | 
            -
            XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSecret(const void* data, size_t len, const void* secret, size_t secretSize);
         | 
| 970 | 
            +
            XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSecret(XXH_NOESCAPE const void* data, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize);
         | 
| 952 971 |  | 
| 953 972 |  | 
| 954 973 | 
             
            /*******   Streaming   *******/
         | 
| @@ -968,20 +987,20 @@ XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSecret(const void* data, s | |
| 968 987 | 
             
            typedef struct XXH3_state_s XXH3_state_t;
         | 
| 969 988 | 
             
            XXH_PUBLIC_API XXH_MALLOCF XXH3_state_t* XXH3_createState(void);
         | 
| 970 989 | 
             
            XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr);
         | 
| 971 | 
            -
            XXH_PUBLIC_API void XXH3_copyState(XXH3_state_t* dst_state, const XXH3_state_t* src_state);
         | 
| 990 | 
            +
            XXH_PUBLIC_API void XXH3_copyState(XXH_NOESCAPE XXH3_state_t* dst_state, XXH_NOESCAPE const XXH3_state_t* src_state);
         | 
| 972 991 |  | 
| 973 992 | 
             
            /*
         | 
| 974 993 | 
             
             * XXH3_64bits_reset():
         | 
| 975 994 | 
             
             * Initialize with default parameters.
         | 
| 976 995 | 
             
             * digest will be equivalent to `XXH3_64bits()`.
         | 
| 977 996 | 
             
             */
         | 
| 978 | 
            -
            XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset(XXH3_state_t* statePtr);
         | 
| 997 | 
            +
            XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr);
         | 
| 979 998 | 
             
            /*
         | 
| 980 999 | 
             
             * XXH3_64bits_reset_withSeed():
         | 
| 981 1000 | 
             
             * Generate a custom secret from `seed`, and store it into `statePtr`.
         | 
| 982 1001 | 
             
             * digest will be equivalent to `XXH3_64bits_withSeed()`.
         | 
| 983 1002 | 
             
             */
         | 
| 984 | 
            -
            XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed);
         | 
| 1003 | 
            +
            XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed);
         | 
| 985 1004 | 
             
            /*!
         | 
| 986 1005 | 
             
             * XXH3_64bits_reset_withSecret():
         | 
| 987 1006 | 
             
             * `secret` is referenced, it _must outlive_ the hash streaming session.
         | 
| @@ -991,10 +1010,10 @@ XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSeed(XXH3_state_t* statePtr, | |
| 991 1010 | 
             
             * When in doubt about the randomness of a candidate `secret`,
         | 
| 992 1011 | 
             
             * consider employing `XXH3_generateSecret()` instead (see below).
         | 
| 993 1012 | 
             
             */
         | 
| 994 | 
            -
            XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize);
         | 
| 1013 | 
            +
            XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize);
         | 
| 995 1014 |  | 
| 996 | 
            -
            XXH_PUBLIC_API XXH_errorcode XXH3_64bits_update (XXH3_state_t* statePtr, const void* input, size_t length);
         | 
| 997 | 
            -
            XXH_PUBLIC_API XXH_PUREF XXH64_hash_t  XXH3_64bits_digest (const XXH3_state_t* statePtr);
         | 
| 1015 | 
            +
            XXH_PUBLIC_API XXH_errorcode XXH3_64bits_update (XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* input, size_t length);
         | 
| 1016 | 
            +
            XXH_PUBLIC_API XXH_PUREF XXH64_hash_t  XXH3_64bits_digest (XXH_NOESCAPE const XXH3_state_t* statePtr);
         | 
| 998 1017 | 
             
            #endif /* !XXH_NO_STREAM */
         | 
| 999 1018 |  | 
| 1000 1019 | 
             
            /* note : canonical representation of XXH3 is the same as XXH64
         | 
| @@ -1033,11 +1052,11 @@ typedef struct { | |
| 1033 1052 | 
             
             * @see
         | 
| 1034 1053 | 
             
             *    XXH3_128bits_reset(), XXH3_128bits_update(), XXH3_128bits_digest(): Streaming version.
         | 
| 1035 1054 | 
             
             */
         | 
| 1036 | 
            -
            XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits(const void* data, size_t len);
         | 
| 1055 | 
            +
            XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits(XXH_NOESCAPE const void* data, size_t len);
         | 
| 1037 1056 | 
             
            /*! @brief Seeded 128-bit variant of XXH3. @see XXH3_64bits_withSeed(). */
         | 
| 1038 | 
            -
            XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_withSeed(const void* data, size_t len, XXH64_hash_t seed);
         | 
| 1057 | 
            +
            XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_withSeed(XXH_NOESCAPE const void* data, size_t len, XXH64_hash_t seed);
         | 
| 1039 1058 | 
             
            /*! @brief Custom secret 128-bit variant of XXH3. @see XXH3_64bits_withSecret(). */
         | 
| 1040 | 
            -
            XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_withSecret(const void* data, size_t len, const void* secret, size_t secretSize);
         | 
| 1059 | 
            +
            XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_withSecret(XXH_NOESCAPE const void* data, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize);
         | 
| 1041 1060 |  | 
| 1042 1061 | 
             
            /*******   Streaming   *******/
         | 
| 1043 1062 | 
             
            #ifndef XXH_NO_STREAM
         | 
| @@ -1053,12 +1072,12 @@ XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_withSecret(const void* data, | |
| 1053 1072 | 
             
             * All reset and streaming functions have same meaning as their 64-bit counterpart.
         | 
| 1054 1073 | 
             
             */
         | 
| 1055 1074 |  | 
| 1056 | 
            -
            XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset(XXH3_state_t* statePtr);
         | 
| 1057 | 
            -
            XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed);
         | 
| 1058 | 
            -
            XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize);
         | 
| 1075 | 
            +
            XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr);
         | 
| 1076 | 
            +
            XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed);
         | 
| 1077 | 
            +
            XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize);
         | 
| 1059 1078 |  | 
| 1060 | 
            -
            XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update (XXH3_state_t* statePtr, const void* input, size_t length);
         | 
| 1061 | 
            -
            XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_digest (const XXH3_state_t* statePtr);
         | 
| 1079 | 
            +
            XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update (XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* input, size_t length);
         | 
| 1080 | 
            +
            XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_digest (XXH_NOESCAPE const XXH3_state_t* statePtr);
         | 
| 1062 1081 | 
             
            #endif /* !XXH_NO_STREAM */
         | 
| 1063 1082 |  | 
| 1064 1083 | 
             
            /* Following helper functions make it possible to compare XXH128_hash_t values.
         | 
| @@ -1079,13 +1098,13 @@ XXH_PUBLIC_API XXH_PUREF int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2); | |
| 1079 1098 | 
             
             *          =0 if *h128_1 == *h128_2
         | 
| 1080 1099 | 
             
             *          <0 if *h128_1  < *h128_2
         | 
| 1081 1100 | 
             
             */
         | 
| 1082 | 
            -
            XXH_PUBLIC_API XXH_PUREF int XXH128_cmp(const void* h128_1, const void* h128_2);
         | 
| 1101 | 
            +
            XXH_PUBLIC_API XXH_PUREF int XXH128_cmp(XXH_NOESCAPE const void* h128_1, XXH_NOESCAPE const void* h128_2);
         | 
| 1083 1102 |  | 
| 1084 1103 |  | 
| 1085 1104 | 
             
            /*******   Canonical representation   *******/
         | 
| 1086 1105 | 
             
            typedef struct { unsigned char digest[sizeof(XXH128_hash_t)]; } XXH128_canonical_t;
         | 
| 1087 | 
            -
            XXH_PUBLIC_API void XXH128_canonicalFromHash(XXH128_canonical_t* dst, XXH128_hash_t hash);
         | 
| 1088 | 
            -
            XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH128_hashFromCanonical(const XXH128_canonical_t* src);
         | 
| 1106 | 
            +
            XXH_PUBLIC_API void XXH128_canonicalFromHash(XXH_NOESCAPE XXH128_canonical_t* dst, XXH128_hash_t hash);
         | 
| 1107 | 
            +
            XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH128_hashFromCanonical(XXH_NOESCAPE const XXH128_canonical_t* src);
         | 
| 1089 1108 |  | 
| 1090 1109 |  | 
| 1091 1110 | 
             
            #endif  /* !XXH_NO_XXH3 */
         | 
| @@ -1266,13 +1285,18 @@ struct XXH3_state_s { | |
| 1266 1285 | 
             
             * Note that this doesn't prepare the state for a streaming operation,
         | 
| 1267 1286 | 
             
             * it's still necessary to use XXH3_NNbits_reset*() afterwards.
         | 
| 1268 1287 | 
             
             */
         | 
| 1269 | 
            -
            #define XXH3_INITSTATE(XXH3_state_ptr) | 
| 1288 | 
            +
            #define XXH3_INITSTATE(XXH3_state_ptr)                       \
         | 
| 1289 | 
            +
                do {                                                     \
         | 
| 1290 | 
            +
                    XXH3_state_t* tmp_xxh3_state_ptr = (XXH3_state_ptr); \
         | 
| 1291 | 
            +
                    tmp_xxh3_state_ptr->seed = 0;                        \
         | 
| 1292 | 
            +
                    tmp_xxh3_state_ptr->extSecret = NULL;                \
         | 
| 1293 | 
            +
                } while(0)
         | 
| 1270 1294 |  | 
| 1271 1295 |  | 
| 1272 1296 | 
             
            /*!
         | 
| 1273 1297 | 
             
             * simple alias to pre-selected XXH3_128bits variant
         | 
| 1274 1298 | 
             
             */
         | 
| 1275 | 
            -
            XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t seed);
         | 
| 1299 | 
            +
            XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH128(XXH_NOESCAPE const void* data, size_t len, XXH64_hash_t seed);
         | 
| 1276 1300 |  | 
| 1277 1301 |  | 
| 1278 1302 | 
             
            /* ===   Experimental API   === */
         | 
| @@ -1329,7 +1353,7 @@ XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH128(const void* data, size_t len, XXH6 | |
| 1329 1353 | 
             
             *    }
         | 
| 1330 1354 | 
             
             * @endcode
         | 
| 1331 1355 | 
             
             */
         | 
| 1332 | 
            -
            XXH_PUBLIC_API XXH_errorcode XXH3_generateSecret(void* secretBuffer, size_t secretSize, const void* customSeed, size_t customSeedSize);
         | 
| 1356 | 
            +
            XXH_PUBLIC_API XXH_errorcode XXH3_generateSecret(XXH_NOESCAPE void* secretBuffer, size_t secretSize, XXH_NOESCAPE const void* customSeed, size_t customSeedSize);
         | 
| 1333 1357 |  | 
| 1334 1358 | 
             
            /*!
         | 
| 1335 1359 | 
             
             * @brief Generate the same secret as the _withSeed() variants.
         | 
| @@ -1368,7 +1392,7 @@ XXH_PUBLIC_API XXH_errorcode XXH3_generateSecret(void* secretBuffer, size_t secr | |
| 1368 1392 | 
             
             * @param secretBuffer A writable buffer of @ref XXH3_SECRET_SIZE_MIN bytes
         | 
| 1369 1393 | 
             
             * @param seed The seed to seed the state.
         | 
| 1370 1394 | 
             
             */
         | 
| 1371 | 
            -
            XXH_PUBLIC_API void XXH3_generateSecret_fromSeed(void* secretBuffer, XXH64_hash_t seed);
         | 
| 1395 | 
            +
            XXH_PUBLIC_API void XXH3_generateSecret_fromSeed(XXH_NOESCAPE void* secretBuffer, XXH64_hash_t seed);
         | 
| 1372 1396 |  | 
| 1373 1397 | 
             
            /*!
         | 
| 1374 1398 | 
             
             * These variants generate hash values using either
         | 
| @@ -1397,24 +1421,24 @@ XXH_PUBLIC_API void XXH3_generateSecret_fromSeed(void* secretBuffer, XXH64_hash_ | |
| 1397 1421 | 
             
             * because only portions of the secret are employed for small data.
         | 
| 1398 1422 | 
             
             */
         | 
| 1399 1423 | 
             
            XXH_PUBLIC_API XXH_PUREF XXH64_hash_t
         | 
| 1400 | 
            -
            XXH3_64bits_withSecretandSeed(const void* data, size_t len,
         | 
| 1401 | 
            -
                                          const void* secret, size_t secretSize,
         | 
| 1424 | 
            +
            XXH3_64bits_withSecretandSeed(XXH_NOESCAPE const void* data, size_t len,
         | 
| 1425 | 
            +
                                          XXH_NOESCAPE const void* secret, size_t secretSize,
         | 
| 1402 1426 | 
             
                                          XXH64_hash_t seed);
         | 
| 1403 1427 | 
             
            /*! @copydoc XXH3_64bits_withSecretandSeed() */
         | 
| 1404 1428 | 
             
            XXH_PUBLIC_API XXH_PUREF XXH128_hash_t
         | 
| 1405 | 
            -
            XXH3_128bits_withSecretandSeed(const void* input, size_t length,
         | 
| 1406 | 
            -
                                           const void* secret, size_t secretSize,
         | 
| 1429 | 
            +
            XXH3_128bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t length,
         | 
| 1430 | 
            +
                                           XXH_NOESCAPE const void* secret, size_t secretSize,
         | 
| 1407 1431 | 
             
                                           XXH64_hash_t seed64);
         | 
| 1408 1432 | 
             
            #ifndef XXH_NO_STREAM
         | 
| 1409 1433 | 
             
            /*! @copydoc XXH3_64bits_withSecretandSeed() */
         | 
| 1410 1434 | 
             
            XXH_PUBLIC_API XXH_errorcode
         | 
| 1411 | 
            -
            XXH3_64bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
         | 
| 1412 | 
            -
                                                const void* secret, size_t secretSize,
         | 
| 1435 | 
            +
            XXH3_64bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr,
         | 
| 1436 | 
            +
                                                XXH_NOESCAPE const void* secret, size_t secretSize,
         | 
| 1413 1437 | 
             
                                                XXH64_hash_t seed64);
         | 
| 1414 1438 | 
             
            /*! @copydoc XXH3_64bits_withSecretandSeed() */
         | 
| 1415 1439 | 
             
            XXH_PUBLIC_API XXH_errorcode
         | 
| 1416 | 
            -
            XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
         | 
| 1417 | 
            -
                                                 const void* secret, size_t secretSize,
         | 
| 1440 | 
            +
            XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr,
         | 
| 1441 | 
            +
                                                 XXH_NOESCAPE const void* secret, size_t secretSize,
         | 
| 1418 1442 | 
             
                                                 XXH64_hash_t seed64);
         | 
| 1419 1443 | 
             
            #endif /* !XXH_NO_STREAM */
         | 
| 1420 1444 |  | 
| @@ -1522,7 +1546,7 @@ XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr, | |
| 1522 1546 | 
             
             *   care, as what works on one compiler/platform/optimization level may cause
         | 
| 1523 1547 | 
             
             *   another to read garbage data or even crash.
         | 
| 1524 1548 | 
             
             *
         | 
| 1525 | 
            -
             * See  | 
| 1549 | 
            +
             * See https://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html for details.
         | 
| 1526 1550 | 
             
             *
         | 
| 1527 1551 | 
             
             * Prefer these methods in priority order (0 > 3 > 1 > 2)
         | 
| 1528 1552 | 
             
             */
         | 
| @@ -1608,6 +1632,23 @@ XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr, | |
| 1608 1632 | 
             
             */
         | 
| 1609 1633 | 
             
            #  define XXH_NO_INLINE_HINTS 0
         | 
| 1610 1634 |  | 
| 1635 | 
            +
            /*!
         | 
| 1636 | 
            +
             * @def XXH3_INLINE_SECRET
         | 
| 1637 | 
            +
             * @brief Determines whether to inline the XXH3 withSecret code.
         | 
| 1638 | 
            +
             *
         | 
| 1639 | 
            +
             * When the secret size is known, the compiler can improve the performance
         | 
| 1640 | 
            +
             * of XXH3_64bits_withSecret() and XXH3_128bits_withSecret().
         | 
| 1641 | 
            +
             *
         | 
| 1642 | 
            +
             * However, if the secret size is not known, it doesn't have any benefit. This
         | 
| 1643 | 
            +
             * happens when xxHash is compiled into a global symbol. Therefore, if
         | 
| 1644 | 
            +
             * @ref XXH_INLINE_ALL is *not* defined, this will be defined to 0.
         | 
| 1645 | 
            +
             *
         | 
| 1646 | 
            +
             * Additionally, this defaults to 0 on GCC 12+, which has an issue with function pointers
         | 
| 1647 | 
            +
             * that are *sometimes* force-inlined on -Og, and it is impossible to automatically
         | 
| 1648 | 
            +
             * detect this optimization level.
         | 
| 1649 | 
            +
             */
         | 
| 1650 | 
            +
            #  define XXH3_INLINE_SECRET 0
         | 
| 1651 | 
            +
             | 
| 1611 1652 | 
             
            /*!
         | 
| 1612 1653 | 
             
             * @def XXH32_ENDJMP
         | 
| 1613 1654 | 
             
             * @brief Whether to use a jump for `XXH32_finalize`.
         | 
| @@ -1682,6 +1723,15 @@ XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr, | |
| 1682 1723 | 
             
            #  endif
         | 
| 1683 1724 | 
             
            #endif
         | 
| 1684 1725 |  | 
| 1726 | 
            +
            #ifndef XXH3_INLINE_SECRET
         | 
| 1727 | 
            +
            #  if (defined(__GNUC__) && !defined(__clang__) && __GNUC__ >= 12) \
         | 
| 1728 | 
            +
                 || !defined(XXH_INLINE_ALL)
         | 
| 1729 | 
            +
            #    define XXH3_INLINE_SECRET 0
         | 
| 1730 | 
            +
            #  else
         | 
| 1731 | 
            +
            #    define XXH3_INLINE_SECRET 1
         | 
| 1732 | 
            +
            #  endif
         | 
| 1733 | 
            +
            #endif
         | 
| 1734 | 
            +
             | 
| 1685 1735 | 
             
            #ifndef XXH32_ENDJMP
         | 
| 1686 1736 | 
             
            /* generally preferable for performance */
         | 
| 1687 1737 | 
             
            #  define XXH32_ENDJMP 0
         | 
| @@ -1778,6 +1828,11 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size) | |
| 1778 1828 | 
             
            #  define XXH_NO_INLINE static
         | 
| 1779 1829 | 
             
            #endif
         | 
| 1780 1830 |  | 
| 1831 | 
            +
            #if XXH3_INLINE_SECRET
         | 
| 1832 | 
            +
            #  define XXH3_WITH_SECRET_INLINE XXH_FORCE_INLINE
         | 
| 1833 | 
            +
            #else
         | 
| 1834 | 
            +
            #  define XXH3_WITH_SECRET_INLINE XXH_NO_INLINE
         | 
| 1835 | 
            +
            #endif
         | 
| 1781 1836 |  | 
| 1782 1837 |  | 
| 1783 1838 | 
             
            /* *************************************
         | 
| @@ -1803,7 +1858,7 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size) | |
| 1803 1858 | 
             
            #  include <assert.h>   /* note: can still be disabled with NDEBUG */
         | 
| 1804 1859 | 
             
            #  define XXH_ASSERT(c)   assert(c)
         | 
| 1805 1860 | 
             
            #else
         | 
| 1806 | 
            -
            #  define XXH_ASSERT(c)   ( | 
| 1861 | 
            +
            #  define XXH_ASSERT(c)   XXH_ASSUME(c)
         | 
| 1807 1862 | 
             
            #endif
         | 
| 1808 1863 |  | 
| 1809 1864 | 
             
            /* note: use after variable declarations */
         | 
| @@ -1835,11 +1890,17 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size) | |
| 1835 1890 | 
             
             * XXH3_initCustomSecret_scalar().
         | 
| 1836 1891 | 
             
             */
         | 
| 1837 1892 | 
             
            #if defined(__GNUC__) || defined(__clang__)
         | 
| 1838 | 
            -
            #  define XXH_COMPILER_GUARD(var) __asm__ __volatile__("" : "+r" (var))
         | 
| 1893 | 
            +
            #  define XXH_COMPILER_GUARD(var) __asm__("" : "+r" (var))
         | 
| 1839 1894 | 
             
            #else
         | 
| 1840 1895 | 
             
            #  define XXH_COMPILER_GUARD(var) ((void)0)
         | 
| 1841 1896 | 
             
            #endif
         | 
| 1842 1897 |  | 
| 1898 | 
            +
            #if defined(__clang__)
         | 
| 1899 | 
            +
            #  define XXH_COMPILER_GUARD_W(var) __asm__("" : "+w" (var))
         | 
| 1900 | 
            +
            #else
         | 
| 1901 | 
            +
            #  define XXH_COMPILER_GUARD_W(var) ((void)0)
         | 
| 1902 | 
            +
            #endif
         | 
| 1903 | 
            +
             | 
| 1843 1904 | 
             
            /* *************************************
         | 
| 1844 1905 | 
             
            *  Basic Types
         | 
| 1845 1906 | 
             
            ***************************************/
         | 
| @@ -1946,7 +2007,7 @@ static xxh_u32 XXH_read32(const void* ptr) | |
| 1946 2007 |  | 
| 1947 2008 | 
             
            /*
         | 
| 1948 2009 | 
             
             * Portable and safe solution. Generally efficient.
         | 
| 1949 | 
            -
             * see: http://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html
         | 
| 2010 | 
            +
             * see: https://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html
         | 
| 1950 2011 | 
             
             */
         | 
| 1951 2012 | 
             
            static xxh_u32 XXH_read32(const void* memPtr)
         | 
| 1952 2013 | 
             
            {
         | 
| @@ -2022,6 +2083,51 @@ static int XXH_isLittleEndian(void) | |
| 2022 2083 | 
             
            #  define XXH_HAS_BUILTIN(x) 0
         | 
| 2023 2084 | 
             
            #endif
         | 
| 2024 2085 |  | 
| 2086 | 
            +
             | 
| 2087 | 
            +
             | 
| 2088 | 
            +
            /*
         | 
| 2089 | 
            +
             * C23 and future versions have standard "unreachable()".
         | 
| 2090 | 
            +
             * Once it has been implemented reliably we can add it as an
         | 
| 2091 | 
            +
             * additional case:
         | 
| 2092 | 
            +
             *
         | 
| 2093 | 
            +
             * ```
         | 
| 2094 | 
            +
             * #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= XXH_C23_VN)
         | 
| 2095 | 
            +
             * #  include <stddef.h>
         | 
| 2096 | 
            +
             * #  ifdef unreachable
         | 
| 2097 | 
            +
             * #    define XXH_UNREACHABLE() unreachable()
         | 
| 2098 | 
            +
             * #  endif
         | 
| 2099 | 
            +
             * #endif
         | 
| 2100 | 
            +
             * ```
         | 
| 2101 | 
            +
             *
         | 
| 2102 | 
            +
             * Note C++23 also has std::unreachable() which can be detected
         | 
| 2103 | 
            +
             * as follows:
         | 
| 2104 | 
            +
             * ```
         | 
| 2105 | 
            +
             * #if defined(__cpp_lib_unreachable) && (__cpp_lib_unreachable >= 202202L)
         | 
| 2106 | 
            +
             * #  include <utility>
         | 
| 2107 | 
            +
             * #  define XXH_UNREACHABLE() std::unreachable()
         | 
| 2108 | 
            +
             * #endif
         | 
| 2109 | 
            +
             * ```
         | 
| 2110 | 
            +
             * NB: `__cpp_lib_unreachable` is defined in the `<version>` header.
         | 
| 2111 | 
            +
             * We don't use that as including `<utility>` in `extern "C"` blocks
         | 
| 2112 | 
            +
             * doesn't work on GCC12
         | 
| 2113 | 
            +
             */
         | 
| 2114 | 
            +
             | 
| 2115 | 
            +
            #if XXH_HAS_BUILTIN(__builtin_unreachable)
         | 
| 2116 | 
            +
            #  define XXH_UNREACHABLE() __builtin_unreachable()
         | 
| 2117 | 
            +
             | 
| 2118 | 
            +
            #elif defined(_MSC_VER)
         | 
| 2119 | 
            +
            #  define XXH_UNREACHABLE() __assume(0)
         | 
| 2120 | 
            +
             | 
| 2121 | 
            +
            #else
         | 
| 2122 | 
            +
            #  define XXH_UNREACHABLE()
         | 
| 2123 | 
            +
            #endif
         | 
| 2124 | 
            +
             | 
| 2125 | 
            +
            #if XXH_HAS_BUILTIN(__builtin_assume)
         | 
| 2126 | 
            +
            #  define XXH_ASSUME(c) __builtin_assume(c)
         | 
| 2127 | 
            +
            #else
         | 
| 2128 | 
            +
            #  define XXH_ASSUME(c) if (!(c)) { XXH_UNREACHABLE(); }
         | 
| 2129 | 
            +
            #endif
         | 
| 2130 | 
            +
             | 
| 2025 2131 | 
             
            /*!
         | 
| 2026 2132 | 
             
             * @internal
         | 
| 2027 2133 | 
             
             * @def XXH_rotl32(x,r)
         | 
| @@ -2211,9 +2317,9 @@ static xxh_u32 XXH32_round(xxh_u32 acc, xxh_u32 input) | |
| 2211 2317 | 
             
                 *   can load data, while v3 can multiply. SSE forces them to operate
         | 
| 2212 2318 | 
             
                 *   together.
         | 
| 2213 2319 | 
             
                 *
         | 
| 2214 | 
            -
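                 * This is also enabled on AArch64, as Clang autovectorizes it incorrectly
         | 
| 2215 | 
            -
                 * and it is pointless writing a NEON implementation that is basically the
         | 
| 2216 | 
            -
                 * same speed as scalar for XXH32.
         | 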
| 2320 | 
            +
                 * This is also enabled on AArch64, as Clang is *very aggressive* in vectorizing
         | 
| 2321 | 
            +
                 * the loop. NEON is only faster on the A53, and with the newer cores, it is less
         | 
| 2322 | 
            +
                 * than half the speed.
         | 
| 2217 2323 | 
             
                 */
         | 
| 2218 2324 | 
             
                XXH_COMPILER_GUARD(acc);
         | 
| 2219 2325 | 
             
            #endif
         | 
| @@ -2288,41 +2394,41 @@ XXH32_finalize(xxh_u32 hash, const xxh_u8* ptr, size_t len, XXH_alignment align) | |
| 2288 2394 | 
             
                } else {
         | 
| 2289 2395 | 
             
                     switch(len&15) /* or switch(bEnd - p) */ {
         | 
| 2290 2396 | 
             
                       case 12:      XXH_PROCESS4;
         | 
| 2291 | 
            -
                                     XXH_FALLTHROUGH;
         | 
| 2397 | 
            +
                                     XXH_FALLTHROUGH;  /* fallthrough */
         | 
| 2292 2398 | 
             
                       case 8:       XXH_PROCESS4;
         | 
| 2293 | 
            -
                                     XXH_FALLTHROUGH;
         | 
| 2399 | 
            +
                                     XXH_FALLTHROUGH;  /* fallthrough */
         | 
| 2294 2400 | 
             
                       case 4:       XXH_PROCESS4;
         | 
| 2295 2401 | 
             
                                     return XXH32_avalanche(hash);
         | 
| 2296 2402 |  | 
| 2297 2403 | 
             
                       case 13:      XXH_PROCESS4;
         | 
| 2298 | 
            -
                                     XXH_FALLTHROUGH;
         | 
| 2404 | 
            +
                                     XXH_FALLTHROUGH;  /* fallthrough */
         | 
| 2299 2405 | 
             
                       case 9:       XXH_PROCESS4;
         | 
| 2300 | 
            -
                                     XXH_FALLTHROUGH;
         | 
| 2406 | 
            +
                                     XXH_FALLTHROUGH;  /* fallthrough */
         | 
| 2301 2407 | 
             
                       case 5:       XXH_PROCESS4;
         | 
| 2302 2408 | 
             
                                     XXH_PROCESS1;
         | 
| 2303 2409 | 
             
                                     return XXH32_avalanche(hash);
         | 
| 2304 2410 |  | 
| 2305 2411 | 
             
                       case 14:      XXH_PROCESS4;
         | 
| 2306 | 
            -
                                     XXH_FALLTHROUGH;
         | 
| 2412 | 
            +
                                     XXH_FALLTHROUGH;  /* fallthrough */
         | 
| 2307 2413 | 
             
                       case 10:      XXH_PROCESS4;
         | 
| 2308 | 
            -
                                     XXH_FALLTHROUGH;
         | 
| 2414 | 
            +
                                     XXH_FALLTHROUGH;  /* fallthrough */
         | 
| 2309 2415 | 
             
                       case 6:       XXH_PROCESS4;
         | 
| 2310 2416 | 
             
                                     XXH_PROCESS1;
         | 
| 2311 2417 | 
             
                                     XXH_PROCESS1;
         | 
| 2312 2418 | 
             
                                     return XXH32_avalanche(hash);
         | 
| 2313 2419 |  | 
| 2314 2420 | 
             
                       case 15:      XXH_PROCESS4;
         | 
| 2315 | 
            -
                                     XXH_FALLTHROUGH;
         | 
| 2421 | 
            +
                                     XXH_FALLTHROUGH;  /* fallthrough */
         | 
| 2316 2422 | 
             
                       case 11:      XXH_PROCESS4;
         | 
| 2317 | 
            -
                                     XXH_FALLTHROUGH;
         | 
| 2423 | 
            +
                                     XXH_FALLTHROUGH;  /* fallthrough */
         | 
| 2318 2424 | 
             
                       case 7:       XXH_PROCESS4;
         | 
| 2319 | 
            -
                                     XXH_FALLTHROUGH;
         | 
| 2425 | 
            +
                                     XXH_FALLTHROUGH;  /* fallthrough */
         | 
| 2320 2426 | 
             
                       case 3:       XXH_PROCESS1;
         | 
| 2321 | 
            -
                                     XXH_FALLTHROUGH;
         | 
| 2427 | 
            +
                                     XXH_FALLTHROUGH;  /* fallthrough */
         | 
| 2322 2428 | 
             
                       case 2:       XXH_PROCESS1;
         | 
| 2323 | 
            -
                                     XXH_FALLTHROUGH;
         | 
| 2429 | 
            +
                                     XXH_FALLTHROUGH;  /* fallthrough */
         | 
| 2324 2430 | 
             
                       case 1:       XXH_PROCESS1;
         | 
| 2325 | 
            -
                                     XXH_FALLTHROUGH;
         | 
| 2431 | 
            +
                                     XXH_FALLTHROUGH;  /* fallthrough */
         | 
| 2326 2432 | 
             
                       case 0:       return XXH32_avalanche(hash);
         | 
| 2327 2433 | 
             
                    }
         | 
| 2328 2434 | 
             
                    XXH_ASSERT(0);
         | 
| @@ -2590,7 +2696,7 @@ static xxh_u64 XXH_read64(const void* ptr) | |
| 2590 2696 |  | 
| 2591 2697 | 
             
            /*
         | 
| 2592 2698 | 
             
             * Portable and safe solution. Generally efficient.
         | 
| 2593 | 
            -
             * see: http://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html
         | 
| 2699 | 
            +
             * see: https://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html
         | 
| 2594 2700 | 
             
             */
         | 
| 2595 2701 | 
             
            static xxh_u64 XXH_read64(const void* memPtr)
         | 
| 2596 2702 | 
             
            {
         | 
| @@ -2823,7 +2929,7 @@ XXH64_endian_align(const xxh_u8* input, size_t len, xxh_u64 seed, XXH_alignment | |
| 2823 2929 |  | 
| 2824 2930 |  | 
| 2825 2931 | 
             
            /*! @ingroup XXH64_family */
         | 
| 2826 | 
            -
            XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t len, XXH64_hash_t seed)
         | 
| 2932 | 
            +
            XXH_PUBLIC_API XXH64_hash_t XXH64 (XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)
         | 
| 2827 2933 | 
             
            {
         | 
| 2828 2934 | 
             
            #if !defined(XXH_NO_STREAM) && XXH_SIZE_OPT >= 2
         | 
| 2829 2935 | 
             
                /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
         | 
| @@ -2857,13 +2963,13 @@ XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr) | |
| 2857 2963 | 
             
            }
         | 
| 2858 2964 |  | 
| 2859 2965 | 
             
            /*! @ingroup XXH64_family */
         | 
| 2860 | 
            -
            XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dstState, const XXH64_state_t* srcState)
         | 
| 2966 | 
            +
            XXH_PUBLIC_API void XXH64_copyState(XXH_NOESCAPE XXH64_state_t* dstState, const XXH64_state_t* srcState)
         | 
| 2861 2967 | 
             
            {
         | 
| 2862 2968 | 
             
                XXH_memcpy(dstState, srcState, sizeof(*dstState));
         | 
| 2863 2969 | 
             
            }
         | 
| 2864 2970 |  | 
| 2865 2971 | 
             
            /*! @ingroup XXH64_family */
         | 
| 2866 | 
            -
            XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, XXH64_hash_t seed)
         | 
| 2972 | 
            +
            XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH_NOESCAPE XXH64_state_t* statePtr, XXH64_hash_t seed)
         | 
| 2867 2973 | 
             
            {
         | 
| 2868 2974 | 
             
                XXH_ASSERT(statePtr != NULL);
         | 
| 2869 2975 | 
             
                memset(statePtr, 0, sizeof(*statePtr));
         | 
| @@ -2876,7 +2982,7 @@ XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, XXH64_hash_t s | |
| 2876 2982 |  | 
| 2877 2983 | 
             
            /*! @ingroup XXH64_family */
         | 
| 2878 2984 | 
             
            XXH_PUBLIC_API XXH_errorcode
         | 
| 2879 | 
            -
            XXH64_update (XXH64_state_t* state, const void* input, size_t len)
         | 
| 2985 | 
            +
            XXH64_update (XXH_NOESCAPE XXH64_state_t* state, XXH_NOESCAPE const void* input, size_t len)
         | 
| 2880 2986 | 
             
            {
         | 
| 2881 2987 | 
             
                if (input==NULL) {
         | 
| 2882 2988 | 
             
                    XXH_ASSERT(len == 0);
         | 
| @@ -2927,7 +3033,7 @@ XXH64_update (XXH64_state_t* state, const void* input, size_t len) | |
| 2927 3033 |  | 
| 2928 3034 |  | 
| 2929 3035 | 
             
            /*! @ingroup XXH64_family */
         | 
| 2930 | 
            -
            XXH_PUBLIC_API XXH64_hash_t XXH64_digest(const XXH64_state_t* state)
         | 
| 3036 | 
            +
            XXH_PUBLIC_API XXH64_hash_t XXH64_digest(XXH_NOESCAPE const XXH64_state_t* state)
         | 
| 2931 3037 | 
             
            {
         | 
| 2932 3038 | 
             
                xxh_u64 h64;
         | 
| 2933 3039 |  | 
| @@ -2950,7 +3056,7 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_digest(const XXH64_state_t* state) | |
| 2950 3056 | 
             
            /******* Canonical representation   *******/
         | 
| 2951 3057 |  | 
| 2952 3058 | 
             
            /*! @ingroup XXH64_family */
         | 
| 2953 | 
            -
            XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash)
         | 
| 3059 | 
            +
            XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH_NOESCAPE XXH64_canonical_t* dst, XXH64_hash_t hash)
         | 
| 2954 3060 | 
             
            {
         | 
| 2955 3061 | 
             
                XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t));
         | 
| 2956 3062 | 
             
                if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash);
         | 
| @@ -2958,7 +3064,7 @@ XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t | |
| 2958 3064 | 
             
            }
         | 
| 2959 3065 |  | 
| 2960 3066 | 
             
            /*! @ingroup XXH64_family */
         | 
| 2961 | 
            -
            XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src)
         | 
| 3067 | 
            +
            XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_canonical_t* src)
         | 
| 2962 3068 | 
             
            {
         | 
| 2963 3069 | 
             
                return XXH_readBE64(src);
         | 
| 2964 3070 | 
             
            }
         | 
| @@ -2979,11 +3085,19 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src | |
| 2979 3085 | 
             
            /* ===   Compiler specifics   === */
         | 
| 2980 3086 |  | 
| 2981 3087 | 
             
            #if ((defined(sun) || defined(__sun)) && __cplusplus) /* Solaris includes __STDC_VERSION__ with C++. Tested with GCC 5.5 */
         | 
| 2982 | 
            -
            #  define XXH_RESTRICT /* disable */
         | 
| 3088 | 
            +
            #  define XXH_RESTRICT   /* disable */
         | 
| 2983 3089 | 
             
            #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* >= C99 */
         | 
| 2984 3090 | 
             
            #  define XXH_RESTRICT   restrict
         | 
| 3091 | 
            +
            #elif (defined (__GNUC__) && ((__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))) \
         | 
| 3092 | 
            +
               || (defined (__clang__)) \
         | 
| 3093 | 
            +
               || (defined (_MSC_VER) && (_MSC_VER >= 1400)) \
         | 
| 3094 | 
            +
               || (defined (__INTEL_COMPILER) && (__INTEL_COMPILER >= 1300))
         | 
| 3095 | 
            +
            /*
         | 
| 3096 | 
            +
             * There are a LOT more compilers that recognize __restrict but this
         | 
| 3097 | 
            +
             * covers the major ones.
         | 
| 3098 | 
            +
             */
         | 
| 3099 | 
            +
            #  define XXH_RESTRICT   __restrict
         | 
| 2985 3100 | 
             
            #else
         | 
| 2986 | 
            -
            /* Note: it might be useful to define __restrict or __restrict__ for some C++ compilers */
         | 
| 2987 3101 | 
             
            #  define XXH_RESTRICT   /* disable */
         | 
| 2988 3102 | 
             
            #endif
         | 
| 2989 3103 |  | 
| @@ -2998,9 +3112,12 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src | |
| 2998 3112 | 
             
            #endif
         | 
| 2999 3113 |  | 
| 3000 3114 | 
             
            #if defined(__GNUC__) || defined(__clang__)
         | 
| 3115 | 
            +
            #  if defined(__ARM_FEATURE_SVE)
         | 
| 3116 | 
            +
            #    include <arm_sve.h>
         | 
| 3117 | 
            +
            #  endif
         | 
| 3001 3118 | 
             
            #  if defined(__ARM_NEON__) || defined(__ARM_NEON) \
         | 
| 3002 | 
            -
               || defined(__aarch64__)  || defined(_M_ARM) \
         | 
| 3003 | 
            -
               || defined(_M_ARM64)     || defined(_M_ARM64EC)
         | 
| 3119 | 
            +
               || (defined(_M_ARM) && _M_ARM >= 7) \
         | 
| 3120 | 
            +
               || defined(_M_ARM64) || defined(_M_ARM64EC)
         | 
| 3004 3121 | 
             
            #    define inline __inline__  /* circumvent a clang bug */
         | 
| 3005 3122 | 
             
            #    include <arm_neon.h>
         | 
| 3006 3123 | 
             
            #    undef inline
         | 
| @@ -3125,12 +3242,13 @@ enum XXH_VECTOR_TYPE /* fake enum */ { | |
| 3125 3242 | 
             
                XXH_AVX512 = 3,  /*!< AVX512 for Skylake and Icelake */
         | 
| 3126 3243 | 
             
                XXH_NEON   = 4,  /*!< NEON for most ARMv7-A and all AArch64 */
         | 
| 3127 3244 | 
             
                XXH_VSX    = 5,  /*!< VSX and ZVector for POWER8/z13 (64-bit) */
         | 
| 3245 | 
            +
                XXH_SVE    = 6,  /*!< SVE for some ARMv8-A and ARMv9-A */
         | 
| 3128 3246 | 
             
            };
         | 
| 3129 3247 | 
             
            /*!
         | 
| 3130 3248 | 
             
             * @ingroup tuning
         | 
| 3131 3249 | 
             
             * @brief Selects the minimum alignment for XXH3's accumulators.
         | 
| 3132 3250 | 
             
             *
         | 
| 3133 | 
            -
             * When using SIMD, this should match the alignment  | 
| 3251 | 
            +
             * When using SIMD, this should match the alignment required for said vector
         | 
| 3134 3252 | 
             
             * type, so, for example, 32 for AVX2.
         | 
| 3135 3253 | 
             
             *
         | 
| 3136 3254 | 
             
             * Default: Auto detected.
         | 
| @@ -3146,10 +3264,13 @@ enum XXH_VECTOR_TYPE /* fake enum */ { | |
| 3146 3264 | 
             
            #  define XXH_AVX512 3
         | 
| 3147 3265 | 
             
            #  define XXH_NEON   4
         | 
| 3148 3266 | 
             
            #  define XXH_VSX    5
         | 
| 3267 | 
            +
            #  define XXH_SVE    6
         | 
| 3149 3268 | 
             
            #endif
         | 
| 3150 3269 |  | 
| 3151 3270 | 
             
            #ifndef XXH_VECTOR    /* can be defined on command line */
         | 
| 3152 | 
            -
            #  if ( \
         | 
| 3271 | 
            +
            #  if defined(__ARM_FEATURE_SVE)
         | 
| 3272 | 
            +
            #    define XXH_VECTOR XXH_SVE
         | 
| 3273 | 
            +
            #  elif ( \
         | 
| 3153 3274 | 
             
                    defined(__ARM_NEON__) || defined(__ARM_NEON) /* gcc */ \
         | 
| 3154 3275 | 
             
                 || defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) /* msvc */ \
         | 
| 3155 3276 | 
             
               ) && ( \
         | 
| @@ -3172,6 +3293,17 @@ enum XXH_VECTOR_TYPE /* fake enum */ { | |
| 3172 3293 | 
             
            #  endif
         | 
| 3173 3294 | 
             
            #endif
         | 
| 3174 3295 |  | 
| 3296 | 
            +
            /* __ARM_FEATURE_SVE is only supported by GCC & Clang. */
         | 
| 3297 | 
            +
            #if (XXH_VECTOR == XXH_SVE) && !defined(__ARM_FEATURE_SVE)
         | 
| 3298 | 
            +
            #  ifdef _MSC_VER
         | 
| 3299 | 
            +
            #    pragma warning(once : 4606)
         | 
| 3300 | 
            +
            #  else
         | 
| 3301 | 
            +
            #    warning "__ARM_FEATURE_SVE isn't supported. Use SCALAR instead."
         | 
| 3302 | 
            +
            #  endif
         | 
| 3303 | 
            +
            #  undef XXH_VECTOR
         | 
| 3304 | 
            +
            #  define XXH_VECTOR XXH_SCALAR
         | 
| 3305 | 
            +
            #endif
         | 
| 3306 | 
            +
             | 
| 3175 3307 | 
             
            /*
         | 
| 3176 3308 | 
             
             * Controls the alignment of the accumulator,
         | 
| 3177 3309 | 
             
             * for compatibility with aligned vector loads, which are usually faster.
         | 
| @@ -3191,16 +3323,26 @@ enum XXH_VECTOR_TYPE /* fake enum */ { | |
| 3191 3323 | 
             
            #     define XXH_ACC_ALIGN 16
         | 
| 3192 3324 | 
             
            #  elif XXH_VECTOR == XXH_AVX512  /* avx512 */
         | 
| 3193 3325 | 
             
            #     define XXH_ACC_ALIGN 64
         | 
| 3326 | 
            +
            #  elif XXH_VECTOR == XXH_SVE   /* sve */
         | 
| 3327 | 
            +
            #     define XXH_ACC_ALIGN 64
         | 
| 3194 3328 | 
             
            #  endif
         | 
| 3195 3329 | 
             
            #endif
         | 
| 3196 3330 |  | 
| 3197 3331 | 
             
            #if defined(XXH_X86DISPATCH) || XXH_VECTOR == XXH_SSE2 \
         | 
| 3198 3332 | 
             
                || XXH_VECTOR == XXH_AVX2 || XXH_VECTOR == XXH_AVX512
         | 
| 3199 3333 | 
             
            #  define XXH_SEC_ALIGN XXH_ACC_ALIGN
         | 
| 3334 | 
            +
            #elif XXH_VECTOR == XXH_SVE
         | 
| 3335 | 
            +
            #  define XXH_SEC_ALIGN XXH_ACC_ALIGN
         | 
| 3200 3336 | 
             
            #else
         | 
| 3201 3337 | 
             
            #  define XXH_SEC_ALIGN 8
         | 
| 3202 3338 | 
             
            #endif
         | 
| 3203 3339 |  | 
| 3340 | 
            +
            #if defined(__GNUC__) || defined(__clang__)
         | 
| 3341 | 
            +
            #  define XXH_ALIASING __attribute__((may_alias))
         | 
| 3342 | 
            +
            #else
         | 
| 3343 | 
            +
            #  define XXH_ALIASING /* nothing */
         | 
| 3344 | 
            +
            #endif
         | 
| 3345 | 
            +
             | 
| 3204 3346 | 
             
            /*
         | 
| 3205 3347 | 
             
             * UGLY HACK:
         | 
| 3206 3348 | 
             
             * GCC usually generates the best code with -O3 for xxHash.
         | 
| @@ -3229,107 +3371,16 @@ enum XXH_VECTOR_TYPE /* fake enum */ { | |
| 3229 3371 | 
             
            #  pragma GCC optimize("-O2")
         | 
| 3230 3372 | 
             
            #endif
         | 
| 3231 3373 |  | 
| 3232 | 
            -
             | 
| 3233 3374 | 
             
            #if XXH_VECTOR == XXH_NEON
         | 
| 3375 | 
            +
             | 
| 3234 3376 | 
             
            /*
         | 
| 3235 | 
            -
             * NEON's setup for vmlal_u32 is a little more complicated than it is on
         | 
| 3236 | 
            -
             * SSE2, AVX2, and VSX.
         | 
| 3237 | 
            -
             *
         | 
| 3238 | 
            -
             * While PMULUDQ and VMULEUW both perform a mask, VMLAL.U32 performs an upcast.
         | 
| 3239 | 
            -
             *
         | 
| 3240 | 
            -
             * To do the same operation, the 128-bit 'Q' register needs to be split into
         | 
| 3241 | 
            -
             * two 64-bit 'D' registers, performing this operation::
         | 
| 3242 | 
            -
             *
         | 
| 3243 | 
            -
             *   [                a                 |                 b                ]
         | 
| 3244 | 
            -
             *            |              '---------. .--------'                |
         | 
| 3245 | 
            -
             *            |                         x                          |
         | 
| 3246 | 
            -
             *            |              .---------' '--------.                |
         | 
| 3247 | 
            -
             *   [ a & 0xFFFFFFFF | b & 0xFFFFFFFF ],[    a >> 32     |     b >> 32    ]
         | 
| 3248 | 
            -
             *
         | 
| 3249 | 
            -
             * Due to significant changes in aarch64, the fastest method for aarch64 is
         | 
| 3250 | 
            -
             * completely different than the fastest method for ARMv7-A.
         | 
| 3251 | 
            -
             *
         | 
| 3252 | 
            -
             * ARMv7-A treats D registers as unions overlaying Q registers, so modifying
         | 
| 3253 | 
            -
             * D11 will modify the high half of Q5. This is similar to how modifying AH
         | 
| 3254 | 
            -
             * will only affect bits 8-15 of AX on x86.
         | 
| 3255 | 
            -
             *
         | 
| 3256 | 
            -
             * VZIP takes two registers, and puts even lanes in one register and odd lanes
         | 
| 3257 | 
            -
             * in the other.
         | 
| 3258 | 
            -
             *
         | 
| 3259 | 
            -
             * On ARMv7-A, this strangely modifies both parameters in place instead of
         | 
| 3260 | 
            -
             * taking the usual 3-operand form.
         | 
| 3261 | 
            -
             *
         | 
| 3262 | 
            -
             * Therefore, if we want to do this, we can simply use a D-form VZIP.32 on the
         | 
| 3263 | 
            -
             * lower and upper halves of the Q register to end up with the high and low
         | 
| 3264 | 
            -
             * halves where we want - all in one instruction.
         | 
| 3265 | 
            -
             *
         | 
| 3266 | 
            -
             *   vzip.32   d10, d11       @ d10 = { d10[0], d11[0] }; d11 = { d10[1], d11[1] }
         | 
| 3267 | 
            -
             *
         | 
| 3268 | 
            -
             * Unfortunately we need inline assembly for this: Instructions modifying two
         | 
| 3269 | 
            -
             * registers at once is not possible in GCC or Clang's IR, and they have to
         | 
| 3270 | 
            -
             * create a copy.
         | 
| 3377 | 
            +
             * UGLY HACK: While AArch64 GCC on Linux does not seem to care, on macOS, GCC -O3
         | 
| 3378 | 
            +
             * optimizes out the entire hashLong loop because of the aliasing violation.
         | 
| 3271 3379 | 
             
             *
         | 
| 3272 | 
            -
             * aarch64 requires a different approach.
         | 
| 3273 | 
            -
             *
         | 
| 3274 | 
            -
             * In order to make it easier to write a decent compiler for aarch64, many
         | 
| 3275 | 
            -
             * quirks were removed, such as conditional execution.
         | 
| 3276 | 
            -
             *
         | 
| 3277 | 
            -
             * NEON was also affected by this.
         | 
| 3278 | 
            -
             *
         | 
| 3279 | 
            -
             * aarch64 cannot access the high bits of a Q-form register, and writes to a
         | 
| 3280 | 
            -
             * D-form register zero the high bits, similar to how writes to W-form scalar
         | 
| 3281 | 
            -
             * registers (or DWORD registers on x86_64) work.
         | 
| 3282 | 
            -
             *
         | 
| 3283 | 
            -
             * The formerly free vget_high intrinsics now require a vext (with a few
         | 
| 3284 | 
            -
             * exceptions)
         | 
| 3285 | 
            -
             *
         | 
| 3286 | 
            -
             * Additionally, VZIP was replaced by ZIP1 and ZIP2, which are the equivalent
         | 
| 3287 | 
            -
             * of PUNPCKL* and PUNPCKH* in SSE, respectively, in order to only modify one
         | 
| 3288 | 
            -
             * operand.
         | 
| 3289 | 
            -
             *
         | 
| 3290 | 
            -
             * The equivalent of the VZIP.32 on the lower and upper halves would be this
         | 
| 3291 | 
            -
             * mess:
         | 
| 3292 | 
            -
             *
         | 
| 3293 | 
            -
             *   ext     v2.4s, v0.4s, v0.4s, #2 // v2 = { v0[2], v0[3], v0[0], v0[1] }
         | 
| 3294 | 
            -
             *   zip1    v1.2s, v0.2s, v2.2s     // v1 = { v0[0], v2[0] }
         | 
| 3295 | 
            -
             *   zip2    v0.2s, v0.2s, v1.2s     // v0 = { v0[1], v2[1] }
         | 
| 3296 | 
            -
             *
         | 
| 3297 | 
            -
             * Instead, we use a literal downcast, vmovn_u64 (XTN), and vshrn_n_u64 (SHRN):
         | 
| 3298 | 
            -
             *
         | 
| 3299 | 
            -
             *   shrn    v1.2s, v0.2d, #32  // v1 = (uint32x2_t)(v0 >> 32);
         | 
| 3300 | 
            -
             *   xtn     v0.2s, v0.2d       // v0 = (uint32x2_t)(v0 & 0xFFFFFFFF);
         | 
| 3301 | 
            -
             *
         | 
| 3302 | 
            -
             * This is available on ARMv7-A, but is less efficient than a single VZIP.32.
         | 
| 3380 | 
            +
             * However, GCC is also inefficient at load-store optimization with vld1q/vst1q,
         | 
| 3381 | 
            +
             * so the only option is to mark it as aliasing.
         | 
| 3303 3382 | 
             
             */
         | 
| 3304 | 
            -
             | 
| 3305 | 
            -
            /*!
         | 
| 3306 | 
            -
             * Function-like macro:
         | 
| 3307 | 
            -
             * void XXH_SPLIT_IN_PLACE(uint64x2_t &in, uint32x2_t &outLo, uint32x2_t &outHi)
         | 
| 3308 | 
            -
             * {
         | 
| 3309 | 
            -
             *     outLo = (uint32x2_t)(in & 0xFFFFFFFF);
         | 
| 3310 | 
            -
             *     outHi = (uint32x2_t)(in >> 32);
         | 
| 3311 | 
            -
             *     in = UNDEFINED;
         | 
| 3312 | 
            -
             * }
         | 
| 3313 | 
            -
             */
         | 
| 3314 | 
            -
            # if !defined(XXH_NO_VZIP_HACK) /* define to disable */ \
         | 
| 3315 | 
            -
               && (defined(__GNUC__) || defined(__clang__)) \
         | 
| 3316 | 
            -
               && (defined(__arm__) || defined(__thumb__) || defined(_M_ARM))
         | 
| 3317 | 
            -
            #  define XXH_SPLIT_IN_PLACE(in, outLo, outHi)                                              \
         | 
| 3318 | 
            -
                do {                                                                                    \
         | 
| 3319 | 
            -
                  /* Undocumented GCC/Clang operand modifier: %e0 = lower D half, %f0 = upper D half */ \
         | 
| 3320 | 
            -
                  /* https://github.com/gcc-mirror/gcc/blob/38cf91e5/gcc/config/arm/arm.c#L22486 */     \
         | 
| 3321 | 
            -
                  /* https://github.com/llvm-mirror/llvm/blob/2c4ca683/lib/Target/ARM/ARMAsmPrinter.cpp#L399 */ \
         | 
| 3322 | 
            -
                  __asm__("vzip.32  %e0, %f0" : "+w" (in));                                             \
         | 
| 3323 | 
            -
                  (outLo) = vget_low_u32 (vreinterpretq_u32_u64(in));                                   \
         | 
| 3324 | 
            -
                  (outHi) = vget_high_u32(vreinterpretq_u32_u64(in));                                   \
         | 
| 3325 | 
            -
               } while (0)
         | 
| 3326 | 
            -
            # else
         | 
| 3327 | 
            -
            #  define XXH_SPLIT_IN_PLACE(in, outLo, outHi)                                            \
         | 
| 3328 | 
            -
                do {                                                                                  \
         | 
| 3329 | 
            -
                  (outLo) = vmovn_u64    (in);                                                        \
         | 
| 3330 | 
            -
                  (outHi) = vshrn_n_u64  ((in), 32);                                                  \
         | 
| 3331 | 
            -
                } while (0)
         | 
| 3332 | 
            -
            # endif
         | 
| 3383 | 
            +
            typedef uint64x2_t xxh_aliasing_uint64x2_t XXH_ALIASING;
         | 
| 3333 3384 |  | 
| 3334 3385 | 
             
            /*!
         | 
| 3335 3386 | 
             
             * @internal
         | 
| @@ -3347,7 +3398,7 @@ enum XXH_VECTOR_TYPE /* fake enum */ { | |
| 3347 3398 | 
             
            #if defined(__aarch64__) && defined(__GNUC__) && !defined(__clang__)
         | 
| 3348 3399 | 
             
            XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr) /* silence -Wcast-align */
         | 
| 3349 3400 | 
             
            {
         | 
| 3350 | 
            -
                return *( | 
| 3401 | 
            +
                return *(xxh_aliasing_uint64x2_t const *)ptr;
         | 
| 3351 3402 | 
             
            }
         | 
| 3352 3403 | 
             
            #else
         | 
| 3353 3404 | 
             
            XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr)
         | 
| @@ -3355,38 +3406,75 @@ XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr) | |
| 3355 3406 | 
             
                return vreinterpretq_u64_u8(vld1q_u8((uint8_t const*)ptr));
         | 
| 3356 3407 | 
             
            }
         | 
| 3357 3408 | 
             
            #endif
         | 
| 3409 | 
            +
             | 
| 3410 | 
            +
            /*!
         | 
| 3411 | 
            +
             * @internal
         | 
| 3412 | 
            +
             * @brief `vmlal_u32` on low and high halves of a vector.
         | 
| 3413 | 
            +
             *
         | 
| 3414 | 
            +
             * This is a workaround for AArch64 GCC < 11 which implemented arm_neon.h with
         | 
| 3415 | 
            +
             * inline assembly and was therefore incapable of merging the `vget_{low, high}_u32`
         | 
| 3416 | 
            +
             * with `vmlal_u32`.
         | 
| 3417 | 
            +
             */
         | 
| 3418 | 
            +
            #if defined(__aarch64__) && defined(__GNUC__) && !defined(__clang__) && __GNUC__ < 11
         | 
| 3419 | 
            +
            XXH_FORCE_INLINE uint64x2_t
         | 
| 3420 | 
            +
            XXH_vmlal_low_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
         | 
| 3421 | 
            +
            {
         | 
| 3422 | 
            +
                /* Inline assembly is the only way */
         | 
| 3423 | 
            +
                __asm__("umlal   %0.2d, %1.2s, %2.2s" : "+w" (acc) : "w" (lhs), "w" (rhs));
         | 
| 3424 | 
            +
                return acc;
         | 
| 3425 | 
            +
            }
         | 
| 3426 | 
            +
            XXH_FORCE_INLINE uint64x2_t
         | 
| 3427 | 
            +
            XXH_vmlal_high_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
         | 
| 3428 | 
            +
            {
         | 
| 3429 | 
            +
                /* This intrinsic works as expected */
         | 
| 3430 | 
            +
                return vmlal_high_u32(acc, lhs, rhs);
         | 
| 3431 | 
            +
            }
         | 
| 3432 | 
            +
            #else
         | 
| 3433 | 
            +
            /* Portable intrinsic versions */
         | 
| 3434 | 
            +
            XXH_FORCE_INLINE uint64x2_t
         | 
| 3435 | 
            +
            XXH_vmlal_low_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
         | 
| 3436 | 
            +
            {
         | 
| 3437 | 
            +
                return vmlal_u32(acc, vget_low_u32(lhs), vget_low_u32(rhs));
         | 
| 3438 | 
            +
            }
         | 
| 3439 | 
            +
            /*! @copydoc XXH_vmlal_low_u32
         | 
| 3440 | 
            +
             * Assume the compiler converts this to vmlal_high_u32 on aarch64 */
         | 
| 3441 | 
            +
            XXH_FORCE_INLINE uint64x2_t
         | 
| 3442 | 
            +
            XXH_vmlal_high_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
         | 
| 3443 | 
            +
            {
         | 
| 3444 | 
            +
                return vmlal_u32(acc, vget_high_u32(lhs), vget_high_u32(rhs));
         | 
| 3445 | 
            +
            }
         | 
| 3446 | 
            +
            #endif
         | 
| 3447 | 
            +
             | 
| 3358 3448 | 
             
            /*!
         | 
| 3359 3449 | 
             
             * @ingroup tuning
         | 
| 3360 3450 | 
             
             * @brief Controls the NEON to scalar ratio for XXH3
         | 
| 3361 3451 | 
             
             *
         | 
| 3362 | 
            -
             *  | 
| 3363 | 
            -
             * 2 lanes on scalar by default.
         | 
| 3452 | 
            +
             * This can be set to 2, 4, 6, or 8.
         | 
| 3364 3453 | 
             
             *
         | 
| 3365 | 
            -
             *  | 
| 3366 | 
            -
             * emulated 64-bit arithmetic is too slow.
         | 
| 3454 | 
            +
             * ARM Cortex CPUs are _very_ sensitive to how their pipelines are used.
         | 
| 3367 3455 | 
             
             *
         | 
| 3368 | 
            -
             *  | 
| 3456 | 
            +
             * For example, the Cortex-A73 can dispatch 3 micro-ops per cycle, but only 2 of those
         | 
| 3457 | 
            +
             * can be NEON. If you are only using NEON instructions, you are only using 2/3 of the CPU
         | 
| 3458 | 
            +
             * bandwidth.
         | 
| 3369 3459 | 
             
             *
         | 
| 3370 | 
            -
             *  | 
| 3371 | 
            -
             * have more than 2 NEON (F0/F1) micro-ops. If you are only using NEON instructions,
         | 
| 3372 | 
            -
             * you are only using 2/3 of the CPU bandwidth.
         | 
| 3373 | 
            -
             *
         | 
| 3374 | 
            -
             * This is even more noticable on the more advanced cores like the A76 which
         | 
| 3460 | 
            +
             * This is even more noticeable on the more advanced cores like the Cortex-A76 which
         | 
| 3375 3461 | 
             
             * can dispatch 8 micro-ops per cycle, but still only 2 NEON micro-ops at once.
         | 
| 3376 3462 | 
             
             *
         | 
| 3377 | 
            -
             * Therefore,  | 
| 3378 | 
            -
             *  | 
| 3379 | 
            -
             * | 
| 3380 | 
            -
             *  | 
| 3463 | 
            +
             * Therefore, to make the most out of the pipeline, it is beneficial to run 6 NEON lanes
         | 
| 3464 | 
            +
             * and 2 scalar lanes, which is chosen by default.
         | 
| 3465 | 
            +
             *
         | 
| 3466 | 
            +
             * This does not apply to Apple processors or 32-bit processors, which run better with
         | 
| 3467 | 
            +
             * full NEON. These will default to 8. Additionally, size-optimized builds run 8 lanes.
         | 
| 3381 3468 | 
             
             *
         | 
| 3382 3469 | 
             
             * This change benefits CPUs with large micro-op buffers without negatively affecting
         | 
| 3383 | 
            -
             * other CPUs:
         | 
| 3470 | 
            +
             * most other CPUs:
         | 
| 3384 3471 | 
             
             *
         | 
| 3385 3472 | 
             
             *  | Chipset               | Dispatch type       | NEON only | 6:2 hybrid | Diff. |
         | 
| 3386 3473 | 
             
             *  |:----------------------|:--------------------|----------:|-----------:|------:|
         | 
| 3387 3474 | 
             
             *  | Snapdragon 730 (A76)  | 2 NEON/8 micro-ops  |  8.8 GB/s |  10.1 GB/s |  ~16% |
         | 
| 3388 3475 | 
             
             *  | Snapdragon 835 (A73)  | 2 NEON/3 micro-ops  |  5.1 GB/s |   5.3 GB/s |   ~5% |
         | 
| 3389 3476 | 
             
             *  | Marvell PXA1928 (A53) | In-order dual-issue |  1.9 GB/s |   1.9 GB/s |    0% |
         | 
| 3477 | 
            +
             *  | Apple M1              | 4 NEON/8 micro-ops  | 37.3 GB/s |  36.1 GB/s |  ~-3% |
         | 
| 3390 3478 | 
             
             *
         | 
| 3391 3479 | 
             
             * It also seems to fix some bad codegen on GCC, making it almost as fast as clang.
         | 
| 3392 3480 | 
             
             *
         | 
| @@ -3394,7 +3482,7 @@ XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr) | |
| 3394 3482 | 
             
             */
         | 
| 3395 3483 | 
             
            # ifndef XXH3_NEON_LANES
         | 
| 3396 3484 | 
             
            #  if (defined(__aarch64__) || defined(__arm64__) || defined(_M_ARM64) || defined(_M_ARM64EC)) \
         | 
| 3397 | 
            -
               && XXH_SIZE_OPT <= 0
         | 
| 3485 | 
            +
               && !defined(__APPLE__) && XXH_SIZE_OPT <= 0
         | 
| 3398 3486 | 
             
            #   define XXH3_NEON_LANES 6
         | 
| 3399 3487 | 
             
            #  else
         | 
| 3400 3488 | 
             
            #   define XXH3_NEON_LANES XXH_ACC_NB
         | 
| @@ -3442,6 +3530,11 @@ typedef __vector unsigned long long xxh_u64x2; | |
| 3442 3530 | 
             
            typedef __vector unsigned char xxh_u8x16;
         | 
| 3443 3531 | 
             
            typedef __vector unsigned xxh_u32x4;
         | 
| 3444 3532 |  | 
| 3533 | 
            +
            /*
         | 
| 3534 | 
            +
             * UGLY HACK: Similar to aarch64 macOS GCC, s390x GCC has the same aliasing issue.
         | 
| 3535 | 
            +
             */
         | 
| 3536 | 
            +
            typedef xxh_u64x2 xxh_aliasing_u64x2 XXH_ALIASING;
         | 
| 3537 | 
            +
             | 
| 3445 3538 | 
             
            # ifndef XXH_VSX_BE
         | 
| 3446 3539 | 
             
            #  if defined(__BIG_ENDIAN__) \
         | 
| 3447 3540 | 
             
              || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
         | 
| @@ -3516,6 +3609,20 @@ XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mule(xxh_u32x4 a, xxh_u32x4 b) | |
| 3516 3609 | 
             
            # endif /* XXH_vec_mulo, XXH_vec_mule */
         | 
| 3517 3610 | 
             
            #endif /* XXH_VECTOR == XXH_VSX */
         | 
| 3518 3611 |  | 
| 3612 | 
            +
            #if XXH_VECTOR == XXH_SVE
         | 
| 3613 | 
            +
            #define ACCRND(acc, offset) \
         | 
| 3614 | 
            +
            do { \
         | 
| 3615 | 
            +
                svuint64_t input_vec = svld1_u64(mask, xinput + offset);         \
         | 
| 3616 | 
            +
                svuint64_t secret_vec = svld1_u64(mask, xsecret + offset);       \
         | 
| 3617 | 
            +
                svuint64_t mixed = sveor_u64_x(mask, secret_vec, input_vec);     \
         | 
| 3618 | 
            +
                svuint64_t swapped = svtbl_u64(input_vec, kSwap);                \
         | 
| 3619 | 
            +
                svuint64_t mixed_lo = svextw_u64_x(mask, mixed);                 \
         | 
| 3620 | 
            +
                svuint64_t mixed_hi = svlsr_n_u64_x(mask, mixed, 32);            \
         | 
| 3621 | 
            +
                svuint64_t mul = svmad_u64_x(mask, mixed_lo, mixed_hi, swapped); \
         | 
| 3622 | 
            +
                acc = svadd_u64_x(mask, acc, mul);                               \
         | 
| 3623 | 
            +
            } while (0)
         | 
| 3624 | 
            +
            #endif /* XXH_VECTOR == XXH_SVE */
         | 
| 3625 | 
            +
             | 
| 3519 3626 |  | 
| 3520 3627 | 
             
            /* prefetch
         | 
| 3521 3628 | 
             
             * can be disabled, by declaring XXH_NO_PREFETCH build macro */
         | 
| @@ -3952,31 +4059,33 @@ XXH3_len_17to128_64b(const xxh_u8* XXH_RESTRICT input, size_t len, | |
| 3952 4059 | 
             
                XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
         | 
| 3953 4060 | 
             
                XXH_ASSERT(16 < len && len <= 128);
         | 
| 3954 4061 |  | 
| 3955 | 
            -
                {   xxh_u64 acc = len * XXH_PRIME64_1;
         | 
| 4062 | 
            +
                {   xxh_u64 acc = len * XXH_PRIME64_1, acc_end;
         | 
| 3956 4063 | 
             
            #if XXH_SIZE_OPT >= 1
         | 
| 3957 4064 | 
             
                    /* Smaller and cleaner, but slightly slower. */
         | 
| 3958 | 
            -
                     | 
| 4065 | 
            +
                    unsigned int i = (unsigned int)(len - 1) / 32;
         | 
| 3959 4066 | 
             
                    do {
         | 
| 3960 4067 | 
             
                        acc += XXH3_mix16B(input+16 * i, secret+32*i, seed);
         | 
| 3961 4068 | 
             
                        acc += XXH3_mix16B(input+len-16*(i+1), secret+32*i+16, seed);
         | 
| 3962 4069 | 
             
                    } while (i-- != 0);
         | 
| 4070 | 
            +
                    acc_end = 0;
         | 
| 3963 4071 | 
             
            #else
         | 
| 4072 | 
            +
                    acc += XXH3_mix16B(input+0, secret+0, seed);
         | 
| 4073 | 
            +
                    acc_end = XXH3_mix16B(input+len-16, secret+16, seed);
         | 
| 3964 4074 | 
             
                    if (len > 32) {
         | 
| 4075 | 
            +
                        acc += XXH3_mix16B(input+16, secret+32, seed);
         | 
| 4076 | 
            +
                        acc_end += XXH3_mix16B(input+len-32, secret+48, seed);
         | 
| 3965 4077 | 
             
                        if (len > 64) {
         | 
| 4078 | 
            +
                            acc += XXH3_mix16B(input+32, secret+64, seed);
         | 
| 4079 | 
            +
                            acc_end += XXH3_mix16B(input+len-48, secret+80, seed);
         | 
| 4080 | 
            +
             | 
| 3966 4081 | 
             
                            if (len > 96) {
         | 
| 3967 4082 | 
             
                                acc += XXH3_mix16B(input+48, secret+96, seed);
         | 
| 3968 | 
            -
                                 | 
| 4083 | 
            +
                                acc_end += XXH3_mix16B(input+len-64, secret+112, seed);
         | 
| 3969 4084 | 
             
                            }
         | 
| 3970 | 
            -
                            acc += XXH3_mix16B(input+32, secret+64, seed);
         | 
| 3971 | 
            -
                            acc += XXH3_mix16B(input+len-48, secret+80, seed);
         | 
| 3972 4085 | 
             
                        }
         | 
| 3973 | 
            -
                        acc += XXH3_mix16B(input+16, secret+32, seed);
         | 
| 3974 | 
            -
                        acc += XXH3_mix16B(input+len-32, secret+48, seed);
         | 
| 3975 4086 | 
             
                    }
         | 
| 3976 | 
            -
                    acc += XXH3_mix16B(input+0, secret+0, seed);
         | 
| 3977 | 
            -
                    acc += XXH3_mix16B(input+len-16, secret+16, seed);
         | 
| 3978 4087 | 
             
            #endif
         | 
| 3979 | 
            -
                    return XXH3_avalanche(acc);
         | 
| 4088 | 
            +
                    return XXH3_avalanche(acc + acc_end);
         | 
| 3980 4089 | 
             
                }
         | 
| 3981 4090 | 
             
            }
         | 
| 3982 4091 |  | 
| @@ -3994,13 +4103,17 @@ XXH3_len_129to240_64b(const xxh_u8* XXH_RESTRICT input, size_t len, | |
| 3994 4103 | 
             
                #define XXH3_MIDSIZE_LASTOFFSET  17
         | 
| 3995 4104 |  | 
| 3996 4105 | 
             
                {   xxh_u64 acc = len * XXH_PRIME64_1;
         | 
| 3997 | 
            -
                     | 
| 3998 | 
            -
                    int  | 
| 4106 | 
            +
                    xxh_u64 acc_end;
         | 
| 4107 | 
            +
                    unsigned int const nbRounds = (unsigned int)len / 16;
         | 
| 4108 | 
            +
                    unsigned int i;
         | 
| 4109 | 
            +
                    XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
         | 
| 3999 4110 | 
             
                    for (i=0; i<8; i++) {
         | 
| 4000 4111 | 
             
                        acc += XXH3_mix16B(input+(16*i), secret+(16*i), seed);
         | 
| 4001 4112 | 
             
                    }
         | 
| 4002 | 
            -
                     | 
| 4113 | 
            +
                    /* last bytes */
         | 
| 4114 | 
            +
                    acc_end = XXH3_mix16B(input + len - 16, secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET, seed);
         | 
| 4003 4115 | 
             
                    XXH_ASSERT(nbRounds >= 8);
         | 
| 4116 | 
            +
                    acc = XXH3_avalanche(acc);
         | 
| 4004 4117 | 
             
            #if defined(__clang__)                                /* Clang */ \
         | 
| 4005 4118 | 
             
                && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */ \
         | 
| 4006 4119 | 
             
                && !defined(XXH_ENABLE_AUTOVECTORIZE)             /* Define to disable */
         | 
| @@ -4027,11 +4140,13 @@ XXH3_len_129to240_64b(const xxh_u8* XXH_RESTRICT input, size_t len, | |
| 4027 4140 | 
             
                    #pragma clang loop vectorize(disable)
         | 
| 4028 4141 | 
             
            #endif
         | 
| 4029 4142 | 
             
                    for (i=8 ; i < nbRounds; i++) {
         | 
| 4030 | 
            -
                         | 
| 4143 | 
            +
                        /*
         | 
| 4144 | 
            +
                         * Prevents clang from unrolling the acc loop and interleaving it with this one.
         | 
| 4145 | 
            +
                         */
         | 
| 4146 | 
            +
                        XXH_COMPILER_GUARD(acc);
         | 
| 4147 | 
            +
                        acc_end += XXH3_mix16B(input+(16*i), secret+(16*(i-8)) + XXH3_MIDSIZE_STARTOFFSET, seed);
         | 
| 4031 4148 | 
             
                    }
         | 
| 4032 | 
            -
                     | 
| 4033 | 
            -
                    acc += XXH3_mix16B(input + len - 16, secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET, seed);
         | 
| 4034 | 
            -
                    return XXH3_avalanche(acc);
         | 
| 4149 | 
            +
                    return XXH3_avalanche(acc + acc_end);
         | 
| 4035 4150 | 
             
                }
         | 
| 4036 4151 | 
             
            }
         | 
| 4037 4152 |  | 
| @@ -4047,6 +4162,47 @@ XXH3_len_129to240_64b(const xxh_u8* XXH_RESTRICT input, size_t len, | |
| 4047 4162 | 
             
            #  define ACC_NB XXH_ACC_NB
         | 
| 4048 4163 | 
             
            #endif
         | 
| 4049 4164 |  | 
| 4165 | 
            +
            #ifndef XXH_PREFETCH_DIST
         | 
| 4166 | 
            +
            #  ifdef __clang__
         | 
| 4167 | 
            +
            #    define XXH_PREFETCH_DIST 320
         | 
| 4168 | 
            +
            #  else
         | 
| 4169 | 
            +
            #    if (XXH_VECTOR == XXH_AVX512)
         | 
| 4170 | 
            +
            #      define XXH_PREFETCH_DIST 512
         | 
| 4171 | 
            +
            #    else
         | 
| 4172 | 
            +
            #      define XXH_PREFETCH_DIST 384
         | 
| 4173 | 
            +
            #    endif
         | 
| 4174 | 
            +
            #  endif  /* __clang__ */
         | 
| 4175 | 
            +
            #endif  /* XXH_PREFETCH_DIST */
         | 
| 4176 | 
            +
             | 
| 4177 | 
            +
            /*
         | 
| 4178 | 
            +
             * These macros are to generate an XXH3_accumulate() function.
         | 
| 4179 | 
            +
             * The single argument selects the name suffix; the target attribute is written before the macro at the point of use.
         | 
| 4180 | 
            +
             *
         | 
| 4181 | 
            +
             * The name of this symbol is XXH3_accumulate_<name>() and it calls
         | 
| 4182 | 
            +
             * XXH3_accumulate_512_<name>().
         | 
| 4183 | 
            +
             *
         | 
| 4184 | 
            +
             * It may be useful to hand implement this function if the compiler fails to
         | 
| 4185 | 
            +
             * optimize the inline function.
         | 
| 4186 | 
            +
             */
         | 
| 4187 | 
            +
            #define XXH3_ACCUMULATE_TEMPLATE(name)                      \
         | 
| 4188 | 
            +
            void                                                        \
         | 
| 4189 | 
            +
            XXH3_accumulate_##name(xxh_u64* XXH_RESTRICT acc,           \
         | 
| 4190 | 
            +
                                   const xxh_u8* XXH_RESTRICT input,    \
         | 
| 4191 | 
            +
                                   const xxh_u8* XXH_RESTRICT secret,   \
         | 
| 4192 | 
            +
                                   size_t nbStripes)                    \
         | 
| 4193 | 
            +
            {                                                           \
         | 
| 4194 | 
            +
                size_t n;                                               \
         | 
| 4195 | 
            +
                for (n = 0; n < nbStripes; n++ ) {                      \
         | 
| 4196 | 
            +
                    const xxh_u8* const in = input + n*XXH_STRIPE_LEN;  \
         | 
| 4197 | 
            +
                    XXH_PREFETCH(in + XXH_PREFETCH_DIST);               \
         | 
| 4198 | 
            +
                    XXH3_accumulate_512_##name(                         \
         | 
| 4199 | 
            +
                             acc,                                       \
         | 
| 4200 | 
            +
                             in,                                        \
         | 
| 4201 | 
            +
                             secret + n*XXH_SECRET_CONSUME_RATE);       \
         | 
| 4202 | 
            +
                }                                                       \
         | 
| 4203 | 
            +
            }
         | 
| 4204 | 
            +
             | 
| 4205 | 
            +
             | 
| 4050 4206 | 
             
            XXH_FORCE_INLINE void XXH_writeLE64(void* dst, xxh_u64 v64)
         | 
| 4051 4207 | 
             
            {
         | 
| 4052 4208 | 
             
                if (!XXH_CPU_LITTLE_ENDIAN) v64 = XXH_swap64(v64);
         | 
| @@ -4115,7 +4271,7 @@ XXH3_accumulate_512_avx512(void* XXH_RESTRICT acc, | |
| 4115 4271 | 
             
                    /* data_key    = data_vec ^ key_vec; */
         | 
| 4116 4272 | 
             
                    __m512i const data_key    = _mm512_xor_si512     (data_vec, key_vec);
         | 
| 4117 4273 | 
             
                    /* data_key_lo = data_key >> 32; */
         | 
| 4118 | 
            -
                    __m512i const data_key_lo =  | 
| 4274 | 
            +
                    __m512i const data_key_lo = _mm512_srli_epi64 (data_key, 32);
         | 
| 4119 4275 | 
             
                    /* product     = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */
         | 
| 4120 4276 | 
             
                    __m512i const product     = _mm512_mul_epu32     (data_key, data_key_lo);
         | 
| 4121 4277 | 
             
                    /* xacc[0] += swap(data_vec); */
         | 
| @@ -4125,6 +4281,7 @@ XXH3_accumulate_512_avx512(void* XXH_RESTRICT acc, | |
| 4125 4281 | 
             
                    *xacc = _mm512_add_epi64(product, sum);
         | 
| 4126 4282 | 
             
                }
         | 
| 4127 4283 | 
             
            }
         | 
| 4284 | 
            +
            XXH_FORCE_INLINE XXH_TARGET_AVX512 XXH3_ACCUMULATE_TEMPLATE(avx512)
         | 
| 4128 4285 |  | 
| 4129 4286 | 
             
            /*
         | 
| 4130 4287 | 
             
             * XXH3_scrambleAcc: Scrambles the accumulators to improve mixing.
         | 
| @@ -4158,13 +4315,12 @@ XXH3_scrambleAcc_avx512(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) | |
| 4158 4315 | 
             
                    /* xacc[0] ^= (xacc[0] >> 47) */
         | 
| 4159 4316 | 
             
                    __m512i const acc_vec     = *xacc;
         | 
| 4160 4317 | 
             
                    __m512i const shifted     = _mm512_srli_epi64    (acc_vec, 47);
         | 
| 4161 | 
            -
                    __m512i const data_vec    = _mm512_xor_si512     (acc_vec, shifted);
         | 
| 4162 4318 | 
             
                    /* xacc[0] ^= secret; */
         | 
| 4163 4319 | 
             
                    __m512i const key_vec     = _mm512_loadu_si512   (secret);
         | 
| 4164 | 
            -
                    __m512i const data_key    =  | 
| 4320 | 
            +
                    __m512i const data_key    = _mm512_ternarylogic_epi32(key_vec, acc_vec, shifted, 0x96 /* key_vec ^ acc_vec ^ shifted */);
         | 
| 4165 4321 |  | 
| 4166 4322 | 
             
                    /* xacc[0] *= XXH_PRIME32_1; */
         | 
| 4167 | 
            -
                    __m512i const data_key_hi =  | 
| 4323 | 
            +
                    __m512i const data_key_hi = _mm512_srli_epi64 (data_key, 32);
         | 
| 4168 4324 | 
             
                    __m512i const prod_lo     = _mm512_mul_epu32     (data_key, prime32);
         | 
| 4169 4325 | 
             
                    __m512i const prod_hi     = _mm512_mul_epu32     (data_key_hi, prime32);
         | 
| 4170 4326 | 
             
                    *xacc = _mm512_add_epi64(prod_lo, _mm512_slli_epi64(prod_hi, 32));
         | 
| @@ -4179,7 +4335,8 @@ XXH3_initCustomSecret_avx512(void* XXH_RESTRICT customSecret, xxh_u64 seed64) | |
| 4179 4335 | 
             
                XXH_ASSERT(((size_t)customSecret & 63) == 0);
         | 
| 4180 4336 | 
             
                (void)(&XXH_writeLE64);
         | 
| 4181 4337 | 
             
                {   int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m512i);
         | 
| 4182 | 
            -
                    __m512i const  | 
| 4338 | 
            +
                    __m512i const seed_pos = _mm512_set1_epi64((xxh_i64)seed64);
         | 
| 4339 | 
            +
                    __m512i const seed     = _mm512_mask_sub_epi64(seed_pos, 0xAA, _mm512_set1_epi8(0), seed_pos);
         | 
| 4183 4340 |  | 
| 4184 4341 | 
             
                    const __m512i* const src  = (const __m512i*) ((const void*) XXH3_kSecret);
         | 
| 4185 4342 | 
             
                          __m512i* const dest = (      __m512i*) customSecret;
         | 
| @@ -4187,14 +4344,7 @@ XXH3_initCustomSecret_avx512(void* XXH_RESTRICT customSecret, xxh_u64 seed64) | |
| 4187 4344 | 
             
                    XXH_ASSERT(((size_t)src & 63) == 0); /* control alignment */
         | 
| 4188 4345 | 
             
                    XXH_ASSERT(((size_t)dest & 63) == 0);
         | 
| 4189 4346 | 
             
                    for (i=0; i < nbRounds; ++i) {
         | 
| 4190 | 
            -
                         | 
| 4191 | 
            -
                         * this will warn "discards 'const' qualifier". */
         | 
| 4192 | 
            -
                        union {
         | 
| 4193 | 
            -
                            const __m512i* cp;
         | 
| 4194 | 
            -
                            void* p;
         | 
| 4195 | 
            -
                        } remote_const_void;
         | 
| 4196 | 
            -
                        remote_const_void.cp = src + i;
         | 
| 4197 | 
            -
                        dest[i] = _mm512_add_epi64(_mm512_stream_load_si512(remote_const_void.p), seed);
         | 
| 4347 | 
            +
                        dest[i] = _mm512_add_epi64(_mm512_load_si512(src + i), seed);
         | 
| 4198 4348 | 
             
                }   }
         | 
| 4199 4349 | 
             
            }
         | 
| 4200 4350 |  | 
| @@ -4230,7 +4380,7 @@ XXH3_accumulate_512_avx2( void* XXH_RESTRICT acc, | |
| 4230 4380 | 
             
                        /* data_key    = data_vec ^ key_vec; */
         | 
| 4231 4381 | 
             
                        __m256i const data_key    = _mm256_xor_si256     (data_vec, key_vec);
         | 
| 4232 4382 | 
             
                        /* data_key_lo = data_key >> 32; */
         | 
| 4233 | 
            -
                        __m256i const data_key_lo =  | 
| 4383 | 
            +
                        __m256i const data_key_lo = _mm256_srli_epi64 (data_key, 32);
         | 
| 4234 4384 | 
             
                        /* product     = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */
         | 
| 4235 4385 | 
             
                        __m256i const product     = _mm256_mul_epu32     (data_key, data_key_lo);
         | 
| 4236 4386 | 
             
                        /* xacc[i] += swap(data_vec); */
         | 
| @@ -4240,6 +4390,7 @@ XXH3_accumulate_512_avx2( void* XXH_RESTRICT acc, | |
| 4240 4390 | 
             
                        xacc[i] = _mm256_add_epi64(product, sum);
         | 
| 4241 4391 | 
             
                }   }
         | 
| 4242 4392 | 
             
            }
         | 
| 4393 | 
            +
            XXH_FORCE_INLINE XXH_TARGET_AVX2 XXH3_ACCUMULATE_TEMPLATE(avx2)
         | 
| 4243 4394 |  | 
| 4244 4395 | 
             
            XXH_FORCE_INLINE XXH_TARGET_AVX2 void
         | 
| 4245 4396 | 
             
            XXH3_scrambleAcc_avx2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
         | 
| @@ -4262,7 +4413,7 @@ XXH3_scrambleAcc_avx2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) | |
| 4262 4413 | 
             
                        __m256i const data_key    = _mm256_xor_si256     (data_vec, key_vec);
         | 
| 4263 4414 |  | 
| 4264 4415 | 
             
                        /* xacc[i] *= XXH_PRIME32_1; */
         | 
| 4265 | 
            -
                        __m256i const data_key_hi =  | 
| 4416 | 
            +
                        __m256i const data_key_hi = _mm256_srli_epi64 (data_key, 32);
         | 
| 4266 4417 | 
             
                        __m256i const prod_lo     = _mm256_mul_epu32     (data_key, prime32);
         | 
| 4267 4418 | 
             
                        __m256i const prod_hi     = _mm256_mul_epu32     (data_key_hi, prime32);
         | 
| 4268 4419 | 
             
                        xacc[i] = _mm256_add_epi64(prod_lo, _mm256_slli_epi64(prod_hi, 32));
         | 
| @@ -4294,12 +4445,12 @@ XXH_FORCE_INLINE XXH_TARGET_AVX2 void XXH3_initCustomSecret_avx2(void* XXH_RESTR | |
| 4294 4445 | 
             
                    XXH_ASSERT(((size_t)dest & 31) == 0);
         | 
| 4295 4446 |  | 
| 4296 4447 | 
             
                    /* GCC -O2 needs this loop to be unrolled manually */
         | 
| 4297 | 
            -
                    dest[0] = _mm256_add_epi64( | 
| 4298 | 
            -
                    dest[1] = _mm256_add_epi64( | 
| 4299 | 
            -
                    dest[2] = _mm256_add_epi64( | 
| 4300 | 
            -
                    dest[3] = _mm256_add_epi64( | 
| 4301 | 
            -
                    dest[4] = _mm256_add_epi64( | 
| 4302 | 
            -
                    dest[5] = _mm256_add_epi64( | 
| 4448 | 
            +
                    dest[0] = _mm256_add_epi64(_mm256_load_si256(src+0), seed);
         | 
| 4449 | 
            +
                    dest[1] = _mm256_add_epi64(_mm256_load_si256(src+1), seed);
         | 
| 4450 | 
            +
                    dest[2] = _mm256_add_epi64(_mm256_load_si256(src+2), seed);
         | 
| 4451 | 
            +
                    dest[3] = _mm256_add_epi64(_mm256_load_si256(src+3), seed);
         | 
| 4452 | 
            +
                    dest[4] = _mm256_add_epi64(_mm256_load_si256(src+4), seed);
         | 
| 4453 | 
            +
                    dest[5] = _mm256_add_epi64(_mm256_load_si256(src+5), seed);
         | 
| 4303 4454 | 
             
                }
         | 
| 4304 4455 | 
             
            }
         | 
| 4305 4456 |  | 
| @@ -4346,6 +4497,7 @@ XXH3_accumulate_512_sse2( void* XXH_RESTRICT acc, | |
| 4346 4497 | 
             
                        xacc[i] = _mm_add_epi64(product, sum);
         | 
| 4347 4498 | 
             
                }   }
         | 
| 4348 4499 | 
             
            }
         | 
| 4500 | 
            +
            XXH_FORCE_INLINE XXH_TARGET_SSE2 XXH3_ACCUMULATE_TEMPLATE(sse2)
         | 
| 4349 4501 |  | 
| 4350 4502 | 
             
            XXH_FORCE_INLINE XXH_TARGET_SSE2 void
         | 
| 4351 4503 | 
             
            XXH3_scrambleAcc_sse2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
         | 
| @@ -4431,6 +4583,16 @@ XXH3_scalarScrambleRound(void* XXH_RESTRICT acc, | |
| 4431 4583 | 
             
             * CPU, and it also mitigates some GCC codegen issues.
         | 
| 4432 4584 | 
             
             *
         | 
| 4433 4585 | 
             
             * @see XXH3_NEON_LANES for configuring this and details about this optimization.
         | 
| 4586 | 
            +
             *
         | 
| 4587 | 
            +
             * NEON's 32-bit to 64-bit long multiply takes a half vector of 32-bit
         | 
| 4588 | 
            +
             * integers instead of the other platforms which mask full 64-bit vectors,
         | 
| 4589 | 
            +
             * so the setup is more complicated than just shifting right.
         | 
| 4590 | 
            +
             *
         | 
| 4591 | 
            +
             * Additionally, there is an optimization for 4 lanes at once noted below.
         | 
| 4592 | 
            +
             *
         | 
| 4593 | 
            +
             * Since, as stated, the optimal number of lanes for Cortexes is 6,
         | 
| 4594 | 
            +
             * there needs to be *three* versions of the accumulate operation used
         | 
| 4595 | 
            +
             * for the remaining 2 lanes.
         | 
| 4434 4596 | 
             
             */
         | 
| 4435 4597 | 
             
            XXH_FORCE_INLINE void
         | 
| 4436 4598 | 
             
            XXH3_accumulate_512_neon( void* XXH_RESTRICT acc,
         | 
| @@ -4439,49 +4601,113 @@ XXH3_accumulate_512_neon( void* XXH_RESTRICT acc, | |
| 4439 4601 | 
             
            {
         | 
| 4440 4602 | 
             
                XXH_ASSERT((((size_t)acc) & 15) == 0);
         | 
| 4441 4603 | 
             
                XXH_STATIC_ASSERT(XXH3_NEON_LANES > 0 && XXH3_NEON_LANES <= XXH_ACC_NB && XXH3_NEON_LANES % 2 == 0);
         | 
| 4442 | 
            -
                {
         | 
| 4443 | 
            -
                     | 
| 4604 | 
            +
                {   /* GCC for darwin arm64 does not like aliasing here */
         | 
| 4605 | 
            +
                    xxh_aliasing_uint64x2_t* const xacc = (xxh_aliasing_uint64x2_t*) acc;
         | 
| 4444 4606 | 
             
                    /* We don't use a uint32x4_t pointer because it causes bus errors on ARMv7. */
         | 
| 4445 4607 | 
             
                    uint8_t const* const xinput = (const uint8_t *) input;
         | 
| 4446 4608 | 
             
                    uint8_t const* const xsecret  = (const uint8_t *) secret;
         | 
| 4447 4609 |  | 
| 4448 4610 | 
             
                    size_t i;
         | 
| 4449 | 
            -
                    /*  | 
| 4611 | 
            +
                    /* Scalar lanes use the normal scalarRound routine */
         | 
| 4450 4612 | 
             
                    for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) {
         | 
| 4451 4613 | 
             
                        XXH3_scalarRound(acc, input, secret, i);
         | 
| 4452 4614 | 
             
                    }
         | 
| 4453 | 
            -
                     | 
| 4454 | 
            -
             | 
| 4615 | 
            +
                    i = 0;
         | 
| 4616 | 
            +
                    /* 4 NEON lanes at a time. */
         | 
| 4617 | 
            +
                    for (; i+1 < XXH3_NEON_LANES / 2; i+=2) {
         | 
| 4618 | 
            +
                        /* data_vec = xinput[i]; */
         | 
| 4619 | 
            +
                        uint64x2_t data_vec_1 = XXH_vld1q_u64(xinput  + (i * 16));
         | 
| 4620 | 
            +
                        uint64x2_t data_vec_2 = XXH_vld1q_u64(xinput  + ((i+1) * 16));
         | 
| 4621 | 
            +
                        /* key_vec  = xsecret[i];  */
         | 
| 4622 | 
            +
                        uint64x2_t key_vec_1  = XXH_vld1q_u64(xsecret + (i * 16));
         | 
| 4623 | 
            +
                        uint64x2_t key_vec_2  = XXH_vld1q_u64(xsecret + ((i+1) * 16));
         | 
| 4624 | 
            +
                        /* data_swap = swap(data_vec) */
         | 
| 4625 | 
            +
                        uint64x2_t data_swap_1 = vextq_u64(data_vec_1, data_vec_1, 1);
         | 
| 4626 | 
            +
                        uint64x2_t data_swap_2 = vextq_u64(data_vec_2, data_vec_2, 1);
         | 
| 4627 | 
            +
                        /* data_key = data_vec ^ key_vec; */
         | 
| 4628 | 
            +
                        uint64x2_t data_key_1 = veorq_u64(data_vec_1, key_vec_1);
         | 
| 4629 | 
            +
                        uint64x2_t data_key_2 = veorq_u64(data_vec_2, key_vec_2);
         | 
| 4630 | 
            +
             | 
| 4631 | 
            +
                        /*
         | 
| 4632 | 
            +
                         * If we reinterpret the 64x2 vectors as 32x4 vectors, we can use a
         | 
| 4633 | 
            +
                         * de-interleave operation for 4 lanes in 1 step with `vuzpq_u32` to
         | 
| 4634 | 
            +
                         * get one vector with the low 32 bits of each lane, and one vector
         | 
| 4635 | 
            +
                         * with the high 32 bits of each lane.
         | 
| 4636 | 
            +
                         *
         | 
| 4637 | 
            +
                         * This compiles to two instructions on AArch64 and has a paired vector
         | 
| 4638 | 
            +
                         * result, which is an artifact from ARMv7a's version which modified both
         | 
| 4639 | 
            +
                         * vectors in place.
         | 
| 4640 | 
            +
                         *
         | 
| 4641 | 
            +
                         *  [ dk11L | dk11H | dk12L | dk12H ] -> [ dk11L | dk12L | dk21L | dk22L ]
         | 
| 4642 | 
            +
                         *  [ dk21L | dk21H | dk22L | dk22H ] -> [ dk11H | dk12H | dk21H | dk22H ]
         | 
| 4643 | 
            +
                         */
         | 
| 4644 | 
            +
                        uint32x4x2_t unzipped = vuzpq_u32(
         | 
| 4645 | 
            +
                            vreinterpretq_u32_u64(data_key_1),
         | 
| 4646 | 
            +
                            vreinterpretq_u32_u64(data_key_2)
         | 
| 4647 | 
            +
                        );
         | 
| 4648 | 
            +
                        /* data_key_lo = data_key & 0xFFFFFFFF */
         | 
| 4649 | 
            +
                        uint32x4_t data_key_lo = unzipped.val[0];
         | 
| 4650 | 
            +
                        /* data_key_hi = data_key >> 32 */
         | 
| 4651 | 
            +
                        uint32x4_t data_key_hi = unzipped.val[1];
         | 
| 4652 | 
            +
                        /*
         | 
| 4653 | 
            +
                         * Then, we can split the vectors horizontally and multiply which, as for most
         | 
| 4654 | 
            +
                         * widening intrinsics, have a variant that works on both high half vectors
         | 
| 4655 | 
            +
                         * for free on AArch64.
         | 
| 4656 | 
            +
                         *
         | 
| 4657 | 
            +
                         * sum = data_swap + (u64x2) data_key_lo * (u64x2) data_key_hi
         | 
| 4658 | 
            +
                         */
         | 
| 4659 | 
            +
                        uint64x2_t sum_1 = XXH_vmlal_low_u32(data_swap_1, data_key_lo, data_key_hi);
         | 
| 4660 | 
            +
                        uint64x2_t sum_2 = XXH_vmlal_high_u32(data_swap_2, data_key_lo, data_key_hi);
         | 
| 4661 | 
            +
                        /*
         | 
| 4662 | 
            +
                         * Clang reorders
         | 
| 4663 | 
            +
                         *    swap += dkl * dkh;  // umlal   swap.2d, dkl.2s, dkh.2s
         | 
| 4664 | 
            +
                         *    acc  += swap;       // add     acc.2d, acc.2d, swap.2d
         | 
| 4665 | 
            +
                         * to
         | 
| 4666 | 
            +
                         *    acc  += swap;       // add     acc.2d, acc.2d, swap.2d
         | 
| 4667 | 
            +
                         *    acc  += dkl * dkh;  // umlal   acc.2d, dkl.2s, dkh.2s
         | 
| 4668 | 
            +
                         *
         | 
| 4669 | 
            +
                         * While it would make sense in theory since the addition is faster,
         | 
| 4670 | 
            +
                         * for reasons likely related to umlal being limited to certain NEON
         | 
| 4671 | 
            +
                         * pipelines, this is worse. A compiler guard fixes this.
         | 
| 4672 | 
            +
                         */
         | 
| 4673 | 
            +
                        XXH_COMPILER_GUARD_W(sum_1);
         | 
| 4674 | 
            +
                        XXH_COMPILER_GUARD_W(sum_2);
         | 
| 4675 | 
            +
                        /* xacc[i] = acc_vec + sum; */
         | 
| 4676 | 
            +
                        xacc[i]   = vaddq_u64(xacc[i], sum_1);
         | 
| 4677 | 
            +
                        xacc[i+1] = vaddq_u64(xacc[i+1], sum_2);
         | 
| 4678 | 
            +
                    }
         | 
| 4679 | 
            +
                    /* Operate on the remaining NEON lanes 2 at a time. */
         | 
| 4680 | 
            +
                    for (; i < XXH3_NEON_LANES / 2; i++) {
         | 
| 4455 4681 | 
             
                        /* data_vec = xinput[i]; */
         | 
| 4456 4682 | 
             
                        uint64x2_t data_vec = XXH_vld1q_u64(xinput  + (i * 16));
         | 
| 4457 4683 | 
             
                        /* key_vec  = xsecret[i];  */
         | 
| 4458 4684 | 
             
                        uint64x2_t key_vec  = XXH_vld1q_u64(xsecret + (i * 16));
         | 
| 4459 | 
            -
                        uint64x2_t data_key;
         | 
| 4460 | 
            -
                        uint32x2_t data_key_lo, data_key_hi;
         | 
| 4461 4685 | 
             
                        /* data_swap = swap(data_vec) */
         | 
| 4462 | 
            -
                        uint64x2_t  | 
| 4686 | 
            +
                        uint64x2_t data_swap = vextq_u64(data_vec, data_vec, 1);
         | 
| 4463 4687 | 
             
                        /* data_key = data_vec ^ key_vec; */
         | 
| 4464 | 
            -
                        data_key = veorq_u64(data_vec, key_vec);
         | 
| 4465 | 
            -
                        /*  | 
| 4466 | 
            -
             | 
| 4467 | 
            -
             | 
| 4468 | 
            -
                         | 
| 4469 | 
            -
                         | 
| 4470 | 
            -
                         | 
| 4471 | 
            -
                         | 
| 4472 | 
            -
                         | 
| 4473 | 
            -
                         | 
| 4688 | 
            +
                        uint64x2_t data_key = veorq_u64(data_vec, key_vec);
         | 
| 4689 | 
            +
                        /* For two lanes, just use VMOVN and VSHRN. */
         | 
| 4690 | 
            +
                        /* data_key_lo = data_key & 0xFFFFFFFF; */
         | 
| 4691 | 
            +
                        uint32x2_t data_key_lo = vmovn_u64(data_key);
         | 
| 4692 | 
            +
                        /* data_key_hi = data_key >> 32; */
         | 
| 4693 | 
            +
                        uint32x2_t data_key_hi = vshrn_n_u64(data_key, 32);
         | 
| 4694 | 
            +
                        /* sum = data_swap + (u64x2) data_key_lo * (u64x2) data_key_hi; */
         | 
| 4695 | 
            +
                        uint64x2_t sum = vmlal_u32(data_swap, data_key_lo, data_key_hi);
         | 
| 4696 | 
            +
                        /* Same Clang workaround as before */
         | 
| 4697 | 
            +
                        XXH_COMPILER_GUARD_W(sum);
         | 
| 4698 | 
            +
                        /* xacc[i] = acc_vec + sum; */
         | 
| 4699 | 
            +
                        xacc[i] = vaddq_u64 (xacc[i], sum);
         | 
| 4474 4700 | 
             
                    }
         | 
| 4475 | 
            -
             | 
| 4476 4701 | 
             
                }
         | 
| 4477 4702 | 
             
            }
         | 
| 4703 | 
            +
            XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(neon)
         | 
| 4478 4704 |  | 
| 4479 4705 | 
             
            XXH_FORCE_INLINE void
         | 
| 4480 4706 | 
             
            XXH3_scrambleAcc_neon(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
         | 
| 4481 4707 | 
             
            {
         | 
| 4482 4708 | 
             
                XXH_ASSERT((((size_t)acc) & 15) == 0);
         | 
| 4483 4709 |  | 
| 4484 | 
            -
                {    | 
| 4710 | 
            +
                {   xxh_aliasing_uint64x2_t* xacc       = (xxh_aliasing_uint64x2_t*) acc;
         | 
| 4485 4711 | 
             
                    uint8_t const* xsecret = (uint8_t const*) secret;
         | 
| 4486 4712 | 
             
                    uint32x2_t prime       = vdup_n_u32 (XXH_PRIME32_1);
         | 
| 4487 4713 |  | 
| @@ -4493,47 +4719,42 @@ XXH3_scrambleAcc_neon(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) | |
| 4493 4719 | 
             
                    for (i=0; i < XXH3_NEON_LANES / 2; i++) {
         | 
| 4494 4720 | 
             
                        /* xacc[i] ^= (xacc[i] >> 47); */
         | 
| 4495 4721 | 
             
                        uint64x2_t acc_vec  = xacc[i];
         | 
| 4496 | 
            -
                        uint64x2_t shifted  = vshrq_n_u64 | 
| 4497 | 
            -
                        uint64x2_t data_vec = veorq_u64 | 
| 4722 | 
            +
                        uint64x2_t shifted  = vshrq_n_u64(acc_vec, 47);
         | 
| 4723 | 
            +
                        uint64x2_t data_vec = veorq_u64(acc_vec, shifted);
         | 
| 4498 4724 |  | 
| 4499 4725 | 
             
                        /* xacc[i] ^= xsecret[i]; */
         | 
| 4500 | 
            -
                        uint64x2_t key_vec  = XXH_vld1q_u64 | 
| 4501 | 
            -
                        uint64x2_t data_key = veorq_u64 | 
| 4726 | 
            +
                        uint64x2_t key_vec  = XXH_vld1q_u64(xsecret + (i * 16));
         | 
| 4727 | 
            +
                        uint64x2_t data_key = veorq_u64(data_vec, key_vec);
         | 
| 4502 4728 |  | 
| 4503 4729 | 
             
                        /* xacc[i] *= XXH_PRIME32_1 */
         | 
| 4504 | 
            -
                        uint32x2_t data_key_lo | 
| 4505 | 
            -
                         | 
| 4506 | 
            -
             | 
| 4507 | 
            -
                         *  | 
| 4508 | 
            -
             | 
| 4509 | 
            -
             | 
| 4510 | 
            -
             | 
| 4511 | 
            -
             | 
| 4512 | 
            -
             | 
| 4513 | 
            -
             | 
| 4514 | 
            -
             | 
| 4515 | 
            -
             | 
| 4516 | 
            -
             | 
| 4517 | 
            -
             | 
| 4518 | 
            -
             | 
| 4519 | 
            -
             | 
| 4520 | 
            -
             | 
| 4521 | 
            -
             | 
| 4522 | 
            -
             | 
| 4523 | 
            -
             | 
| 4524 | 
            -
             | 
| 4525 | 
            -
             | 
| 4526 | 
            -
             | 
| 4527 | 
            -
             | 
| 4528 | 
            -
             | 
| 4529 | 
            -
                            prod_hi = vshlq_n_u64(prod_hi, 32);
         | 
| 4530 | 
            -
                            /* xacc[i] += (prod_hi & 0xFFFFFFFF) * XXH_PRIME32_1; */
         | 
| 4531 | 
            -
                            xacc[i] = vmlal_u32(prod_hi, data_key_lo, prime);
         | 
| 4532 | 
            -
                        }
         | 
| 4730 | 
            +
                        uint32x2_t data_key_lo = vmovn_u64(data_key);
         | 
| 4731 | 
            +
                        uint32x2_t data_key_hi = vshrn_n_u64(data_key, 32);
         | 
| 4732 | 
            +
                        /*
         | 
| 4733 | 
            +
                         * prod_hi = (data_key >> 32) * XXH_PRIME32_1;
         | 
| 4734 | 
            +
                         *
         | 
| 4735 | 
            +
                         * Avoid vmul_u32 + vshll_n_u32 since Clang 6 and 7 will
         | 
| 4736 | 
            +
                         * incorrectly "optimize" this:
         | 
| 4737 | 
            +
                         *   tmp     = vmul_u32(vmovn_u64(a), vmovn_u64(b));
         | 
| 4738 | 
            +
                         *   shifted = vshll_n_u32(tmp, 32);
         | 
| 4739 | 
            +
                         * to this:
         | 
| 4740 | 
            +
                         *   tmp     = "vmulq_u64"(a, b); // no such thing!
         | 
| 4741 | 
            +
                         *   shifted = vshlq_n_u64(tmp, 32);
         | 
| 4742 | 
            +
                         *
         | 
| 4743 | 
            +
                         * However, unlike SSE, Clang lacks a 64-bit multiply routine
         | 
| 4744 | 
            +
                         * for NEON, and it scalarizes two 64-bit multiplies instead.
         | 
| 4745 | 
            +
                         *
         | 
| 4746 | 
            +
                         * vmull_u32 has the same timing as vmul_u32, and it avoids
         | 
| 4747 | 
            +
                         * this bug completely.
         | 
| 4748 | 
            +
                         * See https://bugs.llvm.org/show_bug.cgi?id=39967
         | 
| 4749 | 
            +
                         */
         | 
| 4750 | 
            +
                        uint64x2_t prod_hi = vmull_u32 (data_key_hi, prime);
         | 
| 4751 | 
            +
                        /* xacc[i] = prod_hi << 32; */
         | 
| 4752 | 
            +
                        prod_hi = vshlq_n_u64(prod_hi, 32);
         | 
| 4753 | 
            +
                        /* xacc[i] += (data_key & 0xFFFFFFFF) * XXH_PRIME32_1; */
         | 
| 4754 | 
            +
                        xacc[i] = vmlal_u32(prod_hi, data_key_lo, prime);
         | 
| 4533 4755 | 
             
                    }
         | 
| 4534 4756 | 
             
                }
         | 
| 4535 4757 | 
             
            }
         | 
| 4536 | 
            -
             | 
| 4537 4758 | 
             
            #endif
         | 
| 4538 4759 |  | 
| 4539 4760 | 
             
            #if (XXH_VECTOR == XXH_VSX)
         | 
| @@ -4544,23 +4765,23 @@ XXH3_accumulate_512_vsx(  void* XXH_RESTRICT acc, | |
| 4544 4765 | 
             
                                const void* XXH_RESTRICT secret)
         | 
| 4545 4766 | 
             
            {
         | 
| 4546 4767 | 
             
                /* presumed aligned */
         | 
| 4547 | 
            -
                 | 
| 4548 | 
            -
                 | 
| 4549 | 
            -
                 | 
| 4768 | 
            +
                xxh_aliasing_u64x2* const xacc = (xxh_aliasing_u64x2*) acc;
         | 
| 4769 | 
            +
                xxh_u8 const* const xinput   = (xxh_u8 const*) input;   /* no alignment restriction */
         | 
| 4770 | 
            +
                xxh_u8 const* const xsecret  = (xxh_u8 const*) secret;    /* no alignment restriction */
         | 
| 4550 4771 | 
             
                xxh_u64x2 const v32 = { 32, 32 };
         | 
| 4551 4772 | 
             
                size_t i;
         | 
| 4552 4773 | 
             
                for (i = 0; i < XXH_STRIPE_LEN / sizeof(xxh_u64x2); i++) {
         | 
| 4553 4774 | 
             
                    /* data_vec = xinput[i]; */
         | 
| 4554 | 
            -
                    xxh_u64x2 const data_vec = XXH_vec_loadu(xinput + i);
         | 
| 4775 | 
            +
                    xxh_u64x2 const data_vec = XXH_vec_loadu(xinput + 16*i);
         | 
| 4555 4776 | 
             
                    /* key_vec = xsecret[i]; */
         | 
| 4556 | 
            -
                    xxh_u64x2 const key_vec  = XXH_vec_loadu(xsecret + i);
         | 
| 4777 | 
            +
                    xxh_u64x2 const key_vec  = XXH_vec_loadu(xsecret + 16*i);
         | 
| 4557 4778 | 
             
                    xxh_u64x2 const data_key = data_vec ^ key_vec;
         | 
| 4558 4779 | 
             
                    /* shuffled = (data_key << 32) | (data_key >> 32); */
         | 
| 4559 4780 | 
             
                    xxh_u32x4 const shuffled = (xxh_u32x4)vec_rl(data_key, v32);
         | 
| 4560 4781 | 
             
                    /* product = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)shuffled & 0xFFFFFFFF); */
         | 
| 4561 4782 | 
             
                    xxh_u64x2 const product  = XXH_vec_mulo((xxh_u32x4)data_key, shuffled);
         | 
| 4562 4783 | 
             
                    /* acc_vec = xacc[i]; */
         | 
| 4563 | 
            -
                    xxh_u64x2 acc_vec        =  | 
| 4784 | 
            +
                    xxh_u64x2 acc_vec        = xacc[i];
         | 
| 4564 4785 | 
             
                    acc_vec += product;
         | 
| 4565 4786 |  | 
| 4566 4787 | 
             
                    /* swap high and low halves */
         | 
| @@ -4569,18 +4790,18 @@ XXH3_accumulate_512_vsx(  void* XXH_RESTRICT acc, | |
| 4569 4790 | 
             
            #else
         | 
| 4570 4791 | 
             
                    acc_vec += vec_xxpermdi(data_vec, data_vec, 2);
         | 
| 4571 4792 | 
             
            #endif
         | 
| 4572 | 
            -
                     | 
| 4573 | 
            -
                    vec_xst((xxh_u32x4)acc_vec, 0, xacc + 4 * i);
         | 
| 4793 | 
            +
                    xacc[i] = acc_vec;
         | 
| 4574 4794 | 
             
                }
         | 
| 4575 4795 | 
             
            }
         | 
| 4796 | 
            +
            XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(vsx)
         | 
| 4576 4797 |  | 
| 4577 4798 | 
             
            XXH_FORCE_INLINE void
         | 
| 4578 4799 | 
             
            XXH3_scrambleAcc_vsx(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
         | 
| 4579 4800 | 
             
            {
         | 
| 4580 4801 | 
             
                XXH_ASSERT((((size_t)acc) & 15) == 0);
         | 
| 4581 4802 |  | 
| 4582 | 
            -
                { | 
| 4583 | 
            -
                    const  | 
| 4803 | 
            +
                {   xxh_aliasing_u64x2* const xacc = (xxh_aliasing_u64x2*) acc;
         | 
| 4804 | 
            +
                    const xxh_u8* const xsecret = (const xxh_u8*) secret;
         | 
| 4584 4805 | 
             
                    /* constants */
         | 
| 4585 4806 | 
             
                    xxh_u64x2 const v32  = { 32, 32 };
         | 
| 4586 4807 | 
             
                    xxh_u64x2 const v47 = { 47, 47 };
         | 
| @@ -4592,7 +4813,7 @@ XXH3_scrambleAcc_vsx(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) | |
| 4592 4813 | 
             
                        xxh_u64x2 const data_vec = acc_vec ^ (acc_vec >> v47);
         | 
| 4593 4814 |  | 
| 4594 4815 | 
             
                        /* xacc[i] ^= xsecret[i]; */
         | 
| 4595 | 
            -
                        xxh_u64x2 const key_vec  = XXH_vec_loadu(xsecret + i);
         | 
| 4816 | 
            +
                        xxh_u64x2 const key_vec  = XXH_vec_loadu(xsecret + 16*i);
         | 
| 4596 4817 | 
             
                        xxh_u64x2 const data_key = data_vec ^ key_vec;
         | 
| 4597 4818 |  | 
| 4598 4819 | 
             
                        /* xacc[i] *= XXH_PRIME32_1 */
         | 
| @@ -4606,8 +4827,148 @@ XXH3_scrambleAcc_vsx(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) | |
| 4606 4827 |  | 
| 4607 4828 | 
             
            #endif
         | 
| 4608 4829 |  | 
| 4830 | 
            +
            #if (XXH_VECTOR == XXH_SVE)
         | 
| 4831 | 
            +
             | 
| 4832 | 
            +
            XXH_FORCE_INLINE void
         | 
| 4833 | 
            +
            XXH3_accumulate_512_sve( void* XXH_RESTRICT acc,
         | 
| 4834 | 
            +
                               const void* XXH_RESTRICT input,
         | 
| 4835 | 
            +
                               const void* XXH_RESTRICT secret)
         | 
| 4836 | 
            +
            {
         | 
| 4837 | 
            +
                uint64_t *xacc = (uint64_t *)acc;
         | 
| 4838 | 
            +
                const uint64_t *xinput = (const uint64_t *)(const void *)input;
         | 
| 4839 | 
            +
                const uint64_t *xsecret = (const uint64_t *)(const void *)secret;
         | 
| 4840 | 
            +
                svuint64_t kSwap = sveor_n_u64_z(svptrue_b64(), svindex_u64(0, 1), 1);
         | 
| 4841 | 
            +
                uint64_t element_count = svcntd();
         | 
| 4842 | 
            +
                if (element_count >= 8) {
         | 
| 4843 | 
            +
                    svbool_t mask = svptrue_pat_b64(SV_VL8);
         | 
| 4844 | 
            +
                    svuint64_t vacc = svld1_u64(mask, xacc);
         | 
| 4845 | 
            +
                    ACCRND(vacc, 0);
         | 
| 4846 | 
            +
                    svst1_u64(mask, xacc, vacc);
         | 
| 4847 | 
            +
                } else if (element_count == 2) {   /* sve128 */
         | 
| 4848 | 
            +
                    svbool_t mask = svptrue_pat_b64(SV_VL2);
         | 
| 4849 | 
            +
                    svuint64_t acc0 = svld1_u64(mask, xacc + 0);
         | 
| 4850 | 
            +
                    svuint64_t acc1 = svld1_u64(mask, xacc + 2);
         | 
| 4851 | 
            +
                    svuint64_t acc2 = svld1_u64(mask, xacc + 4);
         | 
| 4852 | 
            +
                    svuint64_t acc3 = svld1_u64(mask, xacc + 6);
         | 
| 4853 | 
            +
                    ACCRND(acc0, 0);
         | 
| 4854 | 
            +
                    ACCRND(acc1, 2);
         | 
| 4855 | 
            +
                    ACCRND(acc2, 4);
         | 
| 4856 | 
            +
                    ACCRND(acc3, 6);
         | 
| 4857 | 
            +
                    svst1_u64(mask, xacc + 0, acc0);
         | 
| 4858 | 
            +
                    svst1_u64(mask, xacc + 2, acc1);
         | 
| 4859 | 
            +
                    svst1_u64(mask, xacc + 4, acc2);
         | 
| 4860 | 
            +
                    svst1_u64(mask, xacc + 6, acc3);
         | 
| 4861 | 
            +
                } else {
         | 
| 4862 | 
            +
                    svbool_t mask = svptrue_pat_b64(SV_VL4);
         | 
| 4863 | 
            +
                    svuint64_t acc0 = svld1_u64(mask, xacc + 0);
         | 
| 4864 | 
            +
                    svuint64_t acc1 = svld1_u64(mask, xacc + 4);
         | 
| 4865 | 
            +
                    ACCRND(acc0, 0);
         | 
| 4866 | 
            +
                    ACCRND(acc1, 4);
         | 
| 4867 | 
            +
                    svst1_u64(mask, xacc + 0, acc0);
         | 
| 4868 | 
            +
                    svst1_u64(mask, xacc + 4, acc1);
         | 
| 4869 | 
            +
                }
         | 
| 4870 | 
            +
            }
         | 
| 4871 | 
            +
             | 
| 4872 | 
            +
            XXH_FORCE_INLINE void
         | 
| 4873 | 
            +
            XXH3_accumulate_sve(xxh_u64* XXH_RESTRICT acc,
         | 
| 4874 | 
            +
                           const xxh_u8* XXH_RESTRICT input,
         | 
| 4875 | 
            +
                           const xxh_u8* XXH_RESTRICT secret,
         | 
| 4876 | 
            +
                           size_t nbStripes)
         | 
| 4877 | 
            +
            {
         | 
| 4878 | 
            +
                if (nbStripes != 0) {
         | 
| 4879 | 
            +
                    uint64_t *xacc = (uint64_t *)acc;
         | 
| 4880 | 
            +
                    const uint64_t *xinput = (const uint64_t *)(const void *)input;
         | 
| 4881 | 
            +
                    const uint64_t *xsecret = (const uint64_t *)(const void *)secret;
         | 
| 4882 | 
            +
                    svuint64_t kSwap = sveor_n_u64_z(svptrue_b64(), svindex_u64(0, 1), 1);
         | 
| 4883 | 
            +
                    uint64_t element_count = svcntd();
         | 
| 4884 | 
            +
                    if (element_count >= 8) {
         | 
| 4885 | 
            +
                        svbool_t mask = svptrue_pat_b64(SV_VL8);
         | 
| 4886 | 
            +
                        svuint64_t vacc = svld1_u64(mask, xacc + 0);
         | 
| 4887 | 
            +
                        do {
         | 
| 4888 | 
            +
                            /* svprfd(svbool_t, void *, enum svfprop); */
         | 
| 4889 | 
            +
                            svprfd(mask, xinput + 128, SV_PLDL1STRM);
         | 
| 4890 | 
            +
                            ACCRND(vacc, 0);
         | 
| 4891 | 
            +
                            xinput += 8;
         | 
| 4892 | 
            +
                            xsecret += 1;
         | 
| 4893 | 
            +
                            nbStripes--;
         | 
| 4894 | 
            +
                       } while (nbStripes != 0);
         | 
| 4895 | 
            +
             | 
| 4896 | 
            +
                       svst1_u64(mask, xacc + 0, vacc);
         | 
| 4897 | 
            +
                    } else if (element_count == 2) { /* sve128 */
         | 
| 4898 | 
            +
                        svbool_t mask = svptrue_pat_b64(SV_VL2);
         | 
| 4899 | 
            +
                        svuint64_t acc0 = svld1_u64(mask, xacc + 0);
         | 
| 4900 | 
            +
                        svuint64_t acc1 = svld1_u64(mask, xacc + 2);
         | 
| 4901 | 
            +
                        svuint64_t acc2 = svld1_u64(mask, xacc + 4);
         | 
| 4902 | 
            +
                        svuint64_t acc3 = svld1_u64(mask, xacc + 6);
         | 
| 4903 | 
            +
                        do {
         | 
| 4904 | 
            +
                            svprfd(mask, xinput + 128, SV_PLDL1STRM);
         | 
| 4905 | 
            +
                            ACCRND(acc0, 0);
         | 
| 4906 | 
            +
                            ACCRND(acc1, 2);
         | 
| 4907 | 
            +
                            ACCRND(acc2, 4);
         | 
| 4908 | 
            +
                            ACCRND(acc3, 6);
         | 
| 4909 | 
            +
                            xinput += 8;
         | 
| 4910 | 
            +
                            xsecret += 1;
         | 
| 4911 | 
            +
                            nbStripes--;
         | 
| 4912 | 
            +
                       } while (nbStripes != 0);
         | 
| 4913 | 
            +
             | 
| 4914 | 
            +
                       svst1_u64(mask, xacc + 0, acc0);
         | 
| 4915 | 
            +
                       svst1_u64(mask, xacc + 2, acc1);
         | 
| 4916 | 
            +
                       svst1_u64(mask, xacc + 4, acc2);
         | 
| 4917 | 
            +
                       svst1_u64(mask, xacc + 6, acc3);
         | 
| 4918 | 
            +
                    } else {
         | 
| 4919 | 
            +
                        svbool_t mask = svptrue_pat_b64(SV_VL4);
         | 
| 4920 | 
            +
                        svuint64_t acc0 = svld1_u64(mask, xacc + 0);
         | 
| 4921 | 
            +
                        svuint64_t acc1 = svld1_u64(mask, xacc + 4);
         | 
| 4922 | 
            +
                        do {
         | 
| 4923 | 
            +
                            svprfd(mask, xinput + 128, SV_PLDL1STRM);
         | 
| 4924 | 
            +
                            ACCRND(acc0, 0);
         | 
| 4925 | 
            +
                            ACCRND(acc1, 4);
         | 
| 4926 | 
            +
                            xinput += 8;
         | 
| 4927 | 
            +
                            xsecret += 1;
         | 
| 4928 | 
            +
                            nbStripes--;
         | 
| 4929 | 
            +
                       } while (nbStripes != 0);
         | 
| 4930 | 
            +
             | 
| 4931 | 
            +
                       svst1_u64(mask, xacc + 0, acc0);
         | 
| 4932 | 
            +
                       svst1_u64(mask, xacc + 4, acc1);
         | 
| 4933 | 
            +
                   }
         | 
| 4934 | 
            +
                }
         | 
| 4935 | 
            +
            }
         | 
| 4936 | 
            +
             | 
| 4937 | 
            +
            #endif
         | 
| 4938 | 
            +
             | 
| 4609 4939 | 
             
            /* scalar variants - universal */
         | 
| 4610 4940 |  | 
| 4941 | 
            +
            #if defined(__aarch64__) && (defined(__GNUC__) || defined(__clang__))
         | 
| 4942 | 
            +
            /*
         | 
| 4943 | 
            +
             * In XXH3_scalarRound(), GCC and Clang have a similar codegen issue, where they
         | 
| 4944 | 
            +
             * emit an excess mask and a full 64-bit multiply-add (MADD X-form).
         | 
| 4945 | 
            +
             *
         | 
| 4946 | 
            +
             * While this might not seem like much, as AArch64 is a 64-bit architecture, only
         | 
| 4947 | 
            +
             * big Cortex designs have a full 64-bit multiplier.
         | 
| 4948 | 
            +
             *
         | 
| 4949 | 
            +
             * On the little cores, the smaller 32-bit multiplier is used, and full 64-bit
         | 
| 4950 | 
            +
             * multiplies expand to 2-3 multiplies in microcode. This has a major penalty
         | 
| 4951 | 
            +
             * of up to 4 latency cycles and 2 stall cycles in the multiply pipeline.
         | 
| 4952 | 
            +
             *
         | 
| 4953 | 
            +
             * Thankfully, AArch64 still provides the 32-bit long multiply-add (UMADDL) which does
         | 
| 4954 | 
            +
             * not have this penalty and does the mask automatically.
         | 
| 4955 | 
            +
             */
         | 
| 4956 | 
            +
            XXH_FORCE_INLINE xxh_u64
         | 
| 4957 | 
            +
            XXH_mult32to64_add64(xxh_u64 lhs, xxh_u64 rhs, xxh_u64 acc)
         | 
| 4958 | 
            +
            {
         | 
| 4959 | 
            +
                xxh_u64 ret;
         | 
| 4960 | 
            +
                /* note: %x = 64-bit register, %w = 32-bit register */
         | 
| 4961 | 
            +
                __asm__("umaddl %x0, %w1, %w2, %x3" : "=r" (ret) : "r" (lhs), "r" (rhs), "r" (acc));
         | 
| 4962 | 
            +
                return ret;
         | 
| 4963 | 
            +
            }
         | 
| 4964 | 
            +
            #else
         | 
| 4965 | 
            +
            XXH_FORCE_INLINE xxh_u64
         | 
| 4966 | 
            +
            XXH_mult32to64_add64(xxh_u64 lhs, xxh_u64 rhs, xxh_u64 acc)
         | 
| 4967 | 
            +
            {
         | 
| 4968 | 
            +
                return XXH_mult32to64((xxh_u32)lhs, (xxh_u32)rhs) + acc;
         | 
| 4969 | 
            +
            }
         | 
| 4970 | 
            +
            #endif
         | 
| 4971 | 
            +
             | 
| 4611 4972 | 
             
            /*!
         | 
| 4612 4973 | 
             
             * @internal
         | 
| 4613 4974 | 
             
             * @brief Scalar round for @ref XXH3_accumulate_512_scalar().
         | 
| @@ -4630,7 +4991,7 @@ XXH3_scalarRound(void* XXH_RESTRICT acc, | |
| 4630 4991 | 
             
                    xxh_u64 const data_val = XXH_readLE64(xinput + lane * 8);
         | 
| 4631 4992 | 
             
                    xxh_u64 const data_key = data_val ^ XXH_readLE64(xsecret + lane * 8);
         | 
| 4632 4993 | 
             
                    xacc[lane ^ 1] += data_val; /* swap adjacent lanes */
         | 
| 4633 | 
            -
                    xacc[lane]  | 
| 4994 | 
            +
                    xacc[lane] = XXH_mult32to64_add64(data_key /* & 0xFFFFFFFF */, data_key >> 32, xacc[lane]);
         | 
| 4634 4995 | 
             
                }
         | 
| 4635 4996 | 
             
            }
         | 
| 4636 4997 |  | 
| @@ -4655,6 +5016,7 @@ XXH3_accumulate_512_scalar(void* XXH_RESTRICT acc, | |
| 4655 5016 | 
             
                    XXH3_scalarRound(acc, input, secret, i);
         | 
| 4656 5017 | 
             
                }
         | 
| 4657 5018 | 
             
            }
         | 
| 5019 | 
            +
            XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(scalar)
         | 
| 4658 5020 |  | 
| 4659 5021 | 
             
            /*!
         | 
| 4660 5022 | 
             
             * @internal
         | 
| @@ -4706,10 +5068,10 @@ XXH3_initCustomSecret_scalar(void* XXH_RESTRICT customSecret, xxh_u64 seed64) | |
| 4706 5068 | 
             
                const xxh_u8* kSecretPtr = XXH3_kSecret;
         | 
| 4707 5069 | 
             
                XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0);
         | 
| 4708 5070 |  | 
| 4709 | 
            -
            #if defined( | 
| 5071 | 
            +
            #if defined(__GNUC__) && defined(__aarch64__)
         | 
| 4710 5072 | 
             
                /*
         | 
| 4711 5073 | 
             
                 * UGLY HACK:
         | 
| 4712 | 
            -
                 * Clang  | 
| 5074 | 
            +
                 * GCC and Clang generate a bunch of MOV/MOVK pairs for aarch64, and they are
         | 
| 4713 5075 | 
             
                 * placed sequentially, in order, at the top of the unrolled loop.
         | 
| 4714 5076 | 
             
                 *
         | 
| 4715 5077 | 
             
                 * While MOVK is great for generating constants (2 cycles for a 64-bit
         | 
| @@ -4724,7 +5086,7 @@ XXH3_initCustomSecret_scalar(void* XXH_RESTRICT customSecret, xxh_u64 seed64) | |
| 4724 5086 | 
             
                 * ADD
         | 
| 4725 5087 | 
             
                 * SUB      STR
         | 
| 4726 5088 | 
             
                 *          STR
         | 
| 4727 | 
            -
                 * By forcing loads from memory (as the asm line causes  | 
| 5089 | 
            +
                 * By forcing loads from memory (as the asm line causes the compiler to assume
         | 
| 4728 5090 | 
             
                 * that XXH3_kSecretPtr has been changed), the pipelines are used more
         | 
| 4729 5091 | 
             
                 * efficiently:
         | 
| 4730 5092 | 
             
                 *   I   L   S
         | 
| @@ -4741,17 +5103,11 @@ XXH3_initCustomSecret_scalar(void* XXH_RESTRICT customSecret, xxh_u64 seed64) | |
| 4741 5103 | 
             
                 */
         | 
| 4742 5104 | 
             
                XXH_COMPILER_GUARD(kSecretPtr);
         | 
| 4743 5105 | 
             
            #endif
         | 
| 4744 | 
            -
                /*
         | 
| 4745 | 
            -
                 * Note: in debug mode, this overrides the asm optimization
         | 
| 4746 | 
            -
                 * and Clang will emit MOVK chains again.
         | 
| 4747 | 
            -
                 */
         | 
| 4748 | 
            -
                XXH_ASSERT(kSecretPtr == XXH3_kSecret);
         | 
| 4749 | 
            -
             | 
| 4750 5106 | 
             
                {   int const nbRounds = XXH_SECRET_DEFAULT_SIZE / 16;
         | 
| 4751 5107 | 
             
                    int i;
         | 
| 4752 5108 | 
             
                    for (i=0; i < nbRounds; i++) {
         | 
| 4753 5109 | 
             
                        /*
         | 
| 4754 | 
            -
                         * The asm hack causes  | 
| 5110 | 
            +
                         * The asm hack causes the compiler to assume that kSecretPtr aliases with
         | 
| 4755 5111 | 
             
                         * customSecret, and on aarch64, this prevented LDP from merging two
         | 
| 4756 5112 | 
             
                         * loads together for free. Putting the loads together before the stores
         | 
| 4757 5113 | 
             
                         * properly generates LDP.
         | 
| @@ -4764,7 +5120,7 @@ XXH3_initCustomSecret_scalar(void* XXH_RESTRICT customSecret, xxh_u64 seed64) | |
| 4764 5120 | 
             
            }
         | 
| 4765 5121 |  | 
| 4766 5122 |  | 
| 4767 | 
            -
            typedef void (* | 
| 5123 | 
            +
            typedef void (*XXH3_f_accumulate)(xxh_u64* XXH_RESTRICT, const xxh_u8* XXH_RESTRICT, const xxh_u8* XXH_RESTRICT, size_t);
         | 
| 4768 5124 | 
             
            typedef void (*XXH3_f_scrambleAcc)(void* XXH_RESTRICT, const void*);
         | 
| 4769 5125 | 
             
            typedef void (*XXH3_f_initCustomSecret)(void* XXH_RESTRICT, xxh_u64);
         | 
| 4770 5126 |  | 
| @@ -4772,36 +5128,48 @@ typedef void (*XXH3_f_initCustomSecret)(void* XXH_RESTRICT, xxh_u64); | |
| 4772 5128 | 
             
            #if (XXH_VECTOR == XXH_AVX512)
         | 
| 4773 5129 |  | 
| 4774 5130 | 
             
            #define XXH3_accumulate_512 XXH3_accumulate_512_avx512
         | 
| 5131 | 
            +
            #define XXH3_accumulate     XXH3_accumulate_avx512
         | 
| 4775 5132 | 
             
            #define XXH3_scrambleAcc    XXH3_scrambleAcc_avx512
         | 
| 4776 5133 | 
             
            #define XXH3_initCustomSecret XXH3_initCustomSecret_avx512
         | 
| 4777 5134 |  | 
| 4778 5135 | 
             
            #elif (XXH_VECTOR == XXH_AVX2)
         | 
| 4779 5136 |  | 
| 4780 5137 | 
             
            #define XXH3_accumulate_512 XXH3_accumulate_512_avx2
         | 
| 5138 | 
            +
            #define XXH3_accumulate     XXH3_accumulate_avx2
         | 
| 4781 5139 | 
             
            #define XXH3_scrambleAcc    XXH3_scrambleAcc_avx2
         | 
| 4782 5140 | 
             
            #define XXH3_initCustomSecret XXH3_initCustomSecret_avx2
         | 
| 4783 5141 |  | 
| 4784 5142 | 
             
            #elif (XXH_VECTOR == XXH_SSE2)
         | 
| 4785 5143 |  | 
| 4786 5144 | 
             
            #define XXH3_accumulate_512 XXH3_accumulate_512_sse2
         | 
| 5145 | 
            +
            #define XXH3_accumulate     XXH3_accumulate_sse2
         | 
| 4787 5146 | 
             
            #define XXH3_scrambleAcc    XXH3_scrambleAcc_sse2
         | 
| 4788 5147 | 
             
            #define XXH3_initCustomSecret XXH3_initCustomSecret_sse2
         | 
| 4789 5148 |  | 
| 4790 5149 | 
             
            #elif (XXH_VECTOR == XXH_NEON)
         | 
| 4791 5150 |  | 
| 4792 5151 | 
             
            #define XXH3_accumulate_512 XXH3_accumulate_512_neon
         | 
| 5152 | 
            +
            #define XXH3_accumulate     XXH3_accumulate_neon
         | 
| 4793 5153 | 
             
            #define XXH3_scrambleAcc    XXH3_scrambleAcc_neon
         | 
| 4794 5154 | 
             
            #define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
         | 
| 4795 5155 |  | 
| 4796 5156 | 
             
            #elif (XXH_VECTOR == XXH_VSX)
         | 
| 4797 5157 |  | 
| 4798 5158 | 
             
            #define XXH3_accumulate_512 XXH3_accumulate_512_vsx
         | 
| 5159 | 
            +
            #define XXH3_accumulate     XXH3_accumulate_vsx
         | 
| 4799 5160 | 
             
            #define XXH3_scrambleAcc    XXH3_scrambleAcc_vsx
         | 
| 4800 5161 | 
             
            #define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
         | 
| 4801 5162 |  | 
| 5163 | 
            +
            #elif (XXH_VECTOR == XXH_SVE)
         | 
| 5164 | 
            +
            #define XXH3_accumulate_512 XXH3_accumulate_512_sve
         | 
| 5165 | 
            +
            #define XXH3_accumulate     XXH3_accumulate_sve
         | 
| 5166 | 
            +
            #define XXH3_scrambleAcc    XXH3_scrambleAcc_scalar
         | 
| 5167 | 
            +
            #define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
         | 
| 5168 | 
            +
             | 
| 4802 5169 | 
             
            #else /* scalar */
         | 
| 4803 5170 |  | 
| 4804 5171 | 
             
            #define XXH3_accumulate_512 XXH3_accumulate_512_scalar
         | 
| 5172 | 
            +
            #define XXH3_accumulate     XXH3_accumulate_scalar
         | 
| 4805 5173 | 
             
            #define XXH3_scrambleAcc    XXH3_scrambleAcc_scalar
         | 
| 4806 5174 | 
             
            #define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
         | 
| 4807 5175 |  | 
| @@ -4812,45 +5180,11 @@ typedef void (*XXH3_f_initCustomSecret)(void* XXH_RESTRICT, xxh_u64); | |
| 4812 5180 | 
             
            #  define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
         | 
| 4813 5181 | 
             
            #endif
         | 
| 4814 5182 |  | 
| 4815 | 
            -
            #ifndef XXH_PREFETCH_DIST
         | 
| 4816 | 
            -
            #  ifdef __clang__
         | 
| 4817 | 
            -
            #    define XXH_PREFETCH_DIST 320
         | 
| 4818 | 
            -
            #  else
         | 
| 4819 | 
            -
            #    if (XXH_VECTOR == XXH_AVX512)
         | 
| 4820 | 
            -
            #      define XXH_PREFETCH_DIST 512
         | 
| 4821 | 
            -
            #    else
         | 
| 4822 | 
            -
            #      define XXH_PREFETCH_DIST 384
         | 
| 4823 | 
            -
            #    endif
         | 
| 4824 | 
            -
            #  endif  /* __clang__ */
         | 
| 4825 | 
            -
            #endif  /* XXH_PREFETCH_DIST */
         | 
| 4826 | 
            -
             | 
| 4827 | 
            -
            /*
         | 
| 4828 | 
            -
             * XXH3_accumulate()
         | 
| 4829 | 
            -
             * Loops over XXH3_accumulate_512().
         | 
| 4830 | 
            -
             * Assumption: nbStripes will not overflow the secret size
         | 
| 4831 | 
            -
             */
         | 
| 4832 | 
            -
            XXH_FORCE_INLINE void
         | 
| 4833 | 
            -
            XXH3_accumulate(     xxh_u64* XXH_RESTRICT acc,
         | 
| 4834 | 
            -
                            const xxh_u8* XXH_RESTRICT input,
         | 
| 4835 | 
            -
                            const xxh_u8* XXH_RESTRICT secret,
         | 
| 4836 | 
            -
                                  size_t nbStripes,
         | 
| 4837 | 
            -
                                  XXH3_f_accumulate_512 f_acc512)
         | 
| 4838 | 
            -
            {
         | 
| 4839 | 
            -
                size_t n;
         | 
| 4840 | 
            -
                for (n = 0; n < nbStripes; n++ ) {
         | 
| 4841 | 
            -
                    const xxh_u8* const in = input + n*XXH_STRIPE_LEN;
         | 
| 4842 | 
            -
                    XXH_PREFETCH(in + XXH_PREFETCH_DIST);
         | 
| 4843 | 
            -
                    f_acc512(acc,
         | 
| 4844 | 
            -
                             in,
         | 
| 4845 | 
            -
                             secret + n*XXH_SECRET_CONSUME_RATE);
         | 
| 4846 | 
            -
                }
         | 
| 4847 | 
            -
            }
         | 
| 4848 | 
            -
             | 
| 4849 5183 | 
             
            XXH_FORCE_INLINE void
         | 
| 4850 5184 | 
             
            XXH3_hashLong_internal_loop(xxh_u64* XXH_RESTRICT acc,
         | 
| 4851 5185 | 
             
                                  const xxh_u8* XXH_RESTRICT input, size_t len,
         | 
| 4852 5186 | 
             
                                  const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
         | 
| 4853 | 
            -
                                         | 
| 5187 | 
            +
                                        XXH3_f_accumulate f_acc,
         | 
| 4854 5188 | 
             
                                        XXH3_f_scrambleAcc f_scramble)
         | 
| 4855 5189 | 
             
            {
         | 
| 4856 5190 | 
             
                size_t const nbStripesPerBlock = (secretSize - XXH_STRIPE_LEN) / XXH_SECRET_CONSUME_RATE;
         | 
| @@ -4862,7 +5196,7 @@ XXH3_hashLong_internal_loop(xxh_u64* XXH_RESTRICT acc, | |
| 4862 5196 | 
             
                XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
         | 
| 4863 5197 |  | 
| 4864 5198 | 
             
                for (n = 0; n < nb_blocks; n++) {
         | 
| 4865 | 
            -
                     | 
| 5199 | 
            +
                    f_acc(acc, input + n*block_len, secret, nbStripesPerBlock);
         | 
| 4866 5200 | 
             
                    f_scramble(acc, secret + secretSize - XXH_STRIPE_LEN);
         | 
| 4867 5201 | 
             
                }
         | 
| 4868 5202 |  | 
| @@ -4870,12 +5204,12 @@ XXH3_hashLong_internal_loop(xxh_u64* XXH_RESTRICT acc, | |
| 4870 5204 | 
             
                XXH_ASSERT(len > XXH_STRIPE_LEN);
         | 
| 4871 5205 | 
             
                {   size_t const nbStripes = ((len - 1) - (block_len * nb_blocks)) / XXH_STRIPE_LEN;
         | 
| 4872 5206 | 
             
                    XXH_ASSERT(nbStripes <= (secretSize / XXH_SECRET_CONSUME_RATE));
         | 
| 4873 | 
            -
                     | 
| 5207 | 
            +
                    f_acc(acc, input + nb_blocks*block_len, secret, nbStripes);
         | 
| 4874 5208 |  | 
| 4875 5209 | 
             
                    /* last stripe */
         | 
| 4876 5210 | 
             
                    {   const xxh_u8* const p = input + len - XXH_STRIPE_LEN;
         | 
| 4877 5211 | 
             
            #define XXH_SECRET_LASTACC_START 7  /* not aligned on 8, last secret is different from acc & scrambler */
         | 
| 4878 | 
            -
                         | 
| 5212 | 
            +
                        XXH3_accumulate_512(acc, p, secret + secretSize - XXH_STRIPE_LEN - XXH_SECRET_LASTACC_START);
         | 
| 4879 5213 | 
             
                }   }
         | 
| 4880 5214 | 
             
            }
         | 
| 4881 5215 |  | 
| @@ -4920,12 +5254,12 @@ XXH3_mergeAccs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secre | |
| 4920 5254 | 
             
            XXH_FORCE_INLINE XXH64_hash_t
         | 
| 4921 5255 | 
             
            XXH3_hashLong_64b_internal(const void* XXH_RESTRICT input, size_t len,
         | 
| 4922 5256 | 
             
                                       const void* XXH_RESTRICT secret, size_t secretSize,
         | 
| 4923 | 
            -
                                        | 
| 5257 | 
            +
                                       XXH3_f_accumulate f_acc,
         | 
| 4924 5258 | 
             
                                       XXH3_f_scrambleAcc f_scramble)
         | 
| 4925 5259 | 
             
            {
         | 
| 4926 5260 | 
             
                XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC;
         | 
| 4927 5261 |  | 
| 4928 | 
            -
                XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, (const xxh_u8*)secret, secretSize,  | 
| 5262 | 
            +
                XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, (const xxh_u8*)secret, secretSize, f_acc, f_scramble);
         | 
| 4929 5263 |  | 
| 4930 5264 | 
             
                /* converge into final hash */
         | 
| 4931 5265 | 
             
                XXH_STATIC_ASSERT(sizeof(acc) == 64);
         | 
| @@ -4939,13 +5273,15 @@ XXH3_hashLong_64b_internal(const void* XXH_RESTRICT input, size_t len, | |
| 4939 5273 | 
             
             * It's important for performance to transmit secret's size (when it's static)
         | 
| 4940 5274 | 
             
             * so that the compiler can properly optimize the vectorized loop.
         | 
| 4941 5275 | 
             
             * This makes a big performance difference for "medium" keys (<1 KB) when using AVX instruction set.
         | 
| 5276 | 
            +
             * When the secret size is unknown, or on GCC 12 where the mix of NO_INLINE and FORCE_INLINE
         | 
| 5277 | 
            +
             * breaks -Og, this is XXH_NO_INLINE.
         | 
| 4942 5278 | 
             
             */
         | 
| 4943 | 
            -
             | 
| 5279 | 
            +
            XXH3_WITH_SECRET_INLINE XXH64_hash_t
         | 
| 4944 5280 | 
             
            XXH3_hashLong_64b_withSecret(const void* XXH_RESTRICT input, size_t len,
         | 
| 4945 5281 | 
             
                                         XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
         | 
| 4946 5282 | 
             
            {
         | 
| 4947 5283 | 
             
                (void)seed64;
         | 
| 4948 | 
            -
                return XXH3_hashLong_64b_internal(input, len, secret, secretLen,  | 
| 5284 | 
            +
                return XXH3_hashLong_64b_internal(input, len, secret, secretLen, XXH3_accumulate, XXH3_scrambleAcc);
         | 
| 4949 5285 | 
             
            }
         | 
| 4950 5286 |  | 
| 4951 5287 | 
             
            /*
         | 
| @@ -4959,7 +5295,7 @@ XXH3_hashLong_64b_default(const void* XXH_RESTRICT input, size_t len, | |
| 4959 5295 | 
             
                                      XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
         | 
| 4960 5296 | 
             
            {
         | 
| 4961 5297 | 
             
                (void)seed64; (void)secret; (void)secretLen;
         | 
| 4962 | 
            -
                return XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret),  | 
| 5298 | 
            +
                return XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate, XXH3_scrambleAcc);
         | 
| 4963 5299 | 
             
            }
         | 
| 4964 5300 |  | 
| 4965 5301 | 
             
            /*
         | 
| @@ -4976,7 +5312,7 @@ XXH3_hashLong_64b_default(const void* XXH_RESTRICT input, size_t len, | |
| 4976 5312 | 
             
            XXH_FORCE_INLINE XXH64_hash_t
         | 
| 4977 5313 | 
             
            XXH3_hashLong_64b_withSeed_internal(const void* input, size_t len,
         | 
| 4978 5314 | 
             
                                                XXH64_hash_t seed,
         | 
| 4979 | 
            -
                                                 | 
| 5315 | 
            +
                                                XXH3_f_accumulate f_acc,
         | 
| 4980 5316 | 
             
                                                XXH3_f_scrambleAcc f_scramble,
         | 
| 4981 5317 | 
             
                                                XXH3_f_initCustomSecret f_initSec)
         | 
| 4982 5318 | 
             
            {
         | 
| @@ -4984,12 +5320,12 @@ XXH3_hashLong_64b_withSeed_internal(const void* input, size_t len, | |
| 4984 5320 | 
             
                if (seed == 0)
         | 
| 4985 5321 | 
             
                    return XXH3_hashLong_64b_internal(input, len,
         | 
| 4986 5322 | 
             
                                                      XXH3_kSecret, sizeof(XXH3_kSecret),
         | 
| 4987 | 
            -
                                                       | 
| 5323 | 
            +
                                                      f_acc, f_scramble);
         | 
| 4988 5324 | 
             
            #endif
         | 
| 4989 5325 | 
             
                {   XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
         | 
| 4990 5326 | 
             
                    f_initSec(secret, seed);
         | 
| 4991 5327 | 
             
                    return XXH3_hashLong_64b_internal(input, len, secret, sizeof(secret),
         | 
| 4992 | 
            -
                                                       | 
| 5328 | 
            +
                                                      f_acc, f_scramble);
         | 
| 4993 5329 | 
             
                }
         | 
| 4994 5330 | 
             
            }
         | 
| 4995 5331 |  | 
| @@ -4997,12 +5333,12 @@ XXH3_hashLong_64b_withSeed_internal(const void* input, size_t len, | |
| 4997 5333 | 
             
             * It's important for performance that XXH3_hashLong is not inlined.
         | 
| 4998 5334 | 
             
             */
         | 
| 4999 5335 | 
             
            XXH_NO_INLINE XXH64_hash_t
         | 
| 5000 | 
            -
            XXH3_hashLong_64b_withSeed(const void* input, size_t len,
         | 
| 5001 | 
            -
                                       XXH64_hash_t seed, const xxh_u8* secret, size_t secretLen)
         | 
| 5336 | 
            +
            XXH3_hashLong_64b_withSeed(const void* XXH_RESTRICT input, size_t len,
         | 
| 5337 | 
            +
                                       XXH64_hash_t seed, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
         | 
| 5002 5338 | 
             
            {
         | 
| 5003 5339 | 
             
                (void)secret; (void)secretLen;
         | 
| 5004 5340 | 
             
                return XXH3_hashLong_64b_withSeed_internal(input, len, seed,
         | 
| 5005 | 
            -
                             | 
| 5341 | 
            +
                            XXH3_accumulate, XXH3_scrambleAcc, XXH3_initCustomSecret);
         | 
| 5006 5342 | 
             
            }
         | 
| 5007 5343 |  | 
| 5008 5344 |  | 
| @@ -5035,27 +5371,27 @@ XXH3_64bits_internal(const void* XXH_RESTRICT input, size_t len, | |
| 5035 5371 | 
             
            /* ===   Public entry point   === */
         | 
| 5036 5372 |  | 
| 5037 5373 | 
             
            /*! @ingroup XXH3_family */
         | 
| 5038 | 
            -
            XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(const void* input, size_t length)
         | 
| 5374 | 
            +
            XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(XXH_NOESCAPE const void* input, size_t length)
         | 
| 5039 5375 | 
             
            {
         | 
| 5040 5376 | 
             
                return XXH3_64bits_internal(input, length, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_default);
         | 
| 5041 5377 | 
             
            }
         | 
| 5042 5378 |  | 
| 5043 5379 | 
             
            /*! @ingroup XXH3_family */
         | 
| 5044 5380 | 
             
            XXH_PUBLIC_API XXH64_hash_t
         | 
| 5045 | 
            -
            XXH3_64bits_withSecret(const void* input, size_t length, const void* secret, size_t secretSize)
         | 
| 5381 | 
            +
            XXH3_64bits_withSecret(XXH_NOESCAPE const void* input, size_t length, XXH_NOESCAPE const void* secret, size_t secretSize)
         | 
| 5046 5382 | 
             
            {
         | 
| 5047 5383 | 
             
                return XXH3_64bits_internal(input, length, 0, secret, secretSize, XXH3_hashLong_64b_withSecret);
         | 
| 5048 5384 | 
             
            }
         | 
| 5049 5385 |  | 
| 5050 5386 | 
             
            /*! @ingroup XXH3_family */
         | 
| 5051 5387 | 
             
            XXH_PUBLIC_API XXH64_hash_t
         | 
| 5052 | 
            -
            XXH3_64bits_withSeed(const void* input, size_t length, XXH64_hash_t seed)
         | 
| 5388 | 
            +
            XXH3_64bits_withSeed(XXH_NOESCAPE const void* input, size_t length, XXH64_hash_t seed)
         | 
| 5053 5389 | 
             
            {
         | 
| 5054 5390 | 
             
                return XXH3_64bits_internal(input, length, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_withSeed);
         | 
| 5055 5391 | 
             
            }
         | 
| 5056 5392 |  | 
| 5057 5393 | 
             
            XXH_PUBLIC_API XXH64_hash_t
         | 
| 5058 | 
            -
            XXH3_64bits_withSecretandSeed(const void* input, size_t length, const void* secret, size_t secretSize, XXH64_hash_t seed)
         | 
| 5394 | 
            +
            XXH3_64bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t length, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed)
         | 
| 5059 5395 | 
             
            {
         | 
| 5060 5396 | 
             
                if (length <= XXH3_MIDSIZE_MAX)
         | 
| 5061 5397 | 
             
                    return XXH3_64bits_internal(input, length, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL);
         | 
| @@ -5148,7 +5484,7 @@ XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr) | |
| 5148 5484 |  | 
| 5149 5485 | 
             
            /*! @ingroup XXH3_family */
         | 
| 5150 5486 | 
             
            XXH_PUBLIC_API void
         | 
| 5151 | 
            -
            XXH3_copyState(XXH3_state_t* dst_state, const XXH3_state_t* src_state)
         | 
| 5487 | 
            +
            XXH3_copyState(XXH_NOESCAPE XXH3_state_t* dst_state, XXH_NOESCAPE const XXH3_state_t* src_state)
         | 
| 5152 5488 | 
             
            {
         | 
| 5153 5489 | 
             
                XXH_memcpy(dst_state, src_state, sizeof(*dst_state));
         | 
| 5154 5490 | 
             
            }
         | 
| @@ -5182,7 +5518,7 @@ XXH3_reset_internal(XXH3_state_t* statePtr, | |
| 5182 5518 |  | 
| 5183 5519 | 
             
            /*! @ingroup XXH3_family */
         | 
| 5184 5520 | 
             
            XXH_PUBLIC_API XXH_errorcode
         | 
| 5185 | 
            -
            XXH3_64bits_reset(XXH3_state_t* statePtr)
         | 
| 5521 | 
            +
            XXH3_64bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr)
         | 
| 5186 5522 | 
             
            {
         | 
| 5187 5523 | 
             
                if (statePtr == NULL) return XXH_ERROR;
         | 
| 5188 5524 | 
             
                XXH3_reset_internal(statePtr, 0, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE);
         | 
| @@ -5191,7 +5527,7 @@ XXH3_64bits_reset(XXH3_state_t* statePtr) | |
| 5191 5527 |  | 
| 5192 5528 | 
             
            /*! @ingroup XXH3_family */
         | 
| 5193 5529 | 
             
            XXH_PUBLIC_API XXH_errorcode
         | 
| 5194 | 
            -
            XXH3_64bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize)
         | 
| 5530 | 
            +
            XXH3_64bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize)
         | 
| 5195 5531 | 
             
            {
         | 
| 5196 5532 | 
             
                if (statePtr == NULL) return XXH_ERROR;
         | 
| 5197 5533 | 
             
                XXH3_reset_internal(statePtr, 0, secret, secretSize);
         | 
| @@ -5202,7 +5538,7 @@ XXH3_64bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t | |
| 5202 5538 |  | 
| 5203 5539 | 
             
            /*! @ingroup XXH3_family */
         | 
| 5204 5540 | 
             
            XXH_PUBLIC_API XXH_errorcode
         | 
| 5205 | 
            -
            XXH3_64bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed)
         | 
| 5541 | 
            +
            XXH3_64bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed)
         | 
| 5206 5542 | 
             
            {
         | 
| 5207 5543 | 
             
                if (statePtr == NULL) return XXH_ERROR;
         | 
| 5208 5544 | 
             
                if (seed==0) return XXH3_64bits_reset(statePtr);
         | 
| @@ -5214,7 +5550,7 @@ XXH3_64bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed) | |
| 5214 5550 |  | 
| 5215 5551 | 
             
            /*! @ingroup XXH3_family */
         | 
| 5216 5552 | 
             
            XXH_PUBLIC_API XXH_errorcode
         | 
| 5217 | 
            -
            XXH3_64bits_reset_withSecretandSeed(XXH3_state_t* statePtr, const void* secret, size_t secretSize, XXH64_hash_t seed64)
         | 
| 5553 | 
            +
            XXH3_64bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed64)
         | 
| 5218 5554 | 
             
            {
         | 
| 5219 5555 | 
             
                if (statePtr == NULL) return XXH_ERROR;
         | 
| 5220 5556 | 
             
                if (secret == NULL) return XXH_ERROR;
         | 
| @@ -5224,31 +5560,57 @@ XXH3_64bits_reset_withSecretandSeed(XXH3_state_t* statePtr, const void* secret, | |
| 5224 5560 | 
             
                return XXH_OK;
         | 
| 5225 5561 | 
             
            }
         | 
| 5226 5562 |  | 
| 5227 | 
            -
             | 
| 5228 | 
            -
             *  | 
| 5229 | 
            -
             *  | 
| 5230 | 
            -
             | 
| 5563 | 
            +
            /*!
         | 
| 5564 | 
            +
             * @internal
         | 
| 5565 | 
            +
             * @brief Processes a large input for XXH3_update() and XXH3_digest_long().
         | 
| 5566 | 
            +
             *
         | 
| 5567 | 
            +
             * Unlike XXH3_hashLong_internal_loop(), this can process data that overlaps a block.
         | 
| 5568 | 
            +
             *
         | 
| 5569 | 
            +
             * @param acc                Pointer to the 8 accumulator lanes
         | 
| 5570 | 
            +
             * @param nbStripesSoFarPtr  In/out pointer to the number of leftover stripes in the block
         | 
| 5571 | 
            +
             * @param nbStripesPerBlock  Number of stripes in a block
         | 
| 5572 | 
            +
             * @param input              Input pointer
         | 
| 5573 | 
            +
             * @param nbStripes          Number of stripes to process
         | 
| 5574 | 
            +
             * @param secret             Secret pointer
         | 
| 5575 | 
            +
             * @param secretLimit        Offset of the last block in @p secret
         | 
| 5576 | 
            +
             * @param f_acc              Pointer to an XXH3_accumulate implementation
         | 
| 5577 | 
            +
             * @param f_scramble         Pointer to an XXH3_scrambleAcc implementation
         | 
| 5578 | 
            +
             * @return                   Pointer past the end of @p input after processing
         | 
| 5579 | 
            +
             */
         | 
| 5580 | 
            +
            XXH_FORCE_INLINE const xxh_u8 *
         | 
| 5231 5581 | 
             
            XXH3_consumeStripes(xxh_u64* XXH_RESTRICT acc,
         | 
| 5232 5582 | 
             
                                size_t* XXH_RESTRICT nbStripesSoFarPtr, size_t nbStripesPerBlock,
         | 
| 5233 5583 | 
             
                                const xxh_u8* XXH_RESTRICT input, size_t nbStripes,
         | 
| 5234 5584 | 
             
                                const xxh_u8* XXH_RESTRICT secret, size_t secretLimit,
         | 
| 5235 | 
            -
                                 | 
| 5585 | 
            +
                                XXH3_f_accumulate f_acc,
         | 
| 5236 5586 | 
             
                                XXH3_f_scrambleAcc f_scramble)
         | 
| 5237 5587 | 
             
            {
         | 
| 5238 | 
            -
                 | 
| 5239 | 
            -
                 | 
| 5240 | 
            -
                if (nbStripesPerBlock - *nbStripesSoFarPtr | 
| 5241 | 
            -
                    /*  | 
| 5242 | 
            -
                    size_t  | 
| 5243 | 
            -
             | 
| 5244 | 
            -
                     | 
| 5245 | 
            -
             | 
| 5246 | 
            -
             | 
| 5247 | 
            -
             | 
| 5248 | 
            -
             | 
| 5249 | 
            -
             | 
| 5588 | 
            +
                const xxh_u8* initialSecret = secret + *nbStripesSoFarPtr * XXH_SECRET_CONSUME_RATE;
         | 
| 5589 | 
            +
                /* Process full blocks */
         | 
| 5590 | 
            +
                if (nbStripes >= (nbStripesPerBlock - *nbStripesSoFarPtr)) {
         | 
| 5591 | 
            +
                    /* Process the initial partial block... */
         | 
| 5592 | 
            +
                    size_t nbStripesThisIter = nbStripesPerBlock - *nbStripesSoFarPtr;
         | 
| 5593 | 
            +
             | 
| 5594 | 
            +
                    do {
         | 
| 5595 | 
            +
                        /* Accumulate and scramble */
         | 
| 5596 | 
            +
                        f_acc(acc, input, initialSecret, nbStripesThisIter);
         | 
| 5597 | 
            +
                        f_scramble(acc, secret + secretLimit);
         | 
| 5598 | 
            +
                        input += nbStripesThisIter * XXH_STRIPE_LEN;
         | 
| 5599 | 
            +
                        nbStripes -= nbStripesThisIter;
         | 
| 5600 | 
            +
                        /* Then continue the loop with the full block size */
         | 
| 5601 | 
            +
                        nbStripesThisIter = nbStripesPerBlock;
         | 
| 5602 | 
            +
                        initialSecret = secret;
         | 
| 5603 | 
            +
                    } while (nbStripes >= nbStripesPerBlock);
         | 
| 5604 | 
            +
                    *nbStripesSoFarPtr = 0;
         | 
| 5605 | 
            +
                }
         | 
| 5606 | 
            +
                /* Process a partial block */
         | 
| 5607 | 
            +
                if (nbStripes > 0) {
         | 
| 5608 | 
            +
                    f_acc(acc, input, initialSecret, nbStripes);
         | 
| 5609 | 
            +
                    input += nbStripes * XXH_STRIPE_LEN;
         | 
| 5250 5610 | 
             
                    *nbStripesSoFarPtr += nbStripes;
         | 
| 5251 5611 | 
             
                }
         | 
| 5612 | 
            +
                /* Return end pointer */
         | 
| 5613 | 
            +
                return input;
         | 
| 5252 5614 | 
             
            }
         | 
| 5253 5615 |  | 
| 5254 5616 | 
             
            #ifndef XXH3_STREAM_USE_STACK
         | 
| @@ -5262,7 +5624,7 @@ XXH3_consumeStripes(xxh_u64* XXH_RESTRICT acc, | |
| 5262 5624 | 
             
            XXH_FORCE_INLINE XXH_errorcode
         | 
| 5263 5625 | 
             
            XXH3_update(XXH3_state_t* XXH_RESTRICT const state,
         | 
| 5264 5626 | 
             
                        const xxh_u8* XXH_RESTRICT input, size_t len,
         | 
| 5265 | 
            -
                         | 
| 5627 | 
            +
                        XXH3_f_accumulate f_acc,
         | 
| 5266 5628 | 
             
                        XXH3_f_scrambleAcc f_scramble)
         | 
| 5267 5629 | 
             
            {
         | 
| 5268 5630 | 
             
                if (input==NULL) {
         | 
| @@ -5278,7 +5640,8 @@ XXH3_update(XXH3_state_t* XXH_RESTRICT const state, | |
| 5278 5640 | 
             
                     * when operating accumulators directly into state.
         | 
| 5279 5641 | 
             
                     * Operating into stack space seems to enable proper optimization.
         | 
| 5280 5642 | 
             
                     * clang, on the other hand, doesn't seem to need this trick */
         | 
| 5281 | 
            -
                    XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[8]; | 
| 5643 | 
            +
                    XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[8];
         | 
| 5644 | 
            +
                    XXH_memcpy(acc, state->acc, sizeof(acc));
         | 
| 5282 5645 | 
             
            #else
         | 
| 5283 5646 | 
             
                    xxh_u64* XXH_RESTRICT const acc = state->acc;
         | 
| 5284 5647 | 
             
            #endif
         | 
| @@ -5286,7 +5649,7 @@ XXH3_update(XXH3_state_t* XXH_RESTRICT const state, | |
| 5286 5649 | 
             
                    XXH_ASSERT(state->bufferedSize <= XXH3_INTERNALBUFFER_SIZE);
         | 
| 5287 5650 |  | 
| 5288 5651 | 
             
                    /* small input : just fill in tmp buffer */
         | 
| 5289 | 
            -
                    if ( | 
| 5652 | 
            +
                    if (len <= XXH3_INTERNALBUFFER_SIZE - state->bufferedSize) {
         | 
| 5290 5653 | 
             
                        XXH_memcpy(state->buffer + state->bufferedSize, input, len);
         | 
| 5291 5654 | 
             
                        state->bufferedSize += (XXH32_hash_t)len;
         | 
| 5292 5655 | 
             
                        return XXH_OK;
         | 
| @@ -5308,57 +5671,20 @@ XXH3_update(XXH3_state_t* XXH_RESTRICT const state, | |
| 5308 5671 | 
             
                                           &state->nbStripesSoFar, state->nbStripesPerBlock,
         | 
| 5309 5672 | 
             
                                            state->buffer, XXH3_INTERNALBUFFER_STRIPES,
         | 
| 5310 5673 | 
             
                                            secret, state->secretLimit,
         | 
| 5311 | 
            -
                                             | 
| 5674 | 
            +
                                            f_acc, f_scramble);
         | 
| 5312 5675 | 
             
                        state->bufferedSize = 0;
         | 
| 5313 5676 | 
             
                    }
         | 
| 5314 5677 | 
             
                    XXH_ASSERT(input < bEnd);
         | 
| 5315 | 
            -
             | 
| 5316 | 
            -
                    /* large input to consume : ingest per full block */
         | 
| 5317 | 
            -
                    if ((size_t)(bEnd - input) > state->nbStripesPerBlock * XXH_STRIPE_LEN) {
         | 
| 5678 | 
            +
                    if (bEnd - input > XXH3_INTERNALBUFFER_SIZE) {
         | 
| 5318 5679 | 
             
                        size_t nbStripes = (size_t)(bEnd - 1 - input) / XXH_STRIPE_LEN;
         | 
| 5319 | 
            -
                         | 
| 5320 | 
            -
                        /* join to current block's end */
         | 
| 5321 | 
            -
                        {   size_t const nbStripesToEnd = state->nbStripesPerBlock - state->nbStripesSoFar;
         | 
| 5322 | 
            -
                            XXH_ASSERT(nbStripesToEnd <= nbStripes);
         | 
| 5323 | 
            -
                            XXH3_accumulate(acc, input, secret + state->nbStripesSoFar * XXH_SECRET_CONSUME_RATE, nbStripesToEnd, f_acc512);
         | 
| 5324 | 
            -
                            f_scramble(acc, secret + state->secretLimit);
         | 
| 5325 | 
            -
                            state->nbStripesSoFar = 0;
         | 
| 5326 | 
            -
                            input += nbStripesToEnd * XXH_STRIPE_LEN;
         | 
| 5327 | 
            -
                            nbStripes -= nbStripesToEnd;
         | 
| 5328 | 
            -
                        }
         | 
| 5329 | 
            -
                        /* consume per entire blocks */
         | 
| 5330 | 
            -
                        while(nbStripes >= state->nbStripesPerBlock) {
         | 
| 5331 | 
            -
                            XXH3_accumulate(acc, input, secret, state->nbStripesPerBlock, f_acc512);
         | 
| 5332 | 
            -
                            f_scramble(acc, secret + state->secretLimit);
         | 
| 5333 | 
            -
                            input += state->nbStripesPerBlock * XXH_STRIPE_LEN;
         | 
| 5334 | 
            -
                            nbStripes -= state->nbStripesPerBlock;
         | 
| 5335 | 
            -
                        }
         | 
| 5336 | 
            -
                        /* consume last partial block */
         | 
| 5337 | 
            -
                        XXH3_accumulate(acc, input, secret, nbStripes, f_acc512);
         | 
| 5338 | 
            -
                        input += nbStripes * XXH_STRIPE_LEN;
         | 
| 5339 | 
            -
                        XXH_ASSERT(input < bEnd);  /* at least some bytes left */
         | 
| 5340 | 
            -
                        state->nbStripesSoFar = nbStripes;
         | 
| 5341 | 
            -
                        /* buffer predecessor of last partial stripe */
         | 
| 5342 | 
            -
                        XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);
         | 
| 5343 | 
            -
                        XXH_ASSERT(bEnd - input <= XXH_STRIPE_LEN);
         | 
| 5344 | 
            -
                    } else {
         | 
| 5345 | 
            -
                        /* content to consume <= block size */
         | 
| 5346 | 
            -
                        /* Consume input by a multiple of internal buffer size */
         | 
| 5347 | 
            -
                        if (bEnd - input > XXH3_INTERNALBUFFER_SIZE) {
         | 
| 5348 | 
            -
                            const xxh_u8* const limit = bEnd - XXH3_INTERNALBUFFER_SIZE;
         | 
| 5349 | 
            -
                            do {
         | 
| 5350 | 
            -
                                XXH3_consumeStripes(acc,
         | 
| 5680 | 
            +
                        input = XXH3_consumeStripes(acc,
         | 
| 5351 5681 | 
             
                                                   &state->nbStripesSoFar, state->nbStripesPerBlock,
         | 
| 5352 | 
            -
             | 
| 5353 | 
            -
             | 
| 5354 | 
            -
             | 
| 5355 | 
            -
             | 
| 5356 | 
            -
                            } while (input<limit);
         | 
| 5357 | 
            -
                            /* buffer predecessor of last partial stripe */
         | 
| 5358 | 
            -
                            XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);
         | 
| 5359 | 
            -
                        }
         | 
| 5360 | 
            -
                    }
         | 
| 5682 | 
            +
                                                   input, nbStripes,
         | 
| 5683 | 
            +
                                                   secret, state->secretLimit,
         | 
| 5684 | 
            +
                                                   f_acc, f_scramble);
         | 
| 5685 | 
            +
                        XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);
         | 
| 5361 5686 |  | 
| 5687 | 
            +
                    }
         | 
| 5362 5688 | 
             
                    /* Some remaining input (always) : buffer it */
         | 
| 5363 5689 | 
             
                    XXH_ASSERT(input < bEnd);
         | 
| 5364 5690 | 
             
                    XXH_ASSERT(bEnd - input <= XXH3_INTERNALBUFFER_SIZE);
         | 
| @@ -5367,7 +5693,7 @@ XXH3_update(XXH3_state_t* XXH_RESTRICT const state, | |
| 5367 5693 | 
             
                    state->bufferedSize = (XXH32_hash_t)(bEnd-input);
         | 
| 5368 5694 | 
             
            #if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 1
         | 
| 5369 5695 | 
             
                    /* save stack accumulators into state */
         | 
| 5370 | 
            -
                     | 
| 5696 | 
            +
                    XXH_memcpy(state->acc, acc, sizeof(acc));
         | 
| 5371 5697 | 
             
            #endif
         | 
| 5372 5698 | 
             
                }
         | 
| 5373 5699 |  | 
| @@ -5376,10 +5702,10 @@ XXH3_update(XXH3_state_t* XXH_RESTRICT const state, | |
| 5376 5702 |  | 
| 5377 5703 | 
             
            /*! @ingroup XXH3_family */
         | 
| 5378 5704 | 
             
            XXH_PUBLIC_API XXH_errorcode
         | 
| 5379 | 
            -
            XXH3_64bits_update(XXH3_state_t* state, const void* input, size_t len)
         | 
| 5705 | 
            +
            XXH3_64bits_update(XXH_NOESCAPE XXH3_state_t* state, XXH_NOESCAPE const void* input, size_t len)
         | 
| 5380 5706 | 
             
            {
         | 
| 5381 5707 | 
             
                return XXH3_update(state, (const xxh_u8*)input, len,
         | 
| 5382 | 
            -
                                    | 
| 5708 | 
            +
                                   XXH3_accumulate, XXH3_scrambleAcc);
         | 
| 5383 5709 | 
             
            }
         | 
| 5384 5710 |  | 
| 5385 5711 |  | 
| @@ -5388,37 +5714,40 @@ XXH3_digest_long (XXH64_hash_t* acc, | |
| 5388 5714 | 
             
                              const XXH3_state_t* state,
         | 
| 5389 5715 | 
             
                              const unsigned char* secret)
         | 
| 5390 5716 | 
             
            {
         | 
| 5717 | 
            +
                xxh_u8 lastStripe[XXH_STRIPE_LEN];
         | 
| 5718 | 
            +
                const xxh_u8* lastStripePtr;
         | 
| 5719 | 
            +
             | 
| 5391 5720 | 
             
                /*
         | 
| 5392 5721 | 
             
                 * Digest on a local copy. This way, the state remains unaltered, and it can
         | 
| 5393 5722 | 
             
                 * continue ingesting more input afterwards.
         | 
| 5394 5723 | 
             
                 */
         | 
| 5395 5724 | 
             
                XXH_memcpy(acc, state->acc, sizeof(state->acc));
         | 
| 5396 5725 | 
             
                if (state->bufferedSize >= XXH_STRIPE_LEN) {
         | 
| 5726 | 
            +
                    /* Consume remaining stripes then point to remaining data in buffer */
         | 
| 5397 5727 | 
             
                    size_t const nbStripes = (state->bufferedSize - 1) / XXH_STRIPE_LEN;
         | 
| 5398 5728 | 
             
                    size_t nbStripesSoFar = state->nbStripesSoFar;
         | 
| 5399 5729 | 
             
                    XXH3_consumeStripes(acc,
         | 
| 5400 5730 | 
             
                                       &nbStripesSoFar, state->nbStripesPerBlock,
         | 
| 5401 5731 | 
             
                                        state->buffer, nbStripes,
         | 
| 5402 5732 | 
             
                                        secret, state->secretLimit,
         | 
| 5403 | 
            -
                                         | 
| 5404 | 
            -
                     | 
| 5405 | 
            -
                    XXH3_accumulate_512(acc,
         | 
| 5406 | 
            -
                                        state->buffer + state->bufferedSize - XXH_STRIPE_LEN,
         | 
| 5407 | 
            -
                                        secret + state->secretLimit - XXH_SECRET_LASTACC_START);
         | 
| 5733 | 
            +
                                        XXH3_accumulate, XXH3_scrambleAcc);
         | 
| 5734 | 
            +
                    lastStripePtr = state->buffer + state->bufferedSize - XXH_STRIPE_LEN;
         | 
| 5408 5735 | 
             
                } else {  /* bufferedSize < XXH_STRIPE_LEN */
         | 
| 5409 | 
            -
                     | 
| 5736 | 
            +
                    /* Copy to temp buffer */
         | 
| 5410 5737 | 
             
                    size_t const catchupSize = XXH_STRIPE_LEN - state->bufferedSize;
         | 
| 5411 5738 | 
             
                    XXH_ASSERT(state->bufferedSize > 0);  /* there is always some input buffered */
         | 
| 5412 5739 | 
             
                    XXH_memcpy(lastStripe, state->buffer + sizeof(state->buffer) - catchupSize, catchupSize);
         | 
| 5413 5740 | 
             
                    XXH_memcpy(lastStripe + catchupSize, state->buffer, state->bufferedSize);
         | 
| 5414 | 
            -
                     | 
| 5415 | 
            -
                                        lastStripe,
         | 
| 5416 | 
            -
                                        secret + state->secretLimit - XXH_SECRET_LASTACC_START);
         | 
| 5741 | 
            +
                    lastStripePtr = lastStripe;
         | 
| 5417 5742 | 
             
                }
         | 
| 5743 | 
            +
                /* Last stripe */
         | 
| 5744 | 
            +
                XXH3_accumulate_512(acc,
         | 
| 5745 | 
            +
                                    lastStripePtr,
         | 
| 5746 | 
            +
                                    secret + state->secretLimit - XXH_SECRET_LASTACC_START);
         | 
| 5418 5747 | 
             
            }
         | 
| 5419 5748 |  | 
| 5420 5749 | 
             
            /*! @ingroup XXH3_family */
         | 
| 5421 | 
            -
            XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (const XXH3_state_t* state)
         | 
| 5750 | 
            +
            XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (XXH_NOESCAPE const XXH3_state_t* state)
         | 
| 5422 5751 | 
             
            {
         | 
| 5423 5752 | 
             
                const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
         | 
| 5424 5753 | 
             
                if (state->totalLen > XXH3_MIDSIZE_MAX) {
         | 
| @@ -5631,7 +5960,7 @@ XXH3_len_17to128_128b(const xxh_u8* XXH_RESTRICT input, size_t len, | |
| 5631 5960 | 
             
            #if XXH_SIZE_OPT >= 1
         | 
| 5632 5961 | 
             
                    {
         | 
| 5633 5962 | 
             
                        /* Smaller, but slightly slower. */
         | 
| 5634 | 
            -
                         | 
| 5963 | 
            +
                        unsigned int i = (unsigned int)(len - 1) / 32;
         | 
| 5635 5964 | 
             
                        do {
         | 
| 5636 5965 | 
             
                            acc = XXH128_mix32B(acc, input+16*i, input+len-16*(i+1), secret+32*i, seed);
         | 
| 5637 5966 | 
             
                        } while (i-- != 0);
         | 
| @@ -5669,25 +5998,34 @@ XXH3_len_129to240_128b(const xxh_u8* XXH_RESTRICT input, size_t len, | |
| 5669 5998 | 
             
                XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
         | 
| 5670 5999 |  | 
| 5671 6000 | 
             
                {   XXH128_hash_t acc;
         | 
| 5672 | 
            -
                     | 
| 5673 | 
            -
                    int i;
         | 
| 6001 | 
            +
                    unsigned i;
         | 
| 5674 6002 | 
             
                    acc.low64 = len * XXH_PRIME64_1;
         | 
| 5675 6003 | 
             
                    acc.high64 = 0;
         | 
| 5676 | 
            -
                     | 
| 6004 | 
            +
                    /*
         | 
| 6005 | 
            +
                     * We set `i` to offset + 32. We do this so that the unchanged
         | 
| 6006 | 
            +
                     * `len` can be used as upper bound. This reaches a sweet spot
         | 
| 6007 | 
            +
                     * where both x86 and aarch64 get simple agen and good codegen
         | 
| 6008 | 
            +
                     * for the loop.
         | 
| 6009 | 
            +
                     */
         | 
| 6010 | 
            +
                    for (i = 32; i < 160; i += 32) {
         | 
| 5677 6011 | 
             
                        acc = XXH128_mix32B(acc,
         | 
| 5678 | 
            -
                                            input  +  | 
| 5679 | 
            -
                                            input  +  | 
| 5680 | 
            -
                                            secret +  | 
| 6012 | 
            +
                                            input  + i - 32,
         | 
| 6013 | 
            +
                                            input  + i - 16,
         | 
| 6014 | 
            +
                                            secret + i - 32,
         | 
| 5681 6015 | 
             
                                            seed);
         | 
| 5682 6016 | 
             
                    }
         | 
| 5683 6017 | 
             
                    acc.low64 = XXH3_avalanche(acc.low64);
         | 
| 5684 6018 | 
             
                    acc.high64 = XXH3_avalanche(acc.high64);
         | 
| 5685 | 
            -
                     | 
| 5686 | 
            -
             | 
| 6019 | 
            +
                    /*
         | 
| 6020 | 
            +
                     * NB: `i <= len` will duplicate the last 32 bytes if
         | 
| 6021 | 
            +
                     * len % 32 was zero. This is an unfortunate necessity to keep
         | 
| 6022 | 
            +
                     * the hash result stable.
         | 
| 6023 | 
            +
                     */
         | 
| 6024 | 
            +
                    for (i=160; i <= len; i += 32) {
         | 
| 5687 6025 | 
             
                        acc = XXH128_mix32B(acc,
         | 
| 5688 | 
            -
                                            input +  | 
| 5689 | 
            -
                                            input +  | 
| 5690 | 
            -
                                            secret + XXH3_MIDSIZE_STARTOFFSET +  | 
| 6026 | 
            +
                                            input + i - 32,
         | 
| 6027 | 
            +
                                            input + i - 16,
         | 
| 6028 | 
            +
                                            secret + XXH3_MIDSIZE_STARTOFFSET + i - 160,
         | 
| 5691 6029 | 
             
                                            seed);
         | 
| 5692 6030 | 
             
                    }
         | 
| 5693 6031 | 
             
                    /* last bytes */
         | 
| @@ -5695,7 +6033,7 @@ XXH3_len_129to240_128b(const xxh_u8* XXH_RESTRICT input, size_t len, | |
| 5695 6033 | 
             
                                        input + len - 16,
         | 
| 5696 6034 | 
             
                                        input + len - 32,
         | 
| 5697 6035 | 
             
                                        secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET - 16,
         | 
| 5698 | 
            -
                                         | 
| 6036 | 
            +
                                        (XXH64_hash_t)0 - seed);
         | 
| 5699 6037 |  | 
| 5700 6038 | 
             
                    {   XXH128_hash_t h128;
         | 
| 5701 6039 | 
             
                        h128.low64  = acc.low64 + acc.high64;
         | 
| @@ -5712,12 +6050,12 @@ XXH3_len_129to240_128b(const xxh_u8* XXH_RESTRICT input, size_t len, | |
| 5712 6050 | 
             
            XXH_FORCE_INLINE XXH128_hash_t
         | 
| 5713 6051 | 
             
            XXH3_hashLong_128b_internal(const void* XXH_RESTRICT input, size_t len,
         | 
| 5714 6052 | 
             
                                        const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
         | 
| 5715 | 
            -
                                         | 
| 6053 | 
            +
                                        XXH3_f_accumulate f_acc,
         | 
| 5716 6054 | 
             
                                        XXH3_f_scrambleAcc f_scramble)
         | 
| 5717 6055 | 
             
            {
         | 
| 5718 6056 | 
             
                XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC;
         | 
| 5719 6057 |  | 
| 5720 | 
            -
                XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, secret, secretSize,  | 
| 6058 | 
            +
                XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, secret, secretSize, f_acc, f_scramble);
         | 
| 5721 6059 |  | 
| 5722 6060 | 
             
                /* converge into final hash */
         | 
| 5723 6061 | 
             
                XXH_STATIC_ASSERT(sizeof(acc) == 64);
         | 
| @@ -5744,38 +6082,41 @@ XXH3_hashLong_128b_default(const void* XXH_RESTRICT input, size_t len, | |
| 5744 6082 | 
             
            {
         | 
| 5745 6083 | 
             
                (void)seed64; (void)secret; (void)secretLen;
         | 
| 5746 6084 | 
             
                return XXH3_hashLong_128b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret),
         | 
| 5747 | 
            -
                                                    | 
| 6085 | 
            +
                                                   XXH3_accumulate, XXH3_scrambleAcc);
         | 
| 5748 6086 | 
             
            }
         | 
| 5749 6087 |  | 
| 5750 6088 | 
             
            /*
         | 
| 5751 6089 | 
             
             * It's important for performance to pass @p secretLen (when it's static)
         | 
| 5752 6090 | 
             
             * to the compiler, so that it can properly optimize the vectorized loop.
         | 
| 6091 | 
            +
             *
         | 
| 6092 | 
            +
             * When the secret size is unknown, or on GCC 12 where the mix of NO_INLINE and FORCE_INLINE
         | 
| 6093 | 
            +
             * breaks -Og, this is XXH_NO_INLINE.
         | 
| 5753 6094 | 
             
             */
         | 
| 5754 | 
            -
             | 
| 6095 | 
            +
            XXH3_WITH_SECRET_INLINE XXH128_hash_t
         | 
| 5755 6096 | 
             
            XXH3_hashLong_128b_withSecret(const void* XXH_RESTRICT input, size_t len,
         | 
| 5756 6097 | 
             
                                          XXH64_hash_t seed64,
         | 
| 5757 6098 | 
             
                                          const void* XXH_RESTRICT secret, size_t secretLen)
         | 
| 5758 6099 | 
             
            {
         | 
| 5759 6100 | 
             
                (void)seed64;
         | 
| 5760 6101 | 
             
                return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, secretLen,
         | 
| 5761 | 
            -
                                                    | 
| 6102 | 
            +
                                                   XXH3_accumulate, XXH3_scrambleAcc);
         | 
| 5762 6103 | 
             
            }
         | 
| 5763 6104 |  | 
| 5764 6105 | 
             
            XXH_FORCE_INLINE XXH128_hash_t
         | 
| 5765 6106 | 
             
            XXH3_hashLong_128b_withSeed_internal(const void* XXH_RESTRICT input, size_t len,
         | 
| 5766 6107 | 
             
                                            XXH64_hash_t seed64,
         | 
| 5767 | 
            -
                                             | 
| 6108 | 
            +
                                            XXH3_f_accumulate f_acc,
         | 
| 5768 6109 | 
             
                                            XXH3_f_scrambleAcc f_scramble,
         | 
| 5769 6110 | 
             
                                            XXH3_f_initCustomSecret f_initSec)
         | 
| 5770 6111 | 
             
            {
         | 
| 5771 6112 | 
             
                if (seed64 == 0)
         | 
| 5772 6113 | 
             
                    return XXH3_hashLong_128b_internal(input, len,
         | 
| 5773 6114 | 
             
                                                       XXH3_kSecret, sizeof(XXH3_kSecret),
         | 
| 5774 | 
            -
                                                        | 
| 6115 | 
            +
                                                       f_acc, f_scramble);
         | 
| 5775 6116 | 
             
                {   XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
         | 
| 5776 6117 | 
             
                    f_initSec(secret, seed64);
         | 
| 5777 6118 | 
             
                    return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, sizeof(secret),
         | 
| 5778 | 
            -
                                                        | 
| 6119 | 
            +
                                                       f_acc, f_scramble);
         | 
| 5779 6120 | 
             
                }
         | 
| 5780 6121 | 
             
            }
         | 
| 5781 6122 |  | 
| @@ -5788,7 +6129,7 @@ XXH3_hashLong_128b_withSeed(const void* input, size_t len, | |
| 5788 6129 | 
             
            {
         | 
| 5789 6130 | 
             
                (void)secret; (void)secretLen;
         | 
| 5790 6131 | 
             
                return XXH3_hashLong_128b_withSeed_internal(input, len, seed64,
         | 
| 5791 | 
            -
                             | 
| 6132 | 
            +
                            XXH3_accumulate, XXH3_scrambleAcc, XXH3_initCustomSecret);
         | 
| 5792 6133 | 
             
            }
         | 
| 5793 6134 |  | 
| 5794 6135 | 
             
            typedef XXH128_hash_t (*XXH3_hashLong128_f)(const void* XXH_RESTRICT, size_t,
         | 
| @@ -5819,7 +6160,7 @@ XXH3_128bits_internal(const void* input, size_t len, | |
| 5819 6160 | 
             
            /* ===   Public XXH128 API   === */
         | 
| 5820 6161 |  | 
| 5821 6162 | 
             
            /*! @ingroup XXH3_family */
         | 
| 5822 | 
            -
            XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(const void* input, size_t len)
         | 
| 6163 | 
            +
            XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(XXH_NOESCAPE const void* input, size_t len)
         | 
| 5823 6164 | 
             
            {
         | 
| 5824 6165 | 
             
                return XXH3_128bits_internal(input, len, 0,
         | 
| 5825 6166 | 
             
                                             XXH3_kSecret, sizeof(XXH3_kSecret),
         | 
| @@ -5828,7 +6169,7 @@ XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(const void* input, size_t len) | |
| 5828 6169 |  | 
| 5829 6170 | 
             
            /*! @ingroup XXH3_family */
         | 
| 5830 6171 | 
             
            XXH_PUBLIC_API XXH128_hash_t
         | 
| 5831 | 
            -
            XXH3_128bits_withSecret(const void* input, size_t len, const void* secret, size_t secretSize)
         | 
| 6172 | 
            +
            XXH3_128bits_withSecret(XXH_NOESCAPE const void* input, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize)
         | 
| 5832 6173 | 
             
            {
         | 
| 5833 6174 | 
             
                return XXH3_128bits_internal(input, len, 0,
         | 
| 5834 6175 | 
             
                                             (const xxh_u8*)secret, secretSize,
         | 
| @@ -5837,7 +6178,7 @@ XXH3_128bits_withSecret(const void* input, size_t len, const void* secret, size_ | |
| 5837 6178 |  | 
| 5838 6179 | 
             
            /*! @ingroup XXH3_family */
         | 
| 5839 6180 | 
             
            XXH_PUBLIC_API XXH128_hash_t
         | 
| 5840 | 
            -
            XXH3_128bits_withSeed(const void* input, size_t len, XXH64_hash_t seed)
         | 
| 6181 | 
            +
            XXH3_128bits_withSeed(XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)
         | 
| 5841 6182 | 
             
            {
         | 
| 5842 6183 | 
             
                return XXH3_128bits_internal(input, len, seed,
         | 
| 5843 6184 | 
             
                                             XXH3_kSecret, sizeof(XXH3_kSecret),
         | 
| @@ -5846,7 +6187,7 @@ XXH3_128bits_withSeed(const void* input, size_t len, XXH64_hash_t seed) | |
| 5846 6187 |  | 
| 5847 6188 | 
             
            /*! @ingroup XXH3_family */
         | 
| 5848 6189 | 
             
            XXH_PUBLIC_API XXH128_hash_t
         | 
| 5849 | 
            -
            XXH3_128bits_withSecretandSeed(const void* input, size_t len, const void* secret, size_t secretSize, XXH64_hash_t seed)
         | 
| 6190 | 
            +
            XXH3_128bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed)
         | 
| 5850 6191 | 
             
            {
         | 
| 5851 6192 | 
             
                if (len <= XXH3_MIDSIZE_MAX)
         | 
| 5852 6193 | 
             
                    return XXH3_128bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL);
         | 
| @@ -5855,7 +6196,7 @@ XXH3_128bits_withSecretandSeed(const void* input, size_t len, const void* secret | |
| 5855 6196 |  | 
| 5856 6197 | 
             
            /*! @ingroup XXH3_family */
         | 
| 5857 6198 | 
             
            XXH_PUBLIC_API XXH128_hash_t
         | 
| 5858 | 
            -
            XXH128(const void* input, size_t len, XXH64_hash_t seed)
         | 
| 6199 | 
            +
            XXH128(XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)
         | 
| 5859 6200 | 
             
            {
         | 
| 5860 6201 | 
             
                return XXH3_128bits_withSeed(input, len, seed);
         | 
| 5861 6202 | 
             
            }
         | 
| @@ -5870,42 +6211,41 @@ XXH128(const void* input, size_t len, XXH64_hash_t seed) | |
| 5870 6211 |  | 
| 5871 6212 | 
             
            /*! @ingroup XXH3_family */
         | 
| 5872 6213 | 
             
            XXH_PUBLIC_API XXH_errorcode
         | 
| 5873 | 
            -
            XXH3_128bits_reset(XXH3_state_t* statePtr)
         | 
| 6214 | 
            +
            XXH3_128bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr)
         | 
| 5874 6215 | 
             
            {
         | 
| 5875 6216 | 
             
                return XXH3_64bits_reset(statePtr);
         | 
| 5876 6217 | 
             
            }
         | 
| 5877 6218 |  | 
| 5878 6219 | 
             
            /*! @ingroup XXH3_family */
         | 
| 5879 6220 | 
             
            XXH_PUBLIC_API XXH_errorcode
         | 
| 5880 | 
            -
            XXH3_128bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize)
         | 
| 6221 | 
            +
            XXH3_128bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize)
         | 
| 5881 6222 | 
             
            {
         | 
| 5882 6223 | 
             
                return XXH3_64bits_reset_withSecret(statePtr, secret, secretSize);
         | 
| 5883 6224 | 
             
            }
         | 
| 5884 6225 |  | 
| 5885 6226 | 
             
            /*! @ingroup XXH3_family */
         | 
| 5886 6227 | 
             
            XXH_PUBLIC_API XXH_errorcode
         | 
| 5887 | 
            -
            XXH3_128bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed)
         | 
| 6228 | 
            +
            XXH3_128bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed)
         | 
| 5888 6229 | 
             
            {
         | 
| 5889 6230 | 
             
                return XXH3_64bits_reset_withSeed(statePtr, seed);
         | 
| 5890 6231 | 
             
            }
         | 
| 5891 6232 |  | 
| 5892 6233 | 
             
            /*! @ingroup XXH3_family */
         | 
| 5893 6234 | 
             
            XXH_PUBLIC_API XXH_errorcode
         | 
| 5894 | 
            -
            XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr, const void* secret, size_t secretSize, XXH64_hash_t seed)
         | 
| 6235 | 
            +
            XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed)
         | 
| 5895 6236 | 
             
            {
         | 
| 5896 6237 | 
             
                return XXH3_64bits_reset_withSecretandSeed(statePtr, secret, secretSize, seed);
         | 
| 5897 6238 | 
             
            }
         | 
| 5898 6239 |  | 
| 5899 6240 | 
             
            /*! @ingroup XXH3_family */
         | 
| 5900 6241 | 
             
            XXH_PUBLIC_API XXH_errorcode
         | 
| 5901 | 
            -
            XXH3_128bits_update(XXH3_state_t* state, const void* input, size_t len)
         | 
| 6242 | 
            +
            XXH3_128bits_update(XXH_NOESCAPE XXH3_state_t* state, XXH_NOESCAPE const void* input, size_t len)
         | 
| 5902 6243 | 
             
            {
         | 
| 5903 | 
            -
                return  | 
| 5904 | 
            -
                                   XXH3_accumulate_512, XXH3_scrambleAcc);
         | 
| 6244 | 
            +
                return XXH3_64bits_update(state, input, len);
         | 
| 5905 6245 | 
             
            }
         | 
| 5906 6246 |  | 
| 5907 6247 | 
             
            /*! @ingroup XXH3_family */
         | 
| 5908 | 
            -
            XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (const XXH3_state_t* state)
         | 
| 6248 | 
            +
            XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (XXH_NOESCAPE const XXH3_state_t* state)
         | 
| 5909 6249 | 
             
            {
         | 
| 5910 6250 | 
             
                const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
         | 
| 5911 6251 | 
             
                if (state->totalLen > XXH3_MIDSIZE_MAX) {
         | 
| @@ -5947,7 +6287,7 @@ XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2) | |
| 5947 6287 | 
             
             *           <0 if *h128_1  < *h128_2
         | 
| 5948 6288 | 
             
             *           =0 if *h128_1 == *h128_2  */
         | 
| 5949 6289 | 
             
            /*! @ingroup XXH3_family */
         | 
| 5950 | 
            -
            XXH_PUBLIC_API int XXH128_cmp(const void* h128_1, const void* h128_2)
         | 
| 6290 | 
            +
            XXH_PUBLIC_API int XXH128_cmp(XXH_NOESCAPE const void* h128_1, XXH_NOESCAPE const void* h128_2)
         | 
| 5951 6291 | 
             
            {
         | 
| 5952 6292 | 
             
                XXH128_hash_t const h1 = *(const XXH128_hash_t*)h128_1;
         | 
| 5953 6293 | 
             
                XXH128_hash_t const h2 = *(const XXH128_hash_t*)h128_2;
         | 
| @@ -5961,7 +6301,7 @@ XXH_PUBLIC_API int XXH128_cmp(const void* h128_1, const void* h128_2) | |
| 5961 6301 | 
             
            /*======   Canonical representation   ======*/
         | 
| 5962 6302 | 
             
            /*! @ingroup XXH3_family */
         | 
| 5963 6303 | 
             
            XXH_PUBLIC_API void
         | 
| 5964 | 
            -
            XXH128_canonicalFromHash(XXH128_canonical_t* dst, XXH128_hash_t hash)
         | 
| 6304 | 
            +
            XXH128_canonicalFromHash(XXH_NOESCAPE XXH128_canonical_t* dst, XXH128_hash_t hash)
         | 
| 5965 6305 | 
             
            {
         | 
| 5966 6306 | 
             
                XXH_STATIC_ASSERT(sizeof(XXH128_canonical_t) == sizeof(XXH128_hash_t));
         | 
| 5967 6307 | 
             
                if (XXH_CPU_LITTLE_ENDIAN) {
         | 
| @@ -5974,7 +6314,7 @@ XXH128_canonicalFromHash(XXH128_canonical_t* dst, XXH128_hash_t hash) | |
| 5974 6314 |  | 
| 5975 6315 | 
             
            /*! @ingroup XXH3_family */
         | 
| 5976 6316 | 
             
            XXH_PUBLIC_API XXH128_hash_t
         | 
| 5977 | 
            -
            XXH128_hashFromCanonical(const XXH128_canonical_t* src)
         | 
| 6317 | 
            +
            XXH128_hashFromCanonical(XXH_NOESCAPE const XXH128_canonical_t* src)
         | 
| 5978 6318 | 
             
            {
         | 
| 5979 6319 | 
             
                XXH128_hash_t h;
         | 
| 5980 6320 | 
             
                h.high64 = XXH_readBE64(src);
         | 
| @@ -5998,7 +6338,7 @@ XXH_FORCE_INLINE void XXH3_combine16(void* dst, XXH128_hash_t h128) | |
| 5998 6338 |  | 
| 5999 6339 | 
             
            /*! @ingroup XXH3_family */
         | 
| 6000 6340 | 
             
            XXH_PUBLIC_API XXH_errorcode
         | 
| 6001 | 
            -
            XXH3_generateSecret(void* secretBuffer, size_t secretSize, const void* customSeed, size_t customSeedSize)
         | 
| 6341 | 
            +
            XXH3_generateSecret(XXH_NOESCAPE void* secretBuffer, size_t secretSize, XXH_NOESCAPE const void* customSeed, size_t customSeedSize)
         | 
| 6002 6342 | 
             
            {
         | 
| 6003 6343 | 
             
            #if (XXH_DEBUGLEVEL >= 1)
         | 
| 6004 6344 | 
             
                XXH_ASSERT(secretBuffer != NULL);
         | 
| @@ -6043,7 +6383,7 @@ XXH3_generateSecret(void* secretBuffer, size_t secretSize, const void* customSee | |
| 6043 6383 |  | 
| 6044 6384 | 
             
            /*! @ingroup XXH3_family */
         | 
| 6045 6385 | 
             
            XXH_PUBLIC_API void
         | 
| 6046 | 
            -
            XXH3_generateSecret_fromSeed(void* secretBuffer, XXH64_hash_t seed)
         | 
| 6386 | 
            +
            XXH3_generateSecret_fromSeed(XXH_NOESCAPE void* secretBuffer, XXH64_hash_t seed)
         | 
| 6047 6387 | 
             
            {
         | 
| 6048 6388 | 
             
                XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
         | 
| 6049 6389 | 
             
                XXH3_initCustomSecret(secret, seed);
         | 
| @@ -6071,5 +6411,5 @@ XXH3_generateSecret_fromSeed(void* secretBuffer, XXH64_hash_t seed) | |
| 6071 6411 |  | 
| 6072 6412 |  | 
| 6073 6413 | 
             
            #if defined (__cplusplus)
         | 
| 6074 | 
            -
            }
         | 
| 6414 | 
            +
            } /* extern "C" */
         | 
| 6075 6415 | 
             
            #endif
         |