digest-xxhash 0.2.1 → 0.2.2
- checksums.yaml +4 -4
- data/README.md +1 -2
- data/digest-xxhash.gemspec +1 -1
- data/ext/digest/xxhash/ext.c +2 -0
- data/ext/digest/xxhash/xxhash.h +708 -407
- data/lib/digest/xxhash/version.rb +1 -1
- data/test/test.rb +6 -0
- metadata +21 -3
data/ext/digest/xxhash/xxhash.h
CHANGED
@@ -1,7 +1,7 @@
 /*
  * xxHash - Extremely Fast Hash algorithm
  * Header File
- * Copyright (C) 2012-
+ * Copyright (C) 2012-2021 Yann Collet
  *
  * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
  *
@@ -157,6 +157,7 @@ extern "C" {
 #  undef XXH3_64bits
 #  undef XXH3_64bits_withSecret
 #  undef XXH3_64bits_withSeed
+#  undef XXH3_64bits_withSecretandSeed
 #  undef XXH3_createState
 #  undef XXH3_freeState
 #  undef XXH3_copyState
@@ -174,6 +175,7 @@ extern "C" {
 #  undef XXH3_128bits_reset
 #  undef XXH3_128bits_reset_withSeed
 #  undef XXH3_128bits_reset_withSecret
+#  undef XXH3_128bits_reset_withSecretandSeed
 #  undef XXH3_128bits_update
 #  undef XXH3_128bits_digest
 #  undef XXH128_isEqual
@@ -284,23 +286,28 @@ extern "C" {
 #  define XXH3_64bits XXH_NAME2(XXH_NAMESPACE, XXH3_64bits)
 #  define XXH3_64bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecret)
 #  define XXH3_64bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSeed)
+#  define XXH3_64bits_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecretandSeed)
 #  define XXH3_createState XXH_NAME2(XXH_NAMESPACE, XXH3_createState)
 #  define XXH3_freeState XXH_NAME2(XXH_NAMESPACE, XXH3_freeState)
 #  define XXH3_copyState XXH_NAME2(XXH_NAMESPACE, XXH3_copyState)
 #  define XXH3_64bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset)
 #  define XXH3_64bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSeed)
 #  define XXH3_64bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecret)
+#  define XXH3_64bits_reset_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecretandSeed)
 #  define XXH3_64bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_update)
 #  define XXH3_64bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_digest)
 #  define XXH3_generateSecret XXH_NAME2(XXH_NAMESPACE, XXH3_generateSecret)
+#  define XXH3_generateSecret_fromSeed XXH_NAME2(XXH_NAMESPACE, XXH3_generateSecret_fromSeed)
 /* XXH3_128bits */
 #  define XXH128 XXH_NAME2(XXH_NAMESPACE, XXH128)
 #  define XXH3_128bits XXH_NAME2(XXH_NAMESPACE, XXH3_128bits)
 #  define XXH3_128bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSeed)
 #  define XXH3_128bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecret)
+#  define XXH3_128bits_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecretandSeed)
 #  define XXH3_128bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset)
 #  define XXH3_128bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSeed)
 #  define XXH3_128bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecret)
+#  define XXH3_128bits_reset_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecretandSeed)
 #  define XXH3_128bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_update)
 #  define XXH3_128bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_digest)
 #  define XXH128_isEqual XXH_NAME2(XXH_NAMESPACE, XXH128_isEqual)
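These aliases only take effect when XXH_NAMESPACE is defined before the header is included — useful when two linked components each bundle their own copy of xxHash, as this gem does. A minimal sketch (the DIGEST_ prefix is illustrative, not necessarily what the gem uses):

/* Illustrative: prefix all public xxHash symbols at build time. */
#define XXH_NAMESPACE DIGEST_
#include "xxhash.h"
/* A call to XXH3_64bits(...) now expands to DIGEST_XXH3_64bits(...),
 * so it cannot collide with another library's copy of xxHash. */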
@@ -321,16 +328,16 @@ extern "C" {
 /*!
  * @brief Obtains the xxHash version.
  *
- * This is
- *
+ * This is mostly useful when xxHash is compiled as a shared library,
+ * since the returned value comes from the library, as opposed to header file.
  *
- * @return `XXH_VERSION_NUMBER`
+ * @return `XXH_VERSION_NUMBER` of the invoked library.
  */
 XXH_PUBLIC_API unsigned XXH_versionNumber (void);
 
 
 /* ****************************
-*
+*  Common basic types
 ******************************/
 #include <stddef.h>   /* size_t */
 typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
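A quick sanity check of which library version the linker actually resolved (sketch; assumes xxhash.h is on the include path):

#include <stdio.h>
#include "xxhash.h"

int main(void)
{
    /* Encoded as major*100*100 + minor*100 + release, e.g. 801 for v0.8.1. */
    printf("runtime xxHash version: %u\n", XXH_versionNumber());
    return 0;
}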
@@ -374,10 +381,9 @@ typedef uint32_t XXH32_hash_t;
 * Contains functions used in the classic 32-bit xxHash algorithm.
 *
 * @note
-*   XXH32 is
-*
-*   systems, and offers true 64/128 bit hash results.
-*   level of dispersion, and greatly reduces the risks of collisions.
+*   XXH32 is useful for older platforms, with no or poor 64-bit performance.
+*   Note that @ref xxh3_family provides competitive speed
+*   for both 32-bit and 64-bit systems, and offers true 64/128 bit hash results.
 *
 * @see @ref xxh64_family, @ref xxh3_family : Other xxHash families
 * @see @ref xxh32_impl for implementation details
@@ -594,36 +600,39 @@ XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t
 XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
 
 
+#ifdef __has_attribute
+# define XXH_HAS_ATTRIBUTE(x) __has_attribute(x)
+#else
+# define XXH_HAS_ATTRIBUTE(x) 0
+#endif
+
+/* C-language Attributes are added in C23. */
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ > 201710L) && defined(__has_c_attribute)
+# define XXH_HAS_C_ATTRIBUTE(x) __has_c_attribute(x)
+#else
+# define XXH_HAS_C_ATTRIBUTE(x) 0
+#endif
+
+#if defined(__cplusplus) && defined(__has_cpp_attribute)
+# define XXH_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x)
+#else
+# define XXH_HAS_CPP_ATTRIBUTE(x) 0
+#endif
+
 /*
 Define XXH_FALLTHROUGH macro for annotating switch case with the 'fallthrough' attribute
 introduced in CPP17 and C23.
 CPP17 : https://en.cppreference.com/w/cpp/language/attributes/fallthrough
 C23   : https://en.cppreference.com/w/c/language/attributes/fallthrough
 */
-
-#
-#
-#
-#
-
-#
-#
-#   define XXH_FALLTHROUGH [[fallthrough]]
-#  endif
-#endif
-
-#ifndef XXH_FALLTHROUGH
-# if defined(__GNUC__) && __GNUC__ >= 7
-#  define XXH_FALLTHROUGH __attribute__ ((fallthrough))
-# elif defined(__clang__) && (__clang_major__ >= 10) \
-    && (!defined(__APPLE__) || (__clang_major__ >= 12))
-   /* Apple clang 12 is effectively clang-10 ,
-    * see https://en.wikipedia.org/wiki/Xcode for details
-    */
-#  define XXH_FALLTHROUGH __attribute__ ((fallthrough))
-# else
-#  define XXH_FALLTHROUGH
-# endif
+#if XXH_HAS_C_ATTRIBUTE(x)
+# define XXH_FALLTHROUGH [[fallthrough]]
+#elif XXH_HAS_CPP_ATTRIBUTE(x)
+# define XXH_FALLTHROUGH [[fallthrough]]
+#elif XXH_HAS_ATTRIBUTE(__fallthrough__)
+# define XXH_FALLTHROUGH __attribute__ ((fallthrough))
+#else
+# define XXH_FALLTHROUGH
 #endif
 
 /*!
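For context, this is how the selected attribute is meant to be consumed — a hypothetical tail-mixing helper (not library code), annotated so -Wimplicit-fallthrough stays quiet whichever spelling the cascade above picked:

static unsigned mix_tail(const unsigned char* p, size_t len, unsigned acc)
{
    switch (len & 3) {
    case 3: acc += (unsigned)p[2] << 16;
            XXH_FALLTHROUGH;       /* deliberate fall-through */
    case 2: acc += (unsigned)p[1] << 8;
            XXH_FALLTHROUGH;
    case 1: acc += p[0];
    }
    return acc;
}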
@@ -669,8 +678,8 @@ typedef uint64_t XXH64_hash_t;
 *
 * @note
 *   XXH3 provides competitive speed for both 32-bit and 64-bit systems,
-*   and offers true 64/128 bit hash results.
-*
+*   and offers true 64/128 bit hash results.
+*   It provides better speed for systems with vector processing capabilities.
 */
 
 
@@ -719,6 +728,8 @@ typedef struct { unsigned char digest[sizeof(XXH64_hash_t)]; } XXH64_canonical_t
 XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash);
 XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src);
 
+#ifndef XXH_NO_XXH3
+
 /*!
 * @}
 * ************************************************************************
@@ -796,13 +807,17 @@ XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSeed(const void* data, size_t len, X
 * It's possible to provide any blob of bytes as a "secret" to generate the hash.
 * This makes it more difficult for an external actor to prepare an intentional collision.
 * The main condition is that secretSize *must* be large enough (>= XXH3_SECRET_SIZE_MIN).
-* However, the quality of
-*
+* However, the quality of the secret impacts the dispersion of the hash algorithm.
+* Therefore, the secret _must_ look like a bunch of random bytes.
 * Avoid "trivial" or structured data such as repeated sequences or a text document.
-* Whenever
-* consider
-*
-*
+* Whenever in doubt about the "randomness" of the blob of bytes,
+* consider employing "XXH3_generateSecret()" instead (see below).
+* It will generate a proper high entropy secret derived from the blob of bytes.
+* Another advantage of using XXH3_generateSecret() is that
+* it guarantees that all bits within the initial blob of bytes
+* will impact every bit of the output.
+* This is not necessarily the case when using the blob of bytes directly
+* because, when hashing _small_ inputs, only a portion of the secret is employed.
 */
 XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSecret(const void* data, size_t len, const void* secret, size_t secretSize);
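A minimal usage sketch for the secret variant (hypothetical wrapper; the zero-filled buffer below is a placeholder — per the comment above, a real secret must be at least XXH3_SECRET_SIZE_MIN bytes of high-entropy data):

#include "xxhash.h"

static XXH64_hash_t hash_with_secret(const void* data, size_t len)
{
    /* Placeholder only: fill with random-looking bytes in real use! */
    static unsigned char secret[XXH3_SECRET_SIZE_MIN];
    return XXH3_64bits_withSecret(data, len, secret, sizeof(secret));
}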
@@ -922,6 +937,7 @@ XXH_PUBLIC_API void XXH128_canonicalFromHash(XXH128_canonical_t* dst, XXH128_has
 XXH_PUBLIC_API XXH128_hash_t XXH128_hashFromCanonical(const XXH128_canonical_t* src);
 
 
+#endif /* !XXH_NO_XXH3 */
 #endif  /* XXH_NO_LONG_LONG */
 
 /*!
@@ -962,13 +978,10 @@ XXH_PUBLIC_API XXH128_hash_t XXH128_hashFromCanonical(const XXH128_canonical_t*
 struct XXH32_state_s {
    XXH32_hash_t total_len_32; /*!< Total length hashed, modulo 2^32 */
    XXH32_hash_t large_len;    /*!< Whether the hash is >= 16 (handles @ref total_len_32 overflow) */
-   XXH32_hash_t v1;           /*!< First accumulator lane */
-   XXH32_hash_t v2;           /*!< Second accumulator lane */
-   XXH32_hash_t v3;           /*!< Third accumulator lane */
-   XXH32_hash_t v4;           /*!< Fourth accumulator lane */
+   XXH32_hash_t v[4];         /*!< Accumulator lanes */
    XXH32_hash_t mem32[4];     /*!< Internal buffer for partial reads. Treated as unsigned char[16]. */
    XXH32_hash_t memsize;      /*!< Amount of data in @ref mem32 */
-   XXH32_hash_t reserved;     /*!< Reserved field. Do not read
+   XXH32_hash_t reserved;     /*!< Reserved field. Do not read nor write to it. */
 };   /* typedef'd to XXH32_state_t */
 
 
@@ -988,16 +1001,15 @@ struct XXH32_state_s {
 */
 struct XXH64_state_s {
    XXH64_hash_t total_len;    /*!< Total length hashed. This is always 64-bit. */
-   XXH64_hash_t v1;           /*!< First accumulator lane */
-   XXH64_hash_t v2;           /*!< Second accumulator lane */
-   XXH64_hash_t v3;           /*!< Third accumulator lane */
-   XXH64_hash_t v4;           /*!< Fourth accumulator lane */
+   XXH64_hash_t v[4];         /*!< Accumulator lanes */
    XXH64_hash_t mem64[4];     /*!< Internal buffer for partial reads. Treated as unsigned char[32]. */
    XXH32_hash_t memsize;      /*!< Amount of data in @ref mem64 */
    XXH32_hash_t reserved32;   /*!< Reserved field, needed for padding anyways*/
-   XXH64_hash_t reserved64;   /*!< Reserved field. Do not read or write to it
+   XXH64_hash_t reserved64;   /*!< Reserved field. Do not read or write to it. */
 };  /* typedef'd to XXH64_state_t */
 
+#ifndef XXH_NO_XXH3
+
 #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* >= C11 */
 #  include <stdalign.h>
 #  define XXH_ALIGN(n)      alignas(n)
@@ -1070,7 +1082,7 @@ struct XXH3_state_s {
    /*!< The internal buffer. @see XXH32_state_s::mem32 */
    XXH32_hash_t bufferedSize;
    /*!< The amount of memory in @ref buffer, @see XXH32_state_s::memsize */
-   XXH32_hash_t reserved32;
+   XXH32_hash_t useSeed;
    /*!< Reserved field. Needed for padding on 64-bit. */
    size_t nbStripesSoFar;
    /*!< Number or stripes processed. */
@@ -1106,6 +1118,12 @@ struct XXH3_state_s {
 #define XXH3_INITSTATE(XXH3_state_ptr)   { (XXH3_state_ptr)->seed = 0; }
 
 
+/* XXH128() :
+ * simple alias to pre-selected XXH3_128bits variant
+ */
+XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t seed);
+
+
 /* ===   Experimental API   === */
 /* Symbols defined below must be considered tied to a specific library version. */
 
@@ -1118,33 +1136,92 @@ struct XXH3_state_s {
 * as it becomes much more difficult for an external actor to guess how to impact the calculation logic.
 *
 * The function accepts as input a custom seed of any length and any content,
-* and derives from it a high-entropy secret of length
-* into an already allocated buffer secretBuffer.
-*
+* and derives from it a high-entropy secret of length @secretSize
+* into an already allocated buffer @secretBuffer.
+* @secretSize must be >= XXH3_SECRET_SIZE_MIN
 *
 * The generated secret can then be used with any `*_withSecret()` variant.
 * Functions `XXH3_128bits_withSecret()`, `XXH3_64bits_withSecret()`,
 * `XXH3_128bits_reset_withSecret()` and `XXH3_64bits_reset_withSecret()`
 * are part of this list. They all accept a `secret` parameter
-* which must be
+* which must be large enough for implementation reasons (>= XXH3_SECRET_SIZE_MIN)
 * _and_ feature very high entropy (consist of random-looking bytes).
 * These conditions can be a high bar to meet, so
-*
+* XXH3_generateSecret() can be employed to ensure proper quality.
 *
 * customSeed can be anything. It can have any size, even small ones,
-* and its content can be anything, even
-* The resulting `secret` will nonetheless provide all
+* and its content can be anything, even "poor entropy" sources such as a bunch of zeroes.
+* The resulting `secret` will nonetheless provide all required qualities.
 *
-* Supplying NULL as the customSeed copies the default secret into `secretBuffer`.
 * When customSeedSize > 0, supplying NULL as customSeed is undefined behavior.
 */
-XXH_PUBLIC_API
+XXH_PUBLIC_API XXH_errorcode XXH3_generateSecret(void* secretBuffer, size_t secretSize, const void* customSeed, size_t customSeedSize);
 
 
-/*
-
+/*
+ * XXH3_generateSecret_fromSeed():
+ *
+ * Generate the same secret as the _withSeed() variants.
+ *
+ * The resulting secret has a length of XXH3_SECRET_DEFAULT_SIZE (necessarily).
+ * @secretBuffer must be already allocated, of size at least XXH3_SECRET_DEFAULT_SIZE bytes.
+ *
+ * The generated secret can be used in combination with
+ *`*_withSecret()` and `_withSecretandSeed()` variants.
+ * This generator is notably useful in combination with `_withSecretandSeed()`,
+ * as a way to emulate a faster `_withSeed()` variant.
+ */
+XXH_PUBLIC_API void XXH3_generateSecret_fromSeed(void* secretBuffer, XXH64_hash_t seed);
+
+/*
+ * *_withSecretandSeed() :
+ * These variants generate hash values using either
+ * @seed for "short" keys (< XXH3_MIDSIZE_MAX = 240 bytes)
+ * or @secret for "large" keys (>= XXH3_MIDSIZE_MAX).
+ *
+ * This generally benefits speed, compared to `_withSeed()` or `_withSecret()`.
+ * `_withSeed()` has to generate the secret on the fly for "large" keys.
+ * It's fast, but can be perceptible for "not so large" keys (< 1 KB).
+ * `_withSecret()` has to generate the masks on the fly for "small" keys,
+ * which requires more instructions than _withSeed() variants.
+ * Therefore, _withSecretandSeed variant combines the best of both worlds.
+ *
+ * When @secret has been generated by XXH3_generateSecret_fromSeed(),
+ * this variant produces *exactly* the same results as `_withSeed()` variant,
+ * hence offering only a pure speed benefit on "large" input,
+ * by skipping the need to regenerate the secret for every large input.
+ *
+ * Another usage scenario is to hash the secret to a 64-bit hash value,
+ * for example with XXH3_64bits(), which then becomes the seed,
+ * and then employ both the seed and the secret in _withSecretandSeed().
+ * On top of speed, an added benefit is that each bit in the secret
+ * has a 50% chance to swap each bit in the output,
+ * via its impact to the seed.
+ * This is not guaranteed when using the secret directly in "small data" scenarios,
+ * because only portions of the secret are employed for small data.
+ */
+XXH_PUBLIC_API XXH64_hash_t
+XXH3_64bits_withSecretandSeed(const void* data, size_t len,
+                              const void* secret, size_t secretSize,
+                              XXH64_hash_t seed);
+
+XXH_PUBLIC_API XXH128_hash_t
+XXH3_128bits_withSecretandSeed(const void* data, size_t len,
+                               const void* secret, size_t secretSize,
+                               XXH64_hash_t seed64);
+
+XXH_PUBLIC_API XXH_errorcode
+XXH3_64bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
+                                    const void* secret, size_t secretSize,
+                                    XXH64_hash_t seed64);
+
+XXH_PUBLIC_API XXH_errorcode
+XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
+                                     const void* secret, size_t secretSize,
+                                     XXH64_hash_t seed64);
 
 
+#endif /* !XXH_NO_XXH3 */
 #endif  /* XXH_NO_LONG_LONG */
 #if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
 #  define XXH_IMPLEMENTATION
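The "faster _withSeed()" pattern described above, in one sketch: derive the default-size secret from the seed once, then feed both to the combined variant for every input (hypothetical helper; the one-time init below is not thread-safe as written):

#include "xxhash.h"

static unsigned char g_secret[XXH3_SECRET_DEFAULT_SIZE];
static int g_secret_ready = 0;

XXH64_hash_t hash_one(const void* data, size_t len, XXH64_hash_t seed)
{
    if (!g_secret_ready) {
        XXH3_generateSecret_fromSeed(g_secret, seed);  /* one-time setup */
        g_secret_ready = 1;
    }
    /* Produces exactly the same value as XXH3_64bits_withSeed(data, len, seed),
     * but skips regenerating the secret for every large input. */
    return XXH3_64bits_withSecretandSeed(data, len, g_secret, sizeof(g_secret), seed);
}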
@@ -1221,7 +1298,7 @@ XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t s
 *     Use `memcpy()`. Safe and portable. Note that most modern compilers will
 *     eliminate the function call and treat it as an unaligned access.
 *
-*  - `XXH_FORCE_MEMORY_ACCESS=1`: `__attribute__((
+*  - `XXH_FORCE_MEMORY_ACCESS=1`: `__attribute__((aligned(1)))`
 *   @par
 *     Depends on compiler extensions and is therefore not portable.
 *     This method is safe _if_ your compiler supports it,
@@ -1248,22 +1325,12 @@ XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t s
 *     care, as what works on one compiler/platform/optimization level may cause
 *     another to read garbage data or even crash.
 *
-* See
+* See http://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html for details.
 *
 * Prefer these methods in priority order (0 > 3 > 1 > 2)
 */
 #  define XXH_FORCE_MEMORY_ACCESS 0
-
-* @def XXH_ACCEPT_NULL_INPUT_POINTER
-* @brief Whether to add explicit `NULL` checks.
-*
-* If the input pointer is `NULL` and the length is non-zero, xxHash's default
-* behavior is to dereference it, triggering a segfault.
-*
-* When this macro is enabled, xxHash actively checks the input for a null pointer.
-* If it is, the result for null input pointers is the same as a zero-length input.
-*/
-#  define XXH_ACCEPT_NULL_INPUT_POINTER 0
+
 /*!
 * @def XXH_FORCE_ALIGN_CHECK
 * @brief If defined to non-zero, adds a special path for aligned inputs (XXH32()
@@ -1315,18 +1382,16 @@ XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t s
 #  define XXH_NO_INLINE_HINTS 0
 
 /*!
-* @def
-* @brief Whether to
-*
-* For performance, `XXH32_finalize` uses an unrolled loop
-* in the form of a switch statement.
+* @def XXH32_ENDJMP
+* @brief Whether to use a jump for `XXH32_finalize`.
 *
-*
-*
+* For performance, `XXH32_finalize` uses multiple branches in the finalizer.
+* This is generally preferable for performance,
+* but depending on exact architecture, a jmp may be preferable.
 *
-* This is
+* This setting is only possibly making a difference for very small inputs.
 */
-#  define
+#  define XXH32_ENDJMP 0
 
 /*!
 * @internal
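A later hunk wraps this default in #ifndef XXH32_ENDJMP, so the knob can be flipped per translation unit; a sketch:

/* Opt into the jump-based finalizer before pulling in the implementation. */
#define XXH32_ENDJMP 1
#define XXH_INLINE_ALL
#include "xxhash.h"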
@@ -1343,32 +1408,18 @@ XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t s
 */
 
 #ifndef XXH_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
-   /* prefer __packed__ structures (method 1) for
-
-
-   ( \
-    defined(__GNUC__) && ( \
-     (defined(__ARM_ARCH) && __ARM_ARCH >= 7) || \
-     ( \
-        defined(__mips__) && \
-        (__mips <= 5 || __mips_isa_rev < 6) && \
-        (!defined(__mips16) || defined(__mips_mips16e2)) \
-     ) \
-    ) \
-   )
+   /* prefer __packed__ structures (method 1) for GCC
+    * < ARMv7 with unaligned access (e.g. Raspbian armhf) still uses byte shifting, so we use memcpy
+    * which for some reason does unaligned loads. */
+#  if defined(__GNUC__) && !(defined(__ARM_ARCH) && __ARM_ARCH < 7 && defined(__ARM_FEATURE_UNALIGNED))
 #    define XXH_FORCE_MEMORY_ACCESS 1
 #  endif
 #endif
 
-#ifndef XXH_ACCEPT_NULL_INPUT_POINTER   /* can be defined externally */
-#  define XXH_ACCEPT_NULL_INPUT_POINTER 0
-#endif
-
 #ifndef XXH_FORCE_ALIGN_CHECK  /* can be defined externally */
-
-
+   /* don't check on x86, aarch64, or arm when unaligned access is available */
+#  if defined(__i386) || defined(__x86_64__) || defined(__aarch64__) || defined(__ARM_FEATURE_UNALIGNED) \
+   || defined(_M_IX86) || defined(_M_X64) || defined(_M_ARM64) || defined(_M_ARM) /* visual */
 #    define XXH_FORCE_ALIGN_CHECK 0
 #  else
 #    define XXH_FORCE_ALIGN_CHECK 1
@@ -1384,14 +1435,9 @@ XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t s
 #  endif
 #endif
 
-#ifndef
-
-
-   /* The if/then loop is preferable to switch/case on gcc (on x64) */
-#   define XXH_REROLL 1
-# else
-#   define XXH_REROLL 0
-# endif
+#ifndef XXH32_ENDJMP
+/* generally preferable for performance */
+# define XXH32_ENDJMP 0
 #endif
 
 /*!
@@ -1413,13 +1459,13 @@ XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t s
 * @internal
 * @brief Modify this function to use a different routine than malloc().
 */
-static void* XXH_malloc(size_t s) { return
+static void* XXH_malloc(size_t s) { return ruby_xmalloc(s); }
 
 /*!
 * @internal
 * @brief Modify this function to use a different routine than free().
 */
-static void XXH_free(void* p) {
+static void XXH_free(void* p) { ruby_xfree(p); }
 
 #include <string.h>
 
@@ -1443,19 +1489,19 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size)
 #endif
 
 #if XXH_NO_INLINE_HINTS  /* disable inlining hints */
-#  if defined(__GNUC__)
+#  if defined(__GNUC__) || defined(__clang__)
 #    define XXH_FORCE_INLINE static __attribute__((unused))
 #  else
 #    define XXH_FORCE_INLINE static
 #  endif
 #  define XXH_NO_INLINE static
 /* enable inlining hints */
+#elif defined(__GNUC__) || defined(__clang__)
+#  define XXH_FORCE_INLINE static __inline__ __attribute__((always_inline, unused))
+#  define XXH_NO_INLINE static __attribute__((noinline))
 #elif defined(_MSC_VER)  /* Visual Studio */
 #  define XXH_FORCE_INLINE static __forceinline
 #  define XXH_NO_INLINE static __declspec(noinline)
-#elif defined(__GNUC__)
-#  define XXH_FORCE_INLINE static __inline__ __attribute__((always_inline, unused))
-#  define XXH_NO_INLINE static __attribute__((noinline))
 #elif defined (__cplusplus) \
   || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L))   /* C99 */
 #  define XXH_FORCE_INLINE static inline
@@ -1522,7 +1568,7 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size)
 * We also use it to prevent unwanted constant folding for AArch64 in
 * XXH3_initCustomSecret_scalar().
 */
-#if defined(__GNUC__)
+#if defined(__GNUC__) || defined(__clang__)
 #  define XXH_COMPILER_GUARD(var) __asm__ __volatile__("" : "+r" (var))
 #else
 #  define XXH_COMPILER_GUARD(var) ((void)0)
@@ -1615,30 +1661,31 @@ static xxh_u32 XXH_read32(const void* memPtr) { return *(const xxh_u32*) memPtr;
 #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
 
 /*
-*
-*
-*
-*
+* __attribute__((aligned(1))) is supported by gcc and clang. Originally the
+* documentation claimed that it only increased the alignment, but actually it
+* can decrease it on gcc, clang, and icc:
+* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69502,
+* https://gcc.godbolt.org/z/xYez1j67Y.
 */
 #ifdef XXH_OLD_NAMES
 typedef union { xxh_u32 u32; } __attribute__((packed)) unalign;
 #endif
 static xxh_u32 XXH_read32(const void* ptr)
 {
-    typedef
-    return ((const
+    typedef __attribute__((aligned(1))) xxh_u32 xxh_unalign32;
+    return *((const xxh_unalign32*)ptr);
 }
 
 #else
 
 /*
 * Portable and safe solution. Generally efficient.
-* see:
+* see: http://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html
 */
 static xxh_u32 XXH_read32(const void* memPtr)
 {
     xxh_u32 val;
-    memcpy(&val, memPtr, sizeof(val));
+    XXH_memcpy(&val, memPtr, sizeof(val));
     return val;
 }
 
@@ -1955,8 +2002,10 @@ XXH32_finalize(xxh_u32 h32, const xxh_u8* ptr, size_t len, XXH_alignment align)
    h32 = XXH_rotl32(h32, 17) * XXH_PRIME32_4;   \
 } while (0)
 
-
-
+    if (ptr==NULL) XXH_ASSERT(len == 0);
+
+    /* Compact rerolled version; generally faster */
+    if (!XXH32_ENDJMP) {
        len &= 15;
        while (len >= 4) {
            XXH_PROCESS4;
@@ -2024,24 +2073,19 @@ XXH32_finalize(xxh_u32 h32, const xxh_u8* ptr, size_t len, XXH_alignment align)
 * @internal
 * @brief The implementation for @ref XXH32().
 *
-* @param input, len, seed Directly passed from @ref XXH32().
+* @param input , len , seed Directly passed from @ref XXH32().
 * @param align Whether @p input is aligned.
 * @return The calculated hash.
 */
 XXH_FORCE_INLINE xxh_u32
 XXH32_endian_align(const xxh_u8* input, size_t len, xxh_u32 seed, XXH_alignment align)
 {
-    const xxh_u8* bEnd = input ? input + len : NULL;
     xxh_u32 h32;
 
-#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
-    if (input==NULL) {
-        len=0;
-        bEnd=input=(const xxh_u8*)(size_t)16;
-    }
-#endif
+    if (input==NULL) XXH_ASSERT(len == 0);
 
     if (len>=16) {
+        const xxh_u8* const bEnd = input + len;
        const xxh_u8* const limit = bEnd - 15;
        xxh_u32 v1 = seed + XXH_PRIME32_1 + XXH_PRIME32_2;
        xxh_u32 v2 = seed + XXH_PRIME32_2;
@@ -2105,20 +2149,18 @@ XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
 /*! @ingroup xxh32_family */
 XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t* srcState)
 {
-    memcpy(dstState, srcState, sizeof(*dstState));
+    XXH_memcpy(dstState, srcState, sizeof(*dstState));
 }
 
 /*! @ingroup xxh32_family */
 XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, XXH32_hash_t seed)
 {
-
-    memset(
-
-
-
-    /* do not write into reserved, planned to be removed in a future version */
-    memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved));
+    XXH_ASSERT(statePtr != NULL);
+    memset(statePtr, 0, sizeof(*statePtr));
+    statePtr->v[0] = seed + XXH_PRIME32_1 + XXH_PRIME32_2;
+    statePtr->v[1] = seed + XXH_PRIME32_2;
+    statePtr->v[2] = seed + 0;
+    statePtr->v[3] = seed - XXH_PRIME32_1;
    return XXH_OK;
 }
 
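The lane rename (v1..v4 → v[4]) is purely internal; the streaming API built on this state is unchanged. A minimal streaming sketch (error checks elided):

#include "xxhash.h"

XXH32_hash_t hash_two_parts(const void* a, size_t alen, const void* b, size_t blen)
{
    XXH32_state_t* const st = XXH32_createState();  /* NULL check elided */
    XXH32_hash_t h;
    XXH32_reset(st, 0);          /* seeds v[0..3] exactly as shown above */
    XXH32_update(st, a, alen);
    XXH32_update(st, b, blen);
    h = XXH32_digest(st);
    XXH32_freeState(st);
    return h;
}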
@@ -2127,12 +2169,10 @@ XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, XXH32_hash_t s
 XXH_PUBLIC_API XXH_errorcode
 XXH32_update(XXH32_state_t* state, const void* input, size_t len)
 {
-    if (input==NULL)
-#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
+    if (input==NULL) {
+        XXH_ASSERT(len == 0);
         return XXH_OK;
-#else
-        return XXH_ERROR;
-#endif
+    }
 
    {   const xxh_u8* p = (const xxh_u8*)input;
        const xxh_u8* const bEnd = p + len;
@@ -2149,10 +2189,10 @@ XXH32_update(XXH32_state_t* state, const void* input, size_t len)
        if (state->memsize) {   /* some data left from previous update */
            XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, 16-state->memsize);
            {   const xxh_u32* p32 = state->mem32;
-                state->v1 = XXH32_round(state->v1, XXH_readLE32(p32)); p32++;
-                state->v2 = XXH32_round(state->v2, XXH_readLE32(p32)); p32++;
-                state->v3 = XXH32_round(state->v3, XXH_readLE32(p32)); p32++;
-                state->v4 = XXH32_round(state->v4, XXH_readLE32(p32));
+                state->v[0] = XXH32_round(state->v[0], XXH_readLE32(p32)); p32++;
+                state->v[1] = XXH32_round(state->v[1], XXH_readLE32(p32)); p32++;
+                state->v[2] = XXH32_round(state->v[2], XXH_readLE32(p32)); p32++;
+                state->v[3] = XXH32_round(state->v[3], XXH_readLE32(p32));
            }
            p += 16-state->memsize;
            state->memsize = 0;
@@ -2160,22 +2200,14 @@ XXH32_update(XXH32_state_t* state, const void* input, size_t len)
 
        if (p <= bEnd-16) {
            const xxh_u8* const limit = bEnd - 16;
-            xxh_u32 v1 = state->v1;
-            xxh_u32 v2 = state->v2;
-            xxh_u32 v3 = state->v3;
-            xxh_u32 v4 = state->v4;
 
            do {
-                v1 = XXH32_round(v1, XXH_readLE32(p)); p+=4;
-                v2 = XXH32_round(v2, XXH_readLE32(p)); p+=4;
-                v3 = XXH32_round(v3, XXH_readLE32(p)); p+=4;
-                v4 = XXH32_round(v4, XXH_readLE32(p)); p+=4;
+                state->v[0] = XXH32_round(state->v[0], XXH_readLE32(p)); p+=4;
+                state->v[1] = XXH32_round(state->v[1], XXH_readLE32(p)); p+=4;
+                state->v[2] = XXH32_round(state->v[2], XXH_readLE32(p)); p+=4;
+                state->v[3] = XXH32_round(state->v[3], XXH_readLE32(p)); p+=4;
            } while (p<=limit);
 
-            state->v1 = v1;
-            state->v2 = v2;
-            state->v3 = v3;
-            state->v4 = v4;
        }
 
        if (p < bEnd) {
@@ -2194,12 +2226,12 @@ XXH_PUBLIC_API XXH32_hash_t XXH32_digest(const XXH32_state_t* state)
    xxh_u32 h32;
 
    if (state->large_len) {
-        h32 = XXH_rotl32(state->v1, 1)
-              + XXH_rotl32(state->v2, 7)
-              + XXH_rotl32(state->v3, 12)
-              + XXH_rotl32(state->v4, 18);
+        h32 = XXH_rotl32(state->v[0], 1)
+            + XXH_rotl32(state->v[1], 7)
+            + XXH_rotl32(state->v[2], 12)
+            + XXH_rotl32(state->v[3], 18);
    } else {
-        h32 = state->v3 /* == seed */ + XXH_PRIME32_5;
+        h32 = state->v[2] /* == seed */ + XXH_PRIME32_5;
    }
 
    h32 += state->total_len_32;
@@ -2228,7 +2260,7 @@ XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t
 {
    XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t));
    if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash);
-    memcpy(dst, &hash, sizeof(*dst));
+    XXH_memcpy(dst, &hash, sizeof(*dst));
 }
 /*! @ingroup xxh32_family */
 XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src)
@@ -2271,30 +2303,31 @@ static xxh_u64 XXH_read64(const void* memPtr)
 #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
 
 /*
-*
-*
-*
-*
+* __attribute__((aligned(1))) is supported by gcc and clang. Originally the
+* documentation claimed that it only increased the alignment, but actually it
+* can decrease it on gcc, clang, and icc:
+* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69502,
+* https://gcc.godbolt.org/z/xYez1j67Y.
 */
 #ifdef XXH_OLD_NAMES
 typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((packed)) unalign64;
 #endif
 static xxh_u64 XXH_read64(const void* ptr)
 {
-    typedef
-    return ((const xxh_unalign64*)ptr)
+    typedef __attribute__((aligned(1))) xxh_u64 xxh_unalign64;
+    return *((const xxh_unalign64*)ptr);
 }
 
 #else
 
 /*
 * Portable and safe solution. Generally efficient.
-* see:
+* see: http://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html
 */
 static xxh_u64 XXH_read64(const void* memPtr)
 {
    xxh_u64 val;
-    memcpy(&val, memPtr, sizeof(val));
+    XXH_memcpy(&val, memPtr, sizeof(val));
    return val;
 }
 
@@ -2424,6 +2457,7 @@ static xxh_u64 XXH64_avalanche(xxh_u64 h64)
 static xxh_u64
 XXH64_finalize(xxh_u64 h64, const xxh_u8* ptr, size_t len, XXH_alignment align)
 {
+    if (ptr==NULL) XXH_ASSERT(len == 0);
    len &= 31;
    while (len >= 8) {
        xxh_u64 const k1 = XXH64_round(0, XXH_get64bits(ptr));
@@ -2459,18 +2493,12 @@ XXH64_finalize(xxh_u64 h64, const xxh_u8* ptr, size_t len, XXH_alignment align)
 XXH_FORCE_INLINE xxh_u64
 XXH64_endian_align(const xxh_u8* input, size_t len, xxh_u64 seed, XXH_alignment align)
 {
-    const xxh_u8* bEnd = input ? input + len : NULL;
    xxh_u64 h64;
-
-#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
-    if (input==NULL) {
-        len=0;
-        bEnd=input=(const xxh_u8*)(size_t)32;
-    }
-#endif
+    if (input==NULL) XXH_ASSERT(len == 0);
 
    if (len>=32) {
-        const xxh_u8* const
+        const xxh_u8* const bEnd = input + len;
+        const xxh_u8* const limit = bEnd - 31;
        xxh_u64 v1 = seed + XXH_PRIME64_1 + XXH_PRIME64_2;
        xxh_u64 v2 = seed + XXH_PRIME64_2;
        xxh_u64 v3 = seed + 0;
@@ -2481,7 +2509,7 @@ XXH64_endian_align(const xxh_u8* input, size_t len, xxh_u64 seed, XXH_alignment
        v2 = XXH64_round(v2, XXH_get64bits(input)); input+=8;
        v3 = XXH64_round(v3, XXH_get64bits(input)); input+=8;
        v4 = XXH64_round(v4, XXH_get64bits(input)); input+=8;
-    } while (input
+    } while (input<limit);
 
    h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
    h64 = XXH64_mergeRound(h64, v1);
@@ -2536,20 +2564,18 @@ XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
 /*! @ingroup xxh64_family */
 XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dstState, const XXH64_state_t* srcState)
 {
-    memcpy(dstState, srcState, sizeof(*dstState));
+    XXH_memcpy(dstState, srcState, sizeof(*dstState));
 }
 
 /*! @ingroup xxh64_family */
 XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, XXH64_hash_t seed)
 {
-
-    memset(
-
-
-
-    /* do not write into reserved64, might be removed in a future version */
-    memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved64));
+    XXH_ASSERT(statePtr != NULL);
+    memset(statePtr, 0, sizeof(*statePtr));
+    statePtr->v[0] = seed + XXH_PRIME64_1 + XXH_PRIME64_2;
+    statePtr->v[1] = seed + XXH_PRIME64_2;
+    statePtr->v[2] = seed + 0;
+    statePtr->v[3] = seed - XXH_PRIME64_1;
    return XXH_OK;
 }
 
@@ -2557,12 +2583,10 @@ XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, XXH64_hash_t s
 XXH_PUBLIC_API XXH_errorcode
 XXH64_update (XXH64_state_t* state, const void* input, size_t len)
 {
-    if (input==NULL)
-#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
+    if (input==NULL) {
+        XXH_ASSERT(len == 0);
        return XXH_OK;
-#else
-        return XXH_ERROR;
-#endif
+    }
 
    {   const xxh_u8* p = (const xxh_u8*)input;
        const xxh_u8* const bEnd = p + len;
@@ -2577,32 +2601,24 @@ XXH64_update (XXH64_state_t* state, const void* input, size_t len)
 
        if (state->memsize) {   /* tmp buffer is full */
            XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, 32-state->memsize);
-            state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0));
-            state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1));
-            state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2));
-            state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3));
+            state->v[0] = XXH64_round(state->v[0], XXH_readLE64(state->mem64+0));
+            state->v[1] = XXH64_round(state->v[1], XXH_readLE64(state->mem64+1));
+            state->v[2] = XXH64_round(state->v[2], XXH_readLE64(state->mem64+2));
+            state->v[3] = XXH64_round(state->v[3], XXH_readLE64(state->mem64+3));
            p += 32 - state->memsize;
            state->memsize = 0;
        }
 
        if (p+32 <= bEnd) {
            const xxh_u8* const limit = bEnd - 32;
-            xxh_u64 v1 = state->v1;
-            xxh_u64 v2 = state->v2;
-            xxh_u64 v3 = state->v3;
-            xxh_u64 v4 = state->v4;
 
            do {
-                v1 = XXH64_round(v1, XXH_readLE64(p)); p+=8;
-                v2 = XXH64_round(v2, XXH_readLE64(p)); p+=8;
-                v3 = XXH64_round(v3, XXH_readLE64(p)); p+=8;
-                v4 = XXH64_round(v4, XXH_readLE64(p)); p+=8;
+                state->v[0] = XXH64_round(state->v[0], XXH_readLE64(p)); p+=8;
+                state->v[1] = XXH64_round(state->v[1], XXH_readLE64(p)); p+=8;
+                state->v[2] = XXH64_round(state->v[2], XXH_readLE64(p)); p+=8;
+                state->v[3] = XXH64_round(state->v[3], XXH_readLE64(p)); p+=8;
            } while (p<=limit);
 
-            state->v1 = v1;
-            state->v2 = v2;
-            state->v3 = v3;
-            state->v4 = v4;
        }
 
        if (p < bEnd) {
@@ -2621,18 +2637,13 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_digest(const XXH64_state_t* state)
    xxh_u64 h64;
 
    if (state->total_len >= 32) {
-        xxh_u64 const v1 = state->v1;
-        xxh_u64 const v2 = state->v2;
-        xxh_u64 const v3 = state->v3;
-        xxh_u64 const v4 = state->v4;
-
-        h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
-        h64 = XXH64_mergeRound(h64, v1);
-        h64 = XXH64_mergeRound(h64, v2);
-        h64 = XXH64_mergeRound(h64, v3);
-        h64 = XXH64_mergeRound(h64, v4);
+        h64 = XXH_rotl64(state->v[0], 1) + XXH_rotl64(state->v[1], 7) + XXH_rotl64(state->v[2], 12) + XXH_rotl64(state->v[3], 18);
+        h64 = XXH64_mergeRound(h64, state->v[0]);
+        h64 = XXH64_mergeRound(h64, state->v[1]);
+        h64 = XXH64_mergeRound(h64, state->v[2]);
+        h64 = XXH64_mergeRound(h64, state->v[3]);
    } else {
-        h64  = state->v3 /*seed*/ + XXH_PRIME64_5;
+        h64  = state->v[2] /*seed*/ + XXH_PRIME64_5;
    }
 
    h64 += (xxh_u64) state->total_len;
@@ -2648,7 +2659,7 @@ XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t
 {
    XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t));
    if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash);
-    memcpy(dst, &hash, sizeof(*dst));
+    XXH_memcpy(dst, &hash, sizeof(*dst));
 }
 
 /*! @ingroup xxh64_family */
@@ -2691,17 +2702,21 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src
 #  define XXH_unlikely(x) (x)
 #endif
 
-#if defined(__GNUC__)
-#  if defined(__AVX2__)
-#    include <immintrin.h>
-#  elif defined(__SSE2__)
-#    include <emmintrin.h>
-#  elif defined(__ARM_NEON__) || defined(__ARM_NEON)
+#if defined(__GNUC__) || defined(__clang__)
+#  if defined(__ARM_NEON__) || defined(__ARM_NEON) \
+   || defined(__aarch64__)  || defined(_M_ARM) \
+   || defined(_M_ARM64)     || defined(_M_ARM64EC)
 #    define inline __inline__  /* circumvent a clang bug */
 #    include <arm_neon.h>
 #    undef inline
+#  elif defined(__AVX2__)
+#    include <immintrin.h>
+#  elif defined(__SSE2__)
+#    include <emmintrin.h>
 #  endif
-#elif defined(_MSC_VER)
+#endif
+
+#if defined(_MSC_VER)
 #  include <intrin.h>
 #endif
 
@@ -2839,17 +2854,20 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
 #endif
 
 #ifndef XXH_VECTOR            /* can be defined on command line */
-#  if defined(__AVX512F__)
+#  if ( \
+        defined(__ARM_NEON__) || defined(__ARM_NEON) /* gcc */ \
+     || defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) /* msvc */ \
+   ) && ( \
+        defined(_WIN32) || defined(__LITTLE_ENDIAN__) /* little endian only */ \
+    || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) \
+   )
+#    define XXH_VECTOR XXH_NEON
+#  elif defined(__AVX512F__)
 #    define XXH_VECTOR XXH_AVX512
 #  elif defined(__AVX2__)
 #    define XXH_VECTOR XXH_AVX2
 #  elif defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP == 2))
 #    define XXH_VECTOR XXH_SSE2
-#  elif defined(__GNUC__) /* msvc support maybe later */ \
-  && (defined(__ARM_NEON__) || defined(__ARM_NEON)) \
-  && (defined(__LITTLE_ENDIAN__) /* We only support little endian NEON */ \
-    || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__))
-#    define XXH_VECTOR XXH_NEON
 #  elif (defined(__PPC64__) && defined(__POWER8_VECTOR__)) \
      || (defined(__s390x__) && defined(__VEC__)) \
      && defined(__GNUC__) /* TODO: IBM XL */
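Because of the #ifndef guard, the dispatch can also be pinned by the build instead of auto-detected; a sketch forcing the portable path (e.g. to compare code paths):

/* Equivalent to compiling with -DXXH_VECTOR=XXH_SCALAR */
#define XXH_VECTOR XXH_SCALAR
#define XXH_INLINE_ALL
#include "xxhash.h"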
@@ -2999,8 +3017,8 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
 *  }
 */
 #  if !defined(XXH_NO_VZIP_HACK) /* define to disable */ \
-   && defined(__GNUC__) \
-   &&
+   && (defined(__GNUC__) || defined(__clang__)) \
+   && (defined(__arm__) || defined(__thumb__) || defined(_M_ARM))
 #    define XXH_SPLIT_IN_PLACE(in, outLo, outHi)                                            \
        do {                                                                                \
          /* Undocumented GCC/Clang operand modifier: %e0 = lower D half, %f0 = upper D half */ \
@@ -3017,6 +3035,76 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
            (outHi) = vshrn_n_u64  ((in), 32);                                              \
        } while (0)
 #  endif
+
+/*!
+ * @internal
+ * @brief `vld1q_u64` but faster and alignment-safe.
+ *
+ * On AArch64, unaligned access is always safe, but on ARMv7-a, it is only
+ * *conditionally* safe (`vld1` has an alignment bit like `movdq[ua]` in x86).
+ *
+ * GCC for AArch64 sees `vld1q_u8` as an intrinsic instead of a load, so it
+ * prohibits load-store optimizations. Therefore, a direct dereference is used.
+ *
+ * Otherwise, `vld1q_u8` is used with `vreinterpretq_u8_u64` to do a safe
+ * unaligned load.
+ */
+#if defined(__aarch64__) && defined(__GNUC__) && !defined(__clang__)
+XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr) /* silence -Wcast-align */
+{
+    return *(uint64x2_t const*)ptr;
+}
+#else
+XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr)
+{
+    return vreinterpretq_u64_u8(vld1q_u8((uint8_t const*)ptr));
+}
+#endif
+/*!
+ * @ingroup tuning
+ * @brief Controls the NEON to scalar ratio for XXH3
+ *
+ * On AArch64 when not optimizing for size, XXH3 will run 6 lanes using NEON and
+ * 2 lanes on scalar by default.
+ *
+ * This can be set to 2, 4, 6, or 8. ARMv7 will default to all 8 NEON lanes, as the
+ * emulated 64-bit arithmetic is too slow.
+ *
+ * Modern ARM CPUs are _very_ sensitive to how their pipelines are used.
+ *
+ * For example, the Cortex-A73 can dispatch 3 micro-ops per cycle, but it can't
+ * have more than 2 NEON (F0/F1) micro-ops. If you are only using NEON instructions,
+ * you are only using 2/3 of the CPU bandwidth.
+ *
+ * This is even more noticable on the more advanced cores like the A76 which
+ * can dispatch 8 micro-ops per cycle, but still only 2 NEON micro-ops at once.
+ *
+ * Therefore, @ref XXH3_NEON_LANES lanes will be processed using NEON, and the
+ * remaining lanes will use scalar instructions. This improves the bandwidth
+ * and also gives the integer pipelines something to do besides twiddling loop
+ * counters and pointers.
+ *
+ * This change benefits CPUs with large micro-op buffers without negatively affecting
+ * other CPUs:
+ *
+ *  | Chipset               | Dispatch type       | NEON only | 6:2 hybrid | Diff. |
+ *  |:----------------------|:--------------------|----------:|-----------:|------:|
+ *  | Snapdragon 730 (A76)  | 2 NEON/8 micro-ops  |  8.8 GB/s |  10.1 GB/s |  ~16% |
+ *  | Snapdragon 835 (A73)  | 2 NEON/3 micro-ops  |  5.1 GB/s |   5.3 GB/s |   ~5% |
+ *  | Marvell PXA1928 (A53) | In-order dual-issue |  1.9 GB/s |   1.9 GB/s |    0% |
+ *
+ * It also seems to fix some bad codegen on GCC, making it almost as fast as clang.
+ *
+ * @see XXH3_accumulate_512_neon()
+ */
+# ifndef XXH3_NEON_LANES
+#  if (defined(__aarch64__) || defined(__arm64__) || defined(_M_ARM64) || defined(_M_ARM64EC)) \
+   && !defined(__OPTIMIZE_SIZE__)
+#   define XXH3_NEON_LANES 6
+#  else
+#   define XXH3_NEON_LANES XXH_ACC_NB
+#  endif
+# endif
 #endif  /* XXH_VECTOR == XXH_NEON */
 
 /*
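The new tuning knob follows the same #ifndef convention, so a port that measures better with all-NEON processing can pin it; a sketch:

/* Hypothetical tuning for a core where the 6:2 hybrid split loses. */
#define XXH3_NEON_LANES 8
#define XXH_INLINE_ALL
#include "xxhash.h"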
@@ -3083,7 +3171,7 @@ XXH_FORCE_INLINE xxh_u64x2 XXH_vec_revb(xxh_u64x2 val)
|
|
3083
3171
|
XXH_FORCE_INLINE xxh_u64x2 XXH_vec_loadu(const void *ptr)
|
3084
3172
|
{
|
3085
3173
|
xxh_u64x2 ret;
|
3086
|
-
|
3174
|
+
XXH_memcpy(&ret, ptr, sizeof(xxh_u64x2));
|
3087
3175
|
# if XXH_VSX_BE
|
3088
3176
|
ret = XXH_vec_revb(ret);
|
3089
3177
|
# endif
|
@@ -3193,7 +3281,6 @@ XXH_mult32to64(xxh_u64 x, xxh_u64 y)
|
|
3193
3281
|
return (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF);
|
3194
3282
|
}
|
3195
3283
|
#elif defined(_MSC_VER) && defined(_M_IX86)
|
3196
|
-
# include <intrin.h>
|
3197
3284
|
# define XXH_mult32to64(x, y) __emulu((unsigned)(x), (unsigned)(y))
|
3198
3285
|
#else
|
3199
3286
|
/*
|
@@ -3212,7 +3299,7 @@ XXH_mult32to64(xxh_u64 x, xxh_u64 y)
|
|
3212
3299
|
* Uses `__uint128_t` and `_umul128` if available, otherwise uses a scalar
|
3213
3300
|
* version.
|
3214
3301
|
*
|
3215
|
-
* @param lhs, rhs The 64-bit integers to be multiplied
|
3302
|
+
* @param lhs , rhs The 64-bit integers to be multiplied
|
3216
3303
|
* @return The 128-bit result represented in an @ref XXH128_hash_t.
|
3217
3304
|
*/
|
3218
3305
|
static XXH128_hash_t
|
@@ -3233,7 +3320,7 @@ XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs)
|
|
3233
3320
|
* In that case it is best to use the portable one.
|
3234
3321
|
* https://github.com/Cyan4973/xxHash/issues/211#issuecomment-515575677
|
3235
3322
|
*/
|
3236
|
-
#if defined(__GNUC__) && !defined(__wasm__) \
|
3323
|
+
#if (defined(__GNUC__) || defined(__clang__)) && !defined(__wasm__) \
|
3237
3324
|
&& defined(__SIZEOF_INT128__) \
|
3238
3325
|
|| (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128)
|
3239
3326
|
|
@@ -3250,7 +3337,7 @@ XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs)
|
|
3250
3337
|
*
|
3251
3338
|
* This compiles to single operand MUL on x64.
|
3252
3339
|
*/
|
3253
|
-
#elif defined(_M_X64) || defined(_M_IA64)
|
3340
|
+
#elif (defined(_M_X64) || defined(_M_IA64)) && !defined(_M_ARM64EC)
|
3254
3341
|
|
3255
3342
|
#ifndef _MSC_VER
|
3256
3343
|
# pragma intrinsic(_umul128)
|
@@ -3262,6 +3349,21 @@ XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs)
|
|
3262
3349
|
r128.high64 = product_high;
|
3263
3350
|
return r128;
|
3264
3351
|
|
3352
|
+
/*
|
3353
|
+
* MSVC for ARM64's __umulh method.
|
3354
|
+
*
|
3355
|
+
* This compiles to the same MUL + UMULH as GCC/Clang's __uint128_t method.
|
3356
|
+
*/
|
3357
|
+
#elif defined(_M_ARM64) || defined(_M_ARM64EC)
|
3358
|
+
|
3359
|
+
#ifndef _MSC_VER
|
3360
|
+
# pragma intrinsic(__umulh)
|
3361
|
+
#endif
|
3362
|
+
XXH128_hash_t r128;
|
3363
|
+
r128.low64 = lhs * rhs;
|
3364
|
+
r128.high64 = __umulh(lhs, rhs);
|
3365
|
+
return r128;
|
3366
|
+
|
3265
3367
|
#else
|
3266
3368
|
/*
|
3267
3369
|
* Portable scalar method. Optimized for 32-bit and 64-bit ALUs.
|
@@ -3330,7 +3432,7 @@ XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs)
|
|
3330
3432
|
* The reason for the separate function is to prevent passing too many structs
|
3331
3433
|
* around by value. This will hopefully inline the multiply, but we don't force it.
|
3332
3434
|
*
|
3333
|
-
* @param lhs, rhs The 64-bit integers to multiply
|
3435
|
+
* @param lhs , rhs The 64-bit integers to multiply
|
3334
3436
|
* @return The low 64 bits of the product XOR'd by the high 64 bits.
|
3335
3437
|
* @see XXH_mult64to128()
|
3336
3438
|
*/
|
@@ -3632,7 +3734,7 @@ XXH3_len_129to240_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
|
|
3632
3734
|
XXH_FORCE_INLINE void XXH_writeLE64(void* dst, xxh_u64 v64)
|
3633
3735
|
{
|
3634
3736
|
if (!XXH_CPU_LITTLE_ENDIAN) v64 = XXH_swap64(v64);
|
3635
|
-
|
3737
|
+
XXH_memcpy(dst, &v64, sizeof(v64));
|
3636
3738
|
}
|
3637
3739
|
|
3638
3740
|
/* Several intrinsic functions below are supposed to accept __int64 as argument,
|
@@ -3649,6 +3751,7 @@ XXH_FORCE_INLINE void XXH_writeLE64(void* dst, xxh_u64 v64)
|
|
3649
3751
|
typedef long long xxh_i64;
|
3650
3752
|
#endif
|
3651
3753
|
|
3754
|
+
|
3652
3755
|
/*
|
3653
3756
|
* XXH3_accumulate_512 is the tightest loop for long inputs, and it is the most optimized.
|
3654
3757
|
*
|
@@ -3684,7 +3787,7 @@ XXH3_accumulate_512_avx512(void* XXH_RESTRICT acc,
|
|
3684
3787
|
const void* XXH_RESTRICT input,
|
3685
3788
|
const void* XXH_RESTRICT secret)
|
3686
3789
|
{
|
3687
|
-
|
3790
|
+
__m512i* const xacc = (__m512i *) acc;
|
3688
3791
|
XXH_ASSERT((((size_t)acc) & 63) == 0);
|
3689
3792
|
XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i));
|
3690
3793
|
|
@@ -3733,7 +3836,7 @@ XXH3_scrambleAcc_avx512(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
|
|
3733
3836
|
{
|
3734
3837
|
XXH_ASSERT((((size_t)acc) & 63) == 0);
|
3735
3838
|
XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i));
|
3736
|
-
{
|
3839
|
+
{ __m512i* const xacc = (__m512i*) acc;
|
3737
3840
|
const __m512i prime32 = _mm512_set1_epi32((int)XXH_PRIME32_1);
|
3738
3841
|
|
3739
3842
|
/* xacc[0] ^= (xacc[0] >> 47) */
|
@@ -3794,7 +3897,7 @@ XXH3_accumulate_512_avx2( void* XXH_RESTRICT acc,
|
|
3794
3897
|
const void* XXH_RESTRICT secret)
|
3795
3898
|
{
|
3796
3899
|
XXH_ASSERT((((size_t)acc) & 31) == 0);
|
3797
|
-
{
|
3900
|
+
{ __m256i* const xacc = (__m256i *) acc;
|
3798
3901
|
/* Unaligned. This is mainly for pointer arithmetic, and because
|
3799
3902
|
* _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */
|
3800
3903
|
const __m256i* const xinput = (const __m256i *) input;
|
@@ -3826,7 +3929,7 @@ XXH_FORCE_INLINE XXH_TARGET_AVX2 void
|
|
3826
3929
|
XXH3_scrambleAcc_avx2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
|
3827
3930
|
{
|
3828
3931
|
XXH_ASSERT((((size_t)acc) & 31) == 0);
|
3829
|
-
{
|
3932
|
+
{ __m256i* const xacc = (__m256i*) acc;
|
3830
3933
|
/* Unaligned. This is mainly for pointer arithmetic, and because
|
3831
3934
|
* _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */
|
3832
3935
|
const __m256i* const xsecret = (const __m256i *) secret;
|
@@ -3900,7 +4003,7 @@ XXH3_accumulate_512_sse2( void* XXH_RESTRICT acc,
|
|
3900
4003
|
{
|
3901
4004
|
/* SSE2 is just a half-scale version of the AVX2 version. */
|
3902
4005
|
XXH_ASSERT((((size_t)acc) & 15) == 0);
|
3903
|
-
{
|
4006
|
+
{ __m128i* const xacc = (__m128i *) acc;
|
3904
4007
|
/* Unaligned. This is mainly for pointer arithmetic, and because
|
3905
4008
|
* _mm_loadu_si128 requires a const __m128i * pointer for some reason. */
|
3906
4009
|
const __m128i* const xinput = (const __m128i *) input;
|
@@ -3932,7 +4035,7 @@ XXH_FORCE_INLINE XXH_TARGET_SSE2 void
|
|
3932
4035
|
XXH3_scrambleAcc_sse2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
|
3933
4036
|
{
|
3934
4037
|
XXH_ASSERT((((size_t)acc) & 15) == 0);
|
3935
|
-
{
|
4038
|
+
{ __m128i* const xacc = (__m128i*) acc;
|
3936
4039
|
/* Unaligned. This is mainly for pointer arithmetic, and because
|
3937
4040
|
* _mm_loadu_si128 requires a const __m128i * pointer for some reason. */
|
3938
4041
|
const __m128i* const xsecret = (const __m128i *) secret;
|
@@ -3994,40 +4097,66 @@ XXH_FORCE_INLINE XXH_TARGET_SSE2 void XXH3_initCustomSecret_sse2(void* XXH_RESTR
 
 #if (XXH_VECTOR == XXH_NEON)
 
+/* forward declarations for the scalar routines */
+XXH_FORCE_INLINE void
+XXH3_scalarRound(void* XXH_RESTRICT acc, void const* XXH_RESTRICT input,
+                 void const* XXH_RESTRICT secret, size_t lane);
+
+XXH_FORCE_INLINE void
+XXH3_scalarScrambleRound(void* XXH_RESTRICT acc,
+                         void const* XXH_RESTRICT secret, size_t lane);
+
+/*!
+ * @internal
+ * @brief The bulk processing loop for NEON.
+ *
+ * The NEON code path is actually partially scalar when running on AArch64. This
+ * is to optimize the pipelining and can have up to 15% speedup depending on the
+ * CPU, and it also mitigates some GCC codegen issues.
+ *
+ * @see XXH3_NEON_LANES for configuring this and details about this optimization.
+ */
 XXH_FORCE_INLINE void
 XXH3_accumulate_512_neon( void* XXH_RESTRICT acc,
                           const void* XXH_RESTRICT input,
                           const void* XXH_RESTRICT secret)
 {
     XXH_ASSERT((((size_t)acc) & 15) == 0);
+    XXH_STATIC_ASSERT(XXH3_NEON_LANES > 0 && XXH3_NEON_LANES <= XXH_ACC_NB && XXH3_NEON_LANES % 2 == 0);
     {
-        XXH_ALIGN(16) uint64x2_t* const xacc = (uint64x2_t *) acc;
+        uint64x2_t* const xacc = (uint64x2_t *) acc;
         /* We don't use a uint32x4_t pointer because it causes bus errors on ARMv7. */
         uint8_t const* const xinput = (const uint8_t *) input;
         uint8_t const* const xsecret  = (const uint8_t *) secret;
 
         size_t i;
-        for (i=0; i < XXH_STRIPE_LEN / sizeof(uint64x2_t); i++) {
+        /* AArch64 uses both scalar and neon at the same time */
+        for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) {
+            XXH3_scalarRound(acc, input, secret, i);
+        }
+        for (i=0; i < XXH3_NEON_LANES / 2; i++) {
+            uint64x2_t acc_vec = xacc[i];
            /* data_vec = xinput[i]; */
-            uint8x16_t data_vec = vld1q_u8(xinput + (i * 16));
+            uint64x2_t data_vec = XXH_vld1q_u64(xinput + (i * 16));
            /* key_vec = xsecret[i]; */
-            uint8x16_t key_vec = vld1q_u8(xsecret + (i * 16));
+            uint64x2_t key_vec = XXH_vld1q_u64(xsecret + (i * 16));
            uint64x2_t data_key;
            uint32x2_t data_key_lo, data_key_hi;
-            /* xacc[i] += swap(data_vec); */
-            uint64x2_t const data64  = vreinterpretq_u64_u8(data_vec);
-            uint64x2_t const swapped = vextq_u64(data64, data64, 1);
-            xacc[i] = vaddq_u64 (xacc[i], swapped);
+            /* acc_vec_2 = swap(data_vec) */
+            uint64x2_t acc_vec_2 = vextq_u64(data_vec, data_vec, 1);
            /* data_key = data_vec ^ key_vec; */
-            data_key = vreinterpretq_u64_u8(veorq_u8(data_vec, key_vec));
+            data_key = veorq_u64(data_vec, key_vec);
            /* data_key_lo = (uint32x2_t) (data_key & 0xFFFFFFFF);
             * data_key_hi = (uint32x2_t) (data_key >> 32);
             * data_key = UNDEFINED; */
            XXH_SPLIT_IN_PLACE(data_key, data_key_lo, data_key_hi);
-            /* xacc[i] += (uint64x2_t) data_key_lo * (uint64x2_t) data_key_hi; */
-            xacc[i] = vmlal_u32 (xacc[i], data_key_lo, data_key_hi);
-
+            /* acc_vec_2 += (uint64x2_t) data_key_lo * (uint64x2_t) data_key_hi; */
+            acc_vec_2 = vmlal_u32 (acc_vec_2, data_key_lo, data_key_hi);
+            /* xacc[i] += acc_vec_2; */
+            acc_vec = vaddq_u64 (acc_vec, acc_vec_2);
+            xacc[i] = acc_vec;
         }
+
     }
 }
 
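The rewritten NEON loop handles only the first XXH3_NEON_LANES accumulator lanes with vector instructions and sends the remaining lanes through XXH3_scalarRound, so the scalar and vector pipelines run concurrently. A stripped-down sketch of that partitioning idea (plain C with placeholder work functions; the lane split of 6 vector / 2 scalar mirrors a common AArch64 default, but the real mixing is omitted):

    #include <stddef.h>
    #include <stdint.h>

    #define ACC_NB     8  /* total 64-bit accumulator lanes, as in XXH3 */
    #define NEON_LANES 6  /* lanes given to the vector unit; must be even */

    /* placeholders standing in for the real per-lane mixing */
    static void scalar_lane(uint64_t* acc, size_t lane) { acc[lane] ^= lane; }
    static void vector_pair(uint64_t* acc, size_t i)    { acc[2*i]++; acc[2*i+1]++; }

    static void accumulate(uint64_t acc[ACC_NB])
    {
        size_t i;
        for (i = NEON_LANES; i < ACC_NB; i++)  /* tail lanes: scalar pipeline */
            scalar_lane(acc, i);
        for (i = 0; i < NEON_LANES / 2; i++)   /* two lanes per 128-bit NEON op */
            vector_pair(acc, i);
    }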
@@ -4041,15 +4170,19 @@ XXH3_scrambleAcc_neon(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
         uint32x2_t prime = vdup_n_u32 (XXH_PRIME32_1);
 
         size_t i;
-        for (i=0; i < XXH_STRIPE_LEN / sizeof(uint64x2_t); i++) {
+        /* AArch64 uses both scalar and neon at the same time */
+        for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) {
+            XXH3_scalarScrambleRound(acc, secret, i);
+        }
+        for (i=0; i < XXH3_NEON_LANES / 2; i++) {
            /* xacc[i] ^= (xacc[i] >> 47); */
            uint64x2_t acc_vec = xacc[i];
-            uint64x2_t shifted  = vshrq_n_u64(acc_vec, 47);
-            uint64x2_t data_vec = veorq_u64(acc_vec, shifted);
+            uint64x2_t shifted  = vshrq_n_u64 (acc_vec, 47);
+            uint64x2_t data_vec = veorq_u64 (acc_vec, shifted);
 
            /* xacc[i] ^= xsecret[i]; */
-            uint8x16_t key_vec  = vld1q_u8(xsecret + (i * 16));
-            uint64x2_t data_key = veorq_u64(data_vec, vreinterpretq_u64_u8(key_vec));
+            uint64x2_t key_vec  = XXH_vld1q_u64 (xsecret + (i * 16));
+            uint64x2_t data_key = veorq_u64 (data_vec, key_vec);
 
            /* xacc[i] *= XXH_PRIME32_1 */
            uint32x2_t data_key_lo, data_key_hi;
@@ -4077,11 +4210,12 @@ XXH3_scrambleAcc_neon(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
             */
            uint64x2_t prod_hi = vmull_u32 (data_key_hi, prime);
            /* xacc[i] = prod_hi << 32; */
-            xacc[i] = vshlq_n_u64(prod_hi, 32);
+            prod_hi = vshlq_n_u64(prod_hi, 32);
            /* xacc[i] += (prod_hi & 0xFFFFFFFF) * XXH_PRIME32_1; */
-            xacc[i] = vmlal_u32(xacc[i], data_key_lo, prime);
+            xacc[i] = vmlal_u32(prod_hi, data_key_lo, prime);
        }
-    }
+        }
+    }
 }
 
 #endif
@@ -4093,7 +4227,8 @@ XXH3_accumulate_512_vsx( void* XXH_RESTRICT acc,
                          const void* XXH_RESTRICT input,
                          const void* XXH_RESTRICT secret)
 {
-    xxh_u64x2* const xacc = (xxh_u64x2*) acc;    /* presumed aligned */
+    /* presumed aligned */
+    unsigned int* const xacc = (unsigned int*) acc;
     xxh_u64x2 const* const xinput  = (xxh_u64x2 const*) input;   /* no alignment restriction */
     xxh_u64x2 const* const xsecret = (xxh_u64x2 const*) secret;    /* no alignment restriction */
     xxh_u64x2 const v32 = { 32, 32 };
@@ -4108,14 +4243,18 @@ XXH3_accumulate_512_vsx( void* XXH_RESTRICT acc,
        xxh_u32x4 const shuffled = (xxh_u32x4)vec_rl(data_key, v32);
        /* product = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)shuffled & 0xFFFFFFFF); */
        xxh_u64x2 const product = XXH_vec_mulo((xxh_u32x4)data_key, shuffled);
-        xacc[i] += product;
+        /* acc_vec = xacc[i]; */
+        xxh_u64x2 acc_vec = (xxh_u64x2)vec_xl(0, xacc + 4 * i);
+        acc_vec += product;
 
        /* swap high and low halves */
 #ifdef __s390x__
-        xacc[i] += vec_permi(data_vec, data_vec, 2);
+        acc_vec += vec_permi(data_vec, data_vec, 2);
 #else
-        xacc[i] += vec_xxpermdi(data_vec, data_vec, 2);
+        acc_vec += vec_xxpermdi(data_vec, data_vec, 2);
 #endif
+        /* xacc[i] = acc_vec; */
+        vec_xst((xxh_u32x4)acc_vec, 0, xacc + 4 * i);
    }
 }
 
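In these VSX hunks the accumulator is now addressed as unsigned int* and each 128-bit lane pair goes through an explicit vec_xl load and vec_xst store instead of a direct xacc[i] dereference. A reduced sketch of that read-modify-write pattern (illustrative only; it needs a POWER compiler with VSX/Altivec enabled, and the helper name is ours):

    #include <stddef.h>
    #include <altivec.h>

    typedef __vector unsigned long long u64x2;

    /* Load lanes 2i and 2i+1 (four 32-bit words), add, store back. */
    static void add_to_lane_pair(unsigned int* acc32, size_t i, u64x2 delta)
    {
        u64x2 v = (u64x2)vec_xl(0, acc32 + 4 * i);
        v = v + delta;
        vec_xst((__vector unsigned int)v, 0, acc32 + 4 * i);
    }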
@@ -4153,38 +4292,90 @@ XXH3_scrambleAcc_vsx(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
 
 /* scalar variants - universal */
 
+/*!
+ * @internal
+ * @brief Scalar round for @ref XXH3_accumulate_512_scalar().
+ *
+ * This is extracted to its own function because the NEON path uses a combination
+ * of NEON and scalar.
+ */
+XXH_FORCE_INLINE void
+XXH3_scalarRound(void* XXH_RESTRICT acc,
+                 void const* XXH_RESTRICT input,
+                 void const* XXH_RESTRICT secret,
+                 size_t lane)
+{
+    xxh_u64* xacc = (xxh_u64*) acc;
+    xxh_u8 const* xinput  = (xxh_u8 const*) input;
+    xxh_u8 const* xsecret = (xxh_u8 const*) secret;
+    XXH_ASSERT(lane < XXH_ACC_NB);
+    XXH_ASSERT(((size_t)acc & (XXH_ACC_ALIGN-1)) == 0);
+    {
+        xxh_u64 const data_val = XXH_readLE64(xinput + lane * 8);
+        xxh_u64 const data_key = data_val ^ XXH_readLE64(xsecret + lane * 8);
+        xacc[lane ^ 1] += data_val; /* swap adjacent lanes */
+        xacc[lane] += XXH_mult32to64(data_key & 0xFFFFFFFF, data_key >> 32);
+    }
+}
+
+/*!
+ * @internal
+ * @brief Processes a 64 byte block of data using the scalar path.
+ */
 XXH_FORCE_INLINE void
 XXH3_accumulate_512_scalar(void* XXH_RESTRICT acc,
                      const void* XXH_RESTRICT input,
                      const void* XXH_RESTRICT secret)
 {
-    XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64* const xacc = (xxh_u64*) acc; /* presumed aligned */
-    const xxh_u8* const xinput  = (const xxh_u8*) input;  /* no alignment restriction */
-    const xxh_u8* const xsecret = (const xxh_u8*) secret;   /* no alignment restriction */
    size_t i;
-    XXH_ASSERT(((size_t)acc & (XXH_ACC_ALIGN-1)) == 0);
+    /* ARM GCC refuses to unroll this loop, resulting in a 24% slowdown on ARMv6. */
+#if defined(__GNUC__) && !defined(__clang__) \
+  && (defined(__arm__) || defined(__thumb2__)) \
+  && defined(__ARM_FEATURE_UNALIGNED) /* no unaligned access just wastes bytes */ \
+  && !defined(__OPTIMIZE_SIZE__)
+#  pragma GCC unroll 8
+#endif
    for (i=0; i < XXH_ACC_NB; i++) {
-        xxh_u64 const data_val = XXH_readLE64(xinput + 8*i);
-        xxh_u64 const data_key = data_val ^ XXH_readLE64(xsecret + i*8);
-        xacc[i ^ 1] += data_val; /* swap adjacent lanes */
-        xacc[i] += XXH_mult32to64(data_key & 0xFFFFFFFF, data_key >> 32);
+        XXH3_scalarRound(acc, input, secret, i);
    }
 }
 
+/*!
+ * @internal
+ * @brief Scalar scramble step for @ref XXH3_scrambleAcc_scalar().
+ *
+ * This is extracted to its own function because the NEON path uses a combination
+ * of NEON and scalar.
+ */
 XXH_FORCE_INLINE void
-XXH3_scrambleAcc_scalar(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
+XXH3_scalarScrambleRound(void* XXH_RESTRICT acc,
+                         void const* XXH_RESTRICT secret,
+                         size_t lane)
 {
-    XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64* const xacc = (xxh_u64*) acc;   /* presumed aligned */
+    xxh_u64* const xacc = (xxh_u64*) acc;   /* presumed aligned */
    const xxh_u8* const xsecret = (const xxh_u8*) secret;   /* no alignment restriction */
-    size_t i;
    XXH_ASSERT((((size_t)acc) & (XXH_ACC_ALIGN-1)) == 0);
-    for (i=0; i < XXH_ACC_NB; i++) {
-        xxh_u64 const key64 = XXH_readLE64(xsecret + 8*i);
-        xxh_u64 acc64 = xacc[i];
+    XXH_ASSERT(lane < XXH_ACC_NB);
+    {
+        xxh_u64 const key64 = XXH_readLE64(xsecret + lane * 8);
+        xxh_u64 acc64 = xacc[lane];
        acc64 = XXH_xorshift64(acc64, 47);
        acc64 ^= key64;
        acc64 *= XXH_PRIME32_1;
-        xacc[i] = acc64;
+        xacc[lane] = acc64;
+    }
+}
+
+/*!
+ * @internal
+ * @brief Scrambles the accumulators after a large chunk has been read
+ */
+XXH_FORCE_INLINE void
+XXH3_scrambleAcc_scalar(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
+{
+    size_t i;
+    for (i=0; i < XXH_ACC_NB; i++) {
+        XXH3_scalarScrambleRound(acc, secret, i);
    }
 }
 
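XXH3_scalarRound above is the complete per-lane step: read one little-endian 64-bit word, XOR it with the matching secret word, add the 32x32->64 product of the key's halves into this lane, and add the raw input word to the partner lane (lane ^ 1). The same arithmetic restated as self-contained C (standard library only; the header's macros are replaced by plain code):

    #include <stdint.h>
    #include <string.h>

    static uint64_t read_le64(const void* p)
    {
        uint64_t v;
        memcpy(&v, p, sizeof v);
        return v;  /* assumes a little-endian host, for brevity */
    }

    static void scalar_round(uint64_t acc[8],
                             const unsigned char* input,
                             const unsigned char* secret,
                             size_t lane)
    {
        uint64_t const data_val = read_le64(input + lane * 8);
        uint64_t const data_key = data_val ^ read_le64(secret + lane * 8);
        acc[lane ^ 1] += data_val;  /* "swap" addition into the adjacent lane */
        acc[lane] += (uint64_t)(uint32_t)data_key          /* low 32 bits */
                   * (uint64_t)(uint32_t)(data_key >> 32); /* high 32 bits */
    }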
@@ -4206,8 +4397,9 @@ XXH3_initCustomSecret_scalar(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
      * placed sequentially, in order, at the top of the unrolled loop.
      *
      * While MOVK is great for generating constants (2 cycles for a 64-bit
-     * constant compared to 4 cycles for LDR),
-     *
+     * constant compared to 4 cycles for LDR), it fights for bandwidth with
+     * the arithmetic instructions.
+     *
      *   I   L   S
      * MOVK
      * MOVK
@@ -4224,6 +4416,9 @@ XXH3_initCustomSecret_scalar(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
      *   ADD LDR
      *  SUB     STR
      *      STR
+     *
+     * See XXH3_NEON_LANES for details on the pipeline.
+     *
      * XXH3_64bits_withSeed, len == 256, Snapdragon 835
      *   without hack: 2654.4 MB/s
      *   with hack:    3202.9 MB/s
@@ -4422,9 +4617,11 @@ XXH3_hashLong_64b_internal(const void* XXH_RESTRICT input, size_t len,
 }
 
 /*
- * It's important for performance that XXH3_hashLong is not inlined.
+ * It's important for performance to transmit secret's size (when it's static)
+ * so that the compiler can properly optimize the vectorized loop.
+ * This makes a big performance difference for "medium" keys (<1 KB) when using AVX instruction set.
  */
-XXH_NO_INLINE XXH64_hash_t
+XXH_FORCE_INLINE XXH64_hash_t
 XXH3_hashLong_64b_withSecret(const void* XXH_RESTRICT input, size_t len,
                              XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
 {
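The rewritten comment is the rationale for switching this function from no-inline to force-inline: once it is inlined into a caller that passes a compile-time-constant secretLen, the vector loop's trip count becomes a constant the optimizer can unroll. The effect in miniature (illustrative names, not the header's code):

    #include <stddef.h>

    /* stand-in for the hashing loop over the secret */
    static unsigned mix(const unsigned char* secret, size_t secretLen)
    {
        unsigned h = 0;
        size_t i;
        for (i = 0; i < secretLen; i++)  /* constant bound => unroll/vectorize */
            h += secret[i];
        return h;
    }

    static unsigned char kSecret[192];

    /* when mix() is inlined here, secretLen is the literal 192 */
    static inline unsigned mix_default(void) { return mix(kSecret, sizeof(kSecret)); }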
@@ -4433,11 +4630,10 @@ XXH3_hashLong_64b_withSecret(const void* XXH_RESTRICT input, size_t len,
 }
 
 /*
- * It's important for performance that XXH3_hashLong is not inlined. Not sure
- * why (uop cache maybe?), but the difference is large and easily measurable.
- *
- *
- * and uses this opportunity to streamline the generated code for better performance.
+ * It's preferable for performance that XXH3_hashLong is not inlined,
+ * as it results in a smaller function for small data, easier to the instruction cache.
+ * Note that inside this no_inline function, we do inline the internal loop,
+ * and provide a statically defined secret size to allow optimization of vector loop.
  */
 XXH_NO_INLINE XXH64_hash_t
 XXH3_hashLong_64b_default(const void* XXH_RESTRICT input, size_t len,
@@ -4537,6 +4733,14 @@ XXH3_64bits_withSeed(const void* input, size_t len, XXH64_hash_t seed)
     return XXH3_64bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_withSeed);
 }
 
+XXH_PUBLIC_API XXH64_hash_t
+XXH3_64bits_withSecretandSeed(const void* input, size_t len, const void* secret, size_t secretSize, XXH64_hash_t seed)
+{
+    if (len <= XXH3_MIDSIZE_MAX)
+        return XXH3_64bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL);
+    return XXH3_hashLong_64b_withSecret(input, len, seed, (const xxh_u8*)secret, secretSize);
+}
+
 
 /* === XXH3 streaming === */
 
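The new one-shot entry point selects its strategy by length: up to XXH3_MIDSIZE_MAX bytes it hashes with the seed (and the built-in secret), beyond that it uses the caller's secret. A usage sketch, assuming the header is consumed with XXH_INLINE_ALL so this API is visible (buffer contents are toy data):

    #define XXH_INLINE_ALL
    #include "xxhash.h"

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        unsigned char secret[XXH3_SECRET_SIZE_MIN];
        char data[4096];
        XXH64_hash_t h;

        memset(secret, 0x5a, sizeof(secret)); /* toy secret; prefer XXH3_generateSecret() */
        memset(data, 'x', sizeof(data));

        h = XXH3_64bits_withSecretandSeed(data, sizeof(data),
                                          secret, sizeof(secret), /* seed */ 42);
        printf("%016llx\n", (unsigned long long)h);
        return 0;
    }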
@@ -4625,13 +4829,13 @@ XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr)
 XXH_PUBLIC_API void
 XXH3_copyState(XXH3_state_t* dst_state, const XXH3_state_t* src_state)
 {
-    memcpy(dst_state, src_state, sizeof(*dst_state));
+    XXH_memcpy(dst_state, src_state, sizeof(*dst_state));
 }
 
 static void
 XXH3_reset_internal(XXH3_state_t* statePtr,
-                        XXH64_hash_t seed,
-                        const void* secret, size_t secretSize)
+                    XXH64_hash_t seed,
+                    const void* secret, size_t secretSize)
 {
     size_t const initStart = offsetof(XXH3_state_t, bufferedSize);
     size_t const initLength = offsetof(XXH3_state_t, nbStripesPerBlock) - initStart;
@@ -4648,6 +4852,7 @@ XXH3_reset_internal(XXH3_state_t* statePtr,
     statePtr->acc[6] = XXH_PRIME64_5;
     statePtr->acc[7] = XXH_PRIME32_1;
     statePtr->seed = seed;
+    statePtr->useSeed = (seed != 0);
     statePtr->extSecret = (const unsigned char*)secret;
     XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
     statePtr->secretLimit = secretSize - XXH_STRIPE_LEN;
@@ -4680,11 +4885,24 @@ XXH3_64bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed)
 {
     if (statePtr == NULL) return XXH_ERROR;
     if (seed==0) return XXH3_64bits_reset(statePtr);
-    if (seed != statePtr->seed) XXH3_initCustomSecret(statePtr->customSecret, seed);
+    if ((seed != statePtr->seed) || (statePtr->extSecret != NULL))
+        XXH3_initCustomSecret(statePtr->customSecret, seed);
     XXH3_reset_internal(statePtr, seed, NULL, XXH_SECRET_DEFAULT_SIZE);
     return XXH_OK;
 }
 
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_64bits_reset_withSecretandSeed(XXH3_state_t* statePtr, const void* secret, size_t secretSize, XXH64_hash_t seed64)
+{
+    if (statePtr == NULL) return XXH_ERROR;
+    if (secret == NULL) return XXH_ERROR;
+    if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
+    XXH3_reset_internal(statePtr, seed64, secret, secretSize);
+    statePtr->useSeed = 1; /* always, even if seed64==0 */
+    return XXH_OK;
+}
+
 /* Note : when XXH3_consumeStripes() is invoked,
  * there must be a guarantee that at least one more byte must be consumed from input
  * so that the function can blindly consume all stripes using the "normal" secret segment */
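XXH3_64bits_reset_withSecretandSeed validates the secret, resets the state, and then forces useSeed to 1 so that even a zero seed keeps the seeded path for short inputs at digest time. The streaming call sequence, sketched under the same XXH_INLINE_ALL assumption as above:

    #define XXH_INLINE_ALL
    #include "xxhash.h"

    XXH64_hash_t hash_in_two_parts(const void* secret, size_t secretSize)
    {
        XXH64_hash_t h = 0;
        XXH3_state_t* st = XXH3_createState();
        if (st == NULL) return 0;
        if (XXH3_64bits_reset_withSecretandSeed(st, secret, secretSize, 7) == XXH_OK) {
            XXH3_64bits_update(st, "hello ", 6);
            XXH3_64bits_update(st, "world", 5);
            h = XXH3_64bits_digest(st);
        }
        XXH3_freeState(st);
        return h;
    }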
@@ -4712,35 +4930,48 @@ XXH3_consumeStripes(xxh_u64* XXH_RESTRICT acc,
     }
 }
 
+#ifndef XXH3_STREAM_USE_STACK
+# ifndef __clang__ /* clang doesn't need additional stack space */
+#   define XXH3_STREAM_USE_STACK 1
+# endif
+#endif
 /*
  * Both XXH3_64bits_update and XXH3_128bits_update use this routine.
  */
 XXH_FORCE_INLINE XXH_errorcode
-XXH3_update(XXH3_state_t* state,
-            const xxh_u8* input, size_t len,
+XXH3_update(XXH3_state_t* XXH_RESTRICT const state,
+            const xxh_u8* XXH_RESTRICT input, size_t len,
             XXH3_f_accumulate_512 f_acc512,
             XXH3_f_scrambleAcc f_scramble)
 {
-    if (input==NULL)
-#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
+    if (input==NULL) {
+        XXH_ASSERT(len == 0);
        return XXH_OK;
-#else
-        return XXH_ERROR;
-#endif
+    }
 
+    XXH_ASSERT(state != NULL);
    { const xxh_u8* const bEnd = input + len;
      const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
-
+#if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 1
+        /* For some reason, gcc and MSVC seem to suffer greatly
+         * when operating accumulators directly into state.
+         * Operating into stack space seems to enable proper optimization.
+         * clang, on the other hand, doesn't seem to need this trick */
+        XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[8]; memcpy(acc, state->acc, sizeof(acc));
+#else
+        xxh_u64* XXH_RESTRICT const acc = state->acc;
+#endif
        state->totalLen += len;
        XXH_ASSERT(state->bufferedSize <= XXH3_INTERNALBUFFER_SIZE);
 
-        if (state->bufferedSize + len <= XXH3_INTERNALBUFFER_SIZE) {
+        /* small input : just fill in tmp buffer */
+        if (state->bufferedSize + len <= XXH3_INTERNALBUFFER_SIZE) {
            XXH_memcpy(state->buffer + state->bufferedSize, input, len);
            state->bufferedSize += (XXH32_hash_t)len;
            return XXH_OK;
        }
-        /* total input is now > XXH3_INTERNALBUFFER_SIZE */
 
+        /* total input is now > XXH3_INTERNALBUFFER_SIZE */
 #define XXH3_INTERNALBUFFER_STRIPES (XXH3_INTERNALBUFFER_SIZE / XXH_STRIPE_LEN)
        XXH_STATIC_ASSERT(XXH3_INTERNALBUFFER_SIZE % XXH_STRIPE_LEN == 0);   /* clean multiple */
 
@@ -4752,7 +4983,7 @@ XXH3_update(
            size_t const loadSize = XXH3_INTERNALBUFFER_SIZE - state->bufferedSize;
            XXH_memcpy(state->buffer + state->bufferedSize, input, loadSize);
            input += loadSize;
-            XXH3_consumeStripes(state->acc,
+            XXH3_consumeStripes(acc,
                                &state->nbStripesSoFar, state->nbStripesPerBlock,
                                state->buffer, XXH3_INTERNALBUFFER_STRIPES,
                                secret, state->secretLimit,
@@ -4761,25 +4992,62 @@ XXH3_update(
        }
        XXH_ASSERT(input < bEnd);
 
-        /* Consume input by a multiple of internal buffer size */
-        if (bEnd - input > XXH3_INTERNALBUFFER_SIZE) {
-            const xxh_u8* const limit = bEnd - XXH3_INTERNALBUFFER_SIZE;
-            do {
-                XXH3_consumeStripes(state->acc,
-                                   &state->nbStripesSoFar, state->nbStripesPerBlock,
-                                    input, XXH3_INTERNALBUFFER_STRIPES,
-                                    secret, state->secretLimit,
-                                    f_acc512, f_scramble);
-                input += XXH3_INTERNALBUFFER_SIZE;
-            } while (input<limit);
-            /* for last partial stripe */
-            memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);
-        }
+        /* large input to consume : ingest per full block */
+        if ((size_t)(bEnd - input) > state->nbStripesPerBlock * XXH_STRIPE_LEN) {
+            size_t nbStripes = (size_t)(bEnd - 1 - input) / XXH_STRIPE_LEN;
+            XXH_ASSERT(state->nbStripesPerBlock >= state->nbStripesSoFar);
+            /* join to current block's end */
+            {   size_t const nbStripesToEnd = state->nbStripesPerBlock - state->nbStripesSoFar;
+                XXH_ASSERT(nbStripesToEnd <= nbStripes);
+                XXH3_accumulate(acc, input, secret + state->nbStripesSoFar * XXH_SECRET_CONSUME_RATE, nbStripesToEnd, f_acc512);
+                f_scramble(acc, secret + state->secretLimit);
+                state->nbStripesSoFar = 0;
+                input += nbStripesToEnd * XXH_STRIPE_LEN;
+                nbStripes -= nbStripesToEnd;
+            }
+            /* consume per entire blocks */
+            while(nbStripes >= state->nbStripesPerBlock) {
+                XXH3_accumulate(acc, input, secret, state->nbStripesPerBlock, f_acc512);
+                f_scramble(acc, secret + state->secretLimit);
+                input += state->nbStripesPerBlock * XXH_STRIPE_LEN;
+                nbStripes -= state->nbStripesPerBlock;
+            }
+            /* consume last partial block */
+            XXH3_accumulate(acc, input, secret, nbStripes, f_acc512);
+            input += nbStripes * XXH_STRIPE_LEN;
+            XXH_ASSERT(input < bEnd);  /* at least some bytes left */
+            state->nbStripesSoFar = nbStripes;
+            /* buffer predecessor of last partial stripe */
+            XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);
+            XXH_ASSERT(bEnd - input <= XXH_STRIPE_LEN);
+        } else {
+            /* content to consume <= block size */
+            /* Consume input by a multiple of internal buffer size */
+            if (bEnd - input > XXH3_INTERNALBUFFER_SIZE) {
+                const xxh_u8* const limit = bEnd - XXH3_INTERNALBUFFER_SIZE;
+                do {
+                    XXH3_consumeStripes(acc,
+                                       &state->nbStripesSoFar, state->nbStripesPerBlock,
+                                        input, XXH3_INTERNALBUFFER_STRIPES,
+                                        secret, state->secretLimit,
+                                        f_acc512, f_scramble);
+                    input += XXH3_INTERNALBUFFER_SIZE;
+                } while (input<limit);
+                /* buffer predecessor of last partial stripe */
+                XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);
+            }
+        }
        }
-        XXH_ASSERT(input < bEnd);
 
        /* Some remaining input (always) : buffer it */
+        XXH_ASSERT(input < bEnd);
+        XXH_ASSERT(bEnd - input <= XXH3_INTERNALBUFFER_SIZE);
+        XXH_ASSERT(state->bufferedSize == 0);
        XXH_memcpy(state->buffer, input, (size_t)(bEnd-input));
        state->bufferedSize = (XXH32_hash_t)(bEnd-input);
+#if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 1
+        /* save stack accumulators into state */
+        memcpy(state->acc, acc, sizeof(acc));
+#endif
    }
 
    return XXH_OK;
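Two ideas drive the rewritten update path. First, when XXH3_STREAM_USE_STACK is set (everywhere but clang), the accumulators are copied to a stack array, mixed there, and written back once, because gcc and MSVC optimize a local array far better than repeated state->acc accesses. Second, large inputs are ingested per full block, one block being nbStripesPerBlock stripes of XXH_STRIPE_LEN (64) bytes; with the default 192-byte secret and a consume rate of 8 that is (192 - 64) / 8 = 16 stripes, i.e. one scramble every 1024 bytes. The stack-copy pattern in isolation (a generic sketch, not the header's code):

    #include <stdint.h>
    #include <string.h>

    struct hstate { uint64_t acc[8]; /* ... */ };

    static void consume(struct hstate* st, const unsigned char* p, size_t n)
    {
        uint64_t acc[8];                    /* local copy the compiler can keep in registers */
        memcpy(acc, st->acc, sizeof(acc));
        while (n--)                         /* stand-in for the real stripe loop */
            acc[n % 8] += p[n];
        memcpy(st->acc, acc, sizeof(acc));  /* write back once at the end */
    }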
@@ -4803,7 +5071,7 @@ XXH3_digest_long (XXH64_hash_t* acc,
      * Digest on a local copy. This way, the state remains unaltered, and it can
      * continue ingesting more input afterwards.
      */
-    memcpy(acc, state->acc, sizeof(state->acc));
+    XXH_memcpy(acc, state->acc, sizeof(state->acc));
     if (state->bufferedSize >= XXH_STRIPE_LEN) {
         size_t const nbStripes = (state->bufferedSize - 1) / XXH_STRIPE_LEN;
         size_t nbStripesSoFar = state->nbStripesSoFar;
@@ -4820,8 +5088,8 @@ XXH3_digest_long (XXH64_hash_t* acc,
         xxh_u8 lastStripe[XXH_STRIPE_LEN];
         size_t const catchupSize = XXH_STRIPE_LEN - state->bufferedSize;
         XXH_ASSERT(state->bufferedSize > 0);  /* there is always some input buffered */
-        memcpy(lastStripe, state->buffer + sizeof(state->buffer) - catchupSize, catchupSize);
-        memcpy(lastStripe + catchupSize, state->buffer, state->bufferedSize);
+        XXH_memcpy(lastStripe, state->buffer + sizeof(state->buffer) - catchupSize, catchupSize);
+        XXH_memcpy(lastStripe + catchupSize, state->buffer, state->bufferedSize);
         XXH3_accumulate_512(acc,
                             lastStripe,
                             secret + state->secretLimit - XXH_SECRET_LASTACC_START);
@@ -4840,58 +5108,13 @@ XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (const XXH3_state_t* state)
                                   (xxh_u64)state->totalLen * XXH_PRIME64_1);
     }
     /* totalLen <= XXH3_MIDSIZE_MAX: digesting a short input */
-    if (state->seed)
+    if (state->useSeed)
         return XXH3_64bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed);
     return XXH3_64bits_withSecret(state->buffer, (size_t)(state->totalLen),
                                   secret, state->secretLimit + XXH_STRIPE_LEN);
 }
 
-#define XXH_MIN(x, y) (((x) > (y)) ? (y) : (x))
-
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API void
-XXH3_generateSecret(void* secretBuffer, const void* customSeed, size_t customSeedSize)
-{
-    XXH_ASSERT(secretBuffer != NULL);
-    if (customSeedSize == 0) {
-        memcpy(secretBuffer, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE);
-        return;
-    }
-    XXH_ASSERT(customSeed != NULL);
-
-    { size_t const segmentSize = sizeof(XXH128_hash_t);
-      size_t const nbSegments = XXH_SECRET_DEFAULT_SIZE / segmentSize;
-      XXH128_canonical_t scrambler;
-      XXH64_hash_t seeds[12];
-      size_t segnb;
-      XXH_ASSERT(nbSegments == 12);
-      XXH_ASSERT(segmentSize * nbSegments == XXH_SECRET_DEFAULT_SIZE); /* exact multiple */
-      XXH128_canonicalFromHash(&scrambler, XXH128(customSeed, customSeedSize, 0));
-
-      /*
-       * Copy customSeed to seeds[], truncating or repeating as necessary.
-       */
-      { size_t toFill = XXH_MIN(customSeedSize, sizeof(seeds));
-        size_t filled = toFill;
-        memcpy(seeds, customSeed, toFill);
-        while (filled < sizeof(seeds)) {
-            toFill = XXH_MIN(filled, sizeof(seeds) - filled);
-            memcpy((char*)seeds + filled, seeds, toFill);
-            filled += toFill;
-      } }
-
-      /* generate secret */
-      memcpy(secretBuffer, &scrambler, sizeof(scrambler));
-      for (segnb=1; segnb < nbSegments; segnb++) {
-          size_t const segmentStart = segnb * segmentSize;
-          XXH128_canonical_t segment;
-          XXH128_canonicalFromHash(&segment,
-              XXH128(&scrambler, sizeof(scrambler), XXH_readLE64(seeds + segnb) + segnb) );
-          memcpy((char*)secretBuffer + segmentStart, &segment, sizeof(segment));
-    } }
-}
-
 
 /* ==========================================
  * XXH3 128 bits (a.k.a XXH128)
@@ -5193,9 +5416,10 @@ XXH3_hashLong_128b_default(const void* XXH_RESTRICT input, size_t len,
 }
 
 /*
- * It's important for performance that XXH3_hashLong is not inlined.
+ * It's important for performance to pass @secretLen (when it's static)
+ * to the compiler, so that it can properly optimize the vectorized loop.
  */
-XXH_NO_INLINE XXH128_hash_t
+XXH_FORCE_INLINE XXH128_hash_t
 XXH3_hashLong_128b_withSecret(const void* XXH_RESTRICT input, size_t len,
                               XXH64_hash_t seed64,
                               const void* XXH_RESTRICT secret, size_t secretLen)
@@ -5288,6 +5512,15 @@ XXH3_128bits_withSeed(const void* input, size_t len, XXH64_hash_t seed)
                                 XXH3_hashLong_128b_withSeed);
 }
 
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API XXH128_hash_t
+XXH3_128bits_withSecretandSeed(const void* input, size_t len, const void* secret, size_t secretSize, XXH64_hash_t seed)
+{
+    if (len <= XXH3_MIDSIZE_MAX)
+        return XXH3_128bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL);
+    return XXH3_hashLong_128b_withSecret(input, len, seed, secret, secretSize);
+}
+
 /*! @ingroup xxh3_family */
 XXH_PUBLIC_API XXH128_hash_t
 XXH128(const void* input, size_t len, XXH64_hash_t seed)
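XXH3_128bits_withSecretandSeed mirrors its 64-bit counterpart but returns an XXH128_hash_t, which is compared with XXH128_isEqual rather than ==. A short sketch under the same assumptions as the 64-bit example:

    #define XXH_INLINE_ALL
    #include "xxhash.h"

    int same_digest(const void* a, const void* b, size_t len,
                    const void* secret, size_t secretSize)
    {
        XXH128_hash_t ha = XXH3_128bits_withSecretandSeed(a, len, secret, secretSize, 1);
        XXH128_hash_t hb = XXH3_128bits_withSecretandSeed(b, len, secret, secretSize, 1);
        return XXH128_isEqual(ha, hb);
    }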
@@ -5299,7 +5532,7 @@ XXH128(const void* input, size_t len, XXH64_hash_t seed)
 /* === XXH3 128-bit streaming === */
 
 /*
- * All the functions are actually the same as for 64-bit streaming variant.
+ * All initialization and update functions are identical to 64-bit streaming variant.
  * The only difference is the finalization routine.
  */
 
@@ -5307,31 +5540,28 @@ XXH128(const void* input, size_t len, XXH64_hash_t seed)
 XXH_PUBLIC_API XXH_errorcode
 XXH3_128bits_reset(XXH3_state_t* statePtr)
 {
-    if (statePtr == NULL) return XXH_ERROR;
-    XXH3_reset_internal(statePtr, 0, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE);
-    return XXH_OK;
+    return XXH3_64bits_reset(statePtr);
 }
 
 /*! @ingroup xxh3_family */
 XXH_PUBLIC_API XXH_errorcode
 XXH3_128bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize)
 {
-    if (statePtr == NULL) return XXH_ERROR;
-    XXH3_reset_internal(statePtr, 0, secret, secretSize);
-    if (secret == NULL) return XXH_ERROR;
-    if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
-    return XXH_OK;
+    return XXH3_64bits_reset_withSecret(statePtr, secret, secretSize);
 }
 
 /*! @ingroup xxh3_family */
 XXH_PUBLIC_API XXH_errorcode
 XXH3_128bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed)
 {
-    if (statePtr == NULL) return XXH_ERROR;
-    if (seed==0) return XXH3_128bits_reset(statePtr);
-    if (seed != statePtr->seed) XXH3_initCustomSecret(statePtr->customSecret, seed);
-    XXH3_reset_internal(statePtr, seed, NULL, XXH_SECRET_DEFAULT_SIZE);
-    return XXH_OK;
+    return XXH3_64bits_reset_withSeed(statePtr, seed);
+}
+
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr, const void* secret, size_t secretSize, XXH64_hash_t seed)
+{
+    return XXH3_64bits_reset_withSecretandSeed(statePtr, secret, secretSize, seed);
 }
 
 /*! @ingroup xxh3_family */
@@ -5406,8 +5636,8 @@ XXH128_canonicalFromHash(XXH128_canonical_t* dst, XXH128_hash_t hash)
         hash.high64 = XXH_swap64(hash.high64);
         hash.low64  = XXH_swap64(hash.low64);
     }
-    memcpy(dst, &hash.high64, sizeof(hash.high64));
-    memcpy((char*)dst + sizeof(hash.high64), &hash.low64, sizeof(hash.low64));
+    XXH_memcpy(dst, &hash.high64, sizeof(hash.high64));
+    XXH_memcpy((char*)dst + sizeof(hash.high64), &hash.low64, sizeof(hash.low64));
 }
 
 /*! @ingroup xxh3_family */
@@ -5420,6 +5650,77 @@ XXH128_hashFromCanonical(const XXH128_canonical_t* src)
     return h;
 }
 
+
+
+/* ==========================================
+ * Secret generators
+ * ==========================================
+ */
+#define XXH_MIN(x, y) (((x) > (y)) ? (y) : (x))
+
+XXH_FORCE_INLINE void XXH3_combine16(void* dst, XXH128_hash_t h128)
+{
+    XXH_writeLE64( dst, XXH_readLE64(dst) ^ h128.low64 );
+    XXH_writeLE64( (char*)dst+8, XXH_readLE64((char*)dst+8) ^ h128.high64 );
+}
+
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_generateSecret(void* secretBuffer, size_t secretSize, const void* customSeed, size_t customSeedSize)
+{
+#if (XXH_DEBUGLEVEL >= 1)
+    XXH_ASSERT(secretBuffer != NULL);
+    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
+#else
+    /* production mode, assert() are disabled */
+    if (secretBuffer == NULL) return XXH_ERROR;
+    if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
+#endif
+
+    if (customSeedSize == 0) {
+        customSeed = XXH3_kSecret;
+        customSeedSize = XXH_SECRET_DEFAULT_SIZE;
+    }
+#if (XXH_DEBUGLEVEL >= 1)
+    XXH_ASSERT(customSeed != NULL);
+#else
+    if (customSeed == NULL) return XXH_ERROR;
+#endif
+
+    /* Fill secretBuffer with a copy of customSeed - repeat as needed */
+    {   size_t pos = 0;
+        while (pos < secretSize) {
+            size_t const toCopy = XXH_MIN((secretSize - pos), customSeedSize);
+            memcpy((char*)secretBuffer + pos, customSeed, toCopy);
+            pos += toCopy;
+    }   }
+
+    {   size_t const nbSeg16 = secretSize / 16;
+        size_t n;
+        XXH128_canonical_t scrambler;
+        XXH128_canonicalFromHash(&scrambler, XXH128(customSeed, customSeedSize, 0));
+        for (n=0; n<nbSeg16; n++) {
+            XXH128_hash_t const h128 = XXH128(&scrambler, sizeof(scrambler), n);
+            XXH3_combine16((char*)secretBuffer + n*16, h128);
+        }
+        /* last segment */
+        XXH3_combine16((char*)secretBuffer + secretSize - 16, XXH128_hashFromCanonical(&scrambler));
+    }
+    return XXH_OK;
+}
+
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API void
+XXH3_generateSecret_fromSeed(void* secretBuffer, XXH64_hash_t seed)
+{
+    XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
+    XXH3_initCustomSecret(secret, seed);
+    XXH_ASSERT(secretBuffer != NULL);
+    memcpy(secretBuffer, secret, XXH_SECRET_DEFAULT_SIZE);
+}
+
+
+
5724
|
/* Pop our optimization override from above */
|
5424
5725
|
#if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \
|
5425
5726
|
&& defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
|