digest-xxhash 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -2
- data/digest-xxhash.gemspec +1 -1
- data/ext/digest/xxhash/ext.c +2 -0
- data/ext/digest/xxhash/xxhash.h +708 -407
- data/lib/digest/xxhash/version.rb +1 -1
- data/test/test.rb +6 -0
- metadata +21 -3
data/ext/digest/xxhash/xxhash.h
CHANGED
```diff
@@ -1,7 +1,7 @@
 /*
  * xxHash - Extremely Fast Hash algorithm
  * Header File
- * Copyright (C) 2012-
+ * Copyright (C) 2012-2021 Yann Collet
  *
  * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
  *
```
```diff
@@ -157,6 +157,7 @@ extern "C" {
 # undef XXH3_64bits
 # undef XXH3_64bits_withSecret
 # undef XXH3_64bits_withSeed
+# undef XXH3_64bits_withSecretandSeed
 # undef XXH3_createState
 # undef XXH3_freeState
 # undef XXH3_copyState
@@ -174,6 +175,7 @@ extern "C" {
 # undef XXH3_128bits_reset
 # undef XXH3_128bits_reset_withSeed
 # undef XXH3_128bits_reset_withSecret
+# undef XXH3_128bits_reset_withSecretandSeed
 # undef XXH3_128bits_update
 # undef XXH3_128bits_digest
 # undef XXH128_isEqual
@@ -284,23 +286,28 @@ extern "C" {
 # define XXH3_64bits XXH_NAME2(XXH_NAMESPACE, XXH3_64bits)
 # define XXH3_64bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecret)
 # define XXH3_64bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSeed)
+# define XXH3_64bits_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecretandSeed)
 # define XXH3_createState XXH_NAME2(XXH_NAMESPACE, XXH3_createState)
 # define XXH3_freeState XXH_NAME2(XXH_NAMESPACE, XXH3_freeState)
 # define XXH3_copyState XXH_NAME2(XXH_NAMESPACE, XXH3_copyState)
 # define XXH3_64bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset)
 # define XXH3_64bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSeed)
 # define XXH3_64bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecret)
+# define XXH3_64bits_reset_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecretandSeed)
 # define XXH3_64bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_update)
 # define XXH3_64bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_digest)
 # define XXH3_generateSecret XXH_NAME2(XXH_NAMESPACE, XXH3_generateSecret)
+# define XXH3_generateSecret_fromSeed XXH_NAME2(XXH_NAMESPACE, XXH3_generateSecret_fromSeed)
 /* XXH3_128bits */
 # define XXH128 XXH_NAME2(XXH_NAMESPACE, XXH128)
 # define XXH3_128bits XXH_NAME2(XXH_NAMESPACE, XXH3_128bits)
 # define XXH3_128bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSeed)
 # define XXH3_128bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecret)
+# define XXH3_128bits_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecretandSeed)
 # define XXH3_128bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset)
 # define XXH3_128bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSeed)
 # define XXH3_128bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecret)
+# define XXH3_128bits_reset_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecretandSeed)
 # define XXH3_128bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_update)
 # define XXH3_128bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_digest)
 # define XXH128_isEqual XXH_NAME2(XXH_NAMESPACE, XXH128_isEqual)
```
```diff
@@ -321,16 +328,16 @@ extern "C" {
 /*!
  * @brief Obtains the xxHash version.
  *
- * This is
- *
+ * This is mostly useful when xxHash is compiled as a shared library,
+ * since the returned value comes from the library, as opposed to header file.
  *
- * @return `XXH_VERSION_NUMBER`
+ * @return `XXH_VERSION_NUMBER` of the invoked library.
  */
 XXH_PUBLIC_API unsigned XXH_versionNumber (void);
 
 
 /* ****************************
- *
+ *  Common basic types
 ******************************/
 #include <stddef.h>   /* size_t */
 typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
```
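The shared-library point above is easy to demonstrate: the header bakes `XXH_VERSION_NUMBER` in at compile time, while `XXH_versionNumber()` reports what the linked library was built from. A minimal sketch; the mismatch check is illustrative, not part of the header:

```c
#include <stdio.h>
#include "xxhash.h"

int main(void)
{
    /* XXH_VERSION_NUMBER comes from the header at compile time;
     * XXH_versionNumber() comes from whatever library is linked/loaded. */
    unsigned const lib = XXH_versionNumber();
    printf("header: %u, library: %u\n", (unsigned)XXH_VERSION_NUMBER, lib);
    if (lib != XXH_VERSION_NUMBER)
        printf("warning: header/library version mismatch\n");
    return 0;
}
```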
```diff
@@ -374,10 +381,9 @@ typedef uint32_t XXH32_hash_t;
  * Contains functions used in the classic 32-bit xxHash algorithm.
  *
  * @note
- * XXH32 is
- *
- * systems, and offers true 64/128 bit hash results.
- * level of dispersion, and greatly reduces the risks of collisions.
+ * XXH32 is useful for older platforms, with no or poor 64-bit performance.
+ * Note that @ref xxh3_family provides competitive speed
+ * for both 32-bit and 64-bit systems, and offers true 64/128 bit hash results.
  *
  * @see @ref xxh64_family, @ref xxh3_family : Other xxHash families
  * @see @ref xxh32_impl for implementation details
@@ -594,36 +600,39 @@ XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t
 XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
 
 
+#ifdef __has_attribute
+# define XXH_HAS_ATTRIBUTE(x) __has_attribute(x)
+#else
+# define XXH_HAS_ATTRIBUTE(x) 0
+#endif
+
+/* C-language Attributes are added in C23. */
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ > 201710L) && defined(__has_c_attribute)
+# define XXH_HAS_C_ATTRIBUTE(x) __has_c_attribute(x)
+#else
+# define XXH_HAS_C_ATTRIBUTE(x) 0
+#endif
+
+#if defined(__cplusplus) && defined(__has_cpp_attribute)
+# define XXH_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x)
+#else
+# define XXH_HAS_CPP_ATTRIBUTE(x) 0
+#endif
+
 /*
 Define XXH_FALLTHROUGH macro for annotating switch case with the 'fallthrough' attribute
 introduced in CPP17 and C23.
 CPP17 : https://en.cppreference.com/w/cpp/language/attributes/fallthrough
 C23   : https://en.cppreference.com/w/c/language/attributes/fallthrough
 */
-
-#
-#
-#
-#
-
-#
-#
-#  define XXH_FALLTHROUGH [[fallthrough]]
-# endif
-#endif
-
-#ifndef XXH_FALLTHROUGH
-# if defined(__GNUC__) && __GNUC__ >= 7
-#  define XXH_FALLTHROUGH __attribute__ ((fallthrough))
-# elif defined(__clang__) && (__clang_major__ >= 10) \
-    && (!defined(__APPLE__) || (__clang_major__ >= 12))
-   /* Apple clang 12 is effectively clang-10 ,
-    * see https://en.wikipedia.org/wiki/Xcode for details
-    */
-#  define XXH_FALLTHROUGH __attribute__ ((fallthrough))
-# else
-#  define XXH_FALLTHROUGH
-# endif
+#if XXH_HAS_C_ATTRIBUTE(x)
+# define XXH_FALLTHROUGH [[fallthrough]]
+#elif XXH_HAS_CPP_ATTRIBUTE(x)
+# define XXH_FALLTHROUGH [[fallthrough]]
+#elif XXH_HAS_ATTRIBUTE(__fallthrough__)
+# define XXH_FALLTHROUGH __attribute__ ((fallthrough))
+#else
+# define XXH_FALLTHROUGH
 #endif
 
 /*!
```
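For orientation, `XXH_FALLTHROUGH` is placed at the end of a deliberately falling-through `case` to silence fall-through warnings. A hedged sketch of typical usage; `consume_tail` is a hypothetical helper, not something defined by this header:

```c
#include <stddef.h>
#include "xxhash.h"

/* Hypothetical finalizer-style helper: consumes 1-3 trailing bytes,
 * with each case intentionally falling through to the next. */
static unsigned consume_tail(const unsigned char* p, size_t len, unsigned acc)
{
    switch (len & 3) {
    case 3: acc += p[2]; XXH_FALLTHROUGH; /* deliberate fall-through */
    case 2: acc += p[1]; XXH_FALLTHROUGH; /* deliberate fall-through */
    case 1: acc += p[0]; XXH_FALLTHROUGH; /* deliberate fall-through */
    default: break;
    }
    return acc;
}
```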
```diff
@@ -669,8 +678,8 @@ typedef uint64_t XXH64_hash_t;
  *
  * @note
  * XXH3 provides competitive speed for both 32-bit and 64-bit systems,
- * and offers true 64/128 bit hash results.
- *
+ * and offers true 64/128 bit hash results.
+ * It provides better speed for systems with vector processing capabilities.
  */
 
 
@@ -719,6 +728,8 @@ typedef struct { unsigned char digest[sizeof(XXH64_hash_t)]; } XXH64_canonical_t
 XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash);
 XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src);
 
+#ifndef XXH_NO_XXH3
+
 /*!
  * @}
  * ************************************************************************
```
```diff
@@ -796,13 +807,17 @@ XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSeed(const void* data, size_t len, X
  * It's possible to provide any blob of bytes as a "secret" to generate the hash.
  * This makes it more difficult for an external actor to prepare an intentional collision.
  * The main condition is that secretSize *must* be large enough (>= XXH3_SECRET_SIZE_MIN).
- * However, the quality of
- *
+ * However, the quality of the secret impacts the dispersion of the hash algorithm.
+ * Therefore, the secret _must_ look like a bunch of random bytes.
  * Avoid "trivial" or structured data such as repeated sequences or a text document.
- * Whenever
- * consider
- *
- *
+ * Whenever in doubt about the "randomness" of the blob of bytes,
+ * consider employing "XXH3_generateSecret()" instead (see below).
+ * It will generate a proper high entropy secret derived from the blob of bytes.
+ * Another advantage of using XXH3_generateSecret() is that
+ * it guarantees that all bits within the initial blob of bytes
+ * will impact every bit of the output.
+ * This is not necessarily the case when using the blob of bytes directly
+ * because, when hashing _small_ inputs, only a portion of the secret is employed.
  */
 XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSecret(const void* data, size_t len, const void* secret, size_t secretSize);
 
```
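In practice the header steers users toward deriving the secret rather than supplying raw bytes. A sketch under the assumption that the experimental API is exposed via `XXH_STATIC_LINKING_ONLY` and an implementation is linked in; the buffer size and error handling are illustrative:

```c
#define XXH_STATIC_LINKING_ONLY  /* expose XXH3_generateSecret() */
#include "xxhash.h"
#include <stdio.h>
#include <string.h>

int main(void)
{
    const char* blob = "not-actually-random application key";
    unsigned char secret[XXH3_SECRET_SIZE_MIN];  /* >= XXH3_SECRET_SIZE_MIN, as required */
    const char* data = "hello world";
    XXH64_hash_t h;

    /* Derive a high-entropy secret instead of using the blob directly,
     * so every bit of the blob influences every bit of the output. */
    if (XXH3_generateSecret(secret, sizeof(secret), blob, strlen(blob)) != XXH_OK)
        return 1;

    h = XXH3_64bits_withSecret(data, strlen(data), secret, sizeof(secret));
    printf("%016llx\n", (unsigned long long)h);
    return 0;
}
```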
```diff
@@ -922,6 +937,7 @@ XXH_PUBLIC_API void XXH128_canonicalFromHash(XXH128_canonical_t* dst, XXH128_has
 XXH_PUBLIC_API XXH128_hash_t XXH128_hashFromCanonical(const XXH128_canonical_t* src);
 
 
+#endif /* !XXH_NO_XXH3 */
 #endif  /* XXH_NO_LONG_LONG */
 
 /*!
@@ -962,13 +978,10 @@ XXH_PUBLIC_API XXH128_hash_t XXH128_hashFromCanonical(const XXH128_canonical_t*
 struct XXH32_state_s {
    XXH32_hash_t total_len_32; /*!< Total length hashed, modulo 2^32 */
    XXH32_hash_t large_len;    /*!< Whether the hash is >= 16 (handles @ref total_len_32 overflow) */
-   XXH32_hash_t
-   XXH32_hash_t v2;           /*!< Second accumulator lane */
-   XXH32_hash_t v3;           /*!< Third accumulator lane */
-   XXH32_hash_t v4;           /*!< Fourth accumulator lane */
+   XXH32_hash_t v[4];         /*!< Accumulator lanes */
    XXH32_hash_t mem32[4];     /*!< Internal buffer for partial reads. Treated as unsigned char[16]. */
    XXH32_hash_t memsize;      /*!< Amount of data in @ref mem32 */
-   XXH32_hash_t reserved;     /*!< Reserved field. Do not read
+   XXH32_hash_t reserved;     /*!< Reserved field. Do not read nor write to it. */
 }; /* typedef'd to XXH32_state_t */
 
 
```
```diff
@@ -988,16 +1001,15 @@ struct XXH32_state_s {
  */
 struct XXH64_state_s {
    XXH64_hash_t total_len;    /*!< Total length hashed. This is always 64-bit. */
-   XXH64_hash_t
-   XXH64_hash_t v2;           /*!< Second accumulator lane */
-   XXH64_hash_t v3;           /*!< Third accumulator lane */
-   XXH64_hash_t v4;           /*!< Fourth accumulator lane */
+   XXH64_hash_t v[4];         /*!< Accumulator lanes */
    XXH64_hash_t mem64[4];     /*!< Internal buffer for partial reads. Treated as unsigned char[32]. */
    XXH32_hash_t memsize;      /*!< Amount of data in @ref mem64 */
    XXH32_hash_t reserved32;   /*!< Reserved field, needed for padding anyways*/
-   XXH64_hash_t reserved64;   /*!< Reserved field. Do not read or write to it
+   XXH64_hash_t reserved64;   /*!< Reserved field. Do not read or write to it. */
 }; /* typedef'd to XXH64_state_t */
 
+#ifndef XXH_NO_XXH3
+
 #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* >= C11 */
 # include <stdalign.h>
 # define XXH_ALIGN(n) alignas(n)
```
```diff
@@ -1070,7 +1082,7 @@ struct XXH3_state_s {
        /*!< The internal buffer. @see XXH32_state_s::mem32 */
    XXH32_hash_t bufferedSize;
        /*!< The amount of memory in @ref buffer, @see XXH32_state_s::memsize */
-   XXH32_hash_t
+   XXH32_hash_t useSeed;
        /*!< Reserved field. Needed for padding on 64-bit. */
    size_t nbStripesSoFar;
        /*!< Number or stripes processed. */
@@ -1106,6 +1118,12 @@ struct XXH3_state_s {
 #define XXH3_INITSTATE(XXH3_state_ptr)   { (XXH3_state_ptr)->seed = 0; }
 
 
+/* XXH128() :
+ * simple alias to pre-selected XXH3_128bits variant
+ */
+XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t seed);
+
+
 /* === Experimental API === */
 /* Symbols defined below must be considered tied to a specific library version. */
 
```
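`XXH128()` is the one-call convenience wrapper for the declaration above. A short usage sketch:

```c
#include <stdio.h>
#include <string.h>
#include "xxhash.h"

int main(void)
{
    const char* msg = "hello world";
    /* XXH128() dispatches to the pre-selected XXH3_128bits variant. */
    XXH128_hash_t const h = XXH128(msg, strlen(msg), /* seed */ 0);
    printf("%016llx%016llx\n",
           (unsigned long long)h.high64, (unsigned long long)h.low64);
    return 0;
}
```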
```diff
@@ -1118,33 +1136,92 @@ struct XXH3_state_s {
  * as it becomes much more difficult for an external actor to guess how to impact the calculation logic.
  *
  * The function accepts as input a custom seed of any length and any content,
- * and derives from it a high-entropy secret of length
- * into an already allocated buffer secretBuffer.
- *
+ * and derives from it a high-entropy secret of length @secretSize
+ * into an already allocated buffer @secretBuffer.
+ * @secretSize must be >= XXH3_SECRET_SIZE_MIN
  *
  * The generated secret can then be used with any `*_withSecret()` variant.
  * Functions `XXH3_128bits_withSecret()`, `XXH3_64bits_withSecret()`,
 * `XXH3_128bits_reset_withSecret()` and `XXH3_64bits_reset_withSecret()`
  * are part of this list. They all accept a `secret` parameter
- * which must be
+ * which must be large enough for implementation reasons (>= XXH3_SECRET_SIZE_MIN)
  * _and_ feature very high entropy (consist of random-looking bytes).
  * These conditions can be a high bar to meet, so
- *
+ * XXH3_generateSecret() can be employed to ensure proper quality.
  *
  * customSeed can be anything. It can have any size, even small ones,
- * and its content can be anything, even
- * The resulting `secret` will nonetheless provide all
+ * and its content can be anything, even "poor entropy" sources such as a bunch of zeroes.
+ * The resulting `secret` will nonetheless provide all required qualities.
  *
- * Supplying NULL as the customSeed copies the default secret into `secretBuffer`.
  * When customSeedSize > 0, supplying NULL as customSeed is undefined behavior.
  */
-XXH_PUBLIC_API
+XXH_PUBLIC_API XXH_errorcode XXH3_generateSecret(void* secretBuffer, size_t secretSize, const void* customSeed, size_t customSeedSize);
 
 
-/*
-
+/*
+ * XXH3_generateSecret_fromSeed():
+ *
+ * Generate the same secret as the _withSeed() variants.
+ *
+ * The resulting secret has a length of XXH3_SECRET_DEFAULT_SIZE (necessarily).
+ * @secretBuffer must be already allocated, of size at least XXH3_SECRET_DEFAULT_SIZE bytes.
+ *
+ * The generated secret can be used in combination with
+ *`*_withSecret()` and `_withSecretandSeed()` variants.
+ * This generator is notably useful in combination with `_withSecretandSeed()`,
+ * as a way to emulate a faster `_withSeed()` variant.
+ */
+XXH_PUBLIC_API void XXH3_generateSecret_fromSeed(void* secretBuffer, XXH64_hash_t seed);
+
+/*
+ * *_withSecretandSeed() :
+ * These variants generate hash values using either
+ * @seed for "short" keys (< XXH3_MIDSIZE_MAX = 240 bytes)
+ * or @secret for "large" keys (>= XXH3_MIDSIZE_MAX).
+ *
+ * This generally benefits speed, compared to `_withSeed()` or `_withSecret()`.
+ * `_withSeed()` has to generate the secret on the fly for "large" keys.
+ * It's fast, but can be perceptible for "not so large" keys (< 1 KB).
+ * `_withSecret()` has to generate the masks on the fly for "small" keys,
+ * which requires more instructions than _withSeed() variants.
+ * Therefore, _withSecretandSeed variant combines the best of both worlds.
+ *
+ * When @secret has been generated by XXH3_generateSecret_fromSeed(),
+ * this variant produces *exactly* the same results as `_withSeed()` variant,
+ * hence offering only a pure speed benefit on "large" input,
+ * by skipping the need to regenerate the secret for every large input.
+ *
+ * Another usage scenario is to hash the secret to a 64-bit hash value,
+ * for example with XXH3_64bits(), which then becomes the seed,
+ * and then employ both the seed and the secret in _withSecretandSeed().
+ * On top of speed, an added benefit is that each bit in the secret
+ * has a 50% chance to swap each bit in the output,
+ * via its impact to the seed.
+ * This is not guaranteed when using the secret directly in "small data" scenarios,
+ * because only portions of the secret are employed for small data.
+ */
+XXH_PUBLIC_API XXH64_hash_t
+XXH3_64bits_withSecretandSeed(const void* data, size_t len,
+                              const void* secret, size_t secretSize,
+                              XXH64_hash_t seed);
+
+XXH_PUBLIC_API XXH128_hash_t
+XXH3_128bits_withSecretandSeed(const void* data, size_t len,
+                               const void* secret, size_t secretSize,
+                               XXH64_hash_t seed64);
+
+XXH_PUBLIC_API XXH_errorcode
+XXH3_64bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
+                                    const void* secret, size_t secretSize,
+                                    XXH64_hash_t seed64);
+
+XXH_PUBLIC_API XXH_errorcode
+XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
+                                     const void* secret, size_t secretSize,
+                                     XXH64_hash_t seed64);
 
 
+#endif /* !XXH_NO_XXH3 */
 #endif  /* XXH_NO_LONG_LONG */
 #if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
 # define XXH_IMPLEMENTATION
```
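The interplay described above is easiest to see in code. A sketch assuming the experimental API (behind `XXH_STATIC_LINKING_ONLY`) and a linked implementation; the final equality mirrors the documented guarantee that a `_fromSeed()` secret plus the same seed reproduces `_withSeed()`:

```c
#define XXH_STATIC_LINKING_ONLY
#include "xxhash.h"
#include <assert.h>
#include <string.h>

int main(void)
{
    unsigned char secret[XXH3_SECRET_DEFAULT_SIZE];
    XXH64_hash_t const seed = 0x9e3779b185ebca87ULL;  /* arbitrary example seed */
    const char* data = "some input longer than nothing";
    XXH64_hash_t h1, h2;

    /* Pre-expand the seed once... */
    XXH3_generateSecret_fromSeed(secret, seed);

    /* ...then hash with both secret and seed: short inputs take the seed
     * path, long inputs use the pre-generated secret without regeneration. */
    h1 = XXH3_64bits_withSecretandSeed(data, strlen(data),
                                       secret, sizeof(secret), seed);
    h2 = XXH3_64bits_withSeed(data, strlen(data), seed);
    assert(h1 == h2);  /* identical results, per the documented guarantee */
    return 0;
}
```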
```diff
@@ -1221,7 +1298,7 @@ XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t s
  *     Use `memcpy()`. Safe and portable. Note that most modern compilers will
  *     eliminate the function call and treat it as an unaligned access.
  *
- *  - `XXH_FORCE_MEMORY_ACCESS=1`: `__attribute__((
+ *  - `XXH_FORCE_MEMORY_ACCESS=1`: `__attribute__((aligned(1)))`
  *   @par
  *     Depends on compiler extensions and is therefore not portable.
  *     This method is safe _if_ your compiler supports it,
@@ -1248,22 +1325,12 @@ XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t s
  *     care, as what works on one compiler/platform/optimization level may cause
  *     another to read garbage data or even crash.
  *
- *     See
+ *     See http://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html for details.
  *
  *     Prefer these methods in priority order (0 > 3 > 1 > 2)
  */
 #  define XXH_FORCE_MEMORY_ACCESS 0
-
- * @def XXH_ACCEPT_NULL_INPUT_POINTER
- * @brief Whether to add explicit `NULL` checks.
- *
- * If the input pointer is `NULL` and the length is non-zero, xxHash's default
- * behavior is to dereference it, triggering a segfault.
- *
- * When this macro is enabled, xxHash actively checks the input for a null pointer.
- * If it is, the result for null input pointers is the same as a zero-length input.
- */
-#  define XXH_ACCEPT_NULL_INPUT_POINTER 0
+
 /*!
  * @def XXH_FORCE_ALIGN_CHECK
  * @brief If defined to non-zero, adds a special path for aligned inputs (XXH32()
@@ -1315,18 +1382,16 @@ XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t s
 #  define XXH_NO_INLINE_HINTS 0
 
 /*!
- * @def
- * @brief Whether to
- *
- * For performance, `XXH32_finalize` uses an unrolled loop
- * in the form of a switch statement.
+ * @def XXH32_ENDJMP
+ * @brief Whether to use a jump for `XXH32_finalize`.
  *
- *
- *
+ * For performance, `XXH32_finalize` uses multiple branches in the finalizer.
+ * This is generally preferable for performance,
+ * but depending on exact architecture, a jmp may be preferable.
  *
- * This is
+ * This setting is only possibly making a difference for very small inputs.
  */
-#  define
+#  define XXH32_ENDJMP 0
 
 /*!
  * @internal
@@ -1343,32 +1408,18 @@ XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t s
  */
 
 #ifndef XXH_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
-   /* prefer __packed__ structures (method 1) for
-
-
-
-      ( \
-        defined(__GNUC__) && ( \
-          (defined(__ARM_ARCH) && __ARM_ARCH >= 7) || \
-          ( \
-            defined(__mips__) && \
-            (__mips <= 5 || __mips_isa_rev < 6) && \
-            (!defined(__mips16) || defined(__mips_mips16e2)) \
-          ) \
-        ) \
-      ) \
-    )
+   /* prefer __packed__ structures (method 1) for GCC
+    * < ARMv7 with unaligned access (e.g. Raspbian armhf) still uses byte shifting, so we use memcpy
+    * which for some reason does unaligned loads. */
+#  if defined(__GNUC__) && !(defined(__ARM_ARCH) && __ARM_ARCH < 7 && defined(__ARM_FEATURE_UNALIGNED))
 #    define XXH_FORCE_MEMORY_ACCESS 1
 #  endif
 #endif
 
-#ifndef XXH_ACCEPT_NULL_INPUT_POINTER   /* can be defined externally */
-#  define XXH_ACCEPT_NULL_INPUT_POINTER 0
-#endif
-
 #ifndef XXH_FORCE_ALIGN_CHECK  /* can be defined externally */
-
-
+   /* don't check on x86, aarch64, or arm when unaligned access is available */
+#  if defined(__i386) || defined(__x86_64__) || defined(__aarch64__) || defined(__ARM_FEATURE_UNALIGNED) \
+   || defined(_M_IX86) || defined(_M_X64) || defined(_M_ARM64) || defined(_M_ARM) /* visual */
 #    define XXH_FORCE_ALIGN_CHECK 0
 #  else
 #    define XXH_FORCE_ALIGN_CHECK 1
@@ -1384,14 +1435,9 @@ XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t s
 #  endif
 #endif
 
-#ifndef
-
-
-    /* The if/then loop is preferable to switch/case on gcc (on x64) */
-#    define XXH_REROLL 1
-#  else
-#    define XXH_REROLL 0
-#  endif
+#ifndef XXH32_ENDJMP
+/* generally preferable for performance */
+#  define XXH32_ENDJMP 0
 #endif
 
 /*!
```
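`XXH32_ENDJMP` is an ordinary compile-time knob, so overriding the default shown above is just a matter of defining it before the implementation is compiled. A hedged sketch; whether the jump variant actually helps is architecture-dependent, as the comment notes:

```c
/* Opt into the jump-based finalizer for XXH32 (the default, XXH32_ENDJMP=0,
 * keeps the compact rerolled loop). Equivalent to -DXXH32_ENDJMP=1. */
#define XXH32_ENDJMP 1
#define XXH_INLINE_ALL   /* compile the implementation into this translation unit */
#include "xxhash.h"
```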
```diff
@@ -1413,13 +1459,13 @@ XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t s
  * @internal
  * @brief Modify this function to use a different routine than malloc().
  */
-static void* XXH_malloc(size_t s) { return
+static void* XXH_malloc(size_t s) { return ruby_xmalloc(s); }
 
 /*!
  * @internal
  * @brief Modify this function to use a different routine than free().
  */
-static void XXH_free(void* p) {
+static void XXH_free(void* p) { ruby_xfree(p); }
 
 #include <string.h>
 
@@ -1443,19 +1489,19 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size)
 #endif
 
 #if XXH_NO_INLINE_HINTS  /* disable inlining hints */
-#  if defined(__GNUC__)
+#  if defined(__GNUC__) || defined(__clang__)
 #    define XXH_FORCE_INLINE static __attribute__((unused))
 #  else
 #    define XXH_FORCE_INLINE static
 #  endif
 #  define XXH_NO_INLINE static
 /* enable inlining hints */
+#elif defined(__GNUC__) || defined(__clang__)
+#  define XXH_FORCE_INLINE static __inline__ __attribute__((always_inline, unused))
+#  define XXH_NO_INLINE static __attribute__((noinline))
 #elif defined(_MSC_VER)  /* Visual Studio */
 #  define XXH_FORCE_INLINE static __forceinline
 #  define XXH_NO_INLINE static __declspec(noinline)
-#elif defined(__GNUC__)
-#  define XXH_FORCE_INLINE static __inline__ __attribute__((always_inline, unused))
-#  define XXH_NO_INLINE static __attribute__((noinline))
 #elif defined (__cplusplus) \
   || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L))  /* C99 */
 #  define XXH_FORCE_INLINE static inline
@@ -1522,7 +1568,7 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size)
  * We also use it to prevent unwanted constant folding for AArch64 in
  * XXH3_initCustomSecret_scalar().
  */
-#
+#if defined(__GNUC__) || defined(__clang__)
 #  define XXH_COMPILER_GUARD(var) __asm__ __volatile__("" : "+r" (var))
 #else
 #  define XXH_COMPILER_GUARD(var) ((void)0)
@@ -1615,30 +1661,31 @@ static xxh_u32 XXH_read32(const void* memPtr) { return *(const xxh_u32*) memPtr;
 #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
 
 /*
- *
- *
- *
- *
+ * __attribute__((aligned(1))) is supported by gcc and clang. Originally the
+ * documentation claimed that it only increased the alignment, but actually it
+ * can decrease it on gcc, clang, and icc:
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69502,
+ * https://gcc.godbolt.org/z/xYez1j67Y.
 */
 #ifdef XXH_OLD_NAMES
 typedef union { xxh_u32 u32; } __attribute__((packed)) unalign;
 #endif
 static xxh_u32 XXH_read32(const void* ptr)
 {
-    typedef
-    return ((const
+    typedef __attribute__((aligned(1))) xxh_u32 xxh_unalign32;
+    return *((const xxh_unalign32*)ptr);
 }
 
 #else
 
 /*
  * Portable and safe solution. Generally efficient.
- * see:
+ * see: http://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html
 */
 static xxh_u32 XXH_read32(const void* memPtr)
 {
     xxh_u32 val;
-
+    XXH_memcpy(&val, memPtr, sizeof(val));
     return val;
 }
 
```
```diff
@@ -1955,8 +2002,10 @@ XXH32_finalize(xxh_u32 h32, const xxh_u8* ptr, size_t len, XXH_alignment align)
          h32 = XXH_rotl32(h32, 17) * XXH_PRIME32_4;   \
     } while (0)
 
-
-
+    if (ptr==NULL) XXH_ASSERT(len == 0);
+
+    /* Compact rerolled version; generally faster */
+    if (!XXH32_ENDJMP) {
         len &= 15;
         while (len >= 4) {
             XXH_PROCESS4;
@@ -2024,24 +2073,19 @@ XXH32_finalize(xxh_u32 h32, const xxh_u8* ptr, size_t len, XXH_alignment align)
  * @internal
  * @brief The implementation for @ref XXH32().
  *
- * @param input, len, seed Directly passed from @ref XXH32().
+ * @param input , len , seed Directly passed from @ref XXH32().
  * @param align Whether @p input is aligned.
  * @return The calculated hash.
  */
 XXH_FORCE_INLINE xxh_u32
 XXH32_endian_align(const xxh_u8* input, size_t len, xxh_u32 seed, XXH_alignment align)
 {
-    const xxh_u8* bEnd = input ? input + len : NULL;
     xxh_u32 h32;
 
-
-    if (input==NULL) {
-        len=0;
-        bEnd=input=(const xxh_u8*)(size_t)16;
-    }
-#endif
+    if (input==NULL) XXH_ASSERT(len == 0);
 
     if (len>=16) {
+        const xxh_u8* const bEnd = input + len;
         const xxh_u8* const limit = bEnd - 15;
         xxh_u32 v1 = seed + XXH_PRIME32_1 + XXH_PRIME32_2;
         xxh_u32 v2 = seed + XXH_PRIME32_2;
@@ -2105,20 +2149,18 @@ XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
 /*! @ingroup xxh32_family */
 XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t* srcState)
 {
-
+    XXH_memcpy(dstState, srcState, sizeof(*dstState));
 }
 
 /*! @ingroup xxh32_family */
 XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, XXH32_hash_t seed)
 {
-
-    memset(
-
-
-
-
-    /* do not write into reserved, planned to be removed in a future version */
-    memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved));
+    XXH_ASSERT(statePtr != NULL);
+    memset(statePtr, 0, sizeof(*statePtr));
+    statePtr->v[0] = seed + XXH_PRIME32_1 + XXH_PRIME32_2;
+    statePtr->v[1] = seed + XXH_PRIME32_2;
+    statePtr->v[2] = seed + 0;
+    statePtr->v[3] = seed - XXH_PRIME32_1;
     return XXH_OK;
 }
 
```
```diff
@@ -2127,12 +2169,10 @@ XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, XXH32_hash_t s
 XXH_PUBLIC_API XXH_errorcode
 XXH32_update(XXH32_state_t* state, const void* input, size_t len)
 {
-    if (input==NULL)
-
+    if (input==NULL) {
+        XXH_ASSERT(len == 0);
         return XXH_OK;
-
-        return XXH_ERROR;
-#endif
+    }
 
     {   const xxh_u8* p = (const xxh_u8*)input;
         const xxh_u8* const bEnd = p + len;
@@ -2149,10 +2189,10 @@ XXH32_update(XXH32_state_t* state, const void* input, size_t len)
     if (state->memsize) {   /* some data left from previous update */
         XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, 16-state->memsize);
         {   const xxh_u32* p32 = state->mem32;
-            state->
-            state->
-            state->
-            state->
+            state->v[0] = XXH32_round(state->v[0], XXH_readLE32(p32)); p32++;
+            state->v[1] = XXH32_round(state->v[1], XXH_readLE32(p32)); p32++;
+            state->v[2] = XXH32_round(state->v[2], XXH_readLE32(p32)); p32++;
+            state->v[3] = XXH32_round(state->v[3], XXH_readLE32(p32));
         }
         p += 16-state->memsize;
         state->memsize = 0;
@@ -2160,22 +2200,14 @@ XXH32_update(XXH32_state_t* state, const void* input, size_t len)
 
     if (p <= bEnd-16) {
         const xxh_u8* const limit = bEnd - 16;
-        xxh_u32 v1 = state->v1;
-        xxh_u32 v2 = state->v2;
-        xxh_u32 v3 = state->v3;
-        xxh_u32 v4 = state->v4;
 
         do {
-
-
-
-
+            state->v[0] = XXH32_round(state->v[0], XXH_readLE32(p)); p+=4;
+            state->v[1] = XXH32_round(state->v[1], XXH_readLE32(p)); p+=4;
+            state->v[2] = XXH32_round(state->v[2], XXH_readLE32(p)); p+=4;
+            state->v[3] = XXH32_round(state->v[3], XXH_readLE32(p)); p+=4;
         } while (p<=limit);
 
-        state->v1 = v1;
-        state->v2 = v2;
-        state->v3 = v3;
-        state->v4 = v4;
     }
 
     if (p < bEnd) {
@@ -2194,12 +2226,12 @@ XXH_PUBLIC_API XXH32_hash_t XXH32_digest(const XXH32_state_t* state)
     xxh_u32 h32;
 
     if (state->large_len) {
-        h32 = XXH_rotl32(state->
-            + XXH_rotl32(state->
-            + XXH_rotl32(state->
-            + XXH_rotl32(state->
+        h32 = XXH_rotl32(state->v[0], 1)
+            + XXH_rotl32(state->v[1], 7)
+            + XXH_rotl32(state->v[2], 12)
+            + XXH_rotl32(state->v[3], 18);
     } else {
-        h32 = state->
+        h32 = state->v[2] /* == seed */ + XXH_PRIME32_5;
    }
 
     h32 += state->total_len_32;
```
```diff
@@ -2228,7 +2260,7 @@ XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t
 {
     XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t));
     if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash);
-
+    XXH_memcpy(dst, &hash, sizeof(*dst));
 }
 /*! @ingroup xxh32_family */
 XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src)
@@ -2271,30 +2303,31 @@ static xxh_u64 XXH_read64(const void* memPtr)
 #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
 
 /*
- *
- *
- *
- *
+ * __attribute__((aligned(1))) is supported by gcc and clang. Originally the
+ * documentation claimed that it only increased the alignment, but actually it
+ * can decrease it on gcc, clang, and icc:
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69502,
+ * https://gcc.godbolt.org/z/xYez1j67Y.
 */
 #ifdef XXH_OLD_NAMES
 typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((packed)) unalign64;
 #endif
 static xxh_u64 XXH_read64(const void* ptr)
 {
-    typedef
-    return ((const xxh_unalign64*)ptr)
+    typedef __attribute__((aligned(1))) xxh_u64 xxh_unalign64;
+    return *((const xxh_unalign64*)ptr);
 }
 
 #else
 
 /*
  * Portable and safe solution. Generally efficient.
- * see:
+ * see: http://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html
 */
 static xxh_u64 XXH_read64(const void* memPtr)
 {
     xxh_u64 val;
-
+    XXH_memcpy(&val, memPtr, sizeof(val));
     return val;
 }
 
@@ -2424,6 +2457,7 @@ static xxh_u64 XXH64_avalanche(xxh_u64 h64)
 static xxh_u64
 XXH64_finalize(xxh_u64 h64, const xxh_u8* ptr, size_t len, XXH_alignment align)
 {
+    if (ptr==NULL) XXH_ASSERT(len == 0);
     len &= 31;
     while (len >= 8) {
         xxh_u64 const k1 = XXH64_round(0, XXH_get64bits(ptr));
```
```diff
@@ -2459,18 +2493,12 @@ XXH64_finalize(xxh_u64 h64, const xxh_u8* ptr, size_t len, XXH_alignment align)
 XXH_FORCE_INLINE xxh_u64
 XXH64_endian_align(const xxh_u8* input, size_t len, xxh_u64 seed, XXH_alignment align)
 {
-    const xxh_u8* bEnd = input ? input + len : NULL;
     xxh_u64 h64;
-
-#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
-    if (input==NULL) {
-        len=0;
-        bEnd=input=(const xxh_u8*)(size_t)32;
-    }
-#endif
+    if (input==NULL) XXH_ASSERT(len == 0);
 
     if (len>=32) {
-        const xxh_u8* const
+        const xxh_u8* const bEnd = input + len;
+        const xxh_u8* const limit = bEnd - 31;
         xxh_u64 v1 = seed + XXH_PRIME64_1 + XXH_PRIME64_2;
         xxh_u64 v2 = seed + XXH_PRIME64_2;
         xxh_u64 v3 = seed + 0;
@@ -2481,7 +2509,7 @@ XXH64_endian_align(const xxh_u8* input, size_t len, xxh_u64 seed, XXH_alignment
             v2 = XXH64_round(v2, XXH_get64bits(input)); input+=8;
             v3 = XXH64_round(v3, XXH_get64bits(input)); input+=8;
             v4 = XXH64_round(v4, XXH_get64bits(input)); input+=8;
-        } while (input
+        } while (input<limit);
 
         h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
         h64 = XXH64_mergeRound(h64, v1);
@@ -2536,20 +2564,18 @@ XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
 /*! @ingroup xxh64_family */
 XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dstState, const XXH64_state_t* srcState)
 {
-
+    XXH_memcpy(dstState, srcState, sizeof(*dstState));
 }
 
 /*! @ingroup xxh64_family */
 XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, XXH64_hash_t seed)
 {
-
-    memset(
-
-
-
-
-    /* do not write into reserved64, might be removed in a future version */
-    memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved64));
+    XXH_ASSERT(statePtr != NULL);
+    memset(statePtr, 0, sizeof(*statePtr));
+    statePtr->v[0] = seed + XXH_PRIME64_1 + XXH_PRIME64_2;
+    statePtr->v[1] = seed + XXH_PRIME64_2;
+    statePtr->v[2] = seed + 0;
+    statePtr->v[3] = seed - XXH_PRIME64_1;
     return XXH_OK;
 }
 
```
```diff
@@ -2557,12 +2583,10 @@ XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, XXH64_hash_t s
 XXH_PUBLIC_API XXH_errorcode
 XXH64_update (XXH64_state_t* state, const void* input, size_t len)
 {
-    if (input==NULL)
-
+    if (input==NULL) {
+        XXH_ASSERT(len == 0);
         return XXH_OK;
-
-        return XXH_ERROR;
-#endif
+    }
 
     {   const xxh_u8* p = (const xxh_u8*)input;
         const xxh_u8* const bEnd = p + len;
@@ -2577,32 +2601,24 @@ XXH64_update (XXH64_state_t* state, const void* input, size_t len)
 
     if (state->memsize) { /* tmp buffer is full */
         XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, 32-state->memsize);
-        state->
-        state->
-        state->
-        state->
+        state->v[0] = XXH64_round(state->v[0], XXH_readLE64(state->mem64+0));
+        state->v[1] = XXH64_round(state->v[1], XXH_readLE64(state->mem64+1));
+        state->v[2] = XXH64_round(state->v[2], XXH_readLE64(state->mem64+2));
+        state->v[3] = XXH64_round(state->v[3], XXH_readLE64(state->mem64+3));
         p += 32 - state->memsize;
         state->memsize = 0;
     }
 
     if (p+32 <= bEnd) {
         const xxh_u8* const limit = bEnd - 32;
-        xxh_u64 v1 = state->v1;
-        xxh_u64 v2 = state->v2;
-        xxh_u64 v3 = state->v3;
-        xxh_u64 v4 = state->v4;
 
         do {
-
-
-
-
+            state->v[0] = XXH64_round(state->v[0], XXH_readLE64(p)); p+=8;
+            state->v[1] = XXH64_round(state->v[1], XXH_readLE64(p)); p+=8;
+            state->v[2] = XXH64_round(state->v[2], XXH_readLE64(p)); p+=8;
+            state->v[3] = XXH64_round(state->v[3], XXH_readLE64(p)); p+=8;
         } while (p<=limit);
 
-        state->v1 = v1;
-        state->v2 = v2;
-        state->v3 = v3;
-        state->v4 = v4;
     }
 
     if (p < bEnd) {
@@ -2621,18 +2637,13 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_digest(const XXH64_state_t* state)
     xxh_u64 h64;
 
     if (state->total_len >= 32) {
-
-
-
-
-
-        h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
-        h64 = XXH64_mergeRound(h64, v1);
-        h64 = XXH64_mergeRound(h64, v2);
-        h64 = XXH64_mergeRound(h64, v3);
-        h64 = XXH64_mergeRound(h64, v4);
+        h64 = XXH_rotl64(state->v[0], 1) + XXH_rotl64(state->v[1], 7) + XXH_rotl64(state->v[2], 12) + XXH_rotl64(state->v[3], 18);
+        h64 = XXH64_mergeRound(h64, state->v[0]);
+        h64 = XXH64_mergeRound(h64, state->v[1]);
+        h64 = XXH64_mergeRound(h64, state->v[2]);
+        h64 = XXH64_mergeRound(h64, state->v[3]);
     } else {
-        h64 = state->
+        h64  = state->v[2] /*seed*/ + XXH_PRIME64_5;
     }
 
     h64 += (xxh_u64) state->total_len;
@@ -2648,7 +2659,7 @@ XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t
 {
     XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t));
     if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash);
-
+    XXH_memcpy(dst, &hash, sizeof(*dst));
 }
 
 /*! @ingroup xxh64_family */
```
```diff
@@ -2691,17 +2702,21 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src
 #  define XXH_unlikely(x) (x)
 #endif
 
-#if defined(__GNUC__)
-#  if defined(
-
-
-#    include <emmintrin.h>
-#  elif defined(__ARM_NEON__) || defined(__ARM_NEON)
+#if defined(__GNUC__) || defined(__clang__)
+#  if defined(__ARM_NEON__) || defined(__ARM_NEON) \
+   || defined(__aarch64__)  || defined(_M_ARM) \
+   || defined(_M_ARM64)     || defined(_M_ARM64EC)
 #    define inline __inline__  /* circumvent a clang bug */
 #    include <arm_neon.h>
 #    undef inline
+#  elif defined(__AVX2__)
+#    include <immintrin.h>
+#  elif defined(__SSE2__)
+#    include <emmintrin.h>
 #  endif
-#
+#endif
+
+#if defined(_MSC_VER)
 #  include <intrin.h>
 #endif
 
@@ -2839,17 +2854,20 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
 #endif
 
 #ifndef XXH_VECTOR   /* can be defined on command line */
-#  if
+#  if ( \
+        defined(__ARM_NEON__) || defined(__ARM_NEON) /* gcc */ \
+     || defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) /* msvc */ \
+   ) && ( \
+        defined(_WIN32) || defined(__LITTLE_ENDIAN__) /* little endian only */ \
+    || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) \
+   )
+#    define XXH_VECTOR XXH_NEON
+#  elif defined(__AVX512F__)
 #    define XXH_VECTOR XXH_AVX512
 #  elif defined(__AVX2__)
 #    define XXH_VECTOR XXH_AVX2
 #  elif defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP == 2))
 #    define XXH_VECTOR XXH_SSE2
-#  elif defined(__GNUC__) /* msvc support maybe later */ \
-  && (defined(__ARM_NEON__) || defined(__ARM_NEON)) \
-  && (defined(__LITTLE_ENDIAN__) /* We only support little endian NEON */ \
-    || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__))
-#    define XXH_VECTOR XXH_NEON
 #  elif (defined(__PPC64__) && defined(__POWER8_VECTOR__)) \
      || (defined(__s390x__) && defined(__VEC__)) \
      && defined(__GNUC__) /* TODO: IBM XL */
@@ -2999,8 +3017,8 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
  * }
  */
 #  if !defined(XXH_NO_VZIP_HACK) /* define to disable */ \
-   && defined(__GNUC__) \
-   &&
+   && (defined(__GNUC__) || defined(__clang__)) \
+   && (defined(__arm__) || defined(__thumb__) || defined(_M_ARM))
 #    define XXH_SPLIT_IN_PLACE(in, outLo, outHi)  \
         do {                                      \
             /* Undocumented GCC/Clang operand modifier: %e0 = lower D half, %f0 = upper D half */ \
```
```diff
@@ -3017,6 +3035,76 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
             (outHi) = vshrn_n_u64 ((in), 32); \
         } while (0)
 #  endif
+
+/*!
+ * @internal
+ * @brief `vld1q_u64` but faster and alignment-safe.
+ *
+ * On AArch64, unaligned access is always safe, but on ARMv7-a, it is only
+ * *conditionally* safe (`vld1` has an alignment bit like `movdq[ua]` in x86).
+ *
+ * GCC for AArch64 sees `vld1q_u8` as an intrinsic instead of a load, so it
+ * prohibits load-store optimizations. Therefore, a direct dereference is used.
+ *
+ * Otherwise, `vld1q_u8` is used with `vreinterpretq_u8_u64` to do a safe
+ * unaligned load.
+ */
+#if defined(__aarch64__) && defined(__GNUC__) && !defined(__clang__)
+XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr) /* silence -Wcast-align */
+{
+    return *(uint64x2_t const*)ptr;
+}
+#else
+XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr)
+{
+    return vreinterpretq_u64_u8(vld1q_u8((uint8_t const*)ptr));
+}
+#endif
+/*!
+ * @ingroup tuning
+ * @brief Controls the NEON to scalar ratio for XXH3
+ *
+ * On AArch64 when not optimizing for size, XXH3 will run 6 lanes using NEON and
+ * 2 lanes on scalar by default.
+ *
+ * This can be set to 2, 4, 6, or 8. ARMv7 will default to all 8 NEON lanes, as the
+ * emulated 64-bit arithmetic is too slow.
+ *
+ * Modern ARM CPUs are _very_ sensitive to how their pipelines are used.
+ *
+ * For example, the Cortex-A73 can dispatch 3 micro-ops per cycle, but it can't
+ * have more than 2 NEON (F0/F1) micro-ops. If you are only using NEON instructions,
+ * you are only using 2/3 of the CPU bandwidth.
+ *
+ * This is even more noticable on the more advanced cores like the A76 which
+ * can dispatch 8 micro-ops per cycle, but still only 2 NEON micro-ops at once.
+ *
+ * Therefore, @ref XXH3_NEON_LANES lanes will be processed using NEON, and the
+ * remaining lanes will use scalar instructions. This improves the bandwidth
+ * and also gives the integer pipelines something to do besides twiddling loop
+ * counters and pointers.
+ *
+ * This change benefits CPUs with large micro-op buffers without negatively affecting
+ * other CPUs:
+ *
+ *  | Chipset               | Dispatch type       | NEON only | 6:2 hybrid | Diff. |
+ *  |:----------------------|:--------------------|----------:|-----------:|------:|
+ *  | Snapdragon 730 (A76)  | 2 NEON/8 micro-ops  |  8.8 GB/s |  10.1 GB/s |  ~16% |
+ *  | Snapdragon 835 (A73)  | 2 NEON/3 micro-ops  |  5.1 GB/s |   5.3 GB/s |   ~5% |
+ *  | Marvell PXA1928 (A53) | In-order dual-issue |  1.9 GB/s |   1.9 GB/s |    0% |
+ *
+ * It also seems to fix some bad codegen on GCC, making it almost as fast as clang.
+ *
+ * @see XXH3_accumulate_512_neon()
+ */
+# ifndef XXH3_NEON_LANES
+#  if (defined(__aarch64__) || defined(__arm64__) || defined(_M_ARM64) || defined(_M_ARM64EC)) \
+   && !defined(__OPTIMIZE_SIZE__)
+#   define XXH3_NEON_LANES 6
+#  else
+#   define XXH3_NEON_LANES XXH_ACC_NB
+#  endif
+# endif
 #endif /* XXH_VECTOR == XXH_NEON */
 
 /*
```
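`XXH3_NEON_LANES` is likewise overridable per build. A hedged configuration sketch; 8 is simply one of the documented values (2, 4, 6, or 8), not a recommendation:

```c
/* Force all 8 accumulator lanes onto NEON instead of the 6:2 hybrid,
 * e.g. to benchmark an in-order core. Must be defined before the
 * implementation is compiled. */
#define XXH3_NEON_LANES 8
#define XXH_INLINE_ALL
#include "xxhash.h"
```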
|
@@ -3083,7 +3171,7 @@ XXH_FORCE_INLINE xxh_u64x2 XXH_vec_revb(xxh_u64x2 val)
|
|
3083
3171
|
XXH_FORCE_INLINE xxh_u64x2 XXH_vec_loadu(const void *ptr)
|
3084
3172
|
{
|
3085
3173
|
xxh_u64x2 ret;
|
3086
|
-
|
3174
|
+
XXH_memcpy(&ret, ptr, sizeof(xxh_u64x2));
|
3087
3175
|
# if XXH_VSX_BE
|
3088
3176
|
ret = XXH_vec_revb(ret);
|
3089
3177
|
# endif
|
@@ -3193,7 +3281,6 @@ XXH_mult32to64(xxh_u64 x, xxh_u64 y)
|
|
3193
3281
|
return (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF);
|
3194
3282
|
}
|
3195
3283
|
#elif defined(_MSC_VER) && defined(_M_IX86)
|
3196
|
-
# include <intrin.h>
|
3197
3284
|
# define XXH_mult32to64(x, y) __emulu((unsigned)(x), (unsigned)(y))
|
3198
3285
|
#else
|
3199
3286
|
/*
|
@@ -3212,7 +3299,7 @@ XXH_mult32to64(xxh_u64 x, xxh_u64 y)
|
|
3212
3299
|
* Uses `__uint128_t` and `_umul128` if available, otherwise uses a scalar
|
3213
3300
|
* version.
|
3214
3301
|
*
|
3215
|
-
* @param lhs, rhs The 64-bit integers to be multiplied
|
3302
|
+
* @param lhs , rhs The 64-bit integers to be multiplied
|
3216
3303
|
* @return The 128-bit result represented in an @ref XXH128_hash_t.
|
3217
3304
|
*/
|
3218
3305
|
static XXH128_hash_t
|
@@ -3233,7 +3320,7 @@ XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs)
|
|
3233
3320
|
* In that case it is best to use the portable one.
|
3234
3321
|
* https://github.com/Cyan4973/xxHash/issues/211#issuecomment-515575677
|
3235
3322
|
*/
|
3236
|
-
#if defined(__GNUC__) && !defined(__wasm__) \
|
3323
|
+
#if (defined(__GNUC__) || defined(__clang__)) && !defined(__wasm__) \
|
3237
3324
|
&& defined(__SIZEOF_INT128__) \
|
3238
3325
|
|| (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128)
|
3239
3326
|
|
@@ -3250,7 +3337,7 @@ XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs)
|
|
3250
3337
|
*
|
3251
3338
|
* This compiles to single operand MUL on x64.
|
3252
3339
|
*/
|
3253
|
-
#elif defined(_M_X64) || defined(_M_IA64)
|
3340
|
+
#elif (defined(_M_X64) || defined(_M_IA64)) && !defined(_M_ARM64EC)
|
3254
3341
|
|
3255
3342
|
#ifndef _MSC_VER
|
3256
3343
|
# pragma intrinsic(_umul128)
|
```diff
@@ -3262,6 +3349,21 @@ XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs)
     r128.high64 = product_high;
     return r128;
 
+/*
+ * MSVC for ARM64's __umulh method.
+ *
+ * This compiles to the same MUL + UMULH as GCC/Clang's __uint128_t method.
+ */
+#elif defined(_M_ARM64) || defined(_M_ARM64EC)
+
+#ifndef _MSC_VER
+#  pragma intrinsic(__umulh)
+#endif
+    XXH128_hash_t r128;
+    r128.low64  = lhs * rhs;
+    r128.high64 = __umulh(lhs, rhs);
+    return r128;
+
 #else
 /*
  * Portable scalar method. Optimized for 32-bit and 64-bit ALUs.
```
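The equivalence noted in the added comment (a plain multiply for the low 64 bits, `UMULH` for the high 64) can be illustrated with GCC/Clang's `__uint128_t`, the same shape the header's other branch uses. This standalone sketch is illustrative and not part of the header:

```c
#include <assert.h>
#include <stdint.h>

/* The full 64x64 -> 128 product split into (high, low) halves, matching
 * what both the _umul128/__umulh and __uint128_t branches produce. */
typedef struct { uint64_t low64; uint64_t high64; } u128_parts;

static u128_parts mult64to128(uint64_t lhs, uint64_t rhs)
{
    unsigned __int128 const product = (unsigned __int128)lhs * rhs;
    u128_parts r;
    r.low64  = (uint64_t)product;         /* == lhs * rhs (wrapping) */
    r.high64 = (uint64_t)(product >> 64); /* == __umulh(lhs, rhs) on MSVC ARM64 */
    return r;
}

int main(void)
{
    u128_parts const p = mult64to128(0xFFFFFFFFFFFFFFFFULL, 2);
    assert(p.low64 == 0xFFFFFFFFFFFFFFFEULL && p.high64 == 1);
    return 0;
}
```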
```diff
@@ -3330,7 +3432,7 @@ XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs)
  * The reason for the separate function is to prevent passing too many structs
  * around by value. This will hopefully inline the multiply, but we don't force it.
  *
- * @param lhs, rhs The 64-bit integers to multiply
+ * @param lhs , rhs The 64-bit integers to multiply
  * @return The low 64 bits of the product XOR'd by the high 64 bits.
  * @see XXH_mult64to128()
  */
@@ -3632,7 +3734,7 @@ XXH3_len_129to240_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
 XXH_FORCE_INLINE void XXH_writeLE64(void* dst, xxh_u64 v64)
 {
     if (!XXH_CPU_LITTLE_ENDIAN) v64 = XXH_swap64(v64);
-
+    XXH_memcpy(dst, &v64, sizeof(v64));
 }
 
 /* Several intrinsic functions below are supposed to accept __int64 as argument,
@@ -3649,6 +3751,7 @@ XXH_FORCE_INLINE void XXH_writeLE64(void* dst, xxh_u64 v64)
 typedef long long xxh_i64;
 #endif
 
+
 /*
  * XXH3_accumulate_512 is the tightest loop for long inputs, and it is the most optimized.
  *
@@ -3684,7 +3787,7 @@ XXH3_accumulate_512_avx512(void* XXH_RESTRICT acc,
                      const void* XXH_RESTRICT input,
                      const void* XXH_RESTRICT secret)
 {
-
+    __m512i* const xacc = (__m512i *) acc;
     XXH_ASSERT((((size_t)acc) & 63) == 0);
     XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i));
 
@@ -3733,7 +3836,7 @@ XXH3_scrambleAcc_avx512(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
 {
     XXH_ASSERT((((size_t)acc) & 63) == 0);
     XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i));
-    {
+    {   __m512i* const xacc = (__m512i*) acc;
         const __m512i prime32 = _mm512_set1_epi32((int)XXH_PRIME32_1);
 
         /* xacc[0] ^= (xacc[0] >> 47) */
@@ -3794,7 +3897,7 @@ XXH3_accumulate_512_avx2( void* XXH_RESTRICT acc,
                     const void* XXH_RESTRICT secret)
 {
     XXH_ASSERT((((size_t)acc) & 31) == 0);
-    {
+    {   __m256i* const xacc = (__m256i *) acc;
         /* Unaligned. This is mainly for pointer arithmetic, and because
          * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */
         const __m256i* const xinput = (const __m256i *) input;
@@ -3826,7 +3929,7 @@ XXH_FORCE_INLINE XXH_TARGET_AVX2 void
 XXH3_scrambleAcc_avx2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
 {
     XXH_ASSERT((((size_t)acc) & 31) == 0);
-    {
+    {   __m256i* const xacc = (__m256i*) acc;
         /* Unaligned. This is mainly for pointer arithmetic, and because
          * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */
         const __m256i* const xsecret = (const __m256i *) secret;
@@ -3900,7 +4003,7 @@ XXH3_accumulate_512_sse2( void* XXH_RESTRICT acc,
 {
     /* SSE2 is just a half-scale version of the AVX2 version. */
     XXH_ASSERT((((size_t)acc) & 15) == 0);
-    {
+    {   __m128i* const xacc = (__m128i *) acc;
         /* Unaligned. This is mainly for pointer arithmetic, and because
          * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */
         const __m128i* const xinput = (const __m128i *) input;
@@ -3932,7 +4035,7 @@ XXH_FORCE_INLINE XXH_TARGET_SSE2 void
 XXH3_scrambleAcc_sse2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
 {
     XXH_ASSERT((((size_t)acc) & 15) == 0);
-    {
+    {   __m128i* const xacc = (__m128i*) acc;
         /* Unaligned. This is mainly for pointer arithmetic, and because
          * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */
         const __m128i* const xsecret = (const __m128i *) secret;
```
@@ -3994,40 +4097,66 @@ XXH_FORCE_INLINE XXH_TARGET_SSE2 void XXH3_initCustomSecret_sse2(void* XXH_RESTR

 #if (XXH_VECTOR == XXH_NEON)

+/* forward declarations for the scalar routines */
+XXH_FORCE_INLINE void
+XXH3_scalarRound(void* XXH_RESTRICT acc, void const* XXH_RESTRICT input,
+                 void const* XXH_RESTRICT secret, size_t lane);
+
+XXH_FORCE_INLINE void
+XXH3_scalarScrambleRound(void* XXH_RESTRICT acc,
+                         void const* XXH_RESTRICT secret, size_t lane);
+
+/*!
+ * @internal
+ * @brief The bulk processing loop for NEON.
+ *
+ * The NEON code path is actually partially scalar when running on AArch64. This
+ * is to optimize the pipelining and can have up to 15% speedup depending on the
+ * CPU, and it also mitigates some GCC codegen issues.
+ *
+ * @see XXH3_NEON_LANES for configuring this and details about this optimization.
+ */
 XXH_FORCE_INLINE void
 XXH3_accumulate_512_neon( void* XXH_RESTRICT acc,
                      const void* XXH_RESTRICT input,
                      const void* XXH_RESTRICT secret)
 {
     XXH_ASSERT((((size_t)acc) & 15) == 0);
+    XXH_STATIC_ASSERT(XXH3_NEON_LANES > 0 && XXH3_NEON_LANES <= XXH_ACC_NB && XXH3_NEON_LANES % 2 == 0);
     {
-        XXH_ALIGN(16) uint64x2_t* const xacc = (uint64x2_t *) acc;
+        uint64x2_t* const xacc = (uint64x2_t *) acc;
         /* We don't use a uint32x4_t pointer because it causes bus errors on ARMv7. */
         uint8_t const* const xinput = (const uint8_t *) input;
         uint8_t const* const xsecret = (const uint8_t *) secret;

         size_t i;
-        for (i=0; i < XXH_STRIPE_LEN / sizeof(uint64x2_t); i++) {
+        /* AArch64 uses both scalar and neon at the same time */
+        for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) {
+            XXH3_scalarRound(acc, input, secret, i);
+        }
+        for (i=0; i < XXH3_NEON_LANES / 2; i++) {
+            uint64x2_t acc_vec = xacc[i];
             /* data_vec = xinput[i]; */
-            uint8x16_t data_vec = vld1q_u8(xinput + (i * 16));
+            uint64x2_t data_vec = XXH_vld1q_u64(xinput + (i * 16));
             /* key_vec = xsecret[i]; */
-            uint8x16_t key_vec = vld1q_u8(xsecret + (i * 16));
+            uint64x2_t key_vec = XXH_vld1q_u64(xsecret + (i * 16));
             uint64x2_t data_key;
             uint32x2_t data_key_lo, data_key_hi;
-            /* xacc[i] += swap(data_vec); */
-            uint64x2_t const data64 = vreinterpretq_u64_u8(data_vec);
-            uint64x2_t const swapped = vextq_u64(data64, data64, 1);
-            xacc[i] = vaddq_u64 (xacc[i], swapped);
+            /* acc_vec_2 = swap(data_vec) */
+            uint64x2_t acc_vec_2 = vextq_u64(data_vec, data_vec, 1);
             /* data_key = data_vec ^ key_vec; */
-            data_key = vreinterpretq_u64_u8(veorq_u8(data_vec, key_vec));
+            data_key = veorq_u64(data_vec, key_vec);
             /* data_key_lo = (uint32x2_t) (data_key & 0xFFFFFFFF);
              * data_key_hi = (uint32x2_t) (data_key >> 32);
              * data_key = UNDEFINED; */
             XXH_SPLIT_IN_PLACE(data_key, data_key_lo, data_key_hi);
-            /* xacc[i] += (uint64x2_t) data_key_lo * (uint64x2_t) data_key_hi; */
-            xacc[i] = vmlal_u32 (xacc[i], data_key_lo, data_key_hi);
-
+            /* acc_vec_2 += (uint64x2_t) data_key_lo * (uint64x2_t) data_key_hi; */
+            acc_vec_2 = vmlal_u32 (acc_vec_2, data_key_lo, data_key_hi);
+            /* xacc[i] += acc_vec_2; */
+            acc_vec = vaddq_u64 (acc_vec, acc_vec_2);
+            xacc[i] = acc_vec;
         }
+
     }
 }

@@ -4041,15 +4170,19 @@ XXH3_scrambleAcc_neon(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
         uint32x2_t prime = vdup_n_u32 (XXH_PRIME32_1);

         size_t i;
-        for (i=0; i < XXH_STRIPE_LEN/sizeof(uint64x2_t); i++) {
+        /* AArch64 uses both scalar and neon at the same time */
+        for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) {
+            XXH3_scalarScrambleRound(acc, secret, i);
+        }
+        for (i=0; i < XXH3_NEON_LANES / 2; i++) {
             /* xacc[i] ^= (xacc[i] >> 47); */
             uint64x2_t acc_vec = xacc[i];
-            uint64x2_t shifted  = vshrq_n_u64  (acc_vec, 47);
-            uint64x2_t data_vec = veorq_u64    (acc_vec, shifted);
+            uint64x2_t shifted = vshrq_n_u64 (acc_vec, 47);
+            uint64x2_t data_vec = veorq_u64 (acc_vec, shifted);

             /* xacc[i] ^= xsecret[i]; */
-            uint8x16_t key_vec  = vld1q_u8   (xsecret + (i * 16));
-            uint64x2_t data_key = veorq_u64  (data_vec, vreinterpretq_u64_u8(key_vec));
+            uint64x2_t key_vec = XXH_vld1q_u64 (xsecret + (i * 16));
+            uint64x2_t data_key = veorq_u64 (data_vec, key_vec);

             /* xacc[i] *= XXH_PRIME32_1 */
             uint32x2_t data_key_lo, data_key_hi;
@@ -4077,11 +4210,12 @@ XXH3_scrambleAcc_neon(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
              */
             uint64x2_t prod_hi = vmull_u32 (data_key_hi, prime);
             /* xacc[i] = prod_hi << 32; */
-            xacc[i] = vshlq_n_u64(prod_hi, 32);
+            prod_hi = vshlq_n_u64(prod_hi, 32);
             /* xacc[i] += (prod_hi & 0xFFFFFFFF) * XXH_PRIME32_1; */
-            xacc[i] = vmlal_u32(xacc[i], data_key_lo, prime);
+            xacc[i] = vmlal_u32(prod_hi, data_key_lo, prime);
         }
-    }
+        }
+    }
 }

 #endif
@@ -4093,7 +4227,8 @@ XXH3_accumulate_512_vsx( void* XXH_RESTRICT acc,
                     const void* XXH_RESTRICT input,
                     const void* XXH_RESTRICT secret)
 {
-    xxh_u64x2* const xacc = (xxh_u64x2*) acc;    /* presumed aligned */
+    /* presumed aligned */
+    unsigned int* const xacc = (unsigned int*) acc;
     xxh_u64x2 const* const xinput = (xxh_u64x2 const*) input;   /* no alignment restriction */
     xxh_u64x2 const* const xsecret = (xxh_u64x2 const*) secret;    /* no alignment restriction */
     xxh_u64x2 const v32 = { 32, 32 };
@@ -4108,14 +4243,18 @@ XXH3_accumulate_512_vsx( void* XXH_RESTRICT acc,
         xxh_u32x4 const shuffled = (xxh_u32x4)vec_rl(data_key, v32);
         /* product = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)shuffled & 0xFFFFFFFF); */
         xxh_u64x2 const product = XXH_vec_mulo((xxh_u32x4)data_key, shuffled);
-        xacc[i] += product;
+        /* acc_vec = xacc[i]; */
+        xxh_u64x2 acc_vec = (xxh_u64x2)vec_xl(0, xacc + 4 * i);
+        acc_vec += product;

         /* swap high and low halves */
 #ifdef __s390x__
-        xacc[i] += vec_permi(data_vec, data_vec, 2);
+        acc_vec += vec_permi(data_vec, data_vec, 2);
 #else
-        xacc[i] += vec_xxpermdi(data_vec, data_vec, 2);
+        acc_vec += vec_xxpermdi(data_vec, data_vec, 2);
 #endif
+        /* xacc[i] = acc_vec; */
+        vec_xst((xxh_u32x4)acc_vec, 0, xacc + 4 * i);
     }
 }

@@ -4153,38 +4292,90 @@ XXH3_scrambleAcc_vsx(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)

 /* scalar variants - universal */

+/*!
+ * @internal
+ * @brief Scalar round for @ref XXH3_accumulate_512_scalar().
+ *
+ * This is extracted to its own function because the NEON path uses a combination
+ * of NEON and scalar.
+ */
+XXH_FORCE_INLINE void
+XXH3_scalarRound(void* XXH_RESTRICT acc,
+                 void const* XXH_RESTRICT input,
+                 void const* XXH_RESTRICT secret,
+                 size_t lane)
+{
+    xxh_u64* xacc = (xxh_u64*) acc;
+    xxh_u8 const* xinput = (xxh_u8 const*) input;
+    xxh_u8 const* xsecret = (xxh_u8 const*) secret;
+    XXH_ASSERT(lane < XXH_ACC_NB);
+    XXH_ASSERT(((size_t)acc & (XXH_ACC_ALIGN-1)) == 0);
+    {
+        xxh_u64 const data_val = XXH_readLE64(xinput + lane * 8);
+        xxh_u64 const data_key = data_val ^ XXH_readLE64(xsecret + lane * 8);
+        xacc[lane ^ 1] += data_val; /* swap adjacent lanes */
+        xacc[lane] += XXH_mult32to64(data_key & 0xFFFFFFFF, data_key >> 32);
+    }
+}
+
+/*!
+ * @internal
+ * @brief Processes a 64 byte block of data using the scalar path.
+ */
 XXH_FORCE_INLINE void
 XXH3_accumulate_512_scalar(void* XXH_RESTRICT acc,
                      const void* XXH_RESTRICT input,
                      const void* XXH_RESTRICT secret)
 {
-    XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64* const xacc = (xxh_u64*) acc; /* presumed aligned */
-    const xxh_u8* const xinput = (const xxh_u8*) input; /* no alignment restriction */
-    const xxh_u8* const xsecret = (const xxh_u8*) secret; /* no alignment restriction */
     size_t i;
-
+    /* ARM GCC refuses to unroll this loop, resulting in a 24% slowdown on ARMv6. */
+#if defined(__GNUC__) && !defined(__clang__) \
+  && (defined(__arm__) || defined(__thumb2__)) \
+  && defined(__ARM_FEATURE_UNALIGNED) /* no unaligned access just wastes bytes */ \
+  && !defined(__OPTIMIZE_SIZE__)
+#  pragma GCC unroll 8
+#endif
     for (i=0; i < XXH_ACC_NB; i++) {
-        xxh_u64 const data_val = XXH_readLE64(xinput + 8*i);
-        xxh_u64 const data_key = data_val ^ XXH_readLE64(xsecret + i*8);
-        xacc[i ^ 1] += data_val; /* swap adjacent lanes */
-        xacc[i] += XXH_mult32to64(data_key & 0xFFFFFFFF, data_key >> 32);
+        XXH3_scalarRound(acc, input, secret, i);
     }
 }

+/*!
+ * @internal
+ * @brief Scalar scramble step for @ref XXH3_scrambleAcc_scalar().
+ *
+ * This is extracted to its own function because the NEON path uses a combination
+ * of NEON and scalar.
+ */
 XXH_FORCE_INLINE void
-XXH3_scrambleAcc_scalar(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
+XXH3_scalarScrambleRound(void* XXH_RESTRICT acc,
+                         void const* XXH_RESTRICT secret,
+                         size_t lane)
 {
-    XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64* const xacc = (xxh_u64*) acc; /* presumed aligned */
+    xxh_u64* const xacc = (xxh_u64*) acc; /* presumed aligned */
     const xxh_u8* const xsecret = (const xxh_u8*) secret;   /* no alignment restriction */
-    size_t i;
     XXH_ASSERT((((size_t)acc) & (XXH_ACC_ALIGN-1)) == 0);
-    for (i=0; i < XXH_ACC_NB; i++) {
-        xxh_u64 const key64 = XXH_readLE64(xsecret + 8*i);
-        xxh_u64 acc64 = xacc[i];
+    XXH_ASSERT(lane < XXH_ACC_NB);
+    {
+        xxh_u64 const key64 = XXH_readLE64(xsecret + lane * 8);
+        xxh_u64 acc64 = xacc[lane];
         acc64 = XXH_xorshift64(acc64, 47);
         acc64 ^= key64;
         acc64 *= XXH_PRIME32_1;
-        xacc[i] = acc64;
+        xacc[lane] = acc64;
+    }
+}
+
+/*!
+ * @internal
+ * @brief Scrambles the accumulators after a large chunk has been read
+ */
+XXH_FORCE_INLINE void
+XXH3_scrambleAcc_scalar(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
+{
+    size_t i;
+    for (i=0; i < XXH_ACC_NB; i++) {
+        XXH3_scalarScrambleRound(acc, secret, i);
     }
 }

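The hunk above extracts the per-lane scalar mix into XXH3_scalarRound so the NEON path can reuse it. For illustration only, the same round restated with plain C types (names ours, not xxHash API; the little-endian read is simplified to memcpy, so this sketch assumes a little-endian host):

    #include <stdint.h>
    #include <string.h>

    /* one lane of the 8-lane accumulate, as in XXH3_scalarRound above */
    static void scalar_round_sketch(uint64_t acc[8],
                                    const unsigned char* input,   /* 64-byte stripe */
                                    const unsigned char* secret,  /* 64-byte segment */
                                    size_t lane)
    {
        uint64_t data_val, key64;
        memcpy(&data_val, input + lane * 8, 8);   /* assumes little-endian host */
        memcpy(&key64, secret + lane * 8, 8);
        {
            uint64_t const data_key = data_val ^ key64;
            acc[lane ^ 1] += data_val;               /* raw input feeds the adjacent lane */
            acc[lane] += (data_key & 0xFFFFFFFF)     /* 32x32->64 multiply of the keyed   */
                       * (data_key >> 32);           /* halves, like XXH_mult32to64       */
        }
    }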
@@ -4206,8 +4397,9 @@ XXH3_initCustomSecret_scalar(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
      * placed sequentially, in order, at the top of the unrolled loop.
      *
      * While MOVK is great for generating constants (2 cycles for a 64-bit
-     * constant compared to 4 cycles for LDR),
-     *
+     * constant compared to 4 cycles for LDR), it fights for bandwidth with
+     * the arithmetic instructions.
+     *
      *   I   L   S
      * MOVK
      * MOVK
@@ -4224,6 +4416,9 @@ XXH3_initCustomSecret_scalar(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
      * ADD  LDR
      * SUB     STR
      *         STR
+     *
+     * See XXH3_NEON_LANES for details on the pipeline.
+     *
      * XXH3_64bits_withSeed, len == 256, Snapdragon 835
      *   without hack: 2654.4 MB/s
      *   with hack:    3202.9 MB/s
@@ -4422,9 +4617,11 @@ XXH3_hashLong_64b_internal(const void* XXH_RESTRICT input, size_t len,
 }

 /*
- * It's important for performance that XXH3_hashLong is not inlined.
+ * It's important for performance to transmit secret's size (when it's static)
+ * so that the compiler can properly optimize the vectorized loop.
+ * This makes a big performance difference for "medium" keys (<1 KB) when using AVX instruction set.
  */
-XXH_NO_INLINE XXH64_hash_t
+XXH_FORCE_INLINE XXH64_hash_t
 XXH3_hashLong_64b_withSecret(const void* XXH_RESTRICT input, size_t len,
                              XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
 {
@@ -4433,11 +4630,10 @@ XXH3_hashLong_64b_withSecret(const void* XXH_RESTRICT input, size_t len,
 }

 /*
- * It's important for performance that XXH3_hashLong is not inlined. Not sure
- * why (uop cache maybe?), but the difference is large and easily measurable.
- *
- * XXH3_hashLong_64b_default takes advantage that the secret is known statically,
- * and uses this opportunity to streamline the generated code for better performance.
+ * It's preferable for performance that XXH3_hashLong is not inlined,
+ * as it results in a smaller function for small data, easier on the instruction cache.
+ * Note that inside this no_inline function, we do inline the internal loop,
+ * and provide a statically defined secret size to allow optimization of vector loop.
  */
 XXH_NO_INLINE XXH64_hash_t
 XXH3_hashLong_64b_default(const void* XXH_RESTRICT input, size_t len,
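The two comment rewrites above carry the design point: the length-specialized inner path is force-inlined so each call site sees the secret size as a compile-time constant, while the outer entry points stay non-inlined to keep small-input code compact. A generic sketch of the effect (names ours, not xxHash API):

    #include <stdint.h>
    #include <string.h>

    /* inlined with a constant 'len', this loop can be fully unrolled
     * and vectorized; behind an opaque call boundary it cannot */
    static inline uint64_t fold_secret(const unsigned char* secret, size_t len)
    {
        uint64_t x = 0;
        size_t i;
        for (i = 0; i + 8 <= len; i += 8) {
            uint64_t v;
            memcpy(&v, secret + i, sizeof v);
            x ^= v;   /* stand-in for the real per-stripe work */
        }
        return x;
    }
    /* e.g. fold_secret(buf, 192): 'len' is a call-site constant after inlining */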
@@ -4537,6 +4733,14 @@ XXH3_64bits_withSeed(const void* input, size_t len, XXH64_hash_t seed)
     return XXH3_64bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_withSeed);
 }

+XXH_PUBLIC_API XXH64_hash_t
+XXH3_64bits_withSecretandSeed(const void* input, size_t len, const void* secret, size_t secretSize, XXH64_hash_t seed)
+{
+    if (len <= XXH3_MIDSIZE_MAX)
+        return XXH3_64bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL);
+    return XXH3_hashLong_64b_withSecret(input, len, seed, (const xxh_u8*)secret, secretSize);
+}
+

 /* === XXH3 streaming === */

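A minimal caller of the new one-shot entry point above. This is a sketch, not part of the diff; it assumes single-file usage of the bundled header (XXH_INLINE_ALL), since the secret-and-seed declarations live in the header's experimental section:

    #define XXH_INLINE_ALL
    #include "xxhash.h"
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        const char msg[] = "hello xxh3";
        unsigned char secret[192];              /* >= XXH3_SECRET_SIZE_MIN (136) */
        memset(secret, 0x5a, sizeof(secret));   /* stand-in for real entropy */

        /* seed drives short/mid-size inputs, secret drives long inputs */
        XXH64_hash_t const h = XXH3_64bits_withSecretandSeed(
                msg, sizeof(msg) - 1, secret, sizeof(secret), 42);
        printf("%016llx\n", (unsigned long long)h);
        return 0;
    }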
@@ -4625,13 +4829,13 @@ XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr)
 XXH_PUBLIC_API void
 XXH3_copyState(XXH3_state_t* dst_state, const XXH3_state_t* src_state)
 {
-    memcpy(dst_state, src_state, sizeof(*dst_state));
+    XXH_memcpy(dst_state, src_state, sizeof(*dst_state));
 }

 static void
 XXH3_reset_internal(XXH3_state_t* statePtr,
-                           XXH64_hash_t seed,
-                           const void* secret, size_t secretSize)
+                    XXH64_hash_t seed,
+                    const void* secret, size_t secretSize)
 {
     size_t const initStart = offsetof(XXH3_state_t, bufferedSize);
     size_t const initLength = offsetof(XXH3_state_t, nbStripesPerBlock) - initStart;
@@ -4648,6 +4852,7 @@ XXH3_reset_internal(XXH3_state_t* statePtr,
     statePtr->acc[6] = XXH_PRIME64_5;
     statePtr->acc[7] = XXH_PRIME32_1;
     statePtr->seed = seed;
+    statePtr->useSeed = (seed != 0);
     statePtr->extSecret = (const unsigned char*)secret;
     XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
     statePtr->secretLimit = secretSize - XXH_STRIPE_LEN;
@@ -4680,11 +4885,24 @@ XXH3_64bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed)
 {
     if (statePtr == NULL) return XXH_ERROR;
     if (seed==0) return XXH3_64bits_reset(statePtr);
-    if (seed != statePtr->seed) XXH3_initCustomSecret(statePtr->customSecret, seed);
+    if ((seed != statePtr->seed) || (statePtr->extSecret != NULL))
+        XXH3_initCustomSecret(statePtr->customSecret, seed);
     XXH3_reset_internal(statePtr, seed, NULL, XXH_SECRET_DEFAULT_SIZE);
     return XXH_OK;
 }

+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_64bits_reset_withSecretandSeed(XXH3_state_t* statePtr, const void* secret, size_t secretSize, XXH64_hash_t seed64)
+{
+    if (statePtr == NULL) return XXH_ERROR;
+    if (secret == NULL) return XXH_ERROR;
+    if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
+    XXH3_reset_internal(statePtr, seed64, secret, secretSize);
+    statePtr->useSeed = 1; /* always, even if seed64==0 */
+    return XXH_OK;
+}
+
 /* Note : when XXH3_consumeStripes() is invoked,
  * there must be a guarantee that at least one more byte must be consumed from input
  * so that the function can blindly consume all stripes using the "normal" secret segment */
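The streaming counterpart added above mirrors the one-shot variant; a hedged usage sketch (helper name ours, same XXH_INLINE_ALL assumption as before):

    #define XXH_INLINE_ALL
    #include "xxhash.h"
    #include <stddef.h>

    /* hash two buffers as one stream with the new reset variant */
    static XXH64_hash_t stream_sketch(const void* p1, size_t n1,
                                      const void* p2, size_t n2,
                                      const void* secret, size_t secretSize)
    {
        XXH64_hash_t h = 0;
        XXH3_state_t* const st = XXH3_createState();
        if (st == NULL) return 0;
        if (XXH3_64bits_reset_withSecretandSeed(st, secret, secretSize, 42) == XXH_OK) {
            XXH3_64bits_update(st, p1, n1);
            XXH3_64bits_update(st, p2, n2);
            h = XXH3_64bits_digest(st);   /* useSeed stays set even for seed==0 */
        }
        XXH3_freeState(st);
        return h;
    }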
@@ -4712,35 +4930,48 @@ XXH3_consumeStripes(xxh_u64* XXH_RESTRICT acc,
     }
 }

+#ifndef XXH3_STREAM_USE_STACK
+# ifndef __clang__ /* clang doesn't need additional stack space */
+#   define XXH3_STREAM_USE_STACK 1
+# endif
+#endif
 /*
  * Both XXH3_64bits_update and XXH3_128bits_update use this routine.
  */
 XXH_FORCE_INLINE XXH_errorcode
-XXH3_update(XXH3_state_t* state,
-            const xxh_u8* input, size_t len,
+XXH3_update(XXH3_state_t* XXH_RESTRICT const state,
+            const xxh_u8* XXH_RESTRICT input, size_t len,
             XXH3_f_accumulate_512 f_acc512,
             XXH3_f_scrambleAcc f_scramble)
 {
-    if (input==NULL)
-#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
+    if (input==NULL) {
+        XXH_ASSERT(len == 0);
         return XXH_OK;
-#else
-        return XXH_ERROR;
-#endif
+    }

+    XXH_ASSERT(state != NULL);
     {   const xxh_u8* const bEnd = input + len;
         const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
+#if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 1
+        /* For some reason, gcc and MSVC seem to suffer greatly
+         * when operating accumulators directly into state.
+         * Operating into stack space seems to enable proper optimization.
+         * clang, on the other hand, doesn't seem to need this trick */
+        XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[8]; memcpy(acc, state->acc, sizeof(acc));
+#else
+        xxh_u64* XXH_RESTRICT const acc = state->acc;
+#endif
         state->totalLen += len;
         XXH_ASSERT(state->bufferedSize <= XXH3_INTERNALBUFFER_SIZE);

-        if (state->bufferedSize + len <= XXH3_INTERNALBUFFER_SIZE) {  /* fill in tmp buffer */
+        /* small input : just fill in tmp buffer */
+        if (state->bufferedSize + len <= XXH3_INTERNALBUFFER_SIZE) {
             XXH_memcpy(state->buffer + state->bufferedSize, input, len);
             state->bufferedSize += (XXH32_hash_t)len;
             return XXH_OK;
         }
-        /* total input is now > XXH3_INTERNALBUFFER_SIZE */

+        /* total input is now > XXH3_INTERNALBUFFER_SIZE */
 #define XXH3_INTERNALBUFFER_STRIPES (XXH3_INTERNALBUFFER_SIZE / XXH_STRIPE_LEN)
         XXH_STATIC_ASSERT(XXH3_INTERNALBUFFER_SIZE % XXH_STRIPE_LEN == 0);   /* clean multiple */

@@ -4752,7 +4983,7 @@ XXH3_update(XXH3_state_t* state,
             size_t const loadSize = XXH3_INTERNALBUFFER_SIZE - state->bufferedSize;
             XXH_memcpy(state->buffer + state->bufferedSize, input, loadSize);
             input += loadSize;
-            XXH3_consumeStripes(state->acc,
+            XXH3_consumeStripes(acc,
                                &state->nbStripesSoFar, state->nbStripesPerBlock,
                                state->buffer, XXH3_INTERNALBUFFER_STRIPES,
                                secret, state->secretLimit,
@@ -4761,25 +4992,62 @@ XXH3_update(XXH3_state_t* state,
         }
         XXH_ASSERT(input < bEnd);

-        /* Consume input by a multiple of internal buffer size */
-        if (bEnd - input > XXH3_INTERNALBUFFER_SIZE) {
-            const xxh_u8* const limit = bEnd - XXH3_INTERNALBUFFER_SIZE;
-            do {
-                XXH3_consumeStripes(state->acc,
-                                   &state->nbStripesSoFar, state->nbStripesPerBlock,
-                                   input, XXH3_INTERNALBUFFER_STRIPES,
-                                   secret, state->secretLimit,
-                                   f_acc512, f_scramble);
-                input += XXH3_INTERNALBUFFER_SIZE;
-            } while (input<limit);
-            /* for last partial stripe */
-            memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);
+        /* large input to consume : ingest per full block */
+        if ((size_t)(bEnd - input) > state->nbStripesPerBlock * XXH_STRIPE_LEN) {
+            size_t nbStripes = (size_t)(bEnd - 1 - input) / XXH_STRIPE_LEN;
+            XXH_ASSERT(state->nbStripesPerBlock >= state->nbStripesSoFar);
+            /* join to current block's end */
+            {   size_t const nbStripesToEnd = state->nbStripesPerBlock - state->nbStripesSoFar;
+                XXH_ASSERT(nbStripesToEnd <= nbStripes);
+                XXH3_accumulate(acc, input, secret + state->nbStripesSoFar * XXH_SECRET_CONSUME_RATE, nbStripesToEnd, f_acc512);
+                f_scramble(acc, secret + state->secretLimit);
+                state->nbStripesSoFar = 0;
+                input += nbStripesToEnd * XXH_STRIPE_LEN;
+                nbStripes -= nbStripesToEnd;
+            }
+            /* consume per entire blocks */
+            while(nbStripes >= state->nbStripesPerBlock) {
+                XXH3_accumulate(acc, input, secret, state->nbStripesPerBlock, f_acc512);
+                f_scramble(acc, secret + state->secretLimit);
+                input += state->nbStripesPerBlock * XXH_STRIPE_LEN;
+                nbStripes -= state->nbStripesPerBlock;
+            }
+            /* consume last partial block */
+            XXH3_accumulate(acc, input, secret, nbStripes, f_acc512);
+            input += nbStripes * XXH_STRIPE_LEN;
+            XXH_ASSERT(input < bEnd);  /* at least some bytes left */
+            state->nbStripesSoFar = nbStripes;
+            /* buffer predecessor of last partial stripe */
+            XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);
+            XXH_ASSERT(bEnd - input <= XXH_STRIPE_LEN);
+        } else {
+            /* content to consume <= block size */
+            /* Consume input by a multiple of internal buffer size */
+            if (bEnd - input > XXH3_INTERNALBUFFER_SIZE) {
+                const xxh_u8* const limit = bEnd - XXH3_INTERNALBUFFER_SIZE;
+                do {
+                    XXH3_consumeStripes(acc,
+                                       &state->nbStripesSoFar, state->nbStripesPerBlock,
+                                       input, XXH3_INTERNALBUFFER_STRIPES,
+                                       secret, state->secretLimit,
+                                       f_acc512, f_scramble);
+                    input += XXH3_INTERNALBUFFER_SIZE;
+                } while (input<limit);
+                /* buffer predecessor of last partial stripe */
+                XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);
+            }
         }
-        XXH_ASSERT(input < bEnd);

         /* Some remaining input (always) : buffer it */
+        XXH_ASSERT(input < bEnd);
+        XXH_ASSERT(bEnd - input <= XXH3_INTERNALBUFFER_SIZE);
+        XXH_ASSERT(state->bufferedSize == 0);
         XXH_memcpy(state->buffer, input, (size_t)(bEnd-input));
         state->bufferedSize = (XXH32_hash_t)(bEnd-input);
+#if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 1
+        /* save stack accumulators into state */
+        memcpy(state->acc, acc, sizeof(acc));
+#endif
     }

     return XXH_OK;
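The XXH3_STREAM_USE_STACK change above is worth restating outside the diff: gcc and MSVC optimize far better when the accumulators live in a local array rather than behind the state pointer. A toy illustration of the same copy-in/copy-out pattern (types and names ours):

    #include <stdint.h>
    #include <string.h>

    typedef struct { uint64_t acc[8]; } toy_state;

    static void toy_consume(toy_state* s, const uint64_t* in, size_t n)
    {
        uint64_t acc[8];                   /* stack copy, as in XXH3_update */
        memcpy(acc, s->acc, sizeof(acc));
        {
            size_t i;
            for (i = 0; i < n; i++)
                acc[i & 7] += in[i];       /* stand-in for the real stripe mix */
        }
        memcpy(s->acc, acc, sizeof(acc));  /* write back once at the end */
    }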
@@ -4803,7 +5071,7 @@ XXH3_digest_long (XXH64_hash_t* acc,
      * Digest on a local copy. This way, the state remains unaltered, and it can
      * continue ingesting more input afterwards.
      */
-    memcpy(acc, state->acc, sizeof(state->acc));
+    XXH_memcpy(acc, state->acc, sizeof(state->acc));
     if (state->bufferedSize >= XXH_STRIPE_LEN) {
         size_t const nbStripes = (state->bufferedSize - 1) / XXH_STRIPE_LEN;
         size_t nbStripesSoFar = state->nbStripesSoFar;
@@ -4820,8 +5088,8 @@ XXH3_digest_long (XXH64_hash_t* acc,
         xxh_u8 lastStripe[XXH_STRIPE_LEN];
         size_t const catchupSize = XXH_STRIPE_LEN - state->bufferedSize;
         XXH_ASSERT(state->bufferedSize > 0);  /* there is always some input buffered */
-        memcpy(lastStripe, state->buffer + sizeof(state->buffer) - catchupSize, catchupSize);
-        memcpy(lastStripe + catchupSize, state->buffer, state->bufferedSize);
+        XXH_memcpy(lastStripe, state->buffer + sizeof(state->buffer) - catchupSize, catchupSize);
+        XXH_memcpy(lastStripe + catchupSize, state->buffer, state->bufferedSize);
         XXH3_accumulate_512(acc,
                             lastStripe,
                             secret + state->secretLimit - XXH_SECRET_LASTACC_START);
@@ -4840,58 +5108,13 @@ XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (const XXH3_state_t* state)
                   (xxh_u64)state->totalLen * XXH_PRIME64_1);
     }
     /* totalLen <= XXH3_MIDSIZE_MAX: digesting a short input */
-    if (state->seed)
+    if (state->useSeed)
         return XXH3_64bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed);
     return XXH3_64bits_withSecret(state->buffer, (size_t)(state->totalLen),
                                   secret, state->secretLimit + XXH_STRIPE_LEN);
 }


-#define XXH_MIN(x, y) (((x) > (y)) ? (y) : (x))
-
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API void
-XXH3_generateSecret(void* secretBuffer, const void* customSeed, size_t customSeedSize)
-{
-    XXH_ASSERT(secretBuffer != NULL);
-    if (customSeedSize == 0) {
-        memcpy(secretBuffer, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE);
-        return;
-    }
-    XXH_ASSERT(customSeed != NULL);
-
-    { size_t const segmentSize = sizeof(XXH128_hash_t);
-      size_t const nbSegments = XXH_SECRET_DEFAULT_SIZE / segmentSize;
-      XXH128_canonical_t scrambler;
-      XXH64_hash_t seeds[12];
-      size_t segnb;
-      XXH_ASSERT(nbSegments == 12);
-      XXH_ASSERT(segmentSize * nbSegments == XXH_SECRET_DEFAULT_SIZE); /* exact multiple */
-      XXH128_canonicalFromHash(&scrambler, XXH128(customSeed, customSeedSize, 0));
-
-      /*
-       * Copy customSeed to seeds[], truncating or repeating as necessary.
-       */
-      { size_t toFill = XXH_MIN(customSeedSize, sizeof(seeds));
-        size_t filled = toFill;
-        memcpy(seeds, customSeed, toFill);
-        while (filled < sizeof(seeds)) {
-            toFill = XXH_MIN(filled, sizeof(seeds) - filled);
-            memcpy((char*)seeds + filled, seeds, toFill);
-            filled += toFill;
-      } }
-
-      /* generate secret */
-      memcpy(secretBuffer, &scrambler, sizeof(scrambler));
-      for (segnb=1; segnb < nbSegments; segnb++) {
-          size_t const segmentStart = segnb * segmentSize;
-          XXH128_canonical_t segment;
-          XXH128_canonicalFromHash(&segment,
-              XXH128(&scrambler, sizeof(scrambler), XXH_readLE64(seeds + segnb) + segnb) );
-          memcpy((char*)secretBuffer + segmentStart, &segment, sizeof(segment));
-    } }
-}
-

 /* ==========================================
  * XXH3 128 bits (a.k.a XXH128)
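The useSeed switch above exists so that a short input digested through the streaming API matches the corresponding one-shot call. A self-check sketch of that invariant (function name ours; XXH_INLINE_ALL as before):

    #define XXH_INLINE_ALL
    #include "xxhash.h"
    #include <assert.h>
    #include <stddef.h>

    static void check_equivalence(const unsigned char* buf, size_t len,
                                  XXH64_hash_t seed)
    {
        XXH64_hash_t const oneshot = XXH3_64bits_withSeed(buf, len, seed);
        XXH3_state_t* const st = XXH3_createState();
        assert(st != NULL);
        XXH3_64bits_reset_withSeed(st, seed);
        XXH3_64bits_update(st, buf, len / 2);                 /* arbitrary split */
        XXH3_64bits_update(st, buf + len / 2, len - len / 2);
        assert(XXH3_64bits_digest(st) == oneshot);
        XXH3_freeState(st);
    }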
@@ -5193,9 +5416,10 @@ XXH3_hashLong_128b_default(const void* XXH_RESTRICT input, size_t len,
 }

 /*
- * It's important for performance that XXH3_hashLong is not inlined.
+ * It's important for performance to pass @secretLen (when it's static)
+ * to the compiler, so that it can properly optimize the vectorized loop.
  */
-XXH_NO_INLINE XXH128_hash_t
+XXH_FORCE_INLINE XXH128_hash_t
 XXH3_hashLong_128b_withSecret(const void* XXH_RESTRICT input, size_t len,
                               XXH64_hash_t seed64,
                               const void* XXH_RESTRICT secret, size_t secretLen)
@@ -5288,6 +5512,15 @@ XXH3_128bits_withSeed(const void* input, size_t len, XXH64_hash_t seed)
                                 XXH3_hashLong_128b_withSeed);
 }

+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API XXH128_hash_t
+XXH3_128bits_withSecretandSeed(const void* input, size_t len, const void* secret, size_t secretSize, XXH64_hash_t seed)
+{
+    if (len <= XXH3_MIDSIZE_MAX)
+        return XXH3_128bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL);
+    return XXH3_hashLong_128b_withSecret(input, len, seed, secret, secretSize);
+}
+
 /*! @ingroup xxh3_family */
 XXH_PUBLIC_API XXH128_hash_t
 XXH128(const void* input, size_t len, XXH64_hash_t seed)
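The 128-bit variant added above dispatches exactly like its 64-bit sibling but returns a two-word struct; XXH128_canonicalFromHash (touched further down in this diff) serializes it in a stable big-endian layout. A brief sketch (same assumptions as the earlier examples):

    #define XXH_INLINE_ALL
    #include "xxhash.h"
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        unsigned char secret[192];              /* stand-in secret, >= 136 bytes */
        XXH128_hash_t h;
        XXH128_canonical_t canon;
        memset(secret, 0x5a, sizeof(secret));
        h = XXH3_128bits_withSecretandSeed("abc", 3, secret, sizeof(secret), 7);
        XXH128_canonicalFromHash(&canon, h);    /* storage/transmission form */
        printf("%016llx%016llx\n",
               (unsigned long long)h.high64, (unsigned long long)h.low64);
        return 0;
    }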
@@ -5299,7 +5532,7 @@ XXH128(const void* input, size_t len, XXH64_hash_t seed)
 /* === XXH3 128-bit streaming === */

 /*
- * All the functions are actually the same as for 64-bit streaming variant.
+ * All initialization and update functions are identical to 64-bit streaming variant.
  * The only difference is the finalization routine.
  */

@@ -5307,31 +5540,28 @@ XXH128(const void* input, size_t len, XXH64_hash_t seed)
 XXH_PUBLIC_API XXH_errorcode
 XXH3_128bits_reset(XXH3_state_t* statePtr)
 {
-    if (statePtr == NULL) return XXH_ERROR;
-    XXH3_reset_internal(statePtr, 0, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE);
-    return XXH_OK;
+    return XXH3_64bits_reset(statePtr);
 }

 /*! @ingroup xxh3_family */
 XXH_PUBLIC_API XXH_errorcode
 XXH3_128bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize)
 {
-    if (statePtr == NULL) return XXH_ERROR;
-    XXH3_reset_internal(statePtr, 0, secret, secretSize);
-    if (secret == NULL) return XXH_ERROR;
-    if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
-    return XXH_OK;
+    return XXH3_64bits_reset_withSecret(statePtr, secret, secretSize);
 }

 /*! @ingroup xxh3_family */
 XXH_PUBLIC_API XXH_errorcode
 XXH3_128bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed)
 {
-    if (statePtr == NULL) return XXH_ERROR;
-    if (seed==0) return XXH3_128bits_reset(statePtr);
-    if (seed != statePtr->seed) XXH3_initCustomSecret(statePtr->customSecret, seed);
-    XXH3_reset_internal(statePtr, seed, NULL, XXH_SECRET_DEFAULT_SIZE);
-    return XXH_OK;
+    return XXH3_64bits_reset_withSeed(statePtr, seed);
+}
+
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr, const void* secret, size_t secretSize, XXH64_hash_t seed)
+{
+    return XXH3_64bits_reset_withSecretandSeed(statePtr, secret, secretSize, seed);
 }

 /*! @ingroup xxh3_family */
@@ -5406,8 +5636,8 @@ XXH128_canonicalFromHash(XXH128_canonical_t* dst, XXH128_hash_t hash)
         hash.high64 = XXH_swap64(hash.high64);
         hash.low64 = XXH_swap64(hash.low64);
     }
-    memcpy(dst, &hash.high64, sizeof(hash.high64));
-    memcpy((char*)dst + sizeof(hash.high64), &hash.low64, sizeof(hash.low64));
+    XXH_memcpy(dst, &hash.high64, sizeof(hash.high64));
+    XXH_memcpy((char*)dst + sizeof(hash.high64), &hash.low64, sizeof(hash.low64));
 }

 /*! @ingroup xxh3_family */
@@ -5420,6 +5650,77 @@ XXH128_hashFromCanonical(const XXH128_canonical_t* src)
     return h;
 }

+
+
+/* ==========================================
+ * Secret generators
+ * ==========================================
+ */
+#define XXH_MIN(x, y) (((x) > (y)) ? (y) : (x))
+
+XXH_FORCE_INLINE void XXH3_combine16(void* dst, XXH128_hash_t h128)
+{
+    XXH_writeLE64( dst, XXH_readLE64(dst) ^ h128.low64 );
+    XXH_writeLE64( (char*)dst+8, XXH_readLE64((char*)dst+8) ^ h128.high64 );
+}
+
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_generateSecret(void* secretBuffer, size_t secretSize, const void* customSeed, size_t customSeedSize)
+{
+#if (XXH_DEBUGLEVEL >= 1)
+    XXH_ASSERT(secretBuffer != NULL);
+    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
+#else
+    /* production mode, assert() are disabled */
+    if (secretBuffer == NULL) return XXH_ERROR;
+    if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
+#endif
+
+    if (customSeedSize == 0) {
+        customSeed = XXH3_kSecret;
+        customSeedSize = XXH_SECRET_DEFAULT_SIZE;
+    }
+#if (XXH_DEBUGLEVEL >= 1)
+    XXH_ASSERT(customSeed != NULL);
+#else
+    if (customSeed == NULL) return XXH_ERROR;
+#endif
+
+    /* Fill secretBuffer with a copy of customSeed - repeat as needed */
+    {   size_t pos = 0;
+        while (pos < secretSize) {
+            size_t const toCopy = XXH_MIN((secretSize - pos), customSeedSize);
+            memcpy((char*)secretBuffer + pos, customSeed, toCopy);
+            pos += toCopy;
+    }   }
+
+    {   size_t const nbSeg16 = secretSize / 16;
+        size_t n;
+        XXH128_canonical_t scrambler;
+        XXH128_canonicalFromHash(&scrambler, XXH128(customSeed, customSeedSize, 0));
+        for (n=0; n<nbSeg16; n++) {
+            XXH128_hash_t const h128 = XXH128(&scrambler, sizeof(scrambler), n);
+            XXH3_combine16((char*)secretBuffer + n*16, h128);
+        }
+        /* last segment */
+        XXH3_combine16((char*)secretBuffer + secretSize - 16, XXH128_hashFromCanonical(&scrambler));
+    }
+    return XXH_OK;
+}
+
+/*! @ingroup xxh3_family */
+XXH_PUBLIC_API void
+XXH3_generateSecret_fromSeed(void* secretBuffer, XXH64_hash_t seed)
+{
+    XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
+    XXH3_initCustomSecret(secret, seed);
+    XXH_ASSERT(secretBuffer != NULL);
+    memcpy(secretBuffer, secret, XXH_SECRET_DEFAULT_SIZE);
+}
+
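The two generators above are the supported ways to build a custom secret: XXH3_generateSecret conditions arbitrary, possibly low-entropy user bytes into a valid secret of any size >= XXH3_SECRET_SIZE_MIN, while XXH3_generateSecret_fromSeed expands a 64-bit seed into a default-size (192-byte) secret. A usage sketch (helper names ours; XXH_INLINE_ALL assumption as before):

    #define XXH_INLINE_ALL
    #include "xxhash.h"
    #include <string.h>

    static XXH64_hash_t hash_with_derived_secret(const void* data, size_t len,
                                                 const char* passphrase)
    {
        unsigned char secret[XXH3_SECRET_DEFAULT_SIZE];   /* 192 bytes */
        if (XXH3_generateSecret(secret, sizeof(secret),
                                passphrase, strlen(passphrase)) != XXH_OK)
            return 0;
        return XXH3_64bits_withSecret(data, len, secret, sizeof(secret));
    }

    static XXH64_hash_t hash_with_seed_secret(const void* data, size_t len,
                                              XXH64_hash_t seed)
    {
        unsigned char secret[XXH3_SECRET_DEFAULT_SIZE];
        XXH3_generateSecret_fromSeed(secret, seed);       /* fills all 192 bytes */
        return XXH3_64bits_withSecret(data, len, secret, sizeof(secret));
    }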
+
+
 /* Pop our optimization override from above */
 #if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \
   && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \